Skip to content

Commit

Permalink
Adding XLS UI elements for gcs file source
Browse files Browse the repository at this point in the history
  • Loading branch information
psainics committed Dec 10, 2023
1 parent 2a905b3 commit 65b0ea8
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 3 deletions.
25 changes: 23 additions & 2 deletions docs/GCSFile-batchsource.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,43 @@ You also can use the macro function ${conn(connection-name)}.
**Project ID:** Google Cloud Project ID, which uniquely identifies a project.
It can be found on the Dashboard in the Google Cloud Platform Console.

**Service Account Type:** Service account type, file path where the service account is located or the JSON content of
the service account.

**Service Account File Path:** Path on the local file system of the service account key. Can be set to 'auto-detect'.

**Service Account JSON:** Contents of the service account JSON file.

**Path:** Path to file(s) to be read. If a directory is specified, terminate the path name with a '/'.
For example, `gs://<bucket>/path/to/directory/`.
An asterisk ("\*") can be used as a wildcard to match a filename pattern.
If no files are found or matched, the pipeline will fail.

**Format:** Format of the data to read.
The format must be one of 'avro', 'blob', 'csv', 'delimited', 'json', 'parquet', 'text', 'tsv', or the
The format must be one of 'avro', 'blob', 'csv', 'delimited', 'json', 'parquet', 'text', 'tsv', 'xls', or the
name of any format plugin that you have deployed to your environment.
If the format is a macro, only the pre-packaged formats can be used.
If the format is 'blob', every input file will be read into a separate record.
The 'blob' format also requires a schema that contains a field named 'body' of type 'bytes'.
If the format is 'text', the schema must contain a field named 'body' of type 'string'.

**Sample Size:** The maximum number of rows that will be examined for automatic data type detection.
The default value is 1000.

**Override:** A list of columns with the corresponding data types for which automatic data type detection is
skipped.

**Terminate If Empty Row:** Whether to terminate the file reading if an empty row is encountered.
The default value is false.

**Select Sheet Using:** Select the sheet by name or number. Default is 'Sheet Number'.

**Sheet Value:** The name/number of the sheet to read from. If not specified, the first sheet will be read.
Sheet numbers are 0-based, i.e. the first sheet is 0.

**Delimiter:** Delimiter to use when the format is 'delimited'. This will be ignored for other formats.

**Use First Row as Header:** Whether to use first row as header. Supported formats are 'text', 'csv', 'tsv', 'delimited'.
**Use First Row as Header:** Whether to use first row as header. Supported formats are 'text', 'csv', 'tsv', 'delimited', 'xls'.

**Enable Quoted Values:** Whether to treat content between quotes as a value. This value will only be used if the format
is 'csv', 'tsv' or 'delimited'. For example, if this is set to true, a line that looks like `1, "a, b, c"` will output two fields.
Expand Down
22 changes: 22 additions & 0 deletions src/main/java/io/cdap/plugin/gcp/gcs/source/GCSSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ public static class GCSSourceConfig extends AbstractFileSourceConfig implements
// String constants holding configuration property names.
private static final String NAME_FILE_SYSTEM_PROPERTIES = "fileSystemProperties";
private static final String NAME_FILE_REGEX = "fileRegex";
private static final String NAME_DELIMITER = "delimiter";
// Property names referenced by the @Name annotations on the sheet, sheetValue and
// terminateIfEmptyRow fields declared further down in this class.
private static final String NAME_SHEET = "sheet";
private static final String NAME_SHEET_VALUE = "sheetValue";
private static final String NAME_TERMINATE_IF_EMPTY_ROW = "terminateIfEmptyRow";

private static final String DEFAULT_ENCRYPTED_METADATA_SUFFIX = ".metadata";

Expand Down Expand Up @@ -214,6 +217,25 @@ public static class GCSSourceConfig extends AbstractFileSourceConfig implements
@Description("The existing connection to use.")
private GCPConnectorConfig connection;

/**
 * How the sheet to read is selected: by name or by number, per the description text.
 * Optional and macro-enabled. The description states that the default is 'Sheet Number',
 * but no default value is assigned in this declaration.
 */
@Name(NAME_SHEET)
@Macro
@Nullable
@Description("Select the sheet by name or number. Default is 'Sheet Number'.")
private String sheet;

/**
 * Name or number of the sheet to read from. Optional and macro-enabled.
 * Per the description text, sheet numbers are 0-based and the first sheet is read when
 * no value is given.
 * NOTE(review): presumably interpreted as a name or a number depending on the
 * {@code sheet} property; confirm against the code that consumes this config.
 */
@Name(NAME_SHEET_VALUE)
@Macro
@Nullable
// The first literal ends with a space so the two sentences do not run together
// ("read.Sheet") once concatenated.
@Description("The name/number of the sheet to read from. If not specified, the first sheet will be read. " +
"Sheet Number are 0 based, ie first sheet is 0.")
private String sheetValue;

/**
 * Whether file reading should terminate when an empty row is encountered, per the
 * description text. Optional and macro-enabled. The description states that the default
 * is 'false', but no default value is assigned in this declaration.
 * NOTE(review): declared as a String rather than a Boolean; presumably parsed by the
 * consumer of this config. Confirm.
 */
@Name(NAME_TERMINATE_IF_EMPTY_ROW)
@Macro
@Nullable
@Description("Whether to terminate the file reading if an empty row is encountered. Default is 'false'.")
private String terminateIfEmptyRow;

@Override
public void validate() {
// no-op
Expand Down
71 changes: 70 additions & 1 deletion widgets/GCSFile-batchsource.json
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,42 @@
"label": "False"
}
}
},
{
"widget-type": "toggle",
"label": "Terminate If Empty Row",
"name": "terminateIfEmptyRow",
"widget-attributes": {
"default": "false",
"on": {
"value": "true",
"label": "True"
},
"off": {
"value": "false",
"label": "False"
}
}
},
{
"widget-type": "select",
"label": "Select Sheet Using",
"name": "sheet",
"widget-attributes": {
"values": [
"Sheet Name",
"Sheet Number"
],
"default": "Sheet Number"
}
},
{
"widget-type": "textbox",
"label": "Sheet Value",
"name": "sheetValue",
"widget-attributes": {
"default": "0"
}
}
]
},
Expand Down Expand Up @@ -673,13 +709,46 @@
{
"name": "skipHeader",
"condition": {
"expression": "format == 'delimited' || format == 'csv' || format == 'tsv'"
"expression": "format == 'delimited' || format == 'csv' || format == 'tsv' || format == 'xls'"
},
"show": [
{
"name": "skipHeader"
}
]
},
{
"name": "sheet",
"condition": {
"expression": "format == 'xls'"
},
"show": [
{
"name": "sheet"
}
]
},
{
"name": "sheetValue",
"condition": {
"expression": "format == 'xls'"
},
"show": [
{
"name": "sheetValue"
}
]
},
{
"name": "terminateIfEmptyRow",
"condition": {
"expression": "format == 'xls'"
},
"show": [
{
"name": "terminateIfEmptyRow"
}
]
}
],
"outputs": [
Expand Down

0 comments on commit 65b0ea8

Please sign in to comment.