From 304ec9d06dab595e5413dcdea5871091d989bebe Mon Sep 17 00:00:00 2001 From: bijay27bit Date: Thu, 12 Dec 2024 12:23:30 +0000 Subject: [PATCH 1/3] GCS Sink additional scenarios. --- .../features/gcs/sink/GCSSink.feature | 172 ++++++++++++++++++ .../features/gcs/sink/GCSSinkError.feature | 34 ++++ .../resources/errorMessage.properties | 1 + .../resources/pluginParameters.properties | 6 + 4 files changed, 213 insertions(+) diff --git a/src/e2e-test/features/gcs/sink/GCSSink.feature b/src/e2e-test/features/gcs/sink/GCSSink.feature index 563a896e87..7a4c7de3a5 100644 --- a/src/e2e-test/features/gcs/sink/GCSSink.feature +++ b/src/e2e-test/features/gcs/sink/GCSSink.feature @@ -265,3 +265,175 @@ Feature: GCS sink - Verification of GCS Sink plugin Then Open and capture logs Then Verify the pipeline status is "Succeeded" Then Verify data is transferred to target GCS bucket + +#Added new scenarios for GCS Sink - Bijay + @BQ_SOURCE_TEST @GCS_SINK_TEST + Scenario:Validate successful records transfer from BigQuery to GCS with macro enabled at sink + Given Open Datafusion Project to configure pipeline + When Source is BigQuery + When Sink is GCS + Then Connect source as "BigQuery" and sink as "GCS" to establish connection + Then Open BigQuery source properties + Then Override Service account details if set in environment variables + Then Enter the BigQuery source mandatory properties + Then Validate "BigQuery" plugin properties + Then Close the BigQuery properties + Then Open GCS sink properties + Then Enter GCS property reference name + Then Enter GCS property "projectId" as macro argument "gcsProjectId" + Then Enter GCS property "serviceAccountType" as macro argument "serviceAccountType" + Then Enter GCS property "serviceAccountFilePath" as macro argument "serviceAccount" + Then Enter GCS property "serviceAccountJSON" as macro argument "serviceAccount" + Then Enter GCS property "path" as macro argument "gcsSinkPath" + Then Enter GCS sink property "pathSuffix" as macro argument 
"gcsPathSuffix" + Then Enter GCS property "format" as macro argument "gcsFormat" + Then Click on the Macro button of Property: "writeHeader" and set the value to: "WriteHeader" + Then Click on the Macro button of Property: "location" and set the value to: "gcsSinkLocation" + Then Click on the Macro button of Property: "contentType" and set the value to: "gcsContentType" + Then Click on the Macro button of Property: "outputFileNameBase" and set the value to: "OutFileNameBase" + Then Click on the Macro button of Property: "fileSystemProperties" and set the value to: "FileSystemPr" + Then Validate "GCS" plugin properties + Then Close the GCS properties + Then Save the pipeline + Then Preview and run the pipeline + Then Enter runtime argument value "projectId" for key "gcsProjectId" + Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" + Then Enter runtime argument value "serviceAccount" for key "serviceAccount" + Then Enter runtime argument value for GCS sink property path key "gcsSinkPath" + Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix" + Then Enter runtime argument value "csvFormat" for key "gcsFormat" + Then Enter runtime argument value "writeHeader" for key "WriteHeader" + Then Enter runtime argument value "contentType" for key "gcsContentType" + Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation" + Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase" + Then Enter runtime argument value "gcsFileSysProperty" for key "FileSystemPr" + Then Run the preview of pipeline with runtime arguments + Then Wait till pipeline preview is in running state + Then Open and capture pipeline preview logs + Then Verify the preview run status of pipeline in the logs is "succeeded" + Then Close the pipeline logs + Then Close the preview + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Enter runtime argument value "projectId" for key 
"gcsProjectId" + Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" + Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" + Then Enter runtime argument value "serviceAccount" for key "serviceAccount" + Then Enter runtime argument value for GCS sink property path key "gcsSinkPath" + Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix" + Then Enter runtime argument value "csvFormat" for key "gcsFormat" + Then Enter runtime argument value "writeHeader" for key "WriteHeader" + Then Enter runtime argument value "contentType" for key "gcsContentType" + Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation" + Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase" + Then Enter runtime argument value "gcsFileSysProperty" for key "FileSystemPr" + Then Run the Pipeline in Runtime with runtime arguments + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Verify data is transferred to target GCS bucket + + @GCS_SINK_TEST @BQ_SOURCE_TEST + Scenario Outline: To verify data is getting transferred successfully from BigQuery to GCS with contenttype selection + Given Open Datafusion Project to configure pipeline + When Source is BigQuery + When Sink is GCS + Then Connect source as "BigQuery" and sink as "GCS" to establish connection + Then Open BigQuery source properties + Then Enter the BigQuery source mandatory properties + Then Validate "BigQuery" plugin properties + Then Close the BigQuery properties + Then Open GCS sink properties + Then Enter GCS property projectId and reference name + Then Enter GCS sink property path + Then Select GCS property format "<FileFormat>" + Then Select GCS sink property contentType "<contentType>" + Then Validate "GCS" plugin properties + Then Close the GCS properties + Then Save and Deploy Pipeline + Then Run the Pipeline in Runtime + Then Wait till
pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Verify data is transferred to target GCS bucket + Examples: + | FileFormat | contentType | + | csv | text/csv | + | tsv | text/plain | + + @BQ_SOURCE_TEST @GCS_SINK_TEST + Scenario: To verify data is getting transferred successfully from BigQuery to GCS using advanced file system properties field + Given Open Datafusion Project to configure pipeline + When Source is BigQuery + When Sink is GCS + Then Connect source as "BigQuery" and sink as "GCS" to establish connection + Then Open BigQuery source properties + Then Override Service account details if set in environment variables + Then Enter the BigQuery source mandatory properties + Then Validate "BigQuery" plugin properties + Then Close the BigQuery properties + Then Open GCS sink properties + Then Enter GCS property projectId and reference name + Then Override Service account details if set in environment variables + Then Enter GCS sink property path + Then Select GCS property format "csv" + Then Click on the Macro button of Property: "fileSystemProperties" and set the value to: "FileSystemPr" + Then Validate "GCS" plugin properties + Then Close the GCS properties + Then Save the pipeline + Then Preview and run the pipeline + Then Enter runtime argument value "gcsFileSysProperty" for key "FileSystemPr" + Then Run the preview of pipeline with runtime arguments + Then Wait till pipeline preview is in running state + Then Open and capture pipeline preview logs + Then Verify the preview run status of pipeline in the logs is "succeeded" + Then Close the pipeline logs + Then Close the preview + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Enter runtime argument value "gcsFileSysProperty" for key "FileSystemPr" + Then Run the Pipeline in Runtime with runtime arguments + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is 
"Succeeded" + Then Verify data is transferred to target GCS bucket + + @GCS_SINK_TEST @BQ_SOURCE_TEST @GCS_Sink_Required + Scenario Outline: To verify successful data transfer from BigQuery to GCS for different formats with write header true + Given Open Datafusion Project to configure pipeline + When Source is BigQuery + When Sink is GCS + Then Connect source as "BigQuery" and sink as "GCS" to establish connection + Then Open BigQuery source properties + Then Enter the BigQuery source mandatory properties + Then Validate "BigQuery" plugin properties + Then Close the BigQuery properties + Then Open GCS sink properties + Then Enter GCS property projectId and reference name + Then Enter GCS sink property path + Then Select GCS property format "<FileFormat>" + Then Click on the Macro button of Property: "writeHeader" and set the value to: "WriteHeader" + Then Validate "GCS" plugin properties + Then Close the GCS properties + Then Save the pipeline + Then Preview and run the pipeline + Then Enter runtime argument value "writeHeader" for key "WriteHeader" + Then Run the preview of pipeline with runtime arguments + Then Wait till pipeline preview is in running state + Then Open and capture pipeline preview logs + Then Verify the preview run status of pipeline in the logs is "succeeded" + Then Close the pipeline logs + Then Close the preview + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Enter runtime argument value "writeHeader" for key "WriteHeader" + Then Run the Pipeline in Runtime with runtime arguments + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Verify data is transferred to target GCS bucket + Examples: + | FileFormat | + | csv | + | tsv | + | delimited | \ No newline at end of file diff --git a/src/e2e-test/features/gcs/sink/GCSSinkError.feature b/src/e2e-test/features/gcs/sink/GCSSinkError.feature index 0718136d4a..8c2029d5c7 100644 ---
a/src/e2e-test/features/gcs/sink/GCSSinkError.feature +++ b/src/e2e-test/features/gcs/sink/GCSSinkError.feature @@ -65,3 +65,37 @@ Feature: GCS sink - Verify GCS Sink plugin error scenarios Then Select GCS property format "csv" Then Click on the Validate button Then Verify that the Plugin Property: "format" is displaying an in-line error message: "errorMessageInvalidFormat" + + @GCS_SINK_TEST @BQ_SOURCE_TEST + Scenario: To verify and validate the Error message in pipeline logs after deploy with invalid bucket path + Given Open Datafusion Project to configure pipeline + When Source is BigQuery + When Sink is GCS + Then Connect source as "BigQuery" and sink as "GCS" to establish connection + Then Open BigQuery source properties + Then Enter the BigQuery source mandatory properties + Then Validate "BigQuery" plugin properties + Then Close the BigQuery properties + Then Open GCS sink properties + Then Enter GCS property projectId and reference name + Then Enter GCS property "path" as macro argument "gcsSinkPath" + Then Select GCS property format "csv" + Then Click on the Validate button + Then Close the GCS properties + Then Save the pipeline + Then Preview and run the pipeline + Then Enter runtime argument value "gcsInvalidBucketNameSink" for key "gcsSinkPath" + Then Run the preview of pipeline with runtime arguments + Then Wait till pipeline preview is in running state + Then Open and capture pipeline preview logs + Then Close the pipeline logs + Then Close the preview + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Enter runtime argument value "gcsInvalidBucketNameSink" for key "gcsSinkPath" + Then Run the Pipeline in Runtime with runtime arguments + Then Wait till pipeline is in running state + Then Verify the pipeline status is "Failed" + Then Open Pipeline logs and verify Log entries having below listed Level and Message: + | Level | Message | + | ERROR | errorMessageInvalidBucketNameSink | \ No newline at end of file diff --git 
a/src/e2e-test/resources/errorMessage.properties b/src/e2e-test/resources/errorMessage.properties index bd8a1610b9..3359f00729 100644 --- a/src/e2e-test/resources/errorMessage.properties +++ b/src/e2e-test/resources/errorMessage.properties @@ -33,4 +33,5 @@ errorMessageMultipleFileWithoutClearDefaultSchema=Found a row with 4 fields when errorMessageInvalidSourcePath=Invalid bucket name in path 'abc@'. Bucket name should errorMessageInvalidDestPath=Invalid bucket name in path 'abc@'. Bucket name should errorMessageInvalidEncryptionKey=CryptoKeyName.parse: formattedString not in valid format: Parameter "abc@" must be +errorMessageInvalidBucketNameSink=Spark program 'phase-1' failed with error: Errors were encountered during validation. Error code: 400, Unable to read or access GCS bucket. Bucket names must be at least 3 characters in length, got 2: 'gg'. Please check the system logs for more details. diff --git a/src/e2e-test/resources/pluginParameters.properties b/src/e2e-test/resources/pluginParameters.properties index aae33e0e89..7dd0f3ef30 100644 --- a/src/e2e-test/resources/pluginParameters.properties +++ b/src/e2e-test/resources/pluginParameters.properties @@ -159,6 +159,12 @@ gcsParquetFileSchema=[{"key":"workforce","value":"string"},{"key":"report_year", {"key":"race_black","value":"long"},{"key":"race_hispanic_latinx","value":"long"},\ {"key":"race_native_american","value":"long"},{"key":"race_white","value":"long"},\ {"key":"tablename","value":"string"}] +gcsInvalidBucketNameSink=gg +writeHeader=true +gcsSinkBucketLocation=US +contentType=application/octet-stream +outputFileNameBase=part +gcsSinkInvalidBucketName=gg@ ## GCS-PLUGIN-PROPERTIES-END ## BIGQUERY-PLUGIN-PROPERTIES-START From e371fff6fbeaf3363da53cf55885074a20167854 Mon Sep 17 00:00:00 2001 From: bijay27bit Date: Fri, 13 Dec 2024 04:36:43 +0000 Subject: [PATCH 2/3] GCS Sink additional scenarios. 
--- src/e2e-test/resources/pluginParameters.properties | 1 - 1 file changed, 1 deletion(-) diff --git a/src/e2e-test/resources/pluginParameters.properties b/src/e2e-test/resources/pluginParameters.properties index 7dd0f3ef30..3bbd6c93fd 100644 --- a/src/e2e-test/resources/pluginParameters.properties +++ b/src/e2e-test/resources/pluginParameters.properties @@ -164,7 +164,6 @@ writeHeader=true gcsSinkBucketLocation=US contentType=application/octet-stream outputFileNameBase=part -gcsSinkInvalidBucketName=gg@ ## GCS-PLUGIN-PROPERTIES-END ## BIGQUERY-PLUGIN-PROPERTIES-START From a48791bb86e5ce892bf6426ff49e8d415ff87911 Mon Sep 17 00:00:00 2001 From: bijay27bit Date: Thu, 19 Dec 2024 14:59:25 +0000 Subject: [PATCH 3/3] Review comments incorporated. --- .../sink/BigQueryToGCSSink_WithMacro.feature | 72 +++++++++++ .../features/gcs/sink/GCSSink.feature | 122 +++++------------- .../features/gcs/sink/GCSSinkError.feature | 10 +- .../resources/errorMessage.properties | 3 +- .../resources/pluginParameters.properties | 2 +- 5 files changed, 110 insertions(+), 99 deletions(-) create mode 100644 src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature diff --git a/src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature b/src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature new file mode 100644 index 0000000000..b449be12ec --- /dev/null +++ b/src/e2e-test/features/gcs/sink/BigQueryToGCSSink_WithMacro.feature @@ -0,0 +1,72 @@ +@GCS_Sink +Feature: GCS sink - Verification of GCS Sink plugin macro scenarios + + @CMEK @BQ_SOURCE_TEST @GCS_SINK_TEST + Scenario:Validate successful records transfer from BigQuery to GCS with macro enabled at sink + Given Open Datafusion Project to configure pipeline + Then Select plugin: "BigQuery" from the plugins list as: "Source" + When Expand Plugin group in the LHS plugins list: "Sink" + When Select plugin: "GCS" from the plugins list as: "Sink" + Then Connect source as "BigQuery" and sink as "GCS" to establish
connection + Then Open BigQuery source properties + Then Override Service account details if set in environment variables + Then Enter the BigQuery source mandatory properties + Then Validate "BigQuery" plugin properties + Then Close the BigQuery properties + Then Open GCS sink properties + Then Enter GCS property reference name + Then Enter GCS property "projectId" as macro argument "gcsProjectId" + Then Enter GCS property "serviceAccountType" as macro argument "serviceAccountType" + Then Enter GCS property "serviceAccountFilePath" as macro argument "serviceAccount" + Then Enter GCS property "serviceAccountJSON" as macro argument "serviceAccount" + Then Enter GCS property "path" as macro argument "gcsSinkPath" + Then Enter GCS sink property "pathSuffix" as macro argument "gcsPathSuffix" + Then Enter GCS property "format" as macro argument "gcsFormat" + Then Click on the Macro button of Property: "writeHeader" and set the value to: "WriteHeader" + Then Click on the Macro button of Property: "location" and set the value to: "gcsSinkLocation" + Then Click on the Macro button of Property: "contentType" and set the value to: "gcsContentType" + Then Click on the Macro button of Property: "outputFileNameBase" and set the value to: "OutFileNameBase" + Then Click on the Macro button of Property: "fileSystemProperties" and set the value to: "FileSystemPr" + Then Enter GCS sink cmek property "encryptionKeyName" as macro argument "cmekGCS" if cmek is enabled + Then Validate "GCS" plugin properties + Then Close the GCS properties + Then Save the pipeline + Then Preview and run the pipeline + Then Enter runtime argument value "projectId" for key "gcsProjectId" + Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" + Then Enter runtime argument value "serviceAccount" for key "serviceAccount" + Then Enter runtime argument value for GCS sink property path key "gcsSinkPath" + Then Enter runtime argument value "gcsPathDateSuffix" for key
"gcsPathSuffix" + Then Enter runtime argument value "csvFormat" for key "gcsFormat" + Then Enter runtime argument value "writeHeader" for key "WriteHeader" + Then Enter runtime argument value "contentType" for key "gcsContentType" + Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation" + Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase" + Then Enter runtime argument value "gcsFileSysProperty" for key "FileSystemPr" + Then Enter runtime argument value "cmekGCS" for GCS cmek property key "cmekGCS" if GCS cmek is enabled + Then Run the preview of pipeline with runtime arguments + Then Wait till pipeline preview is in running state + Then Open and capture pipeline preview logs + Then Verify the preview run status of pipeline in the logs is "succeeded" + Then Close the pipeline logs + Then Close the preview + Then Deploy the pipeline + Then Run the Pipeline in Runtime + Then Enter runtime argument value "projectId" for key "gcsProjectId" + Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" + Then Enter runtime argument value "serviceAccount" for key "serviceAccount" + Then Enter runtime argument value for GCS sink property path key "gcsSinkPath" + Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix" + Then Enter runtime argument value "csvFormat" for key "gcsFormat" + Then Enter runtime argument value "writeHeader" for key "WriteHeader" + Then Enter runtime argument value "contentType" for key "gcsContentType" + Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation" + Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase" + Then Enter runtime argument value "gcsFileSysProperty" for key "FileSystemPr" + Then Enter runtime argument value "cmekGCS" for GCS cmek property key "cmekGCS" if GCS cmek is enabled + Then Run
the Pipeline in Runtime with runtime arguments + Then Wait till pipeline is in running state + Then Open and capture logs + Then Verify the pipeline status is "Succeeded" + Then Verify data is transferred to target GCS bucket + Then Validate the cmek key "cmekGCS" of target GCS bucket if cmek is enabled diff --git a/src/e2e-test/features/gcs/sink/GCSSink.feature b/src/e2e-test/features/gcs/sink/GCSSink.feature index 7a4c7de3a5..4787ea3941 100644 --- a/src/e2e-test/features/gcs/sink/GCSSink.feature +++ b/src/e2e-test/features/gcs/sink/GCSSink.feature @@ -95,7 +95,7 @@ Feature: GCS sink - Verification of GCS Sink plugin | parquet | application/octet-stream | | orc | application/octet-stream | - @GCS_SINK_TEST @BQ_SOURCE_TEST + @BQ_SOURCE_TEST @GCS_SINK_TEST Scenario Outline: To verify data is getting transferred successfully from BigQuery to GCS with combinations of contenttype Given Open Datafusion Project to configure pipeline When Source is BigQuery @@ -266,78 +266,12 @@ Feature: GCS sink - Verification of GCS Sink plugin Then Verify the pipeline status is "Succeeded" Then Verify data is transferred to target GCS bucket -#Added new scenarios for GCS Sink - Bijay @BQ_SOURCE_TEST @GCS_SINK_TEST - Scenario:Validate successful records transfer from BigQuery to GCS with macro enabled at sink - Given Open Datafusion Project to configure pipeline - When Source is BigQuery - When Sink is GCS - Then Connect source as "BigQuery" and sink as "GCS" to establish connection - Then Open BigQuery source properties - Then Override Service account details if set in environment variables - Then Enter the BigQuery source mandatory properties - Then Validate "BigQuery" plugin properties - Then Close the BigQuery properties - Then Open GCS sink properties - Then Enter GCS property reference name - Then Enter GCS property "projectId" as macro argument "gcsProjectId" - Then Enter GCS property "serviceAccountType" as macro argument "serviceAccountType" - Then Enter GCS property 
"serviceAccountFilePath" as macro argument "serviceAccount" - Then Enter GCS property "serviceAccountJSON" as macro argument "serviceAccount" - Then Enter GCS property "path" as macro argument "gcsSinkPath" - Then Enter GCS sink property "pathSuffix" as macro argument "gcsPathSuffix" - Then Enter GCS property "format" as macro argument "gcsFormat" - Then Click on the Macro button of Property: "writeHeader" and set the value to: "WriteHeader" - Then Click on the Macro button of Property: "location" and set the value to: "gcsSinkLocation" - Then Click on the Macro button of Property: "contentType" and set the value to: "gcsContentType" - Then Click on the Macro button of Property: "outputFileNameBase" and set the value to: "OutFileNameBase" - Then Click on the Macro button of Property: "fileSystemProperties" and set the value to: "FileSystemPr" - Then Validate "GCS" plugin properties - Then Close the GCS properties - Then Save the pipeline - Then Preview and run the pipeline - Then Enter runtime argument value "projectId" for key "gcsProjectId" - Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" - Then Enter runtime argument value "serviceAccount" for key "serviceAccount" - Then Enter runtime argument value for GCS sink property path key "gcsSinkPath" - Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix" - Then Enter runtime argument value "csvFormat" for key "gcsFormat" - Then Enter runtime argument value "writeHeader" for key "WriteHeader" - Then Enter runtime argument value "contentType" for key "gcsContentType" - Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation" - Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase" - Then Enter runtime argument value "gcsFileSysProperty" for key "FileSystemPr" - Then Run the preview of pipeline with runtime arguments - Then Wait till pipeline preview is in running state - Then Open and capture pipeline 
preview logs - Then Verify the preview run status of pipeline in the logs is "succeeded" - Then Close the pipeline logs - Then Close the preview - Then Deploy the pipeline - Then Run the Pipeline in Runtime - Then Enter runtime argument value "projectId" for key "gcsProjectId" - Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" - Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType" - Then Enter runtime argument value "serviceAccount" for key "serviceAccount" - Then Enter runtime argument value for GCS sink property path key "gcsSinkPath" - Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix" - Then Enter runtime argument value "csvFormat" for key "gcsFormat" - Then Enter runtime argument value "writeHeader" for key "WriteHeader" - Then Enter runtime argument value "contentType" for key "gcsContentType" - Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation" - Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase" - Then Enter runtime argument value "gcsFileSysProperty" for key "FileSystemPr" - Then Run the Pipeline in Runtime with runtime arguments - Then Wait till pipeline is in running state - Then Open and capture logs - Then Verify the pipeline status is "Succeeded" - Then Verify data is transferred to target GCS bucket - - @GCS_SINK_TEST @BQ_SOURCE_TEST Scenario Outline: To verify data is getting transferred successfully from BigQuery to GCS with contenttype selection Given Open Datafusion Project to configure pipeline - When Source is BigQuery - When Sink is GCS + When Select plugin: "BigQuery" from the plugins list as: "Source" + When Expand Plugin group in the LHS plugins list: "Sink" + When Select plugin: "GCS" from the plugins list as: "Sink" Then Connect source as "BigQuery" and sink as "GCS" to establish connection Then Open BigQuery source properties Then Enter the BigQuery source mandatory properties @@ 
-357,52 +291,56 @@ Feature: GCS sink - Verification of GCS Sink plugin Then Verify the pipeline status is "Succeeded" Then Verify data is transferred to target GCS bucket Examples: - | FileFormat | contentType | - | csv | text/csv | - | tsv | text/plain | + | FileFormat | contentType | + | csv | text/csv | + | tsv | text/plain | - @BQ_SOURCE_TEST @GCS_SINK_TEST - Scenario: To verify data is getting transferred successfully from BigQuery to GCS using advanced file system properties field + @CMEK @BQ_SOURCE_TEST @GCS_SINK_TEST + Scenario:Validate successful records transfer from BigQuery to GCS with advanced file system properties field Given Open Datafusion Project to configure pipeline - When Source is BigQuery - When Sink is GCS - Then Connect source as "BigQuery" and sink as "GCS" to establish connection + Then Select plugin: "BigQuery" from the plugins list as: "Source" + When Expand Plugin group in the LHS plugins list: "Sink" + When Select plugin: "GCS" from the plugins list as: "Sink" Then Open BigQuery source properties + Then Enter BigQuery property reference name + Then Enter BigQuery property projectId "projectId" + Then Enter BigQuery property datasetProjectId "projectId" + Then Enter BigQuery property dataset "dataset" + Then Enter BigQuery source property table name Then Override Service account details if set in environment variables - Then Enter the BigQuery source mandatory properties + Then Enter BiqQuery property encryption key name "cmekBQ" if cmek is enabled + Then Validate output schema with expectedSchema "bqSourceSchema" Then Validate "BigQuery" plugin properties Then Close the BigQuery properties Then Open GCS sink properties - Then Enter GCS property projectId and reference name Then Override Service account details if set in environment variables - Then Enter GCS sink property path - Then Select GCS property format "csv" - Then Click on the Macro button of Property: "fileSystemProperties" and set the value to: "FileSystemPr" + Then Enter 
the GCS sink mandatory properties + Then Enter GCS File system properties field "gcsFileSysProperty" Then Validate "GCS" plugin properties Then Close the GCS properties + Then Connect source as "BigQuery" and sink as "GCS" to establish connection Then Save the pipeline Then Preview and run the pipeline - Then Enter runtime argument value "gcsFileSysProperty" for key "FileSystemPr" - Then Run the preview of pipeline with runtime arguments Then Wait till pipeline preview is in running state Then Open and capture pipeline preview logs Then Verify the preview run status of pipeline in the logs is "succeeded" Then Close the pipeline logs - Then Close the preview + Then Click on preview data for GCS sink + Then Verify preview output schema matches the outputSchema captured in properties + Then Close the preview data Then Deploy the pipeline Then Run the Pipeline in Runtime - Then Enter runtime argument value "gcsFileSysProperty" for key "FileSystemPr" - Then Run the Pipeline in Runtime with runtime arguments Then Wait till pipeline is in running state Then Open and capture logs Then Verify the pipeline status is "Succeeded" Then Verify data is transferred to target GCS bucket - @GCS_SINK_TEST @BQ_SOURCE_TEST @GCS_Sink_Required + @BQ_SOURCE_TEST @GCS_SINK_TEST @GCS_Sink_Required Scenario Outline: To verify successful data transfer from BigQuery to GCS for different formats with write header true Given Open Datafusion Project to configure pipeline - When Source is BigQuery - When Sink is GCS + Then Select plugin: "BigQuery" from the plugins list as: "Source" + When Expand Plugin group in the LHS plugins list: "Sink" + When Select plugin: "GCS" from the plugins list as: "Sink" Then Connect source as "BigQuery" and sink as "GCS" to establish connection Then Open BigQuery source properties Then Enter the BigQuery source mandatory properties @@ -436,4 +374,4 @@ Feature: GCS sink - Verification of GCS Sink plugin | FileFormat | | csv | | tsv | - | delimited | \ No newline at 
end of file + | delimited | diff --git a/src/e2e-test/features/gcs/sink/GCSSinkError.feature b/src/e2e-test/features/gcs/sink/GCSSinkError.feature index 8c2029d5c7..723e2d7ed7 100644 --- a/src/e2e-test/features/gcs/sink/GCSSinkError.feature +++ b/src/e2e-test/features/gcs/sink/GCSSinkError.feature @@ -66,11 +66,12 @@ Feature: GCS sink - Verify GCS Sink plugin error scenarios Then Click on the Validate button Then Verify that the Plugin Property: "format" is displaying an in-line error message: "errorMessageInvalidFormat" - @GCS_SINK_TEST @BQ_SOURCE_TEST + @BQ_SOURCE_TEST @GCS_SINK_TEST Scenario: To verify and validate the Error message in pipeline logs after deploy with invalid bucket path Given Open Datafusion Project to configure pipeline - When Source is BigQuery - When Sink is GCS + When Select plugin: "BigQuery" from the plugins list as: "Source" + When Expand Plugin group in the LHS plugins list: "Sink" + When Select plugin: "GCS" from the plugins list as: "Sink" Then Connect source as "BigQuery" and sink as "GCS" to establish connection Then Open BigQuery source properties Then Enter the BigQuery source mandatory properties @@ -98,4 +99,5 @@ Feature: GCS sink - Verify GCS Sink plugin error scenarios Then Verify the pipeline status is "Failed" Then Open Pipeline logs and verify Log entries having below listed Level and Message: | Level | Message | - | ERROR | errorMessageInvalidBucketNameSink | \ No newline at end of file + | ERROR | errorMessageInvalidBucketNameSink | + Then Close the pipeline logs \ No newline at end of file diff --git a/src/e2e-test/resources/errorMessage.properties b/src/e2e-test/resources/errorMessage.properties index 3359f00729..07bfb47bb6 100644 --- a/src/e2e-test/resources/errorMessage.properties +++ b/src/e2e-test/resources/errorMessage.properties @@ -33,5 +33,4 @@ errorMessageMultipleFileWithoutClearDefaultSchema=Found a row with 4 fields when errorMessageInvalidSourcePath=Invalid bucket name in path 'abc@'. 
Bucket name should errorMessageInvalidDestPath=Invalid bucket name in path 'abc@'. Bucket name should errorMessageInvalidEncryptionKey=CryptoKeyName.parse: formattedString not in valid format: Parameter "abc@" must be -errorMessageInvalidBucketNameSink=Spark program 'phase-1' failed with error: Errors were encountered during validation. Error code: 400, Unable to read or access GCS bucket. Bucket names must be at least 3 characters in length, got 2: 'gg'. Please check the system logs for more details. - +errorMessageInvalidBucketNameSink=Unable to read or access GCS bucket. \ No newline at end of file diff --git a/src/e2e-test/resources/pluginParameters.properties b/src/e2e-test/resources/pluginParameters.properties index 3bbd6c93fd..dac313ca56 100644 --- a/src/e2e-test/resources/pluginParameters.properties +++ b/src/e2e-test/resources/pluginParameters.properties @@ -159,7 +159,7 @@ gcsParquetFileSchema=[{"key":"workforce","value":"string"},{"key":"report_year", {"key":"race_black","value":"long"},{"key":"race_hispanic_latinx","value":"long"},\ {"key":"race_native_american","value":"long"},{"key":"race_white","value":"long"},\ {"key":"tablename","value":"string"}] -gcsInvalidBucketNameSink=gg +gcsInvalidBucketNameSink=ggg writeHeader=true gcsSinkBucketLocation=US contentType=application/octet-stream