Formatting changes to templates #1667

Merged
21 commits, merged Dec 12, 2024
@@ -652,7 +652,7 @@ protected void processDescriptions(
this.setHelpText(helpText);

if (example != null && !example.isEmpty()) {
this.setHelpText(this.getHelpText() + " (Example: " + example + ")");
this.setHelpText(this.getHelpText() + " For example, `" + example + "`");
}
}
}
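For context, a minimal, self-contained Java sketch of the strings the old and new help-text formats produce; the parameter text and example value below are illustrative stand-ins taken from the README diffs further down, not code from this PR.

```java
// Illustrative only: shows the string each format yields for a sample parameter.
public class HelpTextFormatSketch {
  public static void main(String[] args) {
    String helpText = "The Cloud Storage location of the files you'd like to process.";
    String example = "gs://your-bucket/your-files/*.txt";

    // Old format: parenthesized "(Example: ...)" clause appended to the help text.
    String oldStyle = helpText + " (Example: " + example + ")";

    // New format: a "For example, `...`" sentence; the backticks render the
    // sample value as inline code in the generated Markdown.
    String newStyle = helpText + " For example, `" + example + "`";

    System.out.println(oldStyle);
    System.out.println(newStyle);
  }
}
```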
4 changes: 2 additions & 2 deletions plugins/core-plugin/src/main/resources/README-template.md
@@ -21,12 +21,12 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat

### Required parameters

<#list spec.metadata.parameters as parameter><#if !parameter.optional!false>* **${parameter.name}** : ${parameter.helpText?ensure_ends_with(".")}
<#list spec.metadata.parameters as parameter><#if !parameter.optional!false>* **${parameter.name}**: ${parameter.helpText?ensure_ends_with(".")}
</#if></#list>

### Optional parameters

<#list spec.metadata.parameters as parameter><#if parameter.optional!false>* **${parameter.name}** : ${parameter.helpText?ensure_ends_with(".")}
<#list spec.metadata.parameters as parameter><#if parameter.optional!false>* **${parameter.name}**: ${parameter.helpText?ensure_ends_with(".")}
</#if></#list>


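The README bullets come from this FreeMarker template, so the new `**${parameter.name}**:` form (no space before the colon) is what every regenerated README below picks up. Below is a rough Java sketch of the bullet line the updated template emits for one parameter, assuming FreeMarker's `?ensure_ends_with(".")` simply appends a trailing period when one is missing; the parameter values are illustrative.

```java
// Rough sketch of one rendered README bullet; not the template engine itself.
public class ReadmeBulletSketch {
  // Assumed to mirror FreeMarker's ?ensure_ends_with builtin.
  static String ensureEndsWith(String value, String suffix) {
    return value.endsWith(suffix) ? value : value + suffix;
  }

  public static void main(String[] args) {
    String name = "inputFilePattern";
    String helpText =
        "The Cloud Storage location of the files you'd like to process."
            + " For example, `gs://your-bucket/your-files/*.txt`";

    // New template line: "* **${parameter.name}**: ${parameter.helpText?ensure_ends_with(".")}"
    String bullet = "* **" + name + "**: " + ensureEndsWith(helpText, ".");
    System.out.println(bullet);
  }
}
```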
6 changes: 3 additions & 3 deletions python/README_Yaml_Template.md
@@ -25,9 +25,9 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat

### Optional parameters

* **yaml_pipeline** : A yaml description of the pipeline to run.
* **yaml_pipeline_file** : A file in Cloud Storage containing a yaml description of the pipeline to run.
* **jinja_variables** : A json dict of variables used when invoking the jinja preprocessor on the provided yaml pipeline.
* **yaml_pipeline**: A yaml description of the pipeline to run.
* **yaml_pipeline_file**: A file in Cloud Storage containing a yaml description of the pipeline to run.
* **jinja_variables**: A json dict of variables used when invoking the jinja preprocessor on the provided yaml pipeline.



16 changes: 8 additions & 8 deletions v1/README_Bulk_Compress_GCS_Files.md
@@ -27,14 +27,14 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat

### Required parameters

* **inputFilePattern** : The Cloud Storage location of the files you'd like to process. (Example: gs://your-bucket/your-files/*.txt).
* **outputDirectory** : The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. (Example: gs://your-bucket/your-path).
* **outputFailureFile** : The error log output file to use for write failures that occur during compression. The contents will be one line for each file which failed compression. Note that this parameter will allow the pipeline to continue processing in the event of a failure. (Example: gs://your-bucket/compressed/failed.csv).
* **compression** : The compression algorithm used to compress the matched files. Valid algorithms: BZIP2, DEFLATE, GZIP.
* **inputFilePattern**: The Cloud Storage location of the files you'd like to process. For example, `gs://your-bucket/your-files/*.txt`.
* **outputDirectory**: The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. For example, `gs://your-bucket/your-path`.
* **outputFailureFile**: The error log output file to use for write failures that occur during compression. The contents will be one line for each file which failed compression. Note that this parameter will allow the pipeline to continue processing in the event of a failure. For example, `gs://your-bucket/compressed/failed.csv`.
* **compression**: The compression algorithm used to compress the matched files. Valid algorithms: BZIP2, DEFLATE, GZIP.

### Optional parameters

* **outputFilenameSuffix** : Output filename suffix of the files to write. Defaults to .bzip2, .deflate or .gz depending on the compression algorithm.
* **outputFilenameSuffix**: Output filename suffix of the files to write. Defaults to .bzip2, .deflate or .gz depending on the compression algorithm.



@@ -211,9 +211,9 @@ resource "google_dataflow_job" "bulk_compress_gcs_files" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
inputFilePattern = "gs://your-bucket/your-files/*.txt"
outputDirectory = "gs://your-bucket/your-path"
outputFailureFile = "gs://your-bucket/compressed/failed.csv"
inputFilePattern = "<inputFilePattern>"
outputDirectory = "<outputDirectory>"
outputFailureFile = "<outputFailureFile>"
compression = "<compression>"
# outputFilenameSuffix = "<outputFilenameSuffix>"
}
12 changes: 6 additions & 6 deletions v1/README_Bulk_Decompress_GCS_Files.md
@@ -26,9 +26,9 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat

### Required parameters

* **inputFilePattern** : The Cloud Storage location of the files you'd like to process. (Example: gs://your-bucket/your-files/*.gz).
* **outputDirectory** : The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. (Example: gs://your-bucket/decompressed/).
* **outputFailureFile** : The output file to write failures to during the decompression process. If there are no failures, the file will still be created but will be empty. The contents will be one line for each file which failed decompression in CSV format (Filename, Error). Note that this parameter will allow the pipeline to continue processing in the event of a failure. (Example: gs://your-bucket/decompressed/failed.csv).
* **inputFilePattern**: The Cloud Storage location of the files you'd like to process. For example, `gs://your-bucket/your-files/*.gz`.
* **outputDirectory**: The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. For example, `gs://your-bucket/decompressed/`.
* **outputFailureFile**: The output file to write failures to during the decompression process. If there are no failures, the file will still be created but will be empty. The contents will be one line for each file which failed decompression in CSV format (Filename, Error). Note that this parameter will allow the pipeline to continue processing in the event of a failure. For example, `gs://your-bucket/decompressed/failed.csv`.

### Optional parameters

@@ -202,9 +202,9 @@ resource "google_dataflow_job" "bulk_decompress_gcs_files" {
region = var.region
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
inputFilePattern = "gs://your-bucket/your-files/*.gz"
outputDirectory = "gs://your-bucket/decompressed/"
outputFailureFile = "gs://your-bucket/decompressed/failed.csv"
inputFilePattern = "<inputFilePattern>"
outputDirectory = "<outputDirectory>"
outputFailureFile = "<outputFailureFile>"
}
}
```
22 changes: 10 additions & 12 deletions v1/README_Cassandra_To_Cloud_Bigtable.md
@@ -23,21 +23,19 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat

### Required parameters

* **cassandraHosts** : The hosts of the Apache Cassandra nodes in a comma-separated list.
* **cassandraKeyspace** : The Apache Cassandra keyspace where the table is located.
* **cassandraTable** : The Apache Cassandra table to copy.
* **bigtableProjectId** : The Google Cloud project ID associated with the Bigtable instance.
* **bigtableInstanceId** : The ID of the Bigtable instance that the Apache Cassandra table is copied to.
* **bigtableTableId** : The name of the Bigtable table that the Apache Cassandra table is copied to.
* **cassandraHosts**: The hosts of the Apache Cassandra nodes in a comma-separated list.
* **cassandraKeyspace**: The Apache Cassandra keyspace where the table is located.
* **cassandraTable**: The Apache Cassandra table to copy.
* **bigtableProjectId**: The Google Cloud project ID associated with the Bigtable instance.
* **bigtableInstanceId**: The ID of the Bigtable instance that the Apache Cassandra table is copied to.
* **bigtableTableId**: The name of the Bigtable table that the Apache Cassandra table is copied to.

### Optional parameters

* **cassandraPort** : The TCP port to use to reach Apache Cassandra on the nodes. The default value is 9042.
* **defaultColumnFamily** : The name of the column family of the Bigtable table. The default value is default.
* **rowKeySeparator** : The separator used to build row-keys. The default value is '#'.
* **splitLargeRows** : The flag for enabling splitting of large rows into multiple MutateRows requests. Note that when a large row is split between multiple API calls, the updates to the row are not atomic. .
* **writetimeCassandraColumnSchema** : GCS path to schema to copy Cassandra writetimes to Bigtable. The command to generate this schema is ```cqlsh -e "select json * from system_schema.columns where keyspace_name='$CASSANDRA_KEYSPACE' and table_name='$CASSANDRA_TABLE'`" > column_schema.json```. Set $WRITETIME_CASSANDRA_COLUMN_SCHEMA to a GCS path, e.g. `gs://$BUCKET_NAME/column_schema.json`. Then upload the schema to GCS: `gcloud storage cp column_schema.json $WRITETIME_CASSANDRA_COLUMN_SCHEMA`. Requires Cassandra version 2.2 onwards for JSON support.
* **setZeroTimestamp** : The flag for setting Bigtable cell timestamp to 0 if Cassandra writetime is not present. The default behavior for when this flag is not set is to set the Bigtable cell timestamp as the template replication time, i.e. now.
* **cassandraPort**: The TCP port to use to reach Apache Cassandra on the nodes. The default value is `9042`.
* **defaultColumnFamily**: The name of the column family of the Bigtable table. The default value is `default`.
* **rowKeySeparator**: The separator used to build row-keys. The default value is `#`.
* **splitLargeRows**: The flag for enabling splitting of large rows into multiple MutateRows requests. Note that when a large row is split between multiple API calls, the updates to the row are not atomic. .
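As an aside on `rowKeySeparator` above: the parameter is described as the separator used to build row keys, so a composite Bigtable row key is presumably the Cassandra key parts joined with that separator. A hedged Java illustration of that idea only; the template's actual key-building logic is not shown in this diff.

```java
import java.util.List;

// Illustrative only: joins hypothetical Cassandra primary-key values with the
// documented default separator "#"; encoding and ordering details are the
// template's concern and are not reproduced here.
public class RowKeySketch {
  public static void main(String[] args) {
    String rowKeySeparator = "#";
    List<String> primaryKeyValues = List.of("customer-42", "2024-12-01");

    String rowKey = String.join(rowKeySeparator, primaryKeyValues);
    System.out.println(rowKey); // customer-42#2024-12-01
  }
}
```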



20 changes: 10 additions & 10 deletions v1/README_Cloud_BigQuery_to_Cloud_Datastore.md
@@ -15,17 +15,17 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat

### Required parameters

* **readQuery** : A BigQuery SQL query that extracts data from the source. For example, select * from dataset1.sample_table.
* **datastoreWriteProjectId** : The ID of the Google Cloud project to write the Datastore entities to.
* **errorWritePath** : The error log output file to use for write failures that occur during processing. (Example: gs://your-bucket/errors/).
* **readQuery**: A BigQuery SQL query that extracts data from the source. For example, `select * from dataset1.sample_table`.
* **datastoreWriteProjectId**: The ID of the Google Cloud project to write the Datastore entities to.
* **errorWritePath**: The error log output file to use for write failures that occur during processing. For example, `gs://your-bucket/errors/`.

### Optional parameters

* **readIdColumn** : Name of the BigQuery column storing the unique identifier of the row.
* **invalidOutputPath** : Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. (Example: gs://your-bucket/your-path).
* **datastoreWriteEntityKind** : Datastore kind under which entities will be written in the output Google Cloud project.
* **datastoreWriteNamespace** : Datastore namespace under which entities will be written in the output Google Cloud project.
* **datastoreHintNumWorkers** : Hint for the expected number of workers in the Datastore ramp-up throttling step. Default is `500`.
* **readIdColumn**: Name of the BigQuery column storing the unique identifier of the row.
* **invalidOutputPath**: Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. For example, `gs://your-bucket/your-path`.
* **datastoreWriteEntityKind**: Datastore kind under which entities will be written in the output Google Cloud project.
* **datastoreWriteNamespace**: Datastore namespace under which entities will be written in the output Google Cloud project.
* **datastoreHintNumWorkers**: Hint for the expected number of workers in the Datastore ramp-up throttling step. Defaults to `500`.



@@ -213,9 +213,9 @@ resource "google_dataflow_job" "cloud_bigquery_to_cloud_datastore" {
parameters = {
readQuery = "<readQuery>"
datastoreWriteProjectId = "<datastoreWriteProjectId>"
errorWritePath = "gs://your-bucket/errors/"
errorWritePath = "<errorWritePath>"
# readIdColumn = "<readIdColumn>"
# invalidOutputPath = "gs://your-bucket/your-path"
# invalidOutputPath = "<invalidOutputPath>"
# datastoreWriteEntityKind = "<datastoreWriteEntityKind>"
# datastoreWriteNamespace = "<datastoreWriteNamespace>"
# datastoreHintNumWorkers = "500"
20 changes: 10 additions & 10 deletions v1/README_Cloud_BigQuery_to_GCS_TensorFlow_Records.md
@@ -22,17 +22,17 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat

### Required parameters

* **readQuery** : A BigQuery SQL query that extracts data from the source. For example, select * from dataset1.sample_table.
* **outputDirectory** : The top-level Cloud Storage path prefix to use when writing the training, testing, and validation TFRecord files. Subdirectories for resulting training, testing, and validation TFRecord files are automatically generated from `outputDirectory`. For example, `gs://mybucket/output/train` (Example: gs://mybucket/output).
* **readQuery**: A BigQuery SQL query that extracts data from the source. For example, `select * from dataset1.sample_table`.
* **outputDirectory**: The top-level Cloud Storage path prefix to use when writing the training, testing, and validation TFRecord files. Subdirectories for resulting training, testing, and validation TFRecord files are automatically generated from `outputDirectory`. For example, `gs://mybucket/output`.

### Optional parameters

* **readIdColumn** : Name of the BigQuery column storing the unique identifier of the row.
* **invalidOutputPath** : Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. (Example: gs://your-bucket/your-path).
* **outputSuffix** : The file suffix for the training, testing, and validation TFRecord files that are written. The default value is `.tfrecord`.
* **trainingPercentage** : The percentage of query data allocated to training TFRecord files. The default value is 1, or 100%.
* **testingPercentage** : The percentage of query data allocated to testing TFRecord files. The default value is 0, or 0%.
* **validationPercentage** : The percentage of query data allocated to validation TFRecord files. The default value is 0, or 0%.
* **readIdColumn**: Name of the BigQuery column storing the unique identifier of the row.
* **invalidOutputPath**: Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. For example, `gs://your-bucket/your-path`.
* **outputSuffix**: The file suffix for the training, testing, and validation TFRecord files that are written. The default value is `.tfrecord`.
* **trainingPercentage**: The percentage of query data allocated to training TFRecord files. The default value is `1`, or `100%`.
* **testingPercentage**: The percentage of query data allocated to testing TFRecord files. The default value is `0`, or `0%`.
* **validationPercentage**: The percentage of query data allocated to validation TFRecord files. The default value is `0`, or `0%`.
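For intuition on the three percentage parameters above, a small Java sketch of how such fractions would split a result set, under the assumption that they are meant to sum to 1 (the documented defaults of 1, 0, and 0 do); the numbers are made up.

```java
// Illustrative arithmetic only; not the template's partitioning code.
public class TfRecordSplitSketch {
  public static void main(String[] args) {
    long totalRows = 10_000;
    double trainingPercentage = 0.7;
    double testingPercentage = 0.2;
    double validationPercentage = 0.1; // assumed: the three fractions sum to 1.0

    long train = Math.round(totalRows * trainingPercentage);
    long test = Math.round(totalRows * testingPercentage);
    long validation = Math.round(totalRows * validationPercentage);
    // 7000 + 2000 + 1000 = 10000; rounding may need reconciling in general.

    System.out.printf("train=%d test=%d validation=%d%n", train, test, validation);
  }
}
```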



@@ -219,9 +219,9 @@ resource "google_dataflow_job" "cloud_bigquery_to_gcs_tensorflow_records" {
temp_gcs_location = "gs://bucket-name-here/temp"
parameters = {
readQuery = "<readQuery>"
outputDirectory = "gs://mybucket/output"
outputDirectory = "<outputDirectory>"
# readIdColumn = "<readIdColumn>"
# invalidOutputPath = "gs://your-bucket/your-path"
# invalidOutputPath = "<invalidOutputPath>"
# outputSuffix = ".tfrecord"
# trainingPercentage = "1.0"
# testingPercentage = "0.0"
14 changes: 7 additions & 7 deletions v1/README_Cloud_Bigtable_to_GCS_Avro.md
@@ -18,15 +18,15 @@ on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplat

### Required parameters

* **bigtableProjectId** : The ID of the Google Cloud project that contains the Bigtable instance that you want to read data from.
* **bigtableInstanceId** : The ID of the Bigtable instance that contains the table.
* **bigtableTableId** : The ID of the Bigtable table to export.
* **outputDirectory** : The Cloud Storage path where data is written. (Example: gs://mybucket/somefolder).
* **filenamePrefix** : The prefix of the Avro filename. For example, `output-`. Defaults to: part.
* **bigtableProjectId**: The ID of the Google Cloud project that contains the Bigtable instance that you want to read data from.
* **bigtableInstanceId**: The ID of the Bigtable instance that contains the table.
* **bigtableTableId**: The ID of the Bigtable table to export.
* **outputDirectory**: The Cloud Storage path where data is written. For example, `gs://mybucket/somefolder`.
* **filenamePrefix**: The prefix of the Avro filename. For example, `output-`. Defaults to: part.

### Optional parameters

* **bigtableAppProfileId** : The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile.
* **bigtableAppProfileId**: The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile.



@@ -209,7 +209,7 @@ resource "google_dataflow_job" "cloud_bigtable_to_gcs_avro" {
bigtableProjectId = "<bigtableProjectId>"
bigtableInstanceId = "<bigtableInstanceId>"
bigtableTableId = "<bigtableTableId>"
outputDirectory = "gs://mybucket/somefolder"
outputDirectory = "<outputDirectory>"
filenamePrefix = "part"
# bigtableAppProfileId = "default"
}