From b7e6db7b3872149a1d8a29e5bcf6a90649d45cfa Mon Sep 17 00:00:00 2001 From: Deep1998 Date: Mon, 18 Nov 2024 12:47:52 +0530 Subject: [PATCH] Add enable backfill flag (#2018) --- .../samples/mysql-end-to-end/terraform.tfvars | 1 + .../samples/mysql-sharded-end-to-end/terraform.tfvars | 1 + .../samples/mysql-sharded-single-df-job/main.tf | 10 +++++++++- .../samples/mysql-sharded-single-df-job/outputs.tf | 4 ++-- .../mysql-sharded-single-df-job/terraform.tfvars | 2 ++ .../samples/mysql-sharded-single-df-job/variables.tf | 2 ++ .../samples/postgresql-end-to-end/terraform.tfvars | 1 + 7 files changed, 18 insertions(+), 3 deletions(-) diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform.tfvars b/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform.tfvars index ebaa572700..2c5df5570d 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform.tfvars +++ b/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform.tfvars @@ -14,6 +14,7 @@ datastream_params = { target_gcs_bucket_name = "live-migration" # Or provide a custom bucket name pubsub_topic_name = "live-migration" # Or provide a custom topic name stream_id = "mysql-stream" # Or provide a custom stream ID + enable_backfill = true # This should always be enabled unless the sourcedb-to-spanner template is used for the bulk migration. 
max_concurrent_cdc_tasks = 50 # Adjust as needed max_concurrent_backfill_tasks = 50 # Adjust as needed mysql_host = "" diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/terraform.tfvars b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/terraform.tfvars index 2305d5bd3e..a157ee4224 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/terraform.tfvars +++ b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/terraform.tfvars @@ -9,6 +9,7 @@ common_params = { datastream_params = { stream_prefix_path = "" # Prefix for Datastream stream IDs (e.g., "data") + enable_backfill = true # This should always be enabled unless the sourcedb-to-spanner template is used for the bulk migration. max_concurrent_cdc_tasks = "" # Maximum concurrent CDC tasks (e.g., 5) max_concurrent_backfill_tasks = "" # Maximum concurrent backfill tasks (e.g., 15) diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/main.tf b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/main.tf index f2e712e3bf..5b13d7f7cc 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/main.tf +++ b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/main.tf @@ -188,7 +188,14 @@ resource "google_datastream_stream" "mysql_to_gcs" { location = var.common_params.region display_name = "${var.shard_list[count.index].shard_id != null ? var.shard_list[count.index].shard_id : random_pet.migration_id[count.index].id}-${var.shard_list[count.index].datastream_params.stream_id}" desired_state = "RUNNING" - backfill_all { + dynamic "backfill_all" { + for_each = var.common_params.datastream_params.enable_backfill ? [1] : [] + content {} + } + + dynamic "backfill_none" { + for_each = var.common_params.datastream_params.enable_backfill ? 
[] : [1] + content {} } source_config { @@ -249,6 +256,7 @@ resource "google_project_iam_member" "live_migration_roles" { } # Dataflow Flex Template Job (for CDC to Spanner) resource "google_dataflow_flex_template_job" "live_migration_job" { + count = var.common_params.dataflow_params.skip_dataflow ? 0 : 1 depends_on = [ google_project_service.enabled_apis, google_project_iam_member.live_migration_roles ] # Launch the template once the stream is created. diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/outputs.tf b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/outputs.tf index d6be22af5f..65ca75e4cf 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/outputs.tf +++ b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/outputs.tf @@ -14,7 +14,7 @@ output "resource_ids" { gcs_bucket = google_storage_bucket.datastream_bucket.name pubsub_topic = google_pubsub_topic.datastream_topic.name pubsub_subscription = google_pubsub_subscription.datastream_subscription.name - dataflow_job = google_dataflow_flex_template_job.live_migration_job.job_id + dataflow_job = var.common_params.dataflow_params.skip_dataflow ? 
"" : google_dataflow_flex_template_job.live_migration_job[0].job_id } ) @@ -46,7 +46,7 @@ output "resource_urls" { gcs_bucket = "https://console.cloud.google.com/storage/browser/${google_storage_bucket.datastream_bucket.name}?project=${var.common_params.project}" pubsub_topic = "https://console.cloud.google.com/cloudpubsub/topic/detail/${google_pubsub_topic.datastream_topic.name}?project=${var.common_params.project}" pubsub_subscription = "https://console.cloud.google.com/cloudpubsub/subscription/detail/${google_pubsub_subscription.datastream_subscription.name}?project=${var.common_params.project}" - dataflow_job = "https://console.cloud.google.com/dataflow/jobs/${var.common_params.region}/${google_dataflow_flex_template_job.live_migration_job.job_id}?project=${var.common_params.project}" + dataflow_job = var.common_params.dataflow_params.skip_dataflow ? "" : "https://console.cloud.google.com/dataflow/jobs/${var.common_params.region}/${google_dataflow_flex_template_job.live_migration_job[0].job_id}?project=${var.common_params.project}" }) depends_on = [ diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/terraform.tfvars b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/terraform.tfvars index 29cfefdca8..4f76e9724c 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/terraform.tfvars +++ b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/terraform.tfvars @@ -9,6 +9,7 @@ common_params = { datastream_params = { stream_prefix_path = "" # Prefix for Datastream stream IDs (e.g., "data") + enable_backfill = true # This should always be enabled unless the sourcedb-to-spanner template is used for the bulk migration. 
max_concurrent_cdc_tasks = "" # Maximum concurrent CDC tasks (e.g., 5) max_concurrent_backfill_tasks = "" # Maximum concurrent backfill tasks (e.g., 15) private_connectivity_id = "" # If using Private Service Connect @@ -33,6 +34,7 @@ common_params = { } dataflow_params = { + skip_dataflow = false template_params = { shadow_table_prefix = "" # Prefix for shadow tables (e.g., "shadow_") create_shadow_tables = "" # Whether to create shadow tables in Spanner diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/variables.tf b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/variables.tf index a640882c5e..6e84713190 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/variables.tf +++ b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-single-df-job/variables.tf @@ -14,6 +14,7 @@ variable "common_params" { target_connection_profile_id = optional(string, "target-gcs") gcs_root_path = optional(string, "/") source_type = optional(string, "mysql") + enable_backfill = optional(bool, true) max_concurrent_cdc_tasks = optional(number, 5) max_concurrent_backfill_tasks = optional(number, 20) private_connectivity_id = optional(string) @@ -28,6 +29,7 @@ variable "common_params" { })) }) dataflow_params = object({ + skip_dataflow = optional(bool, false) template_params = object({ shadow_table_prefix = optional(string) create_shadow_tables = optional(bool) diff --git a/v2/datastream-to-spanner/terraform/samples/postgresql-end-to-end/terraform.tfvars b/v2/datastream-to-spanner/terraform/samples/postgresql-end-to-end/terraform.tfvars index dd818340bc..a2be056a82 100644 --- a/v2/datastream-to-spanner/terraform/samples/postgresql-end-to-end/terraform.tfvars +++ b/v2/datastream-to-spanner/terraform/samples/postgresql-end-to-end/terraform.tfvars @@ -14,6 +14,7 @@ datastream_params = { target_gcs_bucket_name = "live-migration" # Or provide a custom bucket name pubsub_topic_name = "live-migration" # Or 
provide a custom topic name stream_id = "postgresql-stream" # Or provide a custom stream ID + enable_backfill = true # This should always be enabled unless the sourcedb-to-spanner template is used for the bulk migration. max_concurrent_cdc_tasks = 50 # Adjust as needed max_concurrent_backfill_tasks = 50 # Adjust as needed postgresql_host = ""