Skip to content

Commit

Permalink
Add enable backfill flag (#2018)
Browse files Browse the repository at this point in the history
  • Loading branch information
Deep1998 authored Nov 18, 2024
1 parent ff49726 commit b7e6db7
Show file tree
Hide file tree
Showing 7 changed files with 18 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ datastream_params = {
target_gcs_bucket_name = "live-migration" # Or provide a custom bucket name
pubsub_topic_name = "live-migration" # Or provide a custom topic name
stream_id = "mysql-stream" # Or provide a custom stream ID
enable_backfill = true # This should always be enabled unless using the sourcedb-to-spanner template for bulk migrations.
max_concurrent_cdc_tasks = 50 # Adjust as needed
max_concurrent_backfill_tasks = 50 # Adjust as needed
mysql_host = "<YOUR_MYSQL_HOST_IP_ADDRESS>"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ common_params = {

datastream_params = {
stream_prefix_path = "<YOUR_STREAM_PREFIX>" # Prefix for Datastream stream IDs (e.g., "data")
enable_backfill = true # This should always be enabled unless using the sourcedb-to-spanner template for bulk migrations.
max_concurrent_cdc_tasks = "<YOUR_CDC_TASKS>" # Maximum concurrent CDC tasks (e.g., 5)
max_concurrent_backfill_tasks = "<YOUR_BACKFILL_TASKS>" # Maximum concurrent backfill tasks (e.g., 15)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,14 @@ resource "google_datastream_stream" "mysql_to_gcs" {
location = var.common_params.region
display_name = "${var.shard_list[count.index].shard_id != null ? var.shard_list[count.index].shard_id : random_pet.migration_id[count.index].id}-${var.shard_list[count.index].datastream_params.stream_id}"
desired_state = "RUNNING"
backfill_all {
dynamic "backfill_all" {
for_each = var.common_params.datastream_params.enable_backfill ? [1] : []
content {}
}

dynamic "backfill_none" {
for_each = var.common_params.datastream_params.enable_backfill ? [] : [1]
content {}
}

source_config {
Expand Down Expand Up @@ -249,6 +256,7 @@ resource "google_project_iam_member" "live_migration_roles" {
}
# Dataflow Flex Template Job (for CDC to Spanner)
resource "google_dataflow_flex_template_job" "live_migration_job" {
count = var.common_params.dataflow_params.skip_dataflow ? 0 : 1
depends_on = [
google_project_service.enabled_apis, google_project_iam_member.live_migration_roles
] # Launch the template once the stream is created.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ output "resource_ids" {
gcs_bucket = google_storage_bucket.datastream_bucket.name
pubsub_topic = google_pubsub_topic.datastream_topic.name
pubsub_subscription = google_pubsub_subscription.datastream_subscription.name
dataflow_job = google_dataflow_flex_template_job.live_migration_job.job_id
dataflow_job = var.common_params.dataflow_params.skip_dataflow ? "" : google_dataflow_flex_template_job.live_migration_job[0].job_id
}
)

Expand Down Expand Up @@ -46,7 +46,7 @@ output "resource_urls" {
gcs_bucket = "https://console.cloud.google.com/storage/browser/${google_storage_bucket.datastream_bucket.name}?project=${var.common_params.project}"
pubsub_topic = "https://console.cloud.google.com/cloudpubsub/topic/detail/${google_pubsub_topic.datastream_topic.name}?project=${var.common_params.project}"
pubsub_subscription = "https://console.cloud.google.com/cloudpubsub/subscription/detail/${google_pubsub_subscription.datastream_subscription.name}?project=${var.common_params.project}"
dataflow_job = "https://console.cloud.google.com/dataflow/jobs/${var.common_params.region}/${google_dataflow_flex_template_job.live_migration_job.job_id}?project=${var.common_params.project}"
dataflow_job = var.common_params.dataflow_params.skip_dataflow ? "" : "https://console.cloud.google.com/dataflow/jobs/${var.common_params.region}/${google_dataflow_flex_template_job.live_migration_job[0].job_id}?project=${var.common_params.project}"
})

depends_on = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ common_params = {

datastream_params = {
stream_prefix_path = "<YOUR_STREAM_PREFIX>" # Prefix for Datastream stream IDs (e.g., "data")
enable_backfill = true # This should always be enabled unless using the sourcedb-to-spanner template for bulk migrations.
max_concurrent_cdc_tasks = "<YOUR_CDC_TASKS>" # Maximum concurrent CDC tasks (e.g., 5)
max_concurrent_backfill_tasks = "<YOUR_BACKFILL_TASKS>" # Maximum concurrent backfill tasks (e.g., 15)
private_connectivity_id = "<YOUR_PRIVATE_CONNECTIVITY_ID>" # If using Private Service Connect
Expand All @@ -33,6 +34,7 @@ common_params = {
}

dataflow_params = {
skip_dataflow = false
template_params = {
shadow_table_prefix = "<YOUR_SHADOW_TABLE_PREFIX>" # Prefix for shadow tables (e.g., "shadow_")
create_shadow_tables = "<TRUE/FALSE>" # Whether to create shadow tables in Spanner
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ variable "common_params" {
target_connection_profile_id = optional(string, "target-gcs")
gcs_root_path = optional(string, "/")
source_type = optional(string, "mysql")
enable_backfill = optional(bool, true)
max_concurrent_cdc_tasks = optional(number, 5)
max_concurrent_backfill_tasks = optional(number, 20)
private_connectivity_id = optional(string)
Expand All @@ -28,6 +29,7 @@ variable "common_params" {
}))
})
dataflow_params = object({
skip_dataflow = optional(bool, false)
template_params = object({
shadow_table_prefix = optional(string)
create_shadow_tables = optional(bool)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ datastream_params = {
target_gcs_bucket_name = "live-migration" # Or provide a custom bucket name
pubsub_topic_name = "live-migration" # Or provide a custom topic name
stream_id = "postgresql-stream" # Or provide a custom stream ID
enable_backfill = true # This should always be enabled unless using the sourcedb-to-spanner template for bulk migrations.
max_concurrent_cdc_tasks = 50 # Adjust as needed
max_concurrent_backfill_tasks = 50 # Adjust as needed
postgresql_host = "<YOUR_POSTGRESQL_HOST_IP_ADDRESS>"
Expand Down

0 comments on commit b7e6db7

Please sign in to comment.