From 086d6f5480a19312019cd0ba6803bef86efcb105 Mon Sep 17 00:00:00 2001 From: Nathan Kinkade Date: Wed, 6 Nov 2024 13:45:23 -0700 Subject: [PATCH 1/4] Updates disk image for platform instances Updates the write-metadata.sh script to write out the IATA code to /var/local/metadata --- mlab-sandbox/platform-cluster.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlab-sandbox/platform-cluster.tf b/mlab-sandbox/platform-cluster.tf index 02b0815..faa332d 100644 --- a/mlab-sandbox/platform-cluster.tf +++ b/mlab-sandbox/platform-cluster.tf @@ -8,7 +8,7 @@ module "platform-cluster" { instances = { attributes = { daemonset = "ndt" - disk_image = "platform-cluster-instance-2024-09-12t22-10-23" + disk_image = "platform-cluster-instance-2024-11-05t22-20-17" disk_size_gb = 100 disk_type = "pd-ssd" machine_type = "n2-highcpu-4" From 127929a17daa3bed3491bca549d5317c6ecb546f Mon Sep 17 00:00:00 2001 From: Nathan Kinkade Date: Thu, 21 Nov 2024 14:36:25 -0700 Subject: [PATCH 2/4] Makes load balancers optional for MIGs If M-Lab-managed VMs can use the Autojoin API to get a DNS entry, then they don't need a load balancer. Not only do load balancers turn out to be expensive, but they add additional complexity to the system. This commit adds the necessary changes to make load balancers optional. 
--- mlab-sandbox/platform-cluster.tf | 14 +++++++++----- modules/platform-cluster/instancegroups.tf | 3 ++- modules/platform-cluster/loadbalancers.tf | 12 ++++++------ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/mlab-sandbox/platform-cluster.tf b/mlab-sandbox/platform-cluster.tf index faa332d..30ae0e9 100644 --- a/mlab-sandbox/platform-cluster.tf +++ b/mlab-sandbox/platform-cluster.tf @@ -8,7 +8,7 @@ module "platform-cluster" { instances = { attributes = { daemonset = "ndt" - disk_image = "platform-cluster-instance-2024-11-05t22-20-17" + disk_image = "platform-cluster-instance-2024-11-21t01-13-45" disk_size_gb = 100 disk_type = "pd-ssd" machine_type = "n2-highcpu-4" @@ -20,14 +20,18 @@ module "platform-cluster" { }, migs = { mlab1-chs0t = { - region = "us-east1" + daemonset = "ndt-autojoin" + loadbalanced = false + region = "us-east1" }, mlab1-lax0t = { - daemonset = "ndt-canary" - region = "us-west2" + daemonset = "ndt-canary" + loadbalanced = true + region = "us-west2" }, mlab1-pdx0t = { - region = "us-west1" + loadbalanced = true + region = "us-west1" } }, vms = { diff --git a/modules/platform-cluster/instancegroups.tf b/modules/platform-cluster/instancegroups.tf index 82d08a3..a78821c 100644 --- a/modules/platform-cluster/instancegroups.tf +++ b/modules/platform-cluster/instancegroups.tf @@ -29,7 +29,8 @@ resource "google_compute_instance_template" "platform_cluster_mig_templates" { "mlab/site=${split("-", each.key)[1]}", "mlab/type=virtual" ]) - k8s_node = "${each.key}.${data.google_client_config.current.project}.measurement-lab.org" + k8s_node = "${each.key}.${data.google_client_config.current.project}.measurement-lab.org" + loadbalanced = each.value.loadbalanced } name_prefix = "platform-cluster-mig-template-" diff --git a/modules/platform-cluster/loadbalancers.tf b/modules/platform-cluster/loadbalancers.tf index 012178e..8b329a7 100644 --- a/modules/platform-cluster/loadbalancers.tf +++ b/modules/platform-cluster/loadbalancers.tf @@ -2,7 
+2,7 @@ # Managed instance group load balancers for regular platfrom VMs # resource "google_compute_address" "platform_cluster_mig_addresses" { - for_each = var.instances.migs + for_each = { for k, v in var.instances.migs : k => v if v.loadbalanced } address_type = "EXTERNAL" name = "${each.key}-${data.google_client_config.current.project}-measurement-lab-org" @@ -10,7 +10,7 @@ resource "google_compute_address" "platform_cluster_mig_addresses" { } resource "google_compute_address" "platform_cluster_mig_addresses_v6" { - for_each = var.instances.migs + for_each = { for k, v in var.instances.migs : k => v if v.loadbalanced } address_type = "EXTERNAL" ipv6_endpoint_type = "NETLB" @@ -21,7 +21,7 @@ resource "google_compute_address" "platform_cluster_mig_addresses_v6" { } resource "google_compute_region_health_check" "platform_cluster_mig_health_checks" { - for_each = var.instances.migs + for_each = { for k, v in var.instances.migs : k => v if v.loadbalanced } https_health_check { port = 443 @@ -32,7 +32,7 @@ resource "google_compute_region_health_check" "platform_cluster_mig_health_check } resource "google_compute_region_backend_service" "platform_cluster_mig_backends" { - for_each = var.instances.migs + for_each = { for k, v in var.instances.migs : k => v if v.loadbalanced } backend { group = google_compute_region_instance_group_manager.platform_cluster_mig_managers[each.key].instance_group @@ -47,7 +47,7 @@ resource "google_compute_region_backend_service" "platform_cluster_mig_backends" } resource "google_compute_forwarding_rule" "platform_cluster_mig_forwarding_rules" { - for_each = var.instances.migs + for_each = { for k, v in var.instances.migs : k => v if v.loadbalanced } all_ports = true backend_service = google_compute_region_backend_service.platform_cluster_mig_backends[each.key].id @@ -59,7 +59,7 @@ resource "google_compute_forwarding_rule" "platform_cluster_mig_forwarding_rules } resource "google_compute_forwarding_rule" 
"platform_cluster_mig_forwarding_rules_v6" { - for_each = var.instances.migs + for_each = { for k, v in var.instances.migs : k => v if v.loadbalanced } all_ports = true backend_service = google_compute_region_backend_service.platform_cluster_mig_backends[each.key].id From 00712b15c21aaed77eecdd00ce48c6afccce3ed5 Mon Sep 17 00:00:00 2001 From: Nathan Kinkade Date: Tue, 26 Nov 2024 11:12:06 -0700 Subject: [PATCH 3/4] Adds loadbalanced=true to all MIGs in staging and production This just prepopulates the value explicitly, in case Terraform would otherwise exit with an error when checking the value of a non-existent field. --- mlab-oti/platform-cluster.tf | 117 ++++++++++++++++++++----------- mlab-sandbox/platform-cluster.tf | 10 +-- mlab-staging/platform-cluster.tf | 3 +- 3 files changed, 85 insertions(+), 45 deletions(-) diff --git a/mlab-oti/platform-cluster.tf b/mlab-oti/platform-cluster.tf index c63db8e..825ef42 100644 --- a/mlab-oti/platform-cluster.tf +++ b/mlab-oti/platform-cluster.tf @@ -20,127 +20,166 @@ module "platform-cluster" { } migs = { mlab1-ams11 = { - region = "europe-west4" + region = "europe-west4" + loadbalanced = true }, mlab1-ber02 = { # We cannot currently get any N2 quota in this region. 
- machine_type = "e2-highcpu-4" region = "europe-west10" + machine_type = "e2-highcpu-4" + loadbalanced = true }, mlab1-bom06 = { - region = "asia-south1" + region = "asia-south1" + loadbalanced = true }, mlab1-bru07 = { - region = "europe-west1" + region = "europe-west1" + loadbalanced = true }, mlab1-cgk02 = { - region = "asia-southeast2" + region = "asia-southeast2" + loadbalanced = true }, mlab1-chs02 = { - region = "us-east1" + region = "us-east1" + loadbalanced = true }, mlab1-cmh02 = { - region = "us-east5" + region = "us-east5" + loadbalanced = true }, mlab1-del05 = { - region = "asia-south2" + region = "asia-south2" + loadbalanced = true }, mlab1-dfw12 = { - region = "us-south1" + region = "us-south1" + loadbalanced = true }, mlab1-doh02 = { # This region is new and we can't currently get any N2 quota. - machine_type = "e2-highcpu-4" region = "me-central1" + machine_type = "e2-highcpu-4" + loadbalanced = true }, mlab1-fra08 = { - region = "europe-west3" + region = "europe-west3" + loadbalanced = true }, mlab1-gru06 = { - region = "southamerica-east1" + region = "southamerica-east1" + loadbalanced = true }, mlab1-hel02 = { - region = "europe-north1" + region = "europe-north1" + loadbalanced = true }, mlab1-hkg05 = { - region = "asia-east2" + region = "asia-east2" + loadbalanced = true }, mlab1-hnd07 = { - region = "asia-northeast1" + region = "asia-northeast1" + loadbalanced = true }, mlab1-iad09 = { - region = "us-east4" + region = "us-east4" + loadbalanced = true }, mlab1-icn02 = { - region = "asia-northeast3" + region = "asia-northeast3" + loadbalanced = true }, mlab1-jnb02 = { # This region is new and we can't currently get any N2 quota. 
- machine_type = "e2-highcpu-4" region = "africa-south1" + machine_type = "e2-highcpu-4" + loadbalanced = true }, mlab1-kix02 = { - region = "asia-northeast2" + region = "asia-northeast2" + loadbalanced = true }, mlab1-las02 = { - region = "us-west4" + region = "us-west4" + loadbalanced = true }, mlab1-lax10 = { - region = "us-west2" + region = "us-west2" + loadbalanced = true }, mlab1-lhr10 = { - region = "europe-west2" + region = "europe-west2" + loadbalanced = true }, mlab1-mad08 = { - region = "europe-southwest1" + region = "europe-southwest1" + loadbalanced = true }, mlab1-mel02 = { - region = "australia-southeast2" + region = "australia-southeast2" + loadbalanced = true }, mlab1-mil09 = { - region = "europe-west8" + region = "europe-west8" + loadbalanced = true }, mlab1-oma02 = { - region = "us-central1" + region = "us-central1" + loadbalanced = true }, mlab1-par09 = { - region = "europe-west9" + region = "europe-west9" + loadbalanced = true }, mlab1-pdx03 = { - region = "us-west1" + region = "us-west1" + loadbalanced = true }, mlab1-scl06 = { - region = "southamerica-west1" + region = "southamerica-west1" + loadbalanced = true }, mlab1-sin03 = { - region = "asia-southeast1" + region = "asia-southeast1" + loadbalanced = true }, mlab1-slc02 = { - region = "us-west3" + region = "us-west3" + loadbalanced = true }, mlab1-syd08 = { - region = "australia-southeast1" + region = "australia-southeast1" + loadbalanced = true }, mlab1-tlv02 = { - region = "me-west1" + region = "me-west1" + loadbalanced = true }, mlab1-tpe03 = { - region = "asia-east1" + region = "asia-east1" + loadbalanced = true }, mlab1-trn04 = { - region = "europe-west12" + region = "europe-west12" + loadbalanced = true }, mlab1-waw02 = { - region = "europe-central2" + region = "europe-central2" + loadbalanced = true }, mlab1-yul08 = { - region = "northamerica-northeast1" + region = "northamerica-northeast1" + loadbalanced = true }, mlab1-yyz08 = { - region = "northamerica-northeast2" + region = 
"northamerica-northeast2" + loadbalanced = true } mlab1-zrh02 = { - region = "europe-west6" + region = "europe-west6" + loadbalanced = true } } vms = { diff --git a/mlab-sandbox/platform-cluster.tf b/mlab-sandbox/platform-cluster.tf index 30ae0e9..1cabd5f 100644 --- a/mlab-sandbox/platform-cluster.tf +++ b/mlab-sandbox/platform-cluster.tf @@ -20,18 +20,18 @@ module "platform-cluster" { }, migs = { mlab1-chs0t = { - daemonset = "ndt-autojoin" - loadbalanced = false region = "us-east1" + loadbalanced = false + daemonset = "ndt-autojoin" }, mlab1-lax0t = { - daemonset = "ndt-canary" - loadbalanced = true region = "us-west2" + loadbalanced = true + daemonset = "ndt-canary" }, mlab1-pdx0t = { - loadbalanced = true region = "us-west1" + loadbalanced = true } }, vms = { diff --git a/mlab-staging/platform-cluster.tf b/mlab-staging/platform-cluster.tf index 2965b4f..6937194 100644 --- a/mlab-staging/platform-cluster.tf +++ b/mlab-staging/platform-cluster.tf @@ -20,7 +20,8 @@ module "platform-cluster" { }, migs = { mlab4-dfw13 = { - region = "us-south1" + region = "us-south1" + loadbalanced = true } }, vms = { From 75849d8265e80ccfb5a5b024a3d2f4966a85fa4d Mon Sep 17 00:00:00 2001 From: Nathan Kinkade Date: Tue, 26 Nov 2024 17:27:16 -0700 Subject: [PATCH 4/4] Defines default probability of 1.0 and sets VMs that differ VM definitions that don't have a probability set explicitly will get the new default probability of 1.0. 
--- mlab-oti/platform-cluster.tf | 27 +++++++++++++++++++++- mlab-sandbox/platform-cluster.tf | 7 +++--- mlab-staging/platform-cluster.tf | 1 + modules/platform-cluster/instancegroups.tf | 1 + modules/platform-cluster/instances.tf | 5 ++-- modules/platform-cluster/variables.tf | 1 + 6 files changed, 36 insertions(+), 6 deletions(-) diff --git a/mlab-oti/platform-cluster.tf b/mlab-oti/platform-cluster.tf index 825ef42..1961318 100644 --- a/mlab-oti/platform-cluster.tf +++ b/mlab-oti/platform-cluster.tf @@ -15,6 +15,7 @@ module "platform-cluster" { mig_min_replicas = 1 mig_max_replicas = 5 network_tier = "PREMIUM" + probability = 1.0 tags = ["ndt-cloud"] scopes = ["cloud-platform"] } @@ -22,6 +23,7 @@ module "platform-cluster" { mlab1-ams11 = { region = "europe-west4" loadbalanced = true + probability = 0.5 }, mlab1-ber02 = { # We cannot currently get any N2 quota in this region. @@ -32,10 +34,12 @@ module "platform-cluster" { mlab1-bom06 = { region = "asia-south1" loadbalanced = true + probability = 0.5 }, mlab1-bru07 = { region = "europe-west1" loadbalanced = true + probability = 0.5 }, mlab1-cgk02 = { region = "asia-southeast2" @@ -52,10 +56,12 @@ module "platform-cluster" { mlab1-del05 = { region = "asia-south2" loadbalanced = true + probability = 0.5 }, mlab1-dfw12 = { region = "us-south1" loadbalanced = true + probability = 0.5 }, mlab1-doh02 = { # This region is new and we can't currently get any N2 quota. 
@@ -66,10 +72,12 @@ module "platform-cluster" { mlab1-fra08 = { region = "europe-west3" loadbalanced = true + probability = 0.5 }, mlab1-gru06 = { region = "southamerica-east1" loadbalanced = true + probability = 0.5 }, mlab1-hel02 = { region = "europe-north1" @@ -78,14 +86,17 @@ module "platform-cluster" { mlab1-hkg05 = { region = "asia-east2" loadbalanced = true + probability = 0.5 }, mlab1-hnd07 = { region = "asia-northeast1" loadbalanced = true + probability = 0.5 }, mlab1-iad09 = { region = "us-east4" loadbalanced = true + probability = 0.5 }, mlab1-icn02 = { region = "asia-northeast3" @@ -96,6 +107,7 @@ module "platform-cluster" { region = "africa-south1" machine_type = "e2-highcpu-4" loadbalanced = true + probability = 0.5 }, mlab1-kix02 = { region = "asia-northeast2" @@ -108,14 +120,17 @@ module "platform-cluster" { mlab1-lax10 = { region = "us-west2" loadbalanced = true + probability = 0.5 }, mlab1-lhr10 = { region = "europe-west2" loadbalanced = true + probability = 0.5 }, mlab1-mad08 = { region = "europe-southwest1" loadbalanced = true + probability = 0.5 }, mlab1-mel02 = { region = "australia-southeast2" @@ -124,6 +139,7 @@ module "platform-cluster" { mlab1-mil09 = { region = "europe-west8" loadbalanced = true + probability = 0.5 }, mlab1-oma02 = { region = "us-central1" @@ -132,6 +148,7 @@ module "platform-cluster" { mlab1-par09 = { region = "europe-west9" loadbalanced = true + probability = 0.5 }, mlab1-pdx03 = { region = "us-west1" @@ -140,10 +157,12 @@ module "platform-cluster" { mlab1-scl06 = { region = "southamerica-west1" loadbalanced = true + probability = 0.5 }, mlab1-sin03 = { region = "asia-southeast1" loadbalanced = true + probability = 0.5 }, mlab1-slc02 = { region = "us-west3" @@ -152,6 +171,7 @@ module "platform-cluster" { mlab1-syd08 = { region = "australia-southeast1" loadbalanced = true + probability = 0.5 }, mlab1-tlv02 = { region = "me-west1" @@ -160,10 +180,12 @@ module "platform-cluster" { mlab1-tpe03 = { region = "asia-east1" 
loadbalanced = true + probability = 0.5 }, mlab1-trn04 = { region = "europe-west12" loadbalanced = true + probability = 0.5 }, mlab1-waw02 = { region = "europe-central2" @@ -172,10 +194,12 @@ module "platform-cluster" { mlab1-yul08 = { region = "northamerica-northeast1" loadbalanced = true + probability = 0.5 }, mlab1-yyz08 = { region = "northamerica-northeast2" loadbalanced = true + probability = 0.5 } mlab1-zrh02 = { region = "europe-west6" @@ -184,7 +208,8 @@ module "platform-cluster" { } vms = { mlab1-par08 = { - zone = "europe-west9-c" + zone = "europe-west9-c" + probability = 0.5 }, mlab1-pdx01 = { zone = "us-west1-c" diff --git a/mlab-sandbox/platform-cluster.tf b/mlab-sandbox/platform-cluster.tf index 1cabd5f..5b5fe9d 100644 --- a/mlab-sandbox/platform-cluster.tf +++ b/mlab-sandbox/platform-cluster.tf @@ -8,13 +8,14 @@ module "platform-cluster" { instances = { attributes = { daemonset = "ndt" - disk_image = "platform-cluster-instance-2024-11-21t01-13-45" + disk_image = "platform-cluster-instance-2024-11-27t00-16-26" disk_size_gb = 100 disk_type = "pd-ssd" machine_type = "n2-highcpu-4" mig_min_replicas = 1 mig_max_replicas = 2 network_tier = "PREMIUM" + probability = 1.0 tags = ["ndt-cloud"] scopes = ["cloud-platform"] }, @@ -43,7 +44,7 @@ module "platform-cluster" { api_instances = { machine_attributes = { - disk_image = "platform-cluster-api-instance-2024-09-12t22-10-23" + disk_image = "platform-cluster-api-instance-2024-11-27t00-16-26" disk_size_gb_boot = 100 disk_size_gb_data = 10 # This will show up as /dev/disk/by-id/google- @@ -77,7 +78,7 @@ module "platform-cluster" { } prometheus_instance = { - disk_image = "platform-cluster-internal-instance-2024-09-12t22-10-23" + disk_image = "platform-cluster-internal-instance-2024-11-27t00-16-26" disk_size_gb_boot = 100 disk_size_gb_data = 200 disk_type = "pd-ssd" diff --git a/mlab-staging/platform-cluster.tf b/mlab-staging/platform-cluster.tf index 6937194..b25bba8 100644 --- a/mlab-staging/platform-cluster.tf 
+++ b/mlab-staging/platform-cluster.tf @@ -15,6 +15,7 @@ module "platform-cluster" { mig_min_replicas = 1 mig_max_replicas = 3 network_tier = "PREMIUM" + probability = 1.0 tags = ["ndt-cloud"] scopes = ["cloud-platform"] }, diff --git a/modules/platform-cluster/instancegroups.tf b/modules/platform-cluster/instancegroups.tf index a78821c..f7a1e57 100644 --- a/modules/platform-cluster/instancegroups.tf +++ b/modules/platform-cluster/instancegroups.tf @@ -31,6 +31,7 @@ resource "google_compute_instance_template" "platform_cluster_mig_templates" { ]) k8s_node = "${each.key}.${data.google_client_config.current.project}.measurement-lab.org" loadbalanced = each.value.loadbalanced + probability = lookup(each.value, "probability", var.instances.attributes.probability) } name_prefix = "platform-cluster-mig-template-" diff --git a/modules/platform-cluster/instances.tf b/modules/platform-cluster/instances.tf index 2cd1c9b..2edd51e 100644 --- a/modules/platform-cluster/instances.tf +++ b/modules/platform-cluster/instances.tf @@ -121,7 +121,8 @@ resource "google_compute_instance" "platform_instances" { "mlab/site=${split("-", each.key)[1]}", "mlab/type=virtual" ]) - k8s_node = "${each.key}.${data.google_client_config.current.project}.measurement-lab.org" + k8s_node = "${each.key}.${data.google_client_config.current.project}.measurement-lab.org" + probability = lookup(each.value, "probability", var.instances.attributes.probability) } name = "${each.key}-${data.google_client_config.current.project}-measurement-lab-org" @@ -134,7 +135,7 @@ resource "google_compute_instance" "platform_instances" { } ipv6_access_config { - external_ipv6 = google_compute_address.platform_addresses_v6["${each.key}"].address + external_ipv6 = google_compute_address.platform_addresses_v6["${each.key}"].address external_ipv6_prefix_length = 96 # From what I gather STANDARD network tier is not available for IPv6. 
# https://cloud.google.com/network-tiers/docs/overview#resources diff --git a/modules/platform-cluster/variables.tf b/modules/platform-cluster/variables.tf index 251b353..eb52752 100644 --- a/modules/platform-cluster/variables.tf +++ b/modules/platform-cluster/variables.tf @@ -10,6 +10,7 @@ variable "instances" { mig_min_replicas = number mig_max_replicas = number network_tier = string + probability = number tags = list(string) scopes = list(string) })