From 5cb7fb701e0acf343cc8b2c12c42b2b81f53b028 Mon Sep 17 00:00:00 2001 From: smerle33 Date: Thu, 19 Dec 2024 16:47:30 +0100 Subject: [PATCH 01/15] feat(eks/cijenkinsio-agents-2): add toleration and taint for our system/application nodepool --- eks-cijenkinsio-agents-2.tf | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/eks-cijenkinsio-agents-2.tf b/eks-cijenkinsio-agents-2.tf index b010d8a..6f7a1e5 100644 --- a/eks-cijenkinsio-agents-2.tf +++ b/eks-cijenkinsio-agents-2.tf @@ -84,21 +84,61 @@ module "cijenkinsio-agents-2" { # https://docs.aws.amazon.com/cli/latest/reference/eks/describe-addon-versions.html coredns = { addon_version = "v1.11.3-eksbuild.2" + configuration_values = jsonencode({ + "tolerations" : [ + { + "effect" : "NoSchedule", + "key" : "ci.jenkins.io/applications", + "operator" : "Equal", + "value" : "true" + } + ] + }) } # Kube-proxy on an Amazon EKS cluster has the same compatibility and skew policy as Kubernetes # See https://kubernetes.io/releases/version-skew-policy/#kube-proxy kube-proxy = { # https://docs.aws.amazon.com/cli/latest/reference/eks/describe-addon-versions.html addon_version = "v1.29.10-eksbuild.3" + configuration_values = jsonencode({ + "tolerations" : [ + { + "effect" : "NoSchedule", + "key" : "ci.jenkins.io/applications", + "operator" : "Equal", + "value" : "true" + } + ] + }) } # https://github.com/aws/amazon-vpc-cni-k8s/releases vpc-cni = { # https://docs.aws.amazon.com/cli/latest/reference/eks/describe-addon-versions.html addon_version = "v1.19.0-eksbuild.1" + configuration_values = jsonencode({ + "tolerations" : [ + { + "effect" : "NoSchedule", + "key" : "ci.jenkins.io/applications", + "operator" : "Equal", + "value" : "true" + } + ] + }) } eks-pod-identity-agent = { # https://docs.aws.amazon.com/cli/latest/reference/eks/describe-addon-versions.html addon_version = "v1.3.4-eksbuild.1" + configuration_values = jsonencode({ + "tolerations" : [ + { + "effect" : "NoSchedule", + "key" : "ci.jenkins.io/applications", + "operator" : "Equal", + "value" : "true" + } + ] + }) } ## https://github.com/kubernetes-sigs/aws-ebs-csi-driver/blob/master/CHANGELOG.md # aws-ebs-csi-driver = { @@ -122,6 +162,18 @@ module "cijenkinsio-agents-2" { max_size = 3 desired_size = 1 + labels = { + jenkins = "ci.jenkins.io" + role = "applications" + } + taints = { + applications = { + key = "ci.jenkins.io/applications" + value = "true" + effect = "NO_SCHEDULE" + } + } + subnet_ids = slice(module.vpc.private_subnets, 1, 2) # Only 1 subnet in 1 AZ }, } From 2eae2a4f5a58e38b0a85befd2df1b30e68171d32 Mon Sep 17 00:00:00 2001 From: smerle33 Date: Thu, 19 Dec 2024 18:10:23 +0100 Subject: [PATCH 02/15] feat(cijenkinsio-agent-2) add irsa for autoscaler --- eks-cijenkinsio-agents-2.tf | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/eks-cijenkinsio-agents-2.tf b/eks-cijenkinsio-agents-2.tf index 6f7a1e5..cb3ffe7 100644 --- a/eks-cijenkinsio-agents-2.tf +++ b/eks-cijenkinsio-agents-2.tf @@ -201,6 +201,26 @@ module "cijenkinsio-agents-2" { } } +module "autoscaler_irsa_role" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + # TODO track with updatecli + version = "5.48.0" + + role_name = "${module.cijenkinsio-agents-2.cluster_name}-cluster-autoscaler" + attach_cluster_autoscaler_policy = true + + cluster_autoscaler_cluster_names = [module.cijenkinsio-agents-2.cluster_name] + + oidc_providers = { + main = { + provider_arn = module.cijenkinsio-agents-2.oidc_provider_arn + namespace_service_accounts = ["${local.autoscaler_account_namespace}:${local.autoscaler_account_name}"] + } + } + + tags = local.common_tags +} + # Configure the jenkins-infra/kubernetes-management admin service account data "aws_eks_cluster_auth" "cijenkinsio-agents-2" { name = module.cijenkinsio-agents-2.cluster_name From 1d787f053a7c66a4758f98eb3c39dd525e363af6 Mon Sep 17 00:00:00 2001 From: smerle33 Date: Thu, 19 Dec 2024 19:07:21 +0100 Subject: [PATCH 03/15] add helm provider and start autoscaler provisionning --- .terraform.lock.hcl | 19 +++++++++++++++++++ eks-cijenkinsio-agents-2.tf | 24 +++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/.terraform.lock.hcl b/.terraform.lock.hcl index cc01595..be3f293 100644 --- a/.terraform.lock.hcl +++ b/.terraform.lock.hcl @@ -47,6 +47,25 @@ provider "registry.terraform.io/hashicorp/cloudinit" { ] } +provider "registry.terraform.io/hashicorp/helm" { + version = "2.16.1" + hashes = [ + "h1:3VdXbh+m09VEAdSguT7Ea2MMnOVPZBYx4sUqvo6NPxo=", + "zh:0003f6719a32aee9afaeeb001687fc0cfc8c2d5f54861298cf1dc5711f3b4e65", + "zh:16cd5bfee09e7bb081b8b4470f31a9af508e52220fd97fd81c6dda725d9422fe", + "zh:51817de8fdc2c2e36785f23fbf4ec022111bd1cf7679498c16ad0ad7471c16db", + "zh:51b95829b2873be40a65809294bffe349e40cfccc3ff6fee0f471d01770e0ebd", + "zh:56b158dde897c47e1460181fc472c3e920aa23db40579fdc2aad333c1456d2dd", + "zh:916641d26c386959eb982e680028aa677b787687ef7c1283241e45620bc8df50", + "zh:aec15ca8605babba77b283f2ca35daca53e006d567e1c3a3daf50497035b820b", + "zh:c2cecf710b87c8f3a4d186da2ea12cf08041f97ae0c6db82649720d6ed929d65", + "zh:dbdd96f17aea25c7db2d516ab8172a5e683c6686c72a1a44173d2fe96319be39", + "zh:de11e180368434a796b1ab6f20fde7554dc74f7800e063b8e4c8ec3a86d0be63", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + "zh:f827a9c1540d210c56053a2d5d5a6abda924896ffa8eeedc94054cf6d44c5f60", + ] +} + provider "registry.terraform.io/hashicorp/kubernetes" { version = "2.35.0" hashes = [ diff --git a/eks-cijenkinsio-agents-2.tf b/eks-cijenkinsio-agents-2.tf index cb3ffe7..310d76f 100644 --- a/eks-cijenkinsio-agents-2.tf +++ b/eks-cijenkinsio-agents-2.tf @@ -202,7 +202,7 @@ module "cijenkinsio-agents-2" { } module "autoscaler_irsa_role" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" # TODO track with updatecli version = "5.48.0" @@ -233,6 +233,28 @@ provider "kubernetes" { token = data.aws_eks_cluster_auth.cijenkinsio-agents-2.token } +provider "helm" { + alias = "cijenkinsio-agents-2" + kubernetes { + host = module.cijenkinsio-agents-2.cluster_endpoint + token = data.aws_eks_cluster_auth.cijenkinsio-agents-2.token + cluster_ca_certificate = base64decode(module.cijenkinsio-agents-2.cluster_certificate_authority_data) + } +} + +resource "helm_release" "cluster-autoscaler" { + name = "cluster_autoscaler" + repository = "https://kubernetes.github.io/autoscaler" + chart = "cluster-autoscaler" + version = "9.43.2" + + values = templatefile("./helm/cluster-autoscaler-values.yaml.tfpl", { + region = local.region, + serviceAccountName = local.autoscaler_account_name, + autoscalerRoleArn = module.autoscaler_irsa_role.iam_role_arn, + }) +} + module "cijenkinsio-agents-2_admin_sa" { providers = { kubernetes = kubernetes.cijenkinsio-agents-2 From 1ced38f6460e6a1dab106c2486290c77670b954a Mon Sep 17 00:00:00 2001 From: smerle33 Date: Thu, 19 Dec 2024 19:09:49 +0100 Subject: [PATCH 04/15] helm template file for autoscaler --- helm/cluster-autoscaler-values.yaml.tfpl | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 helm/cluster-autoscaler-values.yaml.tfpl diff --git a/helm/cluster-autoscaler-values.yaml.tfpl b/helm/cluster-autoscaler-values.yaml.tfpl new file mode 100644 index 0000000..601faaf --- /dev/null +++ b/helm/cluster-autoscaler-values.yaml.tfpl @@ -0,0 +1,19 @@ +--- +awsRegion: ${region} + +nodeSelector: + role: applications + +extraArgs: + balance-similar-node-groups: true +replicaCount: 2 + +rbac: + create: true + serviceAccount: + name: ${serviceAccountName} + annotations: + eks.amazonaws.com/role-arn: ${autoscalerRoleArn} # todo check it match this kind of string "arn:aws:iam::200564066411:role/cluster-autoscaler-aws-cluster-autoscaler-chart-eks" + +autoDiscovery: + enabled: true From 6c34099e3109a1a003854bf2d95e90af99dc44ca Mon Sep 17 00:00:00 2001 From: smerle33 Date: Fri, 20 Dec 2024 09:46:52 +0100 Subject: [PATCH 05/15] templating --- eks-cijenkinsio-agents-2.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eks-cijenkinsio-agents-2.tf b/eks-cijenkinsio-agents-2.tf index 310d76f..725867f 100644 --- a/eks-cijenkinsio-agents-2.tf +++ b/eks-cijenkinsio-agents-2.tf @@ -248,11 +248,11 @@ resource "helm_release" "cluster-autoscaler" { chart = "cluster-autoscaler" version = "9.43.2" - values = templatefile("./helm/cluster-autoscaler-values.yaml.tfpl", { + values = [templatefile("./helm/cluster-autoscaler-values.yaml.tfpl", { region = local.region, serviceAccountName = local.autoscaler_account_name, autoscalerRoleArn = module.autoscaler_irsa_role.iam_role_arn, - }) + })] } module "cijenkinsio-agents-2_admin_sa" { From d00a947a8a53d76877c0a4dc3811272981763812 Mon Sep 17 00:00:00 2001 From: smerle33 Date: Thu, 19 Dec 2024 18:10:23 +0100 Subject: [PATCH 06/15] feat(cijenkinsio-agent-2) add irsa for autoscaler --- eks-cijenkinsio-agents-2.tf | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/eks-cijenkinsio-agents-2.tf b/eks-cijenkinsio-agents-2.tf index b010d8a..4f95a85 100644 --- a/eks-cijenkinsio-agents-2.tf +++ b/eks-cijenkinsio-agents-2.tf @@ -149,6 +149,26 @@ module "cijenkinsio-agents-2" { } } +module "autoscaler_irsa_role" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + # TODO track with updatecli + version = "5.48.0" + + role_name = "${module.cijenkinsio-agents-2.cluster_name}-cluster-autoscaler" + attach_cluster_autoscaler_policy = true + + cluster_autoscaler_cluster_names = [module.cijenkinsio-agents-2.cluster_name] + + oidc_providers = { + main = { + provider_arn = module.cijenkinsio-agents-2.oidc_provider_arn + namespace_service_accounts = ["${local.autoscaler_account_namespace}:${local.autoscaler_account_name}"] + } + } + + tags = local.common_tags +} + # Configure the jenkins-infra/kubernetes-management admin service account data "aws_eks_cluster_auth" "cijenkinsio-agents-2" { name = module.cijenkinsio-agents-2.cluster_name From 38927ca811a51957775b727df1f2f86ae7def2e4 Mon Sep 17 00:00:00 2001 From: smerle33 Date: Thu, 19 Dec 2024 19:07:21 +0100 Subject: [PATCH 07/15] add helm provider and start autoscaler provisionning --- .terraform.lock.hcl | 19 +++++++++++++++++++ eks-cijenkinsio-agents-2.tf | 24 +++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/.terraform.lock.hcl b/.terraform.lock.hcl index cc01595..be3f293 100644 --- a/.terraform.lock.hcl +++ b/.terraform.lock.hcl @@ -47,6 +47,25 @@ provider "registry.terraform.io/hashicorp/cloudinit" { ] } +provider "registry.terraform.io/hashicorp/helm" { + version = "2.16.1" + hashes = [ + "h1:3VdXbh+m09VEAdSguT7Ea2MMnOVPZBYx4sUqvo6NPxo=", + "zh:0003f6719a32aee9afaeeb001687fc0cfc8c2d5f54861298cf1dc5711f3b4e65", + "zh:16cd5bfee09e7bb081b8b4470f31a9af508e52220fd97fd81c6dda725d9422fe", + "zh:51817de8fdc2c2e36785f23fbf4ec022111bd1cf7679498c16ad0ad7471c16db", + "zh:51b95829b2873be40a65809294bffe349e40cfccc3ff6fee0f471d01770e0ebd", + "zh:56b158dde897c47e1460181fc472c3e920aa23db40579fdc2aad333c1456d2dd", + "zh:916641d26c386959eb982e680028aa677b787687ef7c1283241e45620bc8df50", + "zh:aec15ca8605babba77b283f2ca35daca53e006d567e1c3a3daf50497035b820b", + "zh:c2cecf710b87c8f3a4d186da2ea12cf08041f97ae0c6db82649720d6ed929d65", + "zh:dbdd96f17aea25c7db2d516ab8172a5e683c6686c72a1a44173d2fe96319be39", + "zh:de11e180368434a796b1ab6f20fde7554dc74f7800e063b8e4c8ec3a86d0be63", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + "zh:f827a9c1540d210c56053a2d5d5a6abda924896ffa8eeedc94054cf6d44c5f60", + ] +} + provider "registry.terraform.io/hashicorp/kubernetes" { version = "2.35.0" hashes = [ diff --git a/eks-cijenkinsio-agents-2.tf b/eks-cijenkinsio-agents-2.tf index 4f95a85..e9c86d0 100644 --- a/eks-cijenkinsio-agents-2.tf +++ b/eks-cijenkinsio-agents-2.tf @@ -150,7 +150,7 @@ module "cijenkinsio-agents-2" { } module "autoscaler_irsa_role" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" # TODO track with updatecli version = "5.48.0" @@ -181,6 +181,28 @@ provider "kubernetes" { token = data.aws_eks_cluster_auth.cijenkinsio-agents-2.token } +provider "helm" { + alias = "cijenkinsio-agents-2" + kubernetes { + host = module.cijenkinsio-agents-2.cluster_endpoint + token = data.aws_eks_cluster_auth.cijenkinsio-agents-2.token + cluster_ca_certificate = base64decode(module.cijenkinsio-agents-2.cluster_certificate_authority_data) + } +} + +resource "helm_release" "cluster-autoscaler" { + name = "cluster_autoscaler" + repository = "https://kubernetes.github.io/autoscaler" + chart = "cluster-autoscaler" + version = "9.43.2" + + values = templatefile("./helm/cluster-autoscaler-values.yaml.tfpl", { + region = local.region, + serviceAccountName = local.autoscaler_account_name, + autoscalerRoleArn = module.autoscaler_irsa_role.iam_role_arn, + }) +} + module "cijenkinsio-agents-2_admin_sa" { providers = { kubernetes = kubernetes.cijenkinsio-agents-2 From 773c72567995cd6c2568e5f25e21648ce7d2cfa7 Mon Sep 17 00:00:00 2001 From: smerle33 Date: Thu, 19 Dec 2024 19:09:49 +0100 Subject: [PATCH 08/15] helm template file for autoscaler --- helm/cluster-autoscaler-values.yaml.tfpl | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 helm/cluster-autoscaler-values.yaml.tfpl diff --git a/helm/cluster-autoscaler-values.yaml.tfpl b/helm/cluster-autoscaler-values.yaml.tfpl new file mode 100644 index 0000000..601faaf --- /dev/null +++ b/helm/cluster-autoscaler-values.yaml.tfpl @@ -0,0 +1,19 @@ +--- +awsRegion: ${region} + +nodeSelector: + role: applications + +extraArgs: + balance-similar-node-groups: true +replicaCount: 2 + +rbac: + create: true + serviceAccount: + name: ${serviceAccountName} + annotations: + eks.amazonaws.com/role-arn: ${autoscalerRoleArn} # todo check it match this kind of string "arn:aws:iam::200564066411:role/cluster-autoscaler-aws-cluster-autoscaler-chart-eks" + +autoDiscovery: + enabled: true From 457bc57b9e284903f3320e7fc8c65d68e8d94138 Mon Sep 17 00:00:00 2001 From: smerle33 Date: Fri, 20 Dec 2024 09:46:52 +0100 Subject: [PATCH 09/15] templating --- eks-cijenkinsio-agents-2.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eks-cijenkinsio-agents-2.tf b/eks-cijenkinsio-agents-2.tf index e9c86d0..fc6f5e3 100644 --- a/eks-cijenkinsio-agents-2.tf +++ b/eks-cijenkinsio-agents-2.tf @@ -196,11 +196,11 @@ resource "helm_release" "cluster-autoscaler" { chart = "cluster-autoscaler" version = "9.43.2" - values = templatefile("./helm/cluster-autoscaler-values.yaml.tfpl", { + values = [templatefile("./helm/cluster-autoscaler-values.yaml.tfpl", { region = local.region, serviceAccountName = local.autoscaler_account_name, autoscalerRoleArn = module.autoscaler_irsa_role.iam_role_arn, - }) + })] } module "cijenkinsio-agents-2_admin_sa" { From a0cce35f4d7487ad1a5adee33f7a2856801db076 Mon Sep 17 00:00:00 2001 From: Damien Duportal Date: Fri, 20 Dec 2024 12:47:46 +0100 Subject: [PATCH 10/15] fixup Signed-off-by: Damien Duportal --- eks-cijenkinsio-agents-2.tf | 49 +++++++++++++++++------- helm/cluster-autoscaler-values.yaml.tfpl | 17 ++++++-- locals.tf | 3 +- 3 files changed, 51 insertions(+), 18 deletions(-) diff --git a/eks-cijenkinsio-agents-2.tf b/eks-cijenkinsio-agents-2.tf index fc6f5e3..d3a4aaf 100644 --- a/eks-cijenkinsio-agents-2.tf +++ b/eks-cijenkinsio-agents-2.tf @@ -111,7 +111,7 @@ module "cijenkinsio-agents-2" { eks_managed_node_groups = { tiny_ondemand_linux = { - # This worker pool is expected to host the "technical" services such as pod autoscaler, etc. + # This worker pool is expected to host the "technical" services such as cluster-autoscaler, data cluster-agent, ACP, etc. name = "tiny-ondemand-linux" instance_types = ["t4g.large"] # 2vcpu 8Gio @@ -122,7 +122,19 @@ module "cijenkinsio-agents-2" { max_size = 3 desired_size = 1 - subnet_ids = slice(module.vpc.private_subnets, 1, 2) # Only 1 subnet in 1 AZ + subnet_ids = slice(module.vpc.private_subnets, 1, 2) # Only 1 subnet in 1 AZ (for EBS) + + labels = { + jenkins = local.ci_jenkins_io_service_fqdn + role = "applications" + } + taints = { + applications = { + key = "${local.ci_jenkins_io_fqdn}/applications" + value = "true" + effect = "NO_SCHEDULE" + } + } }, } @@ -169,20 +181,19 @@ module "autoscaler_irsa_role" { tags = local.common_tags } -# Configure the jenkins-infra/kubernetes-management admin service account +### Define custom providers associated to this cluster (could be in providers.tf as alternative) data "aws_eks_cluster_auth" "cijenkinsio-agents-2" { name = module.cijenkinsio-agents-2.cluster_name } - provider "kubernetes" { alias = "cijenkinsio-agents-2" host = module.cijenkinsio-agents-2.cluster_endpoint cluster_ca_certificate = base64decode(module.cijenkinsio-agents-2.cluster_certificate_authority_data) token = data.aws_eks_cluster_auth.cijenkinsio-agents-2.token } - provider "helm" { alias = "cijenkinsio-agents-2" + kubernetes { host = module.cijenkinsio-agents-2.cluster_endpoint token = data.aws_eks_cluster_auth.cijenkinsio-agents-2.token @@ -190,19 +201,30 @@ provider "helm" { } } +### Install Cluster Autoscaler resource "helm_release" "cluster-autoscaler" { - name = "cluster_autoscaler" + provider = helm.cijenkinsio-agents-2 + name = "cluster-autoscaler" repository = "https://kubernetes.github.io/autoscaler" chart = "cluster-autoscaler" - version = "9.43.2" - - values = [templatefile("./helm/cluster-autoscaler-values.yaml.tfpl", { - region = local.region, - serviceAccountName = local.autoscaler_account_name, - autoscalerRoleArn = module.autoscaler_irsa_role.iam_role_arn, - })] + # TODO: track with updatecli + version = "9.43.2" + create_namespace = true + namespace = local.autoscaler_account_namespace + + values = [ + templatefile("./helm/cluster-autoscaler-values.yaml.tfpl", { + region = local.region, + serviceAccountName = local.autoscaler_account_name, + autoscalerRoleArn = module.autoscaler_irsa_role.iam_role_arn, + clusterName = module.cijenkinsio-agents-2.cluster_name, + nodeSelectors = module.cijenkinsio-agents-2.eks_managed_node_groups["tiny_ondemand_linux"].node_group_labels, + nodeTolerations = module.cijenkinsio-agents-2.eks_managed_node_groups["tiny_ondemand_linux"].node_group_taints, + }) + ] } +### Define admin credential to be used in jenkins-infra/kubernetes-management module "cijenkinsio-agents-2_admin_sa" { providers = { kubernetes = kubernetes.cijenkinsio-agents-2 @@ -212,7 +234,6 @@ module "cijenkinsio-agents-2_admin_sa" { cluster_hostname = module.cijenkinsio-agents-2.cluster_endpoint cluster_ca_certificate_b64 = module.cijenkinsio-agents-2.cluster_certificate_authority_data } - output "kubeconfig_cijenkinsio-agents-2" { sensitive = true value = module.cijenkinsio-agents-2_admin_sa.kubeconfig diff --git a/helm/cluster-autoscaler-values.yaml.tfpl b/helm/cluster-autoscaler-values.yaml.tfpl index 601faaf..ea69cdd 100644 --- a/helm/cluster-autoscaler-values.yaml.tfpl +++ b/helm/cluster-autoscaler-values.yaml.tfpl @@ -2,7 +2,17 @@ awsRegion: ${region} nodeSelector: - role: applications +%{ for label_key, label_value in nodeSelectors ~} + ${label_key}: "${label_value}" +%{ endfor ~} + +tolerations: +%{ for toleration in nodeTolerations ~} + - key: "${toleration.key}" + operator: "Equal" + value: "${toleration.value}" + effect: "${ replace(title(lower(replace(toleration.effect, "_", " "))), " ", "") }" +%{ endfor ~} extraArgs: balance-similar-node-groups: true @@ -11,9 +21,10 @@ replicaCount: 2 rbac: create: true serviceAccount: - name: ${serviceAccountName} + name: "${serviceAccountName}" annotations: - eks.amazonaws.com/role-arn: ${autoscalerRoleArn} # todo check it match this kind of string "arn:aws:iam::200564066411:role/cluster-autoscaler-aws-cluster-autoscaler-chart-eks" + eks.amazonaws.com/role-arn: "${autoscalerRoleArn}" autoDiscovery: enabled: true + clusterName: "${clusterName}" diff --git a/locals.tf b/locals.tf index 0bdf56e..c111612 100644 --- a/locals.tf +++ b/locals.tf @@ -11,7 +11,8 @@ locals { "repository" = "jenkins-infra/terraform-aws-sponsorship" } - ci_jenkins_io_fqdn = "aws.ci.jenkins.io" + ci_jenkins_io_service_fqdn = "ci.jenkins.io" + ci_jenkins_io_fqdn = "aws.${local.ci_jenkins_io_service_fqdn}" ##### ## External and outbounds IP used by resources for network restrictions. From 680f38e338600bbb6145f73e616c8fc1ae4242e3 Mon Sep 17 00:00:00 2001 From: Damien Duportal Date: Fri, 20 Dec 2024 13:17:50 +0100 Subject: [PATCH 11/15] fixup: factorize taints and tolerations Signed-off-by: Damien Duportal --- eks-cijenkinsio-agents-2.tf | 66 +++++++++++++++---------------------- locals.tf | 6 ++++ 2 files changed, 32 insertions(+), 40 deletions(-) diff --git a/eks-cijenkinsio-agents-2.tf b/eks-cijenkinsio-agents-2.tf index 2809093..a9d335e 100644 --- a/eks-cijenkinsio-agents-2.tf +++ b/eks-cijenkinsio-agents-2.tf @@ -8,6 +8,19 @@ resource "aws_kms_key" "cijenkinsio-agents-2" { }) } +locals { + cijenkinsio_agents_2_tolerations = { + applications = [ + { + "effect" : "NoSchedule", + "key" : "${local.ci_jenkins_io_fqdn}/applications", + "operator" : "Equal", + "value" : "true" + }, + ], + } +} + # EKS Cluster definition module "cijenkinsio-agents-2" { source = "terraform-aws-modules/eks/aws" @@ -81,63 +94,36 @@ module "cijenkinsio-agents-2" { ## Manage EKS addons with module - https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/eks_addon # See new versions with `aws eks describe-addon-versions --kubernetes-version --addon-name ` cluster_addons = { - # https://docs.aws.amazon.com/cli/latest/reference/eks/describe-addon-versions.html coredns = { + # https://docs.aws.amazon.com/cli/latest/reference/eks/describe-addon-versions.html + # TODO: track with updatecli addon_version = "v1.11.3-eksbuild.2" configuration_values = jsonencode({ - "tolerations" : [ - { - "effect" : "NoSchedule", - "key" : "ci.jenkins.io/applications", - "operator" : "Equal", - "value" : "true" - } - ] + "tolerations" = local.cijenkinsio_agents_2_tolerations["applications"], }) } # Kube-proxy on an Amazon EKS cluster has the same compatibility and skew policy as Kubernetes # See https://kubernetes.io/releases/version-skew-policy/#kube-proxy kube-proxy = { # https://docs.aws.amazon.com/cli/latest/reference/eks/describe-addon-versions.html + # TODO: track with updatecli addon_version = "v1.29.10-eksbuild.3" - configuration_values = jsonencode({ - "tolerations" : [ - { - "effect" : "NoSchedule", - "key" : "ci.jenkins.io/applications", - "operator" : "Equal", - "value" : "true" - } - ] - }) } # https://github.com/aws/amazon-vpc-cni-k8s/releases vpc-cni = { # https://docs.aws.amazon.com/cli/latest/reference/eks/describe-addon-versions.html + # TODO: track with updatecli addon_version = "v1.19.0-eksbuild.1" configuration_values = jsonencode({ - "tolerations" : [ - { - "effect" : "NoSchedule", - "key" : "ci.jenkins.io/applications", - "operator" : "Equal", - "value" : "true" - } - ] + "tolerations" = local.cijenkinsio_agents_2_tolerations["applications"], }) } eks-pod-identity-agent = { # https://docs.aws.amazon.com/cli/latest/reference/eks/describe-addon-versions.html + # TODO: track with updatecli addon_version = "v1.3.4-eksbuild.1" configuration_values = jsonencode({ - "tolerations" : [ - { - "effect" : "NoSchedule", - "key" : "ci.jenkins.io/applications", - "operator" : "Equal", - "value" : "true" - } - ] + "tolerations" = local.cijenkinsio_agents_2_tolerations["applications"], }) } ## https://github.com/kubernetes-sigs/aws-ebs-csi-driver/blob/master/CHANGELOG.md @@ -168,11 +154,11 @@ module "cijenkinsio-agents-2" { jenkins = local.ci_jenkins_io_service_fqdn role = "applications" } - taints = { - applications = { - key = "${local.ci_jenkins_io_fqdn}/applications" - value = "true" - effect = "NO_SCHEDULE" + taints = { for toleration_key, toleration_value in local.cijenkinsio_agents_2_tolerations["applications"] : + toleration_key => { + key = toleration_value["key"], + value = toleration_value.value + effect = local.toleration_taint_effects[toleration_value.effect] } } }, diff --git a/locals.tf b/locals.tf index c111612..994d4fe 100644 --- a/locals.tf +++ b/locals.tf @@ -14,6 +14,12 @@ locals { ci_jenkins_io_service_fqdn = "ci.jenkins.io" ci_jenkins_io_fqdn = "aws.${local.ci_jenkins_io_service_fqdn}" + toleration_taint_effects = { + "NoSchedule" = "NO_SCHEDULE", + "NoExecute" = "NO_EXECUTE", + "PreferNoSchedule" = "PREFER_NO_SCHEDULE", + } + ##### ## External and outbounds IP used by resources for network restrictions. ## Note: we use scalar (strings with space separator) to manage type changes by updatecli's HCL parser From c533e17bb118ca05f93b4a5de61d0952fa7fdad3 Mon Sep 17 00:00:00 2001 From: Damien Duportal Date: Fri, 20 Dec 2024 13:39:57 +0100 Subject: [PATCH 12/15] fixup: cleanup locals, providers and outputs Signed-off-by: Damien Duportal --- ci.jenkins.io.tf | 8 +++---- eks-cijenkinsio-agents-2.tf | 48 +++++++++---------------------------- locals.tf | 31 +++++++++++++++++------- outputs.tf | 5 ++-- providers.tf | 23 ++++++++++++++++-- 5 files changed, 62 insertions(+), 53 deletions(-) diff --git a/ci.jenkins.io.tf b/ci.jenkins.io.tf index 42e38b5..99babe1 100644 --- a/ci.jenkins.io.tf +++ b/ci.jenkins.io.tf @@ -103,7 +103,7 @@ resource "aws_instance" "ci_jenkins_io" { disable_api_termination = true # Protect ourselves from accidental deletion - user_data = templatefile("${path.root}/.shared-tools/terraform/cloudinit.tftpl", { hostname = local.ci_jenkins_io_fqdn, admin_username = "ubuntu" }) + user_data = templatefile("${path.root}/.shared-tools/terraform/cloudinit.tftpl", { hostname = local.ci_jenkins_io["controller_vm_fqdn"], admin_username = "ubuntu" }) root_block_device { delete_on_termination = false # Even if we terminate the machine @@ -133,14 +133,14 @@ resource "aws_instance" "ci_jenkins_io" { ### DNS Zone delegated from Azure DNS (jenkins-infra/azure-net) # `updatecli` maintains sync between the 2 repositories using the infra reports (see outputs.tf) resource "aws_route53_zone" "aws_ci_jenkins_io" { - name = local.ci_jenkins_io_fqdn + name = local.ci_jenkins_io["controller_vm_fqdn"] tags = local.common_tags } resource "aws_route53_record" "a_aws_ci_jenkins_io" { zone_id = aws_route53_zone.aws_ci_jenkins_io.zone_id - name = local.ci_jenkins_io_fqdn + name = local.ci_jenkins_io["controller_vm_fqdn"] type = "A" ttl = 60 records = [aws_eip.ci_jenkins_io.public_ip] @@ -148,7 +148,7 @@ resource "aws_route53_record" "a_aws_ci_jenkins_io" { resource "aws_route53_record" "aaaa_aws_ci_jenkins_io" { zone_id = aws_route53_zone.aws_ci_jenkins_io.zone_id - name = local.ci_jenkins_io_fqdn + name = local.ci_jenkins_io["controller_vm_fqdn"] type = "AAAA" ttl = 60 records = aws_instance.ci_jenkins_io.ipv6_addresses diff --git a/eks-cijenkinsio-agents-2.tf b/eks-cijenkinsio-agents-2.tf index a9d335e..1c0a541 100644 --- a/eks-cijenkinsio-agents-2.tf +++ b/eks-cijenkinsio-agents-2.tf @@ -8,19 +8,6 @@ resource "aws_kms_key" "cijenkinsio-agents-2" { }) } -locals { - cijenkinsio_agents_2_tolerations = { - applications = [ - { - "effect" : "NoSchedule", - "key" : "${local.ci_jenkins_io_fqdn}/applications", - "operator" : "Equal", - "value" : "true" - }, - ], - } -} - # EKS Cluster definition module "cijenkinsio-agents-2" { source = "terraform-aws-modules/eks/aws" @@ -99,7 +86,7 @@ module "cijenkinsio-agents-2" { # TODO: track with updatecli addon_version = "v1.11.3-eksbuild.2" configuration_values = jsonencode({ - "tolerations" = local.cijenkinsio_agents_2_tolerations["applications"], + "tolerations" = local.cijenkinsio_agents_2["tolerations"]["applications"], }) } # Kube-proxy on an Amazon EKS cluster has the same compatibility and skew policy as Kubernetes @@ -115,7 +102,7 @@ module "cijenkinsio-agents-2" { # TODO: track with updatecli addon_version = "v1.19.0-eksbuild.1" configuration_values = jsonencode({ - "tolerations" = local.cijenkinsio_agents_2_tolerations["applications"], + "tolerations" = local.cijenkinsio_agents_2["tolerations"]["applications"], }) } eks-pod-identity-agent = { @@ -123,7 +110,7 @@ module "cijenkinsio-agents-2" { # TODO: track with updatecli addon_version = "v1.3.4-eksbuild.1" configuration_values = jsonencode({ - "tolerations" = local.cijenkinsio_agents_2_tolerations["applications"], + "tolerations" = local.cijenkinsio_agents_2["tolerations"]["applications"], }) } ## https://github.com/kubernetes-sigs/aws-ebs-csi-driver/blob/master/CHANGELOG.md @@ -133,6 +120,8 @@ module "cijenkinsio-agents-2" { # # TODO specify service account # # service_account_role_arn = module.cijenkinsio-agents-2_irsa_ebs.iam_role_arn # } + # locals: ebs_account_namespace = "kube-system" + # locals: ebs_account_name = "ebs-csi-controller-sa" } eks_managed_node_groups = { @@ -151,10 +140,10 @@ module "cijenkinsio-agents-2" { subnet_ids = slice(module.vpc.private_subnets, 1, 2) # Only 1 subnet in 1 AZ (for EBS) labels = { - jenkins = local.ci_jenkins_io_service_fqdn + jenkins = local.ci_jenkins_io["service_fqdn"] role = "applications" } - taints = { for toleration_key, toleration_value in local.cijenkinsio_agents_2_tolerations["applications"] : + taints = { for toleration_key, toleration_value in local.cijenkinsio_agents_2["tolerations"]["applications"] : toleration_key => { key = toleration_value["key"], value = toleration_value.value @@ -200,32 +189,17 @@ module "autoscaler_irsa_role" { oidc_providers = { main = { provider_arn = module.cijenkinsio-agents-2.oidc_provider_arn - namespace_service_accounts = ["${local.autoscaler_account_namespace}:${local.autoscaler_account_name}"] + namespace_service_accounts = ["${local.cijenkinsio_agents_2["autoscaler"]["namespace"]}:${local.cijenkinsio_agents_2["autoscaler"]["serviceaccount"]}"] } } tags = local.common_tags } -### Define custom providers associated to this cluster (could be in providers.tf as alternative) +# Used by kubernetes/helm provider to authenticate to cluster with the AWS IAM identity (using a token) data "aws_eks_cluster_auth" "cijenkinsio-agents-2" { name = module.cijenkinsio-agents-2.cluster_name } -provider "kubernetes" { - alias = "cijenkinsio-agents-2" - host = module.cijenkinsio-agents-2.cluster_endpoint - cluster_ca_certificate = base64decode(module.cijenkinsio-agents-2.cluster_certificate_authority_data) - token = data.aws_eks_cluster_auth.cijenkinsio-agents-2.token -} -provider "helm" { - alias = "cijenkinsio-agents-2" - - kubernetes { - host = module.cijenkinsio-agents-2.cluster_endpoint - token = data.aws_eks_cluster_auth.cijenkinsio-agents-2.token - cluster_ca_certificate = base64decode(module.cijenkinsio-agents-2.cluster_certificate_authority_data) - } -} ### Install Cluster Autoscaler resource "helm_release" "cluster-autoscaler" { @@ -236,12 +210,12 @@ resource "helm_release" "cluster-autoscaler" { # TODO: track with updatecli version = "9.43.2" create_namespace = true - namespace = local.autoscaler_account_namespace + namespace = local.cijenkinsio_agents_2["autoscaler"]["namespace"] values = [ templatefile("./helm/cluster-autoscaler-values.yaml.tfpl", { region = local.region, - serviceAccountName = local.autoscaler_account_name, + serviceAccountName = local.cijenkinsio_agents_2["autoscaler"]["serviceaccount"], autoscalerRoleArn = module.autoscaler_irsa_role.iam_role_arn, clusterName = module.cijenkinsio-agents-2.cluster_name, nodeSelectors = module.cijenkinsio-agents-2.eks_managed_node_groups["tiny_ondemand_linux"].node_group_labels, diff --git a/locals.tf b/locals.tf index 994d4fe..5a4ec04 100644 --- a/locals.tf +++ b/locals.tf @@ -1,18 +1,33 @@ locals { - aws_account_id = "326712726440" - region = "us-east-2" - autoscaler_account_namespace = "autoscaler" - autoscaler_account_name = "cluster-autoscaler-aws-cluster-autoscaler-chart" - ebs_account_namespace = "kube-system" - ebs_account_name = "ebs-csi-controller-sa" + aws_account_id = "326712726440" + region = "us-east-2" common_tags = { "scope" = "terraform-managed" "repository" = "jenkins-infra/terraform-aws-sponsorship" } - ci_jenkins_io_service_fqdn = "ci.jenkins.io" - ci_jenkins_io_fqdn = "aws.${local.ci_jenkins_io_service_fqdn}" + ci_jenkins_io = { + service_fqdn = "ci.jenkins.io" + controller_vm_fqdn = "aws.ci.jenkins.io" + } + + cijenkinsio_agents_2 = { + autoscaler = { + namespace = "autoscaler", + serviceaccount = "cluster-autoscaler-aws-cluster-autoscaler-chart", + }, + tolerations = { + applications = [ + { + "effect" : "NoSchedule", + "key" : "${local.ci_jenkins_io["service_fqdn"]}/applications", + "operator" : "Equal", + "value" : "true" + }, + ], + }, + } toleration_taint_effects = { "NoSchedule" = "NO_SCHEDULE", diff --git a/outputs.tf b/outputs.tf index d9f90dc..eeaaf7c 100644 --- a/outputs.tf +++ b/outputs.tf @@ -1,6 +1,6 @@ resource "local_file" "jenkins_infra_data_report" { content = jsonencode({ - "${local.ci_jenkins_io_fqdn}" = { + "${local.ci_jenkins_io["controller_vm_fqdn"]}" = { "name_servers" = aws_route53_zone.aws_ci_jenkins_io.name_servers, "outbound_ips" = { "agents" = module.vpc.nat_public_ips, @@ -18,7 +18,8 @@ resource "local_file" "jenkins_infra_data_report" { }, }, "cijenkinsio-agents-2" = { - "cluster_endpoint" = module.cijenkinsio-agents-2.cluster_endpoint + "cluster_endpoint" = module.cijenkinsio-agents-2.cluster_endpoint, + "tolerations" = local.cijenkinsio_agents_2["tolerations"], }, }) filename = "${path.module}/jenkins-infra-data-reports/aws-sponsorship.json" diff --git a/providers.tf b/providers.tf index b85304f..1a9e604 100644 --- a/providers.tf +++ b/providers.tf @@ -13,23 +13,42 @@ provider "aws" { provider "local" { } +# TODO track with updatecli provider "cloudinit" { # Required by the EKS module } +# TODO track with updatecli provider "null" { # Required by the EKS module } +# TODO track with updatecli provider "time" { # Required by the EKS module } +# TODO track with updatecli provider "tls" { # Required by the EKS module } -# There are other kubernetes providers defined in other files with specific auth. -# This one is a placeholder to ensure lock file has the proper setup +# TODO track with updatecli provider "kubernetes" { + alias = "cijenkinsio-agents-2" + + host = module.cijenkinsio-agents-2.cluster_endpoint + cluster_ca_certificate = base64decode(module.cijenkinsio-agents-2.cluster_certificate_authority_data) + token = data.aws_eks_cluster_auth.cijenkinsio-agents-2.token +} + +# TODO track with updatecli +provider "helm" { + alias = "cijenkinsio-agents-2" + + kubernetes { + host = module.cijenkinsio-agents-2.cluster_endpoint + token = data.aws_eks_cluster_auth.cijenkinsio-agents-2.token + cluster_ca_certificate = base64decode(module.cijenkinsio-agents-2.cluster_certificate_authority_data) + } } From 2e8fdf3b8bd161ac16e7c6e7a52a480ba4b6bb94 Mon Sep 17 00:00:00 2001 From: Damien Duportal Date: Fri, 20 Dec 2024 13:52:45 +0100 Subject: [PATCH 13/15] fixup - template and cleaning comments Signed-off-by: Damien Duportal --- eks-cijenkinsio-agents-2.tf | 14 ++++---------- helm/cluster-autoscaler-values.yaml.tfpl | 8 +------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/eks-cijenkinsio-agents-2.tf b/eks-cijenkinsio-agents-2.tf index 1c0a541..ac24426 100644 --- a/eks-cijenkinsio-agents-2.tf +++ b/eks-cijenkinsio-agents-2.tf @@ -23,13 +23,13 @@ module "cijenkinsio-agents-2" { subnet_ids = slice(module.vpc.private_subnets, 1, 3) # Required to allow EKS service accounts to authenticate to AWS API through OIDC (and assume IAM roles) - # useful for autoscaler, EKS addons and any AWS APi usage + # useful for autoscaler, EKS addons and any AWS API usage enable_irsa = true # Allow the terraform CI IAM user to be co-owner of the cluster enable_cluster_creator_admin_permissions = true - # avoid using config map to specify admin accesses (decrease attack surface) + # Avoid using config map to specify admin accesses (decrease attack surface) authentication_mode = "API" access_entries = { @@ -65,9 +65,6 @@ module "cijenkinsio-agents-2" { create_cluster_primary_security_group_tags = false - # Do not use interpolated values from `local` in either keys and values of provided tags (or `cluster_tags) - # To avoid having and implicit dependency to a resource not available when parsing the module (infamous errror `Error: Invalid for_each argument`) - # Ref. same error as having a `depends_on` in https://github.com/terraform-aws-modules/terraform-aws-eks/issues/2337 tags = merge(local.common_tags, { GithubRepo = "terraform-aws-sponsorship" GithubOrg = "jenkins-infra" @@ -75,11 +72,8 @@ module "cijenkinsio-agents-2" { associated_service = "eks/cijenkinsio-agents-2" }) - # VPC is defined in vpc.tf vpc_id = module.vpc.vpc_id - ## Manage EKS addons with module - https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/eks_addon - # See new versions with `aws eks describe-addon-versions --kubernetes-version --addon-name ` cluster_addons = { coredns = { # https://docs.aws.amazon.com/cli/latest/reference/eks/describe-addon-versions.html @@ -125,8 +119,8 @@ module "cijenkinsio-agents-2" { } eks_managed_node_groups = { + # This worker pool is expected to host the "technical" services such as cluster-autoscaler, data cluster-agent, ACP, etc. tiny_ondemand_linux = { - # This worker pool is expected to host the "technical" services such as cluster-autoscaler, data cluster-agent, ACP, etc. name = "tiny-ondemand-linux" instance_types = ["t4g.large"] # 2vcpu 8Gio @@ -219,7 +213,7 @@ resource "helm_release" "cluster-autoscaler" { autoscalerRoleArn = module.autoscaler_irsa_role.iam_role_arn, clusterName = module.cijenkinsio-agents-2.cluster_name, nodeSelectors = module.cijenkinsio-agents-2.eks_managed_node_groups["tiny_ondemand_linux"].node_group_labels, - nodeTolerations = module.cijenkinsio-agents-2.eks_managed_node_groups["tiny_ondemand_linux"].node_group_taints, + nodeTolerations = local.cijenkinsio_agents_2["tolerations"]["applications"], }) ] } diff --git a/helm/cluster-autoscaler-values.yaml.tfpl b/helm/cluster-autoscaler-values.yaml.tfpl index ea69cdd..aa1f797 100644 --- a/helm/cluster-autoscaler-values.yaml.tfpl +++ b/helm/cluster-autoscaler-values.yaml.tfpl @@ -6,13 +6,7 @@ nodeSelector: ${label_key}: "${label_value}" %{ endfor ~} -tolerations: -%{ for toleration in nodeTolerations ~} - - key: "${toleration.key}" - operator: "Equal" - value: "${toleration.value}" - effect: "${ replace(title(lower(replace(toleration.effect, "_", " "))), " ", "") }" -%{ endfor ~} +tolerations: ${yamlencode(nodeTolerations)} extraArgs: balance-similar-node-groups: true From 6452d1997a58d39b31a5ba8e9bc3102a96e45507 Mon Sep 17 00:00:00 2001 From: Damien Duportal Date: Fri, 20 Dec 2024 14:06:45 +0100 Subject: [PATCH 14/15] fixup Signed-off-by: Damien Duportal --- helm/cluster-autoscaler-values.yaml.tfpl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/helm/cluster-autoscaler-values.yaml.tfpl b/helm/cluster-autoscaler-values.yaml.tfpl index aa1f797..09d40a6 100644 --- a/helm/cluster-autoscaler-values.yaml.tfpl +++ b/helm/cluster-autoscaler-values.yaml.tfpl @@ -6,7 +6,8 @@ nodeSelector: ${label_key}: "${label_value}" %{ endfor ~} -tolerations: ${yamlencode(nodeTolerations)} +tolerations: +${yamlencode(nodeTolerations)} extraArgs: balance-similar-node-groups: true From 26d293316d41eaa0382d0643b10504a23b54a023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20MERLE?= <95630726+smerle33@users.noreply.github.com> Date: Fri, 20 Dec 2024 14:32:51 +0100 Subject: [PATCH 15/15] Apply suggestions from code review --- helm/cluster-autoscaler-values.yaml.tfpl | 6 ++---- locals.tf | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/helm/cluster-autoscaler-values.yaml.tfpl b/helm/cluster-autoscaler-values.yaml.tfpl index 09d40a6..b51b356 100644 --- a/helm/cluster-autoscaler-values.yaml.tfpl +++ b/helm/cluster-autoscaler-values.yaml.tfpl @@ -1,10 +1,8 @@ --- -awsRegion: ${region} +awsRegion: "${region}" nodeSelector: -%{ for label_key, label_value in nodeSelectors ~} - ${label_key}: "${label_value}" -%{ endfor ~} +${yamlencode(nodeSelectors)} tolerations: ${yamlencode(nodeTolerations)} diff --git a/locals.tf b/locals.tf index 5a4ec04..aa9d884 100644 --- a/locals.tf +++ b/locals.tf @@ -15,7 +15,7 @@ locals { cijenkinsio_agents_2 = { autoscaler = { namespace = "autoscaler", - serviceaccount = "cluster-autoscaler-aws-cluster-autoscaler-chart", + serviceaccount = "autoscaler", }, tolerations = { applications = [