From 2de4903cd5bd2039da15c0e833712ee8dd2bd983 Mon Sep 17 00:00:00 2001
From: Felix
Date: Fri, 26 Mar 2021 23:21:33 +0800
Subject: [PATCH] Add AWS Spot feature and Auto Scaling Group

---
Review notes (placed after the "---" separator, so they are not part of the
commit message):

* locals.tf: asg_equiv_instance_types now carries every entry of
  var.node_instance_types instead of skipping the first one. Instance type
  overrides in an ASG mixed instances policy replace the launch template's
  instance_type, so with ["t3a.small", "t3.small"] the previous slice() left
  t3a.small out of the pool.
* k3s_node_pool.tf / locals.tf: added explanatory comments; the hunk sizes
  and the diffstat below are adjusted accordingly.

 data.tf                               | 10 ---
 examples/k3s-in-existing-vpc/main.tf  | 28 +++++----
 examples/k3s-in-new-vpc/main.tf       |  3 +-
 extras/ssm_vpc_endpoints/variables.tf |  1 +
 k3s_master.tf                         |  1 +
 k3s_node.tf                           | 39 ------------
 k3s_node_pool.tf                      | 94 +++++++++++++++++++++++++++
 locals.tf                             | 30 +++++++++
 variables.tf                          | 18 +++++-
 9 files changed, 160 insertions(+), 64 deletions(-)
 create mode 100644 k3s_node_pool.tf
 create mode 100644 locals.tf

diff --git a/data.tf b/data.tf
index 3684ff0..4f3446d 100644
--- a/data.tf
+++ b/data.tf
@@ -54,13 +54,3 @@ data "aws_subnet" "private" {
   id = each.key
 }
 
-locals {
-  cluster_id      = var.cluster_id
-  master_count    = 1
-  node_count      = var.node_count
-  master_ami      = data.aws_ami.amz2-x86_64.id
-  node_ami        = var.node_instance_arch == "arm64" ? data.aws_ami.amz2-arm64.id : data.aws_ami.amz2-x86_64.id
-  master_vol      = 50
-  node_vol        = 50
-  private_subnets = var.private_subnets
-}
diff --git a/examples/k3s-in-existing-vpc/main.tf b/examples/k3s-in-existing-vpc/main.tf
index d455458..b434358 100644
--- a/examples/k3s-in-existing-vpc/main.tf
+++ b/examples/k3s-in-existing-vpc/main.tf
@@ -1,6 +1,11 @@
 provider "aws" {
   region  = "ap-southeast-1" # change this
   profile = "default"        # can be changed to other profile
+
+  ignore_tags {
+    # required to prevent tag from messing terraform state
+    key_prefixes = ["kubernetes.io"]
+  }
 }
 
 data "aws_region" "current" {}
@@ -30,26 +35,25 @@ module "subnets" {
 }
 
 module "k3s-in-existing-vpc" {
-  # source = "../.."
-  source = "sagittaros/private-cloud/k3s"
+  source = "../.."
+  # source = "sagittaros/private-cloud/k3s"
 
-  # context
-  name  = "kay3s"
-  stage = "staging"
+  # main
+  cluster_id = "k3s-in-existing-vpc"
 
   # networking
-  region                = data.aws_region.current.name
-  availability_zones    = data.aws_availability_zones.all.names
-  vpc_id                = data.aws_vpc.this.id
-  public_subnets        = module.subnets.public_subnet_ids
-  private_subnets       = module.subnets.private_subnet_ids
-  create_discovery_tags = true
+  region             = data.aws_region.current.name
+  availability_zones = data.aws_availability_zones.all.names
+  vpc_id             = data.aws_vpc.this.id
+  public_subnets     = module.subnets.public_subnet_ids
+  private_subnets    = module.subnets.private_subnet_ids
 
   # node instances
   master_instance_type = "t3a.small"
   node_count           = 3
   node_instance_arch   = "x86_64"
-  node_instance_type   = "t3a.small"
+  node_instance_types  = ["t3a.small", "t3.small"]
+  on_demand_percentage = 0 # all spot instances
 
   # # run on Arm architecture, where g == ARM-based graviton
   # node_instance_arch = "arm64"
diff --git a/examples/k3s-in-new-vpc/main.tf b/examples/k3s-in-new-vpc/main.tf
index 0597ac4..e1a2158 100644
--- a/examples/k3s-in-new-vpc/main.tf
+++ b/examples/k3s-in-new-vpc/main.tf
@@ -58,7 +58,8 @@ module "k3s-in-new-vpc" {
   master_instance_type = "t3a.small"
   node_count           = 3
   node_instance_arch   = "x86_64"
-  node_instance_type   = "t3a.small"
+  node_instance_types  = ["t3a.small", "t3.small"]
+  on_demand_percentage = 0 # all spot instances
 
   # # run on Arm architecture, where g == ARM-based graviton
   # node_instance_arch = "arm64"
diff --git a/extras/ssm_vpc_endpoints/variables.tf b/extras/ssm_vpc_endpoints/variables.tf
index a6cb4dd..3bafc71 100644
--- a/extras/ssm_vpc_endpoints/variables.tf
+++ b/extras/ssm_vpc_endpoints/variables.tf
@@ -19,3 +19,4 @@ variable "private_subnets" {
   type        = list(any)
   description = "List of private subnet ids to use. If blank, infer from VPC"
 }
+
diff --git a/k3s_master.tf b/k3s_master.tf
index 202d134..27224fb 100644
--- a/k3s_master.tf
+++ b/k3s_master.tf
@@ -79,6 +79,7 @@ resource "aws_instance" "k3s_master" {
   user_data = data.cloudinit_config.k3s_master.rendered
 
   tags = {
+    "Name"                                      = "${local.cluster_id}-master",
     "KubernetesCluster"                         = local.cluster_id,
     "kubernetes.io/cluster/${local.cluster_id}" = "owned"
     "k3s-role"                                  = "master"
diff --git a/k3s_node.tf b/k3s_node.tf
index 00086e8..8297d63 100644
--- a/k3s_node.tf
+++ b/k3s_node.tf
@@ -42,42 +42,3 @@ data "cloudinit_config" "k3s_node" {
   }
 }
 
-
-resource "aws_instance" "k3s_node" {
-  count                = local.node_count
-  ami                  = local.node_ami
-  instance_type        = var.node_instance_type
-  iam_instance_profile = aws_iam_instance_profile.k3s_node.name
-
-  # spread instances across subnets
-  subnet_id                   = element(local.private_subnets, count.index)
-  associate_public_ip_address = false
-
-  vpc_security_group_ids = concat([
-    aws_security_group.self.id,
-    aws_security_group.node_ports.id,
-    aws_security_group.egress.id
-  ], var.extra_node_security_groups)
-
-  root_block_device {
-    volume_size = local.node_vol
-    encrypted   = true
-  }
-
-  user_data = data.cloudinit_config.k3s_node.rendered
-
-  tags = {
-    "KubernetesCluster"                         = local.cluster_id
-    "kubernetes.io/cluster/${local.cluster_id}" = "owned"
-    "k3s-role"                                  = "node"
-  }
-
-  lifecycle {
-    ignore_changes = [
-      ami,       # new ami changes by amazon should not affect change to this instance
-      user_data, # https://github.com/hashicorp/terraform-provider-aws/issues/4954
-      tags,
-      volume_tags,
-    ]
-  }
-}
diff --git a/k3s_node_pool.tf b/k3s_node_pool.tf
new file mode 100644
index 0000000..282786c
--- /dev/null
+++ b/k3s_node_pool.tf
@@ -0,0 +1,94 @@
+# Worker node pool: fixed-size Auto Scaling Group (desired = min = max = local.node_count).
+resource "aws_autoscaling_group" "node_pool" {
+  name_prefix = local.cluster_id
+
+  desired_capacity          = local.node_count
+  min_size                  = local.node_count
+  max_size                  = local.node_count
+  default_cooldown          = local.asg_default_cooldown
+  health_check_grace_period = local.asg_health_check_grace_period
+
+  # network
+  vpc_zone_identifier = local.private_subnets
+
+  # template
+  mixed_instances_policy {
+    launch_template {
+      launch_template_specification {
+        launch_template_id = aws_launch_template.node_pool.id
+        version            = local.asg_launch_template_version
+      }
+
+      # Candidate instance types for the pool. Overrides replace the
+      # instance_type set on the launch template, so every type is listed here.
+      dynamic "override" {
+        for_each = local.asg_equiv_instance_types
+        content {
+          instance_type = override.value
+        }
+      }
+    }
+
+    # Refer following doc for more parameters
+    # https://docs.aws.amazon.com/autoscaling/ec2/APIReference/API_InstancesDistribution.html
+    instances_distribution {
+      on_demand_percentage_above_base_capacity = local.asg_on_demand_percentage
+    }
+  }
+
+  target_group_arns = local.asg_target_group_arns
+
+  lifecycle {
+    create_before_destroy = true
+    ignore_changes        = [tag]
+  }
+
+  dynamic "tag" {
+    for_each = local.node_pool_tags
+
+    content {
+      key                 = tag.key
+      value               = tag.value
+      propagate_at_launch = true
+    }
+  }
+}
+
+# Launch template used by the node pool Auto Scaling Group above.
+resource "aws_launch_template" "node_pool" {
+  name_prefix = local.cluster_id
+  image_id    = local.node_ami
+  user_data   = data.cloudinit_config.k3s_node.rendered
+
+  iam_instance_profile {
+    arn = aws_iam_instance_profile.k3s_node.arn
+  }
+
+  # Default type; instance type overrides on the ASG take precedence over it.
+  instance_type = local.asg_base_instance_type
+
+  block_device_mappings {
+    device_name = local.node_root_device_name
+    ebs {
+      volume_size = local.node_vol
+      encrypted   = true
+    }
+  }
+
+  network_interfaces {
+    associate_public_ip_address = false
+    security_groups = concat([
+      aws_security_group.self.id,
+      aws_security_group.node_ports.id,
+      aws_security_group.egress.id
+    ], var.extra_node_security_groups)
+  }
+
+  tags = {
+    Cluster = local.cluster_id
+  }
+
+  lifecycle {
+    create_before_destroy = true
+  }
+}
diff --git a/locals.tf b/locals.tf
new file mode 100644
index 0000000..5682c43
--- /dev/null
+++ b/locals.tf
@@ -0,0 +1,30 @@
+locals {
+  cluster_id            = var.cluster_id
+  master_count          = 1
+  node_count            = var.node_count
+  master_ami            = data.aws_ami.amz2-x86_64.id
+  node_ami              = var.node_instance_arch == "arm64" ? data.aws_ami.amz2-arm64.id : data.aws_ami.amz2-x86_64.id
+  node_root_device_name = var.node_instance_arch == "arm64" ? data.aws_ami.amz2-arm64.root_device_name : data.aws_ami.amz2-x86_64.root_device_name
+  master_vol            = 50
+  node_vol              = 50
+  private_subnets       = var.private_subnets
+
+  # ASG configuration
+  asg_launch_template_version   = "$Latest"
+  asg_target_group_arns         = var.target_group_arns
+  asg_default_cooldown          = 30
+  asg_health_check_grace_period = 30
+  asg_on_demand_percentage      = var.on_demand_percentage
+  asg_base_instance_type        = element(var.node_instance_types, 0)
+  # Every requested type goes into the ASG overrides: overrides replace the
+  # launch template's instance_type, so leaving out the first entry would
+  # exclude it from the pool whenever more than one type is given.
+  asg_equiv_instance_types      = var.node_instance_types
+  node_pool_tags = {
+    "Name"                                    = "${var.cluster_id}-nodes"
+    "KubernetesCluster"                       = var.cluster_id
+    "kubernetes.io/cluster/${var.cluster_id}" = "owned"
+    "k3s-role"                                = "node"
+  }
+
+}
diff --git a/variables.tf b/variables.tf
index dcb4228..980c5f2 100644
--- a/variables.tf
+++ b/variables.tf
@@ -56,9 +56,12 @@ variable "node_instance_arch" {
   default     = "arm64"
 }
 
-variable "node_instance_type" {
+variable "node_instance_types" {
+  type        = list(string)
   description = "Instance size for k3s instance, Must match architecture (codename a=arm, g=graviton)"
-  default     = "r6g.medium" # 1vcpu, 4GB memory
+  default = [
+    "r6g.medium", # 1vcpu, 4GB memory
+  ]
 }
 
 variable "extra_master_security_groups" {
@@ -73,3 +76,14 @@ variable "extra_node_security_groups" {
   description = "Additional security groups to attach to k3s agent instances"
 }
 
+variable "on_demand_percentage" {
+  default     = 100
+  type        = number
+  description = "Percentage(ratio) of on-demand against spot instances (0-100)"
+}
+
+variable "target_group_arns" {
+  type        = list(string)
+  description = "Attach worker nodes to a list of target groups. (Needed for exposure)"
+  default     = []
+}