From ce48dbc987b04e37a2ad5bb4f16acc789b222a15 Mon Sep 17 00:00:00 2001 From: paduin Date: Thu, 18 Jan 2024 17:46:28 +0100 Subject: [PATCH 1/3] Added keepalive config for EKS --- CHANGELOG.md | 4 ++++ README.md | 7 ++++--- k8s.tf | 15 +++++++++++++++ variables.tf | 6 ++++++ 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2d1f55..5f3af5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [4.2.3] - 2024-01-19 +### Fixed +- Enable optional sysctl settings for EKS. To overwrite TCP keepalive settings. + ## [4.2.2] - 2024-01-05 ### Fixed - Conditional reading of Secrets from SecretManager fix. diff --git a/README.md b/README.md index 91d8211..f1cfc01 100644 --- a/README.md +++ b/README.md @@ -54,9 +54,10 @@ For more information please refer to the main [Apiary](https://github.com/Expedi | root_vol_type | Waggle Dance EC2 root volume type. | string | `gp2` | no | | root_vol_size | Waggle Dance EC2 root volume size. | string | `10` | no | | enable_query_functions_across_all_metastores | This controls the thrift call for `get_all_functions`. It is generally used to initialize a client and get built-in functions and registered UDF's from a metastore. Setting this to `false` is more performant as WD then only gets the functions from the `primary` metastore. However, setting this to `true` will collate results by calling `get_all_functions` from all configured metastores. This could be potentially slow if some of the metastores are slow to respond. If all the metastores configured are of the same version and no additional UDF's are installed, then WD gets the same functions back so it's not very useful to call this across metastores. 
For backwards compatibility, this property can be set to `true`. Further read: https://github.com/ExpediaGroup/waggle-dance#server | bool | false | no | -| tcp_keepalive_time | Sets net.ipv4.tcp_keepalive_time (seconds), currently only supported in ECS. | number | `200` | no | -| tcp_keepalive_intvl | Sets net.ipv4.tcp_keepalive_intvl (seconds), currently only supported in ECS. | number | `30` | no | -| tcp_keepalive_probes | Sets net.ipv4.tcp_keepalive_probes (seconds), currently only supported in ECS. | number | `2` | no | +| enable_sysctl_config_in_eks | Enable sysctl configuration for Hive Metastore. For EKS you need to allow this on your cluster (https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ check EKS version for details). Also see tcp_keepalive_* variables. | bool | false | no | +| tcp_keepalive_time | Sets net.ipv4.tcp_keepalive_time (seconds), currently only supported in ECS. | number | `200` | no | +| tcp_keepalive_intvl | Sets net.ipv4.tcp_keepalive_intvl (seconds), currently only supported in ECS. | number | `30` | no | +| tcp_keepalive_probes | Sets net.ipv4.tcp_keepalive_probes (seconds), currently only supported in ECS. | number | `2` | no | | datadog_key_secret_name | Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. | string | | no | | datadog_agent_version | Version of the Datadog Agent running in the ECS cluster. | string | `7.46.0-jmx` | no | | include_datadog_agent | Whether to include the datadog-agent container alongside Waggledance. | string | bool | no | diff --git a/k8s.tf b/k8s.tf index 2f844d9..1be4384 100644 --- a/k8s.tf +++ b/k8s.tf @@ -61,6 +61,21 @@ resource "kubernetes_deployment_v1" "waggle_dance" { spec { service_account_name = kubernetes_service_account.waggle_dance[0].metadata.0.name automount_service_account_token = true + dynamic "security_context" { + for_each = var.enable_sysctl_config_in_eks ? 
["enabled"] : [] + content { + sysctl = [{ + name="net.ipv4.tcp_keepalive_time" + value="${var.tcp_keepalive_time}" + },{ + name="net.ipv4.tcp_keepalive_intvl" + value="${var.tcp_keepalive_intvl}" + },{ + name="net.ipv4.tcp_keepalive_probes" + value="${var.tcp_keepalive_probes}" + }] + } + } container { image = "${var.docker_image}:${var.docker_version}" name = local.instance_alias diff --git a/variables.tf b/variables.tf index 24a51ab..b394973 100644 --- a/variables.tf +++ b/variables.tf @@ -360,6 +360,12 @@ variable "datadog_metrics_enabled" { default = false } +variable "enable_sysctl_config_in_eks" { + description = "Enable sysctl configuration for Hive Metastore. For EKS you need to allow this on your cluster (https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ check EKS version for details). Also see tcp_keepalive_* variables." + type = bool + default = false +} + variable "tcp_keepalive_time" { description = "Sets net.ipv4.tcp_keepalive_time (seconds), currently only supported in ECS." type = number From d32983779a5f5acaee3e2289b69b9c4fba5f43eb Mon Sep 17 00:00:00 2001 From: paduin Date: Fri, 19 Jan 2024 16:57:03 +0100 Subject: [PATCH 2/3] fixed sysctl block --- k8s.tf | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/k8s.tf b/k8s.tf index 1be4384..ee38f72 100644 --- a/k8s.tf +++ b/k8s.tf @@ -64,16 +64,18 @@ resource "kubernetes_deployment_v1" "waggle_dance" { dynamic "security_context" { for_each = var.enable_sysctl_config_in_eks ? 
["enabled"] : [] content { - sysctl = [{ + sysctl { name="net.ipv4.tcp_keepalive_time" - value="${var.tcp_keepalive_time}" - },{ + value= var.tcp_keepalive_time + } + sysctl { name="net.ipv4.tcp_keepalive_intvl" - value="${var.tcp_keepalive_intvl}" - },{ + value= var.tcp_keepalive_intvl + } + sysctl { name="net.ipv4.tcp_keepalive_probes" - value="${var.tcp_keepalive_probes}" - }] + value= var.tcp_keepalive_probes + } } } container { From 5db127f50c5ae4c33891de0cfbd4c931b058df8a Mon Sep 17 00:00:00 2001 From: Georgi Ivanov Date: Sun, 30 Jun 2024 01:27:16 +0100 Subject: [PATCH 3/3] updated vars to be in sync with Apiary --- CHANGELOG.md | 6 +++--- README.md | 2 +- k8s.tf | 14 +++++++------- variables.tf | 11 ++++++----- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fa3acc..0ba9a76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,9 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [4.5.3] - 2024-06-28 -### Fixed -- Enable optional sysctl settings for EKS. To overwrite TCP keepalive settings. +## [4.5.3] - 2024-07-01 +### Added +- Added support for setting the TCP keepalive settings of Waggledance. ## [4.5.2] - 2024-06-04 ### Updated diff --git a/README.md b/README.md index cb09524..e7be409 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ For more information please refer to the main [Apiary](https://github.com/Expedi | root_vol_type | Waggle Dance EC2 root volume type. | string | `gp2` | no | | root_vol_size | Waggle Dance EC2 root volume size. | string | `10` | no | | enable_query_functions_across_all_metastores | This controls the thrift call for `get_all_functions`. It is generally used to initialize a client and get built-in functions and registered UDF's from a metastore. 
Setting this to `false` is more performant as WD then only gets the functions from the `primary` metastore. However, setting this to `true` will collate results by calling `get_all_functions` from all configured metastores. This could be potentially slow if some of the metastores are slow to respond. If all the metastores configured are of the same version and no additional UDF's are installed, then WD gets the same functions back so it's not very useful to call this across metastores. For backwards compatibility, this property can be set to `true`. Further read: https://github.com/ExpediaGroup/waggle-dance#server | bool | false | no | -| enable_sysctl_config_in_eks | Enable sysctl configuration for Hive Metastore. For EKS you need to allow this on your cluster (https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ check EKS version for details). Also see tcp_keepalive_* variables. | bool | false | no | +| enable_tcp_keepalive | Enable tcp_keepalive settings on the Waggledance pods. To use this you need to enable the ability to change sysctl settings on your Kubernetes cluster. For EKS you need to allow this on your cluster (https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ check EKS version for details). If your EKS version is 1.24 or below you need to create a PodSecurityPolicy allowing the following sysctls "net.ipv4.tcp_keepalive_time", "net.ipv4.tcp_keepalive_intvl", "net.ipv4.tcp_keepalive_probes" and a ClusterRole + RoleBinding for the service account running the HMS pods or all service accounts in the namespace where Apiary is running so that Kubernetes can apply the tcp_keepalive configuration. For EKS 1.25 and above check this https://kubernetes.io/blog/2022/08/23/kubernetes-v1-25-release/#pod-security-changes. Also see tcp_keepalive_* variables. | bool | false | no | | tcp_keepalive_time | Sets net.ipv4.tcp_keepalive_time (seconds), currently only supported in ECS. 
| number | `200` | no | | tcp_keepalive_intvl | Sets net.ipv4.tcp_keepalive_intvl (seconds), currently only supported in ECS. | number | `30` | no | | tcp_keepalive_probes | Sets net.ipv4.tcp_keepalive_probes (seconds), currently only supported in ECS. | number | `2` | no | diff --git a/k8s.tf b/k8s.tf index d3cda66..668be7f 100644 --- a/k8s.tf +++ b/k8s.tf @@ -79,19 +79,19 @@ resource "kubernetes_deployment_v1" "waggle_dance" { service_account_name = kubernetes_service_account_v1.waggle_dance[0].metadata.0.name automount_service_account_token = true dynamic "security_context" { - for_each = var.enable_sysctl_config_in_eks ? ["enabled"] : [] + for_each = var.enable_tcp_keepalive ? ["enabled"] : [] content { sysctl { - name="net.ipv4.tcp_keepalive_time" - value= var.tcp_keepalive_time + name = "net.ipv4.tcp_keepalive_time" + value = var.tcp_keepalive_time } sysctl { - name="net.ipv4.tcp_keepalive_intvl" - value= var.tcp_keepalive_intvl + name = "net.ipv4.tcp_keepalive_intvl" + value = var.tcp_keepalive_intvl } sysctl { - name="net.ipv4.tcp_keepalive_probes" - value= var.tcp_keepalive_probes + name = "net.ipv4.tcp_keepalive_probes" + value = var.tcp_keepalive_probes } } } diff --git a/variables.tf b/variables.tf index f1ecb17..694802c 100644 --- a/variables.tf +++ b/variables.tf @@ -394,30 +394,31 @@ variable "datadog_metrics_enabled" { default = false } -variable "enable_sysctl_config_in_eks" { - description = "Enable sysctl configuration for Hive Metastore. For EKS you need to allow this on your cluster (https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ check EKS version for details). Also see tcp_keepalive_* variables." +variable "enable_tcp_keepalive" { + description = "Enable tcp keepalive settings on the waggledance pods." type = bool default = false } variable "tcp_keepalive_time" { - description = "Sets net.ipv4.tcp_keepalive_time (seconds), currently only supported in ECS." + description = "Sets net.ipv4.tcp_keepalive_time (seconds)." 
type = number default = 200 } variable "tcp_keepalive_intvl" { - description = "Sets net.ipv4.tcp_keepalive_intvl (seconds), currently only supported in ECS." + description = "Sets net.ipv4.tcp_keepalive_intvl (seconds)." type = number default = 30 } variable "tcp_keepalive_probes" { - description = "Sets net.ipv4.tcp_keepalive_probes (number), currently only supported in ECS." + description = "Sets net.ipv4.tcp_keepalive_probes (number)." type = number default = 2 } + variable "datadog_key_secret_name" { description = "Name of the secret containing the DataDog API key. This needs to be created manually in AWS secrets manager. This is only applicable to ECS deployments." type = string