From 4468294b8c0571f1d4f8960db2a3f2d24c70a1e9 Mon Sep 17 00:00:00 2001 From: witomlin <76996781+witomlin@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:20:15 +0100 Subject: [PATCH 1/8] Enhance integration tests --- .github/workflows/tests.yaml | 2 +- Makefile | 20 ++++-- README.md | 17 ++--- test/integration/command.go | 8 +-- test/integration/csa.go | 59 +++++----------- test/integration/echoserver.go | 14 ---- test/integration/integration_test.go | 101 +++++++++++++++------------ test/integration/kind.go | 66 ++++++++--------- test/integration/kube.go | 44 +++++++----- test/integration/log.go | 18 +++++ test/integration/path.go | 7 -- test/integration/settings.go | 88 +++++++++++++++++++++++ 12 files changed, 272 insertions(+), 172 deletions(-) create mode 100644 test/integration/log.go create mode 100644 test/integration/settings.go diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 148fdee..2584290 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -48,7 +48,7 @@ jobs: - name: Test env: MAX_PARALLELISM: 1 # Constrained to 2 CPUs on ubuntu-latest - run: make test-run-int-verbose + run: make test-run-int-verbose KUBE_VERSION=1.29 # TODO(wt) test against potentially many versions helm: name: Run Helm tests diff --git a/Makefile b/Makefile index ae0c916..5b804d8 100644 --- a/Makefile +++ b/Makefile @@ -34,20 +34,28 @@ test-run-unit: ## Runs unit tests go test -count=1 ./internal/... .PHONY: test-run-int -test-run-int: ## Runs integration tests +test-run-int: ## Runs integration tests for a specific major.minor version of Kube + @if [ -z "${KUBE_VERSION}" ]; then \ + echo "KUBE_VERSION is required - run 'make test-run-int KUBE_VERSION=x.y'"; \ + exit 1; \ + fi go test -count=1 -timeout ${INT_TESTS_TIMEOUT} ./test/integration/... 
.PHONY: test-run-int-verbose -test-run-int-verbose: ## Runs integration tests with verbose logging +test-run-int-verbose: ## Runs integration tests for a specific major.minor version of Kube, with verbose logging + @if [ -z "${KUBE_VERSION}" ]; then \ + echo "KUBE_VERSION is required - run 'make test-run-int-verbose KUBE_VERSION=x.y'"; \ + exit 1; \ + fi go test -count=1 -timeout ${INT_TESTS_TIMEOUT} -v ./test/integration/... .PHONY: test-run-helm test-run-helm: ## Runs Helm tests - rm -rf ${HELM_TESTS_SNAPSHOT_DIR} - mkdir ${HELM_TESTS_SNAPSHOT_DIR} - chmod 777 ${HELM_TESTS_SNAPSHOT_DIR} + @rm -rf ${HELM_TESTS_SNAPSHOT_DIR} + @mkdir ${HELM_TESTS_SNAPSHOT_DIR} + @chmod 777 ${HELM_TESTS_SNAPSHOT_DIR} docker run -t --rm -v ${ROOT_DIR}charts:/apps helmunittest/helm-unittest:3.12.3-0.3.5 container-startup-autoscaler - rm -rf ${HELM_TESTS_SNAPSHOT_DIR} + @rm -rf ${HELM_TESTS_SNAPSHOT_DIR} ## ------------------ ## Go Modules diff --git a/README.md b/README.md index c91bfc0..691e9b8 100644 --- a/README.md +++ b/README.md @@ -517,14 +517,15 @@ execution might take some time to complete. A number of environment variable-based configuration options are available: -| Name | Default | Description | -|--------------------------|---------|-----------------------------------------------------------------------| -| `MAX_PARALLELISM` | `4` | The maximum number of tests that can run in parallel. | -| `REUSE_CLUSTER` | `false` | Whether to reuse an existing CSA kind cluster (if it already exists). | -| `INSTALL_METRICS_SERVER` | `false` | Whether to install metrics-server. | -| `KEEP_CSA` | `false` | Whether to keep the CSA installation after tests finish. | -| `KEEP_CLUSTER` | `false` | Whether to keep the CSA kind cluster after tests finish. | -| `DELETE_NS_AFTER_TEST` | `true` | Whether to delete namespaces created by tests after they conclude. 
| +| Name | Default | Description | +|--------------------------|---------|--------------------------------------------------------------------------------------------------------------------------------------| +| `KUBE_VERSION` | - | The _major.minor_ version of Kube to run tests against e.g. `1.29`. | +| `MAX_PARALLELISM` | `4` | The maximum number of tests that can run in parallel. | +| `REUSE_CLUSTER` | `false` | Whether to reuse an existing CSA kind cluster (if it already exists). `KUBE_VERSION` has no effect if an existing cluster is reused. | +| `INSTALL_METRICS_SERVER` | `false` | Whether to install metrics-server. | +| `KEEP_CSA` | `false` | Whether to keep the CSA installation after tests finish. | +| `KEEP_CLUSTER` | `false` | Whether to keep the CSA kind cluster after tests finish. | +| `DELETE_NS_AFTER_TEST` | `true` | Whether to delete namespaces created by tests after they conclude. | Integration tests are executed in parallel due to their long-running nature. Each test operates within a separate Kube namespace (but using the same single CSA installation). If local resources are limited, reduce `MAX_PARALLELISM` diff --git a/test/integration/command.go b/test/integration/command.go index 136fde0..a4ea480 100644 --- a/test/integration/command.go +++ b/test/integration/command.go @@ -17,22 +17,22 @@ limitations under the License. 
package integration import ( - "fmt" "os" "os/exec" "strings" + "testing" "github.com/ExpediaGroup/container-startup-autoscaler/internal/common" ) -func cmdRun(cmd *exec.Cmd, info string, coreErrMsg string, fatalOnErr bool, suppressInfo ...bool) (string, error) { +func cmdRun(t *testing.T, cmd *exec.Cmd, info string, coreErrMsg string, fatalOnErr bool, suppressInfo ...bool) (string, error) { suppress := false if len(suppressInfo) > 0 && suppressInfo[0] { suppress = true } if info != "" && !suppress { - fmt.Println(info) + logMessage(t, info) } combinedOutput, err := cmd.CombinedOutput() @@ -41,7 +41,7 @@ func cmdRun(cmd *exec.Cmd, info string, coreErrMsg string, fatalOnErr bool, supp wrappedErr := common.WrapErrorf(err, "%s (output: %s)", coreErrMsg, trimmedOutput) if fatalOnErr { - fmt.Println(wrappedErr) + logMessage(t, wrappedErr) os.Exit(1) } return trimmedOutput, wrappedErr diff --git a/test/integration/csa.go b/test/integration/csa.go index 520efbe..54787fc 100644 --- a/test/integration/csa.go +++ b/test/integration/csa.go @@ -21,39 +21,12 @@ import ( "os/exec" "strings" "sync" + "testing" "time" "github.com/ExpediaGroup/container-startup-autoscaler/internal/common" "github.com/ExpediaGroup/container-startup-autoscaler/internal/pod/podcommon" - v1 "k8s.io/api/core/v1" -) - -const ( - csaDockerImage = "csa" - csaDockerTag = "test" - csaDockerImageTag = csaDockerImage + ":" + csaDockerTag -) - -const ( - csaHelmChartRelPath = "charts" + pathSeparator + "container-startup-autoscaler" - csaHelmName = "csa-int" - csaHelmTimeout = "60s" -) - -const ( - csaStatusWaitMillis = 500 - csaStatusMessageStartupCommanded = "Startup resources commanded" - csaStatusMessageStartupCommandedUnknownRes = "Startup resources commanded (unknown resources applied)" - csaStatusMessagePostStartupCommanded = "Post-startup resources commanded" - csaStatusMessagePostStartupCommandedUnknownRes = "Post-startup resources commanded (unknown resources applied)" - csaStatusMessageStartupEnacted 
= "Startup resources enacted" - csaStatusMessagePostStartupEnacted = "Post-startup resources enacted" - csaStatusMessageValidationError = "Validation error" -) - -const ( - csaEventReasonScaling = "Scaling" - csaEventReasonValidation = "Validation" + "k8s.io/api/core/v1" ) type csaQuantityAnnotations struct { @@ -65,10 +38,11 @@ type csaQuantityAnnotations struct { memoryPostStartupLimits string } -func csaRun() error { - csaCleanUp() +func csaRun(t *testing.T) error { + csaCleanUp(t) _, err := cmdRun( + t, exec.Command("docker", "build", "-t", csaDockerImageTag, rootAbsPath), "building csa...", "unable to build csa", @@ -79,6 +53,7 @@ func csaRun() error { } _, err = cmdRun( + t, exec.Command("kind", "load", "docker-image", csaDockerImageTag, "--name", kindClusterName), "loading csa into kind cluster...", "unable to load csa into kind cluster", @@ -89,6 +64,7 @@ func csaRun() error { } _, err = cmdRun( + t, exec.Command( "helm", "install", csaHelmName, @@ -114,8 +90,9 @@ func csaRun() error { return nil } -func csaCleanUp() { +func csaCleanUp(t *testing.T) { _, _ = cmdRun( + t, exec.Command( "helm", "uninstall", csaHelmName, @@ -127,16 +104,17 @@ func csaCleanUp() { false, ) - _ = kubeDeleteNamespace(csaHelmName) + _ = kubeDeleteNamespace(nil, csaHelmName) } func csaWaitStatus( + t *testing.T, podNamespace string, podName string, waitMsgContains string, timeoutSecs int, ) (*v1.Pod, podcommon.StatusAnnotation, error) { - fmt.Println(fmt.Sprintf("waiting for csa status '%s' for pod '%s/%s'", waitMsgContains, podNamespace, podName)) + logMessage(t, fmt.Sprintf("waiting for csa status '%s' for pod '%s/%s'", waitMsgContains, podNamespace, podName)) var retPod *v1.Pod retStatusAnn := podcommon.StatusAnnotation{} @@ -154,14 +132,14 @@ func csaWaitStatus( ) } - pod, err := kubeGetPod(podNamespace, podName, true) + pod, err := kubeGetPod(t, podNamespace, podName, true) if err != nil { return retPod, retStatusAnn, err } statusAnnStr, exists := 
pod.Annotations[podcommon.AnnotationStatus] if !exists { - fmt.Println(fmt.Sprintf("csa status for pod '%s/%s' doesn't yet exist", podNamespace, podName)) + logMessage(t, fmt.Sprintf("csa status for pod '%s/%s' doesn't yet exist", podNamespace, podName)) time.Sleep(csaStatusWaitMillis * time.Millisecond) continue } @@ -174,10 +152,10 @@ func csaWaitStatus( } lastStatusAnnJson = statusAnn.Json() - //fmt.Println(lastStatusAnnJson) + //logMessage(t, fmt.Sprintf("current csa status for pod '%s/%s': %s", podNamespace, podName, lastStatusAnnJson)) if strings.Contains(statusAnn.Status, waitMsgContains) { - // TODO(wt) 'In-place Update of Pod Resources' implementation bug + // TODO(wt) 'In-place Update of Pod Resources' implementation bug (Kube 1.29) // See large comment at top of integration_test.go - need to re-get pod in case resize is restarted. // Remove once fixed. if getAgain { @@ -194,11 +172,12 @@ func csaWaitStatus( time.Sleep(csaStatusWaitMillis * time.Millisecond) } - fmt.Println(fmt.Sprintf("got csa status message '%s' for pod '%s/%s'", waitMsgContains, podNamespace, podName)) + logMessage(t, fmt.Sprintf("got csa status message '%s' for pod '%s/%s'", waitMsgContains, podNamespace, podName)) return retPod, retStatusAnn, nil } func csaWaitStatusAll( + t *testing.T, podNamespace string, podNames []string, waitMsgContains string, @@ -215,7 +194,7 @@ func csaWaitStatusAll( go func() { defer wg.Done() - pod, statusAnn, err := csaWaitStatus(podNamespace, name, waitMsgContains, timeoutSecs) + pod, statusAnn, err := csaWaitStatus(t, podNamespace, name, waitMsgContains, timeoutSecs) mutex.Lock() defer mutex.Unlock() diff --git a/test/integration/echoserver.go b/test/integration/echoserver.go index 5ad3f7d..703b4aa 100644 --- a/test/integration/echoserver.go +++ b/test/integration/echoserver.go @@ -25,20 +25,6 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" ) -const ( - echoServerDockerImageTag = "ealen/echo-server:0.7.0" - echoServerName = "echo-server" -) - -const ( 
- echoServerNonTargetContainerName = echoServerName + "-non-target" - echoServerNonTargetContainerCpuRequests = "50m" - echoServerNonTargetContainerCpuLimits = "50m" - echoServerNonTargetContainerMemoryRequests = "150M" - echoServerNonTargetContainerMemoryLimits = "150M" - echoServerDefaultProbeInitialDelaySeconds = 20 -) - // Deployment----------------------------------------------------------------------------------------------------------- func echoDeploymentConfigStandardStartup( diff --git a/test/integration/integration_test.go b/test/integration/integration_test.go index 75666d3..af579ae 100644 --- a/test/integration/integration_test.go +++ b/test/integration/integration_test.go @@ -33,7 +33,7 @@ import ( /* -// TODO(wt) 'In-place Update of Pod Resources' implementation bug +// TODO(wt) 'In-place Update of Pod Resources' implementation bug (Kube 1.29) Note: there currently appears to be a bug in the 'In-place Update of Pod Resources' implementation whereby successful resizes are restarted - this is specifically mitigated against within csaWaitStatus(). This sometimes (depending on the timing of retrieving pods via kubectl) manifested in a CSA status that (correctly) stated that the resize had occurred, @@ -178,37 +178,49 @@ Example logs of such an event (restart marked with '<-- HERE'): */ -const ( - defaultTimeoutSecs = 60 -) - var deleteNsPostTest = true func TestMain(m *testing.M) { setStringConfig := func(env string, config *string) { envVal := os.Getenv(env) + + if envVal == "" && *config == "" { + // Require env unless defaulted via supplied. + logMessage(nil, fmt.Sprintf("(config) '%s' value is required", env)) + os.Exit(1) + } + if envVal != "" { *config = envVal } } setBoolConfig := func(env string, config *bool) { envVal := os.Getenv(env) + + if envVal == "" && config == nil { + // Require env unless defaulted via supplied. 
+ logMessage(nil, fmt.Sprintf("(config) '%s' value is required", env)) + os.Exit(1) + } + if envVal != "" { var err error *config, err = strconv.ParseBool(envVal) if err != nil { - fmt.Println("(config)", env, "value is not a bool") + logMessage(nil, fmt.Sprintf("(config) '%s' value is not a bool", env)) os.Exit(1) } } } + kubeVersion := "" maxParallelism := "4" reuseCluster := false installMetricsServer := false keepCsa := false keepCluster := false + setStringConfig("KUBE_VERSION", &kubeVersion) setStringConfig("MAX_PARALLELISM", &maxParallelism) setBoolConfig("REUSE_CLUSTER", &reuseCluster) setBoolConfig("INSTALL_METRICS_SERVER", &installMetricsServer) @@ -216,38 +228,39 @@ func TestMain(m *testing.M) { setBoolConfig("KEEP_CLUSTER", &keepCluster) setBoolConfig("DELETE_NS_AFTER_TEST", &deleteNsPostTest) - fmt.Println("(config) MAX_PARALLELISM:", maxParallelism) - fmt.Println("(config) REUSE_CLUSTER:", reuseCluster) - fmt.Println("(config) INSTALL_METRICS_SERVER:", installMetricsServer) - fmt.Println("(config) KEEP_CSA:", keepCsa) - fmt.Println("(config) KEEP_CLUSTER:", keepCluster) - fmt.Println("(config) DELETE_NS_AFTER_TEST:", deleteNsPostTest) + logMessage(nil, fmt.Sprintf("(config) KUBE_VERSION: %s", kubeVersion)) + logMessage(nil, fmt.Sprintf("(config) MAX_PARALLELISM: %s", maxParallelism)) + logMessage(nil, fmt.Sprintf("(config) REUSE_CLUSTER: %t", reuseCluster)) + logMessage(nil, fmt.Sprintf("(config) INSTALL_METRICS_SERVER: %t", installMetricsServer)) + logMessage(nil, fmt.Sprintf("(config) KEEP_CSA: %t", keepCsa)) + logMessage(nil, fmt.Sprintf("(config) KEEP_CLUSTER: %t", keepCluster)) + logMessage(nil, fmt.Sprintf("(config) DELETE_NS_AFTER_TEST: %t", deleteNsPostTest)) _ = flag.Set("test.parallel", maxParallelism) flag.Parse() if testing.Short() { - fmt.Println("not running because short tests configured") + logMessage(nil, "not running because short tests configured") os.Exit(0) } - kindSetupCluster(reuseCluster, installMetricsServer) - if err := 
csaRun(); err != nil { + kindSetupCluster(nil, kubeVersion, reuseCluster, installMetricsServer) + if err := csaRun(nil); err != nil { if !keepCsa { - csaCleanUp() + csaCleanUp(nil) } if !keepCluster { - kindCleanUpCluster() + kindCleanUpCluster(nil) } - fmt.Println(err) + logMessage(nil, err) os.Exit(1) } exitVal := m.Run() if !keepCsa { - csaCleanUp() + csaCleanUp(nil) } if !keepCluster { - kindCleanUpCluster() + kindCleanUpCluster(nil) } os.Exit(exitVal) } @@ -375,8 +388,8 @@ func TestDeploymentScaleWhenUnknownResources(t *testing.T) { namespace := "deployment-scale-when-unknown-resources" maybeRegisterCleanup(t, namespace) - _ = kubeDeleteNamespace(namespace) - maybeLogErrAndFailNow(t, kubeCreateNamespace(namespace)) + _ = kubeDeleteNamespace(t, namespace) + maybeLogErrAndFailNow(t, kubeCreateNamespace(t, namespace)) annotations := csaQuantityAnnotations{ cpuStartup: "200m", @@ -396,19 +409,19 @@ func TestDeploymentScaleWhenUnknownResources(t *testing.T) { echoServerDefaultProbeInitialDelaySeconds, ) config.removeReadinessProbes() - maybeLogErrAndFailNow(t, kubeApplyYamlOrJsonResources(config.deploymentJson())) + maybeLogErrAndFailNow(t, kubeApplyYamlOrJsonResources(t, config.deploymentJson())) - names, err := kubeGetPodNames(namespace, echoServerName) + names, err := kubeGetPodNames(t, namespace, echoServerName) maybeLogErrAndFailNow(t, err) - podStatusAnn, errs := csaWaitStatusAll(namespace, names, csaStatusMessageStartupEnacted, defaultTimeoutSecs) + podStatusAnn, errs := csaWaitStatusAll(t, namespace, names, csaStatusMessageStartupEnacted, testsDefaultWaitStatusTimeoutSecs) if len(errs) > 0 { maybeLogErrAndFailNow(t, errs[len(errs)-1]) } assertStartupEnacted(t, annotations, podStatusAnn, true, false, false) - podStatusAnn, errs = csaWaitStatusAll(namespace, names, csaStatusMessagePostStartupEnacted, defaultTimeoutSecs) + podStatusAnn, errs = csaWaitStatusAll(t, namespace, names, csaStatusMessagePostStartupEnacted, testsDefaultWaitStatusTimeoutSecs) if 
len(errs) > 0 { maybeLogErrAndFailNow(t, errs[len(errs)-1]) } @@ -556,8 +569,8 @@ func TestValidationFailure(t *testing.T) { namespace := "validation-failure" maybeRegisterCleanup(t, namespace) - _ = kubeDeleteNamespace(namespace) - maybeLogErrAndFailNow(t, kubeCreateNamespace(namespace)) + _ = kubeDeleteNamespace(t, namespace) + maybeLogErrAndFailNow(t, kubeCreateNamespace(t, namespace)) annotations := csaQuantityAnnotations{ cpuStartup: "50m", @@ -569,12 +582,12 @@ func TestValidationFailure(t *testing.T) { } config := echoDeploymentConfigStandardStartup(namespace, 2, annotations) - maybeLogErrAndFailNow(t, kubeApplyYamlOrJsonResources(config.deploymentJson())) + maybeLogErrAndFailNow(t, kubeApplyYamlOrJsonResources(t, config.deploymentJson())) - names, err := kubeGetPodNames(namespace, echoServerName) + names, err := kubeGetPodNames(t, namespace, echoServerName) maybeLogErrAndFailNow(t, err) - podStatusAnn, errs := csaWaitStatusAll(namespace, names, csaStatusMessageValidationError, defaultTimeoutSecs) + podStatusAnn, errs := csaWaitStatusAll(t, namespace, names, csaStatusMessageValidationError, testsDefaultWaitStatusTimeoutSecs) if len(errs) > 0 { maybeLogErrAndFailNow(t, errs[len(errs)-1]) } @@ -611,8 +624,8 @@ func testWorkflow( assertStartupEnactedRestartFunc func(*testing.T, csaQuantityAnnotations, map[*v1.Pod]podcommon.StatusAnnotation), assertPostStartupEnactedRestartFunc func(*testing.T, csaQuantityAnnotations, map[*v1.Pod]podcommon.StatusAnnotation), ) { - _ = kubeDeleteNamespace(namespace) - maybeLogErrAndFailNow(t, kubeCreateNamespace(namespace)) + _ = kubeDeleteNamespace(t, namespace) + maybeLogErrAndFailNow(t, kubeCreateNamespace(t, namespace)) annotations := csaQuantityAnnotations{ cpuStartup: "200m", @@ -624,22 +637,22 @@ func testWorkflow( } workloadJson, replicas := workloadJsonReplicasFunc(annotations) - maybeLogErrAndFailNow(t, kubeApplyYamlOrJsonResources(workloadJson)) + maybeLogErrAndFailNow(t, kubeApplyYamlOrJsonResources(t, workloadJson)) 
- maybeLogErrAndFailNow(t, kubeWaitPodsExist(namespace, echoServerName, replicas, defaultTimeoutSecs)) + maybeLogErrAndFailNow(t, kubeWaitPodsExist(t, namespace, echoServerName, replicas, testsDefaultWaitStatusTimeoutSecs)) - names, err := kubeGetPodNames(namespace, echoServerName) + names, err := kubeGetPodNames(t, namespace, echoServerName) maybeLogErrAndFailNow(t, err) // Startup resources enacted --------------------------------------------------------------------------------------- - podStatusAnn, errs := csaWaitStatusAll(namespace, names, csaStatusMessageStartupEnacted, defaultTimeoutSecs) + podStatusAnn, errs := csaWaitStatusAll(t, namespace, names, csaStatusMessageStartupEnacted, testsDefaultWaitStatusTimeoutSecs) if len(errs) > 0 { maybeLogErrAndFailNow(t, errs[len(errs)-1]) } assertStartupEnactedFunc(t, annotations, podStatusAnn) // Post-startup resources enacted ---------------------------------------------------------------------------------- - podStatusAnn, errs = csaWaitStatusAll(namespace, names, csaStatusMessagePostStartupEnacted, defaultTimeoutSecs) + podStatusAnn, errs = csaWaitStatusAll(t, namespace, names, csaStatusMessagePostStartupEnacted, testsDefaultWaitStatusTimeoutSecs) if len(errs) > 0 { maybeLogErrAndFailNow(t, errs[len(errs)-1]) } @@ -649,18 +662,18 @@ func testWorkflow( for pod := range podStatusAnn { for _, status := range pod.Status.ContainerStatuses { if status.Name == echoServerName { - maybeLogErrAndFailNow(t, kubeCauseContainerRestart(status.ContainerID)) + maybeLogErrAndFailNow(t, kubeCauseContainerRestart(t, status.ContainerID)) } } } - podStatusAnn, errs = csaWaitStatusAll(namespace, names, csaStatusMessageStartupEnacted, defaultTimeoutSecs) + podStatusAnn, errs = csaWaitStatusAll(t, namespace, names, csaStatusMessageStartupEnacted, testsDefaultWaitStatusTimeoutSecs) if len(errs) > 0 { maybeLogErrAndFailNow(t, errs[len(errs)-1]) } assertStartupEnactedRestartFunc(t, annotations, podStatusAnn) - podStatusAnn, errs = 
csaWaitStatusAll(namespace, names, csaStatusMessagePostStartupEnacted, defaultTimeoutSecs) + podStatusAnn, errs = csaWaitStatusAll(t, namespace, names, csaStatusMessagePostStartupEnacted, testsDefaultWaitStatusTimeoutSecs) if len(errs) > 0 { maybeLogErrAndFailNow(t, errs[len(errs)-1]) } @@ -859,7 +872,7 @@ func assertPostStartupEnacted( func ensureEvents(t *testing.T, reason string, substrs []string, namespace string, names []string) { for _, name := range names { - messages, err := kubeGetEventMessages(namespace, name, reason) + messages, err := kubeGetEventMessages(t, namespace, name, reason) maybeLogErrAndFailNow(t, err) for _, substr := range substrs { @@ -879,14 +892,14 @@ func ensureEvents(t *testing.T, reason string, substrs []string, namespace strin func maybeRegisterCleanup(t *testing.T, namespace string) { if deleteNsPostTest { t.Cleanup(func() { - _ = kubeDeleteNamespace(namespace) + _ = kubeDeleteNamespace(t, namespace) }) } } func maybeLogErrAndFailNow(t *testing.T, err error) { if err != nil { - t.Log(err) + logMessage(t, err) t.FailNow() } } diff --git a/test/integration/kind.go b/test/integration/kind.go index 622021f..ad1ffc4 100644 --- a/test/integration/kind.go +++ b/test/integration/kind.go @@ -22,23 +22,11 @@ import ( "os/exec" "runtime" "strings" + "testing" "github.com/ExpediaGroup/container-startup-autoscaler/internal/common" ) -const ( - kindClusterName = "csa-int-cluster" - kindNodeImagex8664 = "kindest/node:v1.29.0@sha256:54a50c9354f11ce0aa56a85d2cacb1b950f85eab3fe1caf988826d1f89bf37eb" - kindNodeImageArm64 = "kindest/node:v1.29.0@sha256:8ccbd8bc4d52c467f3c79eeeb434827c225600a1d7385a4b1c19d9e038c9e0c0" - kindConfigFileRelPath = configDirRelPath + pathSeparator + "kind.yaml" -) - -const ( - metricsServerImageTag = "registry.k8s.io/metrics-server/metrics-server:v0.6.4" - metricsServerKustomizeDirRelPath = configDirRelPath + pathSeparator + "metricsserver" - metricsServerReadyTimeout = "60s" -) - var kindKubeconfig string func init() { @@ 
-50,10 +38,11 @@ func init() { kindKubeconfig = fmt.Sprintf("%s%s.kube%sconfig-%s", home, pathSeparator, pathSeparator, kindClusterName) } -func kindSetupCluster(reuseCluster bool, installMetricsServer bool) { +func kindSetupCluster(t *testing.T, kubeVersion string, reuseCluster, installMetricsServer bool) { hasExistingCluster := false output, _ := cmdRun( + t, exec.Command("kind", "get", "clusters"), "getting existing kind clusters...", "unable to get existing kind clusters", @@ -70,22 +59,18 @@ func kindSetupCluster(reuseCluster bool, installMetricsServer bool) { if !reuseCluster || !hasExistingCluster { if hasExistingCluster { - kindCleanUpCluster() + kindCleanUpCluster(t) } - var kindNodeImage string - - switch runtime.GOARCH { - case "amd64": - kindNodeImage = kindNodeImagex8664 - case "arm64": - kindNodeImage = kindNodeImageArm64 - default: - fmt.Println(fmt.Errorf("architecture '%s' not supported", runtime.GOARCH)) + kindNodeImage, err := kindImageFromKubeVersion(kubeVersion, runtime.GOARCH) + if err != nil { + logMessage(t, common.WrapErrorf(err, "unable to obtain kind image")) os.Exit(1) } + logMessage(t, fmt.Sprintf("using kind node image '%s'", kindNodeImage)) _, _ = cmdRun( + t, exec.Command("kind", "create", "cluster", "--name", kindClusterName, "--config", pathAbsFromRel(kindConfigFileRelPath), @@ -98,6 +83,7 @@ func kindSetupCluster(reuseCluster bool, installMetricsServer bool) { } output, _ = cmdRun( + t, exec.Command("kind", "get", "kubeconfig", "--name", kindClusterName), "getting kind kubeconfig...", "unable to get kind kubeconfig", @@ -105,17 +91,18 @@ func kindSetupCluster(reuseCluster bool, installMetricsServer bool) { ) if err := os.WriteFile(kindKubeconfig, []byte(output), 0644); err != nil { - fmt.Println(common.WrapErrorf(err, "unable to write kubeconfig")) + logMessage(t, common.WrapErrorf(err, "unable to write kubeconfig")) os.Exit(1) } - if err := kubePrintNodeInfo(); err != nil { - fmt.Println(common.WrapErrorf(err, "unable to print 
kube node info")) + if err := kubePrintNodeInfo(t); err != nil { + logMessage(t, common.WrapErrorf(err, "unable to print kube node info")) os.Exit(1) } if installMetricsServer { _, _ = cmdRun( + t, exec.Command("docker", "pull", metricsServerImageTag), "pulling metrics-server...", "unable to pull metrics-server", @@ -123,25 +110,27 @@ func kindSetupCluster(reuseCluster bool, installMetricsServer bool) { ) _, _ = cmdRun( + t, exec.Command("kind", "load", "docker-image", metricsServerImageTag, "--name", kindClusterName), "loading metrics-server into kind cluster...", "unable to load metrics-server into kind cluster", true, ) - if err := kubeApplyKustomizeResources(pathAbsFromRel(metricsServerKustomizeDirRelPath)); err != nil { - fmt.Println(err) + if err := kubeApplyKustomizeResources(t, pathAbsFromRel(metricsServerKustomizeDirRelPath)); err != nil { + logMessage(t, err) os.Exit(1) } - err := kubeWaitResourceCondition("kube-system", "k8s-app=metrics-server", "pod", "ready", metricsServerReadyTimeout) + err := kubeWaitResourceCondition(t, "kube-system", "k8s-app=metrics-server", "pod", "ready", metricsServerReadyTimeout) if err != nil { - fmt.Println(err) + logMessage(t, err) os.Exit(1) } } _, _ = cmdRun( + t, exec.Command("docker", "pull", echoServerDockerImageTag), "pulling echo-service...", "unable to pull echo-service", @@ -149,6 +138,7 @@ func kindSetupCluster(reuseCluster bool, installMetricsServer bool) { ) _, _ = cmdRun( + t, exec.Command("kind", "load", "docker-image", echoServerDockerImageTag, "--name", kindClusterName), "loading echo-service into kind cluster...", "unable to load echo-service into kind cluster", @@ -156,11 +146,23 @@ func kindSetupCluster(reuseCluster bool, installMetricsServer bool) { ) } -func kindCleanUpCluster() { +func kindCleanUpCluster(t *testing.T) { _, _ = cmdRun( + t, exec.Command("kind", "delete", "cluster", "--name", kindClusterName), "deleting existing kind cluster...", "unable to delete existing kind cluster", false, ) } + 
+func kindImageFromKubeVersion(kubeVersion, arch string) (string, error) { + if archMap, found := k8sVersionToImage[kubeVersion]; found { + if image, archFound := archMap[arch]; archFound { + return image, nil + } + return "", fmt.Errorf("architecture '%s' not supported", arch) + } + + return "", fmt.Errorf("kube version %s not supported", kubeVersion) +} diff --git a/test/integration/kube.go b/test/integration/kube.go index 7be203c..d2fa070 100644 --- a/test/integration/kube.go +++ b/test/integration/kube.go @@ -21,13 +21,15 @@ import ( "fmt" "os/exec" "strings" + "testing" "time" - v1 "k8s.io/api/core/v1" + "k8s.io/api/core/v1" ) -func kubePrintNodeInfo() error { +func kubePrintNodeInfo(t *testing.T) error { output, err := cmdRun( + t, exec.Command("kubectl", "describe", "nodes", "--kubeconfig", kindKubeconfig), "", "unable to describe nodes", @@ -38,13 +40,14 @@ func kubePrintNodeInfo() error { return err } - fmt.Println("node information:") - fmt.Println(output) + logMessage(t, "node information:") + logMessage(t, output) return nil } -func kubeCreateNamespace(name string) error { +func kubeCreateNamespace(t *testing.T, name string) error { _, err := cmdRun( + t, exec.Command("kubectl", "create", "namespace", name, "--kubeconfig", kindKubeconfig), fmt.Sprintf("creating namespace '%s'...", name), fmt.Sprintf("unable to create namespace '%s'", name), @@ -53,8 +56,9 @@ func kubeCreateNamespace(name string) error { return err } -func kubeDeleteNamespace(name string) error { +func kubeDeleteNamespace(t *testing.T, name string) error { _, err := cmdRun( + t, exec.Command("kubectl", "delete", "namespace", name, "--kubeconfig", kindKubeconfig), fmt.Sprintf("deleting namespace '%s'...", name), fmt.Sprintf("unable to delete namespace '%s'", name), @@ -63,10 +67,11 @@ func kubeDeleteNamespace(name string) error { return err } -func kubeApplyYamlOrJsonResources(yamlOrJson string) error { +func kubeApplyYamlOrJsonResources(t *testing.T, yamlOrJson string) error { cmd := 
exec.Command("kubectl", "apply", "-f", "-", "--kubeconfig", kindKubeconfig) cmd.Stdin = strings.NewReader(yamlOrJson) _, err := cmdRun( + t, cmd, fmt.Sprintf("applying resources '%s'...", yamlOrJson), fmt.Sprintf("unable to apply resources '%s'", yamlOrJson), @@ -75,8 +80,9 @@ func kubeApplyYamlOrJsonResources(yamlOrJson string) error { return err } -func kubeApplyKustomizeResources(kPath string) error { +func kubeApplyKustomizeResources(t *testing.T, kPath string) error { _, err := cmdRun( + t, exec.Command("kubectl", "apply", "-k", kPath, "--kubeconfig", kindKubeconfig), fmt.Sprintf("applying kustomize resources from '%s'...", kPath), fmt.Sprintf("unable to apply kustomize resources from '%s'...", kPath), @@ -85,8 +91,9 @@ func kubeApplyKustomizeResources(kPath string) error { return err } -func kubeGetPodNames(namespace string, nameContains string, suppressInfo ...bool) ([]string, error) { +func kubeGetPodNames(t *testing.T, namespace string, nameContains string, suppressInfo ...bool) ([]string, error) { output, err := cmdRun( + t, exec.Command( "kubectl", "get", "pods", "-n", namespace, @@ -113,8 +120,9 @@ func kubeGetPodNames(namespace string, nameContains string, suppressInfo ...bool return ret, nil } -func kubeGetPod(namespace string, name string, suppressInfo ...bool) (*v1.Pod, error) { +func kubeGetPod(t *testing.T, namespace string, name string, suppressInfo ...bool) (*v1.Pod, error) { output, err := cmdRun( + t, exec.Command( "kubectl", "get", "pod", name, @@ -136,8 +144,8 @@ func kubeGetPod(namespace string, name string, suppressInfo ...bool) (*v1.Pod, e return pod, err } -func kubeWaitPodsExist(namespace string, nameContains string, count int, timeoutSecs int) error { - fmt.Println(fmt.Sprintf( +func kubeWaitPodsExist(t *testing.T, namespace string, nameContains string, count int, timeoutSecs int) error { + logMessage(t, fmt.Sprintf( "waiting for %d pods (pod name contains '%s') to exist in namespace '%s'", count, nameContains, namespace, )) @@ -151,7 
+159,7 @@ func kubeWaitPodsExist(namespace string, nameContains string, count int, timeout ) } - pods, err := kubeGetPodNames(namespace, nameContains, true) + pods, err := kubeGetPodNames(t, namespace, nameContains, true) if err != nil { return nil } @@ -163,7 +171,7 @@ func kubeWaitPodsExist(namespace string, nameContains string, count int, timeout time.Sleep(csaStatusWaitMillis * time.Millisecond) } - fmt.Println(fmt.Sprintf( + logMessage(t, fmt.Sprintf( "%d pods (pod name contains '%s') now exist in namespace '%s'", count, nameContains, namespace, )) @@ -172,6 +180,7 @@ func kubeWaitPodsExist(namespace string, nameContains string, count int, timeout } func kubeWaitResourceCondition( + t *testing.T, namespace string, label string, resource string, @@ -179,6 +188,7 @@ func kubeWaitResourceCondition( timeout string, ) error { _, err := cmdRun( + t, exec.Command( "kubectl", "wait", @@ -202,8 +212,9 @@ func kubeWaitResourceCondition( return err } -func kubeGetEventMessages(namespace string, podName string, reason string) ([]string, error) { +func kubeGetEventMessages(t *testing.T, namespace string, podName string, reason string) ([]string, error) { output, err := cmdRun( + t, exec.Command( "kubectl", "get", "events", "-n", namespace, @@ -223,10 +234,11 @@ func kubeGetEventMessages(namespace string, podName string, reason string) ([]st return strings.Split(output, "\n"), nil } -func kubeCauseContainerRestart(containerId string) error { +func kubeCauseContainerRestart(t *testing.T, containerId string) error { fixedContainerId := strings.ReplaceAll(containerId, "containerd://", "") _, err := cmdRun( + t, exec.Command( "docker", "exec", "-i", kindClusterName+"-control-plane", "bash", "-c", "ctr -n k8s.io task kill -s SIGTERM "+fixedContainerId, diff --git a/test/integration/log.go b/test/integration/log.go new file mode 100644 index 0000000..c361965 --- /dev/null +++ b/test/integration/log.go @@ -0,0 +1,18 @@ +package integration + +import ( + "fmt" + "testing" + "time" 
+) + +func logMessage(t *testing.T, log any) { + prefix := fmt.Sprintf("[%s]:", time.Now().Format(time.RFC3339Nano)) + + if t != nil { + t.Log(prefix, log) + return + } + + fmt.Println(prefix, log) +} diff --git a/test/integration/path.go b/test/integration/path.go index 4e3ef09..17ac1b8 100644 --- a/test/integration/path.go +++ b/test/integration/path.go @@ -17,17 +17,10 @@ limitations under the License. package integration import ( - "os" "path/filepath" "runtime" ) -const ( - pathSeparator = string(os.PathSeparator) - intTestRelPath = "test" + pathSeparator + "integration" - configDirRelPath = intTestRelPath + pathSeparator + "config" -) - var rootAbsPath string func init() { diff --git a/test/integration/settings.go b/test/integration/settings.go new file mode 100644 index 0000000..0dfa570 --- /dev/null +++ b/test/integration/settings.go @@ -0,0 +1,88 @@ +package integration + +import "os" + +// Path +const ( + pathSeparator = string(os.PathSeparator) + pathIntTestRelPath = "test" + pathSeparator + "integration" + pathConfigDirRelPath = pathIntTestRelPath + pathSeparator + "config" +) + +// kind ---------------------------------------------------------------------------------------------------------------- +const ( + kindClusterName = "csa-int-cluster" + kindConfigFileRelPath = pathConfigDirRelPath + pathSeparator + "kind.yaml" +) + +var k8sVersionToImage = map[string]map[string]string{ + "1.29": { + "amd64": "kindest/node:v1.29.8@sha256:b69a150f9951ef41158ec76de381a920df2be3582fd16fc19cf4757eef0dded9", + "arm64": "kindest/node:v1.29.8@sha256:0d5623800cf6290edbc1007ca8a33a5f7e2ad92b41dc7022b4d20a66447db23c", + }, + "1.30": { + "amd64": "kindest/node:v1.30.4@sha256:34cb98a38a57a3357fde925a41d61232bbbbeb411b45a25c0d766635d6c3b975", + "arm64": "kindest/node:v1.30.4@sha256:6becd630a18e77730e31f3833f0b129bbcc9c09ee49c3b88429b3c1fdc30bfc4", + }, + "1.31": { + "amd64": "kindest/node:v1.31.0@sha256:919a65376fd11b67df05caa2e60802ad5de2fca250c9fe0c55b0dce5c9591af3", + 
"arm64": "kindest/node:v1.31.0@sha256:0ccfb11dc66eae4abc20c30ee95687bab51de8aeb04e325e1c49af0890646548", + }, +} + +// metrics-server ------------------------------------------------------------------------------------------------------ +const ( + metricsServerImageTag = "registry.k8s.io/metrics-server/metrics-server:v0.6.4" + metricsServerKustomizeDirRelPath = pathConfigDirRelPath + pathSeparator + "metricsserver" + metricsServerReadyTimeout = "60s" +) + +// CSA ----------------------------------------------------------------------------------------------------------------- + +const ( + csaDockerImage = "csa" + csaDockerTag = "test" + csaDockerImageTag = csaDockerImage + ":" + csaDockerTag +) + +const ( + csaHelmChartRelPath = "charts" + pathSeparator + "container-startup-autoscaler" + csaHelmName = "csa-int" + csaHelmTimeout = "60s" +) + +const ( + csaStatusWaitMillis = 500 + csaStatusMessageStartupCommanded = "Startup resources commanded" + csaStatusMessageStartupCommandedUnknownRes = "Startup resources commanded (unknown resources applied)" + csaStatusMessagePostStartupCommanded = "Post-startup resources commanded" + csaStatusMessagePostStartupCommandedUnknownRes = "Post-startup resources commanded (unknown resources applied)" + csaStatusMessageStartupEnacted = "Startup resources enacted" + csaStatusMessagePostStartupEnacted = "Post-startup resources enacted" + csaStatusMessageValidationError = "Validation error" +) + +const ( + csaEventReasonScaling = "Scaling" + csaEventReasonValidation = "Validation" +) + +// echo-server --------------------------------------------------------------------------------------------------------- +const ( + echoServerDockerImageTag = "ealen/echo-server:0.7.0" + echoServerName = "echo-server" +) + +const ( + echoServerNonTargetContainerName = echoServerName + "-non-target" + echoServerNonTargetContainerCpuRequests = "50m" + echoServerNonTargetContainerCpuLimits = "50m" + echoServerNonTargetContainerMemoryRequests = "150M" + 
echoServerNonTargetContainerMemoryLimits = "150M" + echoServerDefaultProbeInitialDelaySeconds = 120 // TODO(wt) enacting resources can sometimes take Kube upwards of 90s (Kube 1.29). Reduce this when addressed. +) + +// Tests --------------------------------------------------------------------------------------------------------------- +const ( + testsDefaultWaitStatusTimeoutSecs = echoServerDefaultProbeInitialDelaySeconds + 30 +) From 8f4f4027363efb3e43e1d973063eb92824da0355 Mon Sep 17 00:00:00 2001 From: witomlin <76996781+witomlin@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:28:01 +0100 Subject: [PATCH 2/8] Adjust tests timeout --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5b804d8..5aa77b2 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ SHELL:=/bin/bash ROOT_DIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST)))) -INT_TESTS_TIMEOUT=30m +INT_TESTS_TIMEOUT=60m HELM_TESTS_SNAPSHOT_DIR=${ROOT_DIR}charts/container-startup-autoscaler/tests/__snapshot__ .PHONY: help From 861c159c097a17cdb2ef5ad07a124d8f88ababb8 Mon Sep 17 00:00:00 2001 From: witomlin <76996781+witomlin@users.noreply.github.com> Date: Mon, 30 Sep 2024 12:45:27 +0100 Subject: [PATCH 3/8] More integration test enhancements --- .github/workflows/tests.yaml | 2 +- test/integration/config/kind.yaml | 7 ++ test/integration/{settings.go => consts.go} | 30 +++++-- test/integration/csa.go | 2 +- test/integration/echoserver.go | 8 +- test/integration/integration_test.go | 73 ++-------------- test/integration/kind.go | 8 +- test/integration/log.go | 16 ++++ test/integration/suppliedconfig.go | 94 +++++++++++++++++++++ 9 files changed, 160 insertions(+), 80 deletions(-) rename test/integration/{settings.go => consts.go} (78%) create mode 100644 test/integration/suppliedconfig.go diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 2584290..cba0569 100644 --- a/.github/workflows/tests.yaml +++ 
b/.github/workflows/tests.yaml @@ -47,7 +47,7 @@ jobs: - name: Test env: - MAX_PARALLELISM: 1 # Constrained to 2 CPUs on ubuntu-latest + MAX_PARALLELISM: 2 # Constrained to 2 CPUs on ubuntu-latest run: make test-run-int-verbose KUBE_VERSION=1.29 # TODO(wt) test against potentially many versions helm: diff --git a/test/integration/config/kind.yaml b/test/integration/config/kind.yaml index 372629e..1b92534 100644 --- a/test/integration/config/kind.yaml +++ b/test/integration/config/kind.yaml @@ -16,3 +16,10 @@ kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 featureGates: InPlacePodVerticalScaling: true +nodes: + - role: control-plane + kubeadmConfigPatches: # TODO(wt) temporary to work around https://github.com/kubernetes/kubernetes/issues/112264 + - | + apiVersion: kubelet.config.k8s.io/v1beta1 + kind: KubeletConfiguration + syncFrequency: "3s" \ No newline at end of file diff --git a/test/integration/settings.go b/test/integration/consts.go similarity index 78% rename from test/integration/settings.go rename to test/integration/consts.go index 0dfa570..bf19fbc 100644 --- a/test/integration/settings.go +++ b/test/integration/consts.go @@ -1,8 +1,24 @@ +/* +Copyright 2024 Expedia Group, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + package integration import "os" -// Path +// Path ---------------------------------------------------------------------------------------------------------------- const ( pathSeparator = string(os.PathSeparator) pathIntTestRelPath = "test" + pathSeparator + "integration" @@ -38,7 +54,6 @@ const ( ) // CSA ----------------------------------------------------------------------------------------------------------------- - const ( csaDockerImage = "csa" csaDockerTag = "test" @@ -52,7 +67,7 @@ const ( ) const ( - csaStatusWaitMillis = 500 + csaStatusWaitMillis = 1000 csaStatusMessageStartupCommanded = "Startup resources commanded" csaStatusMessageStartupCommandedUnknownRes = "Startup resources commanded (unknown resources applied)" csaStatusMessagePostStartupCommanded = "Post-startup resources commanded" @@ -79,10 +94,15 @@ const ( echoServerNonTargetContainerCpuLimits = "50m" echoServerNonTargetContainerMemoryRequests = "150M" echoServerNonTargetContainerMemoryLimits = "150M" - echoServerDefaultProbeInitialDelaySeconds = 120 // TODO(wt) enacting resources can sometimes take Kube upwards of 90s (Kube 1.29). Reduce this when addressed. 
+) + +const ( + echoServerDefaultProbeInitialDelaySeconds = 15 + echoServerProbePeriodSeconds = 1 + echoServerProbeFailureThreshold = echoServerDefaultProbeInitialDelaySeconds ) // Tests --------------------------------------------------------------------------------------------------------------- const ( - testsDefaultWaitStatusTimeoutSecs = echoServerDefaultProbeInitialDelaySeconds + 30 + testsDefaultWaitStatusTimeoutSecs = echoServerDefaultProbeInitialDelaySeconds * 2 ) diff --git a/test/integration/csa.go b/test/integration/csa.go index 54787fc..cf861a0 100644 --- a/test/integration/csa.go +++ b/test/integration/csa.go @@ -152,7 +152,7 @@ func csaWaitStatus( } lastStatusAnnJson = statusAnn.Json() - //logMessage(t, fmt.Sprintf("current csa status for pod '%s/%s': %s", podNamespace, podName, lastStatusAnnJson)) + logMessage(t, fmt.Sprintf("current csa status for pod '%s/%s': %s", podNamespace, podName, lastStatusAnnJson)) if strings.Contains(statusAnn.Status, waitMsgContains) { // TODO(wt) 'In-place Update of Pod Resources' implementation bug (Kube 1.29) diff --git a/test/integration/echoserver.go b/test/integration/echoserver.go index 703b4aa..b545edb 100644 --- a/test/integration/echoserver.go +++ b/test/integration/echoserver.go @@ -239,8 +239,8 @@ func echoContainerConfigStandard( }, }, InitialDelaySeconds: probesInitialDelaySeconds, - PeriodSeconds: 5, - FailureThreshold: 2, + PeriodSeconds: echoServerProbePeriodSeconds, + FailureThreshold: echoServerProbeFailureThreshold, }, readinessProbe: &v1.Probe{ ProbeHandler: v1.ProbeHandler{ @@ -253,8 +253,8 @@ func echoContainerConfigStandard( }, }, InitialDelaySeconds: probesInitialDelaySeconds, - PeriodSeconds: 5, - FailureThreshold: 2, + PeriodSeconds: echoServerProbePeriodSeconds, + FailureThreshold: echoServerProbeFailureThreshold, }, } } diff --git a/test/integration/integration_test.go b/test/integration/integration_test.go index af579ae..2212fef 100644 --- a/test/integration/integration_test.go +++ 
b/test/integration/integration_test.go @@ -19,9 +19,7 @@ package integration import ( "errors" "flag" - "fmt" "os" - "strconv" "strings" "testing" @@ -178,77 +176,22 @@ Example logs of such an event (restart marked with '<-- HERE'): */ -var deleteNsPostTest = true - func TestMain(m *testing.M) { - setStringConfig := func(env string, config *string) { - envVal := os.Getenv(env) - - if envVal == "" && *config == "" { - // Require env unless defaulted via supplied. - logMessage(nil, fmt.Sprintf("(config) '%s' value is required", env)) - os.Exit(1) - } - - if envVal != "" { - *config = envVal - } - } - setBoolConfig := func(env string, config *bool) { - envVal := os.Getenv(env) - - if envVal == "" && config == nil { - // Require env unless defaulted via supplied. - logMessage(nil, fmt.Sprintf("(config) '%s' value is required", env)) - os.Exit(1) - } - - if envVal != "" { - var err error - *config, err = strconv.ParseBool(envVal) - if err != nil { - logMessage(nil, fmt.Sprintf("(config) '%s' value is not a bool", env)) - os.Exit(1) - } - } - } + suppliedConfigInit() - kubeVersion := "" - maxParallelism := "4" - reuseCluster := false - installMetricsServer := false - keepCsa := false - keepCluster := false - - setStringConfig("KUBE_VERSION", &kubeVersion) - setStringConfig("MAX_PARALLELISM", &maxParallelism) - setBoolConfig("REUSE_CLUSTER", &reuseCluster) - setBoolConfig("INSTALL_METRICS_SERVER", &installMetricsServer) - setBoolConfig("KEEP_CSA", &keepCsa) - setBoolConfig("KEEP_CLUSTER", &keepCluster) - setBoolConfig("DELETE_NS_AFTER_TEST", &deleteNsPostTest) - - logMessage(nil, fmt.Sprintf("(config) KUBE_VERSION: %s", kubeVersion)) - logMessage(nil, fmt.Sprintf("(config) MAX_PARALLELISM: %s", maxParallelism)) - logMessage(nil, fmt.Sprintf("(config) REUSE_CLUSTER: %t", reuseCluster)) - logMessage(nil, fmt.Sprintf("(config) INSTALL_METRICS_SERVER: %t", installMetricsServer)) - logMessage(nil, fmt.Sprintf("(config) KEEP_CSA: %t", keepCsa)) - logMessage(nil, 
fmt.Sprintf("(config) KEEP_CLUSTER: %t", keepCluster)) - logMessage(nil, fmt.Sprintf("(config) DELETE_NS_AFTER_TEST: %t", deleteNsPostTest)) - - _ = flag.Set("test.parallel", maxParallelism) + _ = flag.Set("test.parallel", suppliedConfig.maxParallelism) flag.Parse() if testing.Short() { logMessage(nil, "not running because short tests configured") os.Exit(0) } - kindSetupCluster(nil, kubeVersion, reuseCluster, installMetricsServer) + kindSetupCluster(nil) if err := csaRun(nil); err != nil { - if !keepCsa { + if !suppliedConfig.keepCsa { csaCleanUp(nil) } - if !keepCluster { + if !suppliedConfig.keepCluster { kindCleanUpCluster(nil) } logMessage(nil, err) @@ -256,10 +199,10 @@ func TestMain(m *testing.M) { } exitVal := m.Run() - if !keepCsa { + if !suppliedConfig.keepCsa { csaCleanUp(nil) } - if !keepCluster { + if !suppliedConfig.keepCluster { kindCleanUpCluster(nil) } os.Exit(exitVal) @@ -890,7 +833,7 @@ func ensureEvents(t *testing.T, reason string, substrs []string, namespace strin } func maybeRegisterCleanup(t *testing.T, namespace string) { - if deleteNsPostTest { + if suppliedConfig.deleteNsPostTest { t.Cleanup(func() { _ = kubeDeleteNamespace(t, namespace) }) diff --git a/test/integration/kind.go b/test/integration/kind.go index ad1ffc4..eb51742 100644 --- a/test/integration/kind.go +++ b/test/integration/kind.go @@ -38,7 +38,7 @@ func init() { kindKubeconfig = fmt.Sprintf("%s%s.kube%sconfig-%s", home, pathSeparator, pathSeparator, kindClusterName) } -func kindSetupCluster(t *testing.T, kubeVersion string, reuseCluster, installMetricsServer bool) { +func kindSetupCluster(t *testing.T) { hasExistingCluster := false output, _ := cmdRun( @@ -57,12 +57,12 @@ func kindSetupCluster(t *testing.T, kubeVersion string, reuseCluster, installMet } } - if !reuseCluster || !hasExistingCluster { + if !suppliedConfig.reuseCluster || !hasExistingCluster { if hasExistingCluster { kindCleanUpCluster(t) } - kindNodeImage, err := kindImageFromKubeVersion(kubeVersion, 
runtime.GOARCH) + kindNodeImage, err := kindImageFromKubeVersion(suppliedConfig.kubeVersion, runtime.GOARCH) if err != nil { logMessage(t, common.WrapErrorf(err, "unable to obtain kind image")) os.Exit(1) @@ -100,7 +100,7 @@ func kindSetupCluster(t *testing.T, kubeVersion string, reuseCluster, installMet os.Exit(1) } - if installMetricsServer { + if suppliedConfig.installMetricsServer { _, _ = cmdRun( t, exec.Command("docker", "pull", metricsServerImageTag), diff --git a/test/integration/log.go b/test/integration/log.go index c361965..457c2b9 100644 --- a/test/integration/log.go +++ b/test/integration/log.go @@ -1,3 +1,19 @@ +/* +Copyright 2024 Expedia Group, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package integration import ( diff --git a/test/integration/suppliedconfig.go b/test/integration/suppliedconfig.go new file mode 100644 index 0000000..a51ef23 --- /dev/null +++ b/test/integration/suppliedconfig.go @@ -0,0 +1,94 @@ +/* +Copyright 2024 Expedia Group, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration + +import ( + "fmt" + "os" + "strconv" +) + +type suppliedConfigStruct struct { + kubeVersion string + maxParallelism string + reuseCluster bool + installMetricsServer bool + keepCsa bool + keepCluster bool + deleteNsPostTest bool +} + +var suppliedConfig = suppliedConfigStruct{ + kubeVersion: "", + maxParallelism: "4", + reuseCluster: false, + installMetricsServer: false, + keepCsa: false, + keepCluster: false, + deleteNsPostTest: true, +} + +func suppliedConfigInit() { + suppliedConfigSetString("KUBE_VERSION", &suppliedConfig.kubeVersion) + suppliedConfigSetString("MAX_PARALLELISM", &suppliedConfig.maxParallelism) + suppliedConfigSetBool("REUSE_CLUSTER", &suppliedConfig.reuseCluster) + suppliedConfigSetBool("INSTALL_METRICS_SERVER", &suppliedConfig.installMetricsServer) + suppliedConfigSetBool("KEEP_CSA", &suppliedConfig.keepCsa) + suppliedConfigSetBool("KEEP_CLUSTER", &suppliedConfig.keepCluster) + suppliedConfigSetBool("DELETE_NS_AFTER_TEST", &suppliedConfig.deleteNsPostTest) + + logMessage(nil, fmt.Sprintf("(config) KUBE_VERSION: %s", suppliedConfig.kubeVersion)) + logMessage(nil, fmt.Sprintf("(config) MAX_PARALLELISM: %s", suppliedConfig.maxParallelism)) + logMessage(nil, fmt.Sprintf("(config) REUSE_CLUSTER: %t", suppliedConfig.reuseCluster)) + logMessage(nil, fmt.Sprintf("(config) INSTALL_METRICS_SERVER: %t", suppliedConfig.installMetricsServer)) + logMessage(nil, fmt.Sprintf("(config) KEEP_CSA: %t", suppliedConfig.keepCsa)) + logMessage(nil, fmt.Sprintf("(config) KEEP_CLUSTER: %t", suppliedConfig.keepCluster)) + logMessage(nil, fmt.Sprintf("(config) DELETE_NS_AFTER_TEST: %t", suppliedConfig.deleteNsPostTest)) +} + +func suppliedConfigSetString(env string, config *string) { + envVal := os.Getenv(env) + + if envVal == "" && *config == "" { + // Require env unless defaulted via supplied. 
+ logMessage(nil, fmt.Sprintf("(config) '%s' value is required", env)) + os.Exit(1) + } + + if envVal != "" { + *config = envVal + } +} + +func suppliedConfigSetBool(env string, config *bool) { + envVal := os.Getenv(env) + + if envVal == "" && config == nil { + // Require env unless defaulted via supplied. + logMessage(nil, fmt.Sprintf("(config) '%s' value is required", env)) + os.Exit(1) + } + + if envVal != "" { + var err error + *config, err = strconv.ParseBool(envVal) + if err != nil { + logMessage(nil, fmt.Sprintf("(config) '%s' value is not a bool", env)) + os.Exit(1) + } + } +} From 2f7c189dc152086885721f4c62713a6537e156b9 Mon Sep 17 00:00:00 2001 From: witomlin <76996781+witomlin@users.noreply.github.com> Date: Mon, 30 Sep 2024 13:24:08 +0100 Subject: [PATCH 4/8] Update docs and introduce multi-Kube version integration tests into GHA --- .github/workflows/tests.yaml | 11 +++++++++-- CHANGELOG.md | 3 ++- README.md | 4 ++-- scripts/sandbox/config/vars.sh | 4 ++-- test/integration/consts.go | 16 ++++++++++------ 5 files changed, 25 insertions(+), 13 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index cba0569..7ea7117 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -26,6 +26,13 @@ jobs: integration: name: Run integration tests runs-on: ubuntu-latest + strategy: + matrix: + arg: + - '1.28' + - '1.29' + - '1.30' + - '1.31' steps: - name: Checkout repository @@ -45,10 +52,10 @@ jobs: version: v0.20.0 kubectl_version: v1.27.2 - - name: Test + - name: Test with Kubernetes ${{ matrix.arg }} env: MAX_PARALLELISM: 2 # Constrained to 2 CPUs on ubuntu-latest - run: make test-run-int-verbose KUBE_VERSION=1.29 # TODO(wt) test against potentially many versions + run: make test-run-int-verbose KUBE_VERSION=${{ matrix.arg }} helm: name: Run Helm tests diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f56b34..00b0635 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ ### Kubernetes Compatibility 
| Kube Version | Compatible? | `In-place Update of Pod Resources` Maturity | |:------------:|:-----------:|:-------------------------------------------:| +| 1.31 | ✔️ | Alpha | +| 1.30 | ✔️ | Alpha | | 1.29 | ✔️ | Alpha | | 1.28 | ✔️ | Alpha | | 1.27 | ❌ | Alpha | @@ -49,4 +51,3 @@ | 1.29 | ✔️ | Alpha | | 1.28 | ✔️ | Alpha | | 1.27 | ❌ | Alpha | - diff --git a/README.md b/README.md index 691e9b8..4eea12a 100644 --- a/README.md +++ b/README.md @@ -519,7 +519,7 @@ A number of environment variable-based configuration options are available: | Name | Default | Description | |--------------------------|---------|--------------------------------------------------------------------------------------------------------------------------------------| -| `KUBE_VERSION` | - | The _major.minor_ version of Kube to run tests against e.g. `1.29`. | +| `KUBE_VERSION` | - | The _major.minor_ version of Kube to run tests against e.g. `1.31`. | | `MAX_PARALLELISM` | `4` | The maximum number of tests that can run in parallel. | | `REUSE_CLUSTER` | `false` | Whether to reuse an existing CSA kind cluster (if it already exists). `KUBE_VERSION` has no effect if an existing cluster is reused. | | `INSTALL_METRICS_SERVER` | `false` | Whether to install metrics-server. | @@ -543,7 +543,7 @@ exist in parallel, if desired. ### Cluster/CSA Installation Executing `csa-install.sh`: - Removes any pre-existing CSA kind cluster. -- Installs a CSA kind cluster. +- Installs a CSA kind cluster with the latest version of Kubernetes [certified as compatible with CSA](CHANGELOG.md). - Creates a new, separate CSA kind cluster kubeconfig file under `$HOME/.kube/`. - Pulls metrics-server, loads the image into the CSA kind cluster and installs. - Pulls echo-server and loads the image into the CSA kind cluster. 
diff --git a/scripts/sandbox/config/vars.sh b/scripts/sandbox/config/vars.sh index 1034632..29fc9cd 100644 --- a/scripts/sandbox/config/vars.sh +++ b/scripts/sandbox/config/vars.sh @@ -22,11 +22,11 @@ arch=$(uname -m) case $arch in x86_64) # shellcheck disable=SC2034 - kind_image="kindest/node:v1.29.0@sha256:54a50c9354f11ce0aa56a85d2cacb1b950f85eab3fe1caf988826d1f89bf37eb" + kind_image="kindest/node:v1.31.0@sha256:919a65376fd11b67df05caa2e60802ad5de2fca250c9fe0c55b0dce5c9591af3" ;; arm64) # shellcheck disable=SC2034 - kind_image="kindest/node:v1.29.0@sha256:8ccbd8bc4d52c467f3c79eeeb434827c225600a1d7385a4b1c19d9e038c9e0c0" + kind_image="kindest/node:v1.31.0@sha256:0ccfb11dc66eae4abc20c30ee95687bab51de8aeb04e325e1c49af0890646548" ;; *) echo "Error: architecture '$arch' not supported" diff --git a/test/integration/consts.go b/test/integration/consts.go index bf19fbc..2c6beef 100644 --- a/test/integration/consts.go +++ b/test/integration/consts.go @@ -32,17 +32,21 @@ const ( ) var k8sVersionToImage = map[string]map[string]string{ - "1.29": { - "amd64": "kindest/node:v1.29.8@sha256:b69a150f9951ef41158ec76de381a920df2be3582fd16fc19cf4757eef0dded9", - "arm64": "kindest/node:v1.29.8@sha256:0d5623800cf6290edbc1007ca8a33a5f7e2ad92b41dc7022b4d20a66447db23c", + "1.31": { + "amd64": "kindest/node:v1.31.0@sha256:919a65376fd11b67df05caa2e60802ad5de2fca250c9fe0c55b0dce5c9591af3", + "arm64": "kindest/node:v1.31.0@sha256:0ccfb11dc66eae4abc20c30ee95687bab51de8aeb04e325e1c49af0890646548", }, "1.30": { "amd64": "kindest/node:v1.30.4@sha256:34cb98a38a57a3357fde925a41d61232bbbbeb411b45a25c0d766635d6c3b975", "arm64": "kindest/node:v1.30.4@sha256:6becd630a18e77730e31f3833f0b129bbcc9c09ee49c3b88429b3c1fdc30bfc4", }, - "1.31": { - "amd64": "kindest/node:v1.31.0@sha256:919a65376fd11b67df05caa2e60802ad5de2fca250c9fe0c55b0dce5c9591af3", - "arm64": "kindest/node:v1.31.0@sha256:0ccfb11dc66eae4abc20c30ee95687bab51de8aeb04e325e1c49af0890646548", + "1.29": { + "amd64": 
"kindest/node:v1.29.8@sha256:b69a150f9951ef41158ec76de381a920df2be3582fd16fc19cf4757eef0dded9", + "arm64": "kindest/node:v1.29.8@sha256:0d5623800cf6290edbc1007ca8a33a5f7e2ad92b41dc7022b4d20a66447db23c", + }, + "1.28": { + "amd64": "kindest/node:v1.28.13@sha256:d97df9fff48099bf9a94c92fdc39adde65bec2aa1d011f84233b96172c1003c9", + "arm64": "kindest/node:v1.28.13@sha256:ddef612bb93a9aa3a989f9d3d4e01c0a7c4d866a4b949264146c182cd202d738", }, } From 7fb2cd202ab387a6a25133e4e3eac34e5eefd59d Mon Sep 17 00:00:00 2001 From: witomlin <76996781+witomlin@users.noreply.github.com> Date: Mon, 30 Sep 2024 13:39:20 +0100 Subject: [PATCH 5/8] Add test name to log output --- test/integration/log.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/integration/log.go b/test/integration/log.go index 457c2b9..9ff964a 100644 --- a/test/integration/log.go +++ b/test/integration/log.go @@ -23,7 +23,12 @@ import ( ) func logMessage(t *testing.T, log any) { - prefix := fmt.Sprintf("[%s]:", time.Now().Format(time.RFC3339Nano)) + testName := "" + if t != nil { + testName = t.Name() + } + + prefix := fmt.Sprintf("[%s] [%s]:", time.Now().Format(time.RFC3339Nano), testName) if t != nil { t.Log(prefix, log) From d50f6123b43248d9a0f79e28328ac81f69547afa Mon Sep 17 00:00:00 2001 From: witomlin <76996781+witomlin@users.noreply.github.com> Date: Mon, 30 Sep 2024 13:41:01 +0100 Subject: [PATCH 6/8] Reduce timeout --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5aa77b2..5b804d8 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ SHELL:=/bin/bash ROOT_DIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST)))) -INT_TESTS_TIMEOUT=60m +INT_TESTS_TIMEOUT=30m HELM_TESTS_SNAPSHOT_DIR=${ROOT_DIR}charts/container-startup-autoscaler/tests/__snapshot__ .PHONY: help From ccc5edbc82957f4cf5149feb35202f7757fee87f Mon Sep 17 00:00:00 2001 From: witomlin <76996781+witomlin@users.noreply.github.com> Date: Mon, 30 Sep 2024 13:54:19 +0100 
Subject: [PATCH 7/8] Update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4eea12a..e885724 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ works with deployments, statefulsets, daemonsets and other workload management A CSA is implemented using [controller-runtime](https://github.com/kubernetes-sigs/controller-runtime). CSA is built around Kube's [In-place Update of Pod Resources](https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/1287-in-place-update-pod-resources) -feature, which is currently in alpha state as of Kubernetes 1.29 and therefore requires the `InPlacePodVerticalScaling` +feature, which is currently in alpha state as of Kubernetes 1.31 and therefore requires the `InPlacePodVerticalScaling` feature gate to be enabled. Beta/stable targets are indicated [here](https://github.com/kubernetes/enhancements/issues/1287). The feature implementation (along with the corresponding implementation of CSA) is likely to change until it reaches stable status. See [CHANGELOG.md](CHANGELOG.md) for details of CSA versions and Kubernetes version compatibility. From 14b6c78bccac30aa487b37b88c9855373ee4c073 Mon Sep 17 00:00:00 2001 From: witomlin <76996781+witomlin@users.noreply.github.com> Date: Mon, 30 Sep 2024 14:25:52 +0100 Subject: [PATCH 8/8] Add additional condition to suppliedConfigSetString --- test/integration/suppliedconfig.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/suppliedconfig.go b/test/integration/suppliedconfig.go index a51ef23..4ce3985 100644 --- a/test/integration/suppliedconfig.go +++ b/test/integration/suppliedconfig.go @@ -63,7 +63,7 @@ func suppliedConfigInit() { func suppliedConfigSetString(env string, config *string) { envVal := os.Getenv(env) - if envVal == "" && *config == "" { + if envVal == "" && (config == nil || *config == "") { // Require env unless defaulted via supplied. 
logMessage(nil, fmt.Sprintf("(config) '%s' value is required", env)) os.Exit(1)