diff --git a/e2e/operator_context_test.go b/e2e/operator_context_test.go index ad78dc5789..f5b3f5f9e1 100644 --- a/e2e/operator_context_test.go +++ b/e2e/operator_context_test.go @@ -23,7 +23,6 @@ import ( "errors" "flag" "fmt" - "net/http" "os" "os/signal" "strings" @@ -31,11 +30,9 @@ import ( "testing" "time" - "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap/zapcore" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - rbacv1 "k8s.io/api/rbac/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -56,10 +53,6 @@ import ( ) const ( - collectorManifest = "../cmd/operator/deploy/operator/10-collector.yaml" - ruleEvalManifest = "../cmd/operator/deploy/operator/11-rule-evaluator.yaml" - alertmanagerManifest = "../cmd/operator/deploy/operator/12-alertmanager.yaml" - testLabel = "monitoring.googleapis.com/prometheus-test" ) @@ -76,6 +69,7 @@ var ( portForward bool leakResources bool cleanup bool + deployOperator bool ) func init() { @@ -90,6 +84,9 @@ func newClient() (client.Client, error) { if err != nil { return nil, fmt.Errorf("operator schema: %w", err) } + if err := apiextensionsv1.AddToScheme(scheme); err != nil { + return nil, err + } return client.New(kubeconfig, client.Options{ Scheme: scheme, @@ -119,11 +116,12 @@ func TestMain(m *testing.M) { flag.StringVar(&projectID, "project-id", "", "The GCP project to write metrics to.") flag.StringVar(&cluster, "cluster", "", "The name of the Kubernetes cluster that's tested against.") flag.StringVar(&location, "location", "", "The location of the Kubernetes cluster that's tested against.") - flag.BoolVar(&skipGCM, "skip-gcm", false, "Skip validating GCM ingested points.") + flag.BoolVar(&skipGCM, "skip-gcm", true, "Skip validating GCM ingested points.") flag.StringVar(&gcpServiceAccount, "gcp-service-account", "", "Path to GCP service account 
file for usage by deployed containers.") flag.BoolVar(&portForward, "port-forward", true, "Whether to port-forward Kubernetes HTTP requests.") flag.BoolVar(&leakResources, "leak-resources", true, "If set, prevents deleting resources. Useful for debugging.") flag.BoolVar(&cleanup, "cleanup-resources", true, "If set, cleans resources before running tests.") + flag.BoolVar(&deployOperator, "deploy-operator", false, "If set, deploys a GMP operator to more closely mimic an actual environment.") flag.Parse() @@ -163,6 +161,12 @@ func TestMain(m *testing.M) { } } + ctx := context.Background() + if err := DeployGlobalResources(ctx, c); err != nil { + fmt.Fprintln(os.Stderr, "create global resources:", err) + os.Exit(1) + } + go func() { os.Exit(m.Run()) }() @@ -180,7 +184,7 @@ func TestMain(m *testing.M) { return } fmt.Fprintln(os.Stdout, "cleaning up abandoned resources...") - if err := cleanupResources(context.Background(), kubeconfig, c, ""); err != nil { + if err := cleanupResources(ctx, kubeconfig, c, ""); err != nil { fmt.Fprintln(os.Stderr, "Cleaning up namespaces failed:", err) os.Exit(1) } @@ -233,40 +237,10 @@ func newOperatorContext(t *testing.T) *OperatorContext { cancel() }) - if err := createBaseResources(ctx, tctx.Client(), namespace, pubNamespace, userNamespace, tctx.GetOperatorTestLabelValue()); err != nil { - t.Fatalf("create resources: %s", err) - } - - var httpClient *http.Client - if portForward { - var err error - httpClient, err = kubeutil.PortForwardClient(t, kubeconfig, tctx.Client()) - if err != nil { - t.Fatalf("creating HTTP client: %s", err) - } - } - - op, err := operator.New(globalLogger, kubeconfig, operator.Options{ - ProjectID: projectID, - Cluster: cluster, - Location: location, - OperatorNamespace: tctx.namespace, - PublicNamespace: tctx.pubNamespace, - // Pick a random available port. 
- ListenAddr: ":0", - CollectorHTTPClient: httpClient, - }) - if err != nil { - t.Fatalf("instantiating operator: %s", err) + if err := createLocalResources(t, ctx, tctx.Client(), namespace, pubNamespace, userNamespace, tctx.GetOperatorTestLabelValue()); err != nil { + t.Fatalf("create local resources: %s", err) } - go func() { - if err := op.Run(ctx, prometheus.NewRegistry()); err != nil { - // Since we aren't in the main test goroutine we cannot fail with Fatal here. - t.Errorf("running operator: %s", err) - } - }() - return tctx } @@ -314,46 +288,27 @@ func (tctx *OperatorContext) GetOperatorTestLabelValue() string { return strings.SplitN(tctx.T.Name(), "/", 2)[0] } -// createBaseResources creates resources the operator requires to exist already. +// createLocalResources creates resources the operator requires to exist already. // These are resources which don't depend on runtime state and can thus be deployed // statically, allowing to run the operator without critical write permissions. 
-func createBaseResources(ctx context.Context, kubeClient client.Client, opNamespace, publicNamespace, userNamespace, labelValue string) error { - if err := createNamespaces(ctx, kubeClient, opNamespace, publicNamespace, userNamespace); err != nil { - return err - } - - if err := createGCPSecretResources(ctx, kubeClient, opNamespace); err != nil { - return err - } - if err := createCollectorResources(ctx, kubeClient, opNamespace, labelValue); err != nil { - return err - } - if err := createAlertmanagerResources(ctx, kubeClient, opNamespace); err != nil { +func createLocalResources(t testing.TB, ctx context.Context, kubeClient client.Client, opNamespace, publicNamespace, userNamespace, labelValue string) error { + if err := DeployCollectorResources(t, ctx, kubeClient, opNamespace, publicNamespace, testLabel, labelValue, skipGCM, !deployOperator); err != nil { return err } - return nil -} - -func createNamespaces(ctx context.Context, kubeClient client.Client, opNamespace, publicNamespace, userNamespace string) error { if err := kubeClient.Create(ctx, &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: opNamespace, + Name: userNamespace, }, }); err != nil { + return fmt.Errorf("create user namespace: %w", err) + } + if err := createGCPSecretResources(ctx, kubeClient, opNamespace); err != nil { return err } - if err := kubeClient.Create(ctx, &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: publicNamespace, - }, - }); err != nil { + if err := createAlertmanagerResources(ctx, kubeClient, opNamespace); err != nil { return err } - return kubeClient.Create(ctx, &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: userNamespace, - }, - }) + return nil } func createGCPSecretResources(ctx context.Context, kubeClient client.Client, namespace string) error { @@ -377,90 +332,10 @@ func createGCPSecretResources(ctx context.Context, kubeClient client.Client, nam return nil } -func createCollectorResources(ctx context.Context, kubeClient client.Client, 
namespace, labelValue string) error { - if err := kubeClient.Create(ctx, &corev1.ServiceAccount{ - ObjectMeta: metav1.ObjectMeta{ - Name: operator.NameCollector, - Namespace: namespace, - }, - }); err != nil { - return err - } - - // The cluster role expected to exist already. - const clusterRoleName = operator.DefaultOperatorNamespace + ":" + operator.NameCollector - - if err := kubeClient.Create(ctx, &rbacv1.ClusterRoleBinding{ - ObjectMeta: metav1.ObjectMeta{ - Name: clusterRoleName + ":" + namespace, - }, - RoleRef: rbacv1.RoleRef{ - APIGroup: "rbac.authorization.k8s.io", - Kind: "ClusterRole", - // The ClusterRole is expected to exist in the cluster already. - Name: clusterRoleName, - }, - Subjects: []rbacv1.Subject{ - { - Kind: "ServiceAccount", - Namespace: namespace, - Name: operator.NameCollector, - }, - }, - }); err != nil { - return err - } - - obj, err := kubeutil.ResourceFromFile(kubeClient.Scheme(), collectorManifest) - if err != nil { - return fmt.Errorf("decode collector: %w", err) - } - collector := obj.(*appsv1.DaemonSet) - collector.Namespace = namespace - if collector.Spec.Template.Labels == nil { - collector.Spec.Template.Labels = map[string]string{} - } - collector.Spec.Template.Labels[testLabel] = labelValue - if skipGCM { - for i := range collector.Spec.Template.Spec.Containers { - container := &collector.Spec.Template.Spec.Containers[i] - if container.Name == "prometheus" { - container.Args = append(container.Args, "--export.debug.disable-auth") - break - } - } - } - - if err = kubeClient.Create(ctx, collector); err != nil { - return fmt.Errorf("create collector DaemonSet: %w", err) - } - return nil -} - func createAlertmanagerResources(ctx context.Context, kubeClient client.Client, namespace string) error { - obj, err := kubeutil.ResourceFromFile(kubeClient.Scheme(), ruleEvalManifest) - if err != nil { - return fmt.Errorf("decode evaluator: %w", err) - } - evaluator := obj.(*appsv1.Deployment) - evaluator.Namespace = namespace - - if err 
:= kubeClient.Create(ctx, evaluator); err != nil { - return fmt.Errorf("create rule-evaluator Deployment: %w", err) - } - - objs, err := kubeutil.ResourcesFromFile(kubeClient.Scheme(), alertmanagerManifest) - if err != nil { - return fmt.Errorf("read alertmanager YAML: %w", err) - } - for i, obj := range objs { - obj.SetNamespace(namespace) - - if err := kubeClient.Create(ctx, obj); err != nil { - return fmt.Errorf("create object at index %d: %w", i, err) - } + if err := DeployAlertmanagerResources(ctx, kubeClient, namespace); err != nil { + return err } - return nil } diff --git a/e2e/operator_deploy.go b/e2e/operator_deploy.go new file mode 100644 index 0000000000..646c23c8d8 --- /dev/null +++ b/e2e/operator_deploy.go @@ -0,0 +1,338 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package e2e contains tests that validate the behavior of gmp-operator against a cluster. +// To make tests simple and fast, the test suite runs the operator internally. The CRDs +// are expected to be installed out of band (along with the operator deployment itself in +// a real world setup). 
+package e2e + +import ( + "context" + "fmt" + "net/http" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/GoogleCloudPlatform/prometheus-engine/e2e/kubeutil" + "github.com/GoogleCloudPlatform/prometheus-engine/pkg/operator" + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/exp/slices" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" + appsv1 "k8s.io/api/apps/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +const ( + crdsManifestDirectory = "../cmd/operator/deploy/crds" + operatorManifestDirectory = "../cmd/operator/deploy/operator" + + ruleEvalManifest = "../cmd/operator/deploy/operator/11-rule-evaluator.yaml" + alertmanagerManifest = "../cmd/operator/deploy/operator/12-alertmanager.yaml" +) + +var operatorResourceFiles = []string{ + // We start the operator manually. + "05-deployment.yaml", + // Purposefully, we want to let tests define them. + "06-service.yaml", +} + +var excludedResourceFiles = []string{ + // Purposefully, we want to let tests define them. 
+ "07-operatorconfig.yaml", +} + +func DeployGlobalResources(ctx context.Context, kubeClient client.Client) error { + files, err := os.ReadDir(crdsManifestDirectory) + if err != nil { + return err + } + for _, file := range files { + objs, err := kubeutil.ResourcesFromFile(kubeClient.Scheme(), filepath.Join(crdsManifestDirectory, file.Name())) + if err != nil { + return err + } + for _, obj := range objs { + if _, err := controllerutil.CreateOrUpdate(ctx, kubeClient, obj, func() error { return nil }); err != nil { + return err + } + } + } + + files, err = os.ReadDir(operatorManifestDirectory) + if err != nil { + return err + } + for _, file := range files { + if slices.Contains(excludedResourceFiles, file.Name()) { + continue + } + if slices.Contains(operatorResourceFiles, file.Name()) { + continue + } + objs, err := kubeutil.ResourcesFromFile(kubeClient.Scheme(), filepath.Join(operatorManifestDirectory, file.Name())) + if err != nil { + return err + } + for _, obj := range objs { + if !isGlobalResource(obj) { + continue + } + if _, err := controllerutil.CreateOrUpdate(ctx, kubeClient, obj, func() error { return nil }); err != nil { + return err + } + } + } + return nil +} + +func DeployCollectorResources(t testing.TB, ctx context.Context, kubeClient client.Client, operatorNamespace, publicNamespace, labelName, labelValue string, skipGCM, localOperator bool) error { + files, err := os.ReadDir(operatorManifestDirectory) + if err != nil { + return err + } + for _, file := range files { + if slices.Contains(excludedResourceFiles, file.Name()) { + continue + } + if file.Name() == filepath.Base(ruleEvalManifest) { + continue + } + if file.Name() == filepath.Base(alertmanagerManifest) { + continue + } + + objs, err := kubeutil.ResourcesFromFile(kubeClient.Scheme(), filepath.Join(operatorManifestDirectory, file.Name())) + if err != nil { + return err + } + + for _, obj := range objs { + if isGlobalResource(obj) { + continue + } + normalizeResource(obj, operatorNamespace, 
publicNamespace, labelName, labelValue) + + if obj.GetObjectKind().GroupVersionKind().Kind == "DaemonSet" && obj.GetName() == operator.NameCollector { + collector, ok := obj.(*appsv1.DaemonSet) + if !ok { + t.Fatalf("expected collector to be a DaemonSet, got %T", obj) + } + if skipGCM { + for i := range collector.Spec.Template.Spec.Containers { + container := &collector.Spec.Template.Spec.Containers[i] + if container.Name == operator.CollectorPrometheusContainerName { + container.Args = append(container.Args, "--export.debug.disable-auth") + break + } + } + } + } + if obj.GetObjectKind().GroupVersionKind().Kind == "Deployment" && obj.GetName() == operator.NameOperator { + if localOperator { + var httpClient *http.Client + if portForward { + var err error + httpClient, err = kubeutil.PortForwardClient(t, kubeconfig, kubeClient) + if err != nil { + t.Fatalf("creating HTTP client: %s", err) + } + } + + op, err := operator.New(globalLogger, kubeconfig, operator.Options{ + ProjectID: projectID, + Cluster: cluster, + Location: location, + OperatorNamespace: operatorNamespace, + PublicNamespace: publicNamespace, + // Pick a random available port. + ListenAddr: ":0", + CollectorHTTPClient: httpClient, + }) + if err != nil { + t.Fatalf("instantiating operator: %s", err) + } + + go func() { + if err := op.Run(ctx, prometheus.NewRegistry()); err != nil { + // Since we aren't in the main test goroutine we cannot fail with Fatal here. 
+ t.Errorf("running operator: %s", err) + } + }() + continue + } + + deployment, ok := obj.(*appsv1.Deployment) + if !ok { + t.Fatalf("expected operator to be a Deployment, got %T", obj) + } + container, err := kubeutil.DeploymentContainer(deployment, "operator") + if err != nil { + t.Fatalf("unable to find operator container: %s", err) + } + container.Args = append(container.Args, fmt.Sprintf("--project-id=%s", projectID)) + container.Args = append(container.Args, fmt.Sprintf("--location=%s", location)) + container.Args = append(container.Args, fmt.Sprintf("--cluster=%s", cluster)) + container.Args = append(container.Args, fmt.Sprintf("--operator-namespace=%s", operatorNamespace)) + container.Args = append(container.Args, fmt.Sprintf("--public-namespace=%s", publicNamespace)) + } + if localOperator && obj.GetObjectKind().GroupVersionKind().Group == "admissionregistration.k8s.io" { + continue + } + + if err := kubeClient.Create(ctx, obj); err != nil { + return err + } + } + } + return nil +} + +func isGlobalResource(obj client.Object) bool { + // Non-namespaced resources may use a prefix to indicate it affects a namespace. 
+ if strings.HasPrefix(obj.GetName(), "gmp-public") || strings.HasPrefix(obj.GetName(), "gmp-system") { + return false + } + if strings.HasPrefix(obj.GetName(), "gmp-operator.gmp-public") || strings.HasPrefix(obj.GetName(), "gmp-operator.gmp-system") { + return false + } + return obj.GetNamespace() == "" +} + +func normalizeResource(obj client.Object, operatorNamespace, publicNamespace, labelName, labelValue string) { + switch obj := obj.(type) { + case *rbacv1.RoleBinding: + for i := range obj.Subjects { + subject := &obj.Subjects[i] + if subject.Namespace == "gmp-public" { + subject.Namespace = publicNamespace + } + if subject.Namespace == "gmp-system" { + subject.Namespace = operatorNamespace + } + } + if strings.HasPrefix(obj.RoleRef.Name, "gmp-public") { + obj.RoleRef.Name = publicNamespace + strings.TrimPrefix(obj.RoleRef.Name, "gmp-public") + } + if strings.HasPrefix(obj.RoleRef.Name, "gmp-system") { + obj.RoleRef.Name = operatorNamespace + strings.TrimPrefix(obj.RoleRef.Name, "gmp-system") + } + case *rbacv1.ClusterRoleBinding: + for i := range obj.Subjects { + subject := &obj.Subjects[i] + if subject.Namespace == "gmp-public" { + subject.Namespace = publicNamespace + } + if subject.Namespace == "gmp-system" { + subject.Namespace = operatorNamespace + } + } + if strings.HasPrefix(obj.RoleRef.Name, "gmp-public") { + obj.RoleRef.Name = publicNamespace + strings.TrimPrefix(obj.RoleRef.Name, "gmp-public") + } + if strings.HasPrefix(obj.RoleRef.Name, "gmp-system") { + obj.RoleRef.Name = operatorNamespace + strings.TrimPrefix(obj.RoleRef.Name, "gmp-system") + } + case *admissionregistrationv1.ValidatingWebhookConfiguration: + for i := range obj.Webhooks { + webhook := &obj.Webhooks[i] + if webhook.NamespaceSelector == nil { + webhook.NamespaceSelector = &metav1.LabelSelector{ + MatchLabels: map[string]string{}, + } + } + webhook.NamespaceSelector.MatchLabels[labelName] = labelValue + + service := webhook.ClientConfig.Service + if service != nil { + if 
service.Namespace == "gmp-public" { + service.Namespace = publicNamespace + } + if service.Namespace == "gmp-system" { + service.Namespace = operatorNamespace + } + } + } + case *admissionregistrationv1.MutatingWebhookConfiguration: + for i := range obj.Webhooks { + webhook := &obj.Webhooks[i] + if webhook.NamespaceSelector == nil { + webhook.NamespaceSelector = &metav1.LabelSelector{ + MatchLabels: map[string]string{}, + } + } + webhook.NamespaceSelector.MatchLabels[labelName] = labelValue + + service := webhook.ClientConfig.Service + if service != nil { + if service.Namespace == "gmp-public" { + service.Namespace = publicNamespace + } + if service.Namespace == "gmp-system" { + service.Namespace = operatorNamespace + } + } + } + } + if strings.HasPrefix(obj.GetName(), "gmp-public") { + obj.SetName(publicNamespace + strings.TrimPrefix(obj.GetName(), "gmp-public")) + } + if strings.HasPrefix(obj.GetName(), "gmp-system") { + obj.SetName(operatorNamespace + strings.TrimPrefix(obj.GetName(), "gmp-system")) + } + if strings.HasPrefix(obj.GetName(), "gmp-operator.gmp-public") { + obj.SetName("gmp-operator." + publicNamespace + strings.TrimPrefix(obj.GetName(), "gmp-operator.gmp-public")) + } + if strings.HasPrefix(obj.GetName(), "gmp-operator.gmp-system") { + obj.SetName("gmp-operator." 
+ operatorNamespace + strings.TrimPrefix(obj.GetName(), "gmp-operator.gmp-system")) + } + if obj.GetNamespace() == "gmp-public" { + obj.SetNamespace(publicNamespace) + } + if obj.GetNamespace() == "gmp-system" { + obj.SetNamespace(operatorNamespace) + } +} + +func DeployAlertmanagerResources(ctx context.Context, kubeClient client.Client, namespace string) error { + obj, err := kubeutil.ResourceFromFile(kubeClient.Scheme(), ruleEvalManifest) + if err != nil { + return err + } + evaluator := obj.(*appsv1.Deployment) + evaluator.Namespace = namespace + + if err := kubeClient.Create(ctx, evaluator); err != nil { + return fmt.Errorf("create rule-evaluator Deployment: %w", err) + } + + objs, err := kubeutil.ResourcesFromFile(kubeClient.Scheme(), alertmanagerManifest) + if err != nil { + return fmt.Errorf("read alertmanager YAML: %w", err) + } + for i, obj := range objs { + obj.SetNamespace(namespace) + if err := kubeClient.Create(ctx, obj); err != nil { + return fmt.Errorf("create object at index %d: %w", i, err) + } + } + + return nil +} diff --git a/hack/kind-test.sh b/hack/kind-test.sh index 55ddc27564..373a67a110 100755 --- a/hack/kind-test.sh +++ b/hack/kind-test.sh @@ -26,18 +26,6 @@ kind delete cluster # export 'kind kind' kubernetes context referenced later. kind create cluster -# Need to ensure namespace is deployed first explicitly. -echo ">>> deploying static resources" -kubectl --context kind-kind apply -f "${REPO_ROOT}/manifests/setup.yaml" - -# TODO(pintohutch): find a way to incorporate webhooks back into our kind tests. -# This is a workaround for now. -for m in `ls -d ${REPO_ROOT}/cmd/operator/deploy/operator/* | grep -v webhook` -do - kubectl --context kind-kind apply -f "${m}" -done -kubectl --context kind-kind apply -f "${REPO_ROOT}/manifests/rule-evaluator.yaml" - echo ">>> executing gmp e2e tests" # We don't cleanup resources because this script recreates the cluster. 
go test -v "${REPO_ROOT}/e2e" -run "${TEST_RUN:-.}" -args -project-id=test-proj -cluster=test-cluster -location=test-loc -skip-gcm=true -cleanup-resources=false ${TEST_ARGS}