From 42d972cae700e0b48daf9f2f41efc14b3f9bb42a Mon Sep 17 00:00:00 2001 From: Daniel Hrabovcak Date: Fri, 12 Jul 2024 15:01:05 -0400 Subject: [PATCH] feat: add config-reloader readiness --- charts/operator/templates/alertmanager.yaml | 10 +++ charts/operator/templates/collector.yaml | 10 +++ charts/operator/templates/rule-evaluator.yaml | 10 +++ cmd/config-reloader/main.go | 64 ++++++++++--------- e2e/ruler_test.go | 2 +- examples/collector-max-throughput.yaml | 10 +++ manifests/operator.yaml | 30 +++++++++ 7 files changed, 106 insertions(+), 30 deletions(-) diff --git a/charts/operator/templates/alertmanager.yaml b/charts/operator/templates/alertmanager.yaml index 78c646b214..a7c16ee430 100644 --- a/charts/operator/templates/alertmanager.yaml +++ b/charts/operator/templates/alertmanager.yaml @@ -132,6 +132,16 @@ spec: - all privileged: false readOnlyRootFilesystem: true + livenessProbe: + httpGet: + port: 19091 + path: /-/healthy + scheme: HTTP + readinessProbe: + httpGet: + port: 19091 + path: /-/ready + scheme: HTTP volumes: - name: config secret: diff --git a/charts/operator/templates/collector.yaml b/charts/operator/templates/collector.yaml index 396f6434a5..f4d4a755fd 100644 --- a/charts/operator/templates/collector.yaml +++ b/charts/operator/templates/collector.yaml @@ -87,6 +87,16 @@ spec: - all privileged: false readOnlyRootFilesystem: true + livenessProbe: + httpGet: + port: 19091 + path: /-/healthy + scheme: HTTP + readinessProbe: + httpGet: + port: 19091 + path: /-/ready + scheme: HTTP - name: prometheus image: {{.Values.images.prometheus.image}}:{{.Values.images.prometheus.tag}} args: diff --git a/charts/operator/templates/rule-evaluator.yaml b/charts/operator/templates/rule-evaluator.yaml index a01b913e8f..ee005188d1 100644 --- a/charts/operator/templates/rule-evaluator.yaml +++ b/charts/operator/templates/rule-evaluator.yaml @@ -92,6 +92,16 @@ spec: - all privileged: false readOnlyRootFilesystem: true + livenessProbe: + httpGet: + port: 19093 + path: /-/healthy + scheme: HTTP + readinessProbe: + httpGet: + port: 19093 + path: /-/ready + scheme: HTTP - name: evaluator image: {{.Values.images.ruleEvaluator.image}}:{{.Values.images.ruleEvaluator.tag}} args: diff --git a/cmd/config-reloader/main.go b/cmd/config-reloader/main.go index da56209126..74991255e2 100644 --- a/cmd/config-reloader/main.go +++ b/cmd/config-reloader/main.go @@ -17,12 +17,12 @@ package main import ( "context" "flag" + "fmt" "net/http" "net/url" "os" - "os/signal" "strings" - "syscall" + "sync/atomic" "time" "github.com/go-kit/log" @@ -32,6 +32,7 @@ import ( "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/thanos-io/thanos/pkg/reloader" + "sigs.k8s.io/controller-runtime/pkg/manager/signals" ) func main() { @@ -79,9 +80,35 @@ func main() { os.Exit(1) } - // Set up interrupt signal handler. - term := make(chan os.Signal, 1) - signal.Notify(term, os.Interrupt, syscall.SIGTERM) + ctx := signals.SetupSignalHandler() + go func() { + <-ctx.Done() + //nolint:errcheck + level.Info(logger).Log("msg", "received SIGTERM, exiting gracefully...") + }() + + isReady := atomic.Bool{} + server := &http.Server{Addr: *listenAddress} + http.Handle("/metrics", promhttp.HandlerFor(metrics, promhttp.HandlerOpts{Registry: metrics})) + http.HandleFunc("/-/healthy", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + }) + http.HandleFunc("/-/ready", func(w http.ResponseWriter, _ *http.Request) { + if !isReady.Load() { + w.WriteHeader(http.StatusInternalServerError) + fmt.Fprintf(w, "config-reloader is not ready.\n") + return + } + w.WriteHeader(http.StatusOK) + fmt.Fprintf(w, "config-reloader is ready.\n") + }) + serverErr := make(chan error, 1) + + go func() { + //nolint:errcheck + level.Info(logger).Log("msg", "Starting web server for metrics", "listen", *listenAddress) + serverErr <- server.ListenAndServe() + }() // Poll ready endpoint indefinitely until it's up and running. req, err := http.NewRequest(http.MethodGet, *readyURLStr, nil) @@ -102,7 +129,7 @@ func main() { level.Info(logger).Log("msg", "ensure ready-url is healthy") for { select { - case <-term: + case <-ctx.Done(): //nolint:errcheck level.Info(logger).Log("msg", "received SIGTERM, exiting gracefully...") os.Exit(0) @@ -132,6 +159,7 @@ func main() { } }() <-done + isReady.Store(true) var cfgDirs []reloader.CfgDirOption if *configDir != "" { @@ -170,30 +198,8 @@ func main() { }) } { - cancel := make(chan struct{}) - g.Add( - func() error { - select { - case <-term: - //nolint:errcheck - level.Info(logger).Log("msg", "received SIGTERM, exiting gracefully...") - case <-cancel: - } - return nil - }, - func(error) { - close(cancel) - }, - ) - } - { - server := &http.Server{Addr: *listenAddress} - http.Handle("/metrics", promhttp.HandlerFor(metrics, promhttp.HandlerOpts{Registry: metrics})) - g.Add(func() error { - //nolint:errcheck - level.Info(logger).Log("msg", "Starting web server for metrics", "listen", *listenAddress) - return server.ListenAndServe() + return <-serverErr }, func(error) { ctx, cancel := context.WithTimeout(context.Background(), time.Minute) if err := server.Shutdown(ctx); err != nil { diff --git a/e2e/ruler_test.go b/e2e/ruler_test.go index 95f122c034..c205d7c555 100644 --- a/e2e/ruler_test.go +++ b/e2e/ruler_test.go @@ -775,7 +775,7 @@ func logsError(logs string) (string, error) { } data := map[string]string{} if err := json.Unmarshal([]byte(line), &data); err != nil { - return "", fmt.Errorf("unable to unmarshal log line: %s", err) + return "", fmt.Errorf("unable to unmarshal log line %q: %s", line, err) } if data["level"] == "error" { return line, nil diff --git a/examples/collector-max-throughput.yaml b/examples/collector-max-throughput.yaml index d88e38c22f..0b3956a63a 100644 --- a/examples/collector-max-throughput.yaml +++ b/examples/collector-max-throughput.yaml @@ -90,6 +90,16 @@ spec: - all privileged: false readOnlyRootFilesystem: true + livenessProbe: + httpGet: + port: 19091 + path: /-/healthy + scheme: HTTP + readinessProbe: + httpGet: + port: 19091 + path: /-/ready + scheme: HTTP - name: prometheus image: gke.gcr.io/prometheus-engine/prometheus@sha256:01fd523f6dfa54b229b04974195632c8268fbd3d51e66ad076b5d31366210c54 args: diff --git a/manifests/operator.yaml b/manifests/operator.yaml index 50efcdf40e..2f808e7edc 100644 --- a/manifests/operator.yaml +++ b/manifests/operator.yaml @@ -402,6 +402,16 @@ spec: - all privileged: false readOnlyRootFilesystem: true + livenessProbe: + httpGet: + port: 19091 + path: /-/healthy + scheme: HTTP + readinessProbe: + httpGet: + port: 19091 + path: /-/ready + scheme: HTTP - name: prometheus image: gke.gcr.io/prometheus-engine/prometheus@sha256:01fd523f6dfa54b229b04974195632c8268fbd3d51e66ad076b5d31366210c54 args: @@ -708,6 +718,16 @@ spec: - all privileged: false readOnlyRootFilesystem: true + livenessProbe: + httpGet: + port: 19093 + path: /-/healthy + scheme: HTTP + readinessProbe: + httpGet: + port: 19093 + path: /-/ready + scheme: HTTP - name: evaluator image: gke.gcr.io/prometheus-engine/rule-evaluator:v0.9.0-gke.1 args: @@ -908,6 +928,16 @@ spec: - all privileged: false readOnlyRootFilesystem: true + livenessProbe: + httpGet: + port: 19091 + path: /-/healthy + scheme: HTTP + readinessProbe: + httpGet: + port: 19091 + path: /-/ready + scheme: HTTP volumes: - name: config secret: