From accf70e557385e3713e092c167e8646d9763e928 Mon Sep 17 00:00:00 2001
From: jvoravong
Date: Thu, 5 Dec 2024 12:12:40 -0700
Subject: [PATCH] Additional fixes for issues where sometimes certificates and Instrumentation opentelemetry.io/v1alpha1 are installed too early

---
Reviewer notes (not part of the commit message):

* Hook ordering introduced here: Issuer (hook-weight "2"), then Certificate
  (hook-weight "3"), then the two startupapicheck Jobs (hook-weight "4").
* Each Job polls its service with plain-HTTP wget up to 300 times, one second
  apart, and exits 1 if the service never answers.
* The readiness test uses `grep -q`, not `grep -qv`. With -v, any wget error
  line (connection refused, bad address) was treated as "available", while
  the expected "HTTP/1.0 400 Bad Request" reply kept the loop waiting.
  Presumably the 400 is what a TLS listener returns to a plain-HTTP request;
  confirm against the operator metrics and webhook endpoints.
* Line structure and indentation of this patch were reconstructed; run
  `git apply --check` before applying. The blob ids on the `index` lines
  predate the grep fix.

 .../fix-operator-install-operations.yaml      | 12 +++++
 .../operator/certmanager.yaml                 |  4 +-
 .../job-operator-startupapicheck.yaml         | 50 +++++++++++++++++++
 .../job-operator-webhook-startupapicheck.yaml | 50 +++++++++++++++++++
 .../job-operator-startupapicheck.yaml         | 46 +++++++++++++++++
 .../job-operator-webhook-startupapicheck.yaml | 46 +++++++++++++++++
 helm-charts/splunk-otel-collector/values.yaml | 11 ++--
 7 files changed, 212 insertions(+), 7 deletions(-)
 create mode 100644 .chloggen/fix-operator-install-operations.yaml
 create mode 100644 examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/job-operator-startupapicheck.yaml
 create mode 100644 examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/job-operator-webhook-startupapicheck.yaml
 create mode 100644 helm-charts/splunk-otel-collector/templates/operator/job-operator-startupapicheck.yaml
 create mode 100644 helm-charts/splunk-otel-collector/templates/operator/job-operator-webhook-startupapicheck.yaml

diff --git a/.chloggen/fix-operator-install-operations.yaml b/.chloggen/fix-operator-install-operations.yaml
new file mode 100644
index 0000000000..c972779c3c
--- /dev/null
+++ b/.chloggen/fix-operator-install-operations.yaml
@@ -0,0 +1,12 @@
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: bug_fix
+# The name of the component, or a single word describing the area of concern, (e.g. agent, clusterReceiver, gateway, operator, chart, other)
+component: operator
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Additional fixes for issues where sometimes certificates and Instrumentation opentelemetry.io/v1alpha1 are installed too early
+# One or more tracking issues related to the change
+issues: [1559]
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
diff --git a/examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/certmanager.yaml b/examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/certmanager.yaml
index 46d53d3a0b..b609dbb24e 100644
--- a/examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/certmanager.yaml
+++ b/examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/certmanager.yaml
@@ -5,7 +5,7 @@ kind: Certificate
 metadata:
   annotations:
     helm.sh/hook: post-install,post-upgrade
-    helm.sh/hook-weight: "1"
+    helm.sh/hook-weight: "3"
   labels:
     helm.sh/chart: operator-0.71.2
     app.kubernetes.io/name: operator
@@ -34,7 +34,7 @@ kind: Issuer
 metadata:
   annotations:
     helm.sh/hook: post-install,post-upgrade
-    helm.sh/hook-weight: "1"
+    helm.sh/hook-weight: "2"
   labels:
     helm.sh/chart: operator-0.71.2
     app.kubernetes.io/name: operator
diff --git a/examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/job-operator-startupapicheck.yaml b/examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/job-operator-startupapicheck.yaml
new file mode 100644
index 0000000000..09bfde9f67
--- /dev/null
+++ b/examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/job-operator-startupapicheck.yaml
@@ -0,0 +1,50 @@
+---
+# Source: splunk-otel-collector/templates/operator/job-operator-startupapicheck.yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: default-splunk-otel-collector-operator-startupapicheck
+  namespace: default
+  labels:
+    app.kubernetes.io/name: splunk-otel-collector
+    helm.sh/chart: splunk-otel-collector-0.113.0
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/instance: default
+    app.kubernetes.io/version: "0.113.0"
+    app: splunk-otel-collector
+    component: otel-operator
+    chart: splunk-otel-collector-0.113.0
+    release: default
+    heritage: Helm
+    app.kubernetes.io/component: otel-operator
+  annotations:
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-weight": "4"
+spec:
+  template:
+    spec:
+      containers:
+        - name: startupapicheck
+          image: "busybox:latest"
+          env:
+            - name: MANAGER_METRICS_SERVICE_CLUSTERIP
+              value: "default-splunk-otel-collector-operator"
+            - name: MANAGER_METRICS_SERVICE_PORT
+              value: "8443"
+          command:
+            - sh
+            - -c
+            - |
+              i=0
+              while [ $i -lt 300 ]; do
+                if wget -qO- "$MANAGER_METRICS_SERVICE_CLUSTERIP:$MANAGER_METRICS_SERVICE_PORT" 2>&1 | grep -q "HTTP/1.0 400 Bad Request"; then
+                  echo "Operator service is available."
+                  exit 0
+                fi
+                echo "Waiting for operator service to become available... (attempt $i)"
+                i=$((i + 1))
+                sleep 1
+              done
+              echo "Timeout reached. Operator service did not become available."
+              exit 1
+      restartPolicy: Never
diff --git a/examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/job-operator-webhook-startupapicheck.yaml b/examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/job-operator-webhook-startupapicheck.yaml
new file mode 100644
index 0000000000..60ec7c8419
--- /dev/null
+++ b/examples/enable-operator-and-auto-instrumentation/rendered_manifests/operator/job-operator-webhook-startupapicheck.yaml
@@ -0,0 +1,50 @@
+---
+# Source: splunk-otel-collector/templates/operator/job-operator-webhook-startupapicheck.yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: default-splunk-otel-collector-operator-webhook-startupapicheck
+  namespace: default
+  labels:
+    app.kubernetes.io/name: splunk-otel-collector
+    helm.sh/chart: splunk-otel-collector-0.113.0
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/instance: default
+    app.kubernetes.io/version: "0.113.0"
+    app: splunk-otel-collector
+    component: otel-operator
+    chart: splunk-otel-collector-0.113.0
+    release: default
+    heritage: Helm
+    app.kubernetes.io/component: otel-operator
+  annotations:
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-weight": "4"
+spec:
+  template:
+    spec:
+      containers:
+        - name: wget
+          image: "busybox:latest"
+          env:
+            - name: WEBHOOK_SERVICE_CLUSTERIP
+              value: "default-splunk-otel-collector-operator-webhook"
+            - name: WEBHOOK_SERVICE_PORT
+              value: "443"
+          command:
+            - sh
+            - -c
+            - |
+              i=0
+              while [ $i -lt 300 ]; do
+                if wget -qO- "$WEBHOOK_SERVICE_CLUSTERIP:$WEBHOOK_SERVICE_PORT" 2>&1 | grep -q "HTTP/1.0 400 Bad Request"; then
+                  echo "Operator webhook service is available."
+                  exit 0
+                fi
+                echo "Waiting for webhook service to become available... (attempt $i)"
+                i=$((i + 1))
+                sleep 1
+              done
+              echo "Timeout reached. Operator webhook service did not become available."
+              exit 1
+      restartPolicy: Never
diff --git a/helm-charts/splunk-otel-collector/templates/operator/job-operator-startupapicheck.yaml b/helm-charts/splunk-otel-collector/templates/operator/job-operator-startupapicheck.yaml
new file mode 100644
index 0000000000..68e617621d
--- /dev/null
+++ b/helm-charts/splunk-otel-collector/templates/operator/job-operator-startupapicheck.yaml
@@ -0,0 +1,46 @@
+{{- if .Values.operator.enabled }}
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ template "splunk-otel-collector.fullname" . }}-operator-startupapicheck
+  namespace: {{ template "splunk-otel-collector.namespace" . }}
+  labels:
+    {{- include "splunk-otel-collector.commonLabels" . | nindent 4 }}
+    app: {{ template "splunk-otel-collector.name" . }}
+    component: otel-operator
+    chart: {{ template "splunk-otel-collector.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    app.kubernetes.io/component: otel-operator
+  annotations:
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-weight": "4"
+spec:
+  template:
+    spec:
+      containers:
+        - name: startupapicheck
+          image: "busybox:latest"
+          env:
+            - name: MANAGER_METRICS_SERVICE_CLUSTERIP
+              value: "{{ template "splunk-otel-collector.fullname" . }}-operator"
+            - name: MANAGER_METRICS_SERVICE_PORT
+              value: "8443"
+          command:
+            - sh
+            - -c
+            - |
+              i=0
+              while [ $i -lt 300 ]; do
+                if wget -qO- "$MANAGER_METRICS_SERVICE_CLUSTERIP:$MANAGER_METRICS_SERVICE_PORT" 2>&1 | grep -q "HTTP/1.0 400 Bad Request"; then
+                  echo "Operator service is available."
+                  exit 0
+                fi
+                echo "Waiting for operator service to become available... (attempt $i)"
+                i=$((i + 1))
+                sleep 1
+              done
+              echo "Timeout reached. Operator service did not become available."
+              exit 1
+      restartPolicy: Never
+{{- end }}
diff --git a/helm-charts/splunk-otel-collector/templates/operator/job-operator-webhook-startupapicheck.yaml b/helm-charts/splunk-otel-collector/templates/operator/job-operator-webhook-startupapicheck.yaml
new file mode 100644
index 0000000000..a4bdadc979
--- /dev/null
+++ b/helm-charts/splunk-otel-collector/templates/operator/job-operator-webhook-startupapicheck.yaml
@@ -0,0 +1,46 @@
+{{- if .Values.operator.enabled }}
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ template "splunk-otel-collector.fullname" . }}-operator-webhook-startupapicheck
+  namespace: {{ template "splunk-otel-collector.namespace" . }}
+  labels:
+    {{- include "splunk-otel-collector.commonLabels" . | nindent 4 }}
+    app: {{ template "splunk-otel-collector.name" . }}
+    component: otel-operator
+    chart: {{ template "splunk-otel-collector.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+    app.kubernetes.io/component: otel-operator
+  annotations:
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-weight": "4"
+spec:
+  template:
+    spec:
+      containers:
+        - name: wget
+          image: "busybox:latest"
+          env:
+            - name: WEBHOOK_SERVICE_CLUSTERIP
+              value: "{{ template "splunk-otel-collector.fullname" . }}-operator-webhook"
+            - name: WEBHOOK_SERVICE_PORT
+              value: "443"
+          command:
+            - sh
+            - -c
+            - |
+              i=0
+              while [ $i -lt 300 ]; do
+                if wget -qO- "$WEBHOOK_SERVICE_CLUSTERIP:$WEBHOOK_SERVICE_PORT" 2>&1 | grep -q "HTTP/1.0 400 Bad Request"; then
+                  echo "Operator webhook service is available."
+                  exit 0
+                fi
+                echo "Waiting for webhook service to become available... (attempt $i)"
+                i=$((i + 1))
+                sleep 1
+              done
+              echo "Timeout reached. Operator webhook service did not become available."
+              exit 1
+      restartPolicy: Never
+{{- end }}
diff --git a/helm-charts/splunk-otel-collector/values.yaml b/helm-charts/splunk-otel-collector/values.yaml
index 81a779280a..86b0f47a41 100644
--- a/helm-charts/splunk-otel-collector/values.yaml
+++ b/helm-charts/splunk-otel-collector/values.yaml
@@ -1168,13 +1168,14 @@ operator:
   enabled: false
   admissionWebhooks:
     certManager:
-      # Annotate the certificate and issuer to ensure they are created after the cert-manager CRDs have been installed.
-      certificateAnnotations:
-        "helm.sh/hook": post-install,post-upgrade
-        "helm.sh/hook-weight": "1"
+      # Annotate the issuer and certificate to ensure they are created after the cert-manager CRDs
+      # have been installed and cert-manager is ready.
       issuerAnnotations:
         "helm.sh/hook": post-install,post-upgrade
-        "helm.sh/hook-weight": "1"
+        "helm.sh/hook-weight": "2"
+      certificateAnnotations:
+        "helm.sh/hook": post-install,post-upgrade
+        "helm.sh/hook-weight": "3"
   # Collector deployment via the operator is not supported at this time.
   # The collector image repository is specified here to meet operator subchart constraints.
   manager: