From 9492c6bca51fdfa0080dda78106fb122f35f4f37 Mon Sep 17 00:00:00 2001
From: Tarun Gupta Akirala
Date: Thu, 9 Jan 2025 18:22:57 -0800
Subject: [PATCH] refactor: always have a dependency against COSI for kubecost (#2996)

---
 licenses.d2iq.yaml                           |   5 +
 services/kommander/0.14.0/defaults/cm.yaml   |   3 +-
 .../2.5.0/cosi-storage/cosi-bucket.yaml      |   2 +-
 .../2.5.0/cosi-storage/extra-images.txt      |   2 +
 .../2.5.0/cosi-storage/kustomization.yaml    |   1 +
 .../cosi-storage/object-store-check.yaml     | 140 ++++++++++++++++++
 services/kubecost/2.5.0/defaults/cm.yaml     |  30 ++--
 services/kubecost/2.5.0/pre-install.yaml     |   3 -
 .../2.5.0/pre-install/pre-install-jobs.yaml  |  72 +--------
 services/kubecost/2.5.0/release.yaml         |   3 +
 .../kubecost/2.5.0/release/extra-images.txt  |   1 -
 services/kubecost/2.5.0/release/release.yaml |  21 ++-
 12 files changed, 189 insertions(+), 94 deletions(-)
 create mode 100644 services/kubecost/2.5.0/cosi-storage/extra-images.txt
 create mode 100644 services/kubecost/2.5.0/cosi-storage/object-store-check.yaml

diff --git a/licenses.d2iq.yaml b/licenses.d2iq.yaml
index 1cddbd7ef..0e88ae7f7 100644
--- a/licenses.d2iq.yaml
+++ b/licenses.d2iq.yaml
@@ -608,3 +608,8 @@ resources:
       - url: https://github.com/ceph/ceph-cosi
         ref: ${image_tag}
         license_path: LICENSE
+  - container_image: gcr.io/k8s-staging-sig-storage/objectstorage-sidecar:v20240513-v0.1.0-35-gefb3255
+    sources:
+      - url: https://github.com/kubernetes-sigs/container-object-storage-interface
+        ref: main
+        license_path: LICENSE
diff --git a/services/kommander/0.14.0/defaults/cm.yaml b/services/kommander/0.14.0/defaults/cm.yaml
index cd2d770d5..9cea0a800 100644
--- a/services/kommander/0.14.0/defaults/cm.yaml
+++ b/services/kommander/0.14.0/defaults/cm.yaml
@@ -91,11 +91,10 @@ data:
     capimate:
       image:
         tag: v0.0.0-dev.0
-    managementApps:
+    managementApps: # List of apps that are specific to the management cluster. Used exclusively by the platform expansion workflow.
       - "ai-navigator-app"
       - "ai-navigator-cluster-info-agent"
       - "centralized-grafana"
-      - "kubecost"
       - "chartmuseum"
       - "dex"
       - "dex-k8s-authenticator"
diff --git a/services/kubecost/2.5.0/cosi-storage/cosi-bucket.yaml b/services/kubecost/2.5.0/cosi-storage/cosi-bucket.yaml
index 5663fd8c9..3d386b3c2 100644
--- a/services/kubecost/2.5.0/cosi-storage/cosi-bucket.yaml
+++ b/services/kubecost/2.5.0/cosi-storage/cosi-bucket.yaml
@@ -27,7 +27,7 @@ spec:
   valuesFrom:
     - kind: ConfigMap
       name: kubecost-2.5.0-d2iq-defaults
-      valuesKey: ${releaseNamespace}-namespace-${kubecostClusterMode}-values.yaml # This will ensure non kommander namespace installs do not get cosi buckets.
+      valuesKey: ${kubecostClusterMode:=primary}-cosi-values.yaml # This will ensure kubecost agent installs do not get cosi buckets.
       optional: true
     - kind: ConfigMap
       name: kubecost-overrides
diff --git a/services/kubecost/2.5.0/cosi-storage/extra-images.txt b/services/kubecost/2.5.0/cosi-storage/extra-images.txt
new file mode 100644
index 000000000..676729971
--- /dev/null
+++ b/services/kubecost/2.5.0/cosi-storage/extra-images.txt
@@ -0,0 +1,2 @@
+quay.io/ceph/cosi:v0.1.2
+gcr.io/k8s-staging-sig-storage/objectstorage-sidecar:v20240513-v0.1.0-35-gefb3255
diff --git a/services/kubecost/2.5.0/cosi-storage/kustomization.yaml b/services/kubecost/2.5.0/cosi-storage/kustomization.yaml
index c80c84207..afaab9f44 100644
--- a/services/kubecost/2.5.0/cosi-storage/kustomization.yaml
+++ b/services/kubecost/2.5.0/cosi-storage/kustomization.yaml
@@ -2,3 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
 - cosi-bucket.yaml
+- object-store-check.yaml
diff --git a/services/kubecost/2.5.0/cosi-storage/object-store-check.yaml b/services/kubecost/2.5.0/cosi-storage/object-store-check.yaml
new file mode 100644
index 000000000..2d2410c08
--- /dev/null
+++ b/services/kubecost/2.5.0/cosi-storage/object-store-check.yaml
@@ -0,0 +1,140 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: kubecost-object-store-check
+  namespace: ${releaseNamespace}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: kubecost-object-store-check
+  namespace: ${releaseNamespace}
+rules:
+  - apiGroups: [ "" ]
+    resources: [ "configmaps" ]
+    verbs: [ "get", "list", "create", "patch", "delete" ]
+  - apiGroups: [ "" ]
+    resources: [ "secrets" ]
+    verbs: [ "get", "list", "create", "patch" ]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: kubecost-object-store-check
+  namespace: ${releaseNamespace}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: kubecost-object-store-check
+subjects:
+  - kind: ServiceAccount
+    name: kubecost-object-store-check
+    namespace: ${releaseNamespace}
+---
+# This job will exit with one of the following outcomes:
+# - If namespace is not kommander namespace then exit successfully (targets attached clusters).
+# - If kubecostClusterMode is not set to primary then exit successfully (targets attached clusters that have been expanded).
+# - In kommander namespace and when running in primary mode, wait until the federated-store secret is found. Could be a user created secret or be created by cosi-bucket-kit helmrelease.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: kubecost-object-store-check
+  namespace: ${releaseNamespace}
+spec:
+  template:
+    metadata:
+      name: kubecost-object-store-check
+    spec:
+      serviceAccountName: kubecost-object-store-check
+      restartPolicy: OnFailure
+      priorityClassName: dkp-high-priority
+      containers:
+        - name: upgrade-kubecost-after-federated-store-secret-is-valid
+          image: "${kubetoolsImageRepository:=bitnami/kubectl}:${kubetoolsImageTag:=1.30.5}"
+          command:
+            - bash
+            - -c
+            - |
+              set -o nounset
+              set -o pipefail
+
+              echo() {
+                command echo "$(date)" "$@"
+              }
+
+              # Attached clusters (excluding platform expansion clusters) will exit at this check.
+              if [ "${releaseNamespace}" != "kommander" ]; then
+                echo "Skipping the step in non-kommander namespace."
+                exit 0
+              fi
+
+              # check the value of kubecostClusterMode and exit early if it is not equal to primary
+              # Platform expanded clusters will exit at this check.
+              if [ "${kubecostClusterMode:=primary}" != "primary" ]; then
+                echo "kubecostClusterMode is not set to primary. Skipping the step."
+                kubectl delete configmap kubecost-object-store-config -n ${releaseNamespace} --ignore-not-found
+                exit 0
+              fi
+
+              # Wait until federated-store secret is found.
+              while ! kubectl get secret -n ${releaseNamespace} federated-store; do
+                echo "federated-store secret not found. Waiting for it to be created."
+                sleep 5
+              done
+
+              echo "federated-store secret found. Looking up if BucketInfo and/or federated-store.yaml is populated..."
+
+              while true; do # Wait until BucketInfo or federated-store.yaml is found in the secret.
+                bucketInfo=$(kubectl get secret -n ${releaseNamespace} federated-store -o go-template='{{ .data.BucketInfo | base64decode }}')
+                federatedStoreYAML=$(kubectl get secret -n ${releaseNamespace} federated-store -o go-template='{{ index .data "federated-store.yaml" | base64decode }}')
+                if [ -n "$bucketInfo" ] || [ -n "$federatedStoreYAML" ]; then
+                  break
+                fi
+                echo "BucketInfo and federated-store.yaml not found in federated-store secret. Waiting for it to be populated."
+                sleep 5
+              done
+
+              # If bucketInfo is empty and federatedStoreYAML is not empty, then the secret is already updated (probably manually by the user). Still publish objectStoreStatus=ready so the kubecost release picks up the object storage values.
+              if [ -z "$bucketInfo" ] && [ -n "$federatedStoreYAML" ]; then
+                echo "BucketInfo is empty and federated-store.yaml is not empty. Using the federated-store.yaml as-is."
+                kubectl label secret federated-store -n ${releaseNamespace} app.kubernetes.io/kommander-kubecost-federated-store=true --overwrite
+                kubectl annotate secret federated-store -n ${releaseNamespace} app.kubernetes.io/kommander-kubecost-federated-store-unprocessed=true --overwrite
+                kubectl create configmap kubecost-object-store-config -n ${releaseNamespace} --save-config --from-literal=objectStoreStatus=ready --dry-run=client -o yaml | kubectl apply -f -
+                exit 0
+              fi
+
+              tmpfile=$(mktemp /tmp/federated-store.XXXXXX)
+              echo "Fetched bucketInfo from federated-store secret. Processing it..."
+              yq eval '
+              {
+                "type": "S3",
+                "config": {
+                  "bucket": .spec.bucketName,
+                  "endpoint": .spec.secretS3.endpoint | sub(":\\d+$", "") | sub("^http://", "") | sub("^https://", ""), # Remove port and protocol (if any).
+                  "region": .spec.secretS3.region,
+                  "access_key": .spec.secretS3.accessKeyID,
+                  "secret_key": .spec.secretS3.accessSecretKey,
+                  "insecure": .spec.secretS3.endpoint | test("^http://"), # Use insecure if endpoint is http (e.g.: cluster internal endpoint).
+                  "signature_version2": false, # Use signature version 4.
+                  "put_user_metadata": {
+                    "X-Amz-Acl": "bucket-owner-full-control"
+                  },
+                  "http_config": {
+                    "idle_conn_timeout": "90s",
+                    "response_header_timeout": "2m",
+                    "insecure_skip_verify": false
+                  },
+                  "trace": {
+                    "enable": false # Enable to debug errors (if any)
+                  },
+                  "part_size": 10240 # TODO(takirala): Deduce this value logically.
+                }
+              }' <<< "$bucketInfo" > "$tmpfile"
+              echo "Transformed bucketInfo to federated-store.yaml. Updating federated-store secret..."
+
+              kubectl create secret generic federated-store -n ${releaseNamespace} --from-file=federated-store.yaml="$tmpfile" --dry-run=client -o yaml | kubectl apply -f -
+              kubectl label secret federated-store -n ${releaseNamespace} app.kubernetes.io/kommander-kubecost-federated-store=true --overwrite
+              kubectl annotate secret federated-store -n ${releaseNamespace} app.kubernetes.io/kommander-kubecost-federated-store-processed=true --overwrite
+
+              kubectl create configmap kubecost-object-store-config -n ${releaseNamespace} --save-config --from-literal=objectStoreStatus=ready --dry-run=client -o yaml | kubectl apply -f -
+              rm "$tmpfile"
diff --git a/services/kubecost/2.5.0/defaults/cm.yaml b/services/kubecost/2.5.0/defaults/cm.yaml
index 3afe8def8..71b4fb33f 100644
--- a/services/kubecost/2.5.0/defaults/cm.yaml
+++ b/services/kubecost/2.5.0/defaults/cm.yaml
@@ -4,6 +4,7 @@ metadata:
   name: kubecost-2.5.0-d2iq-defaults
   namespace: ${releaseNamespace}
 data:
+  # Using just values.yaml will result in kubecost running in agent mode.
   values.yaml: |
     ---
     global:
@@ -168,9 +169,8 @@ data:
         enabled: false
         #key: YOUR_KEY
 
-  # Overrides for kommander namespace to run kubecost in non agent (but single cluster) mode
-  # Negate some of the values from the default values.yaml to ensure kubecost runs in single cluster mode
-  kommander-namespace-values.yaml: |
+  # Overrides for kubecost to run in primary mode (single cluster with no object storage)
+  primary-values.yaml: |
     global:
       grafana:
         enabled: true
@@ -266,17 +266,9 @@ data:
           defaultDatasourceEnabled: false
           label: grafana_datasource_kommander
 
-  # Overrides for multi cluster kubecost installations
-  kommander-namespace-multi-cluster-values.yaml: |
+  # Overrides for kubecost to create cosi resources.
+  primary-cosi-values.yaml: |
     ---
-    kubecostAggregator:
-      # deployMethod determines how Aggregator is deployed. Current options are
-      deployMethod: statefulset
-    federatedETL:
-      federatedCluster: true
-      agentOnly: false
-    kubecostModel:
-      federatedStorageConfigSecret: "federated-store" # Secret should have a key named "federated-store.yaml" with the federated storage credentials
     # COSI related resources
     bucketClasses: # Cluster scoped resource
       - name: kubecost-cosi-storage
@@ -324,3 +316,15 @@ data:
         capabilities:
           bucket: "*"
           user: "*"
+  # Overrides for kubecost to run in primary mode for multi cluster setup with object storage.
+  primary-object-storage-ready-values.yaml: |
+    ---
+    kubecostAggregator:
+      # deployMethod determines how Aggregator is deployed. Current options are
+      # "singlepod" (within cost-analyzer Pod) "statefulset" (separate
+      # StatefulSet), and "disabled".
+      deployMethod: statefulset
+    federatedETL:
+      federatedCluster: true
+    kubecostModel:
+      federatedStorageConfigSecret: "federated-store" # Secret should have a key named "federated-store.yaml" with the federated storage credentials
diff --git a/services/kubecost/2.5.0/pre-install.yaml b/services/kubecost/2.5.0/pre-install.yaml
index 4dc143eaa..328c9d2a7 100644
--- a/services/kubecost/2.5.0/pre-install.yaml
+++ b/services/kubecost/2.5.0/pre-install.yaml
@@ -10,9 +10,6 @@ spec:
   interval: 10m
   retryInterval: 1m
   path: ./services/kubecost/2.5.0/pre-install
-  dependsOn:
-    - name: kubecost-cosi-storage
-      namespace: ${releaseNamespace}
   sourceRef:
     kind: GitRepository
     name: management
diff --git a/services/kubecost/2.5.0/pre-install/pre-install-jobs.yaml b/services/kubecost/2.5.0/pre-install/pre-install-jobs.yaml
index f51075818..567b1dd69 100644
--- a/services/kubecost/2.5.0/pre-install/pre-install-jobs.yaml
+++ b/services/kubecost/2.5.0/pre-install/pre-install-jobs.yaml
@@ -1,4 +1,3 @@
-# Copy grafana-datasource cm after it has been created in the release.
 apiVersion: v1
 kind: ServiceAccount
 metadata:
@@ -14,9 +13,6 @@ rules:
   - apiGroups: [""]
     resources: [ "configmaps" ]
     verbs: ["get", "list", "create", "patch" ]
-  - apiGroups: [ "" ]
-    resources: [ "secrets" ]
-    verbs: [ "get", "list", "patch" ]
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -84,7 +80,7 @@ spec:
               echo "Checking for the existence of kubecost-cluster-info-configmap..."
 
               # Skip if the configmap already exists and has a non empty CLUSTER_ID value.
-              CURRENT_CLUSTER_ID=$(kubectl get configmap kubecost-cluster-info-configmap -n kommander -o jsonpath='{.data.CLUSTER_ID}')
+              CURRENT_CLUSTER_ID=$(kubectl get configmap kubecost-cluster-info-configmap -n ${releaseNamespace} -o jsonpath='{.data.CLUSTER_ID}' 2>/dev/null || command echo "")
               if [ -n "$CURRENT_CLUSTER_ID" ]; then
                 echo "CLUSTER_ID exists and is equal to $CURRENT_CLUSTER_ID."
                 exit 0
@@ -92,69 +88,3 @@ spec:
                 echo "CLUSTER_ID is either missing or empty. Populating it..."
               fi
               kubectl create configmap kubecost-cluster-info-configmap -n ${releaseNamespace} -oyaml --dry-run=client --save-config --from-literal=CLUSTER_ID=$(kubectl get namespace kube-system -o jsonpath="{.metadata.uid}") | kubectl apply -f -
-        - name: transform-cosi-secret-to-kubecost-secret
-          image: "${kubetoolsImageRepository:=bitnami/kubectl}:${kubetoolsImageTag:=1.30.5}"
-          command:
-            - bash
-            - -c
-            - |
-              set -o nounset
-              set -o errexit
-              set -o pipefail
-
-              echo() {
-                command echo $(date) "$@"
-              }
-
-              # If releaseNamespace is not kommander, skip the step.
-              if [ "${releaseNamespace}" != "kommander" ]; then
-                echo "Skipping the step in non-kommander namespace."
-                exit 0
-              fi
-
-              # check the value of kubecostClusterMode and exit early if it is not equal to multi-cluster.
-              if [ "${kubecostClusterMode}" != "multi-cluster" ]; then
-                echo "kubecostClusterMode is not set to multi-cluster. Skipping the step."
-                exit 0
-              fi
-
-              # Wait until federated-store secret is found.
-              while ! kubectl get secret -n ${releaseNamespace} federated-store; do
-                echo "federated-store secret not found. Waiting for it to be created."
-                sleep 5
-              done
-
-              echo "federated-store secret found. Fetching bucketInfo..."
-              bucketInfo=$(kubectl get secret -n ${releaseNamespace} federated-store -o go-template='{{ .data.BucketInfo | base64decode }}')
-              tmpfile=$(mktemp /tmp/federated-store.XXXXXX)
-
-              echo "Fetched bucketInfo from federated-store secret. Processing it..."
-              yq eval '
-              {
-                "type": "S3",
-                "config": {
-                  "bucket": .spec.bucketName,
-                  "endpoint": .spec.secretS3.endpoint | sub(":\\d+$", "") | sub("^http://", "") | sub("^https://", ""), # Remove port and protocol (if any).
-                  "region": .spec.secretS3.region,
-                  "access_key": .spec.secretS3.accessKeyID,
-                  "secret_key": .spec.secretS3.accessSecretKey,
-                  "insecure": .spec.secretS3.endpoint | test("^http://"), # Use insecure if endpoint is http (e.g.: cluster internal endpoint).
-                  "signature_version2": false, # Use signature version 4.
-                  "put_user_metadata": {
-                    "X-Amz-Acl": "bucket-owner-full-control"
-                  },
-                  "http_config": {
-                    "idle_conn_timeout": "90s",
-                    "response_header_timeout": "2m",
-                    "insecure_skip_verify": false
-                  },
-                  "trace": {
-                    "enable": false # Enable to debug errors (if any)
-                  },
-                  "part_size": 10240 # TODO(takirala): Deduce this value logically.
-                }
-              }' <<< "$bucketInfo" > "$tmpfile"
-              echo "Transformed bucketInfo to federated-store.yaml. Updating federated-store secret..."
-              kubectl create secret generic federated-store -n ${releaseNamespace} --from-file=federated-store.yaml="$tmpfile" --dry-run=client -o yaml | kubectl apply -f -
-              kubectl label secret federated-store -n ${releaseNamespace} app.kubernetes.io/processed-by-kommander-kubecost=true --overwrite
-              rm "$tmpfile"
diff --git a/services/kubecost/2.5.0/release.yaml b/services/kubecost/2.5.0/release.yaml
index b2d658cd7..915932088 100644
--- a/services/kubecost/2.5.0/release.yaml
+++ b/services/kubecost/2.5.0/release.yaml
@@ -24,3 +24,6 @@ spec:
     substituteFrom:
       - kind: ConfigMap
         name: substitution-vars
+      - kind: ConfigMap
+        name: kubecost-object-store-config # Optionally created by the kubecost-object-store-check Job (cosi-storage) to upgrade kubecost to use object storage.
+        optional: true
diff --git a/services/kubecost/2.5.0/release/extra-images.txt b/services/kubecost/2.5.0/release/extra-images.txt
index 56f415aa4..b3240e7d2 100644
--- a/services/kubecost/2.5.0/release/extra-images.txt
+++ b/services/kubecost/2.5.0/release/extra-images.txt
@@ -1,2 +1 @@
 {{ .Values.kubecostFrontend.image }}:prod-{{ $.Chart.AppVersion }}
-quay.io/ceph/cosi:v0.1.2
diff --git a/services/kubecost/2.5.0/release/release.yaml b/services/kubecost/2.5.0/release/release.yaml
index d2a8b6d11..80bf486b4 100644
--- a/services/kubecost/2.5.0/release/release.yaml
+++ b/services/kubecost/2.5.0/release/release.yaml
@@ -25,14 +25,15 @@ spec:
   releaseName: kubecost
   valuesFrom: # The order is important. The last entry will override the previous ones.
     - kind: ConfigMap
-      name: kubecost-2.5.0-d2iq-defaults # Configures the kubecost cluster as secondary.
+      name: kubecost-2.5.0-d2iq-defaults
+      valuesKey: values.yaml
     - kind: ConfigMap
       name: kubecost-2.5.0-d2iq-defaults
-      valuesKey: ${releaseNamespace}-namespace-values.yaml # Configures the kubecost cluster as primary.
+      valuesKey: ${kubecostClusterMode:=primary}-values.yaml # Configures the kubecost cluster as primary with no object storage for single cluster mode.
       optional: true
     - kind: ConfigMap
       name: kubecost-2.5.0-d2iq-defaults
-      valuesKey: ${releaseNamespace}-namespace-${kubecostClusterMode}-values.yaml # Configures the primary kubecost cluster with multi-cluster mode.
+      valuesKey: ${kubecostClusterMode:=primary}-object-storage-${objectStoreStatus:=not-applicable}-values.yaml # Configures the primary kubecost cluster with object storage for multi cluster mode.
       optional: true
   targetNamespace: ${releaseNamespace}
   postRenderers:
@@ -48,6 +49,20 @@ spec:
               path: /metadata/name
               value: kubecost-grafana-datasource
 ---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: kubecost-app-dashboard-info
+  namespace: ${releaseNamespace}
+  labels:
+    "kommander.d2iq.io/application": "kubecost"
+data:
+  name: "Kubecost"
+  dashboardLink: "/dkp/kommander/kubecost/frontend/overview.html"
+  docsLink: "https://docs.kubecost.com/"
+  # From: https://github.com/kubecost/cost-analyzer-helm-chart/blob/v2.5.0/cost-analyzer/Chart.yaml#L2
+  version: "2.5.0"
+---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata: