diff --git a/Makefile b/Makefile index 34969c27..5d50623b 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,9 @@ UNAME_S := $(shell uname -s) +NC := $(shell tput sgr0 2>/dev/null)# No Color; '#' abuts the value so no trailing space is captured ifeq ($(UNAME_S),Linux) COCKROACH_BIN ?= https://binaries.cockroachdb.com/cockroach-v23.2.0.linux-amd64.tgz HELM_BIN ?= https://get.helm.sh/helm-v3.14.0-linux-amd64.tar.gz - KIND_BIN ?= https://kind.sigs.k8s.io/dl/v0.21.0/kind-linux-amd64 + K3D_BIN ?= https://github.com/k3d-io/k3d/releases/download/v5.7.4/k3d-linux-amd64 KUBECTL_BIN ?= https://dl.k8s.io/release/v1.29.1/bin/linux/amd64/kubectl YQ_BIN ?= https://github.com/mikefarah/yq/releases/download/v4.31.2/yq_linux_amd64 JQ_BIN ?= https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 @@ -12,7 +13,7 @@ endif ifeq ($(UNAME_S),Darwin) COCKROACH_BIN ?= https://binaries.cockroachdb.com/cockroach-v23.2.0.darwin-10.9-amd64.tgz HELM_BIN ?= https://get.helm.sh/helm-v3.14.0-darwin-amd64.tar.gz - KIND_BIN ?= https://kind.sigs.k8s.io/dl/v0.21.0/kind-darwin-amd64 + K3D_BIN ?= https://github.com/k3d-io/k3d/releases/download/v5.7.4/k3d-darwin-amd64 KUBECTL_BIN ?= https://dl.k8s.io/release/v1.29.1/bin/darwin/amd64/kubectl YQ_BIN ?= https://github.com/mikefarah/yq/releases/download/v4.31.2/yq_darwin_amd64 JQ_BIN ?= https://github.com/stedolan/jq/releases/download/jq-1.6/jq-osx-amd64 @@ -20,8 +21,12 @@ ifeq ($(UNAME_S),Darwin) OPM_BIN ?= darwin-amd64-opm endif -KIND_CLUSTER ?= chart-testing -REPOSITORY ?= gcr.io/cockroachlabs-helm-charts/cockroach-self-signer-cert +K3D_CLUSTER ?= chart-testing +REGISTRY ?= gcr.io +REPOSITORY ?= cockroachlabs-helm-charts/cockroach-self-signer-cert +DOCKER_NETWORK_NAME ?= "k3d-${K3D_CLUSTER}" +LOCAL_REGISTRY ?= "localhost:5000" +CLUSTER_SIZE ?= 1 export BUNDLE_IMAGE ?= cockroach-operator-bundle export HELM_OPERATOR_IMAGE ?= cockroach-helm-operator @@ -56,7 +61,7 @@ build/chart: bin/helm ## build the helm chart to build/artifacts build/self-signer: bin/yq ## build the self-signer image @docker build
--platform=linux/amd64 -f build/docker-image/self-signer-cert-utility/Dockerfile \ --build-arg COCKROACH_VERSION=$(shell bin/yq '.appVersion' ./cockroachdb/Chart.yaml) \ - -t ${REPOSITORY}:$(shell bin/yq '.tls.selfSigner.image.tag' ./cockroachdb/values.yaml) . + -t ${REGISTRY}/${REPOSITORY}:$(shell bin/yq '.tls.selfSigner.image.tag' ./cockroachdb/values.yaml) . ##@ Release @@ -66,33 +71,61 @@ release: ## publish the build artifacts to S3 build-and-push/self-signer: bin/yq ## push the self-signer image @docker buildx build --platform=linux/amd64,linux/arm64 -f build/docker-image/self-signer-cert-utility/Dockerfile \ --build-arg COCKROACH_VERSION=$(shell bin/yq '.appVersion' ./cockroachdb/Chart.yaml) --push \ - -t ${REPOSITORY}:$(shell bin/yq '.tls.selfSigner.image.tag' ./cockroachdb/values.yaml) . + -t ${REGISTRY}/${REPOSITORY}:$(shell bin/yq '.tls.selfSigner.image.tag' ./cockroachdb/values.yaml) . ##@ Dev dev/clean: ## remove built artifacts @rm -r build/artifacts/ +## Setup/teardown registries for easier local dev +dev/registries/up: bin/k3d ## start the local registry unless a registry.localhost container is already running + @if [ "`docker ps -f name=registry.localhost -q`" = "" ]; then \ + echo "$(CYAN)Starting local Docker registry (for fast offline image push/pull)...$(NC)"; \ + ./tests/k3d/registries.sh up $(DOCKER_NETWORK_NAME); \ + fi + +dev/registries/down: bin/k3d ## stop the local registry if a registry.localhost container is running + @if [ "`docker ps -f name=registry.localhost -q`" != "" ]; then \ + echo "$(CYAN)Stopping local Docker registry (for fast offline image push/pull)...$(NC)"; \ + ./tests/k3d/registries.sh down $(DOCKER_NETWORK_NAME); \ + fi + +dev/registries/bounce: bin/k3d dev/registries/down dev/registries/up ## restart the local registry + +dev/push/local: bin/yq dev/registries/up ## build the self-signer image and push it to the local registry + @echo "$(CYAN)Pushing image to local registry...$(NC)" + @docker build --platform=linux/amd64 -f build/docker-image/self-signer-cert-utility/Dockerfile \ + --build-arg COCKROACH_VERSION=$(shell bin/yq '.appVersion' ./cockroachdb/Chart.yaml) --push \ + -t ${LOCAL_REGISTRY}/${REPOSITORY}:$(shell bin/yq
'.tls.selfSigner.image.tag' ./cockroachdb/values.yaml) . + ##@ Test +test/cluster: bin/k3d test/cluster/up ## start a local k3d cluster for testing + +test/cluster/bounce: bin/k3d test/cluster/down test/cluster/up ## restart a local k3d cluster for testing + +test/cluster/up: bin/k3d + @bin/k3d cluster list | grep $(K3D_CLUSTER) || ./tests/k3d/dev-cluster.sh up --name "$(K3D_CLUSTER)" --nodes $(CLUSTER_SIZE) -test/cluster: bin/kind ## start a local kind cluster for testing - @bin/kind get clusters -q | grep $(KIND_CLUSTER) || bin/kind create cluster --name $(KIND_CLUSTER) +test/cluster/down: bin/k3d + ./tests/k3d/dev-cluster.sh down --name "$(K3D_CLUSTER)" test/e2e/%: PKG=$* -test/e2e/%: bin/cockroach bin/kubectl bin/helm build/self-signer test/publish-images-to-kind ## run e2e tests for package (e.g. install or rotate) +test/e2e/%: bin/cockroach bin/kubectl bin/helm build/self-signer test/publish-images-to-k3d ## run e2e tests for package (e.g. install or rotate) @PATH="$(PWD)/bin:${PATH}" go test -timeout 30m -v ./tests/e2e/$(PKG)/... 
test/lint: bin/helm ## lint the helm chart @build/lint.sh && bin/helm lint cockroachdb IMAGE_LIST = cockroachdb/cockroach:v23.2.0 quay.io/jetstack/cert-manager-cainjector:v1.11.0 quay.io/jetstack/cert-manager-webhook:v1.11.0 quay.io/jetstack/cert-manager-controller:v1.11.0 quay.io/jetstack/cert-manager-ctl:v1.11.0 -test/publish-images-to-kind: bin/yq test/cluster ## publish signer and cockroach image to local kind registry +test/publish-images-to-k3d: bin/yq test/cluster ## publish signer and cockroach image to local k3d registry for i in $(IMAGE_LIST); do \ docker pull $$i; \ - bin/kind load docker-image $$i --name $(KIND_CLUSTER); \ + bin/k3d image import $$i -c $(K3D_CLUSTER); \ done - @bin/kind load docker-image \ - ${REPOSITORY}:$(shell bin/yq '.tls.selfSigner.image.tag' ./cockroachdb/values.yaml) \ - --name $(KIND_CLUSTER) + docker pull ${REGISTRY}/${REPOSITORY}:$(shell bin/yq '.tls.selfSigner.image.tag' ./cockroachdb/values.yaml); \ + bin/k3d image import \ + ${REGISTRY}/${REPOSITORY}:$(shell bin/yq '.tls.selfSigner.image.tag' ./cockroachdb/values.yaml) \ + -c $(K3D_CLUSTER) test/template: bin/cockroach bin/helm ## Run template tests @PATH="$(PWD)/bin:${PATH}" go test -v ./tests/template/... @@ -101,7 +134,7 @@ test/units: bin/cockroach ## Run unit tests in ./pkg/... @PATH="$(PWD)/bin:${PATH}" go test -v ./pkg/... 
##@ Binaries -bin: bin/cockroach bin/helm bin/kind bin/kubectl bin/yq ## install all binaries +bin: bin/cockroach bin/helm bin/k3d bin/kubectl bin/yq ## install all binaries bin/cockroach: ## install cockroach @mkdir -p bin @@ -113,10 +146,10 @@ bin/helm: ## install helm @curl -L $(HELM_BIN) | tar -xzf - -C bin/ --strip-components 1 @rm -f bin/README.md bin/LICENSE -bin/kind: ## install kind +bin/k3d: ## install k3d @mkdir -p bin - @curl -Lo bin/kind $(KIND_BIN) - @chmod +x bin/kind + @curl -Lo bin/k3d $(K3D_BIN) + @chmod +x bin/k3d bin/kubectl: ## install kubectl @mkdir -p bin diff --git a/cmd/self-signer/generate.go b/cmd/self-signer/generate.go index a094feb7..13aecd76 100644 --- a/cmd/self-signer/generate.go +++ b/cmd/self-signer/generate.go @@ -37,10 +37,12 @@ var ( caExpiry, nodeExpiry, clientExpiry string caSecret string clientOnly bool + operatorManaged bool ) func init() { generateCmd.Flags().BoolVar(&clientOnly, "client-only", false, "generate certificates for custom user") + generateCmd.Flags().BoolVar(&operatorManaged, "operator-managed", false, "generate certificates for operator managed cluster") rootCmd.AddCommand(generateCmd) } @@ -52,6 +54,7 @@ func generate(cmd *cobra.Command, args []string) { } genCert.CaSecret = caSecret + genCert.OperatorManaged = operatorManaged namespace, exists := os.LookupEnv("NAMESPACE") if !exists { diff --git a/cockroachdb/Chart.yaml b/cockroachdb/Chart.yaml index e5cabd55..2538142a 100644 --- a/cockroachdb/Chart.yaml +++ b/cockroachdb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v1 name: cockroachdb home: https://www.cockroachlabs.com -version: 14.0.5 +version: 14.0.6 appVersion: 24.2.4 description: CockroachDB is a scalable, survivable, strongly-consistent SQL database. 
icon: https://raw.githubusercontent.com/cockroachdb/cockroach/master/docs/media/cockroach_db.png diff --git a/cockroachdb/templates/_helpers.tpl b/cockroachdb/templates/_helpers.tpl index 9ef769a7..8a08ae1e 100644 --- a/cockroachdb/templates/_helpers.tpl +++ b/cockroachdb/templates/_helpers.tpl @@ -289,3 +289,23 @@ Validate that if user enabled tls, then either self-signed certificates or certi {{- end }} {{- end }} {{- end }} + + +{{/* Common labels that are applied to all managed objects. */}} +{{- define "cluster.labels" -}} +helm.sh/chart: {{ include "cockroachdb.chart" . }} +{{ include "cluster.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* + Selector labels defines the set of labels that can be used as selectors for + crdb nodes. +*/}} +{{- define "cluster.selectorLabels" -}} +app.kubernetes.io/name: {{ include "cockroachdb.clusterfullname" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/cockroachdb/templates/crdb.yaml b/cockroachdb/templates/crdb.yaml new file mode 100644 index 00000000..f3871dc6 --- /dev/null +++ b/cockroachdb/templates/crdb.yaml @@ -0,0 +1,48 @@ +{{- if .Values.operator.enabled }} +--- +apiVersion: crdb.cockroachlabs.com/v1alpha1 +kind: CrdbCluster +metadata: + name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "cluster.labels" . | nindent 4 }} + {{- with .Values.statefulset.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.operator.clusterSettings }} + clusterSettings: {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.operator.regions }} + regions: {{- toYaml . 
| nindent 4 }} + {{- end }} + features: + - reconcile + - reconcile-beta + template: + spec: + image: "{{ .Values.image.repository }}:{{ default .Chart.AppVersion .Values.image.tag }}" + certificates: + externalCertificates: + clientCaConfigMapName: {{ .Values.operator.certificates.externalCertificates.clientCaConfigMapName | default (printf "%s-client-ca" $.Release.Name) }} + nodeCaConfigMapName: {{ .Values.operator.certificates.externalCertificates.nodeCaConfigMapName | default (printf "%s-node-ca" $.Release.Name) }} + httpSecretName: {{ .Values.operator.certificates.externalCertificates.httpSecretName | default (printf "%s-ui-certs" $.Release.Name) }} + nodeClientSecretName: {{ .Values.operator.certificates.externalCertificates.nodeClientSecretName | default (printf "%s-node-client-certs" $.Release.Name) }} + nodeSecretName: {{ .Values.operator.certificates.externalCertificates.nodeSecretName | default (printf "%s-node-certs" $.Release.Name) }} + rootSqlClientSecretName: {{ .Values.operator.certificates.externalCertificates.rootSqlClientSecretName | default (printf "%s-client-certs" $.Release.Name) }} + {{- with .Values.operator.resources }} + resourceRequirements: {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ default .Release.Name .Values.operator.rbac.serviceAccountName }} + {{- if .Values.operator.loggingConf }} + loggingConfigMapName: {{ .Release.Name }}-logging + {{- end }} + # All properties below are solely to pass validation. They aren't used by the + # betaclusterctrl controller so the values don't matter so long as they're + # valid. 
+ dataStore: {} +{{- end }} \ No newline at end of file diff --git a/cockroachdb/templates/job-certSelfSigner.yaml b/cockroachdb/templates/job-certSelfSigner.yaml index 54ed2cad..80518540 100644 --- a/cockroachdb/templates/job-certSelfSigner.yaml +++ b/cockroachdb/templates/job-certSelfSigner.yaml @@ -66,6 +66,9 @@ spec: - --client-expiry={{ .Values.tls.certs.selfSigner.clientCertExpiryWindow }} - --node-duration={{ .Values.tls.certs.selfSigner.nodeCertDuration }} - --node-expiry={{ .Values.tls.certs.selfSigner.nodeCertExpiryWindow }} + {{- if .Values.operator.enabled }} + - --operator-managed=true + {{- end}} env: - name: STATEFULSET_NAME value: {{ template "cockroachdb.fullname" . }} diff --git a/cockroachdb/templates/job.init.yaml b/cockroachdb/templates/job.init.yaml index dbc1eaa1..04bf5e7d 100644 --- a/cockroachdb/templates/job.init.yaml +++ b/cockroachdb/templates/job.init.yaml @@ -1,6 +1,6 @@ {{ $isClusterInitEnabled := and (eq (len .Values.conf.join) 0) (not (index .Values.conf `single-node`)) }} {{ $isDatabaseProvisioningEnabled := .Values.init.provisioning.enabled }} -{{- if or $isClusterInitEnabled $isDatabaseProvisioningEnabled }} +{{- if and (or $isClusterInitEnabled $isDatabaseProvisioningEnabled) (not .Values.operator.enabled) }} {{ template "cockroachdb.tlsValidation" . }} kind: Job apiVersion: batch/v1 diff --git a/cockroachdb/templates/poddisruptionbudget.yaml b/cockroachdb/templates/poddisruptionbudget.yaml index f707e405..54268a53 100644 --- a/cockroachdb/templates/poddisruptionbudget.yaml +++ b/cockroachdb/templates/poddisruptionbudget.yaml @@ -1,3 +1,4 @@ +{{- if (not .Values.operator.enabled) }} kind: PodDisruptionBudget {{- if or (.Capabilities.APIVersions.Has "policy/v1") (semverCompare ">=1.21-0" .Capabilities.KubeVersion.Version) }} apiVersion: policy/v1 @@ -24,3 +25,4 @@ spec: {{- toYaml . 
| nindent 6 }} {{- end }} maxUnavailable: {{ .Values.statefulset.budget.maxUnavailable | int64 }} +{{- end }} \ No newline at end of file diff --git a/cockroachdb/templates/role-certSelfSigner.yaml b/cockroachdb/templates/role-certSelfSigner.yaml index 1cbaab3d..239fdb2e 100644 --- a/cockroachdb/templates/role-certSelfSigner.yaml +++ b/cockroachdb/templates/role-certSelfSigner.yaml @@ -30,4 +30,7 @@ rules: - apiGroups: [""] resources: ["pods"] verbs: ["delete", "get"] + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create", "get", "update", "delete"] {{- end }} diff --git a/cockroachdb/templates/role.yaml b/cockroachdb/templates/role.yaml index ebe5ce8a..3fee571b 100644 --- a/cockroachdb/templates/role.yaml +++ b/cockroachdb/templates/role.yaml @@ -20,4 +20,25 @@ rules: {{- else }} verbs: ["create", "get"] {{- end }} + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch"] +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "cockroachdb.fullname" . }}-node-reader + namespace: {{ .Release.Namespace | quote }} + labels: + helm.sh/chart: {{ template "cockroachdb.chart" . }} + app.kubernetes.io/name: {{ template "cockroachdb.name" . }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + {{- with .Values.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +rules: + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch"] {{- end }} diff --git a/cockroachdb/templates/rolebinding.yaml b/cockroachdb/templates/rolebinding.yaml index 00d9f9a5..60037c69 100644 --- a/cockroachdb/templates/rolebinding.yaml +++ b/cockroachdb/templates/rolebinding.yaml @@ -20,4 +20,26 @@ subjects: - kind: ServiceAccount name: {{ template "cockroachdb.serviceAccount.name" . 
}} namespace: {{ .Release.Namespace | quote }} +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "cockroachdb.fullname" . }}-node-reader + namespace: {{ .Release.Namespace | quote }} + labels: + helm.sh/chart: {{ template "cockroachdb.chart" . }} + app.kubernetes.io/name: {{ template "cockroachdb.name" . }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + {{- with .Values.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "cockroachdb.fullname" . }}-node-reader +subjects: + - kind: ServiceAccount + name: {{ template "cockroachdb.serviceAccount.name" . }} + namespace: {{ .Release.Namespace | quote }} {{- end }} diff --git a/cockroachdb/templates/statefulset.yaml b/cockroachdb/templates/statefulset.yaml index 318ae770..fde05a48 100644 --- a/cockroachdb/templates/statefulset.yaml +++ b/cockroachdb/templates/statefulset.yaml @@ -1,3 +1,4 @@ +{{- if not .Values.operator.enabled }} kind: StatefulSet apiVersion: {{ template "cockroachdb.statefulset.apiVersion" . }} metadata: @@ -433,3 +434,4 @@ spec: requests: storage: {{ .Values.storage.persistentVolume.size | quote }} {{- end }} +{{- end }} \ No newline at end of file diff --git a/cockroachdb/values.yaml b/cockroachdb/values.yaml index 137f8f22..59165e45 100644 --- a/cockroachdb/values.yaml +++ b/cockroachdb/values.yaml @@ -613,10 +613,10 @@ tls: # Image Placeholder for the selfSigner utility. This will be changed once the CI workflows for the image is in place. 
image: repository: cockroachlabs-helm-charts/cockroach-self-signer-cert - tag: "1.5" + tag: "1.6" pullPolicy: IfNotPresent credentials: {} registry: gcr.io # username: john_doe # password: changeme @@ -649,3 +649,101 @@ iap: # Create Google Cloud OAuth credentials and set client id and secret # clientId: # clientSecret: + +# Use the CRDB Operator to manage the CRDB clusters +operator: + enabled: false # opt-in: when true the chart renders a CrdbCluster and skips the StatefulSet, PodDisruptionBudget and init Job + # Default values for the cluster chart. + image: + repository: cockroachdb/cockroach + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the cluster chart's appVersion. + tag: "" + + nameOverride: "" + fullnameOverride: "" + + # A map of CRDB cluster settings. + # See https://www.cockroachlabs.com/docs/stable/cluster-settings.html + clusterSettings: ~ + + # Regions controls the number of CRDB nodes that are deployed per region. + #regions: ~ + # - code: us-central1 + # nodes: 3 + + # loggingConf is the logging configuration used by cockroach. + # More details: https://www.cockroachlabs.com/docs/stable/logging-overview.html + loggingConf: ~ + # sinks: + # stderr: + # channels: [health, dev] + # filter: INFO + + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as K3D. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the tilde after 'resources:'. + resources: ~ + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + certificates: + # Any extra alt names that should be added to the node certs. + extraNodeAltNames: [] + # - somevalue + # - somevalue.default + # - somevalue.default.svc.local + # the number of days generated certs are valid for + # validForDays: 3650 + + # External certificates for the CRDB cluster.
+ externalCertificates: + clientCaConfigMapName: my-release-cockroachdb-ca-secret-crt + nodeCaConfigMapName: my-release-cockroachdb-ca-secret-crt + httpSecretName: my-release-cockroachdb-client-secret + nodeClientSecretName: my-release-cockroachdb-client-secret + nodeSecretName: my-release-cockroachdb-node-secret + rootSqlClientSecretName: my-release-cockroachdb-client-secret + + # RBAC settings for CRDB nodes + rbac: + # By default the service account will be the resource name. It will + # be created during the installation along with a namespaced role and + # a cluster role with the policy rules below. + # + # Uncomment the line below to use a custom SA. If a custom SA is used, + # no roles or bindings will be created. + # serviceAccountName: my-custom-sa + + # Rules for the namespaced role bound to the service account. + # + # E.g. + # permissions: + # - apiGroup: [""] + # resources: ["secrets"] + # verbs: ["create", "get"] + rules: [] + + # Rules for the cluster role bound to the service account. + clusterRules: + # Get nodes allows the locality container to work as expected. It pulls the + # failure-domain.beta.kubernetes.io/zone label to determine node locality. + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get"] + serviceAccountName: ~ + + regions: + - code: us-east-1 + nodes: 3 + cloudProvider: k3d + namespace: default + + extras: + # Add a container with dnsutils (nslookup, dig, ping, etc.) installed. + dnsutils: false diff --git a/operator/.helmignore b/operator/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/operator/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/operator/Chart.yaml b/operator/Chart.yaml new file mode 100644 index 00000000..f64af337 --- /dev/null +++ b/operator/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: operator +description: A Helm chart for managing the CockroachDB operator. +type: application +appVersion: "latest" +version: 0.1.0 diff --git a/operator/README.md b/operator/README.md new file mode 100644 index 00000000..72438172 --- /dev/null +++ b/operator/README.md @@ -0,0 +1,5 @@ +# operator + +This is a subchart for installing the CockroachDB operator. + +> At the moment this is just a placeholder. diff --git a/operator/templates/_operator_certs.tpl b/operator/templates/_operator_certs.tpl new file mode 100644 index 00000000..aa03ec8e --- /dev/null +++ b/operator/templates/_operator_certs.tpl @@ -0,0 +1,17 @@ +{{/* + Generates a CA and a cert signed by it whose SANs are the + cockroach-webhook-service and cockroach-operator service names. + + Emits exactly three lines, which callers pick apart by index: + line 1 is ca.crt, line 2 is tls.crt, line 3 is tls.key. The cert + lifetime is certificate.validForDays, or 3650 days when unset. + +*/}} +{{- define "operator.certs" -}} +{{- $days := default 3650 .Values.certificate.validForDays | int -}} +{{- $ca := genCA "cockroach-operator-certs" 3650 -}} +{{- $cert := genSignedCert "cert" nil (list "cockroach-webhook-service.default.svc" "cockroach-operator.default.svc") $days $ca -}} +ca.crt: {{ $ca.Cert | b64enc }} +tls.crt: {{ $cert.Cert | b64enc }} +tls.key: {{ $cert.Key | b64enc }} +{{- end }} diff --git a/operator/templates/cockroachdb-operator-certs.yaml b/operator/templates/cockroachdb-operator-certs.yaml new file mode 100644 index 00000000..44716a26 --- /dev/null +++ b/operator/templates/cockroachdb-operator-certs.yaml @@ -0,0 +1,11 @@ +{{- $operatorCerts := splitList "\n" (include "operator.certs" .)
}} +apiVersion: v1 +kind: Secret +metadata: + name: cockroach-operator-certs + namespace: default # Change the namespace if needed +type: Opaque +data: + {{ index $operatorCerts 0 }} + {{ index $operatorCerts 1 }} + {{ index $operatorCerts 2 }} \ No newline at end of file diff --git a/operator/templates/operator.yaml b/operator/templates/operator.yaml new file mode 100644 index 00000000..bbc1e655 --- /dev/null +++ b/operator/templates/operator.yaml @@ -0,0 +1,529 @@ +# cockroach-operator.yaml defines how the operator are installed in a K8s +# cluster, including its deployment and permissions. +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: cockroach-operator +value: 500000000 + +--- +# Configure the permissions the operator will have in the K8s cluster. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + creationTimestamp: null + name: cockroach-operator-role +rules: + # Namespace permissions should be dropped once this K8s bug is fixed. + # https://github.com/kubernetes/kubernetes/issues/115819 + - apiGroups: + - "" + resources: + - namespaces + verbs: + - list + - apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + # NOTE: The operator writes to events when there is leader election activity. It also writes autoscaler events which + # seem to be requiring a patch permission. 
+ - apiGroups: + - "" + resources: + - events + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - rbac.authorization.k8s.io + resources: + - clusterroles + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - rbac.authorization.k8s.io + resources: + - roles + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - rbac.authorization.k8s.io + resources: + - clusterrolebindings + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - rbac.authorization.k8s.io + resources: + - rolebindings + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - list + - update + - apiGroups: + - networking.k8s.io + resources: + - networkpolicies + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - "" + resources: + - secrets + verbs: + - create + - get + - list + - patch + - update + - watch + - apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - create + - delete + - get + - list + - patch + - watch + - apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests/approval + verbs: + - update + - apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests/status + verbs: + - get + - patch + - update + - apiGroups: + - "" + resources: + - services + verbs: + - create + - delete + - get + - list + - patch + - watch + - apiGroups: + - "" + resources: + - endpoints + verbs: + - get + - list + - watch + - apiGroups: + - batch + resources: + - jobs + verbs: + - create + - delete + - get + - list + - patch + - watch + - apiGroups: + - "" + resources: + - configmaps + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - "" + resources: + - 
persistentvolumeclaims + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - "" + resources: + - pods/eviction + verbs: + - create + - apiGroups: + - "" + resources: + - pods/exec + verbs: + - create + - apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - create + - delete + - get + - list + - patch + - watch + - apiGroups: + - scheduling.k8s.io + resources: + - priorityclasses + verbs: + - create + - delete + - get + - list + - patch + - watch + # The "create" verb cannot be qualified with resourceNames, so grant the + # unqualified permission so that the operator can create a new webhook. But + # only allow the operator to get and patch its own webhook. + - apiGroups: + - admissionregistration.k8s.io + resources: + - validatingwebhookconfigurations + verbs: + - create + - apiGroups: + - admissionregistration.k8s.io + resources: + - validatingwebhookconfigurations + resourceNames: + - cockroach-webhook-config + verbs: + - get + - patch + # The "create" verb cannot be qualified with resourceNames, so grant the + # unqualified permission so that the operator can create new CRDs. But only + # allow the operator to get and patch its own CRDs. 
+ - apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - create + - list + - apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + resourceNames: + - crdbclusters.crdb.cockroachlabs.com + - crdbnodes.crdb.cockroachlabs.com + - crdbtenants.crdb.cockroachlabs.com + verbs: + - get + - patch + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - apiGroups: + - "apps" + resources: + - statefulsets + - deployments + verbs: + - create + - delete + - get + - list + - patch + - watch + - apiGroups: + - "" + resources: + - pods + - pods/portforward + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - crdb.cockroachlabs.com + resources: + - crdbclusters + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - crdb.cockroachlabs.com + resources: + - crdbclusters/status + verbs: + - get + - patch + - update + - apiGroups: + - crdb.cockroachlabs.com + resources: + - crdbnodes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - crdb.cockroachlabs.com + resources: + - crdbnodes/status + verbs: + - get + - patch + - update + - apiGroups: + - crdb.cockroachlabs.com + resources: + - crdbtenants + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - crdb.cockroachlabs.com + resources: + - crdbtenants/status + verbs: + - get + - patch + - update + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cockroach-operator-default + namespace: default + labels: + app: cockroach-operator + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cockroach-operator-default + labels: + app: cockroach-operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cockroach-operator-role +subjects: + - name: cockroach-operator-default + namespace: default + kind: 
ServiceAccount
+
+---
+# Expose the operator's prometheus and grpc servers to the SQLProxy's TenantDir.
+apiVersion: v1
+kind: Service
+metadata:
+  name: cockroach-operator
+  namespace: default
+  labels:
+    app: cockroach-operator
+spec:
+  ports:
+    - name: grpc
+      port: 9070
+      targetPort: 9070
+  selector:
+    app: cockroach-operator
+---
+# Configure how the operator will be deployed in the K8s cluster.
+# NOTE: Put this last, so that permissions have already been granted when it
+# starts up.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: cockroach-operator
+  namespace: default
+  labels:
+    app: cockroach-operator
+spec:
+  replicas: {{ .Values.numReplicas }}
+  selector:
+    matchLabels:
+      app: cockroach-operator
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxUnavailable: 1
+      maxSurge: 1
+  template:
+    metadata:
+      labels:
+        app: cockroach-operator
+    spec:
+      shareProcessNamespace: true
+      serviceAccountName: cockroach-operator-default
+      priorityClassName: cockroach-operator
+      containers:
+        - name: cockroach-operator
+          image: {{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag | default "release-2024-07-10-0-384-g53408b608c" }} # image.tag wins; an empty tag keeps the pinned release
+          args:
+            # Pin metrics port so it can be properly exposed by the "ports"
+            # field below even in the event of a change to the default value.
+            - "-metrics-addr=0.0.0.0:8080"
+            - "-rpc-addr=0.0.0.0:9070"
+          ports:
+            # Expose port under specific name so that performance metrics can
+            # more easily be scraped from container.
+            - name: operator
+              containerPort: 8080
+            - name: grpc
+              containerPort: 9070
+          env:
+            - name: WATCH_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            # Expose K8s node name as environment variable. The operator will
+            # use this to query the node and derive environmental information.
+            - name: NODE_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: spec.nodeName
+          resources:
+            requests:
+              cpu: 500m
+              memory: 1000Mi
+            limits:
+              cpu: 2000m
+              memory: 4000Mi
+          volumeMounts:
+            - name: certs
+              mountPath: /certs
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: 9080
+              scheme: HTTP
+            # Give the operator 20 seconds to start. Then, only restart the
+            # container if 3 consecutive probes fail over a period of 9 seconds.
+            # Note that the operator starts up much faster than 20 seconds, but
+            # restarting the container more frequently than once every ~30
+            # seconds isn't useful.
+            initialDelaySeconds: 20
+            periodSeconds: 3
+            timeoutSeconds: 3
+            failureThreshold: 3
+          readinessProbe:
+            httpGet:
+              # To change this to be the default /readyz - enable /healthz and switch the liveness probes to it
+              # on all clusters (done), change the readiness probe to use /healthz too and change the readiness
+              # endpoint from /health to the default /readyz. Once all clusters are updated - change the
+              # readiness probe to /readyz.
+              path: /health
+              port: 9080
+              scheme: HTTP
+            # Immediately start sending readiness probes. Stop sending traffic
+            # if 3 consecutive probes fail over a period of 9 seconds, as the
+            # container is probably overloaded or unhealthy, and it's best to
+            # back off and let it recover.
+            initialDelaySeconds: 0
+            periodSeconds: 3
+            timeoutSeconds: 3
+            failureThreshold: 3
+      # TODO(CC-27018) Use a node label that defines which nodes operator pods
+      # can run on, instead of blocking operator pods from running on some nodes.
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: crdb.io/app
+                    operator: NotIn
+                    values:
+                      - "in-situ"
+      volumes:
+        - name: certs
+          secret:
+            secretName: cockroach-operator-certs
diff --git a/operator/values.yaml b/operator/values.yaml
new file mode 100644
index 00000000..c96155ca
--- /dev/null
+++ b/operator/values.yaml
@@ -0,0 +1,11 @@
+# Default values for the operator chart.
+image:
+  # TODO: once this is published somewhere, use the real repo.
+  registry: "us-docker.pkg.dev/cockroach-cloud-images/development"
+  repository: "cockroach-operator"
+  pullPolicy: IfNotPresent
+  # Overrides the operator image tag; when empty, the release tag pinned in the operator template is used.
+  tag: ""
+certificate:
+  validForDays: ""
+numReplicas: 1
diff --git a/pkg/generator/generate_cert.go b/pkg/generator/generate_cert.go
index 5aeb6bee..66b656d1 100644
--- a/pkg/generator/generate_cert.go
+++ b/pkg/generator/generate_cert.go
@@ -68,6 +68,7 @@ type GenerateCert struct {
 	ClusterDomain string
 	ReadinessWait time.Duration
 	PodUpdateTimeout time.Duration
+	OperatorManaged bool
 }
 
 type certConfig struct {
@@ -240,6 +241,17 @@ func (rc *GenerateCert) generateCA(ctx context.Context, CASecretName string, nam
 		return errors.Wrap(err, "failed to update ca key secret ")
 	}
 
+	// If we are using the operator to manage secrets then we need to store the CA cert in a
+	// ConfigMap.
+	if rc.OperatorManaged {
+		cm := resource.CreateConfigMap(namespace, CASecretName, caCert,
+			resource.NewKubeResource(ctx, rc.client, namespace, kube.DefaultPersister))
+		if err = cm.Update(); err != nil {
+			return errors.Wrap(err, "failed to update CA cert in ConfigMap")
+		}
+		logrus.Infof("Generated and saved CA certificate in ConfigMap [%s]", cm.Name()) // CreateConfigMap appends "-crt", so log the real object name
+	}
+
 	logrus.Infof("Generated and saved CA key and certificate in secret [%s]", CASecretName)
 	return nil
 }
@@ -307,6 +319,16 @@ func (rc *GenerateCert) generateNodeCert(ctx context.Context, nodeSecretName str
 		fmt.Sprintf("*.%s.%s.svc.%s", rc.DiscoveryServiceName, namespace, rc.ClusterDomain),
 	}
 
+	if rc.OperatorManaged { // also list the "<discovery service>-join" host names in the node certificate
+		operatorJoinServiceHosts := []string{
+			fmt.Sprintf("%s-join", rc.DiscoveryServiceName),
+			fmt.Sprintf("%s-join.%s", rc.DiscoveryServiceName, namespace),
+			fmt.Sprintf("%s-join.%s.svc.%s", rc.DiscoveryServiceName, namespace, rc.ClusterDomain),
+		}
+
+		hosts = append(hosts, operatorJoinServiceHosts...)
+	}
+
 	// create the Node Pair certificates
 	if err = errors.Wrap(
 		security.CreateNodePair(
diff --git a/pkg/resource/config_map.go b/pkg/resource/config_map.go
new file mode 100644
index 00000000..619c8c00
--- /dev/null
+++ b/pkg/resource/config_map.go
@@ -0,0 +1,82 @@
+/*
+Copyright 2021 The Cockroach Authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package resource + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type ConfigMap struct { + Resource + + configMap *corev1.ConfigMap +} + +// CreateConfigMap creates a ConfigMap in the specified namespace +func CreateConfigMap(namespace string, secretName string, data []byte, r Resource) *ConfigMap { + // Define the ConfigMap object + configMap := &ConfigMap{ + Resource: r, + configMap: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-crt", secretName), + }, + Data: map[string]string{ + "ca.crt": string(data), + }, + }, + } + return configMap +} + +func (c *ConfigMap) Update() error { + data := c.configMap.Data + _, err := c.Persist(c.configMap, func() error { + c.configMap.Data = data + return nil + }) + + return err +} + +func LoadConfigMap(name string, r Resource) (*ConfigMap, error) { + c := &ConfigMap{ + Resource: r, + configMap: &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + }, + } + + if err := r.Fetch(c.configMap); err != nil { + return nil, err + } + + return c, nil +} + +func (c *ConfigMap) GetConfigMap() *corev1.ConfigMap { + return c.configMap +} + +func (c *ConfigMap) Name() string { + return c.configMap.Name +} diff --git a/pkg/resource/config_map_test.go b/pkg/resource/config_map_test.go new file mode 100644 index 00000000..938b451e --- /dev/null +++ b/pkg/resource/config_map_test.go @@ -0,0 +1,31 @@ +package resource_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/cockroachdb/helm-charts/pkg/kube" + "github.com/cockroachdb/helm-charts/pkg/resource" + "github.com/cockroachdb/helm-charts/pkg/testutils" +) + +func TestUpdateConfigMap(t *testing.T) { + scheme := testutils.InitScheme(t) + fakeClient := testutils.NewFakeClient(scheme) + namespace := "default" + name := "test-configmap" + + r := resource.NewKubeResource(context.TODO(), fakeClient, namespace, kube.DefaultPersister) + cm := 
resource.CreateConfigMap(namespace, name, []byte{}, r)
+
+	err := cm.Update()
+	require.NoError(t, err)
+
+	// fetch the configmap
+	cm, err = resource.LoadConfigMap(cm.Name(), r)
+	require.NoError(t, err)
+
+	require.Equal(t, "test-configmap-crt", cm.GetConfigMap().Name)
+}
diff --git a/tests/k3d/dev-cluster.sh b/tests/k3d/dev-cluster.sh
new file mode 100755
index 00000000..830d2595
--- /dev/null
+++ b/tests/k3d/dev-cluster.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+region="us-east-1"
+zones=3
+
+if [ $# -eq 0 ]
+  then
+    echo "No arguments supplied: "
+    echo " up: Start cluster."
+    echo " --nodes x: The cluster should have x nodes (default 1)"
+    echo " --version x: The version of Kubernetes (default 1.24.14)"
+    echo " --name x: The name of the cluster (default local)"
+    echo " --network_name x: The name of the cluster's network (default k3d-\${name})"
+    echo " --region x: The name of the cluster's region for node labels topology.kubernetes.io/region (default us-east-1)"
+    echo " --zones x: The number of zones in the region for node labels topology.kubernetes.io/zone (default 3)"
+
+    echo " down: Delete cluster."
+
+    exit 1
+fi
+
+COMMAND="${1-}"
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+nodes=${nodes:-1}  # default node count, taken from $nodes like $version below (was read from the unrelated $environment)
+version=${version:-1.24.14}
+
+while [ $# -gt 0 ]; do
+
+  if [[ $1 == --* ]]; then  # only a word that STARTS with "--" is an option; values containing "--" are left alone
+    param="${1#--}"
+    declare "${param}=${2-}"  # "--name x" becomes the shell variable name=x; passed as one quoted word
+    # echo $1 $2 // Optional to see the parameter:value result
+  fi
+
+  shift
+done
+
+name=${name:-local}
+network_name=${network_name:-"k3d-${name}"}
+
+# Function to set topology.kubernetes.io/zone labels in a round-robin fashion
+set_node_labels() {
+  local nodes=$1
+  local region=$2
+  local zones=$3
+  local labels=""
+  local az=(a b c d e f g h i j k l m n o p q r s t u v w x y z)
+
+  for ((i=0; i