From 684baf7dbacef4b85a08db8de9934458745124d8 Mon Sep 17 00:00:00 2001 From: "Oleksandr K." Date: Wed, 25 Sep 2024 19:51:03 +0200 Subject: [PATCH 01/20] feat: support ruler sidecar in singleBinary mode (#13572) Co-authored-by: Trevor Whitney --- .../loki/templates/backend/clusterrole.yaml | 3 +- .../templates/backend/clusterrolebinding.yaml | 2 +- .../templates/single-binary/statefulset.yaml | 82 +++++++++++++++++++ 3 files changed, 84 insertions(+), 3 deletions(-) diff --git a/production/helm/loki/templates/backend/clusterrole.yaml b/production/helm/loki/templates/backend/clusterrole.yaml index e8631c35a501b..36c8a0fe0e805 100644 --- a/production/helm/loki/templates/backend/clusterrole.yaml +++ b/production/helm/loki/templates/backend/clusterrole.yaml @@ -1,5 +1,4 @@ -{{- $isSimpleScalable := eq (include "loki.deployment.isScalable" .) "true" -}} -{{- if and $isSimpleScalable (not .Values.rbac.namespaced) (not .Values.rbac.useExistingRole) }} +{{- if and (not .Values.rbac.namespaced) (not .Values.rbac.useExistingRole) }} kind: ClusterRole apiVersion: rbac.authorization.k8s.io/v1 metadata: diff --git a/production/helm/loki/templates/backend/clusterrolebinding.yaml b/production/helm/loki/templates/backend/clusterrolebinding.yaml index 619b70260cd4f..92f86a47d4f13 100644 --- a/production/helm/loki/templates/backend/clusterrolebinding.yaml +++ b/production/helm/loki/templates/backend/clusterrolebinding.yaml @@ -1,5 +1,5 @@ {{- $isSimpleScalable := eq (include "loki.deployment.isScalable" .) 
"true" -}} -{{- if and $isSimpleScalable (not .Values.rbac.namespaced) }} +{{- if (not .Values.rbac.namespaced) }} kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: diff --git a/production/helm/loki/templates/single-binary/statefulset.yaml b/production/helm/loki/templates/single-binary/statefulset.yaml index 7bd2b9813f609..5e28902e5677f 100644 --- a/production/helm/loki/templates/single-binary/statefulset.yaml +++ b/production/helm/loki/templates/single-binary/statefulset.yaml @@ -79,6 +79,75 @@ spec: {{- end }} {{- end }} containers: + {{- if .Values.sidecar.rules.enabled }} + - name: loki-sc-rules + {{- if .Values.sidecar.image.sha }} + image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}" + {{- else }} + image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.sidecar.image.pullPolicy }} + env: + - name: METHOD + value: {{ .Values.sidecar.rules.watchMethod }} + - name: LABEL + value: "{{ .Values.sidecar.rules.label }}" + {{- if .Values.sidecar.rules.labelValue }} + - name: LABEL_VALUE + value: {{ quote .Values.sidecar.rules.labelValue }} + {{- end }} + - name: FOLDER + value: "{{ .Values.sidecar.rules.folder }}" + - name: RESOURCE + value: {{ quote .Values.sidecar.rules.resource }} + {{- if .Values.sidecar.enableUniqueFilenames }} + - name: UNIQUE_FILENAMES + value: "{{ .Values.sidecar.enableUniqueFilenames }}" + {{- end }} + {{- if .Values.sidecar.rules.searchNamespace }} + - name: NAMESPACE + value: "{{ .Values.sidecar.rules.searchNamespace | join "," }}" + {{- end }} + {{- if .Values.sidecar.skipTlsVerify }} + - name: SKIP_TLS_VERIFY + value: "{{ .Values.sidecar.skipTlsVerify }}" + {{- end }} + {{- if .Values.sidecar.rules.script }} + - name: SCRIPT + value: "{{ .Values.sidecar.rules.script }}" + {{- end }} + {{- if .Values.sidecar.rules.watchServerTimeout }} + - name: WATCH_SERVER_TIMEOUT + value: "{{ 
.Values.sidecar.rules.watchServerTimeout }}" + {{- end }} + {{- if .Values.sidecar.rules.watchClientTimeout }} + - name: WATCH_CLIENT_TIMEOUT + value: "{{ .Values.sidecar.rules.watchClientTimeout }}" + {{- end }} + {{- if .Values.sidecar.rules.logLevel }} + - name: LOG_LEVEL + value: "{{ .Values.sidecar.rules.logLevel }}" + {{- end }} + {{- if .Values.sidecar.livenessProbe }} + livenessProbe: + {{- toYaml .Values.sidecar.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.sidecar.readinessProbe }} + readinessProbe: + {{- toYaml .Values.sidecar.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.sidecar.resources }} + resources: + {{- toYaml .Values.sidecar.resources | nindent 12 }} + {{- end }} + {{- if .Values.sidecar.securityContext }} + securityContext: + {{- toYaml .Values.sidecar.securityContext | nindent 12 }} + {{- end }} + volumeMounts: + - name: sc-rules-volume + mountPath: {{ .Values.sidecar.rules.folder | quote }} + {{- end}} - name: loki image: {{ include "loki.image" . }} imagePullPolicy: {{ .Values.loki.image.pullPolicy }} @@ -125,6 +194,10 @@ spec: - name: license mountPath: /etc/loki/license {{- end }} + {{- if .Values.sidecar.rules.enabled }} + - name: sc-rules-volume + mountPath: {{ .Values.sidecar.rules.folder | quote }} + {{- end}} {{- with .Values.singleBinary.extraVolumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} @@ -166,6 +239,15 @@ spec: secretName: enterprise-logs-license {{- end }} {{- end }} + {{- if .Values.sidecar.rules.enabled }} + - name: sc-rules-volume + {{- if .Values.sidecar.rules.sizeLimit }} + emptyDir: + sizeLimit: {{ .Values.sidecar.rules.sizeLimit }} + {{- else }} + emptyDir: {} + {{- end -}} + {{- end -}} {{- with .Values.singleBinary.extraVolumes }} {{- toYaml . 
| nindent 8 }} {{- end }} From cf1d4a31af5c376e82756eaaab267369f862265d Mon Sep 17 00:00:00 2001 From: Jonathan Date: Wed, 25 Sep 2024 10:55:02 -0700 Subject: [PATCH 02/20] fix: Rename mispelled filename (#14237) Co-authored-by: Trevor Whitney --- production/helm/loki/CHANGELOG.md | 1 + ...ery-frontent.yaml => poddisruptionbudget-query-frontend.yaml} | 0 2 files changed, 1 insertion(+) rename production/helm/loki/templates/query-frontend/{poddisruptionbudget-query-frontent.yaml => poddisruptionbudget-query-frontend.yaml} (100%) diff --git a/production/helm/loki/CHANGELOG.md b/production/helm/loki/CHANGELOG.md index f41649010447e..b39023e85effc 100644 --- a/production/helm/loki/CHANGELOG.md +++ b/production/helm/loki/CHANGELOG.md @@ -20,6 +20,7 @@ Entries should include a reference to the pull request that introduced the chang ## 6.14.0 - [FEATURE] Add additional service annotations for components in distributed mode +- [FIX] Rename loki/templates/query-frontend/poddisruptionbudget-query-frontend.yaml to fix spelling mistake. 
## 6.13.0 diff --git a/production/helm/loki/templates/query-frontend/poddisruptionbudget-query-frontent.yaml b/production/helm/loki/templates/query-frontend/poddisruptionbudget-query-frontend.yaml similarity index 100% rename from production/helm/loki/templates/query-frontend/poddisruptionbudget-query-frontent.yaml rename to production/helm/loki/templates/query-frontend/poddisruptionbudget-query-frontend.yaml From f74b44c0e3e8cc9e09dcad96e8cc9ab3bf019b46 Mon Sep 17 00:00:00 2001 From: Trevor Whitney Date: Wed, 25 Sep 2024 17:01:37 -0600 Subject: [PATCH 03/20] ci: remove drone, add GH action to build images (#14273) --- .drone/docker-manifest-build-image.tmpl | 17 - .drone/docker-manifest-ecr.tmpl | 21 - .drone/docker-manifest-operator.tmpl | 26 - .drone/docker-manifest.tmpl | 26 - .drone/drone.jsonnet | 669 --------- .drone/drone.yml | 1313 ----------------- .github/jsonnetfile.json | 2 +- .github/jsonnetfile.lock.json | 4 +- .github/release-workflows.jsonnet | 52 +- .../loki-release/workflows/build.libsonnet | 4 +- .../loki-release/workflows/validate.libsonnet | 1 - .github/workflows/images.yml | 433 ++++++ .github/workflows/minor-release-pr.yml | 20 +- .github/workflows/patch-release-pr.yml | 20 +- Makefile | 25 +- 15 files changed, 511 insertions(+), 2122 deletions(-) delete mode 100644 .drone/docker-manifest-build-image.tmpl delete mode 100644 .drone/docker-manifest-ecr.tmpl delete mode 100644 .drone/docker-manifest-operator.tmpl delete mode 100644 .drone/docker-manifest.tmpl delete mode 100644 .drone/drone.jsonnet delete mode 100644 .drone/drone.yml create mode 100644 .github/workflows/images.yml diff --git a/.drone/docker-manifest-build-image.tmpl b/.drone/docker-manifest-build-image.tmpl deleted file mode 100644 index b870dd5c6165f..0000000000000 --- a/.drone/docker-manifest-build-image.tmpl +++ /dev/null @@ -1,17 +0,0 @@ -image: grafana/{{config.target}} -tags: -{{#if build.tags}} -{{#each build.tags}} - - {{this}} -{{/each}} -{{/if}} -manifests: - - image: 
grafana/{{config.target}}-amd64 - platform: - architecture: amd64 - os: linux - - image: grafana/{{config.target}}-arm64 - platform: - architecture: arm64 - os: linux - variant: v8 diff --git a/.drone/docker-manifest-ecr.tmpl b/.drone/docker-manifest-ecr.tmpl deleted file mode 100644 index 1c8c98417ac05..0000000000000 --- a/.drone/docker-manifest-ecr.tmpl +++ /dev/null @@ -1,21 +0,0 @@ -image: public.ecr.aws/grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}} -tags: - - main -{{#if build.tag}} - - latest -{{/if}} -{{#if build.tags}} -{{#each build.tags}} - - {{this}} -{{/each}} -{{/if}} -manifests: - - image: public.ecr.aws/grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-amd64 - platform: - architecture: amd64 - os: linux - - image: public.ecr.aws/grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-arm64 - platform: - architecture: arm64 - os: linux - variant: v8 diff --git a/.drone/docker-manifest-operator.tmpl b/.drone/docker-manifest-operator.tmpl deleted file mode 100644 index f5aaa400867df..0000000000000 --- a/.drone/docker-manifest-operator.tmpl +++ /dev/null @@ -1,26 +0,0 @@ -image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "operator/v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}} -tags: - - main -{{#if build.tag}} - - latest -{{/if}} -{{#if build.tags}} -{{#each build.tags}} - - {{this}} -{{/each}} -{{/if}} -manifests: - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "operator/v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-amd64 - platform: - architecture: amd64 - os: linux - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "operator/v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-arm64 - 
platform: - architecture: arm64 - os: linux - variant: v8 - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "operator/v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-arm - platform: - architecture: arm - os: linux - variant: v7 diff --git a/.drone/docker-manifest.tmpl b/.drone/docker-manifest.tmpl deleted file mode 100644 index 7191e602b4ac0..0000000000000 --- a/.drone/docker-manifest.tmpl +++ /dev/null @@ -1,26 +0,0 @@ -image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}} -tags: - - main -{{#if build.tag}} - - latest -{{/if}} -{{#if build.tags}} -{{#each build.tags}} - - {{this}} -{{/each}} -{{/if}} -manifests: - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-amd64 - platform: - architecture: amd64 - os: linux - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-arm64 - platform: - architecture: arm64 - os: linux - variant: v8 - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-arm - platform: - architecture: arm - os: linux - variant: v7 diff --git a/.drone/drone.jsonnet b/.drone/drone.jsonnet deleted file mode 100644 index 718f0a84aa886..0000000000000 --- a/.drone/drone.jsonnet +++ /dev/null @@ -1,669 +0,0 @@ -local apps = ['loki', 'loki-canary', 'loki-canary-boringcrypto', 'logcli']; -local archs = ['amd64', 'arm64', 'arm']; - -local build_image_version = std.extVar('__build-image-version'); - -local drone_updater_plugin_image = 'us.gcr.io/kubernetes-dev/drone/plugins/updater@sha256:cbcb09c74f96a34c528f52bf9b4815a036b11fed65f685be216e0c8b8e84285b'; - -local onPRs = { - event: ['pull_request'], -}; - -local onTagOrMain = { - event: ['push', 'tag'], -}; - -local onTag = 
{ - event: ['tag'], -}; - -local onPath(path) = { - paths+: [path], -}; - -local pipeline(name) = { - kind: 'pipeline', - name: name, - steps: [], - trigger: { - // Only trigger pipelines for PRs, tags (v*), or pushes to "main". Excluding runs on grafana/loki (non fork) branches - ref: ['refs/heads/main', 'refs/heads/k???', 'refs/tags/v*', 'refs/pull/*/head'], - }, -}; - -local secret(name, vault_path, vault_key) = { - kind: 'secret', - name: name, - get: { - path: vault_path, - name: vault_key, - }, -}; -local docker_username_secret = secret('docker_username', 'infra/data/ci/docker_hub', 'username'); -local docker_password_secret = secret('docker_password', 'infra/data/ci/docker_hub', 'password'); -local ecr_key = secret('ecr_key', 'infra/data/ci/loki/aws-credentials', 'access_key_id'); -local ecr_secret_key = secret('ecr_secret_key', 'infra/data/ci/loki/aws-credentials', 'secret_access_key'); -local pull_secret = secret('dockerconfigjson', 'secret/data/common/gcr', '.dockerconfigjson'); -local github_secret = secret('github_token', 'infra/data/ci/github/grafanabot', 'pat'); -local gpg_passphrase = secret('gpg_passphrase', 'infra/data/ci/packages-publish/gpg', 'passphrase'); -local gpg_private_key = secret('gpg_private_key', 'infra/data/ci/packages-publish/gpg', 'private-key'); - -// Injected in a secret because this is a public repository and having the config here would leak our environment names -local updater_config_template = secret('updater_config_template', 'secret/data/common/loki_ci_autodeploy', 'updater-config-template.json'); -local helm_chart_auto_update_config_template = secret('helm-chart-update-config-template', 'secret/data/common/loki-helm-chart-auto-update', 'on-loki-release-config.json'); - - -local run(name, commands, env={}, image='grafana/loki-build-image:%s' % build_image_version) = { - name: name, - image: image, - commands: commands, - environment: env, -}; - -local make(target, container=true, args=[]) = run(target, [ - std.join(' ', [ - 
'make', - 'BUILD_IN_CONTAINER=' + container, - target, - ] + args), -]); - -// The only indication we have that we're running in a fork is the presence of a secret. -// If a secret is blank, it means we're running in a fork. -local skipMissingSecretPipelineStep(secretName) = run( - 'skip pipeline if missing secret', - [ - 'if [ "$${#TEST_SECRET}" -eq 0 ]; then', - ' echo "Missing a secret to run this pipeline. This branch needs to be re-pushed as a branch in main grafana/loki repository in order to run." && exit 78', - 'fi', - ], - image='alpine', - env={ - TEST_SECRET: { from_secret: secretName }, - }, -); - -local docker(arch, app) = { - name: '%s-image' % if $.settings.dry_run then 'build-' + app else 'publish-' + app, - image: if arch == 'arm' then 'plugins/docker:linux-arm' else 'plugins/docker', - settings: { - repo: 'grafana/%s' % app, - dockerfile: 'cmd/%s/Dockerfile' % app, - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - dry_run: false, - }, -}; - -local clients_docker(arch, app) = { - name: '%s-image' % if $.settings.dry_run then 'build-' + app else 'publish-' + app, - image: if arch == 'arm' then 'plugins/docker:linux-arm' else 'plugins/docker', - settings: { - repo: 'grafana/%s' % app, - dockerfile: 'clients/cmd/%s/Dockerfile' % app, - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - dry_run: false, - }, -}; - -local docker_operator(arch, operator) = { - name: '%s-image' % if $.settings.dry_run then 'build-' + operator else 'publish-' + operator, - image: if arch == 'arm' then 'plugins/docker:linux-arm' else 'plugins/docker', - settings: { - repo: 'grafana/%s' % operator, - context: 'operator', - dockerfile: 'operator/Dockerfile', - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - dry_run: false, - }, -}; - -local lambda_promtail_ecr(app) = { - name: 
'%s-image' % if $.settings.dry_run then 'build-' + app else 'publish-' + app, - image: 'cstyan/ecr', - privileged: true, - settings: { - repo: 'public.ecr.aws/grafana/lambda-promtail', - registry: 'public.ecr.aws/grafana', - dockerfile: 'tools/%s/Dockerfile' % app, - access_key: { from_secret: ecr_key.name }, - secret_key: { from_secret: ecr_secret_key.name }, - dry_run: false, - region: 'us-east-1', - }, -}; - -local arch_image(arch, tags='') = { - platform: { - os: 'linux', - arch: arch, - }, - steps: [{ - name: 'image-tag', - image: 'alpine', - commands: [ - 'apk add --no-cache bash git', - 'git fetch origin --tags', - 'echo $(./tools/image-tag)-%s > .tags' % arch, - ] + if tags != '' then ['echo ",%s" >> .tags' % tags] else [], - }], -}; - -local querytee() = pipeline('querytee-amd64') + arch_image('amd64', 'main') { - steps+: [ - // publish for tag or main - docker('amd64', 'querytee') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: { - repo: 'grafana/loki-query-tee', - }, - }, - ], -}; - -local fluentbit(arch) = pipeline('fluent-bit-' + arch) + arch_image(arch) { - steps+: [ - // publish for tag or main - clients_docker(arch, 'fluent-bit') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: { - repo: 'grafana/fluent-bit-plugin-loki', - }, - }, - ], -}; - -local fluentd() = pipeline('fluentd-amd64') + arch_image('amd64', 'main') { - steps+: [ - // publish for tag or main - clients_docker('amd64', 'fluentd') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: { - repo: 'grafana/fluent-plugin-loki', - }, - }, - ], -}; - -local logstash() = pipeline('logstash-amd64') + arch_image('amd64', 'main') { - steps+: [ - // publish for tag or main - clients_docker('amd64', 'logstash') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: { - repo: 'grafana/logstash-output-loki', - }, - }, - ], -}; - -local promtail(arch) = pipeline('promtail-' + arch) + arch_image(arch) { - steps+: [ - // publish for tag or main - 
clients_docker(arch, 'promtail') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: {}, - }, - ], -}; - -local lambda_promtail(arch) = pipeline('lambda-promtail-' + arch) + arch_image(arch) { - local skipStep = skipMissingSecretPipelineStep(ecr_key.name), // Needs ECR secrets to run - - steps+: [ - skipStep, - // publish for tag or main - lambda_promtail_ecr('lambda-promtail') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: {}, - }, - ], -}; - -local lokioperator(arch) = pipeline('lokioperator-' + arch) + arch_image(arch) { - steps+: [ - // publish for tag or main - docker_operator(arch, 'loki-operator') { - depends_on: ['image-tag'], - when: onTagOrMain { - ref: ['refs/heads/main', 'refs/tags/operator/v*'], - }, - settings+: {}, - }, - ], -}; - -local logql_analyzer() = pipeline('logql-analyzer') + arch_image('amd64') { - steps+: [ - // publish for tag or main - docker('amd64', 'logql-analyzer') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: { - repo: 'grafana/logql-analyzer', - }, - }, - ], -}; - -local multiarch_image(arch) = pipeline('docker-' + arch) + arch_image(arch) { - steps+: [ - // publish for tag or main - docker(arch, app) { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: {}, - } - for app in apps - ], -}; - -local manifest(apps) = pipeline('manifest') { - steps: std.foldl( - function(acc, app) acc + [{ - name: 'manifest-' + app, - image: 'plugins/manifest:1.4.0', - settings: { - // the target parameter is abused for the app's name, - // as it is unused in spec mode. See docker-manifest.tmpl - target: app, - spec: '.drone/docker-manifest.tmpl', - ignore_missing: false, - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - }, - depends_on: ['clone'] + ( - // Depend on the previous app, if any. 
- if std.length(acc) > 0 - then [acc[std.length(acc) - 1].name] - else [] - ), - }], - apps, - [], - ), - depends_on: [ - 'docker-%s' % arch - for arch in archs - ] + [ - 'promtail-%s' % arch - for arch in archs - ] + [ - 'fluent-bit-%s' % arch - for arch in archs - ], -}; - -local manifest_operator(app) = pipeline('manifest-operator') { - steps: [{ - name: 'manifest-' + app, - image: 'plugins/manifest:1.4.0', - settings: { - // the target parameter is abused for the app's name, - // as it is unused in spec mode. See docker-manifest-operator.tmpl - target: app, - spec: '.drone/docker-manifest-operator.tmpl', - ignore_missing: false, - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - }, - depends_on: ['clone'], - }], - depends_on: [ - 'lokioperator-%s' % arch - for arch in archs - ], -}; - - -local manifest_ecr(apps, archs) = pipeline('manifest-ecr') { - steps: std.foldl( - function(acc, app) acc + [{ - name: 'manifest-' + app, - image: 'plugins/manifest:1.4.0', - volumes: [{ - name: 'dockerconf', - path: '/.docker', - }], - settings: { - // the target parameter is abused for the app's name, - // as it is unused in spec mode. See docker-manifest-ecr.tmpl - target: app, - spec: '.drone/docker-manifest-ecr.tmpl', - ignore_missing: true, - }, - depends_on: ['clone'] + ( - // Depend on the previous app, if any. 
- if std.length(acc) > 0 - then [acc[std.length(acc) - 1].name] - else [] - ), - }], - apps, - [{ - name: 'ecr-login', - image: 'docker:dind', - volumes: [{ - name: 'dockerconf', - path: '/root/.docker', - }], - environment: { - AWS_ACCESS_KEY_ID: { from_secret: ecr_key.name }, - AWS_SECRET_ACCESS_KEY: { from_secret: ecr_secret_key.name }, - }, - commands: [ - 'apk add --no-cache aws-cli', - 'docker login --username AWS --password $(aws ecr-public get-login-password --region us-east-1) public.ecr.aws', - ], - depends_on: ['clone'], - }], - ), - volumes: [{ - name: 'dockerconf', - temp: {}, - }], - depends_on: [ - 'lambda-promtail-%s' % arch - for arch in archs - ], -}; - -local build_image_tag = '0.33.2'; -[ - pipeline('loki-build-image-' + arch) { - workspace: { - base: '/src', - path: 'loki', - }, - platform: { - os: 'linux', - arch: arch, - }, - steps: [ - { - name: 'push', - image: 'plugins/docker', - when: onTagOrMain + onPath('loki-build-image/**'), - environment: { - DOCKER_BUILDKIT: 1, - }, - settings: { - repo: 'grafana/loki-build-image', - context: 'loki-build-image', - dockerfile: 'loki-build-image/Dockerfile', - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - tags: [build_image_tag + '-' + arch], - dry_run: false, - }, - }, - ], - } - for arch in ['amd64', 'arm64'] -] + [ - pipeline('loki-build-image-publish') { - steps: [ - { - name: 'manifest', - image: 'plugins/manifest:1.4.0', - when: onTagOrMain + onPath('loki-build-image/**'), - settings: { - // the target parameter is abused for the app's name, as it is unused in spec mode. 
- target: 'loki-build-image:' + build_image_tag, - spec: '.drone/docker-manifest-build-image.tmpl', - ignore_missing: false, - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - }, - }, - ], - depends_on: [ - 'loki-build-image-%s' % arch - for arch in ['amd64', 'arm64'] - ], - }, - pipeline('helm-test-image') { - workspace: { - base: '/src', - path: 'loki', - }, - steps: [ - { - name: 'push-image', - image: 'plugins/docker', - when: onTagOrMain + onPath('production/helm/loki/src/helm-test/**'), - settings: { - repo: 'grafana/loki-helm-test', - dockerfile: 'production/helm/loki/src/helm-test/Dockerfile', - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - dry_run: false, - }, - }, - ], - }, - pipeline('documentation-checks') { - workspace: { - base: '/src', - path: 'loki', - }, - steps: [ - make('documentation-helm-reference-check', container=false) { - depends_on: ['clone'], - }, - ], - }, -] + [ - multiarch_image(arch) - for arch in archs -] + [ - promtail(arch) + ( - // When we're building Promtail for ARM, we want to use Dockerfile.arm32 to fix - // a problem with the published Drone image. See Dockerfile.arm32 for more - // information. - // - // This is really really hacky and a better more permanent solution will be to use - // buildkit. 
- if arch == 'arm' - then { - steps: [ - step + ( - if std.objectHas(step, 'settings') && step.settings.dockerfile == 'clients/cmd/promtail/Dockerfile' - then { - settings+: { - dockerfile: 'clients/cmd/promtail/Dockerfile.arm32', - }, - } - else {} - ) - for step in super.steps - ], - } - else {} - ) - for arch in archs -] + [ - lokioperator(arch) { - trigger+: { - ref: [ - 'refs/heads/main', - 'refs/tags/operator/v*', - 'refs/pull/*/head', - ], - }, - } - for arch in archs -] + [ - fluentbit(arch) - for arch in archs -] + [ - fluentd(), - logstash(), - querytee(), - manifest(['promtail', 'loki', 'loki-canary', 'loki-canary-boringcrypto', 'fluent-bit-plugin-loki']) { - trigger+: onTagOrMain, - }, - manifest_operator('loki-operator') { - trigger+: onTagOrMain { - ref: [ - 'refs/heads/main', - 'refs/tags/operator/v*', - ], - }, - }, - pipeline('deploy') { - local configFileName = 'updater-config.json', - trigger: onTagOrMain { - ref: ['refs/heads/main', 'refs/tags/v*'], - }, - depends_on: ['manifest'], - image_pull_secrets: [pull_secret.name], - steps: [ - { - name: 'prepare-updater-config', - image: 'alpine', - environment: { - MAJOR_MINOR_VERSION_REGEXP: '([0-9]+\\.[0-9]+)', - RELEASE_TAG_REGEXP: '^([0-9]+\\.[0-9]+\\.[0-9]+)$', - }, - commands: [ - 'apk add --no-cache bash git', - 'git fetch origin --tags', - 'echo $(./tools/image-tag) > .tag', - 'export RELEASE_TAG=$(cat .tag)', - // if the tag matches the pattern `D.D.D` then RELEASE_NAME="D-D-x", otherwise RELEASE_NAME="next" - 'export RELEASE_NAME=$([[ $RELEASE_TAG =~ $RELEASE_TAG_REGEXP ]] && echo $RELEASE_TAG | grep -oE $MAJOR_MINOR_VERSION_REGEXP | sed "s/\\./-/g" | sed "s/$/-x/" || echo "next")', - 'echo $RELEASE_NAME', - 'echo $PLUGIN_CONFIG_TEMPLATE > %s' % configFileName, - // replace placeholders with RELEASE_NAME and RELEASE TAG - 'sed -i "s/\\"{{release}}\\"/\\"$RELEASE_NAME\\"/g" %s' % configFileName, - 'sed -i "s/{{version}}/$RELEASE_TAG/g" %s' % configFileName, - ], - settings: { - 
config_template: { from_secret: updater_config_template.name }, - }, - depends_on: ['clone'], - }, - { - name: 'trigger', - image: drone_updater_plugin_image, - settings: { - github_token: { from_secret: github_secret.name }, - config_file: configFileName, - }, - depends_on: ['prepare-updater-config'], - }, - ], - }, - pipeline('update-loki-helm-chart-on-loki-release') { - local configFileName = 'updater-config.json', - depends_on: ['manifest'], - image_pull_secrets: [pull_secret.name], - trigger: { - // we need to run it only on Loki tags that starts with `v`. - ref: ['refs/tags/v*'], - }, - steps: [ - { - name: 'check-version-is-latest', - image: 'alpine', - when: onTag, - commands: [ - 'apk add --no-cache bash git', - 'git fetch --tags', - "latest_version=$(git tag -l 'v[0-9]*.[0-9]*.[0-9]*' | sort -V | tail -n 1 | sed 's/v//g')", - 'RELEASE_TAG=$(./tools/image-tag)', - 'if [ "$RELEASE_TAG" != "$latest_version" ]; then echo "Current version $RELEASE_TAG is not the latest version of Loki. 
The latest version is $latest_version" && exit 78; fi', - ], - }, - { - name: 'prepare-helm-chart-update-config', - image: 'alpine', - depends_on: ['check-version-is-latest'], - commands: [ - 'apk add --no-cache bash git', - 'git fetch origin --tags', - 'RELEASE_TAG=$(./tools/image-tag)', - 'echo $PLUGIN_CONFIG_TEMPLATE > %s' % configFileName, - // replace placeholders with RELEASE TAG - 'sed -i -E "s/\\{\\{release\\}\\}/$RELEASE_TAG/g" %s' % configFileName, - ], - settings: { - config_template: { from_secret: helm_chart_auto_update_config_template.name }, - }, - }, - { - name: 'trigger-helm-chart-update', - image: drone_updater_plugin_image, - settings: { - github_token: { - from_secret: github_secret.name, - }, - config_file: configFileName, - }, - depends_on: ['prepare-helm-chart-update-config'], - }, - ], - }, - logql_analyzer(), - pipeline('docker-driver') { - trigger+: onTagOrMain, - steps: [ - { - name: 'build and push', - image: 'grafana/loki-build-image:%s' % build_image_version, - depends_on: ['clone'], - environment: { - DOCKER_USERNAME: { from_secret: docker_username_secret.name }, - DOCKER_PASSWORD: { from_secret: docker_password_secret.name }, - }, - commands: [ - 'git fetch origin --tags', - 'make docker-driver-push', - ], - volumes: [ - { - name: 'docker', - path: '/var/run/docker.sock', - }, - ], - privileged: true, - }, - ], - volumes: [ - { - name: 'docker', - host: { - path: '/var/run/docker.sock', - }, - }, - ], - }, -] -+ [ - lambda_promtail(arch) - for arch in ['amd64', 'arm64'] -] + [ - manifest_ecr(['lambda-promtail'], ['amd64', 'arm64']) { - trigger+: { event: ['push'] }, - }, -] + [ - github_secret, - pull_secret, - docker_username_secret, - docker_password_secret, - ecr_key, - ecr_secret_key, - updater_config_template, - helm_chart_auto_update_config_template, - gpg_passphrase, - gpg_private_key, -] diff --git a/.drone/drone.yml b/.drone/drone.yml deleted file mode 100644 index 3710a4b9d3dda..0000000000000 --- a/.drone/drone.yml +++ 
/dev/null @@ -1,1313 +0,0 @@ ---- -kind: pipeline -name: loki-build-image-amd64 -platform: - arch: amd64 - os: linux -steps: -- environment: - DOCKER_BUILDKIT: 1 - image: plugins/docker - name: push - settings: - context: loki-build-image - dockerfile: loki-build-image/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-build-image - tags: - - 0.33.2-amd64 - username: - from_secret: docker_username - when: - event: - - push - - tag - paths: - - loki-build-image/** -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head -workspace: - base: /src - path: loki ---- -kind: pipeline -name: loki-build-image-arm64 -platform: - arch: arm64 - os: linux -steps: -- environment: - DOCKER_BUILDKIT: 1 - image: plugins/docker - name: push - settings: - context: loki-build-image - dockerfile: loki-build-image/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-build-image - tags: - - 0.33.2-arm64 - username: - from_secret: docker_username - when: - event: - - push - - tag - paths: - - loki-build-image/** -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head -workspace: - base: /src - path: loki ---- -depends_on: -- loki-build-image-amd64 -- loki-build-image-arm64 -kind: pipeline -name: loki-build-image-publish -steps: -- image: plugins/manifest:1.4.0 - name: manifest - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest-build-image.tmpl - target: loki-build-image:0.33.2 - username: - from_secret: docker_username - when: - event: - - push - - tag - paths: - - loki-build-image/** -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: helm-test-image -steps: -- image: plugins/docker - name: push-image - settings: - dockerfile: production/helm/loki/src/helm-test/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-helm-test - username: - from_secret: docker_username - when: - event: - - push - - tag - paths: - - production/helm/loki/src/helm-test/** -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head -workspace: - base: /src - path: loki ---- -kind: pipeline -name: documentation-checks -steps: -- commands: - - make BUILD_IN_CONTAINER=false documentation-helm-reference-check - depends_on: - - clone - environment: {} - image: grafana/loki-build-image:0.33.6 - name: documentation-helm-reference-check -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head -workspace: - base: /src - path: loki ---- -kind: pipeline -name: docker-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-image - settings: - dockerfile: cmd/loki/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-canary-image - settings: - dockerfile: cmd/loki-canary/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-canary - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-canary-boringcrypto-image - settings: - dockerfile: cmd/loki-canary-boringcrypto/Dockerfile - dry_run: false - password: - from_secret: 
docker_password - repo: grafana/loki-canary-boringcrypto - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-logcli-image - settings: - dockerfile: cmd/logcli/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/logcli - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: docker-arm64 -platform: - arch: arm64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-image - settings: - dockerfile: cmd/loki/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-canary-image - settings: - dockerfile: cmd/loki-canary/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-canary - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-canary-boringcrypto-image - settings: - dockerfile: cmd/loki-canary-boringcrypto/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-canary-boringcrypto - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-logcli-image - settings: - dockerfile: cmd/logcli/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/logcli - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - 
refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: docker-arm -platform: - arch: arm - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-loki-image - settings: - dockerfile: cmd/loki/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-loki-canary-image - settings: - dockerfile: cmd/loki-canary/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-canary - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-loki-canary-boringcrypto-image - settings: - dockerfile: cmd/loki-canary-boringcrypto/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-canary-boringcrypto - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-logcli-image - settings: - dockerfile: cmd/logcli/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/logcli - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: promtail-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-promtail-image - settings: - dockerfile: clients/cmd/promtail/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/promtail - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: promtail-arm64 -platform: - arch: arm64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-promtail-image - settings: - dockerfile: clients/cmd/promtail/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/promtail - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: promtail-arm -platform: - arch: arm - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-promtail-image - settings: - dockerfile: clients/cmd/promtail/Dockerfile.arm32 - dry_run: false - password: - from_secret: docker_password - repo: grafana/promtail - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: lokioperator-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-operator-image - settings: - context: operator - dockerfile: operator/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-operator - username: - from_secret: docker_username - when: - event: - - push - - tag - ref: - - refs/heads/main - - refs/tags/operator/v* -trigger: - ref: - - refs/heads/main - - refs/tags/operator/v* - - refs/pull/*/head ---- -kind: pipeline -name: lokioperator-arm64 -platform: - arch: arm64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-operator-image - settings: - context: operator - dockerfile: operator/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-operator - username: - from_secret: docker_username - when: - event: - - push - - tag - ref: - - refs/heads/main - - refs/tags/operator/v* -trigger: - ref: - - refs/heads/main - - refs/tags/operator/v* - - refs/pull/*/head ---- -kind: pipeline -name: lokioperator-arm -platform: - arch: arm - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-loki-operator-image - settings: - context: operator - dockerfile: operator/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-operator - username: - from_secret: docker_username - when: - event: - - 
push - - tag - ref: - - refs/heads/main - - refs/tags/operator/v* -trigger: - ref: - - refs/heads/main - - refs/tags/operator/v* - - refs/pull/*/head ---- -kind: pipeline -name: fluent-bit-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-fluent-bit-image - settings: - dockerfile: clients/cmd/fluent-bit/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/fluent-bit-plugin-loki - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: fluent-bit-arm64 -platform: - arch: arm64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-fluent-bit-image - settings: - dockerfile: clients/cmd/fluent-bit/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/fluent-bit-plugin-loki - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: fluent-bit-arm -platform: - arch: arm - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-fluent-bit-image - settings: - dockerfile: clients/cmd/fluent-bit/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/fluent-bit-plugin-loki - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: fluentd-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - - echo ",main" >> .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-fluentd-image - settings: - dockerfile: clients/cmd/fluentd/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/fluent-plugin-loki - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: logstash-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - - echo ",main" >> .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-logstash-image - settings: - dockerfile: clients/cmd/logstash/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/logstash-output-loki - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: querytee-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - - echo ",main" >> .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-querytee-image - settings: - dockerfile: cmd/querytee/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-query-tee - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -depends_on: -- docker-amd64 -- docker-arm64 -- docker-arm -- promtail-amd64 -- promtail-arm64 -- promtail-arm -- fluent-bit-amd64 -- fluent-bit-arm64 -- fluent-bit-arm -kind: pipeline -name: manifest -steps: -- depends_on: - - clone - image: plugins/manifest:1.4.0 - name: manifest-promtail - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest.tmpl - target: promtail - username: - from_secret: docker_username -- depends_on: - - clone - - manifest-promtail - image: plugins/manifest:1.4.0 - name: manifest-loki - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest.tmpl - target: loki - username: - from_secret: docker_username -- depends_on: - - clone - - manifest-loki - image: plugins/manifest:1.4.0 - name: manifest-loki-canary - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest.tmpl - target: loki-canary - username: - from_secret: docker_username -- depends_on: - - clone - - manifest-loki-canary - image: plugins/manifest:1.4.0 - name: manifest-loki-canary-boringcrypto - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest.tmpl - target: loki-canary-boringcrypto - username: - from_secret: docker_username -- depends_on: - - clone - - manifest-loki-canary-boringcrypto - image: plugins/manifest:1.4.0 - name: manifest-fluent-bit-plugin-loki - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest.tmpl - target: fluent-bit-plugin-loki - username: - from_secret: docker_username -trigger: - event: - - push - - tag - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -depends_on: -- lokioperator-amd64 -- lokioperator-arm64 -- lokioperator-arm -kind: pipeline -name: manifest-operator -steps: -- depends_on: - - clone - image: plugins/manifest:1.4.0 - name: manifest-loki-operator - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest-operator.tmpl - target: loki-operator - username: - from_secret: docker_username -trigger: - event: - - push - - tag - ref: - - refs/heads/main - - refs/tags/operator/v* ---- -depends_on: -- manifest -image_pull_secrets: -- dockerconfigjson -kind: pipeline -name: deploy -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag) > .tag - - export RELEASE_TAG=$(cat .tag) - - export RELEASE_NAME=$([[ $RELEASE_TAG =~ $RELEASE_TAG_REGEXP ]] && echo $RELEASE_TAG - | grep -oE $MAJOR_MINOR_VERSION_REGEXP | sed "s/\./-/g" | sed "s/$/-x/" || echo - "next") - - echo $RELEASE_NAME - - echo $PLUGIN_CONFIG_TEMPLATE > updater-config.json - - sed -i "s/\"{{release}}\"/\"$RELEASE_NAME\"/g" updater-config.json - - sed -i "s/{{version}}/$RELEASE_TAG/g" updater-config.json - depends_on: - - clone - environment: - MAJOR_MINOR_VERSION_REGEXP: ([0-9]+\.[0-9]+) - RELEASE_TAG_REGEXP: ^([0-9]+\.[0-9]+\.[0-9]+)$ - image: alpine - name: prepare-updater-config - settings: - config_template: - from_secret: updater_config_template -- depends_on: - - prepare-updater-config - image: us.gcr.io/kubernetes-dev/drone/plugins/updater@sha256:cbcb09c74f96a34c528f52bf9b4815a036b11fed65f685be216e0c8b8e84285b - name: trigger - settings: - config_file: updater-config.json - github_token: - from_secret: github_token -trigger: - event: - - push - - tag - ref: - - refs/heads/main - - refs/tags/v* ---- -depends_on: -- manifest -image_pull_secrets: -- dockerconfigjson -kind: pipeline -name: update-loki-helm-chart-on-loki-release -steps: -- commands: - - apk add --no-cache bash git - - git fetch --tags 
- - latest_version=$(git tag -l 'v[0-9]*.[0-9]*.[0-9]*' | sort -V | tail -n 1 | sed - 's/v//g') - - RELEASE_TAG=$(./tools/image-tag) - - if [ "$RELEASE_TAG" != "$latest_version" ]; then echo "Current version $RELEASE_TAG - is not the latest version of Loki. The latest version is $latest_version" && exit - 78; fi - image: alpine - name: check-version-is-latest - when: - event: - - tag -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - RELEASE_TAG=$(./tools/image-tag) - - echo $PLUGIN_CONFIG_TEMPLATE > updater-config.json - - sed -i -E "s/\{\{release\}\}/$RELEASE_TAG/g" updater-config.json - depends_on: - - check-version-is-latest - image: alpine - name: prepare-helm-chart-update-config - settings: - config_template: - from_secret: helm-chart-update-config-template -- depends_on: - - prepare-helm-chart-update-config - image: us.gcr.io/kubernetes-dev/drone/plugins/updater@sha256:cbcb09c74f96a34c528f52bf9b4815a036b11fed65f685be216e0c8b8e84285b - name: trigger-helm-chart-update - settings: - config_file: updater-config.json - github_token: - from_secret: github_token -trigger: - ref: - - refs/tags/v* ---- -kind: pipeline -name: logql-analyzer -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-logql-analyzer-image - settings: - dockerfile: cmd/logql-analyzer/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/logql-analyzer - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: docker-driver -steps: -- commands: - - git fetch origin --tags - - make docker-driver-push - depends_on: - - clone - environment: - DOCKER_PASSWORD: - from_secret: docker_password - DOCKER_USERNAME: - from_secret: docker_username - image: grafana/loki-build-image:0.33.6 - name: build and push - privileged: true - volumes: - - name: docker - path: /var/run/docker.sock -trigger: - event: - - push - - tag - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head -volumes: -- host: - path: /var/run/docker.sock - name: docker ---- -kind: pipeline -name: lambda-promtail-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- commands: - - if [ "$${#TEST_SECRET}" -eq 0 ]; then - - ' echo "Missing a secret to run this pipeline. This branch needs to be re-pushed - as a branch in main grafana/loki repository in order to run." && exit 78' - - fi - environment: - TEST_SECRET: - from_secret: ecr_key - image: alpine - name: skip pipeline if missing secret -- depends_on: - - image-tag - image: cstyan/ecr - name: publish-lambda-promtail-image - privileged: true - settings: - access_key: - from_secret: ecr_key - dockerfile: tools/lambda-promtail/Dockerfile - dry_run: false - region: us-east-1 - registry: public.ecr.aws/grafana - repo: public.ecr.aws/grafana/lambda-promtail - secret_key: - from_secret: ecr_secret_key - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: lambda-promtail-arm64 -platform: - arch: arm64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm64 > .tags - image: alpine - name: image-tag -- commands: - - if [ "$${#TEST_SECRET}" -eq 0 ]; then - - ' echo "Missing a secret to run this pipeline. This branch needs to be re-pushed - as a branch in main grafana/loki repository in order to run." && exit 78' - - fi - environment: - TEST_SECRET: - from_secret: ecr_key - image: alpine - name: skip pipeline if missing secret -- depends_on: - - image-tag - image: cstyan/ecr - name: publish-lambda-promtail-image - privileged: true - settings: - access_key: - from_secret: ecr_key - dockerfile: tools/lambda-promtail/Dockerfile - dry_run: false - region: us-east-1 - registry: public.ecr.aws/grafana - repo: public.ecr.aws/grafana/lambda-promtail - secret_key: - from_secret: ecr_secret_key - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -depends_on: -- lambda-promtail-amd64 -- lambda-promtail-arm64 -kind: pipeline -name: manifest-ecr -steps: -- commands: - - apk add --no-cache aws-cli - - docker login --username AWS --password $(aws ecr-public get-login-password --region - us-east-1) public.ecr.aws - depends_on: - - clone - environment: - AWS_ACCESS_KEY_ID: - from_secret: ecr_key - AWS_SECRET_ACCESS_KEY: - from_secret: ecr_secret_key - image: docker:dind - name: ecr-login - volumes: - - name: dockerconf - path: /root/.docker -- depends_on: - - clone - - ecr-login - image: plugins/manifest:1.4.0 - name: manifest-lambda-promtail - settings: - ignore_missing: true - spec: .drone/docker-manifest-ecr.tmpl - target: lambda-promtail - volumes: - - name: dockerconf - path: /.docker -trigger: - event: - - push - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head -volumes: -- name: dockerconf - temp: {} ---- -get: - name: pat - path: infra/data/ci/github/grafanabot -kind: secret -name: github_token ---- -get: - name: .dockerconfigjson - path: secret/data/common/gcr -kind: secret -name: dockerconfigjson ---- -get: - name: username - path: infra/data/ci/docker_hub -kind: secret -name: docker_username ---- -get: - name: password - path: infra/data/ci/docker_hub -kind: secret -name: docker_password ---- -get: - name: access_key_id - path: infra/data/ci/loki/aws-credentials -kind: secret -name: ecr_key ---- -get: - name: secret_access_key - path: infra/data/ci/loki/aws-credentials -kind: secret -name: ecr_secret_key ---- -get: - name: updater-config-template.json - path: secret/data/common/loki_ci_autodeploy -kind: secret -name: updater_config_template ---- -get: - name: on-loki-release-config.json - path: secret/data/common/loki-helm-chart-auto-update -kind: secret -name: helm-chart-update-config-template ---- -get: - name: passphrase - path: infra/data/ci/packages-publish/gpg -kind: secret -name: gpg_passphrase ---- -get: - name: private-key - path: infra/data/ci/packages-publish/gpg -kind: secret -name: gpg_private_key ---- -kind: signature -hmac: 3b3b039769ab8c44318749efec569ffe50c4cfb173f577422ec9d514054f0a9e - -... 
diff --git a/.github/jsonnetfile.json b/.github/jsonnetfile.json index 1038aebdcd66c..130eaeb4e984e 100644 --- a/.github/jsonnetfile.json +++ b/.github/jsonnetfile.json @@ -8,7 +8,7 @@ "subdir": "workflows" } }, - "version": "87cb5090c36b5332e7f21b5c59e136962d5f4f56" + "version": "98ce96e408db867d64fb95b59a99c24440ddf441" } ], "legacyImports": true diff --git a/.github/jsonnetfile.lock.json b/.github/jsonnetfile.lock.json index 9eef1872519bf..172082408f8c7 100644 --- a/.github/jsonnetfile.lock.json +++ b/.github/jsonnetfile.lock.json @@ -8,8 +8,8 @@ "subdir": "workflows" } }, - "version": "87cb5090c36b5332e7f21b5c59e136962d5f4f56", - "sum": "kVlVZPpPz8d/D6UGK9Hto+NeGy7z8NvGygcB1QboxWw=" + "version": "98ce96e408db867d64fb95b59a99c24440ddf441", + "sum": "pqEiutdl50ghtCY0wReq+Xa3AymHEyMa1OJQvRQXINI=" } ], "legacyImports": false diff --git a/.github/release-workflows.jsonnet b/.github/release-workflows.jsonnet index 72fdbfd5dc17e..9cf613dfc042d 100644 --- a/.github/release-workflows.jsonnet +++ b/.github/release-workflows.jsonnet @@ -17,11 +17,23 @@ local imageJobs = { querytee: build.image('loki-query-tee', 'cmd/querytee', platform=['linux/amd64']), }; +local weeklyImageJobs = { + loki: build.weeklyImage('loki', 'cmd/loki'), + fluentd: build.weeklyImage('fluent-plugin-loki', 'clients/cmd/fluentd', platform=['linux/amd64']), + 'fluent-bit': build.weeklyImage('fluent-bit-plugin-loki', 'clients/cmd/fluent-bit', platform=['linux/amd64']), + logstash: build.weeklyImage('logstash-output-loki', 'clients/cmd/logstash', platform=['linux/amd64']), + logcli: build.weeklyImage('logcli', 'cmd/logcli'), + 'loki-canary': build.weeklyImage('loki-canary', 'cmd/loki-canary'), + 'loki-canary-boringcrypto': build.weeklyImage('loki-canary-boringcrypto', 'cmd/loki-canary-boringcrypto'), + promtail: build.weeklyImage('promtail', 'clients/cmd/promtail'), + querytee: build.weeklyImage('loki-query-tee', 'cmd/querytee', platform=['linux/amd64']), +}; + local buildImageVersion = 
std.extVar('BUILD_IMAGE_VERSION'); local buildImage = 'grafana/loki-build-image:%s' % buildImageVersion; local golangCiLintVersion = 'v1.55.1'; -local imageBuildTimeoutMin = 40; +local imageBuildTimeoutMin = 60; local imagePrefix = 'grafana'; { @@ -94,4 +106,42 @@ local imagePrefix = 'grafana'; }, }, }), + 'images.yml': std.manifestYamlDoc({ + name: 'publish images', + on: { + push: { + branches: [ + 'k[0-9]+*', // This is a weird glob pattern, not a regexp, do not use ".*", see https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#filter-pattern-cheat-sheet + 'main', + ], + }, + }, + permissions: { + 'id-token': 'write', + contents: 'write', + 'pull-requests': 'write', + }, + jobs: { + check: { + uses: checkTemplate, + with: { + build_image: buildImage, + golang_ci_lint_version: golangCiLintVersion, + release_lib_ref: releaseLibRef, + skip_validation: false, + use_github_app_token: true, + }, + }, + } + std.mapWithKey(function(name, job) + job + + lokiRelease.job.withNeeds(['check']) + + { + env: { + BUILD_TIMEOUT: imageBuildTimeoutMin, + RELEASE_REPO: 'grafana/loki', + RELEASE_LIB_REF: releaseLibRef, + IMAGE_PREFIX: imagePrefix, + }, + }, weeklyImageJobs), + }), } diff --git a/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet b/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet index bd9d2e2e9b119..7343c7d72963d 100644 --- a/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet +++ b/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet @@ -42,7 +42,7 @@ local releaseLibStep = common.releaseLibStep; echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT |||), - step.new('Build and export', 'docker/build-push-action@v5') + step.new('Build and export', 'docker/build-push-action@v6') + step.withTimeoutMinutes('${{ fromJSON(env.BUILD_TIMEOUT) }}') + step.withIf('${{ fromJSON(needs.version.outputs.pr_created) }}') + 
step.withEnv({ @@ -93,7 +93,7 @@ local releaseLibStep = common.releaseLibStep; echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT |||), - step.new('Build and push', 'docker/build-push-action@v5') + step.new('Build and push', 'docker/build-push-action@v6') + step.withTimeoutMinutes('${{ fromJSON(env.BUILD_TIMEOUT) }}') + step.with({ context: context, diff --git a/.github/vendor/github.com/grafana/loki-release/workflows/validate.libsonnet b/.github/vendor/github.com/grafana/loki-release/workflows/validate.libsonnet index 40bf097049e83..44f4984e4b785 100644 --- a/.github/vendor/github.com/grafana/loki-release/workflows/validate.libsonnet +++ b/.github/vendor/github.com/grafana/loki-release/workflows/validate.libsonnet @@ -115,7 +115,6 @@ local validationJob = _validationJob(false); validationMakeStep('validate dev cluster config', 'validate-dev-cluster-config'), validationMakeStep('check example config docs', 'check-example-config-doc'), validationMakeStep('check helm reference doc', 'documentation-helm-reference-check'), - validationMakeStep('check drone drift', 'check-drone-drift'), ]) + { steps+: [ step.new('build docs website') diff --git a/.github/workflows/images.yml b/.github/workflows/images.yml new file mode 100644 index 0000000000000..f6d8ca6e08aab --- /dev/null +++ b/.github/workflows/images.yml @@ -0,0 +1,433 @@ +"jobs": + "check": + "uses": "grafana/loki-release/.github/workflows/check.yml@main" + "with": + "build_image": "grafana/loki-build-image:0.33.6" + "golang_ci_lint_version": "v1.55.1" + "release_lib_ref": "main" + "skip_validation": false + "use_github_app_token": true + "fluent-bit": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - 
"name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/clients/cmd/fluent-bit/Dockerfile" + "platforms": "linux/amd64" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/fluent-bit-plugin-loki:${{ steps.weekly-version.outputs.version }}" + "fluentd": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + 
echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/clients/cmd/fluentd/Dockerfile" + "platforms": "linux/amd64" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/fluent-plugin-loki:${{ steps.weekly-version.outputs.version }}" + "logcli": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/cmd/logcli/Dockerfile" + "platforms": "linux/amd64,linux/arm64,linux/arm" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/logcli:${{ steps.weekly-version.outputs.version }}" + "logstash": + "env": + "BUILD_TIMEOUT": 60 + 
"IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/clients/cmd/logstash/Dockerfile" + "platforms": "linux/amd64" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/logstash-output-loki:${{ steps.weekly-version.outputs.version }}" + "loki": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - 
"name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/cmd/loki/Dockerfile" + "platforms": "linux/amd64,linux/arm64,linux/arm" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/loki:${{ steps.weekly-version.outputs.version }}" + "loki-canary": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + 
"build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/cmd/loki-canary/Dockerfile" + "platforms": "linux/amd64,linux/arm64,linux/arm" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/loki-canary:${{ steps.weekly-version.outputs.version }}" + "loki-canary-boringcrypto": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/cmd/loki-canary-boringcrypto/Dockerfile" + "platforms": "linux/amd64,linux/arm64,linux/arm" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/loki-canary-boringcrypto:${{ steps.weekly-version.outputs.version }}" + "promtail": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": 
"pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/clients/cmd/promtail/Dockerfile" + "platforms": "linux/amd64,linux/arm64,linux/arm" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/promtail:${{ steps.weekly-version.outputs.version }}" + "querytee": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": 
"Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/cmd/querytee/Dockerfile" + "platforms": "linux/amd64" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/loki-query-tee:${{ steps.weekly-version.outputs.version }}" +"name": "publish images" +"on": + "push": + "branches": + - "k[0-9]+*" + - "main" +"permissions": + "contents": "write" + "id-token": "write" + "pull-requests": "write" \ No newline at end of file diff --git a/.github/workflows/minor-release-pr.yml b/.github/workflows/minor-release-pr.yml index 74c00fdf23c44..765744e35eb52 100644 --- a/.github/workflows/minor-release-pr.yml +++ b/.github/workflows/minor-release-pr.yml @@ -2,7 +2,7 @@ concurrency: group: "create-release-pr-${{ github.sha }}" env: BUILD_ARTIFACTS_BUCKET: "loki-build-artifacts" - BUILD_TIMEOUT: 40 + BUILD_TIMEOUT: 60 CHANGELOG_PATH: "CHANGELOG.md" DOCKER_USERNAME: "grafana" DRY_RUN: false @@ -198,7 +198,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -260,7 +260,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -322,7 +322,7 
@@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -386,7 +386,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -448,7 +448,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -512,7 +512,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -576,7 +576,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -640,7 +640,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -704,7 +704,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) 
}}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" diff --git a/.github/workflows/patch-release-pr.yml b/.github/workflows/patch-release-pr.yml index 0a62fa1c9228e..96f6d86e27750 100644 --- a/.github/workflows/patch-release-pr.yml +++ b/.github/workflows/patch-release-pr.yml @@ -2,7 +2,7 @@ concurrency: group: "create-release-pr-${{ github.sha }}" env: BUILD_ARTIFACTS_BUCKET: "loki-build-artifacts" - BUILD_TIMEOUT: 40 + BUILD_TIMEOUT: 60 CHANGELOG_PATH: "CHANGELOG.md" DOCKER_USERNAME: "grafana" DRY_RUN: false @@ -198,7 +198,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -260,7 +260,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -322,7 +322,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -386,7 +386,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: 
"release" @@ -448,7 +448,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -512,7 +512,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -576,7 +576,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -640,7 +640,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -704,7 +704,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" diff --git a/Makefile b/Makefile index bacc4bb9a429a..476b08b29c1f8 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ help: .PHONY: fluentd-image, fluentd-push, fluentd-test .PHONY: push-images push-latest save-images load-images promtail-image loki-image build-image build-image-push .PHONY: bigtable-backup, push-bigtable-backup -.PHONY: benchmark-store, drone, 
check-drone-drift, check-mod +.PHONY: benchmark-store, check-mod .PHONY: migrate migrate-image lint-markdown ragel .PHONY: doc check-doc .PHONY: validate-example-configs generate-example-config-doc check-example-config-doc @@ -36,7 +36,7 @@ DOCKER_IMAGE_DIRS := $(patsubst %/Dockerfile,%,$(DOCKERFILES)) # or you can override this with an environment variable BUILD_IN_CONTAINER ?= true -# ensure you run `make drone` and `make release-workflows` after changing this +# ensure you run `make release-workflows` after changing this BUILD_IMAGE_VERSION ?= 0.33.6 GO_VERSION := 1.22.6 @@ -699,27 +699,6 @@ benchmark-store: go run ./pkg/storage/hack/main.go $(GOTEST) ./pkg/storage/ -bench=. -benchmem -memprofile memprofile.out -cpuprofile cpuprofile.out -trace trace.out -# regenerate drone yaml -drone: -ifeq ($(BUILD_IN_CONTAINER),true) - @mkdir -p $(shell pwd)/.pkg - @mkdir -p $(shell pwd)/.cache - $(SUDO) docker run $(RM) $(TTY) -i \ - -e DRONE_SERVER -e DRONE_TOKEN \ - -v $(shell pwd)/.cache:/go/cache$(MOUNT_FLAGS) \ - -v $(shell pwd)/.pkg:/go/pkg$(MOUNT_FLAGS) \ - -v $(shell pwd):/src/loki$(MOUNT_FLAGS) \ - $(IMAGE_PREFIX)/loki-build-image:$(BUILD_IMAGE_VERSION) $@; -else - drone jsonnet --stream --format -V __build-image-version=$(BUILD_IMAGE_VERSION) --source .drone/drone.jsonnet --target .drone/drone.yml - drone lint .drone/drone.yml --trusted - drone sign --save grafana/loki .drone/drone.yml || echo "You must set DRONE_SERVER and DRONE_TOKEN. These values can be found on your [drone account](http://drone.grafana.net/account) page." 
-endif - -check-drone-drift: - ./tools/check-drone-drift.sh $(BUILD_IMAGE_VERSION) - - # support go modules check-mod: ifeq ($(BUILD_IN_CONTAINER),true) From 6143a548f129131a82233da9a84bbfdf1157fcb9 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 26 Sep 2024 08:35:36 +0200 Subject: [PATCH 04/20] chore: Disable bloom filtering for bounded sharding strategy (#14272) This commit temporarily disables bloom filtering chunk refs in the `GetShards()` call on the index gateway when using the `bounded` sharding strategy. This will result in chunks only being filtered once by the bloom gateway, reducing the number of checks that need to be done on the bloom filters for a single request. Signed-off-by: Christian Haudum --- pkg/indexgateway/gateway.go | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/pkg/indexgateway/gateway.go b/pkg/indexgateway/gateway.go index 745b114c08ac0..92d476d496673 100644 --- a/pkg/indexgateway/gateway.go +++ b/pkg/indexgateway/gateway.go @@ -413,7 +413,7 @@ func (g *Gateway) GetShards(request *logproto.ShardsRequest, server logproto.Ind return g.boundedShards(ctx, request, server, instanceID, p, forSeries) } -// boundedShards handles bounded shard requests, optionally using blooms and/or returning precomputed chunks. +// boundedShards handles bounded shard requests, optionally returning precomputed chunks. 
func (g *Gateway) boundedShards( ctx context.Context, req *logproto.ShardsRequest, @@ -466,18 +466,20 @@ func (g *Gateway) boundedShards( // 2) filter via blooms if enabled filters := v1.ExtractTestableLabelMatchers(p.Plan().AST) - if g.bloomQuerier != nil && len(filters) > 0 { - xs, err := g.bloomQuerier.FilterChunkRefs(ctx, instanceID, req.From, req.Through, refs, p.Plan()) - if err != nil { - level.Error(logger).Log("msg", "failed to filter chunk refs", "err", err) - } else { - filtered = xs - } - sp.LogKV( - "stage", "queried bloom gateway", - "err", err, - ) - } + // NOTE(chaudum): Temporarily disable bloom filtering of chunk refs, + // as this doubles the load on bloom gateways. + // if g.bloomQuerier != nil && len(filters) > 0 { + // xs, err := g.bloomQuerier.FilterChunkRefs(ctx, instanceID, req.From, req.Through, refs, p.Plan()) + // if err != nil { + // level.Error(logger).Log("msg", "failed to filter chunk refs", "err", err) + // } else { + // filtered = xs + // } + // sp.LogKV( + // "stage", "queried bloom gateway", + // "err", err, + // ) + // } g.metrics.preFilterChunks.WithLabelValues(routeShards).Observe(float64(ct)) g.metrics.postFilterChunks.WithLabelValues(routeShards).Observe(float64(len(filtered))) From fac3177814b8d2914eb3af618d571104eba18934 Mon Sep 17 00:00:00 2001 From: Cyril Tovena Date: Thu, 26 Sep 2024 09:51:59 +0200 Subject: [PATCH 05/20] feat(jsonnet): Allow to name prefix zoned ingesters (#14260) --- production/ksonnet/loki/multi-zone.libsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/production/ksonnet/loki/multi-zone.libsonnet b/production/ksonnet/loki/multi-zone.libsonnet index 606f70099d0f8..a3d48f21a96d8 100644 --- a/production/ksonnet/loki/multi-zone.libsonnet +++ b/production/ksonnet/loki/multi-zone.libsonnet @@ -100,8 +100,8 @@ local rolloutOperator = import 'rollout-operator.libsonnet'; }, )), - newIngesterZoneStatefulSet(zone, container):: - local name = '%(prefix)s-%(zone)s' % { prefix: 
$._config.multi_zone_ingester_name_prefix, zone: zone }; + newIngesterZoneStatefulSet(zone, container, name_prefix=''):: + local name = '%(prefix)s-%(zone)s' % { prefix: if name_prefix == '' then $._config.multi_zone_ingester_name_prefix else name_prefix, zone: zone }; self.newIngesterStatefulSet(name, container, with_anti_affinity=false) + statefulSet.mixin.metadata.withLabels({ 'rollout-group': 'ingester' }) + From f80d68a1edbd85a605be882eb0104b169343cf00 Mon Sep 17 00:00:00 2001 From: Cyril Tovena Date: Thu, 26 Sep 2024 10:14:32 +0200 Subject: [PATCH 06/20] feat(distributors): Use a pool of worker to push to ingesters. (#14245) --- docs/sources/shared/configuration.md | 4 ++ pkg/distributor/distributor.go | 70 +++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 12 deletions(-) diff --git a/docs/sources/shared/configuration.md b/docs/sources/shared/configuration.md index 08acf80bfcc22..15426e54d088f 100644 --- a/docs/sources/shared/configuration.md +++ b/docs/sources/shared/configuration.md @@ -2248,6 +2248,10 @@ ring: # CLI flag: -distributor.ring.instance-interface-names [instance_interface_names: | default = []] +# Number of workers to push batches to ingesters. +# CLI flag: -distributor.push-worker-count +[push_worker_count: | default = 256] + rate_store: # The max number of concurrent requests to make to ingester stream apis # CLI flag: -distributor.rate-store.max-request-parallelism diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 3ad586f3e596f..476bad507ea0b 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -10,6 +10,7 @@ import ( "sort" "strconv" "strings" + "sync" "time" "unicode" "unsafe" @@ -79,6 +80,7 @@ var allowedLabelsForLevel = map[string]struct{}{ type Config struct { // Distributors ring DistributorRing RingConfig `yaml:"ring,omitempty"` + PushWorkerCount int `yaml:"push_worker_count"` // For testing. 
factory ring_client.PoolFactory `yaml:"-"` @@ -102,7 +104,7 @@ func (cfg *Config) RegisterFlags(fs *flag.FlagSet) { cfg.DistributorRing.RegisterFlags(fs) cfg.RateStore.RegisterFlagsWithPrefix("distributor.rate-store", fs) cfg.WriteFailuresLogging.RegisterFlagsWithPrefix("distributor.write-failures-logging", fs) - + fs.IntVar(&cfg.PushWorkerCount, "distributor.push-worker-count", 256, "Number of workers to push batches to ingesters.") fs.BoolVar(&cfg.KafkaEnabled, "distributor.kafka-writes-enabled", false, "Enable writes to Kafka during Push requests.") fs.BoolVar(&cfg.IngesterEnabled, "distributor.ingester-writes-enabled", true, "Enable writes to Ingesters during Push requests. Defaults to true.") } @@ -166,7 +168,9 @@ type Distributor struct { replicationFactor prometheus.Gauge streamShardCount prometheus.Counter - usageTracker push.UsageTracker + usageTracker push.UsageTracker + ingesterTasks chan pushIngesterTask + ingesterTaskWg sync.WaitGroup // kafka kafkaWriter KafkaProducer @@ -253,6 +257,7 @@ func New( rateLimitStrat: rateLimitStrat, tee: tee, usageTracker: usageTracker, + ingesterTasks: make(chan pushIngesterTask), ingesterAppends: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ Namespace: constants.Loki, Name: "distributor_ingester_appends_total", @@ -354,6 +359,15 @@ func (d *Distributor) starting(ctx context.Context) error { } func (d *Distributor) running(ctx context.Context) error { + ctx, cancel := context.WithCancel(ctx) + defer func() { + cancel() + d.ingesterTaskWg.Wait() + }() + d.ingesterTaskWg.Add(d.cfg.PushWorkerCount) + for i := 0; i < d.cfg.PushWorkerCount; i++ { + go d.pushIngesterWorker(ctx) + } select { case <-ctx.Done(): return nil @@ -630,15 +644,26 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log } for ingester, streams := range streamsByIngester { - go func(ingester ring.InstanceDesc, samples []*streamTracker) { + func(ingester ring.InstanceDesc, samples []*streamTracker) { // Use a 
background context to make sure all ingesters get samples even if we return early localCtx, cancel := context.WithTimeout(context.Background(), d.clientCfg.RemoteTimeout) - defer cancel() localCtx = user.InjectOrgID(localCtx, tenantID) if sp := opentracing.SpanFromContext(ctx); sp != nil { localCtx = opentracing.ContextWithSpan(localCtx, sp) } - d.sendStreams(localCtx, ingester, samples, &tracker) + select { + case <-ctx.Done(): + cancel() + return + case d.ingesterTasks <- pushIngesterTask{ + ingester: ingester, + streamTracker: samples, + pushTracker: &tracker, + ctx: localCtx, + cancel: cancel, + }: + return + } }(ingesterDescs[ingester], streams) } } @@ -830,9 +855,30 @@ func (d *Distributor) truncateLines(vContext validationContext, stream *logproto validation.MutatedBytes.WithLabelValues(validation.LineTooLong, vContext.userID).Add(float64(truncatedBytes)) } +type pushIngesterTask struct { + streamTracker []*streamTracker + pushTracker *pushTracker + ingester ring.InstanceDesc + ctx context.Context + cancel context.CancelFunc +} + +func (d *Distributor) pushIngesterWorker(ctx context.Context) { + defer d.ingesterTaskWg.Done() + for { + select { + case <-ctx.Done(): + return + case task := <-d.ingesterTasks: + d.sendStreams(task) + } + } +} + // TODO taken from Cortex, see if we can refactor out an usable interface. -func (d *Distributor) sendStreams(ctx context.Context, ingester ring.InstanceDesc, streamTrackers []*streamTracker, pushTracker *pushTracker) { - err := d.sendStreamsErr(ctx, ingester, streamTrackers) +func (d *Distributor) sendStreams(task pushIngesterTask) { + defer task.cancel() + err := d.sendStreamsErr(task.ctx, task.ingester, task.streamTracker) // If we succeed, decrement each stream's pending count by one. 
// If we reach the required number of successful puts on this stream, then @@ -843,17 +889,17 @@ func (d *Distributor) sendStreams(ctx context.Context, ingester ring.InstanceDes // // The use of atomic increments here guarantees only a single sendStreams // goroutine will write to either channel. - for i := range streamTrackers { + for i := range task.streamTracker { if err != nil { - if streamTrackers[i].failed.Inc() <= int32(streamTrackers[i].maxFailures) { + if task.streamTracker[i].failed.Inc() <= int32(task.streamTracker[i].maxFailures) { continue } - pushTracker.doneWithResult(err) + task.pushTracker.doneWithResult(err) } else { - if streamTrackers[i].succeeded.Inc() != int32(streamTrackers[i].minSuccess) { + if task.streamTracker[i].succeeded.Inc() != int32(task.streamTracker[i].minSuccess) { continue } - pushTracker.doneWithResult(nil) + task.pushTracker.doneWithResult(nil) } } } From c8993168c57c405740c17b67336b6f0e30bcbf3c Mon Sep 17 00:00:00 2001 From: Tony Abboud Date: Thu, 26 Sep 2024 07:39:05 -0400 Subject: [PATCH 07/20] chore: Allow setting annotations for memberlist and query-scheduler-discovery services (#14211) **What this PR does / why we need it**: Both the memberlist and query-scheduler-discovery services do not provide a way to add annotations. 
--- docs/sources/setup/install/helm/reference.md | 9 +++++++++ production/helm/loki/CHANGELOG.md | 3 +++ production/helm/loki/Chart.yaml | 2 +- production/helm/loki/README.md | 2 +- .../templates/backend/query-scheduler-discovery.yaml | 7 +++++++ production/helm/loki/templates/service-memberlist.yaml | 7 +++++++ production/helm/loki/values.yaml | 1 + 7 files changed, 29 insertions(+), 2 deletions(-) diff --git a/docs/sources/setup/install/helm/reference.md b/docs/sources/setup/install/helm/reference.md index 21c1b00ac58a0..9add6158f3a6a 100644 --- a/docs/sources/setup/install/helm/reference.md +++ b/docs/sources/setup/install/helm/reference.md @@ -6485,6 +6485,15 @@ true "type": "RollingUpdate" } + + + + memberlist.service.annotations + object + +
+{}
+
diff --git a/production/helm/loki/CHANGELOG.md b/production/helm/loki/CHANGELOG.md index b39023e85effc..519755dbec944 100644 --- a/production/helm/loki/CHANGELOG.md +++ b/production/helm/loki/CHANGELOG.md @@ -13,6 +13,9 @@ Entries should include a reference to the pull request that introduced the chang [//]: # ( : do not remove this line. This locator is used by the CI pipeline to automatically create a changelog entry for each new Loki release. Add other chart versions and respective changelog entries bellow this line.) +## 6.15.0 + +- [ENHANCEMENT] Allow setting annotations for memberlist and query-scheduler-discovery services ## 6.14.1 - [BUGFIX] Fixed Memcached persistence options. diff --git a/production/helm/loki/Chart.yaml b/production/helm/loki/Chart.yaml index 24e94eb8bc508..56e48a535e070 100644 --- a/production/helm/loki/Chart.yaml +++ b/production/helm/loki/Chart.yaml @@ -3,7 +3,7 @@ name: loki description: Helm chart for Grafana Loki and Grafana Enterprise Logs supporting both simple, scalable and distributed modes. 
type: application appVersion: 3.1.1 -version: 6.14.1 +version: 6.15.0 home: https://grafana.github.io/helm-charts sources: - https://github.com/grafana/loki diff --git a/production/helm/loki/README.md b/production/helm/loki/README.md index dea0ec488b023..92008e768d6bc 100644 --- a/production/helm/loki/README.md +++ b/production/helm/loki/README.md @@ -1,6 +1,6 @@ # loki -![Version: 6.14.1](https://img.shields.io/badge/Version-6.14.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 3.1.1](https://img.shields.io/badge/AppVersion-3.1.1-informational?style=flat-square) +![Version: 6.15.0](https://img.shields.io/badge/Version-6.15.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 3.1.1](https://img.shields.io/badge/AppVersion-3.1.1-informational?style=flat-square) Helm chart for Grafana Loki and Grafana Enterprise Logs supporting both simple, scalable and distributed modes. diff --git a/production/helm/loki/templates/backend/query-scheduler-discovery.yaml b/production/helm/loki/templates/backend/query-scheduler-discovery.yaml index 14bca1fa041fc..4c357e53a431f 100644 --- a/production/helm/loki/templates/backend/query-scheduler-discovery.yaml +++ b/production/helm/loki/templates/backend/query-scheduler-discovery.yaml @@ -9,6 +9,13 @@ metadata: labels: {{- include "loki.backendSelectorLabels" . | nindent 4 }} prometheus.io/service-monitor: "false" + annotations: + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.backend.service.annotations }} + {{- toYaml . 
| nindent 4}} + {{- end }} spec: type: ClusterIP clusterIP: None diff --git a/production/helm/loki/templates/service-memberlist.yaml b/production/helm/loki/templates/service-memberlist.yaml index cacb5b1e872bb..3d46f234d4eab 100644 --- a/production/helm/loki/templates/service-memberlist.yaml +++ b/production/helm/loki/templates/service-memberlist.yaml @@ -6,6 +6,13 @@ metadata: namespace: {{ $.Release.Namespace }} labels: {{- include "loki.labels" . | nindent 4 }} + annotations: + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.memberlist.service.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP clusterIP: None diff --git a/production/helm/loki/values.yaml b/production/helm/loki/values.yaml index fe7b9273ee80a..a3ee28a805338 100644 --- a/production/helm/loki/values.yaml +++ b/production/helm/loki/values.yaml @@ -794,6 +794,7 @@ networkPolicy: memberlist: service: publishNotReadyAddresses: false + annotations: {} ###################################################################################################################### # # adminAPI configuration, enterprise only. 
From c874d2cc4487c13eb281005d76b03e6c343e933b Mon Sep 17 00:00:00 2001 From: Paul Rogers <129207811+paul1r@users.noreply.github.com> Date: Thu, 26 Sep 2024 09:49:43 -0400 Subject: [PATCH 08/20] chore: Wrap error to surface more context (#14284) --- pkg/storage/chunk/client/util/reader.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/storage/chunk/client/util/reader.go b/pkg/storage/chunk/client/util/reader.go index 2459b1e9ea43a..88cd408f9e374 100644 --- a/pkg/storage/chunk/client/util/reader.go +++ b/pkg/storage/chunk/client/util/reader.go @@ -2,6 +2,7 @@ package util import ( "bytes" + "fmt" "io" ) @@ -11,7 +12,7 @@ func ReadSeeker(r io.Reader) (io.ReadSeeker, error) { } data, err := io.ReadAll(r) if err != nil { - return nil, err + return nil, fmt.Errorf("Error in ReadSeeker ReadAll(): %w", err) } return bytes.NewReader(data), nil } From beca6f33662e8a43ea59943a4327a1c328960058 Mon Sep 17 00:00:00 2001 From: Cyril Tovena Date: Thu, 26 Sep 2024 16:28:22 +0200 Subject: [PATCH 09/20] feat(kafka): enqueue commit offset only once per batch process (#14278) Co-authored-by: George Robinson --- pkg/ingester/kafka_consumer.go | 69 +++++++++++++++++++++++------ pkg/ingester/kafka_consumer_test.go | 2 + pkg/kafka/ingester/consumer_test.go | 11 +++-- pkg/kafka/partition/committer.go | 3 +- pkg/kafka/partition/reader.go | 1 - 5 files changed, 68 insertions(+), 18 deletions(-) diff --git a/pkg/ingester/kafka_consumer.go b/pkg/ingester/kafka_consumer.go index 52c5ba96a661e..c2fe90ee052f6 100644 --- a/pkg/ingester/kafka_consumer.go +++ b/pkg/ingester/kafka_consumer.go @@ -2,12 +2,14 @@ package ingester import ( "context" + "errors" math "math" "sync" "time" "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/grafana/dskit/backoff" "github.com/grafana/dskit/user" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -39,24 +41,26 @@ func newConsumerMetrics(reg prometheus.Registerer) 
*consumerMetrics { func NewKafkaConsumerFactory(pusher logproto.PusherServer, logger log.Logger, reg prometheus.Registerer) partition.ConsumerFactory { metrics := newConsumerMetrics(reg) - return func(_ partition.Committer) (partition.Consumer, error) { + return func(committer partition.Committer) (partition.Consumer, error) { decoder, err := kafka.NewDecoder() if err != nil { return nil, err } return &kafkaConsumer{ - pusher: pusher, - logger: logger, - decoder: decoder, - metrics: metrics, + pusher: pusher, + logger: logger, + decoder: decoder, + metrics: metrics, + committer: committer, }, nil } } type kafkaConsumer struct { - pusher logproto.PusherServer - logger log.Logger - decoder *kafka.Decoder + pusher logproto.PusherServer + logger log.Logger + decoder *kafka.Decoder + committer partition.Committer metrics *consumerMetrics } @@ -72,14 +76,14 @@ func (kc *kafkaConsumer) Start(ctx context.Context, recordsChan <-chan []partiti level.Info(kc.logger).Log("msg", "shutting down kafka consumer") return case records := <-recordsChan: - kc.consume(records) + kc.consume(ctx, records) } } }() return wg.Wait } -func (kc *kafkaConsumer) consume(records []partition.Record) { +func (kc *kafkaConsumer) consume(ctx context.Context, records []partition.Record) { if len(records) == 0 { return } @@ -101,13 +105,52 @@ func (kc *kafkaConsumer) consume(records []partition.Record) { level.Error(kc.logger).Log("msg", "failed to decode record", "error", err) continue } - ctx := user.InjectOrgID(record.Ctx, record.TenantID) - if _, err := kc.pusher.Push(ctx, &logproto.PushRequest{ + recordCtx := user.InjectOrgID(record.Ctx, record.TenantID) + req := &logproto.PushRequest{ Streams: []logproto.Stream{stream}, + } + if err := retryWithBackoff(ctx, func(attempts int) error { + if _, err := kc.pusher.Push(recordCtx, req); err != nil { + level.Warn(kc.logger).Log("msg", "failed to push records", "err", err, "offset", record.Offset, "attempts", attempts) + return err + } + return nil }); 
err != nil { - level.Error(kc.logger).Log("msg", "failed to push records", "error", err) + level.Error(kc.logger).Log("msg", "exhausted all retry attempts, failed to push records", "err", err, "offset", record.Offset) } + kc.committer.EnqueueOffset(record.Offset) } kc.metrics.consumeLatency.Observe(time.Since(consumeStart).Seconds()) kc.metrics.currentOffset.Set(float64(maxOffset)) } + +func canRetry(err error) bool { + return errors.Is(err, ErrReadOnly) +} + +func retryWithBackoff(ctx context.Context, fn func(attempts int) error) error { + err := fn(0) + if err == nil { + return nil + } + if !canRetry(err) { + return err + } + backoff := backoff.New(ctx, backoff.Config{ + MinBackoff: 100 * time.Millisecond, + MaxBackoff: 5 * time.Second, + MaxRetries: 0, // Retry infinitely + }) + backoff.Wait() + for backoff.Ongoing() { + err = fn(backoff.NumRetries()) + if err == nil { + return nil + } + if !canRetry(err) { + return err + } + backoff.Wait() + } + return backoff.Err() +} diff --git a/pkg/ingester/kafka_consumer_test.go b/pkg/ingester/kafka_consumer_test.go index 7a2ba5887d08e..f9ac98c513e56 100644 --- a/pkg/ingester/kafka_consumer_test.go +++ b/pkg/ingester/kafka_consumer_test.go @@ -74,6 +74,8 @@ func (f *fakePusher) Push(ctx context.Context, in *logproto.PushRequest) (*logpr type noopCommitter struct{} +func (nc *noopCommitter) EnqueueOffset(_ int64) {} + func (noopCommitter) Commit(_ context.Context, _ int64) error { return nil } func TestConsumer(t *testing.T) { diff --git a/pkg/kafka/ingester/consumer_test.go b/pkg/kafka/ingester/consumer_test.go index c6e14ddebeca8..a0baa92ba86a7 100644 --- a/pkg/kafka/ingester/consumer_test.go +++ b/pkg/kafka/ingester/consumer_test.go @@ -33,6 +33,11 @@ func (m *mockCommitter) Commit(_ context.Context, offset int64) error { return nil } +func (m *mockCommitter) EnqueueOffset(offset int64) { + // For testing purposes, we'll just set the committed offset directly + m.committed = offset +} + func TestConsumer_PeriodicFlush(t 
*testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -46,7 +51,7 @@ func TestConsumer_PeriodicFlush(t *testing.T) { flushInterval := 100 * time.Millisecond maxFlushSize := int64(1000) - committer := &mockCommitter{} + committer := newMockCommitter() consumerFactory := NewConsumerFactory(metastore, storage, flushInterval, maxFlushSize, log.NewLogfmtLogger(os.Stdout), reg) consumer, err := consumerFactory(committer) require.NoError(t, err) @@ -99,7 +104,7 @@ func TestConsumer_ShutdownFlush(t *testing.T) { flushInterval := 1 * time.Hour maxFlushSize := int64(1000) - committer := &mockCommitter{} + committer := newMockCommitter() consumerFactory := NewConsumerFactory(metastore, storage, flushInterval, maxFlushSize, log.NewLogfmtLogger(os.Stdout), reg) consumer, err := consumerFactory(committer) require.NoError(t, err) @@ -153,7 +158,7 @@ func TestConsumer_MaxFlushSize(t *testing.T) { flushInterval := 1 * time.Hour maxFlushSize := int64(10) - committer := &mockCommitter{} + committer := newMockCommitter() consumerFactory := NewConsumerFactory(metastore, storage, flushInterval, maxFlushSize, log.NewLogfmtLogger(os.Stdout), reg) consumer, err := consumerFactory(committer) require.NoError(t, err) diff --git a/pkg/kafka/partition/committer.go b/pkg/kafka/partition/committer.go index c3a1f796e0e41..f9aeda3f0fc51 100644 --- a/pkg/kafka/partition/committer.go +++ b/pkg/kafka/partition/committer.go @@ -19,6 +19,7 @@ import ( // Committer defines an interface for committing offsets type Committer interface { Commit(ctx context.Context, offset int64) error + EnqueueOffset(offset int64) } // partitionCommitter is responsible for committing offsets for a specific Kafka partition @@ -113,7 +114,7 @@ func (r *partitionCommitter) autoCommitLoop(ctx context.Context) { } } -func (r *partitionCommitter) enqueueOffset(o int64) { +func (r *partitionCommitter) EnqueueOffset(o int64) { if r.kafkaCfg.ConsumerGroupOffsetCommitInterval > 0 { 
r.toCommit.Store(o) } diff --git a/pkg/kafka/partition/reader.go b/pkg/kafka/partition/reader.go index 9720e059ae566..9972d13307e8b 100644 --- a/pkg/kafka/partition/reader.go +++ b/pkg/kafka/partition/reader.go @@ -123,7 +123,6 @@ func (p *Reader) startFetchLoop(ctx context.Context) <-chan []Record { return default: records <- p.poll(ctx) - p.committer.enqueueOffset(p.lastProcessedOffset) } } }() From 923a3e41c8a6f96cf36d1e6276655ad14f5f1eea Mon Sep 17 00:00:00 2001 From: Trevor Whitney Date: Thu, 26 Sep 2024 08:46:20 -0600 Subject: [PATCH 10/20] ci: use GH app for helm workflows (#14269) Co-authored-by: Christian Haudum --- .github/workflows/helm-ci.yml | 10 +++++++++- .github/workflows/helm-release.yaml | 5 +++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/helm-ci.yml b/.github/workflows/helm-ci.yml index d58705d1496b7..cbd51c60b8fac 100644 --- a/.github/workflows/helm-ci.yml +++ b/.github/workflows/helm-ci.yml @@ -27,6 +27,14 @@ jobs: - name: Lint Yaml run: make helm-lint + - id: "get_github_app_token" + name: "Get Github app token" + uses: "actions/create-github-app-token@v1" + with: + app-id: "${{ secrets.APP_ID }}" + owner: "${{ github.repository_owner }}" + private-key: "${{ secrets.APP_PRIVATE_KEY }}" + - name: Lint Code Base uses: docker://github/super-linter:v3.12.0 env: @@ -37,7 +45,7 @@ jobs: VALIDATE_YAML: false VALIDATE_GO: false DEFAULT_BRANCH: main - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ steps.get_github_app_token.outputs.token }} call-test: name: Test Helm Chart runs-on: ubuntu-latest diff --git a/.github/workflows/helm-release.yaml b/.github/workflows/helm-release.yaml index b1d065aa9f694..d2f9fe97ba4f6 100644 --- a/.github/workflows/helm-release.yaml +++ b/.github/workflows/helm-release.yaml @@ -16,5 +16,6 @@ jobs: cr_configfile: production/helm/cr.yaml ct_configfile: production/helm/ct.yaml helm_tag_prefix: helm - secrets: - helm_repo_token: ${{ secrets.GH_BOT_ACCESS_TOKEN }} + 
secrets: + github_app_id: ${{ secrets.APP_ID }} + github_app_pem: ${{ secrets.APP_PRIVATE_KEY }} From 3c36ba949d65e803cc6702b8664f87aca07ed052 Mon Sep 17 00:00:00 2001 From: benclive Date: Thu, 26 Sep 2024 15:58:51 +0100 Subject: [PATCH 11/20] feat: Implement owned streams calculation using Partition Ring (#14282) --- pkg/ingester/checkpoint_test.go | 22 +-- pkg/ingester/flush_test.go | 2 +- pkg/ingester/ingester.go | 12 +- pkg/ingester/ingester_test.go | 26 ++-- pkg/ingester/instance.go | 23 +--- pkg/ingester/recalculate_owned_streams.go | 130 ++++++++++++++++-- .../recalculate_owned_streams_test.go | 81 ++++++++++- pkg/ingester/recovery_test.go | 4 +- pkg/loki/modules.go | 9 +- 9 files changed, 235 insertions(+), 74 deletions(-) diff --git a/pkg/ingester/checkpoint_test.go b/pkg/ingester/checkpoint_test.go index 88e770d0c2dac..1639125390a07 100644 --- a/pkg/ingester/checkpoint_test.go +++ b/pkg/ingester/checkpoint_test.go @@ -73,7 +73,7 @@ func TestIngesterWAL(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -116,7 +116,7 @@ func TestIngesterWAL(t *testing.T) { expectCheckpoint(t, walDir, false, time.Second) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, 
runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -130,7 +130,7 @@ func TestIngesterWAL(t *testing.T) { require.Nil(t, services.StopAndAwaitTerminated(context.Background(), i)) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -155,7 +155,7 @@ func TestIngesterWALIgnoresStreamLimits(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -201,7 +201,7 @@ func TestIngesterWALIgnoresStreamLimits(t *testing.T) { require.NoError(t, err) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, 
readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -260,7 +260,7 @@ func TestIngesterWALBackpressureSegments(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -281,7 +281,7 @@ func TestIngesterWALBackpressureSegments(t *testing.T) { expectCheckpoint(t, walDir, false, time.Second) // restart the ingester, ensuring we replayed from WAL. 
- i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -304,7 +304,7 @@ func TestIngesterWALBackpressureCheckpoint(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -325,7 +325,7 @@ func TestIngesterWALBackpressureCheckpoint(t *testing.T) { require.Nil(t, services.StopAndAwaitTerminated(context.Background(), i)) // restart the ingester, ensuring we can replay from the checkpoint as well. 
- i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -602,7 +602,7 @@ func TestIngesterWALReplaysUnorderedToOrdered(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -674,7 +674,7 @@ func TestIngesterWALReplaysUnorderedToOrdered(t *testing.T) { require.NoError(t, err) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) diff --git a/pkg/ingester/flush_test.go b/pkg/ingester/flush_test.go 
index 5ef40d9d17d09..f01fb02e8730b 100644 --- a/pkg/ingester/flush_test.go +++ b/pkg/ingester/flush_test.go @@ -393,7 +393,7 @@ func newTestStore(t require.TestingT, cfg Config, walOverride WAL) (*testStore, limits, err := validation.NewOverrides(defaultLimitsTestConfig(), nil) require.NoError(t, err) - ing, err := New(cfg, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokitlog.NewNopLogger(), nil, readRingMock) + ing, err := New(cfg, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokitlog.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.NoError(t, services.StartAndAwaitRunning(context.Background(), ing)) diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index 3a18000af271d..7776b9097f085 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -292,7 +292,7 @@ type Ingester struct { // recalculateOwnedStreams periodically checks the ring for changes and recalculates owned streams for each instance. readRing ring.ReadRing - recalculateOwnedStreams *recalculateOwnedStreams + recalculateOwnedStreams *recalculateOwnedStreamsSvc ingestPartitionID int32 partitionRingLifecycler *ring.PartitionInstanceLifecycler @@ -300,7 +300,7 @@ type Ingester struct { } // New makes a new Ingester. 
-func New(cfg Config, clientConfig client.Config, store Store, limits Limits, configs *runtime.TenantConfigs, registerer prometheus.Registerer, writeFailuresCfg writefailures.Cfg, metricsNamespace string, logger log.Logger, customStreamsTracker push.UsageTracker, readRing ring.ReadRing) (*Ingester, error) { +func New(cfg Config, clientConfig client.Config, store Store, limits Limits, configs *runtime.TenantConfigs, registerer prometheus.Registerer, writeFailuresCfg writefailures.Cfg, metricsNamespace string, logger log.Logger, customStreamsTracker push.UsageTracker, readRing ring.ReadRing, partitionRingWatcher *ring.PartitionRingWatcher) (*Ingester, error) { if cfg.ingesterClientFactory == nil { cfg.ingesterClientFactory = client.New } @@ -408,7 +408,13 @@ func New(cfg Config, clientConfig client.Config, store Store, limits Limits, con i.SetExtractorWrapper(i.cfg.SampleExtractorWrapper) } - i.recalculateOwnedStreams = newRecalculateOwnedStreams(i.getInstances, i.lifecycler.ID, i.readRing, cfg.OwnedStreamsCheckInterval, util_log.Logger) + var ownedStreamsStrategy ownershipStrategy + if i.cfg.KafkaIngestion.Enabled { + ownedStreamsStrategy = newOwnedStreamsPartitionStrategy(i.ingestPartitionID, partitionRingWatcher, util_log.Logger) + } else { + ownedStreamsStrategy = newOwnedStreamsIngesterStrategy(i.lifecycler.ID, i.readRing, util_log.Logger) + } + i.recalculateOwnedStreams = newRecalculateOwnedStreamsSvc(i.getInstances, ownedStreamsStrategy, cfg.OwnedStreamsCheckInterval, util_log.Logger) return i, nil } diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index bd43daec7a31f..a9108c52c2a14 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -63,7 +63,7 @@ func TestPrepareShutdownMarkerPathNotSet(t *testing.T) { mockRing := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, 
log.NewNopLogger(), nil, mockRing) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, mockRing, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -88,7 +88,7 @@ func TestPrepareShutdown(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -151,7 +151,7 @@ func TestIngester_GetStreamRates_Correctness(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -184,7 +184,7 @@ func BenchmarkGetStreamRatesAllocs(b *testing.B) { } readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(b, err) 
defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -210,7 +210,7 @@ func TestIngester(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -397,7 +397,7 @@ func TestIngesterStreamLimitExceeded(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, overrides, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, overrides, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -819,7 +819,7 @@ func Test_InMemoryLabels(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -874,7 +874,7 @@ func TestIngester_GetDetectedLabels(t *testing.T) { } readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, 
store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -938,7 +938,7 @@ func TestIngester_GetDetectedLabelsWithQuery(t *testing.T) { } readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -1306,7 +1306,7 @@ func TestStats(t *testing.T) { require.NoError(t, err) readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) i.instances["test"] = defaultInstance(t) @@ -1334,7 +1334,7 @@ func TestVolume(t *testing.T) { require.NoError(t, err) readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), 
nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) i.instances["test"] = defaultInstance(t) @@ -1414,7 +1414,7 @@ func createIngesterServer(t *testing.T, ingesterConfig Config) (ingesterClient, require.NoError(t, err) readRingMock := mockReadRingWithOneActiveIngester() - ing, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + ing, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) listener := bufconn.Listen(1024 * 1024) @@ -1631,7 +1631,7 @@ func TestUpdateOwnedStreams(t *testing.T) { require.NoError(t, err) readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) i.instances["test"] = defaultInstance(t) diff --git a/pkg/ingester/instance.go b/pkg/ingester/instance.go index e2fd472656a9f..72ed01793ce7f 100644 --- a/pkg/ingester/instance.go +++ b/pkg/ingester/instance.go @@ -13,7 +13,6 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/httpgrpc" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/tenant" "github.com/opentracing/opentracing-go" "github.com/prometheus/client_golang/prometheus" @@ -47,7 +46,6 @@ import ( "github.com/grafana/loki/v3/pkg/util/httpreq" util_log "github.com/grafana/loki/v3/pkg/util/log" mathutil "github.com/grafana/loki/v3/pkg/util/math" - lokiring "github.com/grafana/loki/v3/pkg/util/ring" 
server_util "github.com/grafana/loki/v3/pkg/util/server" "github.com/grafana/loki/v3/pkg/validation" ) @@ -1175,35 +1173,24 @@ func minTs(stream *logproto.Stream) model.Time { } // For each stream, we check if the stream is owned by the ingester or not and increment/decrement the owned stream count. -func (i *instance) updateOwnedStreams(ingesterRing ring.ReadRing, ingesterID string) error { +func (i *instance) updateOwnedStreams(isOwnedStream func(*stream) (bool, error)) error { start := time.Now() defer func() { i.metrics.streamsOwnershipCheck.Observe(float64(time.Since(start).Milliseconds())) }() - var descsBuf = make([]ring.InstanceDesc, ingesterRing.ReplicationFactor()+1) - var hostsBuf = make([]string, ingesterRing.ReplicationFactor()+1) - var zoneBuf = make([]string, ingesterRing.ZonesCount()+1) + var err error i.streams.WithLock(func() { i.ownedStreamsSvc.resetStreamCounts() err = i.streams.ForEach(func(s *stream) (bool, error) { - replicationSet, err := ingesterRing.Get(lokiring.TokenFor(i.instanceID, s.labelsString), ring.WriteNoExtend, descsBuf, hostsBuf, zoneBuf) + ownedStream, err := isOwnedStream(s) if err != nil { - return false, fmt.Errorf("error getting replication set for stream %s: %v", s.labelsString, err) + return false, err } - ownedStream := i.isOwnedStream(replicationSet, ingesterID) + i.ownedStreamsSvc.trackStreamOwnership(s.fp, ownedStream) return true, nil }) }) return err } - -func (i *instance) isOwnedStream(replicationSet ring.ReplicationSet, ingesterID string) bool { - for _, instanceDesc := range replicationSet.Instances { - if instanceDesc.Id == ingesterID { - return true - } - } - return false -} diff --git a/pkg/ingester/recalculate_owned_streams.go b/pkg/ingester/recalculate_owned_streams.go index d3bf79d29f743..b1f6bd62ebfc7 100644 --- a/pkg/ingester/recalculate_owned_streams.go +++ b/pkg/ingester/recalculate_owned_streams.go @@ -2,49 +2,56 @@ package ingester import ( "context" + "fmt" + "sync" "time" "github.com/go-kit/log" 
"github.com/go-kit/log/level" "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" + "golang.org/x/exp/slices" + + lokiring "github.com/grafana/loki/v3/pkg/util/ring" ) -type recalculateOwnedStreams struct { +type ownershipStrategy interface { + checkRingForChanges() (bool, error) + isOwnedStream(*stream) (bool, error) +} + +type recalculateOwnedStreamsSvc struct { services.Service logger log.Logger + ownershipStrategy ownershipStrategy instancesSupplier func() []*instance - ingesterID string - previousRing ring.ReplicationSet - ingestersRing ring.ReadRing ticker *time.Ticker } -func newRecalculateOwnedStreams(instancesSupplier func() []*instance, ingesterID string, ring ring.ReadRing, ringPollInterval time.Duration, logger log.Logger) *recalculateOwnedStreams { - svc := &recalculateOwnedStreams{ - ingestersRing: ring, +func newRecalculateOwnedStreamsSvc(instancesSupplier func() []*instance, ownershipStrategy ownershipStrategy, ringPollInterval time.Duration, logger log.Logger) *recalculateOwnedStreamsSvc { + svc := &recalculateOwnedStreamsSvc{ instancesSupplier: instancesSupplier, - ingesterID: ingesterID, logger: logger, + ownershipStrategy: ownershipStrategy, } svc.Service = services.NewTimerService(ringPollInterval, nil, svc.iteration, nil) return svc } -func (s *recalculateOwnedStreams) iteration(_ context.Context) error { +func (s *recalculateOwnedStreamsSvc) iteration(_ context.Context) error { s.recalculate() return nil } -func (s *recalculateOwnedStreams) recalculate() { +func (s *recalculateOwnedStreamsSvc) recalculate() { level.Info(s.logger).Log("msg", "starting recalculate owned streams job") defer func() { s.updateFixedLimitForAll() level.Info(s.logger).Log("msg", "completed recalculate owned streams job") }() - ringChanged, err := s.checkRingForChanges() + ringChanged, err := s.ownershipStrategy.checkRingForChanges() if err != nil { level.Error(s.logger).Log("msg", "failed to check ring for changes", "err", err) return @@ -61,14 +68,14 
@@ func (s *recalculateOwnedStreams) recalculate() { } level.Info(s.logger).Log("msg", "updating streams ownership", "tenant", instance.instanceID) - err := instance.updateOwnedStreams(s.ingestersRing, s.ingesterID) + err := instance.updateOwnedStreams(s.ownershipStrategy.isOwnedStream) if err != nil { level.Error(s.logger).Log("msg", "failed to re-evaluate streams ownership", "tenant", instance.instanceID, "err", err) } } } -func (s *recalculateOwnedStreams) updateFixedLimitForAll() { +func (s *recalculateOwnedStreamsSvc) updateFixedLimitForAll() { for _, instance := range s.instancesSupplier() { oldLimit, newLimit := instance.ownedStreamsSvc.updateFixedLimit() if oldLimit != newLimit { @@ -77,7 +84,36 @@ func (s *recalculateOwnedStreams) updateFixedLimitForAll() { } } -func (s *recalculateOwnedStreams) checkRingForChanges() (bool, error) { +type ownedStreamsIngesterStrategy struct { + logger log.Logger + + ingesterID string + previousRing ring.ReplicationSet + ingestersRing ring.ReadRing + + descsBufPool sync.Pool + hostsBufPool sync.Pool + zoneBufPool sync.Pool +} + +func newOwnedStreamsIngesterStrategy(ingesterID string, ingestersRing ring.ReadRing, logger log.Logger) *ownedStreamsIngesterStrategy { + return &ownedStreamsIngesterStrategy{ + ingesterID: ingesterID, + ingestersRing: ingestersRing, + logger: logger, + descsBufPool: sync.Pool{New: func() interface{} { + return make([]ring.InstanceDesc, ingestersRing.ReplicationFactor()+1) + }}, + hostsBufPool: sync.Pool{New: func() interface{} { + return make([]string, ingestersRing.ReplicationFactor()+1) + }}, + zoneBufPool: sync.Pool{New: func() interface{} { + return make([]string, ingestersRing.ZonesCount()+1) + }}, + } +} + +func (s *ownedStreamsIngesterStrategy) checkRingForChanges() (bool, error) { rs, err := s.ingestersRing.GetAllHealthy(ring.WriteNoExtend) if err != nil { return false, err @@ -87,3 +123,69 @@ func (s *recalculateOwnedStreams) checkRingForChanges() (bool, error) { s.previousRing = rs return 
ringChanged, nil } + +//nolint:staticcheck +func (s *ownedStreamsIngesterStrategy) isOwnedStream(str *stream) (bool, error) { + descsBuf := s.descsBufPool.Get().([]ring.InstanceDesc) + hostsBuf := s.hostsBufPool.Get().([]string) + zoneBuf := s.zoneBufPool.Get().([]string) + defer func() { + s.descsBufPool.Put(descsBuf[:0]) + s.hostsBufPool.Put(hostsBuf[:0]) + s.zoneBufPool.Put(zoneBuf[:0]) + }() + + replicationSet, err := s.ingestersRing.Get(lokiring.TokenFor(str.tenant, str.labelsString), ring.WriteNoExtend, descsBuf, hostsBuf, zoneBuf) + if err != nil { + return false, fmt.Errorf("error getting replication set for stream %s: %v", str.labelsString, err) + } + return s.isOwnedStreamInner(replicationSet, s.ingesterID), nil +} + +func (s *ownedStreamsIngesterStrategy) isOwnedStreamInner(replicationSet ring.ReplicationSet, ingesterID string) bool { + for _, instanceDesc := range replicationSet.Instances { + if instanceDesc.Id == ingesterID { + return true + } + } + return false +} + +type ownedStreamsPartitionStrategy struct { + logger log.Logger + + partitionID int32 + partitionRingWatcher ring.PartitionRingReader + previousActivePartitions []int32 + getPartitionShardSize func(user string) int +} + +func newOwnedStreamsPartitionStrategy(partitionID int32, ring ring.PartitionRingReader, logger log.Logger) *ownedStreamsPartitionStrategy { + return &ownedStreamsPartitionStrategy{ + partitionID: partitionID, + partitionRingWatcher: ring, + logger: logger, + } +} + +func (s *ownedStreamsPartitionStrategy) checkRingForChanges() (bool, error) { + // When using partitions ring, we consider ring to be changed if active partitions have changed. 
+ r := s.partitionRingWatcher.PartitionRing() + if r.PartitionsCount() == 0 { + return false, ring.ErrEmptyRing + } + + activePartitions := r.ActivePartitionIDs() + ringChanged := !slices.Equal(s.previousActivePartitions, activePartitions) + s.previousActivePartitions = activePartitions + return ringChanged, nil +} + +func (s *ownedStreamsPartitionStrategy) isOwnedStream(str *stream) (bool, error) { + partitionForStream, err := s.partitionRingWatcher.PartitionRing().ActivePartitionForKey(lokiring.TokenFor(str.tenant, str.labelsString)) + if err != nil { + return false, fmt.Errorf("failed to find active partition for stream: %w", err) + } + + return partitionForStream == s.partitionID, nil +} diff --git a/pkg/ingester/recalculate_owned_streams_test.go b/pkg/ingester/recalculate_owned_streams_test.go index 91b32baef820d..82a733e593d61 100644 --- a/pkg/ingester/recalculate_owned_streams_test.go +++ b/pkg/ingester/recalculate_owned_streams_test.go @@ -18,12 +18,13 @@ import ( "github.com/grafana/loki/v3/pkg/validation" ) -func Test_recalculateOwnedStreams_newRecalculateOwnedStreams(t *testing.T) { +func Test_recalculateOwnedStreams_newRecalculateOwnedStreamsIngester(t *testing.T) { mockInstancesSupplier := &mockTenantsSuplier{tenants: []*instance{}} mockRing := newReadRingMock([]ring.InstanceDesc{ {Addr: "test", Timestamp: time.Now().UnixNano(), State: ring.ACTIVE, Tokens: []uint32{1, 2, 3}}, }, 0) - service := newRecalculateOwnedStreams(mockInstancesSupplier.get, "test", mockRing, 50*time.Millisecond, log.NewNopLogger()) + strategy := newOwnedStreamsIngesterStrategy("test", mockRing, log.NewNopLogger()) + service := newRecalculateOwnedStreamsSvc(mockInstancesSupplier.get, strategy, 50*time.Millisecond, log.NewNopLogger()) require.Equal(t, 0, mockRing.getAllHealthyCallsCount, "ring must be called only after service's start up") ctx := context.Background() require.NoError(t, service.StartAsync(ctx)) @@ -33,7 +34,7 @@ func 
Test_recalculateOwnedStreams_newRecalculateOwnedStreams(t *testing.T) { }, 1*time.Second, 50*time.Millisecond, "expected at least two runs of the iteration") } -func Test_recalculateOwnedStreams_recalculate(t *testing.T) { +func Test_recalculateOwnedStreams_recalculateWithIngesterStrategy(t *testing.T) { tests := map[string]struct { featureEnabled bool expectedOwnedStreamCount int @@ -105,7 +106,8 @@ func Test_recalculateOwnedStreams_recalculate(t *testing.T) { mockTenantsSupplier := &mockTenantsSuplier{tenants: []*instance{tenant}} - service := newRecalculateOwnedStreams(mockTenantsSupplier.get, currentIngesterName, mockRing, 50*time.Millisecond, log.NewNopLogger()) + strategy := newOwnedStreamsIngesterStrategy(currentIngesterName, mockRing, log.NewNopLogger()) + service := newRecalculateOwnedStreamsSvc(mockTenantsSupplier.get, strategy, 50*time.Millisecond, log.NewNopLogger()) //change the limit to assert that fixed limit is updated after the recalculation limits.DefaultLimits().MaxGlobalStreamsPerUser = 50 @@ -153,14 +155,13 @@ func (r *mockStreamsOwnershipRing) Get(streamToken uint32, _ ring.Operation, _ [ return set, nil } -func Test_recalculateOwnedStreams_checkRingForChanges(t *testing.T) { +func Test_ownedStreamsIngesterStrategy_checkRingForChanges(t *testing.T) { mockRing := &readRingMock{ replicationSet: ring.ReplicationSet{ Instances: []ring.InstanceDesc{{Addr: "ingester-0", Timestamp: time.Now().UnixNano(), State: ring.ACTIVE, Tokens: []uint32{100, 200, 300}}}, }, } - mockTenantsSupplier := &mockTenantsSuplier{tenants: []*instance{{}}} - service := newRecalculateOwnedStreams(mockTenantsSupplier.get, "ingester-0", mockRing, 50*time.Millisecond, log.NewNopLogger()) + service := newOwnedStreamsIngesterStrategy("ingester-0", mockRing, log.NewNopLogger()) ringChanged, err := service.checkRingForChanges() require.NoError(t, err) @@ -178,6 +179,64 @@ func Test_recalculateOwnedStreams_checkRingForChanges(t *testing.T) { require.True(t, ringChanged) } +func 
newMockPartitionRingWithActivePartitions(activePartitions ...int32) *ring.PartitionRing { + partitionRing := ring.PartitionRingDesc{ + Partitions: map[int32]ring.PartitionDesc{}, + Owners: map[string]ring.OwnerDesc{}, + } + + for _, id := range activePartitions { + partitionRing.Partitions[id] = ring.PartitionDesc{ + Id: id, + Tokens: []uint32{uint32(id)}, + State: ring.PartitionActive, + } + partitionRing.Owners[fmt.Sprintf("test%d", id)] = ring.OwnerDesc{ + OwnedPartition: id, + State: ring.OwnerActive, + } + } + return ring.NewPartitionRing(partitionRing) +} + +func Test_ownedStreamsPartitionStrategy_checkRingForChanges(t *testing.T) { + ringReader := &mockPartitionRingReader{ + ring: newMockPartitionRingWithActivePartitions(1), + } + service := newOwnedStreamsPartitionStrategy(1, ringReader, log.NewNopLogger()) + + ringChanged, err := service.checkRingForChanges() + require.NoError(t, err) + require.True(t, ringChanged, "expected ring to be changed because it was not initialized yet") + + ringChanged, err = service.checkRingForChanges() + require.NoError(t, err) + require.False(t, ringChanged, "expected ring not to be changed because token ranges is not changed") + + ringReader.ring = newMockPartitionRingWithActivePartitions(1, 2) + + ringChanged, err = service.checkRingForChanges() + require.NoError(t, err) + require.True(t, ringChanged) +} + +func Test_ownedStreamsPartitionStrategy_isOwnedStream(t *testing.T) { + ringReader := &mockPartitionRingReader{ + ring: newMockPartitionRingWithActivePartitions(1, 2, 3), + } + stream := &stream{tenant: "test1", labelsString: "mock=1"} // has a hashkey mapping to partition 1 + + service1 := newOwnedStreamsPartitionStrategy(1, ringReader, log.NewNopLogger()) + owned, err := service1.isOwnedStream(stream) + require.NoError(t, err) + require.True(t, owned) + + service2 := newOwnedStreamsPartitionStrategy(2, ringReader, log.NewNopLogger()) + owned, err = service2.isOwnedStream(stream) + require.NoError(t, err) + 
require.False(t, owned) +} + func createStream(t *testing.T, inst *instance, fingerprint int) *stream { lbls := labels.Labels{labels.Label{Name: "mock", Value: strconv.Itoa(fingerprint)}} @@ -195,3 +254,11 @@ type mockTenantsSuplier struct { func (m *mockTenantsSuplier) get() []*instance { return m.tenants } + +type mockPartitionRingReader struct { + ring *ring.PartitionRing +} + +func (m mockPartitionRingReader) PartitionRing() *ring.PartitionRing { + return m.ring +} diff --git a/pkg/ingester/recovery_test.go b/pkg/ingester/recovery_test.go index 97c55d3da9f8f..180d02e954e60 100644 --- a/pkg/ingester/recovery_test.go +++ b/pkg/ingester/recovery_test.go @@ -229,7 +229,7 @@ func TestSeriesRecoveryNoDuplicates(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) mkSample := func(i int) *logproto.PushRequest { @@ -263,7 +263,7 @@ func TestSeriesRecoveryNoDuplicates(t *testing.T) { require.Equal(t, false, iter.Next()) // create a new ingester now - i, err = New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) // recover the checkpointed series diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index 30eb20fa2ac1c..1a72cb3c5b91e 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -48,7 +48,6 @@ import ( 
"github.com/grafana/loki/v3/pkg/indexgateway" "github.com/grafana/loki/v3/pkg/ingester" "github.com/grafana/loki/v3/pkg/ingester-rf1/objstore" - ingesterkafka "github.com/grafana/loki/v3/pkg/kafka/ingester" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql" "github.com/grafana/loki/v3/pkg/logqlmodel/stats" @@ -597,7 +596,7 @@ func (t *Loki) initIngester() (_ services.Service, err error) { level.Warn(util_log.Logger).Log("msg", "The config setting shutdown marker path is not set. The /ingester/prepare_shutdown endpoint won't work") } - t.Ingester, err = ingester.New(t.Cfg.Ingester, t.Cfg.IngesterClient, t.Store, t.Overrides, t.tenantConfigs, prometheus.DefaultRegisterer, t.Cfg.Distributor.WriteFailuresLogging, t.Cfg.MetricsNamespace, logger, t.UsageTracker, t.ring) + t.Ingester, err = ingester.New(t.Cfg.Ingester, t.Cfg.IngesterClient, t.Store, t.Overrides, t.tenantConfigs, prometheus.DefaultRegisterer, t.Cfg.Distributor.WriteFailuresLogging, t.Cfg.MetricsNamespace, logger, t.UsageTracker, t.ring, t.partitionRingWatcher) if err != nil { return } @@ -1758,16 +1757,16 @@ func (t *Loki) initPartitionRing() (services.Service, error) { return nil, nil } - kvClient, err := kv.NewClient(t.Cfg.Ingester.KafkaIngestion.PartitionRingConfig.KVStore, ring.GetPartitionRingCodec(), kv.RegistererWithKVName(prometheus.DefaultRegisterer, ingesterkafka.PartitionRingName+"-watcher"), util_log.Logger) + kvClient, err := kv.NewClient(t.Cfg.Ingester.KafkaIngestion.PartitionRingConfig.KVStore, ring.GetPartitionRingCodec(), kv.RegistererWithKVName(prometheus.DefaultRegisterer, ingester.PartitionRingName+"-watcher"), util_log.Logger) if err != nil { return nil, fmt.Errorf("creating KV store for partitions ring watcher: %w", err) } - t.partitionRingWatcher = ring.NewPartitionRingWatcher(ingester.PartitionRingName, ingester.PartitionRingName, kvClient, util_log.Logger, prometheus.WrapRegistererWithPrefix("loki_", prometheus.DefaultRegisterer)) + 
t.partitionRingWatcher = ring.NewPartitionRingWatcher(ingester.PartitionRingName, ingester.PartitionRingKey, kvClient, util_log.Logger, prometheus.WrapRegistererWithPrefix("loki_", prometheus.DefaultRegisterer)) t.partitionRing = ring.NewPartitionInstanceRing(t.partitionRingWatcher, t.ring, t.Cfg.Ingester.LifecyclerConfig.RingConfig.HeartbeatTimeout) // Expose a web page to view the partitions ring state. - t.Server.HTTP.Path("/partition-ring").Methods("GET", "POST").Handler(ring.NewPartitionRingPageHandler(t.partitionRingWatcher, ring.NewPartitionRingEditor(ingesterkafka.PartitionRingName+"-key", kvClient))) + t.Server.HTTP.Path("/partition-ring").Methods("GET", "POST").Handler(ring.NewPartitionRingPageHandler(t.partitionRingWatcher, ring.NewPartitionRingEditor(ingester.PartitionRingKey, kvClient))) return t.partitionRingWatcher, nil } From f39cdbd541d85a961db655e70da713be04d9a294 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 26 Sep 2024 11:12:26 -0600 Subject: [PATCH 12/20] fix(deps): update github.com/grafana/dskit digest to 7c41a40 (#14277) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: Trevor Whitney --- go.mod | 2 +- go.sum | 4 +-- tools/lambda-promtail/go.mod | 2 +- tools/lambda-promtail/go.sum | 4 +-- .../grafana/dskit/grpcclient/grpcclient.go | 26 +++++++++++++----- .../dskit/kv/memberlist/memberlist_client.go | 27 +++++++++++++------ .../grafana/dskit/ring/ring_status.gohtml | 5 ++++ vendor/modules.txt | 2 +- 8 files changed, 51 insertions(+), 21 deletions(-) diff --git a/go.mod b/go.mod index 6b76cbd817ad8..3981ee97bf44a 100644 --- a/go.mod +++ b/go.mod @@ -49,7 +49,7 @@ require ( github.com/gorilla/mux v1.8.1 github.com/gorilla/websocket v1.5.3 github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2 - github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b + github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319 
github.com/grafana/go-gelf/v2 v2.0.1 github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56 github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc diff --git a/go.sum b/go.sum index 28ca7bfdf1dcc..fabf4aa4fdca5 100644 --- a/go.sum +++ b/go.sum @@ -1042,8 +1042,8 @@ github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2 h1:qhugDMdQ4Vp68H0tp/0iN17DM2ehRo1rLEdOFe/gB8I= github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2/go.mod h1:w/aiO1POVIeXUQyl0VQSZjl5OAGDTL5aX+4v0RA1tcw= -github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b h1:x2HCzk29I0o5pRPfqWP/qwhXaPGlcz8pohq5kO1NZoE= -github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b/go.mod h1:SPLNCARd4xdjCkue0O6hvuoveuS1dGJjDnfxYe405YQ= +github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319 h1:KACpOOTqA4WqyyKF2fFPQFiaSOpZdOT5f5gg0qkPLiU= +github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319/go.mod h1:SPLNCARd4xdjCkue0O6hvuoveuS1dGJjDnfxYe405YQ= github.com/grafana/go-gelf/v2 v2.0.1 h1:BOChP0h/jLeD+7F9mL7tq10xVkDG15he3T1zHuQaWak= github.com/grafana/go-gelf/v2 v2.0.1/go.mod h1:lexHie0xzYGwCgiRGcvZ723bSNyNI8ZRD4s0CLobh90= github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85 h1:xLuzPoOzdfNb/RF/IENCw+oLVdZB4G21VPhkHBgwSHY= diff --git a/tools/lambda-promtail/go.mod b/tools/lambda-promtail/go.mod index c548f6149945b..42e8066c9436e 100644 --- a/tools/lambda-promtail/go.mod +++ b/tools/lambda-promtail/go.mod @@ -10,7 +10,7 @@ require ( github.com/go-kit/log v0.2.1 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 - github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b + github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319 github.com/grafana/loki/v3 v3.0.0-20240809103847-9315b3d03d79 github.com/prometheus/common v0.55.0 github.com/stretchr/testify v1.9.0 diff 
--git a/tools/lambda-promtail/go.sum b/tools/lambda-promtail/go.sum index c1088c8692cc5..1729b4d8e00c4 100644 --- a/tools/lambda-promtail/go.sum +++ b/tools/lambda-promtail/go.sum @@ -216,8 +216,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= -github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b h1:x2HCzk29I0o5pRPfqWP/qwhXaPGlcz8pohq5kO1NZoE= -github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b/go.mod h1:SPLNCARd4xdjCkue0O6hvuoveuS1dGJjDnfxYe405YQ= +github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319 h1:KACpOOTqA4WqyyKF2fFPQFiaSOpZdOT5f5gg0qkPLiU= +github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319/go.mod h1:SPLNCARd4xdjCkue0O6hvuoveuS1dGJjDnfxYe405YQ= github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56 h1:X8IKQ0wu40wpvYcKfBcc5T4QnhdQjUhtUtB/1CY89lE= github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU= github.com/grafana/jsonparser v0.0.0-20240425183733-ea80629e1a32 h1:NznuPwItog+rwdVg8hAuGKP29ndRSzJAwhxKldkP8oQ= diff --git a/vendor/github.com/grafana/dskit/grpcclient/grpcclient.go b/vendor/github.com/grafana/dskit/grpcclient/grpcclient.go index 7518990471549..a8f728c61e29b 100644 --- a/vendor/github.com/grafana/dskit/grpcclient/grpcclient.go +++ b/vendor/github.com/grafana/dskit/grpcclient/grpcclient.go @@ -2,6 +2,8 @@ package grpcclient import ( "flag" + "slices" + "strings" "time" "github.com/pkg/errors" @@ -40,6 +42,9 @@ type Config struct { Middleware []grpc.UnaryClientInterceptor `yaml:"-"` StreamMiddleware []grpc.StreamClientInterceptor `yaml:"-"` + + // CustomCompressors allows configuring custom compressors. 
+ CustomCompressors []string `yaml:"-"` } // RegisterFlags registers flags. @@ -55,9 +60,19 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { cfg.InitialStreamWindowSize = defaultInitialWindowSize cfg.InitialConnectionWindowSize = defaultInitialWindowSize + var supportedCompressors strings.Builder + supportedCompressors.WriteString("Use compression when sending messages. Supported values are: 'gzip', 'snappy'") + for _, cmp := range cfg.CustomCompressors { + supportedCompressors.WriteString(", ") + supportedCompressors.WriteString("'") + supportedCompressors.WriteString(cmp) + supportedCompressors.WriteString("'") + } + supportedCompressors.WriteString(" and '' (disable compression)") + f.IntVar(&cfg.MaxRecvMsgSize, prefix+".grpc-max-recv-msg-size", 100<<20, "gRPC client max receive message size (bytes).") f.IntVar(&cfg.MaxSendMsgSize, prefix+".grpc-max-send-msg-size", 100<<20, "gRPC client max send message size (bytes).") - f.StringVar(&cfg.GRPCCompression, prefix+".grpc-compression", "", "Use compression when sending messages. 
Supported values are: 'gzip', 'snappy' and '' (disable compression)") + f.StringVar(&cfg.GRPCCompression, prefix+".grpc-compression", "", supportedCompressors.String()) f.Float64Var(&cfg.RateLimit, prefix+".grpc-client-rate-limit", 0., "Rate limit for gRPC client; 0 means disabled.") f.IntVar(&cfg.RateLimitBurst, prefix+".grpc-client-rate-limit-burst", 0, "Rate limit burst for gRPC client.") f.BoolVar(&cfg.BackoffOnRatelimits, prefix+".backoff-on-ratelimits", false, "Enable backoff and retry when we hit rate limits.") @@ -74,11 +89,10 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { } func (cfg *Config) Validate() error { - switch cfg.GRPCCompression { - case gzip.Name, snappy.Name, "": - // valid - default: - return errors.Errorf("unsupported compression type: %s", cfg.GRPCCompression) + supportedCompressors := []string{gzip.Name, snappy.Name, ""} + supportedCompressors = append(supportedCompressors, cfg.CustomCompressors...) + if !slices.Contains(supportedCompressors, cfg.GRPCCompression) { + return errors.Errorf("unsupported compression type: %q", cfg.GRPCCompression) } return nil } diff --git a/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go b/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go index a7eefe92fc22f..1d96363fe3fa8 100644 --- a/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go +++ b/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go @@ -552,7 +552,7 @@ func (m *KV) fastJoinMembersOnStartup(ctx context.Context) { for toJoin > 0 && len(nodes) > 0 && ctx.Err() == nil { reached, err := m.memberlist.Join(nodes[0:1]) // Try to join single node only. 
if err != nil { - level.Debug(m.logger).Log("msg", "fast-joining node failed", "node", nodes[0], "err", err) + level.Info(m.logger).Log("msg", "fast-joining node failed", "node", nodes[0], "err", err) } totalJoined += reached @@ -1018,14 +1018,16 @@ func (m *KV) trySingleCas(key string, codec codec.Codec, f func(in interface{}) } // Don't even try - r, ok := out.(Mergeable) - if !ok || r == nil { + incomingValue, ok := out.(Mergeable) + if !ok || incomingValue == nil { return nil, 0, retry, fmt.Errorf("invalid type: %T, expected Mergeable", out) } // To support detection of removed items from value, we will only allow CAS operation to // succeed if version hasn't changed, i.e. state hasn't changed since running 'f'. - change, newver, err := m.mergeValueForKey(key, r, ver, codec) + // Supplied function may have kept a reference to the returned "incoming value". + // If KV store will keep this value as well, it needs to make a clone. + change, newver, err := m.mergeValueForKey(key, incomingValue, true, ver, codec) if err == errVersionMismatch { return nil, 0, retry, err } @@ -1379,14 +1381,15 @@ func (m *KV) mergeBytesValueForKey(key string, incomingData []byte, codec codec. return nil, 0, fmt.Errorf("expected Mergeable, got: %T", decodedValue) } - return m.mergeValueForKey(key, incomingValue, 0, codec) + // No need to clone this "incomingValue", since we have just decoded it from bytes, and won't be using it. + return m.mergeValueForKey(key, incomingValue, false, 0, codec) } // Merges incoming value with value we have in our store. Returns "a change" that can be sent to other // cluster members to update their state, and new version of the value. // If CAS version is specified, then merging will fail if state has changed already, and errVersionMismatch is reported. // If no modification occurred, new version is 0. 
-func (m *KV) mergeValueForKey(key string, incomingValue Mergeable, casVersion uint, codec codec.Codec) (Mergeable, uint, error) { +func (m *KV) mergeValueForKey(key string, incomingValue Mergeable, incomingValueRequiresClone bool, casVersion uint, codec codec.Codec) (Mergeable, uint, error) { m.storeMu.Lock() defer m.storeMu.Unlock() @@ -1398,7 +1401,7 @@ func (m *KV) mergeValueForKey(key string, incomingValue Mergeable, casVersion ui if casVersion > 0 && curr.Version != casVersion { return nil, 0, errVersionMismatch } - result, change, err := computeNewValue(incomingValue, curr.value, casVersion > 0) + result, change, err := computeNewValue(incomingValue, incomingValueRequiresClone, curr.value, casVersion > 0) if err != nil { return nil, 0, err } @@ -1441,8 +1444,16 @@ func (m *KV) mergeValueForKey(key string, incomingValue Mergeable, casVersion ui } // returns [result, change, error] -func computeNewValue(incoming Mergeable, oldVal Mergeable, cas bool) (Mergeable, Mergeable, error) { +func computeNewValue(incoming Mergeable, incomingValueRequiresClone bool, oldVal Mergeable, cas bool) (Mergeable, Mergeable, error) { if oldVal == nil { + // It's OK to return the same value twice (once as result, once as change), because "change" will be cloned + // in mergeValueForKey if needed. 
+ + if incomingValueRequiresClone { + clone := incoming.Clone() + return clone, clone, nil + } + return incoming, incoming, nil } diff --git a/vendor/github.com/grafana/dskit/ring/ring_status.gohtml b/vendor/github.com/grafana/dskit/ring/ring_status.gohtml index 5270b457c6250..157f8d89e6379 100644 --- a/vendor/github.com/grafana/dskit/ring/ring_status.gohtml +++ b/vendor/github.com/grafana/dskit/ring/ring_status.gohtml @@ -38,8 +38,13 @@ {{ .State }} {{ .Address }} {{ .RegisteredTimestamp | timeOrEmptyString }} + {{ if .ReadOnly }} {{ .ReadOnly }} + {{ .ReadOnlyUpdatedTimestamp | durationSince }} ago ({{ .ReadOnlyUpdatedTimestamp.Format "15:04:05.999" }}) + {{ else }} + {{ .ReadOnlyUpdatedTimestamp | timeOrEmptyString }} + {{ end }} {{ .HeartbeatTimestamp | durationSince }} ago ({{ .HeartbeatTimestamp.Format "15:04:05.999" }}) {{ .NumTokens }} {{ .Ownership | humanFloat }}% diff --git a/vendor/modules.txt b/vendor/modules.txt index b2b8f38c4b5f6..b49a55d88caed 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -980,7 +980,7 @@ github.com/gorilla/websocket # github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2 ## explicit; go 1.17 github.com/grafana/cloudflare-go -# github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b +# github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319 ## explicit; go 1.21 github.com/grafana/dskit/aws github.com/grafana/dskit/backoff From 04f87ca9332cb33606f55fb94403bb4f2048c6fd Mon Sep 17 00:00:00 2001 From: Paul Rogers <129207811+paul1r@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:07:48 -0400 Subject: [PATCH 13/20] chore: Lint in prep for golangci compatible with Go 1.23 (#14289) --- pkg/logql/syntax/ast.go | 16 ++++++++-------- pkg/querier/queryrange/detected_fields_test.go | 14 +++++++------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pkg/logql/syntax/ast.go b/pkg/logql/syntax/ast.go index 95c99eef6d110..51bee23b01a9a 100644 --- a/pkg/logql/syntax/ast.go +++ 
b/pkg/logql/syntax/ast.go @@ -80,7 +80,7 @@ func ExtractLabelFiltersBeforeParser(e Expr) []*LabelFilterExpr { ) visitor := &DepthFirstTraversal{ - VisitLabelFilterFn: func(v RootVisitor, e *LabelFilterExpr) { + VisitLabelFilterFn: func(_ RootVisitor, e *LabelFilterExpr) { if !foundParseStage { filters = append(filters, e) } @@ -94,13 +94,13 @@ func ExtractLabelFiltersBeforeParser(e Expr) []*LabelFilterExpr { // expression is added without updating this list, blooms can silently // misbehave. - VisitLogfmtParserFn: func(v RootVisitor, e *LogfmtParserExpr) { foundParseStage = true }, - VisitLabelParserFn: func(v RootVisitor, e *LabelParserExpr) { foundParseStage = true }, - VisitJSONExpressionParserFn: func(v RootVisitor, e *JSONExpressionParser) { foundParseStage = true }, - VisitLogfmtExpressionParserFn: func(v RootVisitor, e *LogfmtExpressionParser) { foundParseStage = true }, - VisitLabelFmtFn: func(v RootVisitor, e *LabelFmtExpr) { foundParseStage = true }, - VisitKeepLabelFn: func(v RootVisitor, e *KeepLabelsExpr) { foundParseStage = true }, - VisitDropLabelsFn: func(v RootVisitor, e *DropLabelsExpr) { foundParseStage = true }, + VisitLogfmtParserFn: func(_ RootVisitor, _ *LogfmtParserExpr) { foundParseStage = true }, + VisitLabelParserFn: func(_ RootVisitor, _ *LabelParserExpr) { foundParseStage = true }, + VisitJSONExpressionParserFn: func(_ RootVisitor, _ *JSONExpressionParser) { foundParseStage = true }, + VisitLogfmtExpressionParserFn: func(_ RootVisitor, _ *LogfmtExpressionParser) { foundParseStage = true }, + VisitLabelFmtFn: func(_ RootVisitor, _ *LabelFmtExpr) { foundParseStage = true }, + VisitKeepLabelFn: func(_ RootVisitor, _ *KeepLabelsExpr) { foundParseStage = true }, + VisitDropLabelsFn: func(_ RootVisitor, _ *DropLabelsExpr) { foundParseStage = true }, } e.Accept(visitor) return filters diff --git a/pkg/querier/queryrange/detected_fields_test.go b/pkg/querier/queryrange/detected_fields_test.go index 1a967f77c069d..b82f3a4a70de1 100644 --- 
a/pkg/querier/queryrange/detected_fields_test.go +++ b/pkg/querier/queryrange/detected_fields_test.go @@ -969,7 +969,7 @@ func TestQuerier_DetectedFields(t *testing.T) { limitedHandler := func(stream logproto.Stream) base.Handler { return base.HandlerFunc( - func(ctx context.Context, req base.Request) (base.Response, error) { + func(_ context.Context, _ base.Request) (base.Response, error) { return &LokiResponse{ Status: "success", Data: LokiData{ @@ -985,7 +985,7 @@ func TestQuerier_DetectedFields(t *testing.T) { logHandler := func(stream logproto.Stream) base.Handler { return base.HandlerFunc( - func(ctx context.Context, req base.Request) (base.Response, error) { + func(_ context.Context, _ base.Request) (base.Response, error) { return &LokiResponse{ Status: "success", Data: LokiData{ @@ -1028,7 +1028,7 @@ func TestQuerier_DetectedFields(t *testing.T) { limitedHandler(mockLogfmtStreamWithLabels(1, 5, `{type="test", name="foo"}`)), logHandler(mockLogfmtStreamWithLabels(1, 5, `{type="test", name="foo"}`)), limits, - ).Wrap(base.HandlerFunc(func(ctx context.Context, req base.Request) (base.Response, error) { + ).Wrap(base.HandlerFunc(func(_ context.Context, _ base.Request) (base.Response, error) { t.Fatal("should not be called") return nil, nil })) @@ -1058,7 +1058,7 @@ func TestQuerier_DetectedFields(t *testing.T) { limitedHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 5, `{type="test", name="bob"}`)), logHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 5, `{type="test", name="bob"}`)), limits, - ).Wrap(base.HandlerFunc(func(ctx context.Context, req base.Request) (base.Response, error) { + ).Wrap(base.HandlerFunc(func(_ context.Context, _ base.Request) (base.Response, error) { t.Fatal("should not be called") return nil, nil })) @@ -1090,7 +1090,7 @@ func TestQuerier_DetectedFields(t *testing.T) { limitedHandler(mockLogfmtStreamWithLabels(1, 2, `{type="test", name="foo"}`)), logHandler(mockLogfmtStreamWithLabels(1, 2, `{type="test", 
name="foo"}`)), limits, - ).Wrap(base.HandlerFunc(func(ctx context.Context, req base.Request) (base.Response, error) { + ).Wrap(base.HandlerFunc(func(_ context.Context, _ base.Request) (base.Response, error) { t.Fatal("should not be called") return nil, nil })) @@ -1136,7 +1136,7 @@ func TestQuerier_DetectedFields(t *testing.T) { ), logHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 2, `{type="test"}`)), limits, - ).Wrap(base.HandlerFunc(func(ctx context.Context, req base.Request) (base.Response, error) { + ).Wrap(base.HandlerFunc(func(_ context.Context, _ base.Request) (base.Response, error) { t.Fatal("should not be called") return nil, nil })) @@ -1188,7 +1188,7 @@ func TestQuerier_DetectedFields(t *testing.T) { ), logHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 2, `{type="test", name="bob"}`)), limits, - ).Wrap(base.HandlerFunc(func(ctx context.Context, req base.Request) (base.Response, error) { + ).Wrap(base.HandlerFunc(func(_ context.Context, _ base.Request) (base.Response, error) { t.Fatal("should not be called") return nil, nil })) From 2d4792a54fb52caa5cd904a17349b04410fae4c0 Mon Sep 17 00:00:00 2001 From: Ravishankar Date: Fri, 27 Sep 2024 14:11:41 +0530 Subject: [PATCH 14/20] fix: allow rename of structuremetadata labels (#13955) Co-authored-by: Christian Haudum --- pkg/logql/log/labels.go | 90 +++++++++++++++--- pkg/logql/log/labels_test.go | 180 +++++++++++++++++++++++++++++++++++ 2 files changed, 255 insertions(+), 15 deletions(-) diff --git a/pkg/logql/log/labels.go b/pkg/logql/log/labels.go index c68fe1af0e5b5..c8d0bcb31ebb4 100644 --- a/pkg/logql/log/labels.go +++ b/pkg/logql/log/labels.go @@ -327,20 +327,52 @@ func (b *LabelsBuilder) Get(key string) (string, bool) { // Del deletes the label of the given name. func (b *LabelsBuilder) Del(ns ...string) *LabelsBuilder { for _, n := range ns { - for category, lbls := range b.add { - for i, a := range lbls { - if a.Name == n { - b.add[category] = append(lbls[:i], lbls[i+1:]...) 
- } - } + for category := range b.add { + b.deleteWithCategory(LabelCategory(category), n) } b.del = append(b.del, n) } return b } +// deleteWithCategory removes the label from the specified category +func (b *LabelsBuilder) deleteWithCategory(category LabelCategory, n string) { + for i, l := range b.add[category] { + if l.Name == n { + b.add[category] = append(b.add[category][:i], b.add[category][i+1:]...) + } + } +} + // Set the name/value pair as a label. +// The value `v` may not be set if a category with higher preference already contains `n`. +// Category preference goes as Parsed > Structured Metadata > Stream. func (b *LabelsBuilder) Set(category LabelCategory, n, v string) *LabelsBuilder { + // Parsed takes precedence over Structured Metadata and Stream labels. + // If category is Parsed, we delete `n` from the structured metadata and stream labels. + if category == ParsedLabel { + b.deleteWithCategory(StructuredMetadataLabel, n) + b.deleteWithCategory(StreamLabel, n) + } + + // Structured Metadata takes precedence over Stream labels. + // If category is `StructuredMetadataLabel`,we delete `n` from the stream labels. + // If `n` exists in the parsed labels, we won't overwrite it's value and we just return what we have. + if category == StructuredMetadataLabel { + b.deleteWithCategory(StreamLabel, n) + if labelsContain(b.add[ParsedLabel], n) { + return b + } + } + + // Finally, if category is `StreamLabel` and `n` already exists in either the structured metadata or + // parsed labels, the `Set` operation is a noop and we return the unmodified labels builder. 
+ if category == StreamLabel { + if labelsContain(b.add[StructuredMetadataLabel], n) || labelsContain(b.add[ParsedLabel], n) { + return b + } + } + for i, a := range b.add[category] { if a.Name == n { b.add[category][i].Value = v @@ -430,6 +462,7 @@ func (b *LabelsBuilder) UnsortedLabels(buf labels.Labels, categories ...LabelCat } else { buf = buf[:0] } + if categoriesContain(categories, StreamLabel) { Outer: for _, l := range b.base { @@ -439,20 +472,38 @@ func (b *LabelsBuilder) UnsortedLabels(buf labels.Labels, categories ...LabelCat continue Outer } } - // Skip stream labels which value will be replaced - for _, lbls := range b.add { - for _, la := range lbls { - if l.Name == la.Name { - continue Outer - } - } + + // Skip stream labels which value will be replaced by structured metadata + if labelsContain(b.add[StructuredMetadataLabel], l.Name) { + continue + } + + // Skip stream labels which value will be replaced by parsed labels + if labelsContain(b.add[ParsedLabel], l.Name) { + continue + } + + // Take value from stream label if present + if labelsContain(b.add[StreamLabel], l.Name) { + buf = append(buf, labels.Label{Name: l.Name, Value: b.add[StreamLabel].Get(l.Name)}) + } else { + buf = append(buf, l) + } + } + } + + if categoriesContain(categories, StructuredMetadataLabel) { + for _, l := range b.add[StructuredMetadataLabel] { + if labelsContain(b.add[ParsedLabel], l.Name) { + continue } + buf = append(buf, l) } } - for _, category := range categories { - buf = append(buf, b.add[category]...) + if categoriesContain(categories, ParsedLabel) { + buf = append(buf, b.add[ParsedLabel]...) 
} if (b.HasErr() || b.HasErrorDetails()) && categoriesContain(categories, ParsedLabel) { buf = b.appendErrors(buf) @@ -566,6 +617,15 @@ func flattenLabels(buf labels.Labels, many ...labels.Labels) labels.Labels { return buf } +func labelsContain(labels labels.Labels, name string) bool { + for _, l := range labels { + if l.Name == name { + return true + } + } + return false +} + func (b *BaseLabelsBuilder) toUncategorizedResult(buf labels.Labels) LabelsResult { hash := b.hasher.Hash(buf) if cached, ok := b.resultCache[hash]; ok { diff --git a/pkg/logql/log/labels_test.go b/pkg/logql/log/labels_test.go index 97c9a8899c223..7f543a48d7d82 100644 --- a/pkg/logql/log/labels_test.go +++ b/pkg/logql/log/labels_test.go @@ -1,6 +1,7 @@ package log import ( + "sort" "testing" "github.com/prometheus/prometheus/model/labels" @@ -198,6 +199,185 @@ func TestLabelsBuilder_LabelsResult(t *testing.T) { assert.Equal(t, expectedStreamLbls, actual.Stream()) assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) assert.Equal(t, expectedParsedLbls, actual.Parsed()) + + b.Reset() + b.Set(StreamLabel, "namespace", "tempo") + b.Set(StreamLabel, "bazz", "tazz") + b.Set(StructuredMetadataLabel, "bazz", "sazz") + b.Set(ParsedLabel, "ToReplace", "other") + + expectedStreamLbls = labels.FromStrings( + "namespace", "tempo", + "cluster", "us-central1", + "job", "us-central1/loki", + ) + expectedStucturedMetadataLbls = labels.FromStrings( + "bazz", "sazz", + ) + expectedParsedLbls = labels.FromStrings( + "ToReplace", "other", + ) + + expected = make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) + expected = append(expected, expectedParsedLbls...) + expected = labels.New(expected...) + assertLabelResult(t, expected, b.LabelsResult()) + // cached. 
+ assertLabelResult(t, expected, b.LabelsResult()) + actual = b.LabelsResult() + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) +} + +func TestLabelsBuilder_Set(t *testing.T) { + strs := []string{ + "namespace", "loki", + "cluster", "us-central1", + "toreplace", "fuzz", + } + lbs := labels.FromStrings(strs...) + b := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash()) + + // test duplicating stream label with parsed label + b.Set(StructuredMetadataLabel, "stzz", "stvzz") + b.Set(ParsedLabel, "toreplace", "buzz") + expectedStreamLbls := labels.FromStrings("namespace", "loki", "cluster", "us-central1") + expectedStucturedMetadataLbls := labels.FromStrings("stzz", "stvzz") + expectedParsedLbls := labels.FromStrings("toreplace", "buzz") + + expected := make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) + expected = append(expected, expectedParsedLbls...) + expected = labels.New(expected...) 
+ + actual := b.LabelsResult() + assertLabelResult(t, expected, actual) + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) + + b.Reset() + + // test duplicating structured metadata label with parsed label + b.Set(StructuredMetadataLabel, "stzz", "stvzz") + b.Set(StructuredMetadataLabel, "toreplace", "muzz") + b.Set(ParsedLabel, "toreplace", "buzz") + expectedStreamLbls = labels.FromStrings("namespace", "loki", "cluster", "us-central1") + expectedStucturedMetadataLbls = labels.FromStrings("stzz", "stvzz") + expectedParsedLbls = labels.FromStrings("toreplace", "buzz") + + expected = make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) + expected = append(expected, expectedParsedLbls...) + expected = labels.New(expected...) + + actual = b.LabelsResult() + assertLabelResult(t, expected, actual) + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) + + b.Reset() + + // test duplicating stream label with structured meta data label + b.Set(StructuredMetadataLabel, "toreplace", "muzz") + b.Set(ParsedLabel, "stzz", "stvzz") + expectedStreamLbls = labels.FromStrings("namespace", "loki", "cluster", "us-central1") + expectedStucturedMetadataLbls = labels.FromStrings("toreplace", "muzz") + expectedParsedLbls = labels.FromStrings("stzz", "stvzz") + + expected = make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) + expected = append(expected, expectedParsedLbls...) 
+ expected = labels.New(expected...) + + actual = b.LabelsResult() + assertLabelResult(t, expected, actual) + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) + + b.Reset() + + // test duplicating parsed label with structured meta data label + b.Set(ParsedLabel, "toreplace", "puzz") + b.Set(StructuredMetadataLabel, "stzz", "stvzzz") + b.Set(StructuredMetadataLabel, "toreplace", "muzz") + expectedStreamLbls = labels.FromStrings("namespace", "loki", "cluster", "us-central1") + expectedStucturedMetadataLbls = labels.FromStrings("stzz", "stvzzz") + expectedParsedLbls = labels.FromStrings("toreplace", "puzz") + + expected = make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) + expected = append(expected, expectedParsedLbls...) + expected = labels.New(expected...) + + actual = b.LabelsResult() + assertLabelResult(t, expected, actual) + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) + + b.Reset() + + // test duplicating structured meta data label with stream label + b.Set(ParsedLabel, "stzz", "stvzzz") + b.Set(StructuredMetadataLabel, "toreplace", "muzz") + expectedStreamLbls = labels.FromStrings("namespace", "loki", "cluster", "us-central1") + expectedStucturedMetadataLbls = labels.FromStrings("toreplace", "muzz") + expectedParsedLbls = labels.FromStrings("stzz", "stvzzz") + + expected = make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) 
+ expected = append(expected, expectedParsedLbls...) + expected = labels.New(expected...) + + actual = b.LabelsResult() + assertLabelResult(t, expected, actual) + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) +} + +func TestLabelsBuilder_UnsortedLabels(t *testing.T) { + strs := []string{ + "namespace", "loki", + "cluster", "us-central1", + "toreplace", "fuzz", + } + lbs := labels.FromStrings(strs...) + b := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash()) + b.add[StructuredMetadataLabel] = labels.FromStrings("toreplace", "buzz", "fzz", "bzz") + b.add[ParsedLabel] = labels.FromStrings("pzz", "pvzz") + expected := labels.FromStrings("cluster", "us-central1", "namespace", "loki", "fzz", "bzz", "toreplace", "buzz", "pzz", "pvzz") + actual := b.UnsortedLabels(nil) + require.ElementsMatch(t, expected, actual) + + b.Reset() + b.add[StructuredMetadataLabel] = labels.FromStrings("fzz", "bzz") + b.add[ParsedLabel] = labels.FromStrings("toreplace", "buzz", "pzz", "pvzz") + expected = labels.FromStrings("cluster", "us-central1", "namespace", "loki", "fzz", "bzz", "toreplace", "buzz", "pzz", "pvzz") + actual = b.UnsortedLabels(nil) + sort.Sort(expected) + sort.Sort(actual) + assert.Equal(t, expected, actual) + + b.Reset() + b.add[StructuredMetadataLabel] = labels.FromStrings("fzz", "bzz", "toreplacezz", "test") + b.add[ParsedLabel] = labels.FromStrings("toreplacezz", "buzz", "pzz", "pvzz") + expected = labels.FromStrings("cluster", "us-central1", "namespace", "loki", "fzz", "bzz", "toreplace", "fuzz", "pzz", "pvzz", "toreplacezz", "buzz") + actual = b.UnsortedLabels(nil) + sort.Sort(expected) + sort.Sort(actual) + assert.Equal(t, expected, actual) } func TestLabelsBuilder_GroupedLabelsResult(t *testing.T) { From 6354deda90a9430856447e27123b3a33fd1b77a0 Mon Sep 17 00:00:00 2001 From: George Robinson Date: Fri, 27 Sep 2024 12:13:18 +0100 
Subject: [PATCH 15/20] fix: missing dep PartitionRing for Ingester (#14292) --- pkg/loki/loki.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index 20ea802f12ebb..84af0a73504f8 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -719,7 +719,7 @@ func (t *Loki) setupModuleManager() error { TenantConfigs: {RuntimeConfig}, Distributor: {Ring, Server, Overrides, TenantConfigs, PatternRingClient, PatternIngesterTee, Analytics, PartitionRing}, Store: {Overrides, IndexGatewayRing}, - Ingester: {Store, Server, MemberlistKV, TenantConfigs, Analytics}, + Ingester: {Store, Server, MemberlistKV, TenantConfigs, Analytics, PartitionRing}, Querier: {Store, Ring, Server, IngesterQuerier, PatternRingClient, Overrides, Analytics, CacheGenerationLoader, QuerySchedulerRing}, QueryFrontendTripperware: {Server, Overrides, TenantConfigs}, QueryFrontend: {QueryFrontendTripperware, Analytics, CacheGenerationLoader, QuerySchedulerRing}, From 17c472d9abea6b1cae21de5fe2af8b365bdaf137 Mon Sep 17 00:00:00 2001 From: Ashwanth Date: Fri, 27 Sep 2024 19:03:39 +0530 Subject: [PATCH 16/20] fix(sharding): apply offset to both `from` and `through` in shard request (#14256) --- pkg/querier/queryrange/shard_resolver.go | 25 +++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/pkg/querier/queryrange/shard_resolver.go b/pkg/querier/queryrange/shard_resolver.go index 31366d0a0dd76..4fe444c3bc59b 100644 --- a/pkg/querier/queryrange/shard_resolver.go +++ b/pkg/querier/queryrange/shard_resolver.go @@ -225,7 +225,10 @@ func (r *dynamicShardResolver) ShardingRanges(expr syntax.Expr, targetBytesPerSh ) { log := spanlogger.FromContext(r.ctx) - adjustedFrom := r.from + var ( + adjustedFrom = r.from + adjustedThrough model.Time + ) // NB(owen-d): there should only ever be 1 matcher group passed // to this call as we call it separately for different legs @@ -236,18 +239,30 @@ func (r *dynamicShardResolver) 
ShardingRanges(expr syntax.Expr, targetBytesPerSh } for _, grp := range grps { - diff := grp.Interval + grp.Offset + diff := grp.Interval // For instant queries, when start == end, // we have a default lookback which we add here - if grp.Interval == 0 { - diff = diff + r.defaultLookback + if diff == 0 { + diff = r.defaultLookback } + diff += grp.Offset + // use the oldest adjustedFrom if r.from.Add(-diff).Before(adjustedFrom) { adjustedFrom = r.from.Add(-diff) } + + // use the latest adjustedThrough + if r.through.Add(-grp.Offset).After(adjustedThrough) { + adjustedThrough = r.through.Add(-grp.Offset) + } + } + + // handle the case where there are no matchers + if adjustedThrough == 0 { + adjustedThrough = r.through } exprStr := expr.String() @@ -256,7 +271,7 @@ func (r *dynamicShardResolver) ShardingRanges(expr syntax.Expr, targetBytesPerSh // use the retry handler here to retry transient errors resp, err := r.retryNextHandler.Do(r.ctx, &logproto.ShardsRequest{ From: adjustedFrom, - Through: r.through, + Through: adjustedThrough, Query: expr.String(), TargetBytesPerShard: targetBytesPerShard, }) From a584fb713db30ec5056e6d45d1435ad38025b556 Mon Sep 17 00:00:00 2001 From: Ashwanth Date: Fri, 27 Sep 2024 19:08:40 +0530 Subject: [PATCH 17/20] chore: sync tables without acquiring read lock the whole time (#14179) --- pkg/storage/store_test.go | 1 + .../indexshipper/downloads/index_set.go | 5 ++++ .../shipper/indexshipper/downloads/table.go | 18 +++++++++++-- .../indexshipper/downloads/table_manager.go | 25 ++++++++++++++++--- .../shipper/indexshipper/downloads/util.go | 9 +++++++ 5 files changed, 53 insertions(+), 5 deletions(-) diff --git a/pkg/storage/store_test.go b/pkg/storage/store_test.go index c509783d8661f..b1493089750a9 100644 --- a/pkg/storage/store_test.go +++ b/pkg/storage/store_test.go @@ -1894,6 +1894,7 @@ func TestStore_BoltdbTsdbSameIndexPrefix(t *testing.T) { // recreate the store because boltdb-shipper now runs queriers on snapshots which are created 
every 1 min and during startup. store.Stop() + ResetBoltDBIndexClientsWithShipper() // there should be 2 index tables in the object storage indexTables, err := os.ReadDir(filepath.Join(cfg.FSConfig.Directory, "index")) diff --git a/pkg/storage/stores/shipper/indexshipper/downloads/index_set.go b/pkg/storage/stores/shipper/indexshipper/downloads/index_set.go index 8edd121071c5e..971dcb0fb65b3 100644 --- a/pkg/storage/stores/shipper/indexshipper/downloads/index_set.go +++ b/pkg/storage/stores/shipper/indexshipper/downloads/index_set.go @@ -283,6 +283,11 @@ func (t *indexSet) cleanupDB(fileName string) error { } func (t *indexSet) Sync(ctx context.Context) (err error) { + if !t.indexMtx.isReady() { + level.Info(t.logger).Log("msg", "skip sync since the index set is not ready") + return nil + } + return t.syncWithRetry(ctx, true, false) } diff --git a/pkg/storage/stores/shipper/indexshipper/downloads/table.go b/pkg/storage/stores/shipper/indexshipper/downloads/table.go index 1bae83c51e0e9..5b9f29c3a0c18 100644 --- a/pkg/storage/stores/shipper/indexshipper/downloads/table.go +++ b/pkg/storage/stores/shipper/indexshipper/downloads/table.go @@ -13,6 +13,7 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/concurrency" "github.com/pkg/errors" + "golang.org/x/exp/maps" "golang.org/x/sync/errgroup" "github.com/grafana/loki/v3/pkg/storage/chunk/client/util" @@ -271,9 +272,22 @@ func (t *table) Sync(ctx context.Context) error { level.Debug(t.logger).Log("msg", fmt.Sprintf("syncing files for table %s", t.name)) t.indexSetsMtx.RLock() - defer t.indexSetsMtx.RUnlock() + users := maps.Keys(t.indexSets) + t.indexSetsMtx.RUnlock() + + for _, userID := range users { + if err := ctx.Err(); err != nil { + return err + } + + t.indexSetsMtx.RLock() + indexSet, ok := t.indexSets[userID] + t.indexSetsMtx.RUnlock() + + if !ok { + continue + } - for userID, indexSet := range t.indexSets { if err := indexSet.Sync(ctx); err != nil { return errors.Wrap(err, fmt.Sprintf("failed 
to sync index set %s for table %s", userID, t.name)) } diff --git a/pkg/storage/stores/shipper/indexshipper/downloads/table_manager.go b/pkg/storage/stores/shipper/indexshipper/downloads/table_manager.go index 6b69272593784..3b4bc4bfb3fce 100644 --- a/pkg/storage/stores/shipper/indexshipper/downloads/table_manager.go +++ b/pkg/storage/stores/shipper/indexshipper/downloads/table_manager.go @@ -14,6 +14,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "golang.org/x/exp/maps" "github.com/grafana/loki/v3/pkg/compactor/deletion" "github.com/grafana/loki/v3/pkg/storage/chunk/client/util" @@ -180,6 +181,10 @@ func (tm *tableManager) ForEach(ctx context.Context, tableName, userID string, c } func (tm *tableManager) getOrCreateTable(tableName string) (Table, error) { + if tm.ctx.Err() != nil { + return nil, errors.New("table manager is stopping") + } + // if table is already there, use it. start := time.Now() tm.tablesMtx.RLock() @@ -214,7 +219,8 @@ func (tm *tableManager) getOrCreateTable(tableName string) (Table, error) { func (tm *tableManager) syncTables(ctx context.Context) error { tm.tablesMtx.RLock() - defer tm.tablesMtx.RUnlock() + tables := maps.Keys(tm.tables) + tm.tablesMtx.RUnlock() start := time.Now() var err error @@ -231,11 +237,24 @@ func (tm *tableManager) syncTables(ctx context.Context) error { level.Info(tm.logger).Log("msg", "syncing tables") - for name, table := range tm.tables { + for _, name := range tables { + if err := ctx.Err(); err != nil { + return err + } + level.Debug(tm.logger).Log("msg", "syncing table", "table", name) start := time.Now() + + tm.tablesMtx.RLock() + table, ok := tm.tables[name] + tm.tablesMtx.RUnlock() + + if !ok { + continue + } + err := table.Sync(ctx) - duration := float64(time.Since(start)) + duration := time.Since(start).Seconds() if err != nil { tm.metrics.tableSyncLatency.WithLabelValues(name, statusFailure).Observe(duration) return 
errors.Wrapf(err, "failed to sync table '%s'", name) diff --git a/pkg/storage/stores/shipper/indexshipper/downloads/util.go b/pkg/storage/stores/shipper/indexshipper/downloads/util.go index 457f76b3433d7..4c5fcfee1674d 100644 --- a/pkg/storage/stores/shipper/indexshipper/downloads/util.go +++ b/pkg/storage/stores/shipper/indexshipper/downloads/util.go @@ -23,6 +23,15 @@ func (m *mtxWithReadiness) markReady() { close(m.ready) } +func (m *mtxWithReadiness) isReady() bool { + select { + case <-m.ready: + return true + default: + return false + } +} + func (m *mtxWithReadiness) awaitReady(ctx context.Context) error { ctx, cancel := context.WithTimeoutCause(ctx, 30*time.Second, errors.New("exceeded 30 seconds in awaitReady")) defer cancel() From 59ff1ece1dacc461d03f71e41c0728396727eee6 Mon Sep 17 00:00:00 2001 From: Robert Jacob Date: Fri, 27 Sep 2024 12:30:10 -0400 Subject: [PATCH 18/20] fix(config): Copy Alibaba and IBM object storage configuration from common (#14297) --- pkg/loki/config_wrapper.go | 20 ++++++ pkg/loki/config_wrapper_test.go | 118 ++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) diff --git a/pkg/loki/config_wrapper.go b/pkg/loki/config_wrapper.go index cbc6ff44b0442..16d25c1ff5e89 100644 --- a/pkg/loki/config_wrapper.go +++ b/pkg/loki/config_wrapper.go @@ -540,6 +540,26 @@ func applyStorageConfig(cfg, defaults *ConfigWrapper) error { } } + if !reflect.DeepEqual(cfg.Common.Storage.AlibabaCloud, defaults.StorageConfig.AlibabaStorageConfig) { + configsFound++ + + applyConfig = func(r *ConfigWrapper) { + r.Ruler.StoreConfig.Type = "alibaba" + r.Ruler.StoreConfig.AlibabaCloud = r.Common.Storage.AlibabaCloud + r.StorageConfig.AlibabaStorageConfig = r.Common.Storage.AlibabaCloud + } + } + + if !reflect.DeepEqual(cfg.Common.Storage.COS, defaults.StorageConfig.COSConfig) { + configsFound++ + + applyConfig = func(r *ConfigWrapper) { + r.Ruler.StoreConfig.Type = "cos" + r.Ruler.StoreConfig.COS = r.Common.Storage.COS + 
r.StorageConfig.COSConfig = r.Common.Storage.COS + } + } + if !reflect.DeepEqual(cfg.Common.Storage.CongestionControl, defaults.StorageConfig.CongestionControl) { applyConfig = func(r *ConfigWrapper) { r.StorageConfig.CongestionControl = r.Common.Storage.CongestionControl diff --git a/pkg/loki/config_wrapper_test.go b/pkg/loki/config_wrapper_test.go index e8894d6329b7c..5e1ad00bec501 100644 --- a/pkg/loki/config_wrapper_test.go +++ b/pkg/loki/config_wrapper_test.go @@ -219,12 +219,16 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig, config.StorageConfig.AWSStorageConfig) assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when multiple configs are provided, an error is returned", func(t *testing.T) { @@ -296,12 +300,17 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, 
config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) + // should remain empty assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when common s3 storage config is provided (with session token), ruler and storage config are defaulted to use it", func(t *testing.T) { @@ -356,12 +365,17 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) + // should remain empty assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, 
config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when common gcs storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { @@ -392,12 +406,17 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) + // should remain empty assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig.S3Config, config.StorageConfig.AWSStorageConfig.S3Config) assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when common azure storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { @@ -444,6 +463,8 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, 
config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) // should remain empty assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) @@ -451,6 +472,8 @@ memberlist: assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when common bos storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { @@ -482,6 +505,8 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.S3, config.Ruler.StoreConfig.S3) assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) // should remain empty assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) @@ -489,6 +514,8 @@ memberlist: assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig.S3Config, config.StorageConfig.AWSStorageConfig.S3Config) assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) + assert.EqualValues(t, 
defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when common swift storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { @@ -549,12 +576,103 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Azure, config.Ruler.StoreConfig.Azure) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) + + // should remain empty + assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) + assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig.S3Config, config.StorageConfig.AWSStorageConfig.S3Config) + assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) + assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) + }) + + t.Run("when common alibaba storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { + configInput := `common: + storage: + alibabacloud: + bucket: testbucket + endpoint: https://example.com + access_key_id: abc123 + secret_access_key: def789` + + config, defaults := testContext(configInput, nil) + + assert.Equal(t, "alibaba", config.Ruler.StoreConfig.Type) + + for _, actual := range []alibaba.OssConfig{ + 
config.Ruler.StoreConfig.AlibabaCloud, + config.StorageConfig.AlibabaStorageConfig, + } { + assert.Equal(t, "testbucket", actual.Bucket) + assert.Equal(t, "https://example.com", actual.Endpoint) + assert.Equal(t, "abc123", actual.AccessKeyID) + assert.Equal(t, "def789", actual.SecretAccessKey) + } + + // should remain empty + assert.EqualValues(t, defaults.Ruler.StoreConfig.GCS, config.Ruler.StoreConfig.GCS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.S3, config.Ruler.StoreConfig.S3) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Azure, config.Ruler.StoreConfig.Azure) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) + assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) + + // should remain empty + assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) + assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig.S3Config, config.StorageConfig.AWSStorageConfig.S3Config) + assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) + assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) + assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) + }) + + t.Run("when common cos storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { + configInput := `common: + storage: + cos: + bucketnames: testbucket + endpoint: https://example.com + region: test-region + access_key_id: abc123 + secret_access_key: def789` + + config, defaults := 
testContext(configInput, nil) + + assert.Equal(t, "cos", config.Ruler.StoreConfig.Type) + + for _, actual := range []ibmcloud.COSConfig{ + config.Ruler.StoreConfig.COS, + config.StorageConfig.COSConfig, + } { + assert.Equal(t, "testbucket", actual.BucketNames) + assert.Equal(t, "https://example.com", actual.Endpoint) + assert.Equal(t, "test-region", actual.Region) + assert.Equal(t, "abc123", actual.AccessKeyID) + assert.Equal(t, flagext.SecretWithValue("def789"), actual.SecretAccessKey) + } + + // should remain empty + assert.EqualValues(t, defaults.Ruler.StoreConfig.GCS, config.Ruler.StoreConfig.GCS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.S3, config.Ruler.StoreConfig.S3) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Azure, config.Ruler.StoreConfig.Azure) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) + assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + // should remain empty assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig.S3Config, config.StorageConfig.AWSStorageConfig.S3Config) assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) }) t.Run("when common filesystem/local config is provided, ruler and storage config are defaulted to use it", func(t 
*testing.T) { From 9267ee3561ccbb90589600d7b045f7e05b1b2ee0 Mon Sep 17 00:00:00 2001 From: Emad Mohamadi <33651341+emadolsky@users.noreply.github.com> Date: Fri, 27 Sep 2024 18:42:26 +0200 Subject: [PATCH 19/20] fix(canary): Reconnect immediately upon tail max duration (#14287) --- pkg/canary/reader/reader.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pkg/canary/reader/reader.go b/pkg/canary/reader/reader.go index 88af34ce8e759..c98a7cab8fd78 100644 --- a/pkg/canary/reader/reader.go +++ b/pkg/canary/reader/reader.go @@ -390,6 +390,14 @@ func (r *Reader) run() { // or times out based on the above SetReadDeadline call. err := unmarshal.ReadTailResponseJSON(tailResponse, r.conn) if err != nil { + var e *websocket.CloseError + if errors.As(err, &e) && e.Text == "reached tail max duration limit" { + fmt.Fprintf(r.w, "tail max duration limit exceeded, will retry immediately: %s\n", err) + + r.closeAndReconnect() + continue + } + reason := "error reading websocket" if e, ok := err.(net.Error); ok && e.Timeout() { reason = fmt.Sprintf("timeout tailing new logs (timeout period: %.2fs)", timeoutInterval.Seconds()) From 39119c0c9fbcaf3d7f186ebcbd6fb69b6a254bff Mon Sep 17 00:00:00 2001 From: Trevor Whitney Date: Fri, 27 Sep 2024 13:35:56 -0600 Subject: [PATCH 20/20] chore: refactor detected fields handler (#14288) Co-authored-by: Paul Rogers --- pkg/querier/queryrange/detected_fields.go | 90 ++++++++++--------- .../queryrange/detected_fields_test.go | 25 ++---- pkg/querier/queryrange/roundtrip.go | 3 +- 3 files changed, 53 insertions(+), 65 deletions(-) diff --git a/pkg/querier/queryrange/detected_fields.go b/pkg/querier/queryrange/detected_fields.go index 9c1ecd0c8a8af..115ba9601573c 100644 --- a/pkg/querier/queryrange/detected_fields.go +++ b/pkg/querier/queryrange/detected_fields.go @@ -27,55 +27,59 @@ func NewDetectedFieldsHandler( limitedHandler base.Handler, logHandler base.Handler, limits Limits, -) base.Middleware { - return base.MiddlewareFunc(func(next 
base.Handler) base.Handler { - return base.HandlerFunc( - func(ctx context.Context, req base.Request) (base.Response, error) { - r, ok := req.(*DetectedFieldsRequest) - if !ok { - return nil, httpgrpc.Errorf( - http.StatusBadRequest, - "invalid request type, expected *DetectedFieldsRequest", - ) - } +) base.Handler { + return base.HandlerFunc( + func(ctx context.Context, req base.Request) (base.Response, error) { + r, ok := req.(*DetectedFieldsRequest) + if !ok { + return nil, httpgrpc.Errorf( + http.StatusBadRequest, + "invalid request type, expected *DetectedFieldsRequest", + ) + } - resp, err := makeDownstreamRequest(ctx, limits, limitedHandler, logHandler, r) - if err != nil { - return nil, err - } + resp, err := makeDownstreamRequest(ctx, limits, limitedHandler, logHandler, r) + if err != nil { + return nil, err + } - re, ok := resp.(*LokiResponse) - if !ok || re.Status != "success" { - return resp, nil + re, ok := resp.(*LokiResponse) + if !ok || re.Status != "success" { + return resp, nil + } + + detectedFields := parseDetectedFields(r.FieldLimit, re.Data.Result) + fields := make([]*logproto.DetectedField, len(detectedFields)) + fieldCount := 0 + for k, v := range detectedFields { + p := v.parsers + if len(p) == 0 { + p = nil + } + fields[fieldCount] = &logproto.DetectedField{ + Label: k, + Type: v.fieldType, + Cardinality: v.Estimate(), + Parsers: p, } - detectedFields := parseDetectedFields(r.FieldLimit, re.Data.Result) - fields := make([]*logproto.DetectedField, len(detectedFields)) - fieldCount := 0 - for k, v := range detectedFields { - p := v.parsers - if len(p) == 0 { - p = nil - } - fields[fieldCount] = &logproto.DetectedField{ - Label: k, - Type: v.fieldType, - Cardinality: v.Estimate(), - Parsers: p, - } + fieldCount++ + } - fieldCount++ - } + dfResp := DetectedFieldsResponse{ + Response: &logproto.DetectedFieldsResponse{ + Fields: fields, + }, + Headers: re.Headers, + } + + // Otherwise all they get is the field limit, which is a bit confusing + 
if len(fields) > 0 { + dfResp.Response.FieldLimit = r.GetFieldLimit() + } - return &DetectedFieldsResponse{ - Response: &logproto.DetectedFieldsResponse{ - Fields: fields, - FieldLimit: r.GetFieldLimit(), - }, - Headers: re.Headers, - }, nil - }) - }) + return &dfResp, nil + }) } func makeDownstreamRequest( diff --git a/pkg/querier/queryrange/detected_fields_test.go b/pkg/querier/queryrange/detected_fields_test.go index b82f3a4a70de1..654a42ac8d00a 100644 --- a/pkg/querier/queryrange/detected_fields_test.go +++ b/pkg/querier/queryrange/detected_fields_test.go @@ -1028,10 +1028,7 @@ func TestQuerier_DetectedFields(t *testing.T) { limitedHandler(mockLogfmtStreamWithLabels(1, 5, `{type="test", name="foo"}`)), logHandler(mockLogfmtStreamWithLabels(1, 5, `{type="test", name="foo"}`)), limits, - ).Wrap(base.HandlerFunc(func(_ context.Context, _ base.Request) (base.Response, error) { - t.Fatal("should not be called") - return nil, nil - })) + ) detectedFields := handleRequest(handler, request) // log lines come from querier_mock_test.go @@ -1058,10 +1055,7 @@ func TestQuerier_DetectedFields(t *testing.T) { limitedHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 5, `{type="test", name="bob"}`)), logHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 5, `{type="test", name="bob"}`)), limits, - ).Wrap(base.HandlerFunc(func(_ context.Context, _ base.Request) (base.Response, error) { - t.Fatal("should not be called") - return nil, nil - })) + ) detectedFields := handleRequest(handler, request) // log lines come from querier_mock_test.go @@ -1090,10 +1084,7 @@ func TestQuerier_DetectedFields(t *testing.T) { limitedHandler(mockLogfmtStreamWithLabels(1, 2, `{type="test", name="foo"}`)), logHandler(mockLogfmtStreamWithLabels(1, 2, `{type="test", name="foo"}`)), limits, - ).Wrap(base.HandlerFunc(func(_ context.Context, _ base.Request) (base.Response, error) { - t.Fatal("should not be called") - return nil, nil - })) + ) detectedFields := handleRequest(handler, 
request) // log lines come from querier_mock_test.go @@ -1136,10 +1127,7 @@ func TestQuerier_DetectedFields(t *testing.T) { ), logHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 2, `{type="test"}`)), limits, - ).Wrap(base.HandlerFunc(func(_ context.Context, _ base.Request) (base.Response, error) { - t.Fatal("should not be called") - return nil, nil - })) + ) detectedFields := handleRequest(handler, request) // log lines come from querier_mock_test.go @@ -1188,10 +1176,7 @@ func TestQuerier_DetectedFields(t *testing.T) { ), logHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 2, `{type="test", name="bob"}`)), limits, - ).Wrap(base.HandlerFunc(func(_ context.Context, _ base.Request) (base.Response, error) { - t.Fatal("should not be called") - return nil, nil - })) + ) detectedFields := handleRequest(handler, request) // log lines come from querier_mock_test.go diff --git a/pkg/querier/queryrange/roundtrip.go b/pkg/querier/queryrange/roundtrip.go index 8e1c6a04948da..8f80d94606886 100644 --- a/pkg/querier/queryrange/roundtrip.go +++ b/pkg/querier/queryrange/roundtrip.go @@ -1222,7 +1222,6 @@ func NewDetectedFieldsTripperware( limitedHandler := limitedTripperware.Wrap(next) logHandler := logTripperware.Wrap(next) - detectedFieldsHandler := NewDetectedFieldsHandler(limitedHandler, logHandler, limits) - return NewLimitedRoundTripper(next, limits, schema.Configs, detectedFieldsHandler) + return NewDetectedFieldsHandler(limitedHandler, logHandler, limits) }), nil }