diff --git a/.drone/drone.jsonnet b/.drone/drone.jsonnet index 49f67f06861a3..9351f2f693c06 100644 --- a/.drone/drone.jsonnet +++ b/.drone/drone.jsonnet @@ -610,23 +610,6 @@ local build_image_tag = '0.33.0'; 'cd -', ]) { depends_on: ['clone'], when: onPRs }, make('test', container=false) { depends_on: ['clone-target-branch', 'check-generated-files'] }, - run('test-target-branch', commands=['cd ../loki-target-branch && BUILD_IN_CONTAINER=false make test']) { depends_on: ['clone-target-branch'], when: onPRs }, - make('compare-coverage', container=false, args=[ - 'old=../loki-target-branch/test_results.txt', - 'new=test_results.txt', - 'packages=ingester,distributor,querier,querier/queryrange,iter,storage,chunkenc,logql,loki', - '> diff.txt', - ]) { depends_on: ['test', 'test-target-branch'], when: onPRs }, - run('report-coverage', commands=[ - "total_diff=$(sed 's/%//' diff.txt | awk '{sum+=$3;}END{print sum;}')", - 'if [ $total_diff = 0 ]; then exit 0; fi', - "pull=$(echo $CI_COMMIT_REF | awk -F '/' '{print $3}')", - "body=$(jq -Rs '{body: . }' diff.txt)", - 'curl -X POST -u $USER:$TOKEN -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/grafana/loki/issues/$pull/comments -d "$body" > /dev/null', - ], env={ - USER: 'grafanabot', - TOKEN: { from_secret: github_secret.name }, - }) { depends_on: ['compare-coverage'], when: onPRs }, make('lint', container=false) { depends_on: ['check-generated-files'] }, make('check-mod', container=false) { depends_on: ['test', 'lint'] }, { diff --git a/.drone/drone.yml b/.drone/drone.yml index 7a62b621262a8..c33a66998e71c 100644 --- a/.drone/drone.yml +++ b/.drone/drone.yml @@ -212,47 +212,6 @@ steps: environment: {} image: grafana/loki-build-image:0.33.0 name: test -- commands: - - cd ../loki-target-branch && BUILD_IN_CONTAINER=false make test - depends_on: - - clone-target-branch - environment: {} - image: grafana/loki-build-image:0.33.0 - name: test-target-branch - when: - event: - - pull_request -- commands: - - make BUILD_IN_CONTAINER=false compare-coverage old=../loki-target-branch/test_results.txt - new=test_results.txt packages=ingester,distributor,querier,querier/queryrange,iter,storage,chunkenc,logql,loki - > diff.txt - depends_on: - - test - - test-target-branch - environment: {} - image: grafana/loki-build-image:0.33.0 - name: compare-coverage - when: - event: - - pull_request -- commands: - - total_diff=$(sed 's/%//' diff.txt | awk '{sum+=$3;}END{print sum;}') - - if [ $total_diff = 0 ]; then exit 0; fi - - pull=$(echo $CI_COMMIT_REF | awk -F '/' '{print $3}') - - 'body=$(jq -Rs ''{body: . }'' diff.txt)' - - 'curl -X POST -u $USER:$TOKEN -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/grafana/loki/issues/$pull/comments - -d "$body" > /dev/null' - depends_on: - - compare-coverage - environment: - TOKEN: - from_secret: github_token - USER: grafanabot - image: grafana/loki-build-image:0.33.0 - name: report-coverage - when: - event: - - pull_request - commands: - make BUILD_IN_CONTAINER=false lint depends_on: @@ -2113,6 +2072,6 @@ kind: secret name: gpg_private_key --- kind: signature -hmac: 457592d17208477ceb480f81dbdb88f7b95a5ad015c88d9d6fed06c2422a52f9 +hmac: 51861919f0ba5370a152bdb9267828c742f2042819fb01388c6d23bf44e3cbb7 ... 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e5adea5d2a02..8abd9a846458b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ ##### Enhancements +* [11851](https://github.com/grafana/loki/pull/11851) **elcomtik**: Helm: Allow the definition of resources for GrafanaAgent pods. +* [11819](https://github.com/grafana/loki/pull/11819) **jburnham**: Ruler: Add the ability to disable the `X-Scope-OrgId` tenant identification header in remote write requests. * [11633](https://github.com/grafana/loki/pull/11633) **cyriltovena**: Add profiling integrations to tracing instrumentation. * [11571](https://github.com/grafana/loki/pull/11571) **MichelHollands**: Add a metrics.go log line for requests from querier to ingester * [11477](https://github.com/grafana/loki/pull/11477) **MichelHollands**: support GET for /ingester/shutdown @@ -54,6 +56,8 @@ * [11143](https://github.com/grafana/loki/pull/11143) **sandeepsukhani** otel: Add support for per tenant configuration for mapping otlp data to loki format * [11499](https://github.com/grafana/loki/pull/11284) **jmichalek132** Config: Adds `frontend.log-query-request-headers` to enable logging of request headers in query logs. * [11817](https://github.com/grafana/loki/pull/11817) **ashwanthgoli** Ruler: Add support for filtering results of `/prometheus/api/v1/rules` endpoint by rule_name, rule_group, file and type. +* [11897](https://github.com/grafana/loki/pull/11897) **ashwanthgoli** Metadata: Introduces a separate split interval of `split_recent_metadata_queries_by_interval` for `recent_metadata_query_window` to help with caching recent metadata query results. +* [11970](https://github.com/grafana/loki/pull/11897) **masslessparticle** Ksonnet: Introduces memory limits to the compactor configuration to avoid unbounded memory usage. ##### Fixes * [11074](https://github.com/grafana/loki/pull/11074) **hainenber** Fix panic in lambda-promtail due to mishandling of empty DROP_LABELS env var. @@ -66,6 +70,7 @@ * [11657](https://github.com/grafana/loki/pull/11657) **ashwanthgoli** Log results cache: compose empty response based on the request being served to avoid returning incorrect limit or direction. * [11587](https://github.com/grafana/loki/pull/11587) **trevorwhitney** Fix semantics of label parsing logic of metrics and logs queries. Both only parse the first label if multiple extractions into the same label are requested. * [11776](https://github.com/grafana/loki/pull/11776) **ashwanthgoli** Background Cache: Fixes a bug that is causing the background queue size to be incremented twice for each enqueued item. +* [11921](https://github.com/grafana/loki/pull/11921) **paul1r**: Parsing: String array elements were not being parsed correctly in JSON processing ##### Changes diff --git a/clients/cmd/fluent-bit/Dockerfile b/clients/cmd/fluent-bit/Dockerfile index 563614a75f52e..f0dfbc90c36a3 100644 --- a/clients/cmd/fluent-bit/Dockerfile +++ b/clients/cmd/fluent-bit/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.21.3-bullseye AS builder +FROM golang:1.22.0-bullseye AS builder COPY . 
/src diff --git a/clients/pkg/promtail/targets/cloudflare/target.go b/clients/pkg/promtail/targets/cloudflare/target.go index b64e33da4bc29..19d1f18758273 100644 --- a/clients/pkg/promtail/targets/cloudflare/target.go +++ b/clients/pkg/promtail/targets/cloudflare/target.go @@ -8,13 +8,13 @@ import ( "sync" "time" - "github.com/buger/jsonparser" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/cloudflare-go" "github.com/grafana/dskit/backoff" "github.com/grafana/dskit/concurrency" "github.com/grafana/dskit/multierror" + "github.com/grafana/jsonparser" "github.com/prometheus/common/model" "go.uber.org/atomic" diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md index b675f85157423..d3c5593b4da23 100644 --- a/docs/sources/configure/_index.md +++ b/docs/sources/configure/_index.md @@ -1274,6 +1274,10 @@ remote_write: # CLI flag: -ruler.remote-write.config-refresh-period [config_refresh_period: | default = 10s] + # Add X-Scope-OrgID header in remote write requests. + # CLI flag: -ruler.remote-write.add-org-id-header + [add_org_id_header: | default = true] + # Configuration for rule evaluation. evaluation: # The evaluation mode for the ruler. Can be either 'local' or 'remote'. If set @@ -2327,27 +2331,26 @@ bloom_shipper: [max_tasks_enqueued_per_tenant: | default = 10000] blocks_cache: - # Whether embedded cache is enabled. - # CLI flag: -blocks-cache.enabled + # Cache for bloom blocks. Whether embedded cache is enabled. + # CLI flag: -bloom.blocks-cache.enabled [enabled: | default = false] - # Maximum memory size of the cache in MB. - # CLI flag: -blocks-cache.max-size-mb + # Cache for bloom blocks. Maximum memory size of the cache in MB. + # CLI flag: -bloom.blocks-cache.max-size-mb [max_size_mb: | default = 100] - # Maximum number of entries in the cache. - # CLI flag: -blocks-cache.max-size-items + # Cache for bloom blocks. Maximum number of entries in the cache. + # CLI flag: -bloom.blocks-cache.max-size-items [max_size_items: | default = 0] - # The time to live for items in the cache before they get purged. - # CLI flag: -blocks-cache.ttl - [ttl: | default = 0s] + # Cache for bloom blocks. The time to live for items in the cache before + # they get purged. + # CLI flag: -bloom.blocks-cache.ttl + [ttl: | default = 24h] - # During this period the process waits until the directory becomes not used - # and only after this it will be deleted. If the timeout is reached, the - # directory is force deleted. - # CLI flag: -blocks-cache.remove-directory-graceful-period - [remove_directory_graceful_period: | default = 5m] + # The cache block configures the cache backend. + # The CLI flags prefix for this block configuration is: bloom.metas-cache + [metas_cache: ] ``` ### chunk_store_config @@ -2642,14 +2645,27 @@ ring: # CLI flag: -bloom-compactor.enabled [enabled: | default = false] -# Directory where files can be downloaded for compaction. -# CLI flag: -bloom-compactor.working-directory -[working_directory: | default = ""] - # Interval at which to re-run the compaction operation. # CLI flag: -bloom-compactor.compaction-interval [compaction_interval: | default = 10m] +# How many index periods (days) to wait before compacting a table. This can be +# used to lower cost by not re-writing data to object storage too frequently +# since recent data changes more often. +# CLI flag: -bloom-compactor.min-table-compaction-period +[min_table_compaction_period: | default = 1] + +# How many index periods (days) to wait before compacting a table. 
This can be +# used to lower cost by not trying to compact older data which doesn't change. +# This can be optimized by aligning it with the maximum +# `reject_old_samples_max_age` setting of any tenant. +# CLI flag: -bloom-compactor.max-table-compaction-period +[max_table_compaction_period: | default = 7] + +# Number of workers to run in parallel for compaction. +# CLI flag: -bloom-compactor.worker-parallelism +[worker_parallelism: | default = 1] + # Minimum backoff time between retries. # CLI flag: -bloom-compactor.compaction-retries-min-backoff [compaction_retries_min_backoff: | default = 10s] @@ -2895,6 +2911,30 @@ The `limits_config` block configures global and per-tenant limits in Loki. # CLI flag: -querier.split-metadata-queries-by-interval [split_metadata_queries_by_interval: | default = 1d] +# Experimental. Split interval to use for the portion of metadata request that +# falls within `recent_metadata_query_window`. Rest of the request which is +# outside the window still uses `split_metadata_queries_by_interval`. If set to +# 0, the entire request defaults to using a split interval of +# `split_metadata_queries_by_interval.`. +# CLI flag: -experimental.querier.split-recent-metadata-queries-by-interval +[split_recent_metadata_queries_by_interval: | default = 1h] + +# Experimental. Metadata query window inside which +# `split_recent_metadata_queries_by_interval` gets applied, portion of the +# metadata request that falls in this window is split using +# `split_recent_metadata_queries_by_interval`. The value 0 disables using a +# different split interval for recent metadata queries. +# +# This is added to improve cacheability of recent metadata queries. Query split +# interval also determines the interval used in cache key. The default split +# interval of 24h is useful for caching long queries, each cache key holding 1 +# day's results. But metadata queries are often shorter than 24h, to cache them +# effectively we need a smaller split interval. `recent_metadata_query_window` +# along with `split_recent_metadata_queries_by_interval` help configure a +# shorter split interval for recent metadata queries. +# CLI flag: -experimental.querier.recent-metadata-query-window +[recent_metadata_query_window: | default = 0s] + # Interval to use for time-based splitting when a request is within the # `query_ingesters_within` window; defaults to `split-queries-by-interval` by # setting to 0. @@ -3115,7 +3155,7 @@ shard_streams: # Skip factor for the n-grams created when computing blooms from log lines. # CLI flag: -bloom-compactor.ngram-skip -[bloom_ngram_skip: | default = 0] +[bloom_ngram_skip: | default = 1] # Scalable Bloom Filter desired false-positive rate. # CLI flag: -bloom-compactor.false-positive-rate @@ -3129,6 +3169,12 @@ shard_streams: # CLI flag: -bloom-gateway.cache-key-interval [bloom_gateway_cache_key_interval: | default = 15m] +# The maximum bloom block size. A value of 0 sets an unlimited size. Default is +# 200MB. The actual block size might exceed this limit since blooms will be +# added to blocks until the block exceeds the maximum block size. +# CLI flag: -bloom-compactor.max-block-size +[bloom_compactor_max_block_size: | default = 200MB] + # Allow user to send structured metadata in push payload. # CLI flag: -validation.allow-structured-metadata [allow_structured_metadata: | default = false] @@ -4354,6 +4400,7 @@ The TLS configuration. The cache block configures the cache backend. 
The supported CLI flags `` used to reference this configuration block are: - `bloom-gateway-client.cache` +- `bloom.metas-cache` - `frontend` - `frontend.index-stats-results-cache` - `frontend.label-results-cache` diff --git a/docs/sources/release-notes/cadence.md b/docs/sources/release-notes/cadence.md index f13781cf1c5f3..ef6fbcaf072fd 100644 --- a/docs/sources/release-notes/cadence.md +++ b/docs/sources/release-notes/cadence.md @@ -8,7 +8,7 @@ weight: 1 ## Stable Releases -Loki releases (this includes [Promtail](/clients/promtail), [Loki Canary](/operations/loki-canary/), etc) use the following +Loki releases (this includes [Promtail](https://grafana.com/docs/loki//send-data/promtail/), [Loki Canary](https://grafana.com/docs/loki//operations/loki-canary/), etc.) use the following naming scheme: `MAJOR`.`MINOR`.`PATCH`. - `MAJOR` (roughly once a year): these releases include large new features and possible backwards-compatibility breaks. @@ -18,14 +18,14 @@ naming scheme: `MAJOR`.`MINOR`.`PATCH`. {{% admonition type="note" %}} While our naming scheme resembles [Semantic Versioning](https://semver.org/), at this time we do not strictly follow its guidelines to the letter. Our goal is to provide regular releases that are as stable as possible, and we take backwards-compatibility -seriously. As with any software, always read the [release notes](/release-notes) and the [upgrade guide](/upgrading) whenever +seriously. As with any software, always read the [release notes](https://grafana.com/docs/loki//release-notes/) and the [upgrade guide](https://grafana.com/docs/loki//setup/upgrade/) whenever choosing a new version of Loki to install. {{% /admonition %}} New releases are based of a [weekly release](#weekly-releases) which we have vetted for stability over a number of weeks. We strongly recommend keeping up-to-date with patch releases as they are released. We post updates of new releases in the `#loki` channel -of our [Slack community](/community/getting-in-touch). +of our [Slack community](https://grafana.com/docs/loki//community/getting-in-touch/). You can find all of our releases [on GitHub](https://github.com/grafana/loki/releases) and on [Docker Hub](https://hub.docker.com/r/grafana/loki). diff --git a/docs/sources/release-notes/next.md b/docs/sources/release-notes/next.md index a2a6e81330082..1aadfcba4db19 100644 --- a/docs/sources/release-notes/next.md +++ b/docs/sources/release-notes/next.md @@ -1,16 +1,18 @@ ---- -title: V?.? -description: Version ?.? release notes -weight: 55 ---- - -# V?.? -Grafana Labs is excited to announce the release of Loki ?.?.? Here's a summary of new enhancements and important fixes: - -:warning: This a placeholder for the next release. Clean up all features listed below - -## Features and enhancements - -## Upgrade Considerations - -## Bug fixes +--- +title: V?.? +description: Version ?.? release notes +weight: 55 +--- + +# V?.? +Grafana Labs is excited to announce the release of Loki ?.?.? Here's a summary of new enhancements and important fixes: + +:warning: This a placeholder for the next release. 
Clean up all features listed below
+
+## Features and enhancements
+
+## Upgrade Considerations
+
+## Bug fixes
+
+- **Parse JSON String arrays properly so string elements can be retrieved**: [PR #11921](https://github.com/grafana/loki/pull/11921)
\ No newline at end of file
diff --git a/docs/sources/setup/install/helm/reference.md b/docs/sources/setup/install/helm/reference.md
index e687a560ef715..e7dbfdbdd3f65 100644
--- a/docs/sources/setup/install/helm/reference.md
+++ b/docs/sources/setup/install/helm/reference.md
@@ -2806,6 +2806,15 @@ true
 null
 </pre>
 </td>
 		</tr>
+		<tr>
+			<td>monitoring.selfMonitoring.grafanaAgent.resources</td>
+			<td>object</td>
+			<td>Resource requests and limits for the grafanaAgent pods</td>
+			<td><pre lang="json">
+{}
+</pre>
+</td>
+		</tr>
 		<tr>
diff --git a/docs/variables.mk b/docs/variables.mk index afa0a9e867366..1ec7dbab57677 100644 --- a/docs/variables.mk +++ b/docs/variables.mk @@ -1,8 +1,5 @@ # List of projects to provide to the make-docs script. PROJECTS := loki -# Use alternative image until make-docs 3.0.0 is rolled out. -export DOCS_IMAGE := grafana/docs-base:dbd975af06 - # Set the DOC_VALIDATOR_IMAGE to match the one defined in CI. export DOC_VALIDATOR_IMAGE := $(shell sed -En 's, *image: "(grafana/doc-validator.*)",\1,p' "$(shell git rev-parse --show-toplevel)/.github/workflows/doc-validator.yml") diff --git a/go.mod b/go.mod index 87ea0fd075852..6235582406d50 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,6 @@ require ( github.com/aws/aws-sdk-go v1.44.321 github.com/baidubce/bce-sdk-go v0.9.141 github.com/bmatcuk/doublestar v1.3.4 - github.com/buger/jsonparser v1.1.1 github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b github.com/cespare/xxhash v1.1.0 github.com/cespare/xxhash/v2 v2.2.0 @@ -124,6 +123,7 @@ require ( github.com/efficientgo/core v1.0.0-rc.2 github.com/fsnotify/fsnotify v1.6.0 github.com/gogo/googleapis v1.4.0 + github.com/grafana/jsonparser v0.0.0-20240209175146-098958973a2d github.com/grafana/loki/pkg/push v0.0.0-20231124142027-e52380921608 github.com/heroku/x v0.0.61 github.com/influxdata/tdigest v0.0.2-0.20210216194612-fc98d27c9e8b diff --git a/go.sum b/go.sum index dd756d74f7c69..8ab729e928055 100644 --- a/go.sum +++ b/go.sum @@ -390,8 +390,6 @@ github.com/bmatcuk/doublestar v1.3.4/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9 github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= -github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= -github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b h1:6+ZFm0flnudZzdSE0JxlhR2hKnGPcNB35BjQf4RYQDY= github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M= github.com/caddyserver/caddy v1.0.4/go.mod h1:uruyfVsyMcDb3IOzSKsi1x0wOjy1my/PxOSTcD+24jM= @@ -1003,6 +1001,8 @@ github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85 h1:xLuzPoOzdfNb/RF/I github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85/go.mod h1:crI9WX6p0IhrqB+DqIUHulRW853PaNFf7o4UprV//3I= github.com/grafana/gomemcache v0.0.0-20231204155601-7de47a8c3cb0 h1:aLBiDMjTtXx2800iCIp+8kdjIlvGX0MF/zICQMQO2qU= github.com/grafana/gomemcache v0.0.0-20231204155601-7de47a8c3cb0/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU= +github.com/grafana/jsonparser v0.0.0-20240209175146-098958973a2d h1:YwbJJ/PrVWVdnR+j/EAVuazdeP+Za5qbiH1Vlr+wFXs= +github.com/grafana/jsonparser v0.0.0-20240209175146-098958973a2d/go.mod h1:796sq+UcONnSlzA3RtlBZ+b/hrerkZXiEmO8oMjyRwY= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn7VNWBIvM71O2QsgfgW9fRXZNR0DXe6pDU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/grafana/pyroscope-go/godeltaprof v0.1.6 h1:nEdZ8louGAplSvIJi1HVp7kWvFvdiiYg3COLlTwJiFo= diff --git a/integration/client/client.go b/integration/client/client.go index 2e5a86aa6b3de..1ad94fd0edbb6 100644 --- a/integration/client/client.go +++ 
b/integration/client/client.go @@ -14,9 +14,9 @@ import ( "strings" "time" - "github.com/buger/jsonparser" "github.com/gorilla/websocket" "github.com/grafana/dskit/user" + "github.com/grafana/jsonparser" "github.com/prometheus/common/config" "github.com/prometheus/prometheus/model/labels" "go.opentelemetry.io/collector/pdata/pcommon" diff --git a/integration/cluster/cluster.go b/integration/cluster/cluster.go index 831da46f2cb99..7e978b84eb326 100644 --- a/integration/cluster/cluster.go +++ b/integration/cluster/cluster.go @@ -84,7 +84,6 @@ bloom_gateway: bloom_compactor: enabled: false - working_directory: {{.dataPath}}/bloom-compactor compactor: working_directory: {{.dataPath}}/compactor diff --git a/operator/.bingo/go.mod b/operator/.bingo/go.mod index 610249af0b0b0..3aa5b7c946f5d 100644 --- a/operator/.bingo/go.mod +++ b/operator/.bingo/go.mod @@ -1 +1 @@ -module _ // Fake go.mod auto-created by 'bingo' for go -moddir compatibility with non-Go projects. Commit this file, together with other .mod files. \ No newline at end of file +module _ // Fake go.mod auto-created by 'bingo' for go -moddir compatibility with non-Go projects. Commit this file, together with other .mod files. diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index d978c0c8f423d..d504e4ee31b52 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,8 @@ ## Main +- [11964](https://github.com/grafana/loki/pull/11964) **xperimental**: Provide Azure region for managed credentials using environment variable +- [11920](https://github.com/grafana/loki/pull/11920) **xperimental**: Refactor handling of credentials in managed-auth mode +- [11869](https://github.com/grafana/loki/pull/11869) **periklis**: Add support for running with Google Workload Identity - [11868](https://github.com/grafana/loki/pull/11868) **xperimental**: Integrate support for OpenShift-managed credentials in Azure - [11854](https://github.com/grafana/loki/pull/11854) **periklis**: Allow custom audience for managed-auth on STS - [11802](https://github.com/grafana/loki/pull/11802) **xperimental**: Add support for running with Azure Workload Identity diff --git a/operator/apis/config/v1/projectconfig_types.go b/operator/apis/config/v1/projectconfig_types.go index 06ff8cb090598..8e510b5d3ab79 100644 --- a/operator/apis/config/v1/projectconfig_types.go +++ b/operator/apis/config/v1/projectconfig_types.go @@ -52,16 +52,11 @@ type OpenShiftFeatureGates struct { // Dashboards enables the loki-mixin dashboards into the OpenShift Console Dashboards bool `json:"dashboards,omitempty"` - // ManagedAuthEnv enabled when the operator installation is on OpenShift STS clusters. + // ManagedAuthEnv is true when OpenShift-functions are enabled and the operator has detected + // that it is running with some kind of "workload identity" (AWS STS, Azure WIF) enabled. ManagedAuthEnv bool } -// ManagedAuthEnabled returns true when OpenShift-functions are enabled and the operator has detected that it is -// running with some kind of "workload identity" (AWS STS, Azure WIF) enabled. -func (o *OpenShiftFeatureGates) ManagedAuthEnabled() bool { - return o.Enabled && o.ManagedAuthEnv -} - // FeatureGates is the supported set of all operator feature gates. 
type FeatureGates struct { // ServiceMonitors enables creating a Prometheus-Operator managed ServiceMonitor diff --git a/operator/apis/loki/v1/lokistack_types.go b/operator/apis/loki/v1/lokistack_types.go index a50fb48b187ea..b652ba0c7a4d9 100644 --- a/operator/apis/loki/v1/lokistack_types.go +++ b/operator/apis/loki/v1/lokistack_types.go @@ -1174,6 +1174,27 @@ type LokiStackComponentStatus struct { Ruler PodStatusMap `json:"ruler,omitempty"` } +// CredentialMode represents the type of authentication used for accessing the object storage. +// +// +kubebuilder:validation:Enum=static;token;managed +type CredentialMode string + +const ( + // CredentialModeStatic represents the usage of static, long-lived credentials stored in a Secret. + // This is the default authentication mode and available for all supported object storage types. + CredentialModeStatic CredentialMode = "static" + // CredentialModeToken represents the usage of short-lived tokens retrieved from a credential source. + // In this mode the static configuration does not contain credentials needed for the object storage. + // Instead, they are generated during runtime using a service, which allows for shorter-lived credentials and + // much more granular control. This authentication mode is not supported for all object storage types. + CredentialModeToken CredentialMode = "token" + // CredentialModeManaged represents the usage of short-lived tokens retrieved from a credential source. + // This mode is similar to CredentialModeToken,but instead of having a user-configured credential source, + // it is configured by the environment, for example the Cloud Credential Operator in OpenShift. + // This mode is only supported for certain object storage types in certain runtime environments. + CredentialModeManaged CredentialMode = "managed" +) + // LokiStackStorageStatus defines the observed state of // the Loki storage configuration. type LokiStackStorageStatus struct { @@ -1183,6 +1204,12 @@ type LokiStackStorageStatus struct { // +optional // +kubebuilder:validation:Optional Schemas []ObjectStorageSchema `json:"schemas,omitempty"` + + // CredentialMode contains the authentication mode used for accessing the object storage. + // + // +optional + // +kubebuilder:validation:Optional + CredentialMode CredentialMode `json:"credentialMode,omitempty"` } // LokiStackStatus defines the observed state of LokiStack diff --git a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml index 6854bf38ff661..ad2b2e1bc93b4 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:0.5.0 - createdAt: "2024-01-31T16:48:07Z" + createdAt: "2024-02-12T14:48:52Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. 
features.operators.openshift.io/disconnected: "true" @@ -1472,6 +1472,7 @@ spec: - delete - get - list + - update - watch - apiGroups: - config.openshift.io diff --git a/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml index a8033e692214e..e1a7e5578965a 100644 --- a/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml @@ -4064,6 +4064,14 @@ spec: description: Storage provides summary of all changes that have occurred to the storage configuration. properties: + credentialMode: + description: CredentialMode contains the authentication mode used + for accessing the object storage. + enum: + - static + - token + - managed + type: string schemas: description: Schemas is a list of schemas which have been applied to the LokiStack. diff --git a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml index f8c37162b5a44..b372a29504e3a 100644 --- a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:0.5.0 - createdAt: "2024-01-31T16:48:04Z" + createdAt: "2024-02-12T14:48:49Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. operators.operatorframework.io/builder: operator-sdk-unknown @@ -1452,6 +1452,7 @@ spec: - delete - get - list + - update - watch - apiGroups: - config.openshift.io diff --git a/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml index 8b86ddfff8bbf..f92665f5095d2 100644 --- a/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml @@ -4064,6 +4064,14 @@ spec: description: Storage provides summary of all changes that have occurred to the storage configuration. properties: + credentialMode: + description: CredentialMode contains the authentication mode used + for accessing the object storage. + enum: + - static + - token + - managed + type: string schemas: description: Schemas is a list of schemas which have been applied to the LokiStack. diff --git a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml index 234ddb423a3aa..8026bbcd0fc4c 100644 --- a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: quay.io/openshift-logging/loki-operator:0.1.0 - createdAt: "2024-01-31T16:48:10Z" + createdAt: "2024-02-12T14:48:55Z" description: | The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging. 
## Prerequisites and Requirements @@ -1457,6 +1457,7 @@ spec: - delete - get - list + - update - watch - apiGroups: - config.openshift.io diff --git a/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml index f121699ec6fb8..3163752ad36f0 100644 --- a/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml @@ -4064,6 +4064,14 @@ spec: description: Storage provides summary of all changes that have occurred to the storage configuration. properties: + credentialMode: + description: CredentialMode contains the authentication mode used + for accessing the object storage. + enum: + - static + - token + - managed + type: string schemas: description: Schemas is a list of schemas which have been applied to the LokiStack. diff --git a/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml b/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml index 4661097811b75..d603ef2a9b644 100644 --- a/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml +++ b/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml @@ -4046,6 +4046,14 @@ spec: description: Storage provides summary of all changes that have occurred to the storage configuration. properties: + credentialMode: + description: CredentialMode contains the authentication mode used + for accessing the object storage. + enum: + - static + - token + - managed + type: string schemas: description: Schemas is a list of schemas which have been applied to the LokiStack. diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml index 766a6d7d191e6..072efd5b99128 100644 --- a/operator/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -56,6 +56,7 @@ rules: - delete - get - list + - update - watch - apiGroups: - config.openshift.io diff --git a/operator/controllers/loki/credentialsrequests_controller.go b/operator/controllers/loki/credentialsrequests_controller.go deleted file mode 100644 index efd0226c6a340..0000000000000 --- a/operator/controllers/loki/credentialsrequests_controller.go +++ /dev/null @@ -1,82 +0,0 @@ -package controllers - -import ( - "context" - - "github.com/go-logr/logr" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - - lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" - "github.com/grafana/loki/operator/controllers/loki/internal/lokistack" - "github.com/grafana/loki/operator/controllers/loki/internal/management/state" - "github.com/grafana/loki/operator/internal/external/k8s" - "github.com/grafana/loki/operator/internal/handlers" -) - -// CredentialsRequestsReconciler reconciles a single CredentialsRequest resource for each LokiStack request. -type CredentialsRequestsReconciler struct { - client.Client - Scheme *runtime.Scheme - Log logr.Logger -} - -// Reconcile creates a single CredentialsRequest per LokiStack for the OpenShift cloud-credentials-operator (CCO) to -// provide a managed cloud credentials Secret. On successful creation, the LokiStack resource is annotated -// with `loki.grafana.com/credentials-request-secret-ref` that refers to the secret provided by CCO. If the LokiStack -// resource is not found its accompanying CredentialsRequest resource is deleted. 
-func (r *CredentialsRequestsReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - var stack lokiv1.LokiStack - if err := r.Client.Get(ctx, req.NamespacedName, &stack); err != nil { - if apierrors.IsNotFound(err) { - return ctrl.Result{}, handlers.DeleteCredentialsRequest(ctx, r.Client, req.NamespacedName) - } - return ctrl.Result{}, err - } - - managed, err := state.IsManaged(ctx, req, r.Client) - if err != nil { - return ctrl.Result{}, err - } - if !managed { - r.Log.Info("Skipping reconciliation for unmanaged LokiStack resource", "name", req.String()) - // Stop requeueing for unmanaged LokiStack custom resources - return ctrl.Result{}, nil - } - - storageSecretName := client.ObjectKey{ - Namespace: req.Namespace, - Name: stack.Spec.Storage.Secret.Name, - } - storageSecret := &corev1.Secret{} - err = r.Client.Get(ctx, storageSecretName, storageSecret) - if err != nil { - return ctrl.Result{}, err - } - - secretRef, err := handlers.CreateCredentialsRequest(ctx, r.Client, req.NamespacedName, storageSecret) - if err != nil { - return ctrl.Result{}, err - } - - if err := lokistack.AnnotateForCredentialsRequest(ctx, r.Client, req.NamespacedName, secretRef); err != nil { - return ctrl.Result{}, err - } - - return ctrl.Result{}, nil -} - -// SetupWithManager sets up the controller with the Manager. -func (r *CredentialsRequestsReconciler) SetupWithManager(mgr ctrl.Manager) error { - b := ctrl.NewControllerManagedBy(mgr) - return r.buildController(k8s.NewCtrlBuilder(b)) -} - -func (r *CredentialsRequestsReconciler) buildController(bld k8s.Builder) error { - return bld. - For(&lokiv1.LokiStack{}). - Complete(r) -} diff --git a/operator/controllers/loki/credentialsrequests_controller_test.go b/operator/controllers/loki/credentialsrequests_controller_test.go deleted file mode 100644 index 3c91ee2275e97..0000000000000 --- a/operator/controllers/loki/credentialsrequests_controller_test.go +++ /dev/null @@ -1,164 +0,0 @@ -package controllers - -import ( - "context" - "testing" - - cloudcredentialsv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - - lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" - "github.com/grafana/loki/operator/internal/external/k8s/k8sfakes" - "github.com/grafana/loki/operator/internal/manifests/storage" -) - -func TestCredentialsRequestController_RegistersCustomResource_WithDefaultPredicates(t *testing.T) { - b := &k8sfakes.FakeBuilder{} - k := &k8sfakes.FakeClient{} - c := &CredentialsRequestsReconciler{Client: k, Scheme: scheme} - - b.ForReturns(b) - b.OwnsReturns(b) - - err := c.buildController(b) - require.NoError(t, err) - - // Require only one For-Call for the custom resource - require.Equal(t, 1, b.ForCallCount()) - - // Require For-call with LokiStack resource - obj, _ := b.ForArgsForCall(0) - require.Equal(t, &lokiv1.LokiStack{}, obj) -} - -func TestCredentialsRequestController_DeleteCredentialsRequest_WhenLokiStackNotFound(t *testing.T) { - k := &k8sfakes.FakeClient{} - c := &CredentialsRequestsReconciler{Client: k, Scheme: scheme} - r := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "my-stack", - Namespace: "ns", - }, - } - - // Set managed auth environment - 
t.Setenv("ROLEARN", "a-role-arn") - - k.GetStub = func(_ context.Context, key types.NamespacedName, _ client.Object, _ ...client.GetOption) error { - if key.Name == r.Name && key.Namespace == r.Namespace { - return apierrors.NewNotFound(schema.GroupResource{}, "lokistack not found") - } - return nil - } - - res, err := c.Reconcile(context.Background(), r) - require.NoError(t, err) - require.Equal(t, ctrl.Result{}, res) - require.Equal(t, 1, k.DeleteCallCount()) -} - -func TestCredentialsRequestController_CreateCredentialsRequest_WhenLokiStackNotAnnotated(t *testing.T) { - k := &k8sfakes.FakeClient{} - c := &CredentialsRequestsReconciler{Client: k, Scheme: scheme} - r := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "my-stack", - Namespace: "ns", - }, - } - s := lokiv1.LokiStack{ - ObjectMeta: metav1.ObjectMeta{ - Name: "my-stack", - Namespace: "ns", - }, - Spec: lokiv1.LokiStackSpec{ - ManagementState: lokiv1.ManagementStateManaged, - }, - } - secret := &corev1.Secret{} - - // Set managed auth environment - t.Setenv("ROLEARN", "a-role-arn") - - k.GetStub = func(_ context.Context, key types.NamespacedName, out client.Object, _ ...client.GetOption) error { - switch out.(type) { - case *lokiv1.LokiStack: - if key.Name == r.Name && key.Namespace == r.Namespace { - k.SetClientObject(out, &s) - return nil - } - return apierrors.NewNotFound(schema.GroupResource{}, "lokistack not found") - case *corev1.Secret: - k.SetClientObject(out, secret) - return nil - } - return nil - } - - k.CreateStub = func(_ context.Context, o client.Object, _ ...client.CreateOption) error { - _, isCredReq := o.(*cloudcredentialsv1.CredentialsRequest) - if !isCredReq { - return apierrors.NewBadRequest("something went wrong creating a credentials request") - } - return nil - } - - k.UpdateStub = func(_ context.Context, o client.Object, _ ...client.UpdateOption) error { - stack, ok := o.(*lokiv1.LokiStack) - if !ok { - return apierrors.NewBadRequest("something went wrong creating a credentials request") - } - - _, hasSecretRef := stack.Annotations[storage.AnnotationCredentialsRequestsSecretRef] - if !hasSecretRef { - return apierrors.NewBadRequest("something went updating the lokistack annotations") - } - return nil - } - - res, err := c.Reconcile(context.Background(), r) - require.NoError(t, err) - require.Equal(t, ctrl.Result{}, res) - require.Equal(t, 1, k.CreateCallCount()) - require.Equal(t, 1, k.UpdateCallCount()) -} - -func TestCredentialsRequestController_SkipsUnmanaged(t *testing.T) { - k := &k8sfakes.FakeClient{} - c := &CredentialsRequestsReconciler{Client: k, Scheme: scheme} - r := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "my-stack", - Namespace: "ns", - }, - } - - s := lokiv1.LokiStack{ - ObjectMeta: metav1.ObjectMeta{ - Name: "my-stack", - Namespace: "ns", - }, - Spec: lokiv1.LokiStackSpec{ - ManagementState: lokiv1.ManagementStateUnmanaged, - }, - } - - k.GetStub = func(_ context.Context, key types.NamespacedName, out client.Object, _ ...client.GetOption) error { - if key.Name == s.Name && key.Namespace == s.Namespace { - k.SetClientObject(out, &s) - return nil - } - return apierrors.NewNotFound(schema.GroupResource{}, "something not found") - } - - res, err := c.Reconcile(context.Background(), r) - require.NoError(t, err) - require.Equal(t, ctrl.Result{}, res) -} diff --git a/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery.go b/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery.go deleted file mode 100644 index 
c911c1196eed4..0000000000000 --- a/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery.go +++ /dev/null @@ -1,30 +0,0 @@ -package lokistack - -import ( - "context" - - "github.com/ViaQ/logerr/v2/kverrors" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/grafana/loki/operator/internal/external/k8s" - "github.com/grafana/loki/operator/internal/manifests/storage" -) - -// AnnotateForCredentialsRequest adds the `loki.grafana.com/credentials-request-secret-ref` annotation -// to the named Lokistack. If no LokiStack is found, then skip reconciliation. Or else return an error. -func AnnotateForCredentialsRequest(ctx context.Context, k k8s.Client, key client.ObjectKey, secretRef string) error { - stack, err := getLokiStack(ctx, k, key) - if stack == nil || err != nil { - return err - } - - if val, ok := stack.Annotations[storage.AnnotationCredentialsRequestsSecretRef]; ok && val == secretRef { - return nil - } - - if err := updateAnnotation(ctx, k, stack, storage.AnnotationCredentialsRequestsSecretRef, secretRef); err != nil { - return kverrors.Wrap(err, "failed to update lokistack `credentialsRequestSecretRef` annotation", "key", key) - } - - return nil -} diff --git a/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery_test.go b/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery_test.go deleted file mode 100644 index ef073ca853ba5..0000000000000 --- a/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery_test.go +++ /dev/null @@ -1,98 +0,0 @@ -package lokistack - -import ( - "context" - "testing" - - "github.com/stretchr/testify/require" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" - - lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" - "github.com/grafana/loki/operator/internal/external/k8s/k8sfakes" - "github.com/grafana/loki/operator/internal/manifests/storage" -) - -func TestAnnotateForCredentialsRequest_ReturnError_WhenLokiStackMissing(t *testing.T) { - k := &k8sfakes.FakeClient{} - annotationVal := "ns-my-stack-aws-creds" - stackKey := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - k.GetStub = func(_ context.Context, _ types.NamespacedName, out client.Object, _ ...client.GetOption) error { - return apierrors.NewBadRequest("failed to get lokistack") - } - - err := AnnotateForCredentialsRequest(context.Background(), k, stackKey, annotationVal) - require.Error(t, err) -} - -func TestAnnotateForCredentialsRequest_DoNothing_WhenAnnotationExists(t *testing.T) { - k := &k8sfakes.FakeClient{} - - annotationVal := "ns-my-stack-aws-creds" - s := &lokiv1.LokiStack{ - ObjectMeta: metav1.ObjectMeta{ - Name: "my-stack", - Namespace: "ns", - Annotations: map[string]string{ - storage.AnnotationCredentialsRequestsSecretRef: annotationVal, - }, - }, - } - stackKey := client.ObjectKeyFromObject(s) - - k.GetStub = func(_ context.Context, key types.NamespacedName, out client.Object, _ ...client.GetOption) error { - if key.Name == stackKey.Name && key.Namespace == stackKey.Namespace { - k.SetClientObject(out, s) - return nil - } - return nil - } - - err := AnnotateForCredentialsRequest(context.Background(), k, stackKey, annotationVal) - require.NoError(t, err) - require.Equal(t, 0, k.UpdateCallCount()) -} - -func TestAnnotateForCredentialsRequest_UpdateLokistack_WhenAnnotationMissing(t *testing.T) { - k := &k8sfakes.FakeClient{} - - annotationVal := 
"ns-my-stack-aws-creds" - s := &lokiv1.LokiStack{ - ObjectMeta: metav1.ObjectMeta{ - Name: "my-stack", - Namespace: "ns", - Annotations: map[string]string{}, - }, - } - stackKey := client.ObjectKeyFromObject(s) - - k.GetStub = func(_ context.Context, key types.NamespacedName, out client.Object, _ ...client.GetOption) error { - if key.Name == stackKey.Name && key.Namespace == stackKey.Namespace { - k.SetClientObject(out, s) - return nil - } - return nil - } - - k.UpdateStub = func(_ context.Context, o client.Object, _ ...client.UpdateOption) error { - stack, ok := o.(*lokiv1.LokiStack) - if !ok { - return apierrors.NewBadRequest("failed conversion to *lokiv1.LokiStack") - } - val, ok := stack.Annotations[storage.AnnotationCredentialsRequestsSecretRef] - if !ok { - return apierrors.NewBadRequest("missing annotation") - } - if val != annotationVal { - return apierrors.NewBadRequest("annotations does not match input") - } - return nil - } - - err := AnnotateForCredentialsRequest(context.Background(), k, stackKey, annotationVal) - require.NoError(t, err) - require.Equal(t, 1, k.UpdateCallCount()) -} diff --git a/operator/controllers/loki/lokistack_controller.go b/operator/controllers/loki/lokistack_controller.go index 40e7691bd1a2b..eb30a1a9bf555 100644 --- a/operator/controllers/loki/lokistack_controller.go +++ b/operator/controllers/loki/lokistack_controller.go @@ -3,7 +3,6 @@ package controllers import ( "context" "errors" - "strings" "time" "github.com/go-logr/logr" @@ -16,7 +15,6 @@ import ( corev1 "k8s.io/api/core/v1" networkingv1 "k8s.io/api/networking/v1" rbacv1 "k8s.io/api/rbac/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" @@ -31,6 +29,7 @@ import ( configv1 "github.com/grafana/loki/operator/apis/config/v1" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" "github.com/grafana/loki/operator/controllers/loki/internal/management/state" + "github.com/grafana/loki/operator/internal/config" "github.com/grafana/loki/operator/internal/external/k8s" "github.com/grafana/loki/operator/internal/handlers" manifestsocp "github.com/grafana/loki/operator/internal/manifests/openshift" @@ -111,6 +110,7 @@ type LokiStackReconciler struct { Log logr.Logger Scheme *runtime.Scheme FeatureGates configv1.FeatureGates + AuthConfig *config.ManagedAuthConfig } // +kubebuilder:rbac:groups=loki.grafana.com,resources=lokistacks,verbs=get;list;watch;create;update;patch;delete @@ -128,7 +128,7 @@ type LokiStackReconciler struct { // +kubebuilder:rbac:groups=policy,resources=poddisruptionbudgets,verbs=get;list;watch;create;update // +kubebuilder:rbac:groups=config.openshift.io,resources=dnses;apiservers;proxies,verbs=get;list;watch // +kubebuilder:rbac:groups=route.openshift.io,resources=routes,verbs=get;list;watch;create;update;delete -// +kubebuilder:rbac:groups=cloudcredential.openshift.io,resources=credentialsrequests,verbs=get;list;watch;create;delete +// +kubebuilder:rbac:groups=cloudcredential.openshift.io,resources=credentialsrequests,verbs=get;list;watch;create;update;delete // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. 
@@ -150,7 +150,7 @@ func (r *LokiStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } var degraded *status.DegradedError - err = r.updateResources(ctx, req) + credentialMode, err := r.updateResources(ctx, req) switch { case errors.As(err, °raded): // degraded errors are handled by status.Refresh below @@ -158,7 +158,7 @@ func (r *LokiStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, err } - err = status.Refresh(ctx, r.Client, req, time.Now(), degraded) + err = status.Refresh(ctx, r.Client, req, time.Now(), credentialMode, degraded) if err != nil { return ctrl.Result{}, err } @@ -172,18 +172,25 @@ func (r *LokiStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, nil } -func (r *LokiStackReconciler) updateResources(ctx context.Context, req ctrl.Request) error { +func (r *LokiStackReconciler) updateResources(ctx context.Context, req ctrl.Request) (lokiv1.CredentialMode, error) { if r.FeatureGates.BuiltInCertManagement.Enabled { if err := handlers.CreateOrRotateCertificates(ctx, r.Log, req, r.Client, r.Scheme, r.FeatureGates); err != nil { - return err + return "", err } } - if err := handlers.CreateOrUpdateLokiStack(ctx, r.Log, req, r.Client, r.Scheme, r.FeatureGates); err != nil { - return err + if r.FeatureGates.OpenShift.ManagedAuthEnv { + if err := handlers.CreateCredentialsRequest(ctx, r.Log, r.Scheme, r.AuthConfig, r.Client, req); err != nil { + return "", err + } + } + + credentialMode, err := handlers.CreateOrUpdateLokiStack(ctx, r.Log, req, r.Client, r.Scheme, r.FeatureGates) + if err != nil { + return "", err } - return nil + return credentialMode, nil } // SetupWithManager sets up the controller with the Manager. @@ -216,7 +223,7 @@ func (r *LokiStackReconciler) buildController(bld k8s.Builder) error { if r.FeatureGates.OpenShift.Enabled { bld = bld. Owns(&routev1.Route{}, updateOrDeleteOnlyPred). 
- Watches(&cloudcredentialv1.CredentialsRequest{}, r.enqueueForCredentialsRequest(), updateOrDeleteOnlyPred) + Owns(&cloudcredentialv1.CredentialsRequest{}, updateOrDeleteOnlyPred) if r.FeatureGates.OpenShift.ClusterTLSPolicy { bld = bld.Watches(&openshiftconfigv1.APIServer{}, r.enqueueAllLokiStacksHandler(), updateOrDeleteOnlyPred) @@ -358,34 +365,3 @@ func (r *LokiStackReconciler) enqueueForStorageCA() handler.EventHandler { return requests }) } - -func (r *LokiStackReconciler) enqueueForCredentialsRequest() handler.EventHandler { - return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { - a := obj.GetAnnotations() - owner, ok := a[manifestsocp.AnnotationCredentialsRequestOwner] - if !ok { - return nil - } - - var ( - ownerParts = strings.Split(owner, "/") - namespace = ownerParts[0] - name = ownerParts[1] - key = client.ObjectKey{Namespace: namespace, Name: name} - ) - - var stack lokiv1.LokiStack - if err := r.Client.Get(ctx, key, &stack); err != nil { - if !apierrors.IsNotFound(err) { - r.Log.Error(err, "failed retrieving CredentialsRequest owning Lokistack", "key", key) - } - return nil - } - - return []reconcile.Request{ - { - NamespacedName: key, - }, - } - }) -} diff --git a/operator/controllers/loki/lokistack_controller_test.go b/operator/controllers/loki/lokistack_controller_test.go index 515d829766aa1..6be22022c19db 100644 --- a/operator/controllers/loki/lokistack_controller_test.go +++ b/operator/controllers/loki/lokistack_controller_test.go @@ -161,7 +161,18 @@ func TestLokiStackController_RegisterOwnedResourcesForUpdateOrDeleteOnly(t *test { obj: &routev1.Route{}, index: 10, - ownCallsCount: 11, + ownCallsCount: 12, + featureGates: configv1.FeatureGates{ + OpenShift: configv1.OpenShiftFeatureGates{ + Enabled: true, + }, + }, + pred: updateOrDeleteOnlyPred, + }, + { + obj: &cloudcredentialv1.CredentialsRequest{}, + index: 11, + ownCallsCount: 12, featureGates: configv1.FeatureGates{ OpenShift: configv1.OpenShiftFeatureGates{ Enabled: true, @@ -203,20 +214,9 @@ func TestLokiStackController_RegisterWatchedResources(t *testing.T) { } table := []test{ { - src: &cloudcredentialv1.CredentialsRequest{}, + src: &openshiftconfigv1.APIServer{}, index: 3, watchesCallsCount: 4, - featureGates: configv1.FeatureGates{ - OpenShift: configv1.OpenShiftFeatureGates{ - Enabled: true, - }, - }, - pred: updateOrDeleteOnlyPred, - }, - { - src: &openshiftconfigv1.APIServer{}, - index: 4, - watchesCallsCount: 5, featureGates: configv1.FeatureGates{ OpenShift: configv1.OpenShiftFeatureGates{ Enabled: true, @@ -227,8 +227,8 @@ func TestLokiStackController_RegisterWatchedResources(t *testing.T) { }, { src: &openshiftconfigv1.Proxy{}, - index: 4, - watchesCallsCount: 5, + index: 3, + watchesCallsCount: 4, featureGates: configv1.FeatureGates{ OpenShift: configv1.OpenShiftFeatureGates{ Enabled: true, diff --git a/operator/docs/operator/api.md b/operator/docs/operator/api.md index 92f93dd970224..48fbe0c8a7e48 100644 --- a/operator/docs/operator/api.md +++ b/operator/docs/operator/api.md @@ -1100,6 +1100,40 @@ string +## CredentialMode { #loki-grafana-com-v1-CredentialMode } +(string alias) +

+<p>
+(Appears on:<a href="#loki-grafana-com-v1-LokiStackStorageStatus">LokiStackStorageStatus</a>)
+</p>
+<div>
+<p>CredentialMode represents the type of authentication used for accessing the object storage.</p>
+</div>
+<table>
+<thead>
+<tr>
+<th>Value</th>
+<th>Description</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><p>"managed"</p></td>
+<td><p>CredentialModeManaged represents the usage of short-lived tokens retrieved from a credential source.
+This mode is similar to CredentialModeToken,but instead of having a user-configured credential source,
+it is configured by the environment, for example the Cloud Credential Operator in OpenShift.
+This mode is only supported for certain object storage types in certain runtime environments.</p>
+</td>
+</tr>
+<tr>
+<td><p>"static"</p></td>
+<td><p>CredentialModeStatic represents the usage of static, long-lived credentials stored in a Secret.
+This is the default authentication mode and available for all supported object storage types.</p>
+</td>
+</tr>
+<tr>
+<td><p>"token"</p></td>
+<td><p>CredentialModeToken represents the usage of short-lived tokens retrieved from a credential source.
+In this mode the static configuration does not contain credentials needed for the object storage.
+Instead, they are generated during runtime using a service, which allows for shorter-lived credentials and
+much more granular control. This authentication mode is not supported for all object storage types.</p>
+</td>
+</tr>
+</tbody>
+</table>
 ## HashRingSpec { #loki-grafana-com-v1-HashRingSpec }
 
 (Appears on:<a href="#loki-grafana-com-v1-LokiStackSpec">LokiStackSpec</a>)
@@ -2152,6 +2186,20 @@ the Loki storage configuration.</p>
 to the LokiStack.</p>
 </td>
 </tr>
+<tr>
+<td>
+<code>credentialMode</code><br/>
+<em>
+<a href="#loki-grafana-com-v1-CredentialMode">
+CredentialMode
+</a>
+</em>
+</td>
+<td>
+<em>(Optional)</em>
+<p>CredentialMode contains the authentication mode used for accessing the object storage.</p>
+</td>
+</tr>
 </tbody>
 </table>
diff --git a/operator/docs/operator/feature-gates.md b/operator/docs/operator/feature-gates.md
index 34fbdf4b69a4d..189b72e4ddb12 100644
--- a/operator/docs/operator/feature-gates.md
+++ b/operator/docs/operator/feature-gates.md
@@ -417,7 +417,8 @@ bool
 </td>
 <td>
-<p>ManagedAuthEnv enabled when the operator installation is on OpenShift STS clusters.</p>
+<p>ManagedAuthEnv is true when OpenShift-functions are enabled and the operator has detected
+that it is running with some kind of “workload identity” (AWS STS, Azure WIF) enabled.</p>
 </td>
 </tr>
diff --git a/operator/internal/config/managed_auth.go b/operator/internal/config/managed_auth.go new file mode 100644 index 0000000000000..76f9d72f3c262 --- /dev/null +++ b/operator/internal/config/managed_auth.go @@ -0,0 +1,50 @@ +package config + +import "os" + +type AWSEnvironment struct { + RoleARN string +} + +type AzureEnvironment struct { + ClientID string + SubscriptionID string + TenantID string + Region string +} + +type ManagedAuthConfig struct { + AWS *AWSEnvironment + Azure *AzureEnvironment +} + +func discoverManagedAuthConfig() *ManagedAuthConfig { + // AWS + roleARN := os.Getenv("ROLEARN") + + // Azure + clientID := os.Getenv("CLIENTID") + tenantID := os.Getenv("TENANTID") + subscriptionID := os.Getenv("SUBSCRIPTIONID") + region := os.Getenv("REGION") + + switch { + case roleARN != "": + return &ManagedAuthConfig{ + AWS: &AWSEnvironment{ + RoleARN: roleARN, + }, + } + case clientID != "" && tenantID != "" && subscriptionID != "": + return &ManagedAuthConfig{ + Azure: &AzureEnvironment{ + ClientID: clientID, + SubscriptionID: subscriptionID, + TenantID: tenantID, + Region: region, + }, + } + } + + return nil +} diff --git a/operator/internal/config/options.go b/operator/internal/config/options.go index 7ed9abb526a7b..dc54404f22450 100644 --- a/operator/internal/config/options.go +++ b/operator/internal/config/options.go @@ -17,19 +17,24 @@ import ( // LoadConfig initializes the controller configuration, optionally overriding the defaults // from a provided configuration file. -func LoadConfig(scheme *runtime.Scheme, configFile string) (*configv1.ProjectConfig, ctrl.Options, error) { +func LoadConfig(scheme *runtime.Scheme, configFile string) (*configv1.ProjectConfig, *ManagedAuthConfig, ctrl.Options, error) { options := ctrl.Options{Scheme: scheme} if configFile == "" { - return &configv1.ProjectConfig{}, options, nil + return &configv1.ProjectConfig{}, nil, options, nil } ctrlCfg, err := loadConfigFile(scheme, configFile) if err != nil { - return nil, options, fmt.Errorf("failed to parse controller manager config file: %w", err) + return nil, nil, options, fmt.Errorf("failed to parse controller manager config file: %w", err) + } + + managedAuth := discoverManagedAuthConfig() + if ctrlCfg.Gates.OpenShift.Enabled && managedAuth != nil { + ctrlCfg.Gates.OpenShift.ManagedAuthEnv = true } options = mergeOptionsFromFile(options, ctrlCfg) - return ctrlCfg, options, nil + return ctrlCfg, managedAuth, options, nil } func mergeOptionsFromFile(o manager.Options, cfg *configv1.ProjectConfig) manager.Options { diff --git a/operator/internal/handlers/credentialsrequest.go b/operator/internal/handlers/credentialsrequest.go new file mode 100644 index 0000000000000..0d562332dc9d5 --- /dev/null +++ b/operator/internal/handlers/credentialsrequest.go @@ -0,0 +1,73 @@ +package handlers + +import ( + "context" + "fmt" + + "github.com/ViaQ/logerr/v2/kverrors" + "github.com/go-logr/logr" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" + "github.com/grafana/loki/operator/internal/config" + "github.com/grafana/loki/operator/internal/external/k8s" + "github.com/grafana/loki/operator/internal/manifests" + "github.com/grafana/loki/operator/internal/manifests/openshift" +) + +// CreateCredentialsRequest creates a new CredentialsRequest resource for a 
Lokistack +// to request a cloud credentials Secret resource from the OpenShift cloud-credentials-operator. +func CreateCredentialsRequest(ctx context.Context, log logr.Logger, scheme *runtime.Scheme, managedAuth *config.ManagedAuthConfig, k k8s.Client, req ctrl.Request) error { + ll := log.WithValues("lokistack", req.NamespacedName, "event", "createCredentialsRequest") + + var stack lokiv1.LokiStack + if err := k.Get(ctx, req.NamespacedName, &stack); err != nil { + if apierrors.IsNotFound(err) { + // maybe the user deleted it before we could react? Either way this isn't an issue + ll.Error(err, "could not find the requested LokiStack", "name", req.String()) + return nil + } + return kverrors.Wrap(err, "failed to lookup LokiStack", "name", req.String()) + } + + opts := openshift.Options{ + BuildOpts: openshift.BuildOptions{ + LokiStackName: stack.Name, + LokiStackNamespace: stack.Namespace, + RulerName: manifests.RulerName(stack.Name), + }, + ManagedAuth: managedAuth, + } + + credReq, err := openshift.BuildCredentialsRequest(opts) + if err != nil { + return err + } + + err = ctrl.SetControllerReference(&stack, credReq, scheme) + if err != nil { + return kverrors.Wrap(err, "failed to set controller owner reference to resource") + } + + desired := credReq.DeepCopyObject().(client.Object) + mutateFn := manifests.MutateFuncFor(credReq, desired, map[string]string{}) + + op, err := ctrl.CreateOrUpdate(ctx, k, credReq, mutateFn) + if err != nil { + return kverrors.Wrap(err, "failed to configure CredentialRequest") + } + + msg := fmt.Sprintf("Resource has been %s", op) + switch op { + case ctrlutil.OperationResultNone: + ll.V(1).Info(msg) + default: + ll.Info(msg) + } + + return nil +} diff --git a/operator/internal/handlers/credentialsrequest_create.go b/operator/internal/handlers/credentialsrequest_create.go deleted file mode 100644 index 6074e10b2d5af..0000000000000 --- a/operator/internal/handlers/credentialsrequest_create.go +++ /dev/null @@ -1,66 +0,0 @@ -package handlers - -import ( - "context" - "errors" - - "github.com/ViaQ/logerr/v2/kverrors" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/grafana/loki/operator/internal/external/k8s" - "github.com/grafana/loki/operator/internal/manifests/openshift" - "github.com/grafana/loki/operator/internal/manifests/storage" -) - -var ( - errAzureNoSecretFound = errors.New("can not create CredentialsRequest: no azure secret found") - errAzureNoRegion = errors.New("can not create CredentialsRequest: missing secret field: region") -) - -// CreateCredentialsRequest creates a new CredentialsRequest resource for a Lokistack -// to request a cloud credentials Secret resource from the OpenShift cloud-credentials-operator. -func CreateCredentialsRequest(ctx context.Context, k k8s.Client, stack client.ObjectKey, secret *corev1.Secret) (string, error) { - managedAuthEnv := openshift.DiscoverManagedAuthEnv() - if managedAuthEnv == nil { - return "", nil - } - - if managedAuthEnv.Azure != nil && managedAuthEnv.Azure.Region == "" { - // Managed environment for Azure does not provide Region, but we need this for the CredentialsRequest. 
- // This looks like an oversight when creating the UI in OpenShift, but for now we need to pull this data - // from somewhere else -> the Azure Storage Secret - if secret == nil { - return "", errAzureNoSecretFound - } - - region := secret.Data[storage.KeyAzureRegion] - if len(region) == 0 { - return "", errAzureNoRegion - } - - managedAuthEnv.Azure.Region = string(region) - } - - opts := openshift.Options{ - BuildOpts: openshift.BuildOptions{ - LokiStackName: stack.Name, - LokiStackNamespace: stack.Namespace, - }, - ManagedAuthEnv: managedAuthEnv, - } - - credReq, err := openshift.BuildCredentialsRequest(opts) - if err != nil { - return "", err - } - - if err := k.Create(ctx, credReq); err != nil { - if !apierrors.IsAlreadyExists(err) { - return "", kverrors.Wrap(err, "failed to create credentialsrequest", "key", client.ObjectKeyFromObject(credReq)) - } - } - - return credReq.Spec.SecretRef.Name, nil -} diff --git a/operator/internal/handlers/credentialsrequest_create_test.go b/operator/internal/handlers/credentialsrequest_create_test.go deleted file mode 100644 index df903eaec662f..0000000000000 --- a/operator/internal/handlers/credentialsrequest_create_test.go +++ /dev/null @@ -1,114 +0,0 @@ -package handlers - -import ( - "context" - "testing" - - cloudcredentialv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime/schema" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/grafana/loki/operator/internal/external/k8s/k8sfakes" -) - -func TestCreateCredentialsRequest_DoNothing_WhenManagedAuthEnvMissing(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - secretRef, err := CreateCredentialsRequest(context.Background(), k, key, nil) - require.NoError(t, err) - require.Empty(t, secretRef) -} - -func TestCreateCredentialsRequest_CreateNewResource(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - t.Setenv("ROLEARN", "a-role-arn") - - secretRef, err := CreateCredentialsRequest(context.Background(), k, key, nil) - require.NoError(t, err) - require.NotEmpty(t, secretRef) - require.Equal(t, 1, k.CreateCallCount()) -} - -func TestCreateCredentialsRequest_CreateNewResourceAzure(t *testing.T) { - wantRegion := "test-region" - - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - secret := &corev1.Secret{ - Data: map[string][]byte{ - "region": []byte(wantRegion), - }, - } - - t.Setenv("CLIENTID", "test-client-id") - t.Setenv("TENANTID", "test-tenant-id") - t.Setenv("SUBSCRIPTIONID", "test-subscription-id") - - secretRef, err := CreateCredentialsRequest(context.Background(), k, key, secret) - require.NoError(t, err) - require.NotEmpty(t, secretRef) - - require.Equal(t, 1, k.CreateCallCount()) - _, obj, _ := k.CreateArgsForCall(0) - credReq, ok := obj.(*cloudcredentialv1.CredentialsRequest) - require.True(t, ok) - - providerSpec := &cloudcredentialv1.AzureProviderSpec{} - require.NoError(t, cloudcredentialv1.Codec.DecodeProviderSpec(credReq.Spec.ProviderSpec, providerSpec)) - - require.Equal(t, wantRegion, providerSpec.AzureRegion) -} - -func TestCreateCredentialsRequest_CreateNewResourceAzure_Errors(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - tt := []struct { - secret *corev1.Secret - wantError string 
- }{ - { - secret: nil, - wantError: errAzureNoSecretFound.Error(), - }, - { - secret: &corev1.Secret{}, - wantError: errAzureNoRegion.Error(), - }, - } - - for _, tc := range tt { - tc := tc - t.Run(tc.wantError, func(t *testing.T) { - // Not parallel (environment variables) - t.Setenv("CLIENTID", "test-client-id") - t.Setenv("TENANTID", "test-tenant-id") - t.Setenv("SUBSCRIPTIONID", "test-subscription-id") - - _, err := CreateCredentialsRequest(context.Background(), k, key, tc.secret) - require.EqualError(t, err, tc.wantError) - }) - } -} - -func TestCreateCredentialsRequest_DoNothing_WhenCredentialsRequestExist(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - t.Setenv("ROLEARN", "a-role-arn") - - k.CreateStub = func(_ context.Context, _ client.Object, _ ...client.CreateOption) error { - return errors.NewAlreadyExists(schema.GroupResource{}, "credentialsrequest exists") - } - - secretRef, err := CreateCredentialsRequest(context.Background(), k, key, nil) - require.NoError(t, err) - require.NotEmpty(t, secretRef) - require.Equal(t, 1, k.CreateCallCount()) -} diff --git a/operator/internal/handlers/credentialsrequest_delete.go b/operator/internal/handlers/credentialsrequest_delete.go deleted file mode 100644 index edf05fcb205d0..0000000000000 --- a/operator/internal/handlers/credentialsrequest_delete.go +++ /dev/null @@ -1,43 +0,0 @@ -package handlers - -import ( - "context" - - "github.com/ViaQ/logerr/v2/kverrors" - "k8s.io/apimachinery/pkg/api/errors" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/grafana/loki/operator/internal/external/k8s" - "github.com/grafana/loki/operator/internal/manifests/openshift" -) - -// DeleteCredentialsRequest deletes a LokiStack's accompanying CredentialsRequest resource -// to trigger the OpenShift cloud-credentials-operator to wipe out any credentials related -// Secret resource on the LokiStack namespace. 
-func DeleteCredentialsRequest(ctx context.Context, k k8s.Client, stack client.ObjectKey) error { - managedAuthEnv := openshift.DiscoverManagedAuthEnv() - if managedAuthEnv == nil { - return nil - } - - opts := openshift.Options{ - BuildOpts: openshift.BuildOptions{ - LokiStackName: stack.Name, - LokiStackNamespace: stack.Namespace, - }, - ManagedAuthEnv: managedAuthEnv, - } - - credReq, err := openshift.BuildCredentialsRequest(opts) - if err != nil { - return kverrors.Wrap(err, "failed to build credentialsrequest", "key", stack) - } - - if err := k.Delete(ctx, credReq); err != nil { - if !errors.IsNotFound(err) { - return kverrors.Wrap(err, "failed to delete credentialsrequest", "key", client.ObjectKeyFromObject(credReq)) - } - } - - return nil -} diff --git a/operator/internal/handlers/credentialsrequest_delete_test.go b/operator/internal/handlers/credentialsrequest_delete_test.go deleted file mode 100644 index 57f1c005ee706..0000000000000 --- a/operator/internal/handlers/credentialsrequest_delete_test.go +++ /dev/null @@ -1,47 +0,0 @@ -package handlers - -import ( - "context" - "testing" - - "github.com/stretchr/testify/require" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime/schema" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/grafana/loki/operator/internal/external/k8s/k8sfakes" -) - -func TestDeleteCredentialsRequest_DoNothing_WhenManagedAuthEnvMissing(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - err := DeleteCredentialsRequest(context.Background(), k, key) - require.NoError(t, err) -} - -func TestDeleteCredentialsRequest_DeleteExistingResource(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - t.Setenv("ROLEARN", "a-role-arn") - - err := DeleteCredentialsRequest(context.Background(), k, key) - require.NoError(t, err) - require.Equal(t, 1, k.DeleteCallCount()) -} - -func TestDeleteCredentialsRequest_DoNothing_WhenCredentialsRequestNotExists(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - t.Setenv("ROLEARN", "a-role-arn") - - k.DeleteStub = func(_ context.Context, _ client.Object, _ ...client.DeleteOption) error { - return errors.NewNotFound(schema.GroupResource{}, "credentials request not found") - } - - err := DeleteCredentialsRequest(context.Background(), k, key) - require.NoError(t, err) - require.Equal(t, 1, k.DeleteCallCount()) -} diff --git a/operator/internal/handlers/credentialsrequest_test.go b/operator/internal/handlers/credentialsrequest_test.go new file mode 100644 index 0000000000000..dd6dfb50d77dc --- /dev/null +++ b/operator/internal/handlers/credentialsrequest_test.go @@ -0,0 +1,146 @@ +package handlers + +import ( + "context" + "testing" + + cloudcredentialv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" + "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" + "github.com/grafana/loki/operator/internal/config" + "github.com/grafana/loki/operator/internal/external/k8s/k8sfakes" +) + +func credentialsRequestFakeClient(cr *cloudcredentialv1.CredentialsRequest, lokistack *lokiv1.LokiStack) *k8sfakes.FakeClient { + k := 
&k8sfakes.FakeClient{} + k.GetStub = func(_ context.Context, name types.NamespacedName, object client.Object, _ ...client.GetOption) error { + switch object.(type) { + case *cloudcredentialv1.CredentialsRequest: + if cr == nil { + return errors.NewNotFound(schema.GroupResource{}, name.Name) + } + k.SetClientObject(object, cr) + case *lokiv1.LokiStack: + if lokistack == nil { + return errors.NewNotFound(schema.GroupResource{}, name.Name) + } + k.SetClientObject(object, lokistack) + } + return nil + } + + return k +} + +func TestCreateCredentialsRequest_CreateNewResource(t *testing.T) { + wantServiceAccountNames := []string{ + "my-stack", + "my-stack-ruler", + } + + lokistack := &lokiv1.LokiStack{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-stack", + Namespace: "ns", + }, + } + + k := credentialsRequestFakeClient(nil, lokistack) + req := ctrl.Request{ + NamespacedName: client.ObjectKey{Name: "my-stack", Namespace: "ns"}, + } + + managedAuth := &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ + RoleARN: "a-role-arn", + }, + } + + err := CreateCredentialsRequest(context.Background(), logger, scheme, managedAuth, k, req) + require.NoError(t, err) + require.Equal(t, 1, k.CreateCallCount()) + + _, obj, _ := k.CreateArgsForCall(0) + credReq, ok := obj.(*cloudcredentialv1.CredentialsRequest) + require.True(t, ok) + + require.Equal(t, wantServiceAccountNames, credReq.Spec.ServiceAccountNames) +} + +func TestCreateCredentialsRequest_CreateNewResourceAzure(t *testing.T) { + wantRegion := "test-region" + + lokistack := &lokiv1.LokiStack{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-stack", + Namespace: "ns", + }, + } + + k := credentialsRequestFakeClient(nil, lokistack) + req := ctrl.Request{ + NamespacedName: client.ObjectKey{Name: "my-stack", Namespace: "ns"}, + } + + managedAuth := &config.ManagedAuthConfig{ + Azure: &config.AzureEnvironment{ + ClientID: "test-client-id", + SubscriptionID: "test-tenant-id", + TenantID: "test-subscription-id", + Region: "test-region", + }, + } + + err := CreateCredentialsRequest(context.Background(), logger, scheme, managedAuth, k, req) + require.NoError(t, err) + + require.Equal(t, 1, k.CreateCallCount()) + _, obj, _ := k.CreateArgsForCall(0) + credReq, ok := obj.(*cloudcredentialv1.CredentialsRequest) + require.True(t, ok) + + providerSpec := &cloudcredentialv1.AzureProviderSpec{} + require.NoError(t, cloudcredentialv1.Codec.DecodeProviderSpec(credReq.Spec.ProviderSpec, providerSpec)) + + require.Equal(t, wantRegion, providerSpec.AzureRegion) +} + +func TestCreateCredentialsRequest_DoNothing_WhenCredentialsRequestExist(t *testing.T) { + req := ctrl.Request{ + NamespacedName: client.ObjectKey{Name: "my-stack", Namespace: "ns"}, + } + + managedAuth := &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ + RoleARN: "a-role-arn", + }, + } + + cr := &cloudcredentialv1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-stack", + Namespace: "ns", + }, + } + lokistack := &lokiv1.LokiStack{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-stack", + Namespace: "ns", + }, + } + + k := credentialsRequestFakeClient(cr, lokistack) + + err := CreateCredentialsRequest(context.Background(), logger, scheme, managedAuth, k, req) + require.NoError(t, err) + require.Equal(t, 2, k.GetCallCount()) + require.Equal(t, 0, k.CreateCallCount()) + require.Equal(t, 1, k.UpdateCallCount()) +} diff --git a/operator/internal/handlers/internal/storage/secrets.go b/operator/internal/handlers/internal/storage/secrets.go index 6b8275d2d28ae..2492eea4d4191 100644 --- 
a/operator/internal/handlers/internal/storage/secrets.go +++ b/operator/internal/handlers/internal/storage/secrets.go @@ -3,6 +3,7 @@ package storage import ( "context" "crypto/sha1" + "encoding/json" "errors" "fmt" "sort" @@ -32,8 +33,13 @@ var ( errAzureNoCredentials = errors.New("azure storage secret does contain neither account_key or client_id") errAzureMixedCredentials = errors.New("azure storage secret can not contain both account_key and client_id") errAzureManagedIdentityNoOverride = errors.New("when in managed mode, storage secret can not contain credentials") + + errGCPParseCredentialsFile = errors.New("gcp storage secret cannot be parsed from JSON content") + errGCPWrongCredentialSourceFile = errors.New("credential source in secret needs to point to token file") ) +const gcpAccountTypeExternal = "external_account" + func getSecrets(ctx context.Context, k k8s.Client, stack *lokiv1.LokiStack, fg configv1.FeatureGates) (*corev1.Secret, *corev1.Secret, error) { var ( storageSecret corev1.Secret @@ -53,15 +59,7 @@ func getSecrets(ctx context.Context, k k8s.Client, stack *lokiv1.LokiStack, fg c } if fg.OpenShift.ManagedAuthEnv { - secretName, ok := stack.Annotations[storage.AnnotationCredentialsRequestsSecretRef] - if !ok { - return nil, nil, &status.DegradedError{ - Message: "Missing OpenShift cloud credentials request", - Reason: lokiv1.ReasonMissingCredentialsRequest, - Requeue: true, - } - } - + secretName := storage.ManagedCredentialsSecretName(stack.Name) managedAuthCredsKey := client.ObjectKey{Name: secretName, Namespace: stack.Namespace} if err := k.Get(ctx, managedAuthCredsKey, &managedAuthSecret); err != nil { if apierrors.IsNotFound(err) { @@ -94,7 +92,7 @@ func extractSecrets(secretType lokiv1.ObjectStorageSecretType, objStore, managed SharedStore: secretType, } - if fg.OpenShift.ManagedAuthEnabled() { + if fg.OpenShift.ManagedAuthEnv { var managedAuthHash string managedAuthHash, err = hashSecretData(managedAuth) if err != nil { @@ -204,12 +202,7 @@ func validateAzureCredentials(s *corev1.Secret, fg configv1.FeatureGates) (workl tenantID := s.Data[storage.KeyAzureStorageTenantID] subscriptionID := s.Data[storage.KeyAzureStorageSubscriptionID] - if fg.OpenShift.ManagedAuthEnabled() { - region := s.Data[storage.KeyAzureRegion] - if len(region) == 0 { - return false, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyAzureRegion) - } - + if fg.OpenShift.ManagedAuthEnv { if len(accountKey) > 0 || len(clientID) > 0 || len(tenantID) > 0 || len(subscriptionID) > 0 { return false, errAzureManagedIdentityNoOverride } @@ -255,8 +248,36 @@ func extractGCSConfigSecret(s *corev1.Secret) (*storage.GCSStorageConfig, error) return nil, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyGCPServiceAccountKeyFilename) } + credentialsFile := struct { + CredentialsType string `json:"type"` + CredentialsSource struct { + File string `json:"file"` + } `json:"credential_source"` + }{} + + err := json.Unmarshal(keyJSON, &credentialsFile) + if err != nil { + return nil, errGCPParseCredentialsFile + } + + var ( + audience = s.Data[storage.KeyGCPWorkloadIdentityProviderAudience] + isWorkloadIdentity = credentialsFile.CredentialsType == gcpAccountTypeExternal + ) + if isWorkloadIdentity { + if len(audience) == 0 { + return nil, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyGCPWorkloadIdentityProviderAudience) + } + + if credentialsFile.CredentialsSource.File != storage.ServiceAccountTokenFilePath { + return nil, fmt.Errorf("%w: %s", errGCPWrongCredentialSourceFile, 
storage.ServiceAccountTokenFilePath) + } + } + return &storage.GCSStorageConfig{ - Bucket: string(bucket), + Bucket: string(bucket), + WorkloadIdentity: isWorkloadIdentity, + Audience: string(audience), }, nil } @@ -296,7 +317,7 @@ func extractS3ConfigSecret(s *corev1.Secret, fg configv1.FeatureGates) (*storage ) switch { - case fg.OpenShift.ManagedAuthEnabled(): + case fg.OpenShift.ManagedAuthEnv: cfg.STS = true cfg.Audience = string(audience) // Do not allow users overriding the role arn provided on Loki Operator installation diff --git a/operator/internal/handlers/internal/storage/secrets_test.go b/operator/internal/handlers/internal/storage/secrets_test.go index 94b6ae2e3aaa1..ca3623b718c1b 100644 --- a/operator/internal/handlers/internal/storage/secrets_test.go +++ b/operator/internal/handlers/internal/storage/secrets_test.go @@ -71,11 +71,12 @@ func TestUnknownType(t *testing.T) { func TestAzureExtract(t *testing.T) { type test struct { - name string - secret *corev1.Secret - managedSecret *corev1.Secret - featureGates configv1.FeatureGates - wantError string + name string + secret *corev1.Secret + managedSecret *corev1.Secret + featureGates configv1.FeatureGates + wantError string + wantCredentialMode lokiv1.CredentialMode } table := []test{ { @@ -155,27 +156,6 @@ func TestAzureExtract(t *testing.T) { }, wantError: "missing secret field: subscription_id", }, - { - name: "managed auth - no region", - secret: &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{Name: "test"}, - Data: map[string][]byte{ - "environment": []byte("here"), - "account_name": []byte("test-account-name"), - "container": []byte("this,that"), - }, - }, - managedSecret: &corev1.Secret{ - Data: map[string][]byte{}, - }, - featureGates: configv1.FeatureGates{ - OpenShift: configv1.OpenShiftFeatureGates{ - Enabled: true, - ManagedAuthEnv: true, - }, - }, - wantError: "missing secret field: region", - }, { name: "managed auth - no auth override", secret: &corev1.Secret{ @@ -224,6 +204,7 @@ func TestAzureExtract(t *testing.T) { "account_key": []byte("secret"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, { name: "mandatory for workload-identity set", @@ -239,6 +220,7 @@ func TestAzureExtract(t *testing.T) { "region": []byte("test-region"), }, }, + wantCredentialMode: lokiv1.CredentialModeToken, }, { name: "mandatory for managed workload-identity set", @@ -252,7 +234,14 @@ func TestAzureExtract(t *testing.T) { }, }, managedSecret: &corev1.Secret{ - Data: map[string][]byte{}, + ObjectMeta: metav1.ObjectMeta{ + Name: "managed-secret", + }, + Data: map[string][]byte{ + "azure_client_id": []byte("test-client-id"), + "azure_tenant_id": []byte("test-tenant-id"), + "azure_subscription_id": []byte("test-subscription-id"), + }, }, featureGates: configv1.FeatureGates{ OpenShift: configv1.OpenShiftFeatureGates{ @@ -260,6 +249,7 @@ func TestAzureExtract(t *testing.T) { ManagedAuthEnv: true, }, }, + wantCredentialMode: lokiv1.CredentialModeManaged, }, { name: "all set including optional", @@ -273,6 +263,7 @@ func TestAzureExtract(t *testing.T) { "endpoint_suffix": []byte("suffix"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, } for _, tst := range table { @@ -285,7 +276,8 @@ func TestAzureExtract(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, opts.SecretName) require.NotEmpty(t, opts.SecretSHA1) - require.Equal(t, opts.SharedStore, lokiv1.ObjectStorageSecretAzure) + require.Equal(t, lokiv1.ObjectStorageSecretAzure, opts.SharedStore) + require.Equal(t, tst.wantCredentialMode, 
opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } @@ -295,9 +287,10 @@ func TestAzureExtract(t *testing.T) { func TestGCSExtract(t *testing.T) { type test struct { - name string - secret *corev1.Secret - wantError string + name string + secret *corev1.Secret + wantError string + wantCredentialMode lokiv1.CredentialMode } table := []test{ { @@ -314,15 +307,51 @@ func TestGCSExtract(t *testing.T) { }, wantError: "missing secret field: key.json", }, + { + name: "missing audience", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketname": []byte("here"), + "key.json": []byte("{\"type\": \"external_account\"}"), + }, + }, + wantError: "missing secret field: audience", + }, + { + name: "credential_source file no override", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketname": []byte("here"), + "audience": []byte("test"), + "key.json": []byte("{\"type\": \"external_account\", \"credential_source\": {\"file\": \"/custom/path/to/secret/storage/serviceaccount/token\"}}"), + }, + }, + wantError: "credential source in secret needs to point to token file: /var/run/secrets/storage/serviceaccount/token", + }, { name: "all set", secret: &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{Name: "test"}, Data: map[string][]byte{ "bucketname": []byte("here"), - "key.json": []byte("{\"type\": \"SA\"}"), + "key.json": []byte("{\"type\": \"service_account\"}"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, + }, + { + name: "mandatory for workload-identity set", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketname": []byte("here"), + "audience": []byte("test"), + "key.json": []byte("{\"type\": \"external_account\", \"credential_source\": {\"file\": \"/var/run/secrets/storage/serviceaccount/token\"}}"), + }, + }, + wantCredentialMode: lokiv1.CredentialModeToken, }, } for _, tst := range table { @@ -330,9 +359,10 @@ func TestGCSExtract(t *testing.T) { t.Run(tst.name, func(t *testing.T) { t.Parallel() - _, err := extractSecrets(lokiv1.ObjectStorageSecretGCS, tst.secret, nil, configv1.FeatureGates{}) + opts, err := extractSecrets(lokiv1.ObjectStorageSecretGCS, tst.secret, nil, configv1.FeatureGates{}) if tst.wantError == "" { require.NoError(t, err) + require.Equal(t, tst.wantCredentialMode, opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } @@ -342,9 +372,10 @@ func TestGCSExtract(t *testing.T) { func TestS3Extract(t *testing.T) { type test struct { - name string - secret *corev1.Secret - wantError string + name string + secret *corev1.Secret + wantError string + wantCredentialMode lokiv1.CredentialMode } table := []test{ { @@ -422,6 +453,7 @@ func TestS3Extract(t *testing.T) { "sse_kms_key_id": []byte("kms-key-id"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, { name: "all set with SSE-KMS with encryption context", @@ -437,6 +469,7 @@ func TestS3Extract(t *testing.T) { "sse_kms_encryption_context": []byte("kms-encryption-ctx"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, { name: "all set with SSE-S3", @@ -450,6 +483,7 @@ func TestS3Extract(t *testing.T) { "sse_type": []byte("SSE-S3"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, { name: "all set without SSE", @@ -462,6 +496,7 @@ func TestS3Extract(t *testing.T) { "access_key_secret": []byte("secret"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, { name: "STS missing 
region", @@ -484,6 +519,7 @@ func TestS3Extract(t *testing.T) { "region": []byte("here"), }, }, + wantCredentialMode: lokiv1.CredentialModeToken, }, { name: "STS all set", @@ -496,6 +532,7 @@ func TestS3Extract(t *testing.T) { "audience": []byte("audience"), }, }, + wantCredentialMode: lokiv1.CredentialModeToken, }, } for _, tst := range table { @@ -508,7 +545,8 @@ func TestS3Extract(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, opts.SecretName) require.NotEmpty(t, opts.SecretSHA1) - require.Equal(t, opts.SharedStore, lokiv1.ObjectStorageSecretS3) + require.Equal(t, lokiv1.ObjectStorageSecretS3, opts.SharedStore) + require.Equal(t, tst.wantCredentialMode, opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } @@ -582,10 +620,11 @@ func TestS3Extract_WithOpenShiftManagedAuth(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, opts.SecretName) require.NotEmpty(t, opts.SecretSHA1) - require.Equal(t, opts.SharedStore, lokiv1.ObjectStorageSecretS3) + require.Equal(t, lokiv1.ObjectStorageSecretS3, opts.SharedStore) require.True(t, opts.S3.STS) - require.Equal(t, opts.OpenShift.CloudCredentials.SecretName, tst.managedAuthSecret.Name) + require.Equal(t, tst.managedAuthSecret.Name, opts.OpenShift.CloudCredentials.SecretName) require.NotEmpty(t, opts.OpenShift.CloudCredentials.SHA1) + require.Equal(t, lokiv1.CredentialModeManaged, opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } @@ -733,7 +772,8 @@ func TestSwiftExtract(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, opts.SecretName) require.NotEmpty(t, opts.SecretSHA1) - require.Equal(t, opts.SharedStore, lokiv1.ObjectStorageSecretSwift) + require.Equal(t, lokiv1.ObjectStorageSecretSwift, opts.SharedStore) + require.Equal(t, lokiv1.CredentialModeStatic, opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } @@ -806,7 +846,8 @@ func TestAlibabaCloudExtract(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, opts.SecretName) require.NotEmpty(t, opts.SecretSHA1) - require.Equal(t, opts.SharedStore, lokiv1.ObjectStorageSecretAlibabaCloud) + require.Equal(t, lokiv1.ObjectStorageSecretAlibabaCloud, opts.SharedStore) + require.Equal(t, lokiv1.CredentialModeStatic, opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } diff --git a/operator/internal/handlers/internal/storage/storage_test.go b/operator/internal/handlers/internal/storage/storage_test.go index 9e041bf99a23a..45f5b0f2865ba 100644 --- a/operator/internal/handlers/internal/storage/storage_test.go +++ b/operator/internal/handlers/internal/storage/storage_test.go @@ -17,7 +17,6 @@ import ( configv1 "github.com/grafana/loki/operator/apis/config/v1" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" "github.com/grafana/loki/operator/internal/external/k8s/k8sfakes" - "github.com/grafana/loki/operator/internal/manifests/storage" "github.com/grafana/loki/operator/internal/status" ) @@ -135,77 +134,6 @@ func TestBuildOptions_WhenMissingSecret_SetDegraded(t *testing.T) { require.Equal(t, degradedErr, err) } -func TestBuildOptions_WhenMissingCloudCredentialsRequest_SetDegraded(t *testing.T) { - sw := &k8sfakes.FakeStatusWriter{} - k := &k8sfakes.FakeClient{} - r := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "my-stack", - Namespace: "some-ns", - }, - } - - fg := configv1.FeatureGates{ - OpenShift: configv1.OpenShiftFeatureGates{ - ManagedAuthEnv: true, - }, - } - - degradedErr := &status.DegradedError{ - Message: "Missing OpenShift cloud credentials 
request", - Reason: lokiv1.ReasonMissingCredentialsRequest, - Requeue: true, - } - - stack := &lokiv1.LokiStack{ - TypeMeta: metav1.TypeMeta{ - Kind: "LokiStack", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "my-stack", - Namespace: "some-ns", - UID: "b23f9a38-9672-499f-8c29-15ede74d3ece", - Annotations: map[string]string{}, - }, - Spec: lokiv1.LokiStackSpec{ - Size: lokiv1.SizeOneXExtraSmall, - Storage: lokiv1.ObjectStorageSpec{ - Schemas: []lokiv1.ObjectStorageSchema{ - { - Version: lokiv1.ObjectStorageSchemaV11, - EffectiveDate: "2020-10-11", - }, - }, - Secret: lokiv1.ObjectStorageSecretSpec{ - Name: defaultManagedAuthSecret.Name, - Type: lokiv1.ObjectStorageSecretS3, - }, - }, - }, - } - - k.GetStub = func(_ context.Context, name types.NamespacedName, object client.Object, _ ...client.GetOption) error { - _, isLokiStack := object.(*lokiv1.LokiStack) - if r.Name == name.Name && r.Namespace == name.Namespace && isLokiStack { - k.SetClientObject(object, stack) - return nil - } - if name.Name == defaultManagedAuthSecret.Name { - k.SetClientObject(object, &defaultManagedAuthSecret) - return nil - } - return apierrors.NewNotFound(schema.GroupResource{}, "something is not found") - } - - k.StatusStub = func() client.StatusWriter { return sw } - - _, err := BuildOptions(context.TODO(), k, stack, fg) - - // make sure error is returned - require.Error(t, err) - require.Equal(t, degradedErr, err) -} - func TestBuildOptions_WhenMissingCloudCredentialsSecret_SetDegraded(t *testing.T) { sw := &k8sfakes.FakeStatusWriter{} k := &k8sfakes.FakeClient{} @@ -236,9 +164,6 @@ func TestBuildOptions_WhenMissingCloudCredentialsSecret_SetDegraded(t *testing.T Name: "my-stack", Namespace: "some-ns", UID: "b23f9a38-9672-499f-8c29-15ede74d3ece", - Annotations: map[string]string{ - storage.AnnotationCredentialsRequestsSecretRef: "my-stack-aws-creds", - }, }, Spec: lokiv1.LokiStackSpec{ Size: lokiv1.SizeOneXExtraSmall, diff --git a/operator/internal/handlers/lokistack_create_or_update.go b/operator/internal/handlers/lokistack_create_or_update.go index 2f78f75d02c5b..47e7a309bf8b9 100644 --- a/operator/internal/handlers/lokistack_create_or_update.go +++ b/operator/internal/handlers/lokistack_create_or_update.go @@ -36,7 +36,7 @@ func CreateOrUpdateLokiStack( k k8s.Client, s *runtime.Scheme, fg configv1.FeatureGates, -) error { +) (lokiv1.CredentialMode, error) { ll := log.WithValues("lokistack", req.NamespacedName, "event", "createOrUpdate") var stack lokiv1.LokiStack @@ -44,9 +44,9 @@ func CreateOrUpdateLokiStack( if apierrors.IsNotFound(err) { // maybe the user deleted it before we could react? 
Either way this isn't an issue ll.Error(err, "could not find the requested loki stack", "name", req.NamespacedName) - return nil + return "", nil } - return kverrors.Wrap(err, "failed to lookup lokistack", "name", req.NamespacedName) + return "", kverrors.Wrap(err, "failed to lookup lokistack", "name", req.NamespacedName) } img := os.Getenv(manifests.EnvRelatedImageLoki) @@ -61,21 +61,21 @@ func CreateOrUpdateLokiStack( objStore, err := storage.BuildOptions(ctx, k, &stack, fg) if err != nil { - return err + return "", err } baseDomain, tenants, err := gateway.BuildOptions(ctx, ll, k, &stack, fg) if err != nil { - return err + return "", err } if err = rules.Cleanup(ctx, ll, k, &stack); err != nil { - return err + return "", err } alertingRules, recordingRules, ruler, ocpOptions, err := rules.BuildOptions(ctx, ll, k, &stack) if err != nil { - return err + return "", err } certRotationRequiredAt := "" @@ -86,7 +86,7 @@ func CreateOrUpdateLokiStack( timeoutConfig, err := manifests.NewTimeoutConfig(stack.Spec.Limits) if err != nil { ll.Error(err, "failed to parse query timeout") - return &status.DegradedError{ + return "", &status.DegradedError{ Message: fmt.Sprintf("Error parsing query timeout: %s", err), Reason: lokiv1.ReasonQueryTimeoutInvalid, Requeue: false, @@ -116,13 +116,13 @@ func CreateOrUpdateLokiStack( if optErr := manifests.ApplyDefaultSettings(&opts); optErr != nil { ll.Error(optErr, "failed to conform options to build settings") - return optErr + return "", optErr } if fg.LokiStackGateway { if optErr := manifests.ApplyGatewayDefaultOptions(&opts); optErr != nil { ll.Error(optErr, "failed to apply defaults options to gateway settings") - return optErr + return "", optErr } } @@ -140,13 +140,13 @@ func CreateOrUpdateLokiStack( if optErr := manifests.ApplyTLSSettings(&opts, tlsProfile); optErr != nil { ll.Error(optErr, "failed to conform options to tls profile settings") - return optErr + return "", optErr } objects, err := manifests.BuildAll(opts) if err != nil { ll.Error(err, "failed to build manifests") - return err + return "", err } ll.Info("manifests built", "count", len(objects)) @@ -158,7 +158,7 @@ func CreateOrUpdateLokiStack( // a user possibly being unable to read logs. 
if err := status.SetStorageSchemaStatus(ctx, k, req, objStore.Schemas); err != nil { ll.Error(err, "failed to set storage schema status") - return err + return "", err } var errCount int32 @@ -182,7 +182,7 @@ func CreateOrUpdateLokiStack( depAnnotations, err := dependentAnnotations(ctx, k, obj) if err != nil { l.Error(err, "failed to set dependent annotations") - return err + return "", err } desired := obj.DeepCopyObject().(client.Object) @@ -205,7 +205,7 @@ func CreateOrUpdateLokiStack( } if errCount > 0 { - return kverrors.New("failed to configure lokistack resources", "name", req.NamespacedName) + return "", kverrors.New("failed to configure lokistack resources", "name", req.NamespacedName) } // 1x.demo is used only for development, so the metrics will not @@ -214,7 +214,7 @@ func CreateOrUpdateLokiStack( metrics.Collect(&opts.Stack, opts.Name) } - return nil + return objStore.CredentialMode(), nil } func dependentAnnotations(ctx context.Context, k k8s.Client, obj client.Object) (map[string]string, error) { diff --git a/operator/internal/handlers/lokistack_create_or_update_test.go b/operator/internal/handlers/lokistack_create_or_update_test.go index 4ba9a9affc369..bef5ffc9efb70 100644 --- a/operator/internal/handlers/lokistack_create_or_update_test.go +++ b/operator/internal/handlers/lokistack_create_or_update_test.go @@ -108,7 +108,7 @@ func TestCreateOrUpdateLokiStack_WhenGetReturnsNotFound_DoesNotError(t *testing. k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) require.NoError(t, err) // make sure create was NOT called because the Get failed @@ -132,7 +132,7 @@ func TestCreateOrUpdateLokiStack_WhenGetReturnsAnErrorOtherThanNotFound_ReturnsT k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) require.Equal(t, badRequestErr, errors.Unwrap(err)) @@ -219,7 +219,7 @@ func TestCreateOrUpdateLokiStack_SetsNamespaceOnAllObjects(t *testing.T) { k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) require.NoError(t, err) // make sure create was called @@ -327,7 +327,7 @@ func TestCreateOrUpdateLokiStack_SetsOwnerRefOnAllObjects(t *testing.T) { k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) require.NoError(t, err) // make sure create was called @@ -387,7 +387,7 @@ func TestCreateOrUpdateLokiStack_WhenSetControllerRefInvalid_ContinueWithOtherOb k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) // make sure error is returned to re-trigger reconciliation require.Error(t, err) @@ -490,7 +490,7 @@ func TestCreateOrUpdateLokiStack_WhenGetReturnsNoError_UpdateObjects(t *testing. 
k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) require.NoError(t, err) // make sure create not called @@ -556,7 +556,7 @@ func TestCreateOrUpdateLokiStack_WhenCreateReturnsError_ContinueWithOtherObjects k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) // make sure error is returned to re-trigger reconciliation require.Error(t, err) @@ -663,7 +663,7 @@ func TestCreateOrUpdateLokiStack_WhenUpdateReturnsError_ContinueWithOtherObjects k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) // make sure error is returned to re-trigger reconciliation require.Error(t, err) @@ -734,7 +734,7 @@ func TestCreateOrUpdateLokiStack_WhenInvalidQueryTimeout_SetDegraded(t *testing. k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) // make sure error is returned require.Error(t, err) diff --git a/operator/internal/manifests/mutate.go b/operator/internal/manifests/mutate.go index 27421750bf2cc..63308bb9ceb62 100644 --- a/operator/internal/manifests/mutate.go +++ b/operator/internal/manifests/mutate.go @@ -6,6 +6,7 @@ import ( "github.com/ViaQ/logerr/v2/kverrors" "github.com/imdario/mergo" routev1 "github.com/openshift/api/route/v1" + cloudcredentialv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -123,6 +124,11 @@ func MutateFuncFor(existing, desired client.Object, depAnnotations map[string]st wantRt := desired.(*routev1.Route) mutateRoute(rt, wantRt) + case *cloudcredentialv1.CredentialsRequest: + cr := existing.(*cloudcredentialv1.CredentialsRequest) + wantCr := desired.(*cloudcredentialv1.CredentialsRequest) + mutateCredentialRequest(cr, wantCr) + case *monitoringv1.PrometheusRule: pr := existing.(*monitoringv1.PrometheusRule) wantPr := desired.(*monitoringv1.PrometheusRule) @@ -213,6 +219,10 @@ func mutateRoute(existing, desired *routev1.Route) { existing.Spec = desired.Spec } +func mutateCredentialRequest(existing, desired *cloudcredentialv1.CredentialsRequest) { + existing.Spec = desired.Spec +} + func mutatePrometheusRule(existing, desired *monitoringv1.PrometheusRule) { existing.Annotations = desired.Annotations existing.Labels = desired.Labels diff --git a/operator/internal/manifests/openshift/credentialsrequest.go b/operator/internal/manifests/openshift/credentialsrequest.go index 2962b61d0d1ef..0c0a19adc98d3 100644 --- a/operator/internal/manifests/openshift/credentialsrequest.go +++ b/operator/internal/manifests/openshift/credentialsrequest.go @@ -1,10 +1,6 @@ package openshift import ( - "fmt" - "os" - "path" - "github.com/ViaQ/logerr/v2/kverrors" cloudcredentialv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" corev1 "k8s.io/api/core/v1" @@ -12,32 +8,28 @@ import ( 
"k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/grafana/loki/operator/internal/config" "github.com/grafana/loki/operator/internal/manifests/storage" ) -const ( - ccoNamespace = "openshift-cloud-credential-operator" -) +const azureFallbackRegion = "centralus" func BuildCredentialsRequest(opts Options) (*cloudcredentialv1.CredentialsRequest, error) { stack := client.ObjectKey{Name: opts.BuildOpts.LokiStackName, Namespace: opts.BuildOpts.LokiStackNamespace} - providerSpec, secretName, err := encodeProviderSpec(opts.BuildOpts.LokiStackName, opts.ManagedAuthEnv) + providerSpec, err := encodeProviderSpec(opts.ManagedAuth) if err != nil { return nil, kverrors.Wrap(err, "failed encoding credentialsrequest provider spec") } return &cloudcredentialv1.CredentialsRequest{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", stack.Namespace, secretName), - Namespace: ccoNamespace, - Annotations: map[string]string{ - AnnotationCredentialsRequestOwner: stack.String(), - }, + Name: stack.Name, + Namespace: stack.Namespace, }, Spec: cloudcredentialv1.CredentialsRequestSpec{ SecretRef: corev1.ObjectReference{ - Name: secretName, + Name: storage.ManagedCredentialsSecretName(stack.Name), Namespace: stack.Namespace, }, ProviderSpec: providerSpec, @@ -45,16 +37,13 @@ func BuildCredentialsRequest(opts Options) (*cloudcredentialv1.CredentialsReques stack.Name, rulerServiceAccountName(opts), }, - CloudTokenPath: path.Join(storage.AWSTokenVolumeDirectory, "token"), + CloudTokenPath: storage.ServiceAccountTokenFilePath, }, }, nil } -func encodeProviderSpec(stackName string, env *ManagedAuthEnv) (*runtime.RawExtension, string, error) { - var ( - spec runtime.Object - secretName string - ) +func encodeProviderSpec(env *config.ManagedAuthConfig) (*runtime.RawExtension, error) { + var spec runtime.Object switch { case env.AWS != nil: @@ -73,9 +62,17 @@ func encodeProviderSpec(stackName string, env *ManagedAuthEnv) (*runtime.RawExte }, STSIAMRoleARN: env.AWS.RoleARN, } - secretName = fmt.Sprintf("%s-aws-creds", stackName) case env.Azure != nil: azure := env.Azure + if azure.Region == "" { + // The OpenShift Console currently does not provide a UI to configure the Azure Region + // for an operator using managed credentials. Because the CredentialsRequest is currently + // not used to create a Managed Identity, the region is actually never used. + // We default to the US region if nothing is set, so that the CredentialsRequest can be + // created. This should have no effect on the generated credential secret. + // The region can be configured by setting an environment variable on the operator Subscription. 
+ azure.Region = azureFallbackRegion + } spec = &cloudcredentialv1.AzureProviderSpec{ Permissions: []string{ @@ -101,38 +98,8 @@ func encodeProviderSpec(stackName string, env *ManagedAuthEnv) (*runtime.RawExte AzureSubscriptionID: azure.SubscriptionID, AzureTenantID: azure.TenantID, } - secretName = fmt.Sprintf("%s-azure-creds", stackName) } encodedSpec, err := cloudcredentialv1.Codec.EncodeProviderSpec(spec.DeepCopyObject()) - return encodedSpec, secretName, err -} - -func DiscoverManagedAuthEnv() *ManagedAuthEnv { - // AWS - roleARN := os.Getenv("ROLEARN") - - // Azure - clientID := os.Getenv("CLIENTID") - tenantID := os.Getenv("TENANTID") - subscriptionID := os.Getenv("SUBSCRIPTIONID") - - switch { - case roleARN != "": - return &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ - RoleARN: roleARN, - }, - } - case clientID != "" && tenantID != "" && subscriptionID != "": - return &ManagedAuthEnv{ - Azure: &AzureWIFEnvironment{ - ClientID: clientID, - SubscriptionID: subscriptionID, - TenantID: tenantID, - }, - } - } - - return nil + return encodedSpec, err } diff --git a/operator/internal/manifests/openshift/credentialsrequest_test.go b/operator/internal/manifests/openshift/credentialsrequest_test.go index 21b193c8c7d7e..36c6e2331f7e5 100644 --- a/operator/internal/manifests/openshift/credentialsrequest_test.go +++ b/operator/internal/manifests/openshift/credentialsrequest_test.go @@ -1,40 +1,22 @@ package openshift import ( - "strings" "testing" "github.com/stretchr/testify/require" + "github.com/grafana/loki/operator/internal/config" "github.com/grafana/loki/operator/internal/manifests/storage" ) -func TestBuildCredentialsRequest_HasOwnerAnnotation(t *testing.T) { - opts := Options{ - BuildOpts: BuildOptions{ - LokiStackName: "a-stack", - LokiStackNamespace: "ns", - }, - ManagedAuthEnv: &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ - RoleARN: "role-arn", - }, - }, - } - - credReq, err := BuildCredentialsRequest(opts) - require.NoError(t, err) - require.Contains(t, credReq.Annotations, AnnotationCredentialsRequestOwner) -} - func TestBuildCredentialsRequest_HasSecretRef_MatchingLokiStackNamespace(t *testing.T) { opts := Options{ BuildOpts: BuildOptions{ LokiStackName: "a-stack", LokiStackNamespace: "ns", }, - ManagedAuthEnv: &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ + ManagedAuth: &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ RoleARN: "role-arn", }, }, @@ -51,8 +33,8 @@ func TestBuildCredentialsRequest_HasServiceAccountNames_ContainsAllLokiStackServ LokiStackName: "a-stack", LokiStackNamespace: "ns", }, - ManagedAuthEnv: &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ + ManagedAuth: &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ RoleARN: "role-arn", }, }, @@ -70,8 +52,8 @@ func TestBuildCredentialsRequest_CloudTokenPath_MatchinOpenShiftSADirectory(t *t LokiStackName: "a-stack", LokiStackNamespace: "ns", }, - ManagedAuthEnv: &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ + ManagedAuth: &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ RoleARN: "role-arn", }, }, @@ -79,7 +61,7 @@ func TestBuildCredentialsRequest_CloudTokenPath_MatchinOpenShiftSADirectory(t *t credReq, err := BuildCredentialsRequest(opts) require.NoError(t, err) - require.True(t, strings.HasPrefix(credReq.Spec.CloudTokenPath, storage.AWSTokenVolumeDirectory)) + require.Equal(t, storage.ServiceAccountTokenFilePath, credReq.Spec.CloudTokenPath) } func TestBuildCredentialsRequest_FollowsNamingConventions(t *testing.T) { @@ -96,14 +78,14 @@ func TestBuildCredentialsRequest_FollowsNamingConventions(t *testing.T) { LokiStackName: "a-stack", 
LokiStackNamespace: "ns", }, - ManagedAuthEnv: &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ + ManagedAuth: &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ RoleARN: "role-arn", }, }, }, - wantName: "ns-a-stack-aws-creds", - wantSecretName: "a-stack-aws-creds", + wantName: "a-stack", + wantSecretName: "a-stack-managed-credentials", }, } for _, test := range tests { diff --git a/operator/internal/manifests/openshift/options.go b/operator/internal/manifests/openshift/options.go index 9bc2e4faae36e..572db7fe64453 100644 --- a/operator/internal/manifests/openshift/options.go +++ b/operator/internal/manifests/openshift/options.go @@ -6,6 +6,7 @@ import ( "time" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" + "github.com/grafana/loki/operator/internal/config" ) // Options is the set of internal template options for rendering @@ -14,7 +15,7 @@ type Options struct { BuildOpts BuildOptions Authentication []AuthenticationSpec Authorization AuthorizationSpec - ManagedAuthEnv *ManagedAuthEnv + ManagedAuth *config.ManagedAuthConfig } // AuthenticationSpec describes the authentication specification @@ -55,22 +56,6 @@ type TenantData struct { CookieSecret string } -type AWSSTSEnv struct { - RoleARN string -} - -type AzureWIFEnvironment struct { - ClientID string - SubscriptionID string - TenantID string - Region string -} - -type ManagedAuthEnv struct { - AWS *AWSSTSEnv - Azure *AzureWIFEnvironment -} - // NewOptions returns an openshift options struct. func NewOptions( stackName, stackNamespace string, diff --git a/operator/internal/manifests/openshift/var.go b/operator/internal/manifests/openshift/var.go index 84928c48d7e28..5e3ac6300e3eb 100644 --- a/operator/internal/manifests/openshift/var.go +++ b/operator/internal/manifests/openshift/var.go @@ -48,8 +48,6 @@ var ( MonitoringSVCUserWorkload = "alertmanager-user-workload" MonitoringUserWorkloadNS = "openshift-user-workload-monitoring" - - AnnotationCredentialsRequestOwner = "loki.grafana.com/credentialsrequest-owner" ) func authorizerRbacName(componentName string) string { diff --git a/operator/internal/manifests/storage/configure.go b/operator/internal/manifests/storage/configure.go index f3fd86ebbaa1c..ede098425323d 100644 --- a/operator/internal/manifests/storage/configure.go +++ b/operator/internal/manifests/storage/configure.go @@ -13,6 +13,18 @@ import ( lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" ) +var ( + managedAuthConfigVolumeMount = corev1.VolumeMount{ + Name: managedAuthConfigVolumeName, + MountPath: managedAuthConfigDirectory, + } + + saTokenVolumeMount = corev1.VolumeMount{ + Name: saTokenVolumeName, + MountPath: saTokenVolumeMountPath, + } +) + // ConfigureDeployment appends additional pod volumes and container env vars, args, volume mounts // based on the object storage type. Currently supported amendments: // - All: Ensure object storage secret mounted and auth projected as env vars. @@ -127,11 +139,11 @@ func ensureObjectStoreCredentials(p *corev1.PodSpec, opts Options) corev1.PodSpe if managedAuthEnabled(opts) { container.Env = append(container.Env, managedAuthCredentials(opts)...) 
volumes = append(volumes, saTokenVolume(opts)) - container.VolumeMounts = append(container.VolumeMounts, saTokenVolumeMount(opts)) + container.VolumeMounts = append(container.VolumeMounts, saTokenVolumeMount) - if opts.OpenShift.ManagedAuthEnabled() { - volumes = append(volumes, managedAuthVolume(opts)) - container.VolumeMounts = append(container.VolumeMounts, managedAuthVolumeMount(opts)) + if opts.OpenShift.ManagedAuthEnabled() && opts.S3 != nil && opts.S3.STS { + volumes = append(volumes, managedAuthConfigVolume(opts)) + container.VolumeMounts = append(container.VolumeMounts, managedAuthConfigVolumeMount) } } else { container.Env = append(container.Env, staticAuthCredentials(opts)...) @@ -183,13 +195,13 @@ func managedAuthCredentials(opts Options) []corev1.EnvVar { case lokiv1.ObjectStorageSecretS3: if opts.OpenShift.ManagedAuthEnabled() { return []corev1.EnvVar{ - envVarFromValue(EnvAWSCredentialsFile, path.Join(managedAuthSecretDirectory, KeyAWSCredentialsFilename)), + envVarFromValue(EnvAWSCredentialsFile, path.Join(managedAuthConfigDirectory, KeyAWSCredentialsFilename)), envVarFromValue(EnvAWSSdkLoadConfig, "true"), } } else { return []corev1.EnvVar{ envVarFromSecret(EnvAWSRoleArn, opts.SecretName, KeyAWSRoleArn), - envVarFromValue(EnvAWSWebIdentityTokenFile, path.Join(AWSTokenVolumeDirectory, "token")), + envVarFromValue(EnvAWSWebIdentityTokenFile, ServiceAccountTokenFilePath), } } case lokiv1.ObjectStorageSecretAzure: @@ -199,7 +211,7 @@ func managedAuthCredentials(opts Options) []corev1.EnvVar { envVarFromSecret(EnvAzureClientID, opts.OpenShift.CloudCredentials.SecretName, azureManagedCredentialKeyClientID), envVarFromSecret(EnvAzureTenantID, opts.OpenShift.CloudCredentials.SecretName, azureManagedCredentialKeyTenantID), envVarFromSecret(EnvAzureSubscriptionID, opts.OpenShift.CloudCredentials.SecretName, azureManagedCredentialKeySubscriptionID), - envVarFromValue(EnvAzureFederatedTokenFile, path.Join(azureTokenVolumeDirectory, "token")), + envVarFromValue(EnvAzureFederatedTokenFile, ServiceAccountTokenFilePath), } } @@ -208,7 +220,11 @@ func managedAuthCredentials(opts Options) []corev1.EnvVar { envVarFromSecret(EnvAzureClientID, opts.SecretName, KeyAzureStorageClientID), envVarFromSecret(EnvAzureTenantID, opts.SecretName, KeyAzureStorageTenantID), envVarFromSecret(EnvAzureSubscriptionID, opts.SecretName, KeyAzureStorageSubscriptionID), - envVarFromValue(EnvAzureFederatedTokenFile, path.Join(azureTokenVolumeDirectory, "token")), + envVarFromValue(EnvAzureFederatedTokenFile, ServiceAccountTokenFilePath), + } + case lokiv1.ObjectStorageSecretGCS: + return []corev1.EnvVar{ + envVarFromValue(EnvGoogleApplicationCredentials, path.Join(secretDirectory, KeyGCPServiceAccountKeyFilename)), } default: return []corev1.EnvVar{} @@ -290,25 +306,13 @@ func managedAuthEnabled(opts Options) bool { return opts.S3 != nil && opts.S3.STS case lokiv1.ObjectStorageSecretAzure: return opts.Azure != nil && opts.Azure.WorkloadIdentity + case lokiv1.ObjectStorageSecretGCS: + return opts.GCS != nil && opts.GCS.WorkloadIdentity default: return false } } -func saTokenVolumeMount(opts Options) corev1.VolumeMount { - var tokenPath string - switch opts.SharedStore { - case lokiv1.ObjectStorageSecretS3: - tokenPath = AWSTokenVolumeDirectory - case lokiv1.ObjectStorageSecretAzure: - tokenPath = azureTokenVolumeDirectory - } - return corev1.VolumeMount{ - Name: saTokenVolumeName, - MountPath: tokenPath, - } -} - func saTokenVolume(opts Options) corev1.Volume { var audience string storeType := opts.SharedStore @@ 
-323,6 +327,8 @@ func saTokenVolume(opts Options) corev1.Volume { if opts.Azure.Audience != "" { audience = opts.Azure.Audience } + case lokiv1.ObjectStorageSecretGCS: + audience = opts.GCS.Audience } return corev1.Volume{ Name: saTokenVolumeName, @@ -342,16 +348,9 @@ func saTokenVolume(opts Options) corev1.Volume { } } -func managedAuthVolumeMount(opts Options) corev1.VolumeMount { - return corev1.VolumeMount{ - Name: opts.OpenShift.CloudCredentials.SecretName, - MountPath: managedAuthSecretDirectory, - } -} - -func managedAuthVolume(opts Options) corev1.Volume { +func managedAuthConfigVolume(opts Options) corev1.Volume { return corev1.Volume{ - Name: opts.OpenShift.CloudCredentials.SecretName, + Name: managedAuthConfigVolumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ SecretName: opts.OpenShift.CloudCredentials.SecretName, diff --git a/operator/internal/manifests/storage/configure_test.go b/operator/internal/manifests/storage/configure_test.go index 03e22682f4028..2cd7b079a4b4a 100644 --- a/operator/internal/manifests/storage/configure_test.go +++ b/operator/internal/manifests/storage/configure_test.go @@ -206,7 +206,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/azure/serviceaccount", + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -256,7 +256,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -331,7 +331,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/azure/serviceaccount", + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -381,7 +381,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -462,11 +462,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/azure/serviceaccount", - }, - { - Name: "cloud-credentials", - MountPath: managedAuthSecretDirectory, + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -516,7 +512,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -546,11 +542,59 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, + }, + }, + }, + }, + }, + }, + { + desc: "object storage GCS", + opts: Options{ + SecretName: "test", + SharedStore: lokiv1.ObjectStorageSecretGCS, + }, + dpl: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ { - Name: "cloud-credentials", + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvGoogleApplicationCredentials, + Value: 
"/etc/storage/secrets/key.json", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: "cloud-credentials", + SecretName: "test", }, }, }, @@ -561,10 +605,14 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, { - desc: "object storage GCS", + desc: "object storage GCS with Workload Identity", opts: Options{ SecretName: "test", SharedStore: lokiv1.ObjectStorageSecretGCS, + GCS: &GCSStorageConfig{ + Audience: "test", + WorkloadIdentity: true, + }, }, dpl: &appsv1.Deployment{ Spec: appsv1.DeploymentSpec{ @@ -592,6 +640,11 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { ReadOnly: false, MountPath: "/etc/storage/secrets", }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: saTokenVolumeMountPath, + }, }, Env: []corev1.EnvVar{ { @@ -610,6 +663,22 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: "test", + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, }, }, }, @@ -729,7 +798,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/aws/serviceaccount", + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -746,7 +815,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, { Name: "AWS_WEB_IDENTITY_TOKEN_FILE", - Value: "/var/run/secrets/aws/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -827,13 +896,9 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/aws/serviceaccount", - }, - { - Name: "cloud-credentials", - ReadOnly: false, - MountPath: "/etc/storage/managed-auth", + MountPath: saTokenVolumeMountPath, }, + managedAuthConfigVolumeMount, }, Env: []corev1.EnvVar{ { @@ -873,7 +938,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, { - Name: "cloud-credentials", + Name: managedAuthConfigVolumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ SecretName: "cloud-credentials", @@ -1259,7 +1324,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/azure/serviceaccount", + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -1309,7 +1374,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -1384,7 +1449,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/azure/serviceaccount", + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -1434,7 +1499,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -1515,11 +1580,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: 
"/var/run/secrets/azure/serviceaccount", - }, - { - Name: "cloud-credentials", - MountPath: managedAuthSecretDirectory, + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -1569,7 +1630,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -1599,11 +1660,59 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, }, + }, + }, + }, + }, + }, + }, + { + desc: "object storage GCS", + opts: Options{ + SecretName: "test", + SharedStore: lokiv1.ObjectStorageSecretGCS, + }, + sts: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ { - Name: "cloud-credentials", + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvGoogleApplicationCredentials, + Value: "/etc/storage/secrets/key.json", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: "cloud-credentials", + SecretName: "test", }, }, }, @@ -1614,10 +1723,14 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, { - desc: "object storage GCS", + desc: "object storage GCS with Workload Identity", opts: Options{ SecretName: "test", SharedStore: lokiv1.ObjectStorageSecretGCS, + GCS: &GCSStorageConfig{ + Audience: "test", + WorkloadIdentity: true, + }, }, sts: &appsv1.StatefulSet{ Spec: appsv1.StatefulSetSpec{ @@ -1645,6 +1758,11 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { ReadOnly: false, MountPath: "/etc/storage/secrets", }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: saTokenVolumeMountPath, + }, }, Env: []corev1.EnvVar{ { @@ -1663,6 +1781,22 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: "test", + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, }, }, }, @@ -1788,13 +1922,9 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/aws/serviceaccount", - }, - { - Name: "cloud-credentials", - ReadOnly: false, - MountPath: "/etc/storage/managed-auth", + MountPath: saTokenVolumeMountPath, }, + managedAuthConfigVolumeMount, }, Env: []corev1.EnvVar{ { @@ -1834,7 +1964,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, { - Name: "cloud-credentials", + Name: managedAuthConfigVolumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ SecretName: "cloud-credentials", diff --git a/operator/internal/manifests/storage/options.go b/operator/internal/manifests/storage/options.go index 4c62ce7513755..56e2b8e870df1 100644 --- a/operator/internal/manifests/storage/options.go +++ b/operator/internal/manifests/storage/options.go @@ -23,6 +23,40 @@ type 
Options struct { OpenShift OpenShiftOptions } +// CredentialMode returns which mode is used by the current storage configuration. +// This defaults to CredentialModeStatic, but can be CredentialModeToken +// or CredentialModeManaged depending on the object storage provider, the provided +// secret and whether the operator is running in a managed-auth cluster. +func (o Options) CredentialMode() lokiv1.CredentialMode { + if o.Azure != nil { + if o.OpenShift.ManagedAuthEnabled() { + return lokiv1.CredentialModeManaged + } + + if o.Azure.WorkloadIdentity { + return lokiv1.CredentialModeToken + } + } + + if o.GCS != nil { + if o.GCS.WorkloadIdentity { + return lokiv1.CredentialModeToken + } + } + + if o.S3 != nil { + if o.OpenShift.ManagedAuthEnabled() { + return lokiv1.CredentialModeManaged + } + + if o.S3.STS { + return lokiv1.CredentialModeToken + } + } + + return lokiv1.CredentialModeStatic +} + // AzureStorageConfig for Azure storage config type AzureStorageConfig struct { Env string @@ -34,7 +68,9 @@ type AzureStorageConfig struct { // GCSStorageConfig for GCS storage config type GCSStorageConfig struct { - Bucket string + Bucket string + Audience string + WorkloadIdentity bool } // S3StorageConfig for S3 storage config diff --git a/operator/internal/manifests/storage/var.go b/operator/internal/manifests/storage/var.go index 418fb27152bd3..1f236406bdd09 100644 --- a/operator/internal/manifests/storage/var.go +++ b/operator/internal/manifests/storage/var.go @@ -1,5 +1,7 @@ package storage +import "fmt" + const ( // EnvAlibabaCloudAccessKeyID is the environment variable to specify the AlibabaCloud client id to access S3. EnvAlibabaCloudAccessKeyID = "ALIBABA_CLOUD_ACCESS_KEY_ID" @@ -86,11 +88,11 @@ const ( KeyAzureStorageEndpointSuffix = "endpoint_suffix" // KeyAzureEnvironmentName is the secret data key for the Azure cloud environment name. KeyAzureEnvironmentName = "environment" - // KeyAzureRegion is the secret data key for storing the Azure cloud region. - KeyAzureRegion = "region" // KeyAzureAudience is the secret data key for customizing the audience used for the ServiceAccount token. KeyAzureAudience = "audience" + // KeyGCPWorkloadIdentityProviderAudience is the secret data key for the GCP Workload Identity Provider audience. + KeyGCPWorkloadIdentityProviderAudience = "audience" // KeyGCPStorageBucketName is the secret data key for the GCS bucket name. KeyGCPStorageBucketName = "bucketname" // KeyGCPServiceAccountKeyFilename is the service account key filename containing the Google authentication credentials. @@ -125,24 +127,29 @@ const ( // KeySwiftUsername is the secret data key for the OpenStack Swift password.
KeySwiftUsername = "username" - saTokenVolumeK8sDirectory = "/var/run/secrets/kubernetes.io/serviceaccount" - saTokenVolumeName = "bound-sa-token" - saTokenExpiration int64 = 3600 + saTokenVolumeName = "bound-sa-token" + saTokenExpiration int64 = 3600 + saTokenVolumeMountPath = "/var/run/secrets/storage/serviceaccount" + + ServiceAccountTokenFilePath = saTokenVolumeMountPath + "/token" + + secretDirectory = "/etc/storage/secrets" + storageTLSVolume = "storage-tls" + caDirectory = "/etc/storage/ca" - secretDirectory = "/etc/storage/secrets" - managedAuthSecretDirectory = "/etc/storage/managed-auth" - storageTLSVolume = "storage-tls" - caDirectory = "/etc/storage/ca" + managedAuthConfigVolumeName = "managed-auth-config" + managedAuthConfigDirectory = "/etc/storage/managed-auth" - awsDefaultAudience = "sts.amazonaws.com" - AWSTokenVolumeDirectory = "/var/run/secrets/aws/serviceaccount" + awsDefaultAudience = "sts.amazonaws.com" - azureDefaultAudience = "api://AzureADTokenExchange" - azureTokenVolumeDirectory = "/var/run/secrets/azure/serviceaccount" + azureDefaultAudience = "api://AzureADTokenExchange" azureManagedCredentialKeyClientID = "azure_client_id" azureManagedCredentialKeyTenantID = "azure_tenant_id" azureManagedCredentialKeySubscriptionID = "azure_subscription_id" - - AnnotationCredentialsRequestsSecretRef = "loki.grafana.com/credentials-request-secret-ref" ) + +// ManagedCredentialsSecretName returns the name of the secret holding the managed credentials. +func ManagedCredentialsSecretName(stackName string) string { + return fmt.Sprintf("%s-managed-credentials", stackName) +} diff --git a/operator/internal/status/status.go b/operator/internal/status/status.go index 281a167355c37..c544695d3d2ea 100644 --- a/operator/internal/status/status.go +++ b/operator/internal/status/status.go @@ -17,7 +17,7 @@ import ( // Refresh executes an aggregate update of the LokiStack Status struct, i.e. // - It recreates the Status.Components pod status map per component. // - It sets the appropriate Status.Condition to true that matches the pod status maps. 
-func Refresh(ctx context.Context, k k8s.Client, req ctrl.Request, now time.Time, degradedErr *DegradedError) error { +func Refresh(ctx context.Context, k k8s.Client, req ctrl.Request, now time.Time, credentialMode lokiv1.CredentialMode, degradedErr *DegradedError) error { var stack lokiv1.LokiStack if err := k.Get(ctx, req.NamespacedName, &stack); err != nil { if apierrors.IsNotFound(err) { @@ -45,6 +45,7 @@ func Refresh(ctx context.Context, k k8s.Client, req ctrl.Request, now time.Time, statusUpdater := func(stack *lokiv1.LokiStack) { stack.Status.Components = *cs stack.Status.Conditions = mergeConditions(stack.Status.Conditions, activeConditions, metaTime) + stack.Status.Storage.CredentialMode = credentialMode } statusUpdater(&stack) diff --git a/operator/internal/status/status_test.go b/operator/internal/status/status_test.go index c7895cbe8020e..32ef892ed1bde 100644 --- a/operator/internal/status/status_test.go +++ b/operator/internal/status/status_test.go @@ -54,7 +54,9 @@ func TestRefreshSuccess(t *testing.T) { Gateway: map[corev1.PodPhase][]string{corev1.PodRunning: {"lokistack-gateway-pod-0"}}, Ruler: map[corev1.PodPhase][]string{corev1.PodRunning: {"ruler-pod-0"}}, }, - Storage: lokiv1.LokiStackStorageStatus{}, + Storage: lokiv1.LokiStackStorageStatus{ + CredentialMode: lokiv1.CredentialModeStatic, + }, Conditions: []metav1.Condition{ { Type: string(lokiv1.ConditionReady), @@ -68,7 +70,7 @@ func TestRefreshSuccess(t *testing.T) { k, sw := setupListClient(t, stack, componentPods) - err := Refresh(context.Background(), k, req, now, nil) + err := Refresh(context.Background(), k, req, now, lokiv1.CredentialModeStatic, nil) require.NoError(t, err) require.Equal(t, 1, k.GetCallCount()) @@ -130,7 +132,7 @@ func TestRefreshSuccess_ZoneAwarePendingPod(t *testing.T) { return nil } - err := Refresh(context.Background(), k, req, now, nil) + err := Refresh(context.Background(), k, req, now, lokiv1.CredentialModeStatic, nil) require.NoError(t, err) require.Equal(t, 1, k.GetCallCount()) diff --git a/operator/main.go b/operator/main.go index a88a857bcee44..e212c268cbad8 100644 --- a/operator/main.go +++ b/operator/main.go @@ -21,7 +21,6 @@ import ( lokiv1beta1 "github.com/grafana/loki/operator/apis/loki/v1beta1" lokictrl "github.com/grafana/loki/operator/controllers/loki" "github.com/grafana/loki/operator/internal/config" - manifestsocp "github.com/grafana/loki/operator/internal/manifests/openshift" "github.com/grafana/loki/operator/internal/metrics" "github.com/grafana/loki/operator/internal/operator" "github.com/grafana/loki/operator/internal/validation" @@ -60,12 +59,16 @@ func main() { var err error - ctrlCfg, options, err := config.LoadConfig(scheme, configFile) + ctrlCfg, managedAuth, options, err := config.LoadConfig(scheme, configFile) if err != nil { logger.Error(err, "failed to load operator configuration") os.Exit(1) } + if managedAuth != nil { + logger.Info("Discovered OpenShift Cluster within a managed authentication environment") + } + if ctrlCfg.Gates.LokiStackAlerts && !ctrlCfg.Gates.ServiceMonitors { logger.Error(kverrors.New("LokiStackAlerts flag requires ServiceMonitors"), "") os.Exit(1) @@ -95,16 +98,12 @@ func main() { os.Exit(1) } - if ctrlCfg.Gates.OpenShift.Enabled && manifestsocp.DiscoverManagedAuthEnv() != nil { - logger.Info("discovered OpenShift Cluster within a managed authentication environment") - ctrlCfg.Gates.OpenShift.ManagedAuthEnv = true - } - if err = (&lokictrl.LokiStackReconciler{ Client: mgr.GetClient(), Log: 
logger.WithName("controllers").WithName("lokistack"), Scheme: mgr.GetScheme(), FeatureGates: ctrlCfg.Gates, + AuthConfig: managedAuth, }).SetupWithManager(mgr); err != nil { logger.Error(err, "unable to create controller", "controller", "lokistack") os.Exit(1) @@ -129,17 +128,6 @@ func main() { } } - if ctrlCfg.Gates.OpenShift.ManagedAuthEnabled() { - if err = (&lokictrl.CredentialsRequestsReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Log: logger.WithName("controllers").WithName("lokistack-credentialsrequest"), - }).SetupWithManager(mgr); err != nil { - logger.Error(err, "unable to create controller", "controller", "lokistack-credentialsrequest") - os.Exit(1) - } - } - if ctrlCfg.Gates.LokiStackWebhook { v := &validation.LokiStackValidator{} if err = v.SetupWebhookWithManager(mgr); err != nil { diff --git a/pkg/bloomcompactor/batch.go b/pkg/bloomcompactor/batch.go new file mode 100644 index 0000000000000..bed0834a86b74 --- /dev/null +++ b/pkg/bloomcompactor/batch.go @@ -0,0 +1,361 @@ +package bloomcompactor + +import ( + "context" + "io" + "math" + "time" + + "github.com/grafana/dskit/multierror" + "golang.org/x/exp/slices" + + "github.com/grafana/loki/pkg/chunkenc" + "github.com/grafana/loki/pkg/logproto" + logql_log "github.com/grafana/loki/pkg/logql/log" + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/chunk" + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" +) + +type Fetcher[A, B any] interface { + Fetch(ctx context.Context, inputs []A) ([]B, error) +} + +type FetchFunc[A, B any] func(ctx context.Context, inputs []A) ([]B, error) + +func (f FetchFunc[A, B]) Fetch(ctx context.Context, inputs []A) ([]B, error) { + return f(ctx, inputs) +} + +// batchedLoader implements `v1.Iterator[C]` in batches +type batchedLoader[A, B, C any] struct { + metrics *Metrics + batchSize int + ctx context.Context + fetchers []Fetcher[A, B] + work [][]A + + mapper func(B) (C, error) + cur C + batch []B + err error +} + +const batchedLoaderDefaultBatchSize = 50 + +func newBatchedLoader[A, B, C any]( + ctx context.Context, + fetchers []Fetcher[A, B], + inputs [][]A, + mapper func(B) (C, error), + batchSize int, +) *batchedLoader[A, B, C] { + return &batchedLoader[A, B, C]{ + batchSize: max(batchSize, 1), + ctx: ctx, + fetchers: fetchers, + work: inputs, + mapper: mapper, + } +} + +func (b *batchedLoader[A, B, C]) Next() bool { + + // iterate work until we have non-zero length batch + for len(b.batch) == 0 { + + // empty batch + no work remaining = we're done + if len(b.work) == 0 { + return false + } + + // setup next batch + next := b.work[0] + batchSize := min(b.batchSize, len(next)) + toFetch := next[:batchSize] + fetcher := b.fetchers[0] + + // update work + b.work[0] = b.work[0][batchSize:] + if len(b.work[0]) == 0 { + // if we've exhausted work from this set of inputs, + // set pointer to next set of inputs + // and their respective fetcher + b.work = b.work[1:] + b.fetchers = b.fetchers[1:] + } + + // there was no work in this batch; continue (should not happen) + if len(toFetch) == 0 { + continue + } + + b.batch, b.err = fetcher.Fetch(b.ctx, toFetch) + // error fetching, short-circuit iteration + if b.err != nil { + return false + } + } + + return b.prepNext() +} + +func (b *batchedLoader[_, B, C]) prepNext() bool { + b.cur, b.err = b.mapper(b.batch[0]) + b.batch = b.batch[1:] + return b.err == nil +} + +func (b *batchedLoader[_, _, C]) At() C { + return b.cur +} + +func (b *batchedLoader[_, _, _]) Err() error { + return 
b.err +} + +// to ensure memory is bounded while loading chunks +// TODO(owen-d): testware +func newBatchedChunkLoader( + ctx context.Context, + fetchers []Fetcher[chunk.Chunk, chunk.Chunk], + inputs [][]chunk.Chunk, + metrics *Metrics, + batchSize int, +) *batchedLoader[chunk.Chunk, chunk.Chunk, v1.ChunkRefWithIter] { + + mapper := func(c chunk.Chunk) (v1.ChunkRefWithIter, error) { + chk := c.Data.(*chunkenc.Facade).LokiChunk() + metrics.chunkSize.Observe(float64(chk.UncompressedSize())) + itr, err := chk.Iterator( + ctx, + time.Unix(0, 0), + time.Unix(0, math.MaxInt64), + logproto.FORWARD, + logql_log.NewNoopPipeline().ForStream(c.Metric), + ) + + if err != nil { + return v1.ChunkRefWithIter{}, err + } + + return v1.ChunkRefWithIter{ + Ref: v1.ChunkRef{ + Start: c.From, + End: c.Through, + Checksum: c.Checksum, + }, + Itr: itr, + }, nil + } + return newBatchedLoader(ctx, fetchers, inputs, mapper, batchSize) +} + +func newBatchedBlockLoader( + ctx context.Context, + fetcher Fetcher[bloomshipper.BlockRef, *bloomshipper.CloseableBlockQuerier], + blocks []bloomshipper.BlockRef, + batchSize int, +) *batchedLoader[bloomshipper.BlockRef, *bloomshipper.CloseableBlockQuerier, *bloomshipper.CloseableBlockQuerier] { + + fetchers := []Fetcher[bloomshipper.BlockRef, *bloomshipper.CloseableBlockQuerier]{fetcher} + inputs := [][]bloomshipper.BlockRef{blocks} + mapper := func(a *bloomshipper.CloseableBlockQuerier) (*bloomshipper.CloseableBlockQuerier, error) { + return a, nil + } + + return newBatchedLoader(ctx, fetchers, inputs, mapper, batchSize) +} + +// compiler checks +var _ v1.Iterator[*v1.SeriesWithBloom] = &blockLoadingIter{} +var _ v1.CloseableIterator[*v1.SeriesWithBloom] = &blockLoadingIter{} +var _ v1.ResettableIterator[*v1.SeriesWithBloom] = &blockLoadingIter{} + +// TODO(chaudum): testware +func newBlockLoadingIter(ctx context.Context, blocks []bloomshipper.BlockRef, fetcher FetchFunc[bloomshipper.BlockRef, *bloomshipper.CloseableBlockQuerier], batchSize int) *blockLoadingIter { + + return &blockLoadingIter{ + ctx: ctx, + fetcher: fetcher, + inputs: blocks, + batchSize: batchSize, + loaded: make(map[io.Closer]struct{}), + } +} + +type blockLoadingIter struct { + // constructor arguments + ctx context.Context + fetcher Fetcher[bloomshipper.BlockRef, *bloomshipper.CloseableBlockQuerier] + inputs []bloomshipper.BlockRef + overlapping v1.Iterator[[]bloomshipper.BlockRef] + batchSize int + // optional arguments + filter func(*bloomshipper.CloseableBlockQuerier) bool + // internals + initialized bool + err error + iter v1.Iterator[*v1.SeriesWithBloom] + loader *batchedLoader[bloomshipper.BlockRef, *bloomshipper.CloseableBlockQuerier, *bloomshipper.CloseableBlockQuerier] + loaded map[io.Closer]struct{} +} + +// At implements v1.Iterator. +func (i *blockLoadingIter) At() *v1.SeriesWithBloom { + if !i.initialized { + panic("iterator not initialized") + } + return i.iter.At() +} + +// Err implements v1.Iterator. +func (i *blockLoadingIter) Err() error { + if !i.initialized { + panic("iterator not initialized") + } + if i.err != nil { + return i.err + } + return i.iter.Err() +} + +// Next implements v1.Iterator. +func (i *blockLoadingIter) Next() bool { + i.init() + // next from current batch + hasNext := i.iter.Next() + if !hasNext && !i.loadNext() { + return false + } + // next from next batch + return i.iter.Next() +} + +// Close implements v1.CloseableIterator. 
+func (i *blockLoadingIter) Close() error { + var err multierror.MultiError + for k := range i.loaded { + err.Add(k.Close()) + } + return err.Err() +} + +// Reset implements v1.ResettableIterator. +// TODO(chaudum) Cache already fetched blocks to avoid the overhead of +// creating the reader. +func (i *blockLoadingIter) Reset() error { + if !i.initialized { + return nil + } + // close loaded queriers + err := i.Close() + i.initialized = false + clear(i.loaded) + return err +} + +func (i *blockLoadingIter) init() { + if i.initialized { + return + } + + // group overlapping blocks + i.overlapping = overlappingBlocksIter(i.inputs) + + // set "match all" filter function if not present + if i.filter == nil { + i.filter = func(cbq *bloomshipper.CloseableBlockQuerier) bool { return true } + } + + // load first batch + i.loadNext() + + // done + i.initialized = true +} + +func (i *blockLoadingIter) Filter(filter func(*bloomshipper.CloseableBlockQuerier) bool) { + if i.initialized { + panic("iterator already initialized") + } + i.filter = filter +} + +func (i *blockLoadingIter) loadNext() bool { + // check if there are more overlapping groups to load + if !i.overlapping.Next() { + i.iter = v1.NewEmptyIter[*v1.SeriesWithBloom]() + return false + } + + if i.overlapping.Err() != nil { + i.err = i.overlapping.Err() + return false + } + + blockRefs := i.overlapping.At() + + loader := newBatchedBlockLoader(i.ctx, i.fetcher, blockRefs, i.batchSize) + filtered := v1.NewFilterIter[*bloomshipper.CloseableBlockQuerier](loader, i.filter) + + iters := make([]v1.PeekingIterator[*v1.SeriesWithBloom], 0, len(blockRefs)) + for filtered.Next() && filtered.Err() == nil { + bq := loader.At() + if _, ok := i.loaded[bq]; !ok { + i.loaded[bq] = struct{}{} + } + iter, _ := bq.SeriesIter() + iters = append(iters, iter) + } + + if loader.Err() != nil { + i.err = loader.Err() + return false + } + + if len(iters) == 0 { + i.iter = v1.NewEmptyIter[*v1.SeriesWithBloom]() + return true + } + + // Turn the list of blocks into a single iterator that returns the next series + mergedBlocks := v1.NewHeapIterForSeriesWithBloom(iters...) + // two overlapping blocks can conceivably have the same series, so we need to dedupe, + // preferring the one with the most chunks already indexed since we'll have + // to add fewer chunks to the bloom + i.iter = v1.NewDedupingIter[*v1.SeriesWithBloom, *v1.SeriesWithBloom]( + func(a, b *v1.SeriesWithBloom) bool { + return a.Series.Fingerprint == b.Series.Fingerprint + }, + v1.Identity[*v1.SeriesWithBloom], + func(a, b *v1.SeriesWithBloom) *v1.SeriesWithBloom { + if len(a.Series.Chunks) > len(b.Series.Chunks) { + return a + } + return b + }, + v1.NewPeekingIter(mergedBlocks), + ) + return true +} + +func overlappingBlocksIter(inputs []bloomshipper.BlockRef) v1.Iterator[[]bloomshipper.BlockRef] { + // can we assume sorted blocks?
+ peekIter := v1.NewPeekingIter(v1.NewSliceIter(inputs)) + + return v1.NewDedupingIter[bloomshipper.BlockRef, []bloomshipper.BlockRef]( + func(a bloomshipper.BlockRef, b []bloomshipper.BlockRef) bool { + minFp := b[0].Bounds.Min + maxFp := slices.MaxFunc(b, func(a, b bloomshipper.BlockRef) int { return int(a.Bounds.Max - b.Bounds.Max) }).Bounds.Max + return a.Bounds.Overlaps(v1.NewBounds(minFp, maxFp)) + }, + func(a bloomshipper.BlockRef) []bloomshipper.BlockRef { + return []bloomshipper.BlockRef{a} + }, + func(a bloomshipper.BlockRef, b []bloomshipper.BlockRef) []bloomshipper.BlockRef { + return append(b, a) + }, + peekIter, + ) +} diff --git a/pkg/bloomcompactor/batch_test.go b/pkg/bloomcompactor/batch_test.go new file mode 100644 index 0000000000000..bd2cb3378cfba --- /dev/null +++ b/pkg/bloomcompactor/batch_test.go @@ -0,0 +1,210 @@ +package bloomcompactor + +import ( + "context" + "errors" + "testing" + + "github.com/stretchr/testify/require" + + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" +) + +func TestBatchedLoader(t *testing.T) { + t.Parallel() + + errMapper := func(i int) (int, error) { + return 0, errors.New("bzzt") + } + successMapper := func(i int) (int, error) { + return i, nil + } + + expired, cancel := context.WithCancel(context.Background()) + cancel() + + for _, tc := range []struct { + desc string + ctx context.Context + batchSize int + mapper func(int) (int, error) + err bool + inputs [][]int + exp []int + }{ + { + desc: "OneBatch", + ctx: context.Background(), + batchSize: 2, + mapper: successMapper, + err: false, + inputs: [][]int{{0, 1}}, + exp: []int{0, 1}, + }, + { + desc: "ZeroBatchSizeStillWorks", + ctx: context.Background(), + batchSize: 0, + mapper: successMapper, + err: false, + inputs: [][]int{{0, 1}}, + exp: []int{0, 1}, + }, + { + desc: "OneBatchLessThanFull", + ctx: context.Background(), + batchSize: 2, + mapper: successMapper, + err: false, + inputs: [][]int{{0}}, + exp: []int{0}, + }, + { + desc: "TwoBatches", + ctx: context.Background(), + batchSize: 2, + mapper: successMapper, + err: false, + inputs: [][]int{{0, 1, 2, 3}}, + exp: []int{0, 1, 2, 3}, + }, + { + desc: "MultipleBatchesMultipleLoaders", + ctx: context.Background(), + batchSize: 2, + mapper: successMapper, + err: false, + inputs: [][]int{{0, 1}, {2}, {3, 4, 5}}, + exp: []int{0, 1, 2, 3, 4, 5}, + }, + { + desc: "HandlesEmptyInputs", + ctx: context.Background(), + batchSize: 2, + mapper: successMapper, + err: false, + inputs: [][]int{{0, 1, 2, 3}, nil, {4}}, + exp: []int{0, 1, 2, 3, 4}, + }, + { + desc: "Timeout", + ctx: expired, + batchSize: 2, + mapper: successMapper, + err: true, + inputs: [][]int{{0}}, + }, + { + desc: "MappingFailure", + ctx: context.Background(), + batchSize: 2, + mapper: errMapper, + err: true, + inputs: [][]int{{0}}, + }, + } { + tc := tc + t.Run(tc.desc, func(t *testing.T) { + fetchers := make([]Fetcher[int, int], 0, len(tc.inputs)) + for range tc.inputs { + fetchers = append( + fetchers, + FetchFunc[int, int](func(ctx context.Context, xs []int) ([]int, error) { + if ctx.Err() != nil { + return nil, ctx.Err() + } + return xs, nil + }), + ) + } + + loader := newBatchedLoader[int, int, int]( + tc.ctx, + fetchers, + tc.inputs, + tc.mapper, + tc.batchSize, + ) + + got, err := v1.Collect[int](loader) + if tc.err { + require.Error(t, err) + return + } + require.NoError(t, err) + require.Equal(t, tc.exp, got) + + }) + } +} + +func TestOverlappingBlocksIter(t *testing.T) { + t.Parallel() + for _, tc := 
range []struct { + desc string + inp []bloomshipper.BlockRef + exp int // expected groups + }{ + { + desc: "Empty", + inp: []bloomshipper.BlockRef{}, + exp: 0, + }, + { + desc: "NonOverlapping", + inp: []bloomshipper.BlockRef{ + genBlockRef(0x0000, 0x00ff), + genBlockRef(0x0100, 0x01ff), + genBlockRef(0x0200, 0x02ff), + }, + exp: 3, + }, + { + desc: "AllOverlapping", + inp: []bloomshipper.BlockRef{ + genBlockRef(0x0000, 0x02ff), // |-----------| + genBlockRef(0x0100, 0x01ff), // |---| + genBlockRef(0x0200, 0x02ff), // |---| + }, + exp: 1, + }, + { + desc: "PartialOverlapping", + inp: []bloomshipper.BlockRef{ + genBlockRef(0x0000, 0x01ff), // group 1 |-------| + genBlockRef(0x0100, 0x02ff), // group 1 |-------| + genBlockRef(0x0200, 0x03ff), // group 1 |-------| + genBlockRef(0x0200, 0x02ff), // group 1 |---| + }, + exp: 1, + }, + { + desc: "PartialOverlapping", + inp: []bloomshipper.BlockRef{ + genBlockRef(0x0000, 0x01ff), // group 1 |-------| + genBlockRef(0x0100, 0x02ff), // group 1 |-------| + genBlockRef(0x0100, 0x01ff), // group 1 |---| + genBlockRef(0x0300, 0x03ff), // group 2 |---| + genBlockRef(0x0310, 0x03ff), // group 2 |-| + }, + exp: 2, + }, + } { + tc := tc + t.Run(tc.desc, func(t *testing.T) { + it := overlappingBlocksIter(tc.inp) + var overlapping [][]bloomshipper.BlockRef + var i int + for it.Next() && it.Err() == nil { + require.NotNil(t, it.At()) + overlapping = append(overlapping, it.At()) + for _, r := range it.At() { + t.Log(i, r) + } + i++ + } + require.Equal(t, tc.exp, len(overlapping)) + }) + } +} diff --git a/pkg/bloomcompactor/bloomcompactor.go b/pkg/bloomcompactor/bloomcompactor.go index cf3b3fafcb6d1..3bb1c815e8295 100644 --- a/pkg/bloomcompactor/bloomcompactor.go +++ b/pkg/bloomcompactor/bloomcompactor.go @@ -2,23 +2,31 @@ package bloomcompactor import ( "context" - "fmt" + "sync" "time" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/backoff" + "github.com/grafana/dskit/concurrency" "github.com/grafana/dskit/multierror" + "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/grafana/loki/pkg/bloomutils" - "github.com/grafana/loki/pkg/compactor" + "github.com/grafana/loki/pkg/storage" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/storage/stores" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" - "github.com/grafana/loki/pkg/util" + util_ring "github.com/grafana/loki/pkg/util/ring" +) + +var ( + RingOp = ring.NewOp([]ring.InstanceState{ring.JOINING, ring.ACTIVE}, nil) ) /* @@ -33,41 +41,75 @@ Bloom-compactor regularly runs to check for changes in meta.jsons and runs compa type Compactor struct { services.Service - cfg Config - logger log.Logger - limits Limits + cfg Config + schemaCfg config.SchemaConfig + logger log.Logger + limits Limits - // temporary workaround until store has implemented read/write shipper interface - store bloomshipper.Store + tsdbStore TSDBStore + // TODO(owen-d): ShardingStrategy + controller *SimpleBloomController - sharding ShardingStrategy + // temporary workaround until bloomStore has implemented read/write shipper interface + bloomStore bloomshipper.Store - metrics *metrics + sharding util_ring.TenantSharding + + metrics *Metrics btMetrics *v1.Metrics } func New( cfg Config, - store bloomshipper.Store, - sharding ShardingStrategy, + schemaCfg config.SchemaConfig, + 
storeCfg storage.Config, + clientMetrics storage.ClientMetrics, + fetcherProvider stores.ChunkFetcherProvider, + sharding util_ring.TenantSharding, limits Limits, logger log.Logger, r prometheus.Registerer, ) (*Compactor, error) { c := &Compactor{ - cfg: cfg, - store: store, - logger: logger, - sharding: sharding, - limits: limits, + cfg: cfg, + schemaCfg: schemaCfg, + logger: logger, + sharding: sharding, + limits: limits, + } + + tsdbStore, err := NewTSDBStores(schemaCfg, storeCfg, clientMetrics) + if err != nil { + return nil, errors.Wrap(err, "failed to create TSDB store") } + c.tsdbStore = tsdbStore + + // TODO(owen-d): export bloomstore as a dependency that can be reused by the compactor & gateway rather that + bloomStore, err := bloomshipper.NewBloomStore(schemaCfg.Configs, storeCfg, clientMetrics, nil, nil, logger) + if err != nil { + return nil, errors.Wrap(err, "failed to create bloom store") + } + c.bloomStore = bloomStore // initialize metrics - c.btMetrics = v1.NewMetrics(prometheus.WrapRegistererWithPrefix("loki_bloom_tokenizer", r)) - c.metrics = newMetrics(r) - c.metrics.compactionRunInterval.Set(cfg.CompactionInterval.Seconds()) - c.Service = services.NewBasicService(c.starting, c.running, c.stopping) + c.btMetrics = v1.NewMetrics(prometheus.WrapRegistererWithPrefix("loki_bloom_tokenizer_", r)) + c.metrics = NewMetrics(r, c.btMetrics) + + chunkLoader := NewStoreChunkLoader( + fetcherProvider, + c.metrics, + ) + c.controller = NewSimpleBloomController( + c.tsdbStore, + c.bloomStore, + chunkLoader, + c.limits, + c.metrics, + c.logger, + ) + + c.Service = services.NewBasicService(c.starting, c.running, c.stopping) return c, nil } @@ -76,192 +118,253 @@ func (c *Compactor) starting(_ context.Context) (err error) { return err } +func (c *Compactor) stopping(_ error) error { + c.metrics.compactorRunning.Set(0) + return nil +} + func (c *Compactor) running(ctx context.Context) error { - // Run an initial compaction before starting the interval. 
- if err := c.runCompaction(ctx); err != nil { - level.Error(c.logger).Log("msg", "failed to run compaction", "err", err) + // run once at beginning + if err := c.runOne(ctx); err != nil { + return err } - ticker := time.NewTicker(util.DurationWithJitter(c.cfg.CompactionInterval, 0.05)) + ticker := time.NewTicker(c.cfg.CompactionInterval) defer ticker.Stop() - for { select { + case <-ctx.Done(): + return ctx.Err() + case start := <-ticker.C: - c.metrics.compactionRunsStarted.Inc() - if err := c.runCompaction(ctx); err != nil { - c.metrics.compactionRunsCompleted.WithLabelValues(statusFailure).Inc() - c.metrics.compactionRunTime.WithLabelValues(statusFailure).Observe(time.Since(start).Seconds()) - level.Error(c.logger).Log("msg", "failed to run compaction", "err", err) - continue + c.metrics.compactionsStarted.Inc() + if err := c.runOne(ctx); err != nil { + level.Error(c.logger).Log("msg", "compaction iteration failed", "err", err, "duration", time.Since(start)) + c.metrics.compactionCompleted.WithLabelValues(statusFailure).Inc() + c.metrics.compactionTime.WithLabelValues(statusFailure).Observe(time.Since(start).Seconds()) + return err } - c.metrics.compactionRunsCompleted.WithLabelValues(statusSuccess).Inc() - c.metrics.compactionRunTime.WithLabelValues(statusSuccess).Observe(time.Since(start).Seconds()) - case <-ctx.Done(): + level.Info(c.logger).Log("msg", "compaction iteration completed", "duration", time.Since(start)) + c.metrics.compactionCompleted.WithLabelValues(statusSuccess).Inc() + c.metrics.compactionTime.WithLabelValues(statusSuccess).Observe(time.Since(start).Seconds()) + } + } +} + +func runWithRetries( + ctx context.Context, + minBackoff, maxBackoff time.Duration, + maxRetries int, + f func(ctx context.Context) error, +) error { + var lastErr error + + retries := backoff.New(ctx, backoff.Config{ + MinBackoff: minBackoff, + MaxBackoff: maxBackoff, + MaxRetries: maxRetries, + }) + + for retries.Ongoing() { + lastErr = f(ctx) + if lastErr == nil { return nil } + + retries.Wait() } + + return lastErr } -func (c *Compactor) stopping(_ error) error { - c.metrics.compactorRunning.Set(0) - return nil +type tenantTable struct { + tenant string + table config.DayTable + ownershipRange v1.FingerprintBounds } -func (c *Compactor) runCompaction(ctx context.Context) error { - var tables []string - // TODO(owen-d): resolve tables +func (c *Compactor) tenants(ctx context.Context, table config.DayTable) (v1.Iterator[string], error) { + tenants, err := c.tsdbStore.UsersForPeriod(ctx, table) + if err != nil { + return nil, errors.Wrap(err, "getting tenants") + } - // process most recent tables first - tablesIntervals := getIntervalsForTables(tables) - compactor.SortTablesByRange(tables) + return v1.NewSliceIter(tenants), nil +} - // TODO(owen-d): parallelize at the bottom level, not the top level. - // Can dispatch to a queue & wait. - for _, table := range tables { - logger := log.With(c.logger, "table", table) - err := c.compactTable(ctx, logger, table, tablesIntervals[table]) - if err != nil { - level.Error(logger).Log("msg", "failed to compact table", "err", err) - return errors.Wrapf(err, "failed to compact table %s", table) - } +// ownsTenant returns the ownership range for the tenant, if the compactor owns the tenant, and an error. 
+func (c *Compactor) ownsTenant(tenant string) (v1.FingerprintBounds, bool, error) { + tenantRing, owned := c.sharding.OwnsTenant(tenant) + if !owned { + return v1.FingerprintBounds{}, false, nil } - return nil -} -func (c *Compactor) compactTable(ctx context.Context, logger log.Logger, tableName string, tableInterval model.Interval) error { - // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). - if err := ctx.Err(); err != nil { - return fmt.Errorf("interrupting compaction of table: %w", err) + rs, err := tenantRing.GetAllHealthy(RingOp) + if err != nil { + return v1.FingerprintBounds{}, false, errors.Wrap(err, "getting ring healthy instances") + } - var tenants []string + keyRange, err := bloomutils.KeyRangeForInstance(c.cfg.Ring.InstanceID, rs.Instances, bloomutils.Uint64Range) + if err != nil { + return v1.FingerprintBounds{}, false, errors.Wrap(err, "getting instance token range") + } + return v1.NewBounds(model.Fingerprint(keyRange.Min), model.Fingerprint(keyRange.Max)), true, nil +} - level.Info(logger).Log("msg", "discovered tenants from bucket", "users", len(tenants)) - return c.compactUsers(ctx, logger, tableName, tableInterval, tenants) +// runs a single round of compaction for all relevant tenants and tables +func (c *Compactor) runOne(ctx context.Context) error { + var workersErr error + var wg sync.WaitGroup + ch := make(chan tenantTable) + wg.Add(1) + go func() { + workersErr = c.runWorkers(ctx, ch) + wg.Done() + }() + + err := c.loadWork(ctx, ch) + + wg.Wait() + return multierror.New(workersErr, err, ctx.Err()).Err() } -func (c *Compactor) compactUsers(ctx context.Context, logger log.Logger, tableName string, tableInterval model.Interval, tenants []string) error { - // Keep track of tenants owned by this shard, so that we can delete the local files for all other users. - errs := multierror.New() - ownedTenants := make(map[string]struct{}, len(tenants)) - for _, tenant := range tenants { - tenantLogger := log.With(logger, "tenant", tenant) +func (c *Compactor) tables(ts time.Time) *dayRangeIterator { + // adjust the minimum by one to make it inclusive, which is more intuitive + // for a configuration variable + adjustedMin := min(c.cfg.MinTableCompactionPeriod - 1) + minCompactionPeriod := time.Duration(adjustedMin) * config.ObjectStorageIndexRequiredPeriod + maxCompactionPeriod := time.Duration(c.cfg.MaxTableCompactionPeriod) * config.ObjectStorageIndexRequiredPeriod - // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). - if err := ctx.Err(); err != nil { - return fmt.Errorf("interrupting compaction of tenants: %w", err) - } + from := ts.Add(-maxCompactionPeriod).UnixNano() / int64(config.ObjectStorageIndexRequiredPeriod) * int64(config.ObjectStorageIndexRequiredPeriod) + through := ts.Add(-minCompactionPeriod).UnixNano() / int64(config.ObjectStorageIndexRequiredPeriod) * int64(config.ObjectStorageIndexRequiredPeriod) - // Skip tenant if compaction is not enabled - if !c.limits.BloomCompactorEnabled(tenant) { - level.Info(tenantLogger).Log("msg", "compaction disabled for tenant. Skipping.") - continue - } + fromDay := config.NewDayTime(model.TimeFromUnixNano(from)) + throughDay := config.NewDayTime(model.TimeFromUnixNano(through)) + return newDayRangeIterator(fromDay, throughDay, c.schemaCfg) +} - // Skip this table if it is too old for the tenant limits. 
- now := model.Now() - tableMaxAge := c.limits.BloomCompactorMaxTableAge(tenant) - if tableMaxAge > 0 && tableInterval.Start.Before(now.Add(-tableMaxAge)) { - level.Debug(tenantLogger).Log("msg", "skipping tenant because table is too old", "table-max-age", tableMaxAge, "table-start", tableInterval.Start, "now", now) - continue - } +func (c *Compactor) loadWork(ctx context.Context, ch chan<- tenantTable) error { + tables := c.tables(time.Now()) + + for tables.Next() && tables.Err() == nil && ctx.Err() == nil { + table := tables.At() - // Ensure the tenant ID belongs to our shard. - if !c.sharding.OwnsTenant(tenant) { - c.metrics.compactionRunSkippedTenants.Inc() - level.Debug(tenantLogger).Log("msg", "skipping tenant because it is not owned by this shard") - continue + tenants, err := c.tenants(ctx, table) + if err != nil { + return errors.Wrap(err, "getting tenants") } - ownedTenants[tenant] = struct{}{} - - start := time.Now() - if err := c.compactTenantWithRetries(ctx, tenantLogger, tableName, tenant); err != nil { - switch { - case errors.Is(err, context.Canceled): - // We don't want to count shutdowns as failed compactions because we will pick up with the rest of the compaction after the restart. - level.Info(tenantLogger).Log("msg", "compaction for tenant was interrupted by a shutdown") - return nil - default: - c.metrics.compactionRunTenantsCompleted.WithLabelValues(statusFailure).Inc() - c.metrics.compactionRunTenantsTime.WithLabelValues(statusFailure).Observe(time.Since(start).Seconds()) - level.Error(tenantLogger).Log("msg", "failed to compact tenant", "err", err) - errs.Add(err) + for tenants.Next() && tenants.Err() == nil && ctx.Err() == nil { + c.metrics.tenantsDiscovered.Inc() + tenant := tenants.At() + ownershipRange, owns, err := c.ownsTenant(tenant) + if err != nil { + return errors.Wrap(err, "checking tenant ownership") + } + if !owns { + c.metrics.tenantsSkipped.Inc() + continue } - continue + c.metrics.tenantsOwned.Inc() + + select { + case ch <- tenantTable{ + tenant: tenant, + table: table, + ownershipRange: ownershipRange, + }: + case <-ctx.Done(): + return ctx.Err() + } + } + + if err := tenants.Err(); err != nil { + return errors.Wrap(err, "iterating tenants") } - c.metrics.compactionRunTenantsCompleted.WithLabelValues(statusSuccess).Inc() - c.metrics.compactionRunTenantsTime.WithLabelValues(statusSuccess).Observe(time.Since(start).Seconds()) - level.Info(tenantLogger).Log("msg", "successfully compacted tenant") } - return errs.Err() + if err := tables.Err(); err != nil { + return errors.Wrap(err, "iterating tables") + } - // TODO: Delete local files for unowned tenants, if there are any. + close(ch) + return ctx.Err() } -func (c *Compactor) compactTenant(ctx context.Context, logger log.Logger, _ string, tenant string) error { - level.Info(logger).Log("msg", "starting compaction of tenant") - - // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). 
- if err := ctx.Err(); err != nil { - return err - } +func (c *Compactor) runWorkers(ctx context.Context, ch <-chan tenantTable) error { + + return concurrency.ForEachJob(ctx, c.cfg.WorkerParallelism, c.cfg.WorkerParallelism, func(ctx context.Context, idx int) error { + + for { + select { + case <-ctx.Done(): + return ctx.Err() + + case tt, ok := <-ch: + if !ok { + return nil + } + + start := time.Now() + c.metrics.tenantsStarted.Inc() + if err := c.compactTenantTable(ctx, tt); err != nil { + c.metrics.tenantsCompleted.WithLabelValues(statusFailure).Inc() + c.metrics.tenantsCompletedTime.WithLabelValues(statusFailure).Observe(time.Since(start).Seconds()) + return errors.Wrapf( + err, + "compacting tenant table (%s) for tenant (%s) with ownership (%s)", + tt.table, + tt.tenant, + tt.ownershipRange, + ) + } + c.metrics.tenantsCompleted.WithLabelValues(statusSuccess).Inc() + c.metrics.tenantsCompletedTime.WithLabelValues(statusSuccess).Observe(time.Since(start).Seconds()) + } + } - // Tokenizer is not thread-safe so we need one per goroutine. - nGramLen := c.limits.BloomNGramLength(tenant) - nGramSkip := c.limits.BloomNGramSkip(tenant) - _ = v1.NewBloomTokenizer(nGramLen, nGramSkip, c.btMetrics) + }) - rs, err := c.sharding.GetTenantSubRing(tenant).GetAllHealthy(RingOp) - if err != nil { - return err - } - tokenRanges := bloomutils.GetInstanceWithTokenRange(c.cfg.Ring.InstanceID, rs.Instances) - for _, tr := range tokenRanges { - level.Debug(logger).Log("msg", "got token range for instance", "id", tr.Instance.Id, "min", tr.MinToken, "max", tr.MaxToken) - } +} - // TODO(owen-d): impl - return nil +func (c *Compactor) compactTenantTable(ctx context.Context, tt tenantTable) error { + level.Info(c.logger).Log("msg", "compacting", "org_id", tt.tenant, "table", tt.table, "ownership", tt.ownershipRange) + return c.controller.compactTenant(ctx, tt.table, tt.tenant, tt.ownershipRange) } -func runWithRetries( - ctx context.Context, - minBackoff, maxBackoff time.Duration, - maxRetries int, - f func(ctx context.Context) error, -) error { - var lastErr error +type dayRangeIterator struct { + min, max, cur config.DayTime + curPeriod config.PeriodConfig + schemaCfg config.SchemaConfig + err error +} - retries := backoff.New(ctx, backoff.Config{ - MinBackoff: minBackoff, - MaxBackoff: maxBackoff, - MaxRetries: maxRetries, - }) +func newDayRangeIterator(min, max config.DayTime, schemaCfg config.SchemaConfig) *dayRangeIterator { + return &dayRangeIterator{min: min, max: max, cur: min.Dec(), schemaCfg: schemaCfg} +} - for retries.Ongoing() { - lastErr = f(ctx) - if lastErr == nil { - return nil - } +func (r *dayRangeIterator) Next() bool { + r.cur = r.cur.Inc() + if !r.cur.Before(r.max) { + return false + } - retries.Wait() + period, err := r.schemaCfg.SchemaForTime(r.cur.ModelTime()) + if err != nil { + r.err = errors.Wrapf(err, "getting schema for time (%s)", r.cur) + return false } + r.curPeriod = period - return lastErr + return true } -func (c *Compactor) compactTenantWithRetries(ctx context.Context, logger log.Logger, tableName string, tenant string) error { - return runWithRetries( - ctx, - c.cfg.RetryMinBackoff, - c.cfg.RetryMaxBackoff, - c.cfg.CompactionRetries, - func(ctx context.Context) error { - return c.compactTenant(ctx, logger, tableName, tenant) - }, - ) +func (r *dayRangeIterator) At() config.DayTable { + return config.NewDayTable(r.cur, r.curPeriod.IndexTables.Prefix) +} + +func (r *dayRangeIterator) Err() error { + return nil } diff --git a/pkg/bloomcompactor/bloomcompactor_test.go 
b/pkg/bloomcompactor/bloomcompactor_test.go new file mode 100644 index 0000000000000..475ba8ec0585d --- /dev/null +++ b/pkg/bloomcompactor/bloomcompactor_test.go @@ -0,0 +1,197 @@ +package bloomcompactor + +import ( + "context" + "flag" + "fmt" + "math" + "testing" + "time" + + "github.com/grafana/dskit/services" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + util_log "github.com/grafana/loki/pkg/util/log" + lokiring "github.com/grafana/loki/pkg/util/ring" + util_ring "github.com/grafana/loki/pkg/util/ring" + "github.com/grafana/loki/pkg/validation" +) + +func TestCompactor_ownsTenant(t *testing.T) { + for _, tc := range []struct { + name string + limits Limits + compactors int + + expectedCompactorsOwningTenant int + }{ + { + name: "no sharding with one instance", + limits: mockLimits{ + shardSize: 0, + }, + compactors: 1, + expectedCompactorsOwningTenant: 1, + }, + { + name: "no sharding with multiple instances", + limits: mockLimits{ + shardSize: 0, + }, + compactors: 10, + expectedCompactorsOwningTenant: 10, + }, + { + name: "sharding with one instance", + limits: mockLimits{ + shardSize: 5, + }, + compactors: 1, + expectedCompactorsOwningTenant: 1, + }, + { + name: "sharding with multiple instances", + limits: mockLimits{ + shardSize: 5, + }, + compactors: 10, + expectedCompactorsOwningTenant: 5, + }, + } { + t.Run(tc.name, func(t *testing.T) { + var ringManagers []*lokiring.RingManager + var compactors []*Compactor + for i := 0; i < tc.compactors; i++ { + var ringCfg lokiring.RingConfig + ringCfg.RegisterFlagsWithPrefix("", "", flag.NewFlagSet("ring", flag.PanicOnError)) + ringCfg.KVStore.Store = "inmemory" + ringCfg.InstanceID = fmt.Sprintf("bloom-compactor-%d", i) + ringCfg.InstanceAddr = fmt.Sprintf("localhost-%d", i) + + ringManager, err := lokiring.NewRingManager("bloom-compactor", lokiring.ServerMode, ringCfg, 1, 1, util_log.Logger, prometheus.NewRegistry()) + require.NoError(t, err) + require.NoError(t, ringManager.StartAsync(context.Background())) + + shuffleSharding := util_ring.NewTenantShuffleSharding(ringManager.Ring, ringManager.RingLifecycler, tc.limits.BloomCompactorShardSize) + + compactor := &Compactor{ + cfg: Config{ + Ring: ringCfg, + }, + sharding: shuffleSharding, + limits: tc.limits, + } + + ringManagers = append(ringManagers, ringManager) + compactors = append(compactors, compactor) + } + defer func() { + // Stop all rings and wait for them to stop. + for _, ringManager := range ringManagers { + ringManager.StopAsync() + require.Eventually(t, func() bool { + return ringManager.State() == services.Terminated + }, 1*time.Minute, 100*time.Millisecond) + } + }() + + // Wait for all rings to see each other. 
+ for _, ringManager := range ringManagers { + require.Eventually(t, func() bool { + running := ringManager.State() == services.Running + discovered := ringManager.Ring.InstancesCount() == tc.compactors + return running && discovered + }, 1*time.Minute, 100*time.Millisecond) + } + + var compactorOwnsTenant int + var compactorOwnershipRange []v1.FingerprintBounds + for _, compactor := range compactors { + ownershipRange, ownsTenant, err := compactor.ownsTenant("tenant") + require.NoError(t, err) + if ownsTenant { + compactorOwnsTenant++ + compactorOwnershipRange = append(compactorOwnershipRange, ownershipRange) + } + } + require.Equal(t, tc.expectedCompactorsOwningTenant, compactorOwnsTenant) + + coveredKeySpace := v1.NewBounds(math.MaxUint64, 0) + for i, boundsA := range compactorOwnershipRange { + for j, boundsB := range compactorOwnershipRange { + if i == j { + continue + } + // Assert that the fingerprint key-space is not overlapping + require.False(t, boundsA.Overlaps(boundsB)) + } + + if boundsA.Min < coveredKeySpace.Min { + coveredKeySpace.Min = boundsA.Min + } + if boundsA.Max > coveredKeySpace.Max { + coveredKeySpace.Max = boundsA.Max + } + + // Assert that the fingerprint key-space is evenly distributed across the compactors + // We do some adjustments if the key-space is not evenly distributable, so we use a delta of 10 + // to account for that and check that the key-space is reasonably evenly distributed. + fpPerTenant := math.MaxUint64 / uint64(tc.expectedCompactorsOwningTenant) + boundsLen := uint64(boundsA.Max - boundsA.Min) + require.InDelta(t, fpPerTenant, boundsLen, 10) + } + // Assert that the fingerprint key-space is complete + require.True(t, coveredKeySpace.Equal(v1.NewBounds(0, math.MaxUint64))) + }) + } +} + +type mockLimits struct { + shardSize int +} + +func (m mockLimits) AllByUserID() map[string]*validation.Limits { + panic("implement me") +} + +func (m mockLimits) DefaultLimits() *validation.Limits { + panic("implement me") +} + +func (m mockLimits) VolumeMaxSeries(_ string) int { + panic("implement me") +} + +func (m mockLimits) BloomCompactorShardSize(_ string) int { + return m.shardSize +} + +func (m mockLimits) BloomCompactorChunksBatchSize(_ string) int { + panic("implement me") +} + +func (m mockLimits) BloomCompactorMaxTableAge(_ string) time.Duration { + panic("implement me") +} + +func (m mockLimits) BloomCompactorEnabled(_ string) bool { + panic("implement me") +} + +func (m mockLimits) BloomNGramLength(_ string) int { + panic("implement me") +} + +func (m mockLimits) BloomNGramSkip(_ string) int { + panic("implement me") +} + +func (m mockLimits) BloomFalsePositiveRate(_ string) float64 { + panic("implement me") +} + +func (m mockLimits) BloomCompactorMaxBlockSize(_ string) int { + panic("implement me") +} diff --git a/pkg/bloomcompactor/config.go b/pkg/bloomcompactor/config.go index 884034fdd043d..15f9aa86c040f 100644 --- a/pkg/bloomcompactor/config.go +++ b/pkg/bloomcompactor/config.go @@ -2,6 +2,7 @@ package bloomcompactor import ( "flag" + "fmt" "time" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/downloads" @@ -15,13 +16,14 @@ type Config struct { // section and the ingester configuration by default). Ring ring.RingConfig `yaml:"ring,omitempty" doc:"description=Defines the ring to be used by the bloom-compactor servers. 
In case this isn't configured, this block supports inheriting configuration from the common ring section."` // Enabled configures whether bloom-compactors should be used to compact index values into bloomfilters - Enabled bool `yaml:"enabled"` - WorkingDirectory string `yaml:"working_directory"` - CompactionInterval time.Duration `yaml:"compaction_interval"` - - RetryMinBackoff time.Duration `yaml:"compaction_retries_min_backoff"` - RetryMaxBackoff time.Duration `yaml:"compaction_retries_max_backoff"` - CompactionRetries int `yaml:"compaction_retries"` + Enabled bool `yaml:"enabled"` + CompactionInterval time.Duration `yaml:"compaction_interval"` + MinTableCompactionPeriod int `yaml:"min_table_compaction_period"` + MaxTableCompactionPeriod int `yaml:"max_table_compaction_period"` + WorkerParallelism int `yaml:"worker_parallelism"` + RetryMinBackoff time.Duration `yaml:"compaction_retries_min_backoff"` + RetryMaxBackoff time.Duration `yaml:"compaction_retries_max_backoff"` + CompactionRetries int `yaml:"compaction_retries"` MaxCompactionParallelism int `yaml:"max_compaction_parallelism"` } @@ -30,14 +32,29 @@ type Config struct { func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.Ring.RegisterFlagsWithPrefix("bloom-compactor.", "collectors/", f) f.BoolVar(&cfg.Enabled, "bloom-compactor.enabled", false, "Flag to enable or disable the usage of the bloom-compactor component.") - f.StringVar(&cfg.WorkingDirectory, "bloom-compactor.working-directory", "", "Directory where files can be downloaded for compaction.") f.DurationVar(&cfg.CompactionInterval, "bloom-compactor.compaction-interval", 10*time.Minute, "Interval at which to re-run the compaction operation.") + f.IntVar(&cfg.WorkerParallelism, "bloom-compactor.worker-parallelism", 1, "Number of workers to run in parallel for compaction.") + f.IntVar(&cfg.MinTableCompactionPeriod, "bloom-compactor.min-table-compaction-period", 1, "How many index periods (days) to wait before compacting a table. This can be used to lower cost by not re-writing data to object storage too frequently since recent data changes more often.") + // TODO(owen-d): ideally we'd set this per tenant based on their `reject_old_samples_max_age` setting, + // but due to how we need to discover tenants, we can't do that yet. Tenant+Period discovery is done by + // iterating the table periods in object storage and looking for tenants within that period. + // In order to have this done dynamically, we'd need to account for tenant specific overrides, which are also + // dynamically reloaded. + // I'm doing it the simple way for now. + f.IntVar(&cfg.MaxTableCompactionPeriod, "bloom-compactor.max-table-compaction-period", 7, "How many index periods (days) to wait before compacting a table. This can be used to lower cost by not trying to compact older data which doesn't change. This can be optimized by aligning it with the maximum `reject_old_samples_max_age` setting of any tenant.") f.DurationVar(&cfg.RetryMinBackoff, "bloom-compactor.compaction-retries-min-backoff", 10*time.Second, "Minimum backoff time between retries.") f.DurationVar(&cfg.RetryMaxBackoff, "bloom-compactor.compaction-retries-max-backoff", time.Minute, "Maximum backoff time between retries.") f.IntVar(&cfg.CompactionRetries, "bloom-compactor.compaction-retries", 3, "Number of retries to perform when compaction fails.") f.IntVar(&cfg.MaxCompactionParallelism, "bloom-compactor.max-compaction-parallelism", 1, "Maximum number of tables to compact in parallel. 
While increasing this value, please make sure compactor has enough disk space allocated to be able to store and compact as many tables.") } +func (cfg *Config) Validate() error { + if cfg.MinTableCompactionPeriod > cfg.MaxTableCompactionPeriod { + return fmt.Errorf("min_compaction_age must be less than or equal to max_compaction_age") + } + return nil +} + type Limits interface { downloads.Limits BloomCompactorShardSize(tenantID string) int @@ -47,4 +64,5 @@ type Limits interface { BloomNGramLength(tenantID string) int BloomNGramSkip(tenantID string) int BloomFalsePositiveRate(tenantID string) float64 + BloomCompactorMaxBlockSize(tenantID string) int } diff --git a/pkg/bloomcompactor/controller.go b/pkg/bloomcompactor/controller.go index 2002d8ce2a8bc..ef41ec2d8efbb 100644 --- a/pkg/bloomcompactor/controller.go +++ b/pkg/bloomcompactor/controller.go @@ -1,6 +1,7 @@ package bloomcompactor import ( + "bytes" "context" "fmt" "sort" @@ -10,107 +11,242 @@ import ( "github.com/pkg/errors" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" ) -type uploader interface { - PutBlock(ctx context.Context, block bloomshipper.Block) error - PutMeta(ctx context.Context, meta bloomshipper.Meta) error -} - type SimpleBloomController struct { - // TODO(owen-d): consider making tenant+table dynamic (not 1 struct per combination) - tenant string - table string - ownershipRange v1.FingerprintBounds // ownership range of this controller - tsdbStore TSDBStore - bloomStore bloomshipper.Store - uploader uploader - chunkLoader ChunkLoader - rwFn func() (v1.BlockWriter, v1.BlockReader) - metrics *Metrics - - // TODO(owen-d): add metrics + tsdbStore TSDBStore + bloomStore bloomshipper.Store + chunkLoader ChunkLoader + metrics *Metrics + limits Limits + logger log.Logger } func NewSimpleBloomController( - tenant, table string, - ownershipRange v1.FingerprintBounds, tsdbStore TSDBStore, blockStore bloomshipper.Store, - uploader uploader, chunkLoader ChunkLoader, - rwFn func() (v1.BlockWriter, v1.BlockReader), + limits Limits, metrics *Metrics, logger log.Logger, ) *SimpleBloomController { return &SimpleBloomController{ - tenant: tenant, - table: table, - ownershipRange: ownershipRange, - tsdbStore: tsdbStore, - bloomStore: blockStore, - uploader: uploader, - chunkLoader: chunkLoader, - rwFn: rwFn, - metrics: metrics, - logger: log.With(logger, "ownership", ownershipRange), + tsdbStore: tsdbStore, + bloomStore: blockStore, + chunkLoader: chunkLoader, + metrics: metrics, + limits: limits, + logger: logger, } } -func (s *SimpleBloomController) do(ctx context.Context) error { - // 1. Resolve TSDBs - tsdbs, err := s.tsdbStore.ResolveTSDBs() +// TODO(owen-d): pool, evaluate if memory-only is the best choice +func (s *SimpleBloomController) rwFn() (v1.BlockWriter, v1.BlockReader) { + indexBuf := bytes.NewBuffer(nil) + bloomsBuf := bytes.NewBuffer(nil) + return v1.NewMemoryBlockWriter(indexBuf, bloomsBuf), v1.NewByteReader(indexBuf, bloomsBuf) +} + +/* +Compaction works as follows, split across many functions for clarity: + 1. Fetch all meta.jsons for the given tenant and table which overlap the ownership range of this compactor. + 2. Load current TSDBs for this tenant/table. + 3. 
For each live TSDB (there should be only 1, but this works with multiple), find any gaps + (fingerprint ranges) which are not up date, determined by checking other meta.jsons and comparing + the tsdbs they were generated from + their ownership ranges. + 4. Build new bloom blocks for each gap, using the series and chunks from the TSDBs and any existing + blocks which overlap the gaps to accelerate bloom generation. + 5. Write the new blocks and metas to the store. + 6. Determine if any meta.jsons overlap the ownership range but are outdated, and remove them and + their associated blocks if so. +*/ +func (s *SimpleBloomController) compactTenant( + ctx context.Context, + table config.DayTable, + tenant string, + ownershipRange v1.FingerprintBounds, +) error { + logger := log.With(s.logger, "ownership", ownershipRange, "org_id", tenant, "table", table.Addr()) + + client, err := s.bloomStore.Client(table.ModelTime()) if err != nil { - level.Error(s.logger).Log("msg", "failed to resolve tsdbs", "err", err) - return errors.Wrap(err, "failed to resolve tsdbs") + level.Error(logger).Log("msg", "failed to get client", "err", err) + return errors.Wrap(err, "failed to get client") } - if len(tsdbs) == 0 { - return nil + // Fetch source metas to be used in both compaction and cleanup of out-of-date metas+blooms + metas, err := s.bloomStore.FetchMetas( + ctx, + bloomshipper.MetaSearchParams{ + TenantID: tenant, + Interval: bloomshipper.NewInterval(table.Bounds()), + Keyspace: ownershipRange, + }, + ) + if err != nil { + level.Error(logger).Log("msg", "failed to get metas", "err", err) + return errors.Wrap(err, "failed to get metas") } - ids := make([]tsdb.Identifier, 0, len(tsdbs)) - for _, id := range tsdbs { - ids = append(ids, id) + // build compaction plans + work, err := s.findOutdatedGaps(ctx, tenant, table, ownershipRange, metas, logger) + if err != nil { + return errors.Wrap(err, "failed to find outdated gaps") } - // 2. Fetch metas - metas, err := s.bloomStore.FetchMetas( + // build new blocks + built, err := s.buildGaps(ctx, tenant, table, client, work, logger) + if err != nil { + return errors.Wrap(err, "failed to build gaps") + } + + // in order to delete outdates metas which only partially fall within the ownership range, + // we need to fetcha all metas in the entire bound range of the first set of metas we've resolved + /* + For instance, we have the following ownership range and we resolve `meta1` in our first Fetch call + because it overlaps the ownership range, we'll need to fetch newer metas that may overlap it in order + to check if it safely can be deleted. This falls partially outside our specific ownership range, but + we can safely run multiple deletes by treating their removal as idempotent. 
+ |-------------ownership range-----------------| + |-------meta1-------| + + we fetch this before possibly deleting meta1 |------| + */ + superset := ownershipRange + for _, meta := range metas { + union := superset.Union(meta.Bounds) + if len(union) > 1 { + level.Error(logger).Log("msg", "meta bounds union is not a single range", "union", union) + return errors.New("meta bounds union is not a single range") + } + superset = union[0] + } + + metas, err = s.bloomStore.FetchMetas( ctx, bloomshipper.MetaSearchParams{ - TenantID: s.tenant, - Interval: bloomshipper.Interval{}, // TODO(owen-d): gen interval - Keyspace: s.ownershipRange, + TenantID: tenant, + Interval: bloomshipper.NewInterval(table.Bounds()), + Keyspace: superset, }, ) if err != nil { - level.Error(s.logger).Log("msg", "failed to get metas", "err", err) - return errors.Wrap(err, "failed to get metas") + level.Error(logger).Log("msg", "failed to get meta superset range", "err", err, "superset", superset) + return errors.Wrap(err, "failed to get meta supseret range") + } + + // combine built and pre-existing metas + // in preparation for removing outdated metas + metas = append(metas, built...) + + outdated := outdatedMetas(metas) + for _, meta := range outdated { + for _, block := range meta.Blocks { + if err := client.DeleteBlocks(ctx, []bloomshipper.BlockRef{block}); err != nil { + if client.IsObjectNotFoundErr(err) { + level.Debug(logger).Log("msg", "block not found while attempting delete, continuing", "block", block) + continue + } + + level.Error(logger).Log("msg", "failed to delete blocks", "err", err) + return errors.Wrap(err, "failed to delete blocks") + } + } + + if err := client.DeleteMetas(ctx, []bloomshipper.MetaRef{meta.MetaRef}); err != nil { + if client.IsObjectNotFoundErr(err) { + level.Debug(logger).Log("msg", "meta not found while attempting delete, continuing", "meta", meta.MetaRef) + } else { + level.Error(logger).Log("msg", "failed to delete metas", "err", err) + return errors.Wrap(err, "failed to delete metas") + } + } } - // 3. Determine which TSDBs have gaps in the ownership range and need to + level.Debug(logger).Log("msg", "finished compaction") + return nil + +} + +func (s *SimpleBloomController) findOutdatedGaps( + ctx context.Context, + tenant string, + table config.DayTable, + ownershipRange v1.FingerprintBounds, + metas []bloomshipper.Meta, + logger log.Logger, +) ([]blockPlan, error) { + // Resolve TSDBs + tsdbs, err := s.tsdbStore.ResolveTSDBs(ctx, table, tenant) + if err != nil { + level.Error(logger).Log("msg", "failed to resolve tsdbs", "err", err) + return nil, errors.Wrap(err, "failed to resolve tsdbs") + } + + if len(tsdbs) == 0 { + return nil, nil + } + + // Determine which TSDBs have gaps in the ownership range and need to // be processed. 
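+	// For example, if this compactor owns the fingerprint range [0, 10] and the only meta built
+	// from the current TSDB covers [4, 8], the remaining gaps are [0, 3] and [9, 10]
+	// (see Test_gapsBetweenTSDBsAndMetas for concrete cases).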
- tsdbsWithGaps, err := gapsBetweenTSDBsAndMetas(s.ownershipRange, ids, metas) + tsdbsWithGaps, err := gapsBetweenTSDBsAndMetas(ownershipRange, tsdbs, metas) if err != nil { - level.Error(s.logger).Log("msg", "failed to find gaps", "err", err) - return errors.Wrap(err, "failed to find gaps") + level.Error(logger).Log("msg", "failed to find gaps", "err", err) + return nil, errors.Wrap(err, "failed to find gaps") } if len(tsdbsWithGaps) == 0 { - level.Debug(s.logger).Log("msg", "blooms exist for all tsdbs") - return nil + level.Debug(logger).Log("msg", "blooms exist for all tsdbs") + return nil, nil } work, err := blockPlansForGaps(tsdbsWithGaps, metas) if err != nil { - level.Error(s.logger).Log("msg", "failed to create plan", "err", err) - return errors.Wrap(err, "failed to create plan") + level.Error(logger).Log("msg", "failed to create plan", "err", err) + return nil, errors.Wrap(err, "failed to create plan") + } + + return work, nil +} + +func (s *SimpleBloomController) loadWorkForGap( + ctx context.Context, + table config.DayTable, + tenant string, + id tsdb.Identifier, + gap gapWithBlocks, +) (v1.CloseableIterator[*v1.Series], v1.CloseableResettableIterator[*v1.SeriesWithBloom], error) { + // load a series iterator for the gap + seriesItr, err := s.tsdbStore.LoadTSDB(ctx, table, tenant, id, gap.bounds) + if err != nil { + return nil, nil, errors.Wrap(err, "failed to load tsdb") } - // 4. Generate Blooms + // load a blocks iterator for the gap + fetcher, err := s.bloomStore.Fetcher(table.ModelTime()) + if err != nil { + return nil, nil, errors.Wrap(err, "failed to get fetcher") + } + + f := FetchFunc[bloomshipper.BlockRef, *bloomshipper.CloseableBlockQuerier](fetcher.FetchBlocks) + blocksIter := newBlockLoadingIter(ctx, gap.blocks, f, 10) + + return seriesItr, blocksIter, nil +} + +func (s *SimpleBloomController) buildGaps( + ctx context.Context, + tenant string, + table config.DayTable, + client bloomshipper.Client, + work []blockPlan, + logger log.Logger, +) ([]bloomshipper.Meta, error) { + // Generate Blooms // Now that we have the gaps, we will generate a bloom block for each gap. // We can accelerate this by using existing blocks which may already contain // needed chunks in their blooms, for instance after a new TSDB version is generated @@ -121,82 +257,228 @@ func (s *SimpleBloomController) do(ctx context.Context) error { // accelerate bloom generation for the new blocks. 
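+	// The block options below (n-gram length, n-gram skip and the maximum block size) are resolved
+	// per tenant from the limits, so differently configured tenants build independently sized blocks.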
var ( - blockCt int - tsdbCt = len(work) + blockCt int + tsdbCt = len(work) + nGramSize = uint64(s.limits.BloomNGramLength(tenant)) + nGramSkip = uint64(s.limits.BloomNGramSkip(tenant)) + maxBlockSize = uint64(s.limits.BloomCompactorMaxBlockSize(tenant)) + blockOpts = v1.NewBlockOptions(nGramSize, nGramSkip, maxBlockSize) + created []bloomshipper.Meta + totalSeries uint64 ) for _, plan := range work { - for _, gap := range plan.gaps { + for i := range plan.gaps { + gap := plan.gaps[i] + + meta := bloomshipper.Meta{ + MetaRef: bloomshipper.MetaRef{ + Ref: bloomshipper.Ref{ + TenantID: tenant, + TableName: table.Addr(), + Bounds: gap.bounds, + }, + }, + Sources: []tsdb.SingleTenantTSDBIdentifier{plan.tsdb}, + } + // Fetch blocks that aren't up to date but are in the desired fingerprint range // to try and accelerate bloom creation - seriesItr, preExistingBlocks, err := s.loadWorkForGap(ctx, plan.tsdb, gap) + seriesItr, blocksIter, err := s.loadWorkForGap(ctx, table, tenant, plan.tsdb, gap) if err != nil { - level.Error(s.logger).Log("msg", "failed to get series and blocks", "err", err) - return errors.Wrap(err, "failed to get series and blocks") + level.Error(logger).Log("msg", "failed to get series and blocks", "err", err) + return nil, errors.Wrap(err, "failed to get series and blocks") } + // Blocks are built consuming the series iterator. For observability, we wrap the series iterator + // with a counter iterator to count the number of times Next() is called on it. + // This is used to observe the number of series that are being processed. + seriesItrWithCounter := v1.NewCounterIter[*v1.Series](seriesItr) + gen := NewSimpleBloomGenerator( - v1.DefaultBlockOptions, - seriesItr, + tenant, + blockOpts, + seriesItrWithCounter, s.chunkLoader, - preExistingBlocks, + blocksIter, s.rwFn, s.metrics, - log.With(s.logger, "tsdb", plan.tsdb.Name(), "ownership", gap, "blocks", len(preExistingBlocks)), + log.With(logger, "tsdb", plan.tsdb.Name(), "ownership", gap), ) - _, newBlocks, err := gen.Generate(ctx) + newBlocks := gen.Generate(ctx) if err != nil { - // TODO(owen-d): metrics - level.Error(s.logger).Log("msg", "failed to generate bloom", "err", err) - return errors.Wrap(err, "failed to generate bloom") + level.Error(logger).Log("msg", "failed to generate bloom", "err", err) + blocksIter.Close() + return nil, errors.Wrap(err, "failed to generate bloom") } - // TODO(owen-d): dispatch this to a queue for writing, handling retries/backpressure, etc? 
- for newBlocks.Next() { + for newBlocks.Next() && newBlocks.Err() == nil { blockCt++ blk := newBlocks.At() - if err := s.uploader.PutBlock( + built, err := bloomshipper.BlockFrom(tenant, table.Addr(), blk) + if err != nil { + level.Error(logger).Log("msg", "failed to build block", "err", err) + blocksIter.Close() + return nil, errors.Wrap(err, "failed to build block") + } + + if err := client.PutBlock( ctx, - bloomshipper.BlockFrom(s.tenant, s.table, blk), + built, ); err != nil { - level.Error(s.logger).Log("msg", "failed to write block", "err", err) - return errors.Wrap(err, "failed to write block") + level.Error(logger).Log("msg", "failed to write block", "err", err) + blocksIter.Close() + return nil, errors.Wrap(err, "failed to write block") } + + meta.Blocks = append(meta.Blocks, built.BlockRef) } if err := newBlocks.Err(); err != nil { - // TODO(owen-d): metrics - level.Error(s.logger).Log("msg", "failed to generate bloom", "err", err) - return errors.Wrap(err, "failed to generate bloom") + level.Error(logger).Log("msg", "failed to generate bloom", "err", err) + return nil, errors.Wrap(err, "failed to generate bloom") + } + + // Close pre-existing blocks + blocksIter.Close() + + // Write the new meta + ref, err := bloomshipper.MetaRefFrom(tenant, table.Addr(), gap.bounds, meta.Sources, meta.Blocks) + if err != nil { + level.Error(logger).Log("msg", "failed to checksum meta", "err", err) + return nil, errors.Wrap(err, "failed to checksum meta") } + meta.MetaRef = ref + if err := client.PutMeta(ctx, meta); err != nil { + level.Error(logger).Log("msg", "failed to write meta", "err", err) + return nil, errors.Wrap(err, "failed to write meta") + } + created = append(created, meta) + + totalSeries += uint64(seriesItrWithCounter.Count()) } } - // TODO(owen-d): build meta from blocks - // TODO(owen-d): reap tombstones, old metas + s.metrics.tenantsSeries.Observe(float64(totalSeries)) + level.Debug(logger).Log("msg", "finished bloom generation", "blocks", blockCt, "tsdbs", tsdbCt) + return created, nil +} + +// outdatedMetas returns metas that are outdated and need to be removed, +// determined by if their entire ownership range is covered by other metas with newer +// TSDBs +func outdatedMetas(metas []bloomshipper.Meta) (outdated []bloomshipper.Meta) { + // first, ensure data is sorted so we can take advantage of that + sort.Slice(metas, func(i, j int) bool { + return metas[i].Bounds.Less(metas[j].Bounds) + }) + + // NB(owen-d): time complexity shouldn't be a problem + // given the number of metas should be low (famous last words, i know). 
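+	//
+	// For example, metas covering [0, 5] and [6, 10] built from an older TSDB are both outdated
+	// once a meta covering [0, 10] from a newer TSDB exists, since their entire bounds are covered
+	// by strictly newer data (see Test_OutdatedMetas).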
+ for i := range metas { + a := metas[i] + + var overlaps []v1.FingerprintBounds + + for j := range metas { + if j == i { + continue + } - level.Debug(s.logger).Log("msg", "finished bloom generation", "blocks", blockCt, "tsdbs", tsdbCt) - return nil + b := metas[j] + intersection := a.Bounds.Intersection(b.Bounds) + if intersection == nil { + if a.Bounds.Cmp(b.Bounds.Min) == v1.After { + // All subsequent metas will be newer, so we can break + break + } + // otherwise, just check the next meta + continue + } + + // we can only remove older data, not data which may be newer + if !tsdbsStrictlyNewer(b.Sources, a.Sources) { + continue + } + + // because we've sorted the metas, we only have to test overlaps against the last + // overlap we found (if any) + if len(overlaps) == 0 { + overlaps = append(overlaps, *intersection) + continue + } + + // best effort at merging overlaps first pass + last := overlaps[len(overlaps)-1] + overlaps = append(overlaps[:len(overlaps)-1], last.Union(*intersection)...) + } + + if coversFullRange(a.Bounds, overlaps) { + outdated = append(outdated, a) + } + } + return } -func (s *SimpleBloomController) loadWorkForGap(ctx context.Context, id tsdb.Identifier, gap gapWithBlocks) (v1.CloseableIterator[*v1.Series], []*bloomshipper.CloseableBlockQuerier, error) { - // load a series iterator for the gap - seriesItr, err := s.tsdbStore.LoadTSDB(id, gap.bounds) - if err != nil { - return nil, nil, errors.Wrap(err, "failed to load tsdb") +func coversFullRange(bounds v1.FingerprintBounds, overlaps []v1.FingerprintBounds) bool { + // if there are no overlaps, the range is not covered + if len(overlaps) == 0 { + return false } - blocks, err := s.bloomStore.FetchBlocks(ctx, gap.blocks) - if err != nil { - return nil, nil, errors.Wrap(err, "failed to get blocks") + // keep track of bounds which need to be filled in order + // for the overlaps to cover the full range + missing := []v1.FingerprintBounds{bounds} + ignores := make(map[int]bool) + for _, overlap := range overlaps { + var i int + for { + if i >= len(missing) { + break + } + + if ignores[i] { + i++ + continue + } + + remaining := missing[i].Unless(overlap) + switch len(remaining) { + case 0: + // this range is covered, ignore it + ignores[i] = true + case 1: + // this range is partially covered, updated it + missing[i] = remaining[0] + case 2: + // this range has been partially covered in the middle, + // split it into two ranges and append + ignores[i] = true + missing = append(missing, remaining...) + } + i++ + } + } - return seriesItr, blocks, nil + return len(ignores) == len(missing) +} + +// tsdbStrictlyNewer returns if all of the tsdbs in a are newer than all of the tsdbs in b +func tsdbsStrictlyNewer(as, bs []tsdb.SingleTenantTSDBIdentifier) bool { + for _, a := range as { + for _, b := range bs { + if a.TS.Before(b.TS) { + return false + } + } + } + return true } type gapWithBlocks struct { @@ -214,7 +496,7 @@ type gapWithBlocks struct { // of the same chunks we need to ensure are indexed, just from previous tsdb iterations. 
// This is a performance optimization to avoid expensive re-reindexing type blockPlan struct { - tsdb tsdb.Identifier + tsdb tsdb.SingleTenantTSDBIdentifier gaps []gapWithBlocks } @@ -292,7 +574,7 @@ func blockPlansForGaps(tsdbs []tsdbGaps, metas []bloomshipper.Meta) ([]blockPlan // Used to signal the gaps that need to be populated for a tsdb type tsdbGaps struct { - tsdb tsdb.Identifier + tsdb tsdb.SingleTenantTSDBIdentifier gaps []v1.FingerprintBounds } @@ -300,7 +582,7 @@ type tsdbGaps struct { // that for each TSDB, there are metas covering the entire ownership range which were generated from that specific TSDB. func gapsBetweenTSDBsAndMetas( ownershipRange v1.FingerprintBounds, - tsdbs []tsdb.Identifier, + tsdbs []tsdb.SingleTenantTSDBIdentifier, metas []bloomshipper.Meta, ) (res []tsdbGaps, err error) { for _, db := range tsdbs { diff --git a/pkg/bloomcompactor/controller_test.go b/pkg/bloomcompactor/controller_test.go index 0660a5b601eea..72653c292b18b 100644 --- a/pkg/bloomcompactor/controller_test.go +++ b/pkg/bloomcompactor/controller_test.go @@ -142,14 +142,14 @@ func Test_gapsBetweenTSDBsAndMetas(t *testing.T) { err bool exp []tsdbGaps ownershipRange v1.FingerprintBounds - tsdbs []tsdb.Identifier + tsdbs []tsdb.SingleTenantTSDBIdentifier metas []bloomshipper.Meta }{ { desc: "non-overlapping tsdbs and metas", err: true, ownershipRange: v1.NewBounds(0, 10), - tsdbs: []tsdb.Identifier{tsdbID(0)}, + tsdbs: []tsdb.SingleTenantTSDBIdentifier{tsdbID(0)}, metas: []bloomshipper.Meta{ genMeta(11, 20, []int{0}, nil), }, @@ -157,7 +157,7 @@ func Test_gapsBetweenTSDBsAndMetas(t *testing.T) { { desc: "single tsdb", ownershipRange: v1.NewBounds(0, 10), - tsdbs: []tsdb.Identifier{tsdbID(0)}, + tsdbs: []tsdb.SingleTenantTSDBIdentifier{tsdbID(0)}, metas: []bloomshipper.Meta{ genMeta(4, 8, []int{0}, nil), }, @@ -174,7 +174,7 @@ func Test_gapsBetweenTSDBsAndMetas(t *testing.T) { { desc: "multiple tsdbs with separate blocks", ownershipRange: v1.NewBounds(0, 10), - tsdbs: []tsdb.Identifier{tsdbID(0), tsdbID(1)}, + tsdbs: []tsdb.SingleTenantTSDBIdentifier{tsdbID(0), tsdbID(1)}, metas: []bloomshipper.Meta{ genMeta(0, 5, []int{0}, nil), genMeta(6, 10, []int{1}, nil), @@ -197,7 +197,7 @@ func Test_gapsBetweenTSDBsAndMetas(t *testing.T) { { desc: "multiple tsdbs with the same blocks", ownershipRange: v1.NewBounds(0, 10), - tsdbs: []tsdb.Identifier{tsdbID(0), tsdbID(1)}, + tsdbs: []tsdb.SingleTenantTSDBIdentifier{tsdbID(0), tsdbID(1)}, metas: []bloomshipper.Meta{ genMeta(0, 5, []int{0, 1}, nil), genMeta(6, 8, []int{1}, nil), @@ -242,7 +242,7 @@ func Test_blockPlansForGaps(t *testing.T) { for _, tc := range []struct { desc string ownershipRange v1.FingerprintBounds - tsdbs []tsdb.Identifier + tsdbs []tsdb.SingleTenantTSDBIdentifier metas []bloomshipper.Meta err bool exp []blockPlan @@ -250,7 +250,7 @@ func Test_blockPlansForGaps(t *testing.T) { { desc: "single overlapping meta+no overlapping block", ownershipRange: v1.NewBounds(0, 10), - tsdbs: []tsdb.Identifier{tsdbID(0)}, + tsdbs: []tsdb.SingleTenantTSDBIdentifier{tsdbID(0)}, metas: []bloomshipper.Meta{ genMeta(5, 20, []int{1}, []bloomshipper.BlockRef{genBlockRef(11, 20)}), }, @@ -268,7 +268,7 @@ func Test_blockPlansForGaps(t *testing.T) { { desc: "single overlapping meta+one overlapping block", ownershipRange: v1.NewBounds(0, 10), - tsdbs: []tsdb.Identifier{tsdbID(0)}, + tsdbs: []tsdb.SingleTenantTSDBIdentifier{tsdbID(0)}, metas: []bloomshipper.Meta{ genMeta(5, 20, []int{1}, []bloomshipper.BlockRef{genBlockRef(9, 20)}), }, @@ -290,7 +290,7 @@ func 
Test_blockPlansForGaps(t *testing.T) { // but we can trim the range needing generation desc: "trims up to date area", ownershipRange: v1.NewBounds(0, 10), - tsdbs: []tsdb.Identifier{tsdbID(0)}, + tsdbs: []tsdb.SingleTenantTSDBIdentifier{tsdbID(0)}, metas: []bloomshipper.Meta{ genMeta(9, 20, []int{0}, []bloomshipper.BlockRef{genBlockRef(9, 20)}), // block for same tsdb genMeta(9, 20, []int{1}, []bloomshipper.BlockRef{genBlockRef(9, 20)}), // block for different tsdb @@ -309,7 +309,7 @@ func Test_blockPlansForGaps(t *testing.T) { { desc: "uses old block for overlapping range", ownershipRange: v1.NewBounds(0, 10), - tsdbs: []tsdb.Identifier{tsdbID(0)}, + tsdbs: []tsdb.SingleTenantTSDBIdentifier{tsdbID(0)}, metas: []bloomshipper.Meta{ genMeta(9, 20, []int{0}, []bloomshipper.BlockRef{genBlockRef(9, 20)}), // block for same tsdb genMeta(5, 20, []int{1}, []bloomshipper.BlockRef{genBlockRef(5, 20)}), // block for different tsdb @@ -329,7 +329,7 @@ func Test_blockPlansForGaps(t *testing.T) { { desc: "multi case", ownershipRange: v1.NewBounds(0, 10), - tsdbs: []tsdb.Identifier{tsdbID(0), tsdbID(1)}, // generate for both tsdbs + tsdbs: []tsdb.SingleTenantTSDBIdentifier{tsdbID(0), tsdbID(1)}, // generate for both tsdbs metas: []bloomshipper.Meta{ genMeta(0, 2, []int{0}, []bloomshipper.BlockRef{ genBlockRef(0, 1), @@ -377,7 +377,7 @@ func Test_blockPlansForGaps(t *testing.T) { { desc: "dedupes block refs", ownershipRange: v1.NewBounds(0, 10), - tsdbs: []tsdb.Identifier{tsdbID(0)}, + tsdbs: []tsdb.SingleTenantTSDBIdentifier{tsdbID(0)}, metas: []bloomshipper.Meta{ genMeta(9, 20, []int{1}, []bloomshipper.BlockRef{ genBlockRef(1, 4), @@ -421,3 +421,144 @@ func Test_blockPlansForGaps(t *testing.T) { }) } } + +func Test_coversFullRange(t *testing.T) { + for _, tc := range []struct { + desc string + src v1.FingerprintBounds + overlaps []v1.FingerprintBounds + exp bool + }{ + { + desc: "empty", + src: v1.NewBounds(0, 10), + overlaps: []v1.FingerprintBounds{}, + exp: false, + }, + { + desc: "single_full_range", + src: v1.NewBounds(0, 10), + overlaps: []v1.FingerprintBounds{ + v1.NewBounds(0, 10), + }, + exp: true, + }, + { + desc: "single_partial_range", + src: v1.NewBounds(0, 10), + overlaps: []v1.FingerprintBounds{ + v1.NewBounds(0, 5), + }, + exp: false, + }, + { + desc: "multiple_full_ranges", + src: v1.NewBounds(0, 10), + overlaps: []v1.FingerprintBounds{ + v1.NewBounds(0, 5), + v1.NewBounds(6, 10), + }, + exp: true, + }, + { + desc: "multiple_partial_ranges", + src: v1.NewBounds(0, 10), + overlaps: []v1.FingerprintBounds{ + v1.NewBounds(0, 5), + v1.NewBounds(7, 8), + }, + exp: false, + }, + { + desc: "wraps_partial_range", + src: v1.NewBounds(10, 20), + overlaps: []v1.FingerprintBounds{ + v1.NewBounds(0, 12), + v1.NewBounds(13, 15), + v1.NewBounds(19, 21), + }, + exp: false, + }, + { + desc: "wraps_full_range", + src: v1.NewBounds(10, 20), + overlaps: []v1.FingerprintBounds{ + v1.NewBounds(0, 12), + v1.NewBounds(13, 15), + v1.NewBounds(16, 25), + }, + exp: true, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + require.Equal(t, tc.exp, coversFullRange(tc.src, tc.overlaps)) + }) + } +} + +func Test_OutdatedMetas(t *testing.T) { + gen := func(bounds v1.FingerprintBounds, tsdbTimes ...model.Time) (meta bloomshipper.Meta) { + for _, tsdbTime := range tsdbTimes { + meta.Sources = append(meta.Sources, tsdb.SingleTenantTSDBIdentifier{TS: tsdbTime.Time()}) + } + meta.Bounds = bounds + return meta + } + + for _, tc := range []struct { + desc string + metas []bloomshipper.Meta + exp []bloomshipper.Meta + }{ + { + 
desc: "no metas", + metas: nil, + exp: nil, + }, + { + desc: "single meta", + metas: []bloomshipper.Meta{ + gen(v1.NewBounds(0, 10), 0), + }, + exp: nil, + }, + { + desc: "single outdated meta", + metas: []bloomshipper.Meta{ + gen(v1.NewBounds(0, 10), 0), + gen(v1.NewBounds(0, 10), 1), + }, + exp: []bloomshipper.Meta{ + gen(v1.NewBounds(0, 10), 0), + }, + }, + { + desc: "single outdated via partitions", + metas: []bloomshipper.Meta{ + gen(v1.NewBounds(0, 5), 0), + gen(v1.NewBounds(6, 10), 0), + gen(v1.NewBounds(0, 10), 1), + }, + exp: []bloomshipper.Meta{ + gen(v1.NewBounds(0, 5), 0), + gen(v1.NewBounds(6, 10), 0), + }, + }, + { + desc: "multi tsdbs", + metas: []bloomshipper.Meta{ + gen(v1.NewBounds(0, 5), 0, 1), + gen(v1.NewBounds(6, 10), 0, 1), + gen(v1.NewBounds(0, 10), 2, 3), + }, + exp: []bloomshipper.Meta{ + gen(v1.NewBounds(0, 5), 0, 1), + gen(v1.NewBounds(6, 10), 0, 1), + }, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + require.Equal(t, tc.exp, outdatedMetas(tc.metas)) + }) + } +} diff --git a/pkg/bloomcompactor/meta.go b/pkg/bloomcompactor/meta.go deleted file mode 100644 index 2f2c2cd9de16e..0000000000000 --- a/pkg/bloomcompactor/meta.go +++ /dev/null @@ -1,16 +0,0 @@ -package bloomcompactor - -import ( - v1 "github.com/grafana/loki/pkg/storage/bloom/v1" - "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" -) - -const ( - BloomPrefix = "bloom" - MetasPrefix = "metas" -) - -type TSDBStore interface { - ResolveTSDBs() ([]*tsdb.SingleTenantTSDBIdentifier, error) - LoadTSDB(id tsdb.Identifier, bounds v1.FingerprintBounds) (v1.CloseableIterator[*v1.Series], error) -} diff --git a/pkg/bloomcompactor/metrics.go b/pkg/bloomcompactor/metrics.go index ee2f1630ab5ec..350e3ed7e480e 100644 --- a/pkg/bloomcompactor/metrics.go +++ b/pkg/bloomcompactor/metrics.go @@ -3,6 +3,8 @@ package bloomcompactor import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" ) const ( @@ -13,97 +15,105 @@ const ( statusFailure = "failure" ) -type metrics struct { - compactionRunsStarted prometheus.Counter - compactionRunsCompleted *prometheus.CounterVec - compactionRunTime *prometheus.HistogramVec - compactionRunDiscoveredTenants prometheus.Counter - compactionRunSkippedTenants prometheus.Counter - compactionRunTenantsCompleted *prometheus.CounterVec - compactionRunTenantsTime *prometheus.HistogramVec - compactionRunJobStarted prometheus.Counter - compactionRunJobCompleted *prometheus.CounterVec - compactionRunJobTime *prometheus.HistogramVec - compactionRunInterval prometheus.Gauge - compactorRunning prometheus.Gauge +type Metrics struct { + bloomMetrics *v1.Metrics + compactorRunning prometheus.Gauge + chunkSize prometheus.Histogram // uncompressed size of all chunks summed per series + + compactionsStarted prometheus.Counter + compactionCompleted *prometheus.CounterVec + compactionTime *prometheus.HistogramVec + + tenantsDiscovered prometheus.Counter + tenantsOwned prometheus.Counter + tenantsSkipped prometheus.Counter + tenantsStarted prometheus.Counter + tenantsCompleted *prometheus.CounterVec + tenantsCompletedTime *prometheus.HistogramVec + tenantsSeries prometheus.Histogram } -func newMetrics(r prometheus.Registerer) *metrics { - m := metrics{ - compactionRunsStarted: promauto.With(r).NewCounter(prometheus.CounterOpts{ +func NewMetrics(r prometheus.Registerer, bloomMetrics *v1.Metrics) *Metrics { + m := Metrics{ + bloomMetrics: bloomMetrics, + compactorRunning: 
promauto.With(r).NewGauge(prometheus.GaugeOpts{ + Namespace: metricsNamespace, + Subsystem: metricsSubsystem, + Name: "running", + Help: "Value will be 1 if compactor is currently running on this instance", + }), + chunkSize: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Subsystem: metricsSubsystem, + Name: "chunk_series_size", + Help: "Uncompressed size of chunks in a series", + Buckets: prometheus.ExponentialBucketsRange(1024, 1073741824, 10), + }), + + compactionsStarted: promauto.With(r).NewCounter(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, - Name: "runs_started_total", + Name: "compactions_started", Help: "Total number of compactions started", }), - compactionRunsCompleted: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + compactionCompleted: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, - Name: "runs_completed_total", - Help: "Total number of compactions completed successfully", + Name: "compactions_completed", + Help: "Total number of compactions completed", }, []string{"status"}), - compactionRunTime: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ + compactionTime: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, - Name: "runs_time_seconds", + Name: "compactions_time_seconds", Help: "Time spent during a compaction cycle.", Buckets: prometheus.DefBuckets, }, []string{"status"}), - compactionRunDiscoveredTenants: promauto.With(r).NewCounter(prometheus.CounterOpts{ + + tenantsDiscovered: promauto.With(r).NewCounter(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, Name: "tenants_discovered", Help: "Number of tenants discovered during the current compaction run", }), - compactionRunSkippedTenants: promauto.With(r).NewCounter(prometheus.CounterOpts{ + tenantsOwned: promauto.With(r).NewCounter(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, - Name: "tenants_skipped", - Help: "Number of tenants skipped during the current compaction run", + Name: "tenants_owned", + Help: "Number of tenants owned by this instance", }), - compactionRunTenantsCompleted: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Namespace: metricsNamespace, - Subsystem: metricsSubsystem, - Name: "tenants_completed", - Help: "Number of tenants successfully processed during the current compaction run", - }, []string{"status"}), - compactionRunTenantsTime: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ + tenantsSkipped: promauto.With(r).NewCounter(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, - Name: "tenants_time_seconds", - Help: "Time spent processing tenants.", - Buckets: prometheus.DefBuckets, - }, []string{"status"}), - compactionRunJobStarted: promauto.With(r).NewCounter(prometheus.CounterOpts{ + Name: "tenants_skipped", + Help: "Number of tenants skipped since they are not owned by this instance", + }), + tenantsStarted: promauto.With(r).NewCounter(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, - Name: "job_started", - Help: "Number of jobs started processing during the current compaction run", + Name: "tenants_started", + Help: "Number of tenants started to process during the current compaction run", }), - compactionRunJobCompleted: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + tenantsCompleted: 
promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, - Name: "job_completed", - Help: "Number of jobs successfully processed during the current compaction run", + Name: "tenants_completed", + Help: "Number of tenants successfully processed during the current compaction run", }, []string{"status"}), - compactionRunJobTime: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ + tenantsCompletedTime: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, - Name: "job_time_seconds", - Help: "Time spent processing jobs.", + Name: "tenants_time_seconds", + Help: "Time spent processing tenants.", Buckets: prometheus.DefBuckets, }, []string{"status"}), - compactionRunInterval: promauto.With(r).NewGauge(prometheus.GaugeOpts{ + tenantsSeries: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, - Name: "compaction_interval_seconds", - Help: "The configured interval on which compaction is run in seconds", - }), - compactorRunning: promauto.With(r).NewGauge(prometheus.GaugeOpts{ - Namespace: metricsNamespace, - Subsystem: metricsSubsystem, - Name: "running", - Help: "Value will be 1 if compactor is currently running on this instance", + Name: "tenants_series", + Help: "Number of series processed per tenant in the owned fingerprint-range.", + // Up to 10M series per tenant, way more than what we expect given our max_global_streams_per_user limits + Buckets: prometheus.ExponentialBucketsRange(1, 10000000, 10), }), } diff --git a/pkg/bloomcompactor/sharding.go b/pkg/bloomcompactor/sharding.go deleted file mode 100644 index 9b3009bd50652..0000000000000 --- a/pkg/bloomcompactor/sharding.go +++ /dev/null @@ -1,58 +0,0 @@ -package bloomcompactor - -import ( - "github.com/grafana/dskit/ring" - - util_ring "github.com/grafana/loki/pkg/util/ring" -) - -var ( - // TODO: Should we include LEAVING instances in the replication set? - RingOp = ring.NewOp([]ring.InstanceState{ring.JOINING, ring.ACTIVE}, nil) -) - -// ShardingStrategy describes whether compactor "owns" given user or job. -type ShardingStrategy interface { - util_ring.TenantSharding - OwnsFingerprint(tenantID string, fp uint64) (bool, error) -} - -type ShuffleShardingStrategy struct { - util_ring.TenantSharding - ringLifeCycler *ring.BasicLifecycler -} - -func NewShuffleShardingStrategy(r *ring.Ring, ringLifecycler *ring.BasicLifecycler, limits Limits) *ShuffleShardingStrategy { - s := ShuffleShardingStrategy{ - TenantSharding: util_ring.NewTenantShuffleSharding(r, ringLifecycler, limits.BloomCompactorShardSize), - ringLifeCycler: ringLifecycler, - } - - return &s -} - -// OwnsFingerprint makes sure only a single compactor processes the fingerprint. -func (s *ShuffleShardingStrategy) OwnsFingerprint(tenantID string, fp uint64) (bool, error) { - if !s.OwnsTenant(tenantID) { - return false, nil - } - - tenantRing := s.GetTenantSubRing(tenantID) - fpSharding := util_ring.NewFingerprintShuffleSharding(tenantRing, s.ringLifeCycler, RingOp) - return fpSharding.OwnsFingerprint(fp) -} - -// NoopStrategy is an implementation of the ShardingStrategy that does not -// filter anything. -type NoopStrategy struct { - util_ring.NoopStrategy -} - -// OwnsFingerprint implements TenantShuffleSharding. 
-func (s *NoopStrategy) OwnsFingerprint(_ string, _ uint64) (bool, error) { - return true, nil -} - -func NewNoopStrategy() *NoopStrategy { - return &NoopStrategy{NoopStrategy: util_ring.NoopStrategy{}} -} diff --git a/pkg/bloomcompactor/sharding_test.go b/pkg/bloomcompactor/sharding_test.go deleted file mode 100644 index 4e79752279fb9..0000000000000 --- a/pkg/bloomcompactor/sharding_test.go +++ /dev/null @@ -1,149 +0,0 @@ -package bloomcompactor - -import ( - "context" - "flag" - "fmt" - "testing" - "time" - - "github.com/grafana/dskit/services" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/labels" - "github.com/stretchr/testify/require" - - util_log "github.com/grafana/loki/pkg/util/log" - lokiring "github.com/grafana/loki/pkg/util/ring" - "github.com/grafana/loki/pkg/validation" -) - -func TestShuffleSharding(t *testing.T) { - const shardSize = 2 - const rings = 4 - const tenants = 2000 - const jobsPerTenant = 200 - - var limits validation.Limits - limits.RegisterFlags(flag.NewFlagSet("limits", flag.PanicOnError)) - overrides, err := validation.NewOverrides(limits, nil) - require.NoError(t, err) - - var ringManagers []*lokiring.RingManager - var shards []*ShuffleShardingStrategy - for i := 0; i < rings; i++ { - var ringCfg lokiring.RingConfig - ringCfg.RegisterFlagsWithPrefix("", "", flag.NewFlagSet("ring", flag.PanicOnError)) - ringCfg.KVStore.Store = "inmemory" - ringCfg.InstanceID = fmt.Sprintf("bloom-compactor-%d", i) - ringCfg.InstanceAddr = fmt.Sprintf("localhost-%d", i) - - ringManager, err := lokiring.NewRingManager("bloom-compactor", lokiring.ServerMode, ringCfg, 1, 1, util_log.Logger, prometheus.NewRegistry()) - require.NoError(t, err) - require.NoError(t, ringManager.StartAsync(context.Background())) - - sharding := NewShuffleShardingStrategy(ringManager.Ring, ringManager.RingLifecycler, mockLimits{ - Overrides: overrides, - bloomCompactorShardSize: shardSize, - }) - - ringManagers = append(ringManagers, ringManager) - shards = append(shards, sharding) - } - - // Wait for all rings to see each other. - for i := 0; i < rings; i++ { - require.Eventually(t, func() bool { - running := ringManagers[i].State() == services.Running - discovered := ringManagers[i].Ring.InstancesCount() == rings - return running && discovered - }, 1*time.Minute, 100*time.Millisecond) - } - - // This is kind of an un-deterministic test, because sharding is random - // and the seed is initialized by the ring lib. - // Here we'll generate a bunch of tenants and test that if the sharding doesn't own the tenant, - // that's because the tenant is owned by other ring instances. - shard := shards[0] - otherShards := shards[1:] - var ownedTenants, ownedJobs int - for i := 0; i < tenants; i++ { - tenant := fmt.Sprintf("tenant-%d", i) - ownsTenant := shard.OwnsTenant(tenant) - - var tenantOwnedByOther int - for _, other := range otherShards { - otherOwns := other.OwnsTenant(tenant) - if otherOwns { - tenantOwnedByOther++ - } - } - - // If this shard owns the tenant, shardSize-1 other members should also own the tenant. - // Otherwise, shardSize other members should own the tenant. 
- if ownsTenant { - require.Equal(t, shardSize-1, tenantOwnedByOther) - ownedTenants++ - } else { - require.Equal(t, shardSize, tenantOwnedByOther) - } - - for j := 0; j < jobsPerTenant; j++ { - lbls := labels.FromStrings("namespace", fmt.Sprintf("namespace-%d", j)) - fp := model.Fingerprint(lbls.Hash()) - ownsFingerprint, err := shard.OwnsFingerprint(tenant, uint64(fp)) - require.NoError(t, err) - - var jobOwnedByOther int - for _, other := range otherShards { - otherOwns, err := other.OwnsFingerprint(tenant, uint64(fp)) - require.NoError(t, err) - if otherOwns { - jobOwnedByOther++ - } - } - - // If this shard owns the job, no one else should own the job. - // And if this shard doesn't own the job, only one of the other shards should own the job. - if ownsFingerprint { - require.Equal(t, 0, jobOwnedByOther) - ownedJobs++ - } else { - require.Equal(t, 1, jobOwnedByOther) - } - } - } - - t.Logf("owned tenants: %d (out of %d)", ownedTenants, tenants) - t.Logf("owned jobs: %d (out of %d)", ownedJobs, tenants*jobsPerTenant) - - // Stop all rings and wait for them to stop. - for i := 0; i < rings; i++ { - ringManagers[i].StopAsync() - require.Eventually(t, func() bool { - return ringManagers[i].State() == services.Terminated - }, 1*time.Minute, 100*time.Millisecond) - } -} - -type mockLimits struct { - *validation.Overrides - bloomCompactorShardSize int - chunksDownloadingBatchSize int - fpRate float64 -} - -func (m mockLimits) BloomFalsePositiveRate(_ string) float64 { - return m.fpRate -} - -func (m mockLimits) BloomCompactorShardSize(_ string) int { - return m.bloomCompactorShardSize -} - -func (m mockLimits) BloomCompactorChunksBatchSize(_ string) int { - if m.chunksDownloadingBatchSize != 0 { - return m.chunksDownloadingBatchSize - } - return 1 -} diff --git a/pkg/bloomcompactor/spec.go b/pkg/bloomcompactor/spec.go index bf9a0a02387b4..67d41b650e375 100644 --- a/pkg/bloomcompactor/spec.go +++ b/pkg/bloomcompactor/spec.go @@ -3,51 +3,22 @@ package bloomcompactor import ( "context" "fmt" - "math" - "time" + "io" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" - "github.com/grafana/dskit/multierror" - - "github.com/grafana/loki/pkg/chunkenc" "github.com/grafana/loki/pkg/logproto" - logql_log "github.com/grafana/loki/pkg/logql/log" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/chunk" + "github.com/grafana/loki/pkg/storage/chunk/fetcher" + "github.com/grafana/loki/pkg/storage/stores" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" ) -/* -This file maintains a number of things supporting bloom generation. Most notably, the `BloomGenerator` interface/implementation which builds bloom filters. - -- `BloomGenerator`: Builds blooms. Most other things in this file are supporting this in various ways. -- `SimpleBloomGenerator`: A foundational implementation of `BloomGenerator` which wires up a few different components to generate bloom filters for a set of blocks and handles schema compatibility: -- `chunkLoader`: Loads chunks w/ a specific fingerprint from the store, returns an iterator of chunk iterators. We return iterators rather than chunk implementations mainly for ease of testing. In practice, this will just be an iterator over `MemChunk`s. 
-*/ - -type Metrics struct { - bloomMetrics *v1.Metrics - chunkSize prometheus.Histogram // uncompressed size of all chunks summed per series -} - -func NewMetrics(r prometheus.Registerer, bloomMetrics *v1.Metrics) *Metrics { - return &Metrics{ - bloomMetrics: bloomMetrics, - chunkSize: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Name: "bloom_chunk_series_size", - Help: "Uncompressed size of chunks in a series", - Buckets: prometheus.ExponentialBucketsRange(1024, 1073741824, 10), - }), - } -} - // inclusive range type Keyspace struct { min, max model.Fingerprint @@ -65,16 +36,16 @@ func (k Keyspace) Cmp(other Keyspace) v1.BoundsCheck { // Store is likely bound within. This allows specifying impls like ShardedStore // to only request the shard-range needed from the existing store. type BloomGenerator interface { - Generate(ctx context.Context) (skippedBlocks []*v1.Block, results v1.Iterator[*v1.Block], err error) + Generate(ctx context.Context) (skippedBlocks []v1.BlockMetadata, toClose []io.Closer, results v1.Iterator[*v1.Block], err error) } // Simple implementation of a BloomGenerator. type SimpleBloomGenerator struct { + userID string store v1.Iterator[*v1.Series] chunkLoader ChunkLoader - // TODO(owen-d): blocks need not be all downloaded prior. Consider implementing - // as an iterator of iterators, where each iterator is a batch of overlapping blocks. - blocks []*bloomshipper.CloseableBlockQuerier + blocksIter v1.ResettableIterator[*v1.SeriesWithBloom] + skipped []v1.BlockMetadata // options to build blocks with opts v1.BlockOptions @@ -92,20 +63,21 @@ type SimpleBloomGenerator struct { // and handles schema compatibility: // Blocks which are incompatible with the schema are skipped and will have their chunks reindexed func NewSimpleBloomGenerator( + userID string, opts v1.BlockOptions, store v1.Iterator[*v1.Series], chunkLoader ChunkLoader, - blocks []*bloomshipper.CloseableBlockQuerier, + blocksIter v1.ResettableIterator[*v1.SeriesWithBloom], readWriterFn func() (v1.BlockWriter, v1.BlockReader), metrics *Metrics, logger log.Logger, ) *SimpleBloomGenerator { return &SimpleBloomGenerator{ - opts: opts, - // TODO(owen-d): implement Iterator[Series] against TSDB files to hook in here. 
+ userID: userID, + opts: opts, store: store, chunkLoader: chunkLoader, - blocks: blocks, + blocksIter: blocksIter, logger: log.With(logger, "component", "bloom_generator"), readWriterFn: readWriterFn, metrics: metrics, @@ -116,7 +88,7 @@ func NewSimpleBloomGenerator( func (s *SimpleBloomGenerator) populator(ctx context.Context) func(series *v1.Series, bloom *v1.Bloom) error { return func(series *v1.Series, bloom *v1.Bloom) error { - chunkItersWithFP, err := s.chunkLoader.Load(ctx, series) + chunkItersWithFP, err := s.chunkLoader.Load(ctx, s.userID, series) if err != nil { return errors.Wrapf(err, "failed to load chunks for series: %+v", series) } @@ -132,69 +104,114 @@ func (s *SimpleBloomGenerator) populator(ctx context.Context) func(series *v1.Se } -func (s *SimpleBloomGenerator) Generate(ctx context.Context) (skippedBlocks []v1.BlockMetadata, results v1.Iterator[*v1.Block], err error) { - - var closeErrors multierror.MultiError - blocksMatchingSchema := make([]v1.PeekingIterator[*v1.SeriesWithBloom], 0, len(s.blocks)) - toClose := make([]*bloomshipper.CloseableBlockQuerier, 0, len(s.blocks)) - // Close all remaining blocks on exit - defer func() { - for _, block := range toClose { - closeErrors.Add(block.Close()) - } - if err := closeErrors.Err(); err != nil { - level.Error(s.logger).Log("msg", "failed to close blocks", "err", err) - } - }() +func (s *SimpleBloomGenerator) Generate(ctx context.Context) v1.Iterator[*v1.Block] { + level.Debug(s.logger).Log("msg", "generating bloom filters for blocks", "schema", fmt.Sprintf("%+v", s.opts.Schema)) + + series := v1.NewPeekingIter(s.store) + + // TODO: Use interface + impl, ok := s.blocksIter.(*blockLoadingIter) + if ok { + impl.Filter( + func(bq *bloomshipper.CloseableBlockQuerier) bool { + + logger := log.With(s.logger, "block", bq.BlockRef) + md, err := bq.Metadata() + schema := md.Options.Schema + if err != nil { + level.Warn(logger).Log("msg", "failed to get schema for block", "err", err) + s.skipped = append(s.skipped, md) + bq.Close() // close unused querier + return false + } + + if !s.opts.Schema.Compatible(schema) { + level.Warn(logger).Log("msg", "block schema incompatible with options", "generator_schema", fmt.Sprintf("%+v", s.opts.Schema), "block_schema", fmt.Sprintf("%+v", schema)) + s.skipped = append(s.skipped, md) + bq.Close() // close unused querier + return false + } + + level.Debug(logger).Log("msg", "adding compatible block to bloom generation inputs") + return true + }, + ) + } - for _, block := range s.blocks { - // TODO(owen-d): implement block naming so we can log the affected block in all these calls - logger := log.With(s.logger, "block", fmt.Sprintf("%+v", block)) - md, err := block.Metadata() - schema := md.Options.Schema - if err != nil { - level.Warn(logger).Log("msg", "failed to get schema for block", "err", err) - skippedBlocks = append(skippedBlocks, md) + return NewLazyBlockBuilderIterator(ctx, s.opts, s.populator(ctx), s.readWriterFn, series, s.blocksIter) +} - // Close unneeded block - closeErrors.Add(block.Close()) - continue - } +// LazyBlockBuilderIterator is a lazy iterator over blocks that builds +// each block by adding series to them until they are full. 
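+// Each call to Next resets the pre-existing blocks iterator, merges it with the remaining series
+// via a MergeBuilder and materializes a single block; At returns that block and Err reports any
+// context or build error encountered along the way.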
+type LazyBlockBuilderIterator struct { + ctx context.Context + opts v1.BlockOptions + populate func(*v1.Series, *v1.Bloom) error + readWriterFn func() (v1.BlockWriter, v1.BlockReader) + series v1.PeekingIterator[*v1.Series] + blocks v1.ResettableIterator[*v1.SeriesWithBloom] - if !s.opts.Schema.Compatible(schema) { - level.Warn(logger).Log("msg", "block schema incompatible with options", "generator_schema", fmt.Sprintf("%+v", s.opts.Schema), "block_schema", fmt.Sprintf("%+v", schema)) - skippedBlocks = append(skippedBlocks, md) + curr *v1.Block + err error +} - // Close unneeded block - closeErrors.Add(block.Close()) - continue - } +func NewLazyBlockBuilderIterator( + ctx context.Context, + opts v1.BlockOptions, + populate func(*v1.Series, *v1.Bloom) error, + readWriterFn func() (v1.BlockWriter, v1.BlockReader), + series v1.PeekingIterator[*v1.Series], + blocks v1.ResettableIterator[*v1.SeriesWithBloom], +) *LazyBlockBuilderIterator { + return &LazyBlockBuilderIterator{ + ctx: ctx, + opts: opts, + populate: populate, + readWriterFn: readWriterFn, + series: series, + blocks: blocks, + } +} - level.Debug(logger).Log("msg", "adding compatible block to bloom generation inputs") - itr := v1.NewPeekingIter[*v1.SeriesWithBloom](block) - blocksMatchingSchema = append(blocksMatchingSchema, itr) - // append needed block to close list (when finished) - toClose = append(toClose, block) +func (b *LazyBlockBuilderIterator) Next() bool { + // No more series to process + if _, hasNext := b.series.Peek(); !hasNext { + return false } - level.Debug(s.logger).Log("msg", "generating bloom filters for blocks", "num_blocks", len(blocksMatchingSchema), "skipped_blocks", len(skippedBlocks), "schema", fmt.Sprintf("%+v", s.opts.Schema)) + if err := b.ctx.Err(); err != nil { + b.err = errors.Wrap(err, "context canceled") + return false + } - // TODO(owen-d): implement bounded block sizes - mergeBuilder := v1.NewMergeBuilder(blocksMatchingSchema, s.store, s.populator(ctx)) - writer, reader := s.readWriterFn() + if err := b.blocks.Reset(); err != nil { + b.err = errors.Wrap(err, "reset blocks iterator") + return false + } - blockBuilder, err := v1.NewBlockBuilder(v1.NewBlockOptionsFromSchema(s.opts.Schema), writer) + mergeBuilder := v1.NewMergeBuilder(b.blocks, b.series, b.populate) + writer, reader := b.readWriterFn() + blockBuilder, err := v1.NewBlockBuilder(b.opts, writer) if err != nil { - return skippedBlocks, nil, errors.Wrap(err, "failed to create bloom block builder") + b.err = errors.Wrap(err, "failed to create bloom block builder") + return false } - _, err = mergeBuilder.Build(blockBuilder) if err != nil { - return skippedBlocks, nil, errors.Wrap(err, "failed to build bloom block") + b.err = errors.Wrap(err, "failed to build bloom block") + return false } - return skippedBlocks, v1.NewSliceIter[*v1.Block]([]*v1.Block{v1.NewBlock(reader)}), nil + b.curr = v1.NewBlock(reader) + return true +} +func (b *LazyBlockBuilderIterator) At() *v1.Block { + return b.curr +} + +func (b *LazyBlockBuilderIterator) Err() error { + return b.err } // IndexLoader loads an index. 
This helps us do things like @@ -211,45 +228,33 @@ type ChunkItersByFingerprint struct { // ChunkLoader loads chunks from a store type ChunkLoader interface { - Load(context.Context, *v1.Series) (*ChunkItersByFingerprint, error) -} - -// interface modeled from `pkg/storage/stores/composite_store.ChunkFetcherProvider` -type fetcherProvider interface { - GetChunkFetcher(model.Time) chunkFetcher -} - -// interface modeled from `pkg/storage/chunk/fetcher.Fetcher` -type chunkFetcher interface { - FetchChunks(ctx context.Context, chunks []chunk.Chunk) ([]chunk.Chunk, error) + Load(ctx context.Context, userID string, series *v1.Series) (*ChunkItersByFingerprint, error) } // StoreChunkLoader loads chunks from a store type StoreChunkLoader struct { - userID string - fetcherProvider fetcherProvider + fetcherProvider stores.ChunkFetcherProvider metrics *Metrics } -func NewStoreChunkLoader(userID string, fetcherProvider fetcherProvider, metrics *Metrics) *StoreChunkLoader { +func NewStoreChunkLoader(fetcherProvider stores.ChunkFetcherProvider, metrics *Metrics) *StoreChunkLoader { return &StoreChunkLoader{ - userID: userID, fetcherProvider: fetcherProvider, metrics: metrics, } } -func (s *StoreChunkLoader) Load(ctx context.Context, series *v1.Series) (*ChunkItersByFingerprint, error) { - // TODO(owen-d): This is probalby unnecessary as we should only have one fetcher +func (s *StoreChunkLoader) Load(ctx context.Context, userID string, series *v1.Series) (*ChunkItersByFingerprint, error) { + // NB(owen-d): This is probably unnecessary as we should only have one fetcher // because we'll only be working on a single index period at a time, but this should protect // us in the case of refactoring/changing this and likely isn't a perf bottleneck. - chksByFetcher := make(map[chunkFetcher][]chunk.Chunk) + chksByFetcher := make(map[*fetcher.Fetcher][]chunk.Chunk) for _, chk := range series.Chunks { fetcher := s.fetcherProvider.GetChunkFetcher(chk.Start) chksByFetcher[fetcher] = append(chksByFetcher[fetcher], chunk.Chunk{ ChunkRef: logproto.ChunkRef{ Fingerprint: uint64(series.Fingerprint), - UserID: s.userID, + UserID: userID, From: chk.Start, Through: chk.End, Checksum: chk.Checksum, @@ -257,104 +262,18 @@ func (s *StoreChunkLoader) Load(ctx context.Context, series *v1.Series) (*ChunkI }) } - work := make([]chunkWork, 0, len(chksByFetcher)) + var ( + fetchers = make([]Fetcher[chunk.Chunk, chunk.Chunk], 0, len(chksByFetcher)) + inputs = make([][]chunk.Chunk, 0, len(chksByFetcher)) + ) for fetcher, chks := range chksByFetcher { - work = append(work, chunkWork{ - fetcher: fetcher, - chks: chks, - }) + fn := FetchFunc[chunk.Chunk, chunk.Chunk](fetcher.FetchChunks) + fetchers = append(fetchers, fn) + inputs = append(inputs, chks) } return &ChunkItersByFingerprint{ fp: series.Fingerprint, - itr: newBatchedLoader(ctx, work, batchedLoaderDefaultBatchSize, s.metrics), + itr: newBatchedChunkLoader(ctx, fetchers, inputs, s.metrics, batchedLoaderDefaultBatchSize), }, nil } - -type chunkWork struct { - fetcher chunkFetcher - chks []chunk.Chunk -} - -// batchedLoader implements `v1.Iterator[v1.ChunkRefWithIter]` in batches -// to ensure memory is bounded while loading chunks -// TODO(owen-d): testware -type batchedLoader struct { - metrics *Metrics - batchSize int - ctx context.Context - work []chunkWork - - cur v1.ChunkRefWithIter - batch []chunk.Chunk - err error -} - -const batchedLoaderDefaultBatchSize = 50 - -func newBatchedLoader(ctx context.Context, work []chunkWork, batchSize int, metrics *Metrics) *batchedLoader { 
- return &batchedLoader{ - metrics: metrics, - batchSize: batchSize, - ctx: ctx, - work: work, - } -} - -func (b *batchedLoader) Next() bool { - if len(b.batch) > 0 { - b.cur, b.err = b.format(b.batch[0]) - b.batch = b.batch[1:] - return b.err == nil - } - - if len(b.work) == 0 { - return false - } - - // setup next batch - next := b.work[0] - batchSize := min(b.batchSize, len(next.chks)) - toFetch := next.chks[:batchSize] - // update work - b.work[0].chks = next.chks[batchSize:] - if len(b.work[0].chks) == 0 { - b.work = b.work[1:] - } - - b.batch, b.err = next.fetcher.FetchChunks(b.ctx, toFetch) - return b.err == nil -} - -func (b *batchedLoader) format(c chunk.Chunk) (v1.ChunkRefWithIter, error) { - chk := c.Data.(*chunkenc.Facade).LokiChunk() - b.metrics.chunkSize.Observe(float64(chk.UncompressedSize())) - itr, err := chk.Iterator( - b.ctx, - time.Unix(0, 0), // TODO: Parameterize/better handle the timestamps? - time.Unix(0, math.MaxInt64), - logproto.FORWARD, - logql_log.NewNoopPipeline().ForStream(c.Metric), - ) - - if err != nil { - return v1.ChunkRefWithIter{}, err - } - - return v1.ChunkRefWithIter{ - Ref: v1.ChunkRef{ - Start: c.From, - End: c.Through, - Checksum: c.Checksum, - }, - Itr: itr, - }, nil -} - -func (b *batchedLoader) At() v1.ChunkRefWithIter { - return b.cur -} - -func (b *batchedLoader) Err() error { - return b.err -} diff --git a/pkg/bloomcompactor/spec_test.go b/pkg/bloomcompactor/spec_test.go index c43a4b715a1e7..f278948fed7a6 100644 --- a/pkg/bloomcompactor/spec_test.go +++ b/pkg/bloomcompactor/spec_test.go @@ -13,22 +13,21 @@ import ( "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" ) -func blocksFromSchema(t *testing.T, n int, options v1.BlockOptions) (res []*v1.Block, data []v1.SeriesWithBloom) { +func blocksFromSchema(t *testing.T, n int, options v1.BlockOptions) (res []*v1.Block, data []v1.SeriesWithBloom, refs []bloomshipper.BlockRef) { return blocksFromSchemaWithRange(t, n, options, 0, 0xffff) } // splits 100 series across `n` non-overlapping blocks. // uses options to build blocks with. 
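+// In addition to the blocks and their series, it returns a BlockRef per block spanning that
+// block's fingerprint range, so tests can wire the blocks into a fetcher by reference.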
-func blocksFromSchemaWithRange(t *testing.T, n int, options v1.BlockOptions, fromFP, throughFp model.Fingerprint) (res []*v1.Block, data []v1.SeriesWithBloom) { +func blocksFromSchemaWithRange(t *testing.T, n int, options v1.BlockOptions, fromFP, throughFp model.Fingerprint) (res []*v1.Block, data []v1.SeriesWithBloom, refs []bloomshipper.BlockRef) { if 100%n != 0 { panic("100 series must be evenly divisible by n") } numSeries := 100 - numKeysPerSeries := 10000 - data, _ = v1.MkBasicSeriesWithBlooms(numSeries, numKeysPerSeries, fromFP, throughFp, 0, 10000) + data, _ = v1.MkBasicSeriesWithBlooms(numSeries, 0, fromFP, throughFp, 0, 10000) - seriesPerBlock := 100 / n + seriesPerBlock := numSeries / n for i := 0; i < n; i++ { // references for linking in memory reader+writer @@ -43,39 +42,62 @@ func blocksFromSchemaWithRange(t *testing.T, n int, options v1.BlockOptions, fro ) require.Nil(t, err) - itr := v1.NewSliceIter[v1.SeriesWithBloom](data[i*seriesPerBlock : (i+1)*seriesPerBlock]) + minIdx, maxIdx := i*seriesPerBlock, (i+1)*seriesPerBlock + + itr := v1.NewSliceIter[v1.SeriesWithBloom](data[minIdx:maxIdx]) _, err = builder.BuildFrom(itr) require.Nil(t, err) res = append(res, v1.NewBlock(reader)) + ref := genBlockRef(data[minIdx].Series.Fingerprint, data[maxIdx-1].Series.Fingerprint) + t.Log("create block", ref) + refs = append(refs, ref) } - return res, data + return res, data, refs } // doesn't actually load any chunks type dummyChunkLoader struct{} -func (dummyChunkLoader) Load(_ context.Context, series *v1.Series) (*ChunkItersByFingerprint, error) { +func (dummyChunkLoader) Load(_ context.Context, _ string, series *v1.Series) (*ChunkItersByFingerprint, error) { return &ChunkItersByFingerprint{ fp: series.Fingerprint, itr: v1.NewEmptyIter[v1.ChunkRefWithIter](), }, nil } -func dummyBloomGen(opts v1.BlockOptions, store v1.Iterator[*v1.Series], blocks []*v1.Block) *SimpleBloomGenerator { +func dummyBloomGen(t *testing.T, opts v1.BlockOptions, store v1.Iterator[*v1.Series], blocks []*v1.Block, refs []bloomshipper.BlockRef) *SimpleBloomGenerator { bqs := make([]*bloomshipper.CloseableBlockQuerier, 0, len(blocks)) - for _, b := range blocks { + for i, b := range blocks { bqs = append(bqs, &bloomshipper.CloseableBlockQuerier{ + BlockRef: refs[i], BlockQuerier: v1.NewBlockQuerier(b), }) } + fetcher := func(_ context.Context, refs []bloomshipper.BlockRef) ([]*bloomshipper.CloseableBlockQuerier, error) { + res := make([]*bloomshipper.CloseableBlockQuerier, 0, len(refs)) + for _, ref := range refs { + for _, bq := range bqs { + if ref.Bounds.Equal(bq.Bounds) { + res = append(res, bq) + } + } + } + t.Log("req", refs) + t.Log("res", res) + return res, nil + } + + blocksIter := newBlockLoadingIter(context.Background(), refs, FetchFunc[bloomshipper.BlockRef, *bloomshipper.CloseableBlockQuerier](fetcher), 1) + return NewSimpleBloomGenerator( + "fake", opts, store, dummyChunkLoader{}, - bqs, + blocksIter, func() (v1.BlockWriter, v1.BlockReader) { indexBuf := bytes.NewBuffer(nil) bloomsBuf := bytes.NewBuffer(nil) @@ -87,28 +109,40 @@ func dummyBloomGen(opts v1.BlockOptions, store v1.Iterator[*v1.Series], blocks [ } func TestSimpleBloomGenerator(t *testing.T) { + const maxBlockSize = 100 << 20 // 100MB for _, tc := range []struct { - desc string - fromSchema, toSchema v1.BlockOptions - sourceBlocks, numSkipped int + desc string + fromSchema, toSchema v1.BlockOptions + sourceBlocks, numSkipped, outputBlocks int + overlapping bool }{ { desc: "SkipsIncompatibleSchemas", - fromSchema: v1.NewBlockOptions(3, 
0), - toSchema: v1.NewBlockOptions(4, 0), + fromSchema: v1.NewBlockOptions(3, 0, maxBlockSize), + toSchema: v1.NewBlockOptions(4, 0, maxBlockSize), sourceBlocks: 2, numSkipped: 2, + outputBlocks: 1, }, { desc: "CombinesBlocks", - fromSchema: v1.NewBlockOptions(4, 0), - toSchema: v1.NewBlockOptions(4, 0), + fromSchema: v1.NewBlockOptions(4, 0, maxBlockSize), + toSchema: v1.NewBlockOptions(4, 0, maxBlockSize), sourceBlocks: 2, numSkipped: 0, + outputBlocks: 1, + }, + { + desc: "MaxBlockSize", + fromSchema: v1.NewBlockOptions(4, 0, maxBlockSize), + toSchema: v1.NewBlockOptions(4, 0, 1<<10), // 1KB + sourceBlocks: 2, + numSkipped: 0, + outputBlocks: 6, }, } { t.Run(tc.desc, func(t *testing.T) { - sourceBlocks, data := blocksFromSchema(t, tc.sourceBlocks, tc.fromSchema) + sourceBlocks, data, refs := blocksFromSchemaWithRange(t, tc.sourceBlocks, tc.fromSchema, 0x00000, 0x6ffff) storeItr := v1.NewMapIter[v1.SeriesWithBloom, *v1.Series]( v1.NewSliceIter[v1.SeriesWithBloom](data), func(swb v1.SeriesWithBloom) *v1.Series { @@ -116,27 +150,29 @@ func TestSimpleBloomGenerator(t *testing.T) { }, ) - gen := dummyBloomGen(tc.toSchema, storeItr, sourceBlocks) - skipped, results, err := gen.Generate(context.Background()) - require.Nil(t, err) - require.Equal(t, tc.numSkipped, len(skipped)) - - require.True(t, results.Next()) - block := results.At() - require.False(t, results.Next()) - - refs := v1.PointerSlice[v1.SeriesWithBloom](data) - - v1.EqualIterators[*v1.SeriesWithBloom]( - t, - func(a, b *v1.SeriesWithBloom) { - // TODO(owen-d): better equality check - // once chunk fetching is implemented - require.Equal(t, a.Series, b.Series) - }, - v1.NewSliceIter[*v1.SeriesWithBloom](refs), - block.Querier(), - ) + gen := dummyBloomGen(t, tc.toSchema, storeItr, sourceBlocks, refs) + results := gen.Generate(context.Background()) + + var outputBlocks []*v1.Block + for results.Next() { + outputBlocks = append(outputBlocks, results.At()) + } + require.Equal(t, tc.outputBlocks, len(outputBlocks)) + require.Equal(t, tc.numSkipped, len(gen.skipped)) + + // Check all the input series are present in the output blocks. 
+ expectedRefs := v1.PointerSlice(data) + outputRefs := make([]*v1.SeriesWithBloom, 0, len(data)) + for _, block := range outputBlocks { + bq := block.Querier() + for bq.Next() { + outputRefs = append(outputRefs, bq.At()) + } + } + require.Equal(t, len(expectedRefs), len(outputRefs)) + for i := range expectedRefs { + require.Equal(t, expectedRefs[i].Series, outputRefs[i].Series) + } }) } } diff --git a/pkg/bloomcompactor/table_utils.go b/pkg/bloomcompactor/table_utils.go deleted file mode 100644 index 55bc2e9a328f1..0000000000000 --- a/pkg/bloomcompactor/table_utils.go +++ /dev/null @@ -1,16 +0,0 @@ -package bloomcompactor - -import ( - "github.com/prometheus/common/model" - - "github.com/grafana/loki/pkg/compactor/retention" -) - -func getIntervalsForTables(tables []string) map[string]model.Interval { - tablesIntervals := make(map[string]model.Interval, len(tables)) - for _, table := range tables { - tablesIntervals[table] = retention.ExtractIntervalFromTableName(table) - } - - return tablesIntervals -} diff --git a/pkg/bloomcompactor/tsdb.go b/pkg/bloomcompactor/tsdb.go index bb4383cc84f60..6159ce02a804a 100644 --- a/pkg/bloomcompactor/tsdb.go +++ b/pkg/bloomcompactor/tsdb.go @@ -2,15 +2,119 @@ package bloomcompactor import ( "context" + "fmt" + "io" "math" + "path" + "strings" + "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" + "github.com/grafana/loki/pkg/chunkenc" + baseStore "github.com/grafana/loki/pkg/storage" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/storage" + "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb/index" ) +const ( + gzipExtension = ".gz" +) + +type TSDBStore interface { + UsersForPeriod(ctx context.Context, table config.DayTable) ([]string, error) + ResolveTSDBs(ctx context.Context, table config.DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) + LoadTSDB( + ctx context.Context, + table config.DayTable, + tenant string, + id tsdb.Identifier, + bounds v1.FingerprintBounds, + ) (v1.CloseableIterator[*v1.Series], error) +} + +// BloomTSDBStore is a wrapper around the storage.Client interface which +// implements the TSDBStore interface for this pkg. 
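The TSDBStore interface above is intended to be walked one day table at a time: list the tenants with data in the table, resolve each tenant's TSDB files, then stream the series of every TSDB within the requested fingerprint bounds. A minimal sketch of that sequence, with error handling elided; ctx, store, table and bounds are assumed to be supplied by the compactor's planning code:

    users, _ := store.UsersForPeriod(ctx, table)
    for _, tenant := range users {
        ids, _ := store.ResolveTSDBs(ctx, table, tenant)
        for _, id := range ids {
            itr, _ := store.LoadTSDB(ctx, table, tenant, id, bounds)
            for itr.Next() {
                _ = itr.At() // *v1.Series within bounds
            }
            itr.Close()
        }
    }

The BloomTSDBStore below implements this interface directly on top of an index storage.Client.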
+type BloomTSDBStore struct { + storage storage.Client +} + +func NewBloomTSDBStore(storage storage.Client) *BloomTSDBStore { + return &BloomTSDBStore{ + storage: storage, + } +} + +func (b *BloomTSDBStore) UsersForPeriod(ctx context.Context, table config.DayTable) ([]string, error) { + _, users, err := b.storage.ListFiles(ctx, table.Addr(), true) // bypass cache for ease of testing + return users, err +} + +func (b *BloomTSDBStore) ResolveTSDBs(ctx context.Context, table config.DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) { + indices, err := b.storage.ListUserFiles(ctx, table.Addr(), tenant, true) // bypass cache for ease of testing + if err != nil { + return nil, errors.Wrap(err, "failed to list user files") + } + + ids := make([]tsdb.SingleTenantTSDBIdentifier, 0, len(indices)) + for _, index := range indices { + key := index.Name + if decompress := storage.IsCompressedFile(index.Name); decompress { + key = strings.TrimSuffix(key, gzipExtension) + } + + id, ok := tsdb.ParseSingleTenantTSDBPath(path.Base(key)) + if !ok { + return nil, errors.Errorf("failed to parse single tenant tsdb path: %s", key) + } + + ids = append(ids, id) + + } + return ids, nil +} + +func (b *BloomTSDBStore) LoadTSDB( + ctx context.Context, + table config.DayTable, + tenant string, + id tsdb.Identifier, + bounds v1.FingerprintBounds, +) (v1.CloseableIterator[*v1.Series], error) { + withCompression := id.Name() + gzipExtension + + data, err := b.storage.GetUserFile(ctx, table.Addr(), tenant, withCompression) + if err != nil { + return nil, errors.Wrap(err, "failed to get file") + } + defer data.Close() + + decompressorPool := chunkenc.GetReaderPool(chunkenc.EncGZIP) + decompressor, err := decompressorPool.GetReader(data) + if err != nil { + return nil, errors.Wrap(err, "failed to get decompressor") + } + defer decompressorPool.PutReader(decompressor) + + buf, err := io.ReadAll(decompressor) + if err != nil { + return nil, errors.Wrap(err, "failed to read file") + } + + reader, err := index.NewReader(index.RealByteSlice(buf)) + if err != nil { + return nil, errors.Wrap(err, "failed to create index reader") + } + + idx := tsdb.NewTSDBIndex(reader) + + return NewTSDBSeriesIter(ctx, idx, bounds), nil +} + // TSDBStore is an interface for interacting with the TSDB, // modeled off a relevant subset of the `tsdb.TSDBIndex` struct type forSeries interface { @@ -109,3 +213,94 @@ func (t *TSDBSeriesIter) background() { close(t.ch) }() } + +type TSDBStores struct { + schemaCfg config.SchemaConfig + stores []TSDBStore +} + +func NewTSDBStores( + schemaCfg config.SchemaConfig, + storeCfg baseStore.Config, + clientMetrics baseStore.ClientMetrics, +) (*TSDBStores, error) { + res := &TSDBStores{ + schemaCfg: schemaCfg, + stores: make([]TSDBStore, len(schemaCfg.Configs)), + } + + for i, cfg := range schemaCfg.Configs { + if cfg.IndexType == config.TSDBType { + + c, err := baseStore.NewObjectClient(cfg.ObjectType, storeCfg, clientMetrics) + if err != nil { + return nil, errors.Wrap(err, "failed to create object client") + } + prefix := path.Join(cfg.IndexTables.PathPrefix, cfg.IndexTables.Prefix) + res.stores[i] = NewBloomTSDBStore(storage.NewIndexStorageClient(c, prefix)) + } + } + + return res, nil +} + +func (s *TSDBStores) storeForPeriod(table config.DayTime) (TSDBStore, error) { + for i := len(s.schemaCfg.Configs) - 1; i >= 0; i-- { + period := s.schemaCfg.Configs[i] + + if !table.Before(period.From) { + // we have the desired period config + + if s.stores[i] != nil { + // valid: it's of tsdb type + 
return s.stores[i], nil + } + + // invalid + return nil, errors.Errorf( + "store for period is not of TSDB type (%s) while looking up store for (%v)", + period.IndexType, + table, + ) + } + + } + + return nil, fmt.Errorf( + "there is no store matching no matching period found for table (%v) -- too early", + table, + ) +} + +func (s *TSDBStores) UsersForPeriod(ctx context.Context, table config.DayTable) ([]string, error) { + store, err := s.storeForPeriod(table.DayTime) + if err != nil { + return nil, err + } + + return store.UsersForPeriod(ctx, table) +} + +func (s *TSDBStores) ResolveTSDBs(ctx context.Context, table config.DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) { + store, err := s.storeForPeriod(table.DayTime) + if err != nil { + return nil, err + } + + return store.ResolveTSDBs(ctx, table, tenant) +} + +func (s *TSDBStores) LoadTSDB( + ctx context.Context, + table config.DayTable, + tenant string, + id tsdb.Identifier, + bounds v1.FingerprintBounds, +) (v1.CloseableIterator[*v1.Series], error) { + store, err := s.storeForPeriod(table.DayTime) + if err != nil { + return nil, err + } + + return store.LoadTSDB(ctx, table, tenant, id, bounds) +} diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index 1e7a54f1d1e33..58f709f0be2f8 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -23,13 +23,15 @@ of line filter expressions. | bloomgateway.Gateway | - queue.RequestQueue + queue.RequestQueue | - bloomgateway.Worker + bloomgateway.Worker | - bloomshipper.Shipper + bloomgateway.Processor | - bloomshipper.BloomFileClient + bloomshipper.Store + | + bloomshipper.Client | ObjectClient | @@ -56,6 +58,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/pkg/logqlmodel/stats" "github.com/grafana/loki/pkg/queue" "github.com/grafana/loki/pkg/storage" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" @@ -170,11 +173,9 @@ type Gateway struct { workerMetrics *workerMetrics queueMetrics *queue.Metrics - queue *queue.RequestQueue - activeUsers *util.ActiveUsersCleanupService - bloomShipper bloomshipper.Interface - - sharding ShardingStrategy + queue *queue.RequestQueue + activeUsers *util.ActiveUsersCleanupService + bloomStore bloomshipper.Store pendingTasks *pendingTasks @@ -193,12 +194,11 @@ func (l *fixedQueueLimits) MaxConsumers(_ string, _ int) int { } // New returns a new instance of the Bloom Gateway. 
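Note that the constructor documented above no longer takes a ShardingStrategy: block ownership is resolved on the client side via the ring, and the gateway now builds its own bloomshipper.Store, wiring in the optional metas and blocks caches from BloomShipperConfig. A minimal sketch of the updated call, mirroring the tests below; cfg, schemaCfg, storageCfg, limits, cm and logger are assumed to be prepared by the caller:

    gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, prometheus.NewRegistry())
    require.NoError(t, err)
    require.NoError(t, services.StartAndAwaitRunning(context.Background(), gw))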
-func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, overrides Limits, shardingStrategy ShardingStrategy, cm storage.ClientMetrics, logger log.Logger, reg prometheus.Registerer) (*Gateway, error) { +func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, overrides Limits, cm storage.ClientMetrics, logger log.Logger, reg prometheus.Registerer) (*Gateway, error) { g := &Gateway{ cfg: cfg, logger: logger, metrics: newMetrics(reg, constants.Loki, metricsSubsystem), - sharding: shardingStrategy, pendingTasks: makePendingTasks(pendingTasksInitialCap), workerConfig: workerConfig{ maxItems: 100, @@ -206,25 +206,33 @@ func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, o workerMetrics: newWorkerMetrics(reg, constants.Loki, metricsSubsystem), queueMetrics: queue.NewMetrics(reg, constants.Loki, metricsSubsystem), } + var err error g.queue = queue.NewRequestQueue(cfg.MaxOutstandingPerTenant, time.Minute, &fixedQueueLimits{0}, g.queueMetrics) g.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(g.queueMetrics.Cleanup) - // TODO(chaudum): Plug in cache var metasCache cache.Cache - var blocksCache *cache.EmbeddedCache[string, bloomshipper.BlockDirectory] - store, err := bloomshipper.NewBloomStore(schemaCfg.Configs, storageCfg, cm, metasCache, blocksCache, logger) - if err != nil { - return nil, err + mcCfg := storageCfg.BloomShipperConfig.MetasCache + if cache.IsCacheConfigured(mcCfg) { + metasCache, err = cache.New(mcCfg, reg, logger, stats.BloomMetasCache, constants.Loki) + if err != nil { + return nil, err + } } - bloomShipper, err := bloomshipper.NewShipper(store, storageCfg.BloomShipperConfig, overrides, logger, reg) + var blocksCache cache.TypedCache[string, bloomshipper.BlockDirectory] + bcCfg := storageCfg.BloomShipperConfig.BlocksCache + if bcCfg.IsEnabled() { + blocksCache = bloomshipper.NewBlocksCache(bcCfg, reg, logger) + } + + store, err := bloomshipper.NewBloomStore(schemaCfg.Configs, storageCfg, cm, metasCache, blocksCache, logger) if err != nil { return nil, err } // We need to keep a reference to be able to call Stop() on shutdown of the gateway. - g.bloomShipper = bloomShipper + g.bloomStore = store if err := g.initServices(); err != nil { return nil, err @@ -239,7 +247,7 @@ func (g *Gateway) initServices() error { svcs := []services.Service{g.queue, g.activeUsers} for i := 0; i < g.cfg.WorkerConcurrency; i++ { id := fmt.Sprintf("bloom-query-worker-%d", i) - w := newWorker(id, g.workerConfig, g.queue, g.bloomShipper, g.pendingTasks, g.logger, g.workerMetrics) + w := newWorker(id, g.workerConfig, g.queue, g.bloomStore, g.pendingTasks, g.logger, g.workerMetrics) svcs = append(svcs, w) } g.serviceMngr, err = services.NewManager(svcs...) 
@@ -291,7 +299,7 @@ func (g *Gateway) running(ctx context.Context) error { } func (g *Gateway) stopping(_ error) error { - g.bloomShipper.Stop() + g.bloomStore.Stop() return services.StopManagerAndAwaitStopped(context.Background(), g.serviceMngr) } @@ -361,7 +369,7 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk tasksCh := make(chan Task, len(tasks)) for _, task := range tasks { task := task - level.Info(logger).Log("msg", "enqueue task", "task", task.ID, "day", task.day, "series", len(task.series)) + level.Info(logger).Log("msg", "enqueue task", "task", task.ID, "table", task.table, "series", len(task.series)) g.queue.Enqueue(tenantID, []string{}, task, func() { // When enqueuing, we also add the task to the pending tasks g.pendingTasks.Add(task.ID, task) diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index c8da44a7c719b..f07e014b84dc3 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -3,6 +3,7 @@ package bloomgateway import ( "context" "fmt" + "math/rand" "os" "testing" "time" @@ -45,8 +46,6 @@ func newLimits() *validation.Overrides { } func TestBloomGateway_StartStopService(t *testing.T) { - - ss := NewNoopStrategy() logger := log.NewNopLogger() reg := prometheus.NewRegistry() limits := newLimits() @@ -96,7 +95,7 @@ func TestBloomGateway_StartStopService(t *testing.T) { MaxOutstandingPerTenant: 1024, } - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -113,8 +112,6 @@ func TestBloomGateway_StartStopService(t *testing.T) { func TestBloomGateway_FilterChunkRefs(t *testing.T) { tenantID := "test" - - ss := NewNoopStrategy() logger := log.NewLogfmtLogger(os.Stderr) reg := prometheus.NewRegistry() limits := newLimits() @@ -165,15 +162,17 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("shipper error is propagated", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) now := mktime("2023-10-03 10:00") - bqs, data := createBlockQueriers(t, 10, now.Add(-24*time.Hour), now, 0, 1000) - mockStore := newMockBloomStore(bqs) - mockStore.err = errors.New("failed to fetch block") - gw.bloomShipper = mockStore + // replace store implementation and re-initialize workers and sub-services + _, metas, queriers, data := createBlocks(t, tenantID, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + mockStore.err = errors.New("request failed") + gw.bloomStore = mockStore err = gw.initServices() require.NoError(t, err) @@ -185,7 +184,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { require.NoError(t, err) }) - chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 10) + chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 100) // saturate workers // then send additional request @@ -204,21 +203,23 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Cleanup(cancelFn) res, err := gw.FilterChunkRefs(ctx, req) - require.ErrorContainsf(t, err, "request failed: failed to fetch block", "%+v", res) + require.ErrorContainsf(t, err, "request failed", "%+v", res) } }) t.Run("request cancellation does not result in channel locking", func(t *testing.T) { reg 
:= prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) now := mktime("2024-01-25 10:00") - bqs, data := createBlockQueriers(t, 50, now.Add(-24*time.Hour), now, 0, 1024) - mockStore := newMockBloomStore(bqs) - mockStore.delay = 50 * time.Millisecond // delay for each block - 50x50=2500ms - gw.bloomShipper = mockStore + // replace store implementation and re-initialize workers and sub-services + _, metas, queriers, data := createBlocks(t, tenantID, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + mockStore.delay = 2000 * time.Millisecond + gw.bloomStore = mockStore err = gw.initServices() require.NoError(t, err) @@ -255,7 +256,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("returns unfiltered chunk refs if no filters provided", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -300,7 +301,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("gateway tracks active users", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -340,14 +341,15 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("use fuse queriers to filter chunks", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) now := mktime("2023-10-03 10:00") // replace store implementation and re-initialize workers and sub-services - bqs, data := createBlockQueriers(t, 5, now.Add(-8*time.Hour), now, 0, 1024) - gw.bloomShipper = newMockBloomStore(bqs) + _, metas, queriers, data := createBlocks(t, tenantID, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + gw.bloomStore = newMockBloomStore(queriers, metas) err = gw.initServices() require.NoError(t, err) @@ -358,7 +360,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { require.NoError(t, err) }) - chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 100) + chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 10) t.Run("no match - return empty response", func(t *testing.T) { inputChunkRefs := groupRefs(t, chunkRefs) @@ -382,27 +384,37 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("match - return filtered", func(t *testing.T) { inputChunkRefs := groupRefs(t, chunkRefs) - // hack to get indexed key for a specific series - // the indexed key range for a series is defined as - // i * keysPerSeries ... 
i * keysPerSeries + keysPerSeries - 1 - // where i is the nth series in a block - // fortunately, i is also used as Checksum for the single chunk of a series - // see mkBasicSeriesWithBlooms() in pkg/storage/bloom/v1/test_util.go - key := inputChunkRefs[0].Refs[0].Checksum*1000 + 500 + // Hack to get search string for a specific series + // see MkBasicSeriesWithBlooms() in pkg/storage/bloom/v1/test_util.go + // each series has 1 chunk + // each chunk has multiple strings, from int(fp) to int(nextFp)-1 + x := rand.Intn(len(inputChunkRefs)) + fp := inputChunkRefs[x].Fingerprint + chks := inputChunkRefs[x].Refs + line := fmt.Sprintf("%04x:%04x", int(fp), 0) // first line + + t.Log("x=", x, "fp=", fp, "line=", line) req := &logproto.FilterChunkRefRequest{ From: now.Add(-8 * time.Hour), Through: now, Refs: inputChunkRefs, Filters: []syntax.LineFilter{ - {Ty: labels.MatchEqual, Match: fmt.Sprintf("series %d", key)}, + {Ty: labels.MatchEqual, Match: line}, }, } ctx := user.InjectOrgID(context.Background(), tenantID) res, err := gw.FilterChunkRefs(ctx, req) require.NoError(t, err) + expectedResponse := &logproto.FilterChunkRefResponse{ - ChunkRefs: inputChunkRefs[:1], + ChunkRefs: []*logproto.GroupedChunkRefs{ + { + Fingerprint: fp, + Refs: chks, + Tenant: tenantID, + }, + }, } require.Equal(t, expectedResponse, res) }) diff --git a/pkg/bloomgateway/cache.go b/pkg/bloomgateway/cache.go index fe40b87e95488..6c573cb47d6de 100644 --- a/pkg/bloomgateway/cache.go +++ b/pkg/bloomgateway/cache.go @@ -182,6 +182,7 @@ func NewBloomGatewayClientCacheMiddleware( }, cacheGen, retentionEnabled, + false, ) return &ClientCache{ diff --git a/pkg/bloomgateway/client.go b/pkg/bloomgateway/client.go index 6453987b91683..e5fd35d884fb9 100644 --- a/pkg/bloomgateway/client.go +++ b/pkg/bloomgateway/client.go @@ -7,7 +7,6 @@ import ( "io" "math" "math/rand" - "sort" "sync" "github.com/go-kit/log" @@ -20,6 +19,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" + "golang.org/x/exp/slices" "google.golang.org/grpc" "google.golang.org/grpc/health/grpc_health_v1" @@ -36,6 +36,10 @@ import ( ) var ( + // BlocksOwnerRead is the operation used to check the authoritative owners of a block + // (replicas included) that are available for queries (a bloom gateway is available for + // queries only when ACTIVE). 
+ BlocksOwnerRead = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, nil) // groupedChunksRefPool pooling slice of logproto.GroupedChunkRefs [64, 128, 256, ..., 65536] groupedChunksRefPool = queue.NewSlicePool[*logproto.GroupedChunkRefs](1<<6, 1<<16, 2) // ringGetBuffersPool pooling for ringGetBuffers to avoid calling ring.MakeBuffersForGet() for each request @@ -226,15 +230,16 @@ func (c *GatewayClient) FilterChunks(ctx context.Context, tenant string, from, t } subRing := GetShuffleShardingSubring(c.ring, tenant, c.limits) - rs, err := subRing.GetAllHealthy(BlocksRead) + rs, err := subRing.GetAllHealthy(BlocksOwnerRead) if err != nil { return nil, errors.Wrap(err, "bloom gateway get healthy instances") } - streamsByInst, err := c.groupFingerprintsByServer(groups, subRing, rs.Instances) + servers, err := serverAddressesWithTokenRanges(subRing, rs.Instances) if err != nil { return nil, err } + streamsByInst := groupFingerprintsByServer(groups, servers) filteredChunkRefs := groupedChunksRefPool.Get(len(groups)) defer groupedChunksRefPool.Put(filteredChunkRefs) @@ -286,13 +291,9 @@ func (c *GatewayClient) doForAddrs(addrs []string, fn func(logproto.BloomGateway return err } -func (c *GatewayClient) groupFingerprintsByServer(groups []*logproto.GroupedChunkRefs, subRing ring.ReadRing, instances []ring.InstanceDesc) ([]instanceWithFingerprints, error) { - servers, err := serverAddressesWithTokenRanges(subRing, instances) - if err != nil { - return nil, err - } +func groupFingerprintsByServer(groups []*logproto.GroupedChunkRefs, servers []addrsWithTokenRange) []instanceWithFingerprints { boundedFingerprints := partitionFingerprintsByAddresses(groups, servers) - return groupByInstance(boundedFingerprints), nil + return groupByInstance(boundedFingerprints) } func serverAddressesWithTokenRanges(subRing ring.ReadRing, instances []ring.InstanceDesc) ([]addrsWithTokenRange, error) { @@ -303,48 +304,36 @@ func serverAddressesWithTokenRanges(subRing ring.ReadRing, instances []ring.Inst for it.Next() { // We can use on of the tokens from the token range // to obtain all addresses for that token. 
- rs, err := subRing.Get(it.At().MaxToken, BlocksRead, bufDescs, bufHosts, bufZones) + rs, err := subRing.Get(it.At().TokenRange.Max, BlocksOwnerRead, bufDescs, bufHosts, bufZones) if err != nil { return nil, errors.Wrap(err, "bloom gateway get ring") } servers = append(servers, addrsWithTokenRange{ - id: it.At().Instance.Id, - addrs: rs.GetAddresses(), - minToken: it.At().MinToken, - maxToken: it.At().MaxToken, + id: it.At().Instance.Id, + addrs: rs.GetAddresses(), + tokenRange: it.At().TokenRange, }) } - if len(servers) > 0 && servers[len(servers)-1].maxToken < math.MaxUint32 { + if len(servers) > 0 && servers[len(servers)-1].tokenRange.Max < math.MaxUint32 { // append the instance for the token range between the greates token and MaxUint32 servers = append(servers, addrsWithTokenRange{ - id: servers[0].id, - addrs: servers[0].addrs, - minToken: servers[len(servers)-1].maxToken + 1, - maxToken: math.MaxUint32, + id: servers[0].id, + addrs: servers[0].addrs, + tokenRange: bloomutils.NewTokenRange(servers[len(servers)-1].tokenRange.Max+1, math.MaxUint32), }) } return servers, nil } -type instanceWithToken struct { - instance ring.InstanceDesc - token uint32 -} - type addrsWithTokenRange struct { - id string - addrs []string - minToken, maxToken uint32 + id string + addrs []string + tokenRange bloomutils.Range[uint32] } func (s addrsWithTokenRange) cmp(token uint32) v1.BoundsCheck { - if token < s.minToken { - return v1.Before - } else if token > s.maxToken { - return v1.After - } - return v1.Overlap + return s.tokenRange.Cmp(token) } type instanceWithFingerprints struct { @@ -354,13 +343,22 @@ type instanceWithFingerprints struct { func partitionFingerprintsByAddresses(fingerprints []*logproto.GroupedChunkRefs, addresses []addrsWithTokenRange) (result []instanceWithFingerprints) { for _, instance := range addresses { - - min := sort.Search(len(fingerprints), func(i int) bool { - return instance.cmp(uint32(fingerprints[i].Fingerprint)) > v1.Before + min, _ := slices.BinarySearchFunc(fingerprints, instance.tokenRange, func(g *logproto.GroupedChunkRefs, r bloomutils.Range[uint32]) int { + if uint32(g.Fingerprint) < r.Min { + return -1 + } else if uint32(g.Fingerprint) > r.Min { + return 1 + } + return 0 }) - max := sort.Search(len(fingerprints), func(i int) bool { - return instance.cmp(uint32(fingerprints[i].Fingerprint)) == v1.After + max, _ := slices.BinarySearchFunc(fingerprints, instance.tokenRange, func(g *logproto.GroupedChunkRefs, r bloomutils.Range[uint32]) int { + if uint32(g.Fingerprint) <= r.Max { + return -1 + } else if uint32(g.Fingerprint) > r.Max { + return 1 + } + return 0 }) // fingerprint is out of boundaries @@ -410,3 +408,21 @@ func groupByInstance(boundedFingerprints []instanceWithFingerprints) []instanceW return result } + +// GetShuffleShardingSubring returns the subring to be used for a given user. +// This function should be used both by index gateway servers and clients in +// order to guarantee the same logic is used. +func GetShuffleShardingSubring(ring ring.ReadRing, tenantID string, limits Limits) ring.ReadRing { + shardSize := limits.BloomGatewayShardSize(tenantID) + + // A shard size of 0 means shuffle sharding is disabled for this specific user, + // so we just return the full ring so that indexes will be sharded across all index gateways. + // Since we set the shard size to replication factor if shard size is 0, this + // can only happen if both the shard size and the replication factor are set + // to 0. 
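GetShuffleShardingSubring, moved here from the now-deleted sharding.go, is used together with BlocksOwnerRead when the client resolves which gateway instances to query. A condensed sketch of the call sequence, following FilterChunks above (error wrapping shortened; c, tenant and groups come from the surrounding method):

    subRing := GetShuffleShardingSubring(c.ring, tenant, c.limits)
    rs, err := subRing.GetAllHealthy(BlocksOwnerRead)
    if err != nil {
        return nil, err
    }
    servers, err := serverAddressesWithTokenRanges(subRing, rs.Instances)
    if err != nil {
        return nil, err
    }
    streamsByInst := groupFingerprintsByServer(groups, servers)
    // ... one request is then sent per instance, covering only the
    // fingerprints that fall into that instance's token ranges.
    _ = streamsByInst

The shard-size fallback described in the comment above follows.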
+ if shardSize <= 0 { + return ring + } + + return ring.ShuffleShard(tenantID, shardSize) +} diff --git a/pkg/bloomgateway/client_test.go b/pkg/bloomgateway/client_test.go index e59fff2306ab9..8a9a3d35646ce 100644 --- a/pkg/bloomgateway/client_test.go +++ b/pkg/bloomgateway/client_test.go @@ -2,6 +2,7 @@ package bloomgateway import ( "context" + "fmt" "math" "sort" "testing" @@ -19,6 +20,9 @@ import ( "github.com/grafana/loki/pkg/validation" ) +// short constructor +var newTr = bloomutils.NewTokenRange + func TestBloomGatewayClient(t *testing.T) { logger := log.NewNopLogger() reg := prometheus.NewRegistry() @@ -53,10 +57,10 @@ func TestBloomGatewayClient_PartitionFingerprintsByAddresses(t *testing.T) { {Fingerprint: 401}, // out of bounds, will be dismissed } servers := []addrsWithTokenRange{ - {id: "instance-1", addrs: []string{"10.0.0.1"}, minToken: 0, maxToken: 100}, - {id: "instance-2", addrs: []string{"10.0.0.2"}, minToken: 101, maxToken: 200}, - {id: "instance-3", addrs: []string{"10.0.0.3"}, minToken: 201, maxToken: 300}, - {id: "instance-2", addrs: []string{"10.0.0.2"}, minToken: 301, maxToken: 400}, + {id: "instance-1", addrs: []string{"10.0.0.1"}, tokenRange: newTr(0, 100)}, + {id: "instance-2", addrs: []string{"10.0.0.2"}, tokenRange: newTr(101, 200)}, + {id: "instance-3", addrs: []string{"10.0.0.3"}, tokenRange: newTr(201, 300)}, + {id: "instance-2", addrs: []string{"10.0.0.2"}, tokenRange: newTr(301, 400)}, } // partition fingerprints @@ -135,9 +139,9 @@ func TestBloomGatewayClient_PartitionFingerprintsByAddresses(t *testing.T) { {Fingerprint: 350}, } servers := []addrsWithTokenRange{ - {id: "instance-1", addrs: []string{"10.0.0.1"}, minToken: 0, maxToken: 200}, - {id: "instance-2", addrs: []string{"10.0.0.2"}, minToken: 100, maxToken: 300}, - {id: "instance-3", addrs: []string{"10.0.0.3"}, minToken: 200, maxToken: 400}, + {id: "instance-1", addrs: []string{"10.0.0.1"}, tokenRange: newTr(0, 200)}, + {id: "instance-2", addrs: []string{"10.0.0.2"}, tokenRange: newTr(100, 300)}, + {id: "instance-3", addrs: []string{"10.0.0.3"}, tokenRange: newTr(200, 400)}, } // partition fingerprints @@ -162,6 +166,33 @@ func TestBloomGatewayClient_PartitionFingerprintsByAddresses(t *testing.T) { }) } +func BenchmarkPartitionFingerprintsByAddresses(b *testing.B) { + numFp := 100000 + fpStep := math.MaxUint64 / uint64(numFp) + + groups := make([]*logproto.GroupedChunkRefs, 0, numFp) + for i := uint64(0); i < math.MaxUint64-fpStep; i += fpStep { + groups = append(groups, &logproto.GroupedChunkRefs{Fingerprint: i}) + } + + numServers := 100 + tokenStep := math.MaxUint32 / uint32(numServers) + servers := make([]addrsWithTokenRange, 0, numServers) + for i := uint32(0); i < math.MaxUint32-tokenStep; i += tokenStep { + servers = append(servers, addrsWithTokenRange{ + id: fmt.Sprintf("instance-%x", i), + addrs: []string{fmt.Sprintf("%d", i)}, + tokenRange: newTr(i, i+tokenStep), + }) + } + + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _ = partitionFingerprintsByAddresses(groups, servers) + } +} + func TestBloomGatewayClient_ServerAddressesWithTokenRanges(t *testing.T) { testCases := map[string]struct { instances []ring.InstanceDesc @@ -174,10 +205,10 @@ func TestBloomGatewayClient_ServerAddressesWithTokenRanges(t *testing.T) { {Id: "instance-3", Addr: "10.0.0.3", Tokens: []uint32{math.MaxUint32 / 6 * 5}}, }, expected: []addrsWithTokenRange{ - {id: "instance-1", addrs: []string{"10.0.0.1"}, minToken: 0, maxToken: math.MaxUint32 / 6 * 1}, - {id: "instance-2", addrs: 
[]string{"10.0.0.2"}, minToken: math.MaxUint32/6*1 + 1, maxToken: math.MaxUint32 / 6 * 3}, - {id: "instance-3", addrs: []string{"10.0.0.3"}, minToken: math.MaxUint32/6*3 + 1, maxToken: math.MaxUint32 / 6 * 5}, - {id: "instance-1", addrs: []string{"10.0.0.1"}, minToken: math.MaxUint32/6*5 + 1, maxToken: math.MaxUint32}, + {id: "instance-1", addrs: []string{"10.0.0.1"}, tokenRange: newTr(0, math.MaxUint32/6*1)}, + {id: "instance-2", addrs: []string{"10.0.0.2"}, tokenRange: newTr(math.MaxUint32/6*1+1, math.MaxUint32/6*3)}, + {id: "instance-3", addrs: []string{"10.0.0.3"}, tokenRange: newTr(math.MaxUint32/6*3+1, math.MaxUint32/6*5)}, + {id: "instance-1", addrs: []string{"10.0.0.1"}, tokenRange: newTr(math.MaxUint32/6*5+1, math.MaxUint32)}, }, }, "MinUint32 and MaxUint32 are tokens in the ring": { @@ -186,10 +217,10 @@ func TestBloomGatewayClient_ServerAddressesWithTokenRanges(t *testing.T) { {Id: "instance-2", Addr: "10.0.0.2", Tokens: []uint32{math.MaxUint32 / 3 * 1, math.MaxUint32}}, }, expected: []addrsWithTokenRange{ - {id: "instance-1", addrs: []string{"10.0.0.1"}, minToken: 0, maxToken: 0}, - {id: "instance-2", addrs: []string{"10.0.0.2"}, minToken: 1, maxToken: math.MaxUint32 / 3}, - {id: "instance-1", addrs: []string{"10.0.0.1"}, minToken: math.MaxUint32/3*1 + 1, maxToken: math.MaxUint32 / 3 * 2}, - {id: "instance-2", addrs: []string{"10.0.0.2"}, minToken: math.MaxUint32/3*2 + 1, maxToken: math.MaxUint32}, + {id: "instance-1", addrs: []string{"10.0.0.1"}, tokenRange: newTr(0, 0)}, + {id: "instance-2", addrs: []string{"10.0.0.2"}, tokenRange: newTr(1, math.MaxUint32/3)}, + {id: "instance-1", addrs: []string{"10.0.0.1"}, tokenRange: newTr(math.MaxUint32/3*1+1, math.MaxUint32/3*2)}, + {id: "instance-2", addrs: []string{"10.0.0.2"}, tokenRange: newTr(math.MaxUint32/3*2+1, math.MaxUint32)}, }, }, } @@ -207,19 +238,6 @@ func TestBloomGatewayClient_ServerAddressesWithTokenRanges(t *testing.T) { } func TestBloomGatewayClient_GroupFingerprintsByServer(t *testing.T) { - - logger := log.NewNopLogger() - reg := prometheus.NewRegistry() - - l, err := validation.NewOverrides(validation.Limits{BloomGatewayShardSize: 1}, nil) - require.NoError(t, err) - - cfg := ClientConfig{} - flagext.DefaultValues(&cfg) - - c, err := NewClient(cfg, nil, l, reg, logger, "loki", nil, false) - require.NoError(t, err) - instances := []ring.InstanceDesc{ {Id: "instance-1", Addr: "10.0.0.1", Tokens: []uint32{2146405214, 1029997044, 678878693}}, {Id: "instance-2", Addr: "10.0.0.2", Tokens: []uint32{296463531, 1697323986, 800258284}}, @@ -228,7 +246,7 @@ func TestBloomGatewayClient_GroupFingerprintsByServer(t *testing.T) { it := bloomutils.NewInstanceSortMergeIterator(instances) for it.Next() { - t.Log(it.At().MaxToken, it.At().Instance.Addr) + t.Log(it.At().TokenRange.Max, it.At().Instance.Addr) } testCases := []struct { @@ -339,8 +357,9 @@ func TestBloomGatewayClient_GroupFingerprintsByServer(t *testing.T) { return tc.chunks[i].Fingerprint < tc.chunks[j].Fingerprint }) - res, err := c.groupFingerprintsByServer(tc.chunks, subRing, instances) + servers, err := serverAddressesWithTokenRanges(subRing, instances) require.NoError(t, err) + res := groupFingerprintsByServer(tc.chunks, servers) require.Equal(t, tc.expected, res) }) } @@ -369,10 +388,10 @@ type mockRing struct { // Get implements ring.ReadRing. 
func (r *mockRing) Get(key uint32, _ ring.Operation, _ []ring.InstanceDesc, _ []string, _ []string) (ring.ReplicationSet, error) { idx, _ := sort.Find(len(r.ranges), func(i int) int { - if r.ranges[i].MaxToken < key { + if r.ranges[i].TokenRange.Max < key { return 1 } - if r.ranges[i].MaxToken > key { + if r.ranges[i].TokenRange.Max > key { return -1 } return 0 diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index d2722ad8f1496..c952c9f6b87fd 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -12,6 +12,7 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql/syntax" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/config" ) const ( @@ -69,7 +70,7 @@ type Task struct { ctx context.Context // TODO(chaudum): Investigate how to remove that. - day model.Time + table config.DayTime } // NewTask returns a new Task that can be enqueued to the task queue. @@ -89,7 +90,7 @@ func NewTask(ctx context.Context, tenantID string, refs seriesWithBounds, filter filters: filters, series: refs.series, bounds: refs.bounds, - day: refs.day, + table: refs.table, ctx: ctx, done: make(chan struct{}), responses: make([]v1.Output, 0, len(refs.series)), @@ -129,7 +130,7 @@ func (t Task) Copy(series []*logproto.GroupedChunkRefs) Task { filters: t.filters, series: series, bounds: t.bounds, - day: t.day, + table: t.table, ctx: t.ctx, done: make(chan struct{}), responses: make([]v1.Output, 0, len(series)), diff --git a/pkg/bloomgateway/processor.go b/pkg/bloomgateway/processor.go index 26895bc43eda5..7d1d687853979 100644 --- a/pkg/bloomgateway/processor.go +++ b/pkg/bloomgateway/processor.go @@ -3,30 +3,35 @@ package bloomgateway import ( "context" "math" - "sort" + "time" "github.com/go-kit/log" - "github.com/prometheus/common/model" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" ) -type tasksForBlock struct { - blockRef bloomshipper.BlockRef - tasks []Task +func newProcessor(id string, store bloomshipper.Store, logger log.Logger, metrics *workerMetrics) *processor { + return &processor{ + id: id, + store: store, + logger: logger, + metrics: metrics, + } } type processor struct { - store bloomshipper.Store - logger log.Logger + id string + store bloomshipper.Store + logger log.Logger + metrics *workerMetrics } func (p *processor) run(ctx context.Context, tasks []Task) error { - for ts, tasks := range group(tasks, func(t Task) model.Time { return t.day }) { - interval := bloomshipper.NewInterval(ts, ts.Add(Day)) + for ts, tasks := range group(tasks, func(t Task) config.DayTime { return t.table }) { tenant := tasks[0].Tenant - err := p.processTasks(ctx, tenant, interval, []v1.FingerprintBounds{{Min: 0, Max: math.MaxUint64}}, tasks) + err := p.processTasks(ctx, tenant, ts, []v1.FingerprintBounds{{Min: 0, Max: math.MaxUint64}}, tasks) if err != nil { for _, task := range tasks { task.CloseWithError(err) @@ -40,8 +45,9 @@ func (p *processor) run(ctx context.Context, tasks []Task) error { return nil } -func (p *processor) processTasks(ctx context.Context, tenant string, interval bloomshipper.Interval, keyspaces []v1.FingerprintBounds, tasks []Task) error { +func (p *processor) processTasks(ctx context.Context, tenant string, day config.DayTime, keyspaces []v1.FingerprintBounds, tasks []Task) error { minFpRange, maxFpRange := getFirstLast(keyspaces) + interval := 
bloomshipper.NewInterval(day.Bounds()) metaSearch := bloomshipper.MetaSearchParams{ TenantID: tenant, Interval: interval, @@ -51,20 +57,23 @@ func (p *processor) processTasks(ctx context.Context, tenant string, interval bl if err != nil { return err } + p.metrics.metasFetched.WithLabelValues(p.id).Observe(float64(len(metas))) + blocksRefs := bloomshipper.BlocksForMetas(metas, interval, keyspaces) - return p.processBlocks(ctx, partition(tasks, blocksRefs)) + return p.processBlocks(ctx, partitionTasks(tasks, blocksRefs)) } -func (p *processor) processBlocks(ctx context.Context, data []tasksForBlock) error { +func (p *processor) processBlocks(ctx context.Context, data []blockWithTasks) error { refs := make([]bloomshipper.BlockRef, len(data)) for _, block := range data { - refs = append(refs, block.blockRef) + refs = append(refs, block.ref) } bqs, err := p.store.FetchBlocks(ctx, refs) if err != nil { return err } + p.metrics.metasFetched.WithLabelValues(p.id).Observe(float64(len(bqs))) blockIter := v1.NewSliceIter(bqs) @@ -72,7 +81,7 @@ outer: for blockIter.Next() { bq := blockIter.At() for i, block := range data { - if block.blockRef.Bounds.Equal(bq.Bounds) { + if block.ref.Bounds.Equal(bq.Bounds) { err := p.processBlock(ctx, bq.BlockQuerier, block.tasks) bq.Close() if err != nil { @@ -102,7 +111,16 @@ func (p *processor) processBlock(_ context.Context, blockQuerier *v1.BlockQuerie } fq := blockQuerier.Fuse(iters) - return fq.Run() + + start := time.Now() + err = fq.Run() + if err != nil { + p.metrics.blockQueryLatency.WithLabelValues(p.id, labelFailure).Observe(time.Since(start).Seconds()) + } else { + p.metrics.blockQueryLatency.WithLabelValues(p.id, labelSuccess).Observe(time.Since(start).Seconds()) + } + + return err } // getFirstLast returns the first and last item of a fingerprint slice @@ -122,37 +140,3 @@ func group[K comparable, V any, S ~[]V](s S, f func(v V) K) map[K]S { } return m } - -func partition(tasks []Task, blocks []bloomshipper.BlockRef) []tasksForBlock { - result := make([]tasksForBlock, 0, len(blocks)) - - for _, block := range blocks { - bounded := tasksForBlock{ - blockRef: block, - } - - for _, task := range tasks { - refs := task.series - min := sort.Search(len(refs), func(i int) bool { - return block.Cmp(refs[i].Fingerprint) > v1.Before - }) - - max := sort.Search(len(refs), func(i int) bool { - return block.Cmp(refs[i].Fingerprint) == v1.After - }) - - // All fingerprints fall outside of the consumer's range - if min == len(refs) || max == 0 { - continue - } - - bounded.tasks = append(bounded.tasks, task.Copy(refs[min:max])) - } - - if len(bounded.tasks) > 0 { - result = append(result, bounded) - } - - } - return result -} diff --git a/pkg/bloomgateway/processor_test.go b/pkg/bloomgateway/processor_test.go index d39ba61a89613..27d0068753d5b 100644 --- a/pkg/bloomgateway/processor_test.go +++ b/pkg/bloomgateway/processor_test.go @@ -7,23 +7,41 @@ import ( "testing" "time" + "github.com/go-kit/log" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "go.uber.org/atomic" "github.com/grafana/loki/pkg/logql/syntax" + "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" + "github.com/grafana/loki/pkg/util/constants" ) var _ bloomshipper.Store = &dummyStore{} +func newMockBloomStore(bqs []*bloomshipper.CloseableBlockQuerier, metas []bloomshipper.Meta) *dummyStore { + return &dummyStore{ + querieres: bqs, + metas: 
metas, + } +} + type dummyStore struct { metas []bloomshipper.Meta - blocks []bloomshipper.BlockRef querieres []*bloomshipper.CloseableBlockQuerier + + // mock how long it takes to serve block queriers + delay time.Duration + // mock response error when serving block queriers in ForEach + err error } func (s *dummyStore) ResolveMetas(_ context.Context, _ bloomshipper.MetaSearchParams) ([][]bloomshipper.MetaRef, []*bloomshipper.Fetcher, error) { + time.Sleep(s.delay) + //TODO(chaudum) Filter metas based on search params refs := make([]bloomshipper.MetaRef, 0, len(s.metas)) for _, meta := range s.metas { @@ -51,6 +69,11 @@ func (s *dummyStore) Stop() { func (s *dummyStore) FetchBlocks(_ context.Context, refs []bloomshipper.BlockRef) ([]*bloomshipper.CloseableBlockQuerier, error) { result := make([]*bloomshipper.CloseableBlockQuerier, 0, len(s.querieres)) + if s.err != nil { + time.Sleep(s.delay) + return result, s.err + } + for _, ref := range refs { for _, bq := range s.querieres { if ref.Bounds.Equal(bq.Bounds) { @@ -63,6 +86,8 @@ func (s *dummyStore) FetchBlocks(_ context.Context, refs []bloomshipper.BlockRef result[i], result[j] = result[j], result[i] }) + time.Sleep(s.delay) + return result, nil } @@ -70,16 +95,13 @@ func TestProcessor(t *testing.T) { ctx := context.Background() tenant := "fake" now := mktime("2024-01-27 12:00") + metrics := newWorkerMetrics(prometheus.NewPedanticRegistry(), constants.Loki, "bloom_gatway") - t.Run("dummy", func(t *testing.T) { - blocks, metas, queriers, data := createBlocks(t, tenant, 10, now.Add(-1*time.Hour), now, 0x0000, 0x1000) - p := &processor{ - store: &dummyStore{ - querieres: queriers, - metas: metas, - blocks: blocks, - }, - } + t.Run("success case", func(t *testing.T) { + _, metas, queriers, data := createBlocks(t, tenant, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + p := newProcessor("worker", mockStore, log.NewNopLogger(), metrics) chunkRefs := createQueryInputFromBlockData(t, tenant, data, 10) swb := seriesWithBounds{ @@ -88,7 +110,7 @@ func TestProcessor(t *testing.T) { Start: now.Add(-1 * time.Hour), End: now, }, - day: truncateDay(now), + table: config.NewDayTime(truncateDay(now)), } filters := []syntax.LineFilter{ {Ty: 0, Match: "no match"}, @@ -116,4 +138,48 @@ func TestProcessor(t *testing.T) { require.NoError(t, err) require.Equal(t, int64(len(swb.series)), results.Load()) }) + + t.Run("failure case", func(t *testing.T) { + _, metas, queriers, data := createBlocks(t, tenant, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + mockStore.err = errors.New("store failed") + + p := newProcessor("worker", mockStore, log.NewNopLogger(), metrics) + + chunkRefs := createQueryInputFromBlockData(t, tenant, data, 10) + swb := seriesWithBounds{ + series: groupRefs(t, chunkRefs), + bounds: model.Interval{ + Start: now.Add(-1 * time.Hour), + End: now, + }, + table: config.NewDayTime(truncateDay(now)), + } + filters := []syntax.LineFilter{ + {Ty: 0, Match: "no match"}, + } + + t.Log("series", len(swb.series)) + task, _ := NewTask(ctx, "fake", swb, filters) + tasks := []Task{task} + + results := atomic.NewInt64(0) + var wg sync.WaitGroup + for i := range tasks { + wg.Add(1) + go func(ta Task) { + defer wg.Done() + for range ta.resCh { + results.Inc() + } + t.Log("done", results.Load()) + }(tasks[i]) + } + + err := p.run(ctx, tasks) + wg.Wait() + require.Errorf(t, err, "store failed") + require.Equal(t, int64(0), results.Load()) + }) } diff 
--git a/pkg/bloomgateway/sharding.go b/pkg/bloomgateway/sharding.go deleted file mode 100644 index 5dfb9f11732a0..0000000000000 --- a/pkg/bloomgateway/sharding.go +++ /dev/null @@ -1,156 +0,0 @@ -package bloomgateway - -import ( - "context" - - "github.com/go-kit/log" - "github.com/grafana/dskit/ring" - - util_ring "github.com/grafana/loki/pkg/util/ring" -) - -// TODO(chaudum): Replace this placeholder with actual BlockRef struct. -type BlockRef struct { - FromFp, ThroughFp uint64 - FromTs, ThroughTs int64 -} - -var ( - // BlocksOwnerSync is the operation used to check the authoritative owners of a block - // (replicas included). - BlocksOwnerSync = ring.NewOp([]ring.InstanceState{ring.JOINING, ring.ACTIVE, ring.LEAVING}, nil) - - // BlocksOwnerRead is the operation used to check the authoritative owners of a block - // (replicas included) that are available for queries (a bloom gateway is available for - // queries only when ACTIVE). - BlocksOwnerRead = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, nil) - - // BlocksRead is the operation run by the querier to query blocks via the bloom gateway. - BlocksRead = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, func(s ring.InstanceState) bool { - // Blocks can only be queried from ACTIVE instances. However, if the block belongs to - // a non-active instance, then we should extend the replication set and try to query it - // from the next ACTIVE instance in the ring (which is expected to have it because a - // bloom gateway keeps their previously owned blocks until new owners are ACTIVE). - return s != ring.ACTIVE - }) -) - -type ShardingStrategy interface { - // FilterTenants whose indexes should be loaded by the index gateway. - // Returns the list of user IDs that should be synced by the index gateway. - FilterTenants(ctx context.Context, tenantIDs []string) ([]string, error) - FilterBlocks(ctx context.Context, tenantID string, blockRefs []BlockRef) ([]BlockRef, error) -} - -type ShuffleShardingStrategy struct { - util_ring.TenantSharding - r ring.ReadRing - ringLifeCycler *ring.BasicLifecycler - logger log.Logger -} - -func NewShuffleShardingStrategy(r ring.ReadRing, ringLifecycler *ring.BasicLifecycler, limits Limits, logger log.Logger) *ShuffleShardingStrategy { - return &ShuffleShardingStrategy{ - TenantSharding: util_ring.NewTenantShuffleSharding(r, ringLifecycler, limits.BloomGatewayShardSize), - ringLifeCycler: ringLifecycler, - logger: logger, - } -} - -// FilterTenants implements ShardingStrategy. -func (s *ShuffleShardingStrategy) FilterTenants(_ context.Context, tenantIDs []string) ([]string, error) { - // As a protection, ensure the bloom gateway instance is healthy in the ring. It could also be missing - // in the ring if it was failing to heartbeat the ring and it got remove from another healthy bloom gateway - // instance, because of the auto-forget feature. - if set, err := s.r.GetAllHealthy(BlocksOwnerSync); err != nil { - return nil, err - } else if !set.Includes(s.ringLifeCycler.GetInstanceID()) { - return nil, errGatewayUnhealthy - } - - var filteredIDs []string - - for _, tenantID := range tenantIDs { - // Include the user only if it belongs to this bloom gateway shard. - if s.OwnsTenant(tenantID) { - filteredIDs = append(filteredIDs, tenantID) - } - } - - return filteredIDs, nil -} - -// nolint:revive -func getBucket(rangeMin, rangeMax, pos uint64) int { - return 0 -} - -// FilterBlocks implements ShardingStrategy. 
-func (s *ShuffleShardingStrategy) FilterBlocks(_ context.Context, tenantID string, blockRefs []BlockRef) ([]BlockRef, error) { - if !s.OwnsTenant(tenantID) { - return nil, nil - } - - filteredBlockRefs := make([]BlockRef, 0, len(blockRefs)) - - tenantRing := s.GetTenantSubRing(tenantID) - - fpSharding := util_ring.NewFingerprintShuffleSharding(tenantRing, s.ringLifeCycler, BlocksOwnerSync) - for _, blockRef := range blockRefs { - owns, err := fpSharding.OwnsFingerprint(blockRef.FromFp) - if err != nil { - return nil, err - } - if owns { - filteredBlockRefs = append(filteredBlockRefs, blockRef) - continue - } - - owns, err = fpSharding.OwnsFingerprint(blockRef.ThroughFp) - if err != nil { - return nil, err - } - if owns { - filteredBlockRefs = append(filteredBlockRefs, blockRef) - continue - } - } - - return filteredBlockRefs, nil -} - -// GetShuffleShardingSubring returns the subring to be used for a given user. -// This function should be used both by index gateway servers and clients in -// order to guarantee the same logic is used. -func GetShuffleShardingSubring(ring ring.ReadRing, tenantID string, limits Limits) ring.ReadRing { - shardSize := limits.BloomGatewayShardSize(tenantID) - - // A shard size of 0 means shuffle sharding is disabled for this specific user, - // so we just return the full ring so that indexes will be sharded across all index gateways. - // Since we set the shard size to replication factor if shard size is 0, this - // can only happen if both the shard size and the replication factor are set - // to 0. - if shardSize <= 0 { - return ring - } - - return ring.ShuffleShard(tenantID, shardSize) -} - -// NoopStrategy is an implementation of the ShardingStrategy that does not -// filter anything. -type NoopStrategy struct{} - -func NewNoopStrategy() *NoopStrategy { - return &NoopStrategy{} -} - -// FilterTenants implements ShardingStrategy. -func (s *NoopStrategy) FilterTenants(_ context.Context, tenantIDs []string) ([]string, error) { - return tenantIDs, nil -} - -// FilterBlocks implements ShardingStrategy. 
-func (s *NoopStrategy) FilterBlocks(_ context.Context, _ string, blockRefs []BlockRef) ([]BlockRef, error) { - return blockRefs, nil -} diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index cf72aec3b5b4b..3ab234aaa8ae0 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -11,6 +11,7 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql/syntax" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" ) @@ -82,15 +83,17 @@ func convertToChunkRefs(refs []*logproto.ShortRef) v1.ChunkRefs { return result } -type boundedTasks struct { - blockRef bloomshipper.BlockRef - tasks []Task +type blockWithTasks struct { + ref bloomshipper.BlockRef + tasks []Task } -func partitionFingerprintRange(tasks []Task, blocks []bloomshipper.BlockRef) (result []boundedTasks) { +func partitionTasks(tasks []Task, blocks []bloomshipper.BlockRef) []blockWithTasks { + result := make([]blockWithTasks, 0, len(blocks)) + for _, block := range blocks { - bounded := boundedTasks{ - blockRef: block, + bounded := blockWithTasks{ + ref: block, } for _, task := range tasks { @@ -121,7 +124,7 @@ func partitionFingerprintRange(tasks []Task, blocks []bloomshipper.BlockRef) (re type seriesWithBounds struct { bounds model.Interval - day model.Time + table config.DayTime series []*logproto.GroupedChunkRefs } @@ -173,7 +176,7 @@ func partitionRequest(req *logproto.FilterChunkRefRequest) []seriesWithBounds { Start: minTs, End: maxTs, }, - day: day, + table: config.NewDayTime(day), series: res, }) } diff --git a/pkg/bloomgateway/util_test.go b/pkg/bloomgateway/util_test.go index f19564b43ef59..6bc43cf794342 100644 --- a/pkg/bloomgateway/util_test.go +++ b/pkg/bloomgateway/util_test.go @@ -1,8 +1,6 @@ package bloomgateway import ( - "context" - "math/rand" "testing" "time" @@ -75,7 +73,7 @@ func mkBlockRef(minFp, maxFp uint64) bloomshipper.BlockRef { } } -func TestPartitionFingerprintRange(t *testing.T) { +func TestPartitionTasks(t *testing.T) { t.Run("consecutive block ranges", func(t *testing.T) { bounds := []bloomshipper.BlockRef{ @@ -95,7 +93,7 @@ func TestPartitionFingerprintRange(t *testing.T) { tasks[i%nTasks].series = append(tasks[i%nTasks].series, &logproto.GroupedChunkRefs{Fingerprint: uint64(i)}) } - results := partitionFingerprintRange(tasks, bounds) + results := partitionTasks(tasks, bounds) require.Equal(t, 3, len(results)) // ensure we only return bounds in range actualFingerprints := make([]*logproto.GroupedChunkRefs, 0, nSeries) @@ -130,7 +128,7 @@ func TestPartitionFingerprintRange(t *testing.T) { task.series = append(task.series, &logproto.GroupedChunkRefs{Fingerprint: uint64(i)}) } - results := partitionFingerprintRange([]Task{task}, bounds) + results := partitionTasks([]Task{task}, bounds) require.Equal(t, 3, len(results)) // ensure we only return bounds in range for _, res := range results { // ensure we have the right number of tasks per bound @@ -178,7 +176,7 @@ func TestPartitionRequest(t *testing.T) { exp: []seriesWithBounds{ { bounds: model.Interval{Start: ts.Add(-60 * time.Minute), End: ts.Add(-45 * time.Minute)}, - day: mktime("2024-01-24 00:00"), + table: config.NewDayTime(mktime("2024-01-24 00:00")), series: []*logproto.GroupedChunkRefs{ { Fingerprint: 0x00, @@ -219,7 +217,7 @@ func TestPartitionRequest(t *testing.T) { exp: []seriesWithBounds{ { bounds: model.Interval{Start: ts.Add(-23 * time.Hour), End: ts.Add(-22 * time.Hour)}, - day: 
mktime("2024-01-23 00:00"), + table: config.NewDayTime(mktime("2024-01-23 00:00")), series: []*logproto.GroupedChunkRefs{ { Fingerprint: 0x00, @@ -231,7 +229,7 @@ func TestPartitionRequest(t *testing.T) { }, { bounds: model.Interval{Start: ts.Add(-2 * time.Hour), End: ts.Add(-1 * time.Hour)}, - day: mktime("2024-01-24 00:00"), + table: config.NewDayTime(mktime("2024-01-24 00:00")), series: []*logproto.GroupedChunkRefs{ { Fingerprint: 0x01, @@ -260,7 +258,7 @@ func TestPartitionRequest(t *testing.T) { exp: []seriesWithBounds{ { bounds: model.Interval{Start: ts.Add(-13 * time.Hour), End: ts.Add(-11 * time.Hour)}, - day: mktime("2024-01-23 00:00"), + table: config.NewDayTime(mktime("2024-01-23 00:00")), series: []*logproto.GroupedChunkRefs{ { Fingerprint: 0x00, @@ -272,7 +270,7 @@ func TestPartitionRequest(t *testing.T) { }, { bounds: model.Interval{Start: ts.Add(-13 * time.Hour), End: ts.Add(-11 * time.Hour)}, - day: mktime("2024-01-24 00:00"), + table: config.NewDayTime(mktime("2024-01-24 00:00")), series: []*logproto.GroupedChunkRefs{ { Fingerprint: 0x00, @@ -295,39 +293,10 @@ func TestPartitionRequest(t *testing.T) { } -func createBlockQueriers(t *testing.T, numBlocks int, from, through model.Time, minFp, maxFp model.Fingerprint) ([]*bloomshipper.CloseableBlockQuerier, [][]v1.SeriesWithBloom) { - t.Helper() - step := (maxFp - minFp) / model.Fingerprint(numBlocks) - bqs := make([]*bloomshipper.CloseableBlockQuerier, 0, numBlocks) - series := make([][]v1.SeriesWithBloom, 0, numBlocks) - for i := 0; i < numBlocks; i++ { - fromFp := minFp + (step * model.Fingerprint(i)) - throughFp := fromFp + step - 1 - // last block needs to include maxFp - if i == numBlocks-1 { - throughFp = maxFp - } - blockQuerier, data := v1.MakeBlockQuerier(t, fromFp, throughFp, from, through) - bq := &bloomshipper.CloseableBlockQuerier{ - BlockQuerier: blockQuerier, - BlockRef: bloomshipper.BlockRef{ - Ref: bloomshipper.Ref{ - Bounds: v1.NewBounds(fromFp, throughFp), - StartTimestamp: from, - EndTimestamp: through, - }, - }, - } - bqs = append(bqs, bq) - series = append(series, data) - } - return bqs, series -} - func createBlocks(t *testing.T, tenant string, n int, from, through model.Time, minFp, maxFp model.Fingerprint) ([]bloomshipper.BlockRef, []bloomshipper.Meta, []*bloomshipper.CloseableBlockQuerier, [][]v1.SeriesWithBloom) { t.Helper() - blocks := make([]bloomshipper.BlockRef, 0, n) + blockRefs := make([]bloomshipper.BlockRef, 0, n) metas := make([]bloomshipper.Meta, 0, n) queriers := make([]*bloomshipper.CloseableBlockQuerier, 0, n) series := make([][]v1.SeriesWithBloom, 0, n) @@ -342,84 +311,39 @@ func createBlocks(t *testing.T, tenant string, n int, from, through model.Time, } ref := bloomshipper.Ref{ TenantID: tenant, - TableName: "table_0", + TableName: config.NewDayTable(config.NewDayTime(truncateDay(from)), "").Addr(), Bounds: v1.NewBounds(fromFp, throughFp), StartTimestamp: from, EndTimestamp: through, } - block := bloomshipper.BlockRef{ + blockRef := bloomshipper.BlockRef{ Ref: ref, } meta := bloomshipper.Meta{ MetaRef: bloomshipper.MetaRef{ Ref: ref, }, - Tombstones: []bloomshipper.BlockRef{}, - Blocks: []bloomshipper.BlockRef{block}, + BlockTombstones: []bloomshipper.BlockRef{}, + Blocks: []bloomshipper.BlockRef{blockRef}, } - blockQuerier, data := v1.MakeBlockQuerier(t, fromFp, throughFp, from, through) + block, data, _ := v1.MakeBlock(t, n, fromFp, throughFp, from, through) + // Printing fingerprints and the log lines of its chunks comes handy for debugging... 
+ // for i := range keys { + // t.Log(data[i].Series.Fingerprint) + // for j := range keys[i] { + // t.Log(i, j, string(keys[i][j])) + // } + // } querier := &bloomshipper.CloseableBlockQuerier{ - BlockQuerier: blockQuerier, - BlockRef: block, + BlockQuerier: v1.NewBlockQuerier(block), + BlockRef: blockRef, } queriers = append(queriers, querier) metas = append(metas, meta) - blocks = append(blocks, block) + blockRefs = append(blockRefs, blockRef) series = append(series, data) } - return blocks, metas, queriers, series -} - -func newMockBloomStore(bqs []*bloomshipper.CloseableBlockQuerier) *mockBloomStore { - return &mockBloomStore{bqs: bqs} -} - -type mockBloomStore struct { - bqs []*bloomshipper.CloseableBlockQuerier - // mock how long it takes to serve block queriers - delay time.Duration - // mock response error when serving block queriers in ForEach - err error -} - -var _ bloomshipper.Interface = &mockBloomStore{} - -// GetBlockRefs implements bloomshipper.Interface -func (s *mockBloomStore) GetBlockRefs(_ context.Context, _ string, _ bloomshipper.Interval) ([]bloomshipper.BlockRef, error) { - time.Sleep(s.delay) - blocks := make([]bloomshipper.BlockRef, 0, len(s.bqs)) - for i := range s.bqs { - blocks = append(blocks, s.bqs[i].BlockRef) - } - return blocks, nil -} - -// Stop implements bloomshipper.Interface -func (s *mockBloomStore) Stop() {} - -// ForEach implements bloomshipper.Interface -func (s *mockBloomStore) ForEach(_ context.Context, _ string, _ []bloomshipper.BlockRef, callback bloomshipper.ForEachBlockCallback) error { - if s.err != nil { - time.Sleep(s.delay) - return s.err - } - - shuffled := make([]*bloomshipper.CloseableBlockQuerier, len(s.bqs)) - _ = copy(shuffled, s.bqs) - - rand.Shuffle(len(shuffled), func(i, j int) { - shuffled[i], shuffled[j] = shuffled[j], shuffled[i] - }) - - for _, bq := range shuffled { - // ignore errors in the mock - time.Sleep(s.delay) - err := callback(bq.BlockQuerier, bq.Bounds) - if err != nil { - return err - } - } - return nil + return blockRefs, metas, queriers, series } func createQueryInputFromBlockData(t *testing.T, tenant string, data [][]v1.SeriesWithBloom, nthSeries int) []*logproto.ChunkRef { diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index 0f7db8a9ca586..5c57c0a2e4952 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -10,60 +10,76 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/prometheus/common/model" - "golang.org/x/exp/slices" "github.com/grafana/loki/pkg/queue" - v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" ) +const ( + labelSuccess = "success" + labelFailure = "failure" +) + type workerConfig struct { maxItems int } type workerMetrics struct { - dequeuedTasks *prometheus.CounterVec - dequeueErrors *prometheus.CounterVec - dequeueWaitTime *prometheus.SummaryVec - storeAccessLatency *prometheus.HistogramVec - bloomQueryLatency *prometheus.HistogramVec + dequeueDuration *prometheus.HistogramVec + processDuration *prometheus.HistogramVec + metasFetched *prometheus.HistogramVec + blocksFetched *prometheus.HistogramVec + tasksDequeued *prometheus.CounterVec + tasksProcessed *prometheus.CounterVec + blockQueryLatency *prometheus.HistogramVec } func newWorkerMetrics(registerer prometheus.Registerer, namespace, subsystem string) *workerMetrics { labels := []string{"worker"} + r := 
promauto.With(registerer) return &workerMetrics{ - dequeuedTasks: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + dequeueDuration: r.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "dequeued_tasks_total", - Help: "Total amount of tasks that the worker dequeued from the bloom query queue", + Name: "dequeue_duration_seconds", + Help: "Time spent dequeuing tasks from queue in seconds", }, labels), - dequeueErrors: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + processDuration: r.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "process_duration_seconds", + Help: "Time spent processing tasks in seconds", + }, append(labels, "status")), + metasFetched: r.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "dequeue_errors_total", - Help: "Total amount of failed dequeue operations", + Name: "metas_fetched", + Help: "Amount of metas fetched", }, labels), - dequeueWaitTime: promauto.With(registerer).NewSummaryVec(prometheus.SummaryOpts{ + blocksFetched: r.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "dequeue_wait_time", - Help: "Time spent waiting for dequeuing tasks from queue", + Name: "blocks_fetched", + Help: "Amount of blocks fetched", }, labels), - bloomQueryLatency: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ + tasksDequeued: r.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "bloom_query_latency", - Help: "Latency in seconds of processing bloom blocks", + Name: "tasks_dequeued_total", + Help: "Total amount of tasks that the worker dequeued from the queue", }, append(labels, "status")), - // TODO(chaudum): Move this metric into the bloomshipper - storeAccessLatency: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ + tasksProcessed: r.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "store_latency", - Help: "Latency in seconds of accessing the bloom store component", - }, append(labels, "operation")), + Name: "tasks_processed_total", + Help: "Total amount of tasks that the worker processed", + }, append(labels, "status")), + blockQueryLatency: r.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "block_query_latency", + Help: "Time spent running searches against a bloom block", + }, append(labels, "status")), } } @@ -78,18 +94,18 @@ type worker struct { id string cfg workerConfig queue *queue.RequestQueue - shipper bloomshipper.Interface + store bloomshipper.Store pending *pendingTasks logger log.Logger metrics *workerMetrics } -func newWorker(id string, cfg workerConfig, queue *queue.RequestQueue, shipper bloomshipper.Interface, pending *pendingTasks, logger log.Logger, metrics *workerMetrics) *worker { +func newWorker(id string, cfg workerConfig, queue *queue.RequestQueue, store bloomshipper.Store, pending *pendingTasks, logger log.Logger, metrics *workerMetrics) *worker { w := &worker{ id: id, cfg: cfg, queue: queue, - shipper: shipper, + store: store, pending: pending, logger: log.With(logger, "worker", id), metrics: metrics, @@ -107,17 +123,19 @@ func (w *worker) starting(_ context.Context) error { func (w *worker) running(_ context.Context) error { idx := queue.StartIndexWithLocalQueue + p := newProcessor(w.id, w.store, w.logger, w.metrics) + for st := w.State(); st == services.Running || st == 
services.Stopping; { taskCtx := context.Background() - dequeueStart := time.Now() + start := time.Now() items, newIdx, err := w.queue.DequeueMany(taskCtx, idx, w.id, w.cfg.maxItems) - w.metrics.dequeueWaitTime.WithLabelValues(w.id).Observe(time.Since(dequeueStart).Seconds()) + w.metrics.dequeueDuration.WithLabelValues(w.id).Observe(time.Since(start).Seconds()) if err != nil { // We only return an error if the queue is stopped and dequeuing did not yield any items if err == queue.ErrStopped && len(items) == 0 { return err } - w.metrics.dequeueErrors.WithLabelValues(w.id).Inc() + w.metrics.tasksDequeued.WithLabelValues(w.id, labelFailure).Inc() level.Error(w.logger).Log("msg", "failed to dequeue tasks", "err", err, "items", len(items)) } idx = newIdx @@ -126,10 +144,9 @@ func (w *worker) running(_ context.Context) error { w.queue.ReleaseRequests(items) continue } - w.metrics.dequeuedTasks.WithLabelValues(w.id).Add(float64(len(items))) - - tasksPerDay := make(map[model.Time][]Task) + w.metrics.tasksDequeued.WithLabelValues(w.id, labelSuccess).Add(float64(len(items))) + tasks := make([]Task, 0, len(items)) for _, item := range items { task, ok := item.(Task) if !ok { @@ -139,91 +156,19 @@ func (w *worker) running(_ context.Context) error { } level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID) w.pending.Delete(task.ID) - - tasksPerDay[task.day] = append(tasksPerDay[task.day], task) + tasks = append(tasks, task) } - for day, tasks := range tasksPerDay { - - // Remove tasks that are already cancelled - tasks = slices.DeleteFunc(tasks, func(t Task) bool { - if res := t.ctx.Err(); res != nil { - t.CloseWithError(res) - return true - } - return false - }) - // no tasks to process - // continue with tasks of next day - if len(tasks) == 0 { - continue - } - - // interval is [Start, End) - interval := bloomshipper.NewInterval(day, day.Add(Day)) - logger := log.With(w.logger, "day", day.Time(), "tenant", tasks[0].Tenant) - level.Debug(logger).Log("msg", "process tasks", "tasks", len(tasks)) - - storeFetchStart := time.Now() - blockRefs, err := w.shipper.GetBlockRefs(taskCtx, tasks[0].Tenant, interval) - w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockRefs").Observe(time.Since(storeFetchStart).Seconds()) - if err != nil { - for _, t := range tasks { - t.CloseWithError(err) - } - // continue with tasks of next day - continue - } - if len(tasks) == 0 { - continue - } - - // No blocks found. - // Since there are no blocks for the given tasks, we need to return the - // unfiltered list of chunk refs. - if len(blockRefs) == 0 { - level.Warn(logger).Log("msg", "no blocks found") - for _, t := range tasks { - t.Close() - } - // continue with tasks of next day - continue - } - - // Remove tasks that are already cancelled - tasks = slices.DeleteFunc(tasks, func(t Task) bool { - if res := t.ctx.Err(); res != nil { - t.CloseWithError(res) - return true - } - return false - }) - // no tasks to process - // continue with tasks of next day - if len(tasks) == 0 { - continue - } - - tasksForBlocks := partitionFingerprintRange(tasks, blockRefs) - blockRefs = blockRefs[:0] - for _, b := range tasksForBlocks { - blockRefs = append(blockRefs, b.blockRef) - } + start = time.Now() + err = p.run(taskCtx, tasks) - err = w.processBlocksWithCallback(taskCtx, tasks[0].Tenant, blockRefs, tasksForBlocks) - if err != nil { - for _, t := range tasks { - t.CloseWithError(err) - } - // continue with tasks of next day - continue - } - - // all tasks for this day are done. 
- // close them to notify the request handler - for _, task := range tasks { - task.Close() - } + if err != nil { + w.metrics.processDuration.WithLabelValues(w.id, labelSuccess).Observe(time.Since(start).Seconds()) + w.metrics.tasksProcessed.WithLabelValues(w.id, labelFailure).Add(float64(len(tasks))) + level.Error(w.logger).Log("msg", "failed to process tasks", "err", err) + } else { + w.metrics.processDuration.WithLabelValues(w.id, labelSuccess).Observe(time.Since(start).Seconds()) + w.metrics.tasksProcessed.WithLabelValues(w.id, labelSuccess).Add(float64(len(tasks))) } // return dequeued items back to the pool @@ -238,41 +183,3 @@ func (w *worker) stopping(err error) error { w.queue.UnregisterConsumerConnection(w.id) return nil } - -func (w *worker) processBlocksWithCallback(taskCtx context.Context, tenant string, blockRefs []bloomshipper.BlockRef, boundedRefs []boundedTasks) error { - return w.shipper.ForEach(taskCtx, tenant, blockRefs, func(bq *v1.BlockQuerier, bounds v1.FingerprintBounds) error { - for _, b := range boundedRefs { - if b.blockRef.Bounds.Equal(bounds) { - return w.processBlock(bq, b.tasks) - } - } - return nil - }) -} - -func (w *worker) processBlock(blockQuerier *v1.BlockQuerier, tasks []Task) error { - schema, err := blockQuerier.Schema() - if err != nil { - return err - } - - tokenizer := v1.NewNGramTokenizer(schema.NGramLen(), 0) - iters := make([]v1.PeekingIterator[v1.Request], 0, len(tasks)) - for _, task := range tasks { - it := v1.NewPeekingIter(task.RequestIter(tokenizer)) - iters = append(iters, it) - } - fq := blockQuerier.Fuse(iters) - - start := time.Now() - err = fq.Run() - duration := time.Since(start).Seconds() - - if err != nil { - w.metrics.bloomQueryLatency.WithLabelValues(w.id, "failure").Observe(duration) - return err - } - - w.metrics.bloomQueryLatency.WithLabelValues(w.id, "success").Observe(duration) - return nil -} diff --git a/pkg/bloomutils/iter.go b/pkg/bloomutils/iter.go deleted file mode 100644 index fdbe4a5e62587..0000000000000 --- a/pkg/bloomutils/iter.go +++ /dev/null @@ -1,37 +0,0 @@ -package bloomutils - -import ( - "io" - - v1 "github.com/grafana/loki/pkg/storage/bloom/v1" -) - -// sortMergeIterator implements v1.Iterator -type sortMergeIterator[T any, C comparable, R any] struct { - curr *R - heap *v1.HeapIterator[v1.IndexedValue[C]] - items []T - transform func(T, C, *R) *R - err error -} - -func (it *sortMergeIterator[T, C, R]) Next() bool { - ok := it.heap.Next() - if !ok { - it.err = io.EOF - return false - } - - group := it.heap.At() - it.curr = it.transform(it.items[group.Index()], group.Value(), it.curr) - - return true -} - -func (it *sortMergeIterator[T, C, R]) At() R { - return *it.curr -} - -func (it *sortMergeIterator[T, C, R]) Err() error { - return it.err -} diff --git a/pkg/bloomutils/ring.go b/pkg/bloomutils/ring.go index 08e62a13acb71..d2aebe5b88a37 100644 --- a/pkg/bloomutils/ring.go +++ b/pkg/bloomutils/ring.go @@ -1,32 +1,62 @@ // This file contains a bunch of utility functions for bloom components. 
-// TODO: Find a better location for this package package bloomutils import ( + "errors" + "fmt" "math" "sort" "github.com/grafana/dskit/ring" + "golang.org/x/exp/constraints" "golang.org/x/exp/slices" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" ) -type InstanceWithTokenRange struct { - Instance ring.InstanceDesc - MinToken, MaxToken uint32 +var ( + Uint32Range = Range[uint32]{Min: 0, Max: math.MaxUint32} + Uint64Range = Range[uint64]{Min: 0, Max: math.MaxUint64} +) + +type Range[T constraints.Unsigned] struct { + Min, Max T } -func (i InstanceWithTokenRange) Cmp(token uint32) v1.BoundsCheck { - if token < i.MinToken { +func (r Range[T]) String() string { + return fmt.Sprintf("%016x-%016x", r.Min, r.Max) +} + +func (r Range[T]) Less(other Range[T]) bool { + if r.Min != other.Min { + return r.Min < other.Min + } + return r.Max <= other.Max +} + +func (r Range[T]) Cmp(t T) v1.BoundsCheck { + if t < r.Min { return v1.Before - } else if token > i.MaxToken { + } else if t > r.Max { return v1.After } return v1.Overlap } +func NewTokenRange(min, max uint32) Range[uint32] { + return Range[uint32]{min, max} +} + +type InstanceWithTokenRange struct { + Instance ring.InstanceDesc + TokenRange Range[uint32] +} + +func (i InstanceWithTokenRange) Cmp(token uint32) v1.BoundsCheck { + return i.TokenRange.Cmp(token) +} + type InstancesWithTokenRange []InstanceWithTokenRange func (i InstancesWithTokenRange) Contains(token uint32) bool { @@ -38,22 +68,14 @@ func (i InstancesWithTokenRange) Contains(token uint32) bool { return false } -// GetInstanceTokenRange calculates the token range for a specific instance +// KeyRangeForInstance calculates the token range for a specific instance // with given id based on the first token in the ring. // This assumes that each instance in the ring is configured with only a single // token. -func GetInstanceWithTokenRange(id string, instances []ring.InstanceDesc) InstancesWithTokenRange { - - // Sorting the tokens of the instances would not be necessary if there is - // only a single token per instances, however, since we only assume one - // token, but don't enforce one token, we keep the sorting. 
- for _, inst := range instances { - sort.Slice(inst.Tokens, func(i, j int) bool { - return inst.Tokens[i] < inst.Tokens[j] - }) - } +func KeyRangeForInstance[T constraints.Unsigned](id string, instances []ring.InstanceDesc, keyspace Range[T]) (Range[T], error) { - // Sort instances + // Sort instances -- they may not be sorted + // because they're usually accessed by looking up the tokens (which are sorted) sort.Slice(instances, func(i, j int) bool { return instances[i].Tokens[0] < instances[j].Tokens[0] }) @@ -64,83 +86,61 @@ func GetInstanceWithTokenRange(id string, instances []ring.InstanceDesc) Instanc // instance with Id == id not found if idx == -1 { - return InstancesWithTokenRange{} + return Range[T]{}, ring.ErrInstanceNotFound } - i := uint32(idx) - n := uint32(len(instances)) - step := math.MaxUint32 / n + diff := keyspace.Max - keyspace.Min + i := T(idx) + n := T(len(instances)) - minToken := step * i - maxToken := step*i + step - 1 - if i == n-1 { - // extend the last token tange to MaxUint32 - maxToken = math.MaxUint32 + if diff < n { + return Range[T]{}, errors.New("keyspace is smaller than amount of instances") } - return InstancesWithTokenRange{ - {MinToken: minToken, MaxToken: maxToken, Instance: instances[i]}, + step := diff / n + min := step * i + max := step*i + step - 1 + if i == n-1 { + // extend the last token tange to MaxUint32 + max = (keyspace.Max - keyspace.Min) } -} -// GetInstancesWithTokenRanges calculates the token ranges for a specific -// instance with given id based on all tokens in the ring. -// If the instances in the ring are configured with a single token, such as the -// bloom compactor, use GetInstanceWithTokenRange() instead. -func GetInstancesWithTokenRanges(id string, instances []ring.InstanceDesc) InstancesWithTokenRange { - servers := make([]InstanceWithTokenRange, 0, len(instances)) - it := NewInstanceSortMergeIterator(instances) - var firstInst ring.InstanceDesc - var lastToken uint32 - for it.Next() { - if firstInst.Id == "" { - firstInst = it.At().Instance - } - if it.At().Instance.Id == id { - servers = append(servers, it.At()) - } - lastToken = it.At().MaxToken - } - // append token range from lastToken+1 to MaxUint32 - // only if the instance with the first token is the current one - if len(servers) > 0 && firstInst.Id == id { - servers = append(servers, InstanceWithTokenRange{ - MinToken: lastToken + 1, - MaxToken: math.MaxUint32, - Instance: servers[0].Instance, - }) - } - return servers + return Range[T]{min, max}, nil } // NewInstanceSortMergeIterator creates an iterator that yields instanceWithToken elements // where the token of the elements are sorted in ascending order. 
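A minimal, self-contained sketch of calling the new generic helper from outside the package, assuming only the exported API introduced in this file (Range, Uint64Range, KeyRangeForInstance) and mirroring the three-instance ring used in the tests below:

package main

import (
	"fmt"

	"github.com/grafana/dskit/ring"

	"github.com/grafana/loki/pkg/bloomutils"
)

func main() {
	// Three instances with a single token each, as in ring_test.go.
	instances := []ring.InstanceDesc{
		{Id: "1", Tokens: []uint32{3}},
		{Id: "2", Tokens: []uint32{5}},
		{Id: "3", Tokens: []uint32{1}},
	}

	// Instance "1" owns the middle third of the uint64 keyspace.
	r, err := bloomutils.KeyRangeForInstance("1", instances, bloomutils.Uint64Range)
	if err != nil {
		// ring.ErrInstanceNotFound when the id is not in the ring, or an
		// error when the keyspace is smaller than the number of instances.
		panic(err)
	}
	fmt.Println(r.Min, r.Max) // math.MaxUint64/3 .. math.MaxUint64/3*2 - 1
}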
func NewInstanceSortMergeIterator(instances []ring.InstanceDesc) v1.Iterator[InstanceWithTokenRange] { - it := &sortMergeIterator[ring.InstanceDesc, uint32, InstanceWithTokenRange]{ - items: instances, - transform: func(item ring.InstanceDesc, val uint32, prev *InstanceWithTokenRange) *InstanceWithTokenRange { - var prevToken uint32 - if prev != nil { - prevToken = prev.MaxToken + 1 - } - return &InstanceWithTokenRange{Instance: item, MinToken: prevToken, MaxToken: val} - }, - } - sequences := make([]v1.PeekingIterator[v1.IndexedValue[uint32]], 0, len(instances)) - for i := range instances { - sort.Slice(instances[i].Tokens, func(a, b int) bool { - return instances[i].Tokens[a] < instances[i].Tokens[b] - }) - iter := v1.NewIterWithIndex[uint32](v1.NewSliceIter(instances[i].Tokens), i) - sequences = append(sequences, v1.NewPeekingIter[v1.IndexedValue[uint32]](iter)) + tokenIters := make([]v1.PeekingIterator[v1.IndexedValue[uint32]], 0, len(instances)) + for i, inst := range instances { + sort.Slice(inst.Tokens, func(a, b int) bool { return inst.Tokens[a] < inst.Tokens[b] }) + itr := v1.NewIterWithIndex(v1.NewSliceIter[uint32](inst.Tokens), i) + tokenIters = append(tokenIters, v1.NewPeekingIter[v1.IndexedValue[uint32]](itr)) } - it.heap = v1.NewHeapIterator( - func(i, j v1.IndexedValue[uint32]) bool { - return i.Value() < j.Value() + + heapIter := v1.NewHeapIterator[v1.IndexedValue[uint32]]( + func(iv1, iv2 v1.IndexedValue[uint32]) bool { + return iv1.Value() < iv2.Value() }, - sequences..., + tokenIters..., ) - it.err = nil - return it + prevToken := -1 + return v1.NewDedupingIter[v1.IndexedValue[uint32], InstanceWithTokenRange]( + func(iv v1.IndexedValue[uint32], iwtr InstanceWithTokenRange) bool { + return false + }, + func(iv v1.IndexedValue[uint32]) InstanceWithTokenRange { + minToken, maxToken := uint32(prevToken+1), iv.Value() + prevToken = int(maxToken) + return InstanceWithTokenRange{ + Instance: instances[iv.Index()], + TokenRange: NewTokenRange(minToken, maxToken), + } + }, + func(iv v1.IndexedValue[uint32], iwtr InstanceWithTokenRange) InstanceWithTokenRange { + panic("must not be called, because Eq() is always false") + }, + v1.NewPeekingIter(heapIter), + ) } diff --git a/pkg/bloomutils/ring_test.go b/pkg/bloomutils/ring_test.go index 30da072021edf..c9ff6cf5e1d60 100644 --- a/pkg/bloomutils/ring_test.go +++ b/pkg/bloomutils/ring_test.go @@ -8,18 +8,23 @@ import ( "github.com/stretchr/testify/require" ) -func TestBloomGatewayClient_SortInstancesByToken(t *testing.T) { +func TestBloomGatewayClient_InstanceSortMergeIterator(t *testing.T) { + // | 0 1 2 3 4 5 6 7 8 9 | + // ---------+---------------------+ + // ID 1 | ***o ***o | + // ID 2 | ***o ***o | + // ID 3 | **o | input := []ring.InstanceDesc{ {Id: "1", Tokens: []uint32{5, 9}}, {Id: "2", Tokens: []uint32{3, 7}}, {Id: "3", Tokens: []uint32{1}}, } expected := []InstanceWithTokenRange{ - {Instance: input[2], MinToken: 0, MaxToken: 1}, - {Instance: input[1], MinToken: 2, MaxToken: 3}, - {Instance: input[0], MinToken: 4, MaxToken: 5}, - {Instance: input[1], MinToken: 6, MaxToken: 7}, - {Instance: input[0], MinToken: 8, MaxToken: 9}, + {Instance: input[2], TokenRange: NewTokenRange(0, 1)}, + {Instance: input[1], TokenRange: NewTokenRange(2, 3)}, + {Instance: input[0], TokenRange: NewTokenRange(4, 5)}, + {Instance: input[1], TokenRange: NewTokenRange(6, 7)}, + {Instance: input[0], TokenRange: NewTokenRange(8, 9)}, } var i int @@ -31,43 +36,15 @@ func TestBloomGatewayClient_SortInstancesByToken(t *testing.T) { } } -func 
TestBloomGatewayClient_GetInstancesWithTokenRanges(t *testing.T) { - t.Run("instance does not own first token in the ring", func(t *testing.T) { - input := []ring.InstanceDesc{ - {Id: "1", Tokens: []uint32{5, 9}}, - {Id: "2", Tokens: []uint32{3, 7}}, - {Id: "3", Tokens: []uint32{1}}, - } - expected := InstancesWithTokenRange{ - {Instance: input[1], MinToken: 2, MaxToken: 3}, - {Instance: input[1], MinToken: 6, MaxToken: 7}, - } - - result := GetInstancesWithTokenRanges("2", input) - require.Equal(t, expected, result) - }) - - t.Run("instance owns first token in the ring", func(t *testing.T) { - input := []ring.InstanceDesc{ - {Id: "1", Tokens: []uint32{5, 9}}, - {Id: "2", Tokens: []uint32{3, 7}}, - {Id: "3", Tokens: []uint32{1}}, - } - expected := InstancesWithTokenRange{ - {Instance: input[2], MinToken: 0, MaxToken: 1}, - {Instance: input[2], MinToken: 10, MaxToken: math.MaxUint32}, - } - - result := GetInstancesWithTokenRanges("3", input) - require.Equal(t, expected, result) - }) +func uint64Range(min, max uint64) Range[uint64] { + return Range[uint64]{min, max} } -func TestBloomGatewayClient_GetInstanceWithTokenRange(t *testing.T) { +func TestBloomGatewayClient_KeyRangeForInstance(t *testing.T) { for name, tc := range map[string]struct { id string input []ring.InstanceDesc - expected InstancesWithTokenRange + expected Range[uint64] }{ "first instance includes 0 token": { id: "3", @@ -76,9 +53,7 @@ func TestBloomGatewayClient_GetInstanceWithTokenRange(t *testing.T) { {Id: "2", Tokens: []uint32{5}}, {Id: "3", Tokens: []uint32{1}}, }, - expected: InstancesWithTokenRange{ - {Instance: ring.InstanceDesc{Id: "3", Tokens: []uint32{1}}, MinToken: 0, MaxToken: math.MaxUint32/3 - 1}, - }, + expected: uint64Range(0, math.MaxUint64/3-1), }, "middle instance": { id: "1", @@ -87,9 +62,7 @@ func TestBloomGatewayClient_GetInstanceWithTokenRange(t *testing.T) { {Id: "2", Tokens: []uint32{5}}, {Id: "3", Tokens: []uint32{1}}, }, - expected: InstancesWithTokenRange{ - {Instance: ring.InstanceDesc{Id: "1", Tokens: []uint32{3}}, MinToken: math.MaxUint32 / 3, MaxToken: math.MaxUint32/3*2 - 1}, - }, + expected: uint64Range(math.MaxUint64/3, math.MaxUint64/3*2-1), }, "last instance includes MaxUint32 token": { id: "2", @@ -98,14 +71,13 @@ func TestBloomGatewayClient_GetInstanceWithTokenRange(t *testing.T) { {Id: "2", Tokens: []uint32{5}}, {Id: "3", Tokens: []uint32{1}}, }, - expected: InstancesWithTokenRange{ - {Instance: ring.InstanceDesc{Id: "2", Tokens: []uint32{5}}, MinToken: math.MaxUint32 / 3 * 2, MaxToken: math.MaxUint32}, - }, + expected: uint64Range(math.MaxUint64/3*2, math.MaxUint64), }, } { tc := tc t.Run(name, func(t *testing.T) { - result := GetInstanceWithTokenRange(tc.id, tc.input) + result, err := KeyRangeForInstance(tc.id, tc.input, Uint64Range) + require.NoError(t, err) require.Equal(t, tc.expected, result) }) } diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index a5229b0ca1498..f47148fa42b0d 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -52,7 +52,7 @@ import ( const ( ringKey = "distributor" - ringAutoForgetUnhealthyPeriods = 10 + ringAutoForgetUnhealthyPeriods = 2 ) var ( diff --git a/pkg/loghttp/entry.go b/pkg/loghttp/entry.go index 2a55ac9ecd285..0529bf536a2d5 100644 --- a/pkg/loghttp/entry.go +++ b/pkg/loghttp/entry.go @@ -6,7 +6,7 @@ import ( "time" "unsafe" - "github.com/buger/jsonparser" + "github.com/grafana/jsonparser" jsoniter "github.com/json-iterator/go" "github.com/modern-go/reflect2" 
"github.com/prometheus/prometheus/model/labels" diff --git a/pkg/loghttp/labels.go b/pkg/loghttp/labels.go index b15a94ab23414..98bad4e957869 100644 --- a/pkg/loghttp/labels.go +++ b/pkg/loghttp/labels.go @@ -6,8 +6,8 @@ import ( "strconv" "strings" - "github.com/buger/jsonparser" "github.com/gorilla/mux" + "github.com/grafana/jsonparser" "github.com/grafana/loki/pkg/logproto" ) diff --git a/pkg/loghttp/query.go b/pkg/loghttp/query.go index 617754393538c..854ccd5ae7116 100644 --- a/pkg/loghttp/query.go +++ b/pkg/loghttp/query.go @@ -8,7 +8,7 @@ import ( "time" "unsafe" - "github.com/buger/jsonparser" + "github.com/grafana/jsonparser" json "github.com/json-iterator/go" "github.com/prometheus/common/model" diff --git a/pkg/logql/accumulator.go b/pkg/logql/accumulator.go new file mode 100644 index 0000000000000..9e9784cb037ef --- /dev/null +++ b/pkg/logql/accumulator.go @@ -0,0 +1,379 @@ +package logql + +import ( + "container/heap" + "context" + "fmt" + "sort" + "time" + + "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/pkg/logqlmodel" + "github.com/grafana/loki/pkg/logqlmodel/metadata" + "github.com/grafana/loki/pkg/logqlmodel/stats" + "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase/definitions" + "github.com/grafana/loki/pkg/util/math" +) + +// NewBufferedAccumulator returns an accumulator which aggregates all query +// results in a slice. This is useful for metric queries, which are generally +// small payloads and the memory overhead for buffering is negligible. +func NewBufferedAccumulator(n int) *BufferedAccumulator { + return &BufferedAccumulator{ + results: make([]logqlmodel.Result, n), + } +} + +type BufferedAccumulator struct { + results []logqlmodel.Result +} + +func (a *BufferedAccumulator) Accumulate(_ context.Context, acc logqlmodel.Result, i int) error { + a.results[i] = acc + return nil +} + +func (a *BufferedAccumulator) Result() []logqlmodel.Result { + return a.results +} + +type QuantileSketchAccumulator struct { + matrix ProbabilisticQuantileMatrix +} + +// newQuantileSketchAccumulator returns an accumulator for sharded +// probabilistic quantile queries that merges results as they come in. +func newQuantileSketchAccumulator() *QuantileSketchAccumulator { + return &QuantileSketchAccumulator{} +} + +func (a *QuantileSketchAccumulator) Accumulate(_ context.Context, res logqlmodel.Result, _ int) error { + if res.Data.Type() != QuantileSketchMatrixType { + return fmt.Errorf("unexpected matrix data type: got (%s), want (%s)", res.Data.Type(), QuantileSketchMatrixType) + } + data, ok := res.Data.(ProbabilisticQuantileMatrix) + if !ok { + return fmt.Errorf("unexpected matrix type: got (%T), want (ProbabilisticQuantileMatrix)", res.Data) + } + if a.matrix == nil { + a.matrix = data + return nil + } + + var err error + a.matrix, err = a.matrix.Merge(data) + return err +} + +func (a *QuantileSketchAccumulator) Result() []logqlmodel.Result { + return []logqlmodel.Result{{Data: a.matrix}} +} + +// heap impl for keeping only the top n results across m streams +// importantly, AccumulatedStreams is _bounded_, so it will only +// store the top `limit` results across all streams. +// To implement this, we use a min-heap when looking +// for the max values (logproto.FORWARD) +// and vice versa for logproto.BACKWARD. +// This allows us to easily find the 'worst' value +// and replace it with a better one. +// Once we've fully processed all log lines, +// we return the heap in opposite order and then reverse it +// to get the correct order. 
+// Heap implements container/heap.Interface +// solely to use heap.Interface as a library. +// It is not intended for the heap pkg functions +// to otherwise call this type. +type AccumulatedStreams struct { + count, limit int + labelmap map[string]int + streams []*logproto.Stream + order logproto.Direction + + stats stats.Result // for accumulating statistics from downstream requests + headers map[string][]string // for accumulating headers from downstream requests +} + +// NewStreamAccumulator returns an accumulator for limited log queries. +// Log queries, sharded thousands of times and each returning +// results, can be _considerably_ larger. In this case, we eagerly +// accumulate the results into a logsAccumulator, discarding values +// over the limit to keep memory pressure down while other subqueries +// are executing. +func NewStreamAccumulator(params Params) *AccumulatedStreams { + // the stream accumulator stores a heap with reversed order + // from the results we expect, so we need to reverse the direction + order := logproto.FORWARD + if params.Direction() == logproto.FORWARD { + order = logproto.BACKWARD + } + + return &AccumulatedStreams{ + labelmap: make(map[string]int), + order: order, + limit: int(params.Limit()), + + headers: make(map[string][]string), + } +} + +// returns the top priority +func (acc *AccumulatedStreams) top() (time.Time, bool) { + if len(acc.streams) > 0 && len(acc.streams[0].Entries) > 0 { + return acc.streams[0].Entries[len(acc.streams[0].Entries)-1].Timestamp, true + } + return time.Time{}, false +} + +func (acc *AccumulatedStreams) Find(labels string) (int, bool) { + i, ok := acc.labelmap[labels] + return i, ok +} + +// number of streams +func (acc *AccumulatedStreams) Len() int { return len(acc.streams) } + +func (acc *AccumulatedStreams) Swap(i, j int) { + // for i=0, j=1 + + // {'a': 0, 'b': 1} + // [a, b] + acc.streams[i], acc.streams[j] = acc.streams[j], acc.streams[i] + // {'a': 0, 'b': 1} + // [b, a] + acc.labelmap[acc.streams[i].Labels] = i + acc.labelmap[acc.streams[j].Labels] = j + // {'a': 1, 'b': 0} + // [b, a] +} + +// first order by timestamp, then by labels +func (acc *AccumulatedStreams) Less(i, j int) bool { + // order by the 'oldest' entry in the stream + if a, b := acc.streams[i].Entries[len(acc.streams[i].Entries)-1].Timestamp, acc.streams[j].Entries[len(acc.streams[j].Entries)-1].Timestamp; !a.Equal(b) { + return acc.less(a, b) + } + return acc.streams[i].Labels <= acc.streams[j].Labels +} + +func (acc *AccumulatedStreams) less(a, b time.Time) bool { + // use after for stable sort + if acc.order == logproto.FORWARD { + return !a.After(b) + } + return !b.After(a) +} + +func (acc *AccumulatedStreams) Push(x any) { + s := x.(*logproto.Stream) + if len(s.Entries) == 0 { + return + } + + if room := acc.limit - acc.count; room >= len(s.Entries) { + if i, ok := acc.Find(s.Labels); ok { + // stream already exists, append entries + + // these are already guaranteed to be sorted + // Reasoning: we shard subrequests so each stream exists on only one + // shard. Therefore, the only time a stream should already exist + // is in successive splits, which are already guaranteed to be ordered + // and we can just append. + acc.appendTo(acc.streams[i], s) + + return + } + + // new stream + acc.addStream(s) + return + } + + // there's not enough room for all the entries, + // so we need to + acc.push(s) +} + +// there's not enough room for all the entries. +// since we store them in a reverse heap relative to what we _want_ +// (i.e. 
the max value for FORWARD, the min value for BACKWARD), +// we test if the new entry is better than the worst entry, +// swapping them if so. +func (acc *AccumulatedStreams) push(s *logproto.Stream) { + worst, ok := acc.top() + room := math.Min(acc.limit-acc.count, len(s.Entries)) + + if !ok { + if room == 0 { + // special case: limit must be zero since there's no room and no worst entry + return + } + s.Entries = s.Entries[:room] + // special case: there are no entries in the heap. Push entries up to the limit + acc.addStream(s) + return + } + + // since entries are sorted by timestamp from best -> worst, + // we can discard the entire stream if the incoming best entry + // is worse than the worst entry in the heap. + cutoff := sort.Search(len(s.Entries), func(i int) bool { + // TODO(refactor label comparison -- should be in another fn) + if worst.Equal(s.Entries[i].Timestamp) { + return acc.streams[0].Labels < s.Labels + } + return acc.less(s.Entries[i].Timestamp, worst) + }) + s.Entries = s.Entries[:cutoff] + + for i := 0; i < len(s.Entries) && acc.less(worst, s.Entries[i].Timestamp); i++ { + + // push one entry at a time + room = acc.limit - acc.count + // pop if there's no room to make the heap small enough for an append; + // in the short path of Push() we know that there's room for at least one entry + if room == 0 { + acc.Pop() + } + + cpy := *s + cpy.Entries = []logproto.Entry{s.Entries[i]} + acc.Push(&cpy) + + // update worst + worst, _ = acc.top() + } +} + +func (acc *AccumulatedStreams) addStream(s *logproto.Stream) { + // ensure entries conform to order we expect + // TODO(owen-d): remove? should be unnecessary since we insert in appropriate order + // but it's nice to have the safeguard + sort.Slice(s.Entries, func(i, j int) bool { + return acc.less(s.Entries[j].Timestamp, s.Entries[i].Timestamp) + }) + + acc.streams = append(acc.streams, s) + i := len(acc.streams) - 1 + acc.labelmap[s.Labels] = i + acc.count += len(s.Entries) + heap.Fix(acc, i) +} + +// dst must already exist in acc +func (acc *AccumulatedStreams) appendTo(dst, src *logproto.Stream) { + // these are already guaranteed to be sorted + // Reasoning: we shard subrequests so each stream exists on only one + // shard. Therefore, the only time a stream should already exist + // is in successive splits, which are already guaranteed to be ordered + // and we can just append. + + var needsSort bool + for _, e := range src.Entries { + // sort if order has broken + if len(dst.Entries) > 0 && acc.less(dst.Entries[len(dst.Entries)-1].Timestamp, e.Timestamp) { + needsSort = true + } + dst.Entries = append(dst.Entries, e) + } + + if needsSort { + sort.Slice(dst.Entries, func(i, j int) bool { + // store in reverse order so we can more reliably insert without sorting and pop from end + return acc.less(dst.Entries[j].Timestamp, dst.Entries[i].Timestamp) + }) + } + + acc.count += len(src.Entries) + heap.Fix(acc, acc.labelmap[dst.Labels]) + +} + +// Pop returns a stream with one entry. 
It pops the first entry of the first stream +func (acc *AccumulatedStreams) Pop() any { + n := acc.Len() + if n == 0 { + return nil + } + + stream := acc.streams[0] + cpy := *stream + cpy.Entries = []logproto.Entry{cpy.Entries[len(stream.Entries)-1]} + stream.Entries = stream.Entries[:len(stream.Entries)-1] + + acc.count-- + + if len(stream.Entries) == 0 { + // remove stream + acc.Swap(0, n-1) + acc.streams[n-1] = nil // avoid leaking reference + delete(acc.labelmap, stream.Labels) + acc.streams = acc.streams[:n-1] + + } + + if acc.Len() > 0 { + heap.Fix(acc, 0) + } + + return &cpy +} + +// Note: can only be called once as it will alter stream ordreing. +func (acc *AccumulatedStreams) Result() []logqlmodel.Result { + // sort streams by label + sort.Slice(acc.streams, func(i, j int) bool { + return acc.streams[i].Labels < acc.streams[j].Labels + }) + + streams := make(logqlmodel.Streams, 0, len(acc.streams)) + + for _, s := range acc.streams { + // sort entries by timestamp, inversely based on direction + sort.Slice(s.Entries, func(i, j int) bool { + return acc.less(s.Entries[j].Timestamp, s.Entries[i].Timestamp) + }) + streams = append(streams, *s) + } + + res := logqlmodel.Result{ + // stats & headers are already aggregated in the context + Data: streams, + Statistics: acc.stats, + Headers: make([]*definitions.PrometheusResponseHeader, 0, len(acc.headers)), + } + + for name, vals := range acc.headers { + res.Headers = append( + res.Headers, + &definitions.PrometheusResponseHeader{ + Name: name, + Values: vals, + }, + ) + } + + return []logqlmodel.Result{res} +} + +func (acc *AccumulatedStreams) Accumulate(_ context.Context, x logqlmodel.Result, _ int) error { + // TODO(owen-d/ewelch): Shard counts should be set by the querier + // so we don't have to do it in tricky ways in multiple places. + // See pkg/logql/downstream.go:DownstreamEvaluator.Downstream + // for another example. + if x.Statistics.Summary.Shards == 0 { + x.Statistics.Summary.Shards = 1 + } + acc.stats.Merge(x.Statistics) + metadata.ExtendHeaders(acc.headers, x.Headers) + + switch got := x.Data.(type) { + case logqlmodel.Streams: + for i := range got { + acc.Push(&got[i]) + } + default: + return fmt.Errorf("unexpected response type during response result accumulation. 
Got (%T), wanted %s", got, logqlmodel.ValueTypeStreams) + } + return nil +} diff --git a/pkg/logql/accumulator_test.go b/pkg/logql/accumulator_test.go new file mode 100644 index 0000000000000..d827e3ea02e71 --- /dev/null +++ b/pkg/logql/accumulator_test.go @@ -0,0 +1,273 @@ +package logql + +import ( + "context" + "fmt" + "math/rand" + "testing" + "time" + + "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/pkg/logql/sketch" + "github.com/grafana/loki/pkg/logqlmodel" +) + +func TestAccumulatedStreams(t *testing.T) { + lim := 30 + nStreams := 10 + start, end := 0, 10 + // for a logproto.BACKWARD query, we use a min heap based on FORWARD + // to store the _earliest_ timestamp of the _latest_ entries, up to `limit` + xs := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, nStreams, logproto.BACKWARD) + acc := NewStreamAccumulator(LiteralParams{ + direction: logproto.BACKWARD, + limit: uint32(lim), + }) + for _, x := range xs { + acc.Push(x) + } + + for i := 0; i < lim; i++ { + got := acc.Pop().(*logproto.Stream) + require.Equal(t, fmt.Sprintf(`{n="%d"}`, i%nStreams), got.Labels) + exp := (nStreams*(end-start) - lim + i) / nStreams + require.Equal(t, time.Unix(int64(exp), 0), got.Entries[0].Timestamp) + } + +} + +func TestDownstreamAccumulatorSimple(t *testing.T) { + lim := 30 + start, end := 0, 10 + direction := logproto.BACKWARD + + streams := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, 10, direction) + x := make(logqlmodel.Streams, 0, len(streams)) + for _, s := range streams { + x = append(x, *s) + } + // dummy params. Only need to populate direction & limit + params, err := NewLiteralParams( + `{app="foo"}`, time.Time{}, time.Time{}, 0, 0, direction, uint32(lim), nil, + ) + require.NoError(t, err) + + acc := NewStreamAccumulator(params) + result := logqlmodel.Result{ + Data: x, + } + + require.Nil(t, acc.Accumulate(context.Background(), result, 0)) + + res := acc.Result()[0] + got, ok := res.Data.(logqlmodel.Streams) + require.Equal(t, true, ok) + require.Equal(t, 10, len(got), "correct number of streams") + + // each stream should have the top 3 entries + for i := 0; i < 10; i++ { + require.Equal(t, 3, len(got[i].Entries), "correct number of entries in stream") + for j := 0; j < 3; j++ { + require.Equal(t, time.Unix(int64(9-j), 0), got[i].Entries[j].Timestamp, "correct timestamp") + } + } +} + +// TestDownstreamAccumulatorMultiMerge simulates merging multiple +// sub-results from different queries. +func TestDownstreamAccumulatorMultiMerge(t *testing.T) { + for _, direction := range []logproto.Direction{logproto.BACKWARD, logproto.FORWARD} { + t.Run(direction.String(), func(t *testing.T) { + nQueries := 10 + delta := 10 // 10 entries per stream, 1s apart + streamsPerQuery := 10 + lim := 30 + + payloads := make([]logqlmodel.Streams, 0, nQueries) + for i := 0; i < nQueries; i++ { + start := i * delta + end := start + delta + streams := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, streamsPerQuery, direction) + var res logqlmodel.Streams + for i := range streams { + res = append(res, *streams[i]) + } + payloads = append(payloads, res) + + } + + // queries are always dispatched in the correct order. 
+ // oldest time ranges first in the case of logproto.FORWARD + // and newest time ranges first in the case of logproto.BACKWARD + if direction == logproto.BACKWARD { + for i, j := 0, len(payloads)-1; i < j; i, j = i+1, j-1 { + payloads[i], payloads[j] = payloads[j], payloads[i] + } + } + + // dummy params. Only need to populate direction & limit + params, err := NewLiteralParams( + `{app="foo"}`, time.Time{}, time.Time{}, 0, 0, direction, uint32(lim), nil, + ) + require.NoError(t, err) + + acc := NewStreamAccumulator(params) + for i := 0; i < nQueries; i++ { + err := acc.Accumulate(context.Background(), logqlmodel.Result{ + Data: payloads[i], + }, i) + require.Nil(t, err) + } + + got, ok := acc.Result()[0].Data.(logqlmodel.Streams) + require.Equal(t, true, ok) + require.Equal(t, int64(nQueries), acc.Result()[0].Statistics.Summary.Shards) + + // each stream should have the top 3 entries + for i := 0; i < streamsPerQuery; i++ { + stream := got[i] + require.Equal(t, fmt.Sprintf(`{n="%d"}`, i), stream.Labels, "correct labels") + ln := lim / streamsPerQuery + require.Equal(t, ln, len(stream.Entries), "correct number of entries in stream") + switch direction { + case logproto.BACKWARD: + for i := 0; i < ln; i++ { + offset := delta*nQueries - 1 - i + require.Equal(t, time.Unix(int64(offset), 0), stream.Entries[i].Timestamp, "correct timestamp") + } + default: + for i := 0; i < ln; i++ { + offset := i + require.Equal(t, time.Unix(int64(offset), 0), stream.Entries[i].Timestamp, "correct timestamp") + } + } + } + }) + } +} + +func BenchmarkAccumulator(b *testing.B) { + + // dummy params. Only need to populate direction & limit + lim := 30 + params, err := NewLiteralParams( + `{app="foo"}`, time.Time{}, time.Time{}, 0, 0, logproto.BACKWARD, uint32(lim), nil, + ) + require.NoError(b, err) + + for acc, tc := range map[string]struct { + results []logqlmodel.Result + newAcc func(Params, []logqlmodel.Result) Accumulator + params Params + }{ + "streams": { + newStreamResults(), + func(p Params, _ []logqlmodel.Result) Accumulator { + return NewStreamAccumulator(p) + }, + params, + }, + "quantile sketches": { + newQuantileSketchResults(), + func(p Params, _ []logqlmodel.Result) Accumulator { + return newQuantileSketchAccumulator() + }, + params, + }, + } { + b.Run(acc, func(b *testing.B) { + b.ResetTimer() + b.ReportAllocs() + for n := 0; n < b.N; n++ { + + acc := tc.newAcc(params, tc.results) + for i, r := range tc.results { + err := acc.Accumulate(context.Background(), r, i) + require.Nil(b, err) + } + + acc.Result() + } + }) + } +} + +func newStreamResults() []logqlmodel.Result { + nQueries := 50 + delta := 100 // 10 entries per stream, 1s apart + streamsPerQuery := 50 + + results := make([]logqlmodel.Result, nQueries) + for i := 0; i < nQueries; i++ { + start := i * delta + end := start + delta + streams := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, streamsPerQuery, logproto.BACKWARD) + var res logqlmodel.Streams + for i := range streams { + res = append(res, *streams[i]) + } + results[i] = logqlmodel.Result{Data: res} + + } + + return results +} + +func newQuantileSketchResults() []logqlmodel.Result { + results := make([]logqlmodel.Result, 100) + + for r := range results { + vectors := make([]ProbabilisticQuantileVector, 10) + for i := range vectors { + vectors[i] = make(ProbabilisticQuantileVector, 10) + for j := range vectors[i] { + vectors[i][j] = ProbabilisticQuantileSample{ + T: int64(i), + F: newRandomSketch(), + Metric: []labels.Label{{Name: "foo", Value: 
fmt.Sprintf("bar-%d", j)}}, + } + } + } + results[r] = logqlmodel.Result{Data: ProbabilisticQuantileMatrix(vectors)} + } + + return results +} + +func newStreamWithDirection(start, end time.Time, delta time.Duration, ls string, direction logproto.Direction) *logproto.Stream { + s := &logproto.Stream{ + Labels: ls, + } + for t := start; t.Before(end); t = t.Add(delta) { + s.Entries = append(s.Entries, logproto.Entry{ + Timestamp: t, + Line: fmt.Sprintf("%d", t.Unix()), + }) + } + if direction == logproto.BACKWARD { + // simulate data coming in reverse order (logproto.BACKWARD) + for i, j := 0, len(s.Entries)-1; i < j; i, j = i+1, j-1 { + s.Entries[i], s.Entries[j] = s.Entries[j], s.Entries[i] + } + } + return s +} + +func newStreams(start, end time.Time, delta time.Duration, n int, direction logproto.Direction) (res []*logproto.Stream) { + for i := 0; i < n; i++ { + res = append(res, newStreamWithDirection(start, end, delta, fmt.Sprintf(`{n="%d"}`, i), direction)) + } + return res +} + +func newRandomSketch() sketch.QuantileSketch { + r := rand.New(rand.NewSource(42)) + s := sketch.NewDDSketch() + for i := 0; i < 1000; i++ { + _ = s.Add(r.Float64()) + } + return s +} diff --git a/pkg/logql/downstream.go b/pkg/logql/downstream.go index 76594dc040c22..33d945f11b923 100644 --- a/pkg/logql/downstream.go +++ b/pkg/logql/downstream.go @@ -83,6 +83,29 @@ func (d DownstreamSampleExpr) String() string { return fmt.Sprintf("downstream<%s, shard=%s>", d.SampleExpr.String(), d.shard) } +// The DownstreamSampleExpr is not part of LogQL. In the prettified version it's +// represented as e.g. `downstream` +func (d DownstreamSampleExpr) Pretty(level int) string { + s := syntax.Indent(level) + if !syntax.NeedSplit(d) { + return s + d.String() + } + + s += "downstream<\n" + + s += d.SampleExpr.Pretty(level + 1) + s += ",\n" + s += syntax.Indent(level+1) + "shard=" + if d.shard != nil { + s += d.shard.String() + "\n" + } else { + s += "nil\n" + } + + s += syntax.Indent(level) + ">" + return s +} + // DownstreamLogSelectorExpr is a LogSelectorExpr which signals downstream computation type DownstreamLogSelectorExpr struct { shard *astmapper.ShardAnnotation @@ -93,6 +116,29 @@ func (d DownstreamLogSelectorExpr) String() string { return fmt.Sprintf("downstream<%s, shard=%s>", d.LogSelectorExpr.String(), d.shard) } +// The DownstreamLogSelectorExpr is not part of LogQL. In the prettified version it's +// represented as e.g. `downstream<{foo="bar"} |= "error", shard=1_of_3>` +func (d DownstreamLogSelectorExpr) Pretty(level int) string { + s := syntax.Indent(level) + if !syntax.NeedSplit(d) { + return s + d.String() + } + + s += "downstream<\n" + + s += d.LogSelectorExpr.Pretty(level + 1) + s += ",\n" + s += syntax.Indent(level+1) + "shard=" + if d.shard != nil { + s += d.shard.String() + "\n" + } else { + s += "nil\n" + } + + s += syntax.Indent(level) + ">" + return s +} + func (d DownstreamSampleExpr) Walk(f syntax.WalkFn) { f(d) } var defaultMaxDepth = 4 @@ -105,7 +151,7 @@ type ConcatSampleExpr struct { next *ConcatSampleExpr } -func (c ConcatSampleExpr) String() string { +func (c *ConcatSampleExpr) String() string { if c.next == nil { return c.DownstreamSampleExpr.String() } @@ -115,7 +161,7 @@ func (c ConcatSampleExpr) String() string { // in order to not display huge queries with thousands of shards, // we can limit the number of stringified subqueries. 
-func (c ConcatSampleExpr) string(maxDepth int) string { +func (c *ConcatSampleExpr) string(maxDepth int) string { if c.next == nil { return c.DownstreamSampleExpr.String() } @@ -125,18 +171,46 @@ func (c ConcatSampleExpr) string(maxDepth int) string { return fmt.Sprintf("%s ++ %s", c.DownstreamSampleExpr.String(), c.next.string(maxDepth-1)) } -func (c ConcatSampleExpr) Walk(f syntax.WalkFn) { +func (c *ConcatSampleExpr) Walk(f syntax.WalkFn) { f(c) f(c.next) } +// ConcatSampleExpr has no LogQL repretenstation. It is expressed in in the +// prettified version as e.g. `concat(downstream ++ )` +func (c *ConcatSampleExpr) Pretty(level int) string { + s := syntax.Indent(level) + if !syntax.NeedSplit(c) { + return s + c.String() + } + + s += "concat(\n" + + head := c + for i := 0; i < defaultMaxDepth && head != nil; i++ { + if i > 0 { + s += syntax.Indent(level+1) + "++\n" + } + s += head.DownstreamSampleExpr.Pretty(level + 1) + s += "\n" + head = head.next + } + // There are more downstream samples... + if head != nil { + s += syntax.Indent(level+1) + "++ ...\n" + } + s += syntax.Indent(level) + ")" + + return s +} + // ConcatLogSelectorExpr is an expr for concatenating multiple LogSelectorExpr type ConcatLogSelectorExpr struct { DownstreamLogSelectorExpr next *ConcatLogSelectorExpr } -func (c ConcatLogSelectorExpr) String() string { +func (c *ConcatLogSelectorExpr) String() string { if c.next == nil { return c.DownstreamLogSelectorExpr.String() } @@ -146,7 +220,7 @@ func (c ConcatLogSelectorExpr) String() string { // in order to not display huge queries with thousands of shards, // we can limit the number of stringified subqueries. -func (c ConcatLogSelectorExpr) string(maxDepth int) string { +func (c *ConcatLogSelectorExpr) string(maxDepth int) string { if c.next == nil { return c.DownstreamLogSelectorExpr.String() } @@ -156,6 +230,34 @@ func (c ConcatLogSelectorExpr) string(maxDepth int) string { return fmt.Sprintf("%s ++ %s", c.DownstreamLogSelectorExpr.String(), c.next.string(maxDepth-1)) } +// ConcatLogSelectorExpr has no representation in LogQL. Its prettified version +// is e.g. `concat(downstream<{foo="bar"} |= "error", shard=1_of_3>)` +func (c *ConcatLogSelectorExpr) Pretty(level int) string { + s := syntax.Indent(level) + if !syntax.NeedSplit(c) { + return s + c.String() + } + + s += "concat(\n" + + head := c + for i := 0; i < defaultMaxDepth && head != nil; i++ { + if i > 0 { + s += syntax.Indent(level+1) + "++\n" + } + s += head.DownstreamLogSelectorExpr.Pretty(level + 1) + s += "\n" + head = head.next + } + // There are more downstream samples... + if head != nil { + s += syntax.Indent(level+1) + "++ ...\n" + } + s += ")" + + return s +} + // QuantileSketchEvalExpr evaluates a quantile sketch to the actual quantile. type QuantileSketchEvalExpr struct { syntax.SampleExpr @@ -244,7 +346,13 @@ type Resp struct { // Downstreamer is an interface for deferring responsibility for query execution. // It is decoupled from but consumed by a downStreamEvaluator to dispatch ASTs. type Downstreamer interface { - Downstream(context.Context, []DownstreamQuery) ([]logqlmodel.Result, error) + Downstream(context.Context, []DownstreamQuery, Accumulator) ([]logqlmodel.Result, error) +} + +// Accumulator is an interface for accumulating query results. 
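Downstreamer implementations now receive the accumulator from the evaluator instead of buffering results themselves; a hedged sketch of the expected contract inside package logql, where sketchDownstreamer and runQuery are hypothetical stand-ins and not part of this change:

// Sketch only: illustrates how a Downstreamer is expected to use the
// Accumulator it is handed; runQuery stands in for real query execution.
type sketchDownstreamer struct {
	runQuery func(context.Context, DownstreamQuery) (logqlmodel.Result, error)
}

func (d sketchDownstreamer) Downstream(ctx context.Context, queries []DownstreamQuery, acc Accumulator) ([]logqlmodel.Result, error) {
	for i, q := range queries {
		res, err := d.runQuery(ctx, q)
		if err != nil {
			return nil, err
		}
		// fold each sub-result into the accumulator as it completes
		if err := acc.Accumulate(ctx, res, i); err != nil {
			return nil, err
		}
	}
	// the merged results are what the evaluator consumes downstream
	return acc.Result(), nil
}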
+type Accumulator interface { + Accumulate(context.Context, logqlmodel.Result, int) error + Result() []logqlmodel.Result } // DownstreamEvaluator is an evaluator which handles shard aware AST nodes @@ -254,8 +362,8 @@ type DownstreamEvaluator struct { } // Downstream runs queries and collects stats from the embedded Downstreamer -func (ev DownstreamEvaluator) Downstream(ctx context.Context, queries []DownstreamQuery) ([]logqlmodel.Result, error) { - results, err := ev.Downstreamer.Downstream(ctx, queries) +func (ev DownstreamEvaluator) Downstream(ctx context.Context, queries []DownstreamQuery, acc Accumulator) ([]logqlmodel.Result, error) { + results, err := ev.Downstreamer.Downstream(ctx, queries, acc) if err != nil { return nil, err } @@ -314,12 +422,13 @@ func (ev *DownstreamEvaluator) NewStepEvaluator( if e.shard != nil { shards = append(shards, *e.shard) } + acc := NewBufferedAccumulator(1) results, err := ev.Downstream(ctx, []DownstreamQuery{{ Params: ParamsWithShardsOverride{ Params: ParamsWithExpressionOverride{Params: params, ExpressionOverride: e.SampleExpr}, ShardsOverride: Shards(shards).Encode(), }, - }}) + }}, acc) if err != nil { return nil, err } @@ -339,7 +448,8 @@ func (ev *DownstreamEvaluator) NewStepEvaluator( cur = cur.next } - results, err := ev.Downstream(ctx, queries) + acc := NewBufferedAccumulator(len(queries)) + results, err := ev.Downstream(ctx, queries, acc) if err != nil { return nil, err } @@ -379,7 +489,8 @@ func (ev *DownstreamEvaluator) NewStepEvaluator( } } - results, err := ev.Downstream(ctx, queries) + acc := newQuantileSketchAccumulator() + results, err := ev.Downstream(ctx, queries, acc) if err != nil { return nil, err } @@ -413,12 +524,13 @@ func (ev *DownstreamEvaluator) NewIterator( if e.shard != nil { shards = append(shards, *e.shard) } + acc := NewStreamAccumulator(params) results, err := ev.Downstream(ctx, []DownstreamQuery{{ Params: ParamsWithShardsOverride{ Params: ParamsWithExpressionOverride{Params: params, ExpressionOverride: e.LogSelectorExpr}, ShardsOverride: shards.Encode(), }, - }}) + }}, acc) if err != nil { return nil, err } @@ -438,7 +550,8 @@ func (ev *DownstreamEvaluator) NewIterator( cur = cur.next } - results, err := ev.Downstream(ctx, queries) + acc := NewStreamAccumulator(params) + results, err := ev.Downstream(ctx, queries, acc) if err != nil { return nil, err } diff --git a/pkg/logql/downstream_test.go b/pkg/logql/downstream_test.go index 426722a554594..ec5f3170468d0 100644 --- a/pkg/logql/downstream_test.go +++ b/pkg/logql/downstream_test.go @@ -8,12 +8,14 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/user" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql/syntax" + "github.com/grafana/loki/pkg/querier/astmapper" ) var nilShardMetrics = NewShardMapperMetrics(nil) @@ -543,3 +545,142 @@ func relativeError(t *testing.T, expected, actual promql.Matrix, alpha float64) require.InEpsilonSlice(t, e, a, alpha) } } + +func TestFormat_ShardedExpr(t *testing.T) { + oldMax := syntax.MaxCharsPerLine + syntax.MaxCharsPerLine = 20 + + oldDefaultDepth := defaultMaxDepth + defaultMaxDepth = 2 + defer func() { + syntax.MaxCharsPerLine = oldMax + defaultMaxDepth = oldDefaultDepth + }() + + cases := []struct { + name string + in syntax.Expr + exp string + }{ + { + name: "ConcatSampleExpr", + in: &ConcatSampleExpr{ + DownstreamSampleExpr: 
DownstreamSampleExpr{ + shard: &astmapper.ShardAnnotation{ + Shard: 0, + Of: 3, + }, + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeRate, + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")}, + }, + Interval: time.Minute, + }, + }, + }, + next: &ConcatSampleExpr{ + DownstreamSampleExpr: DownstreamSampleExpr{ + shard: &astmapper.ShardAnnotation{ + Shard: 1, + Of: 3, + }, + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeRate, + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")}, + }, + Interval: time.Minute, + }, + }, + }, + next: &ConcatSampleExpr{ + DownstreamSampleExpr: DownstreamSampleExpr{ + shard: &astmapper.ShardAnnotation{ + Shard: 1, + Of: 3, + }, + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeRate, + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")}, + }, + Interval: time.Minute, + }, + }, + }, + next: nil, + }, + }, + }, + exp: `concat( + downstream< + rate( + {foo="bar"} [1m] + ), + shard=0_of_3 + > + ++ + downstream< + rate( + {foo="bar"} [1m] + ), + shard=1_of_3 + > + ++ ... +)`, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got := syntax.Prettify(c.in) + assert.Equal(t, c.exp, got) + }) + } +} + +func TestPrettierWithoutShards(t *testing.T) { + q := `((quantile_over_time(0.5,{foo="bar"} | json | unwrap bytes[1d]) by (cluster) > 42) and (count by (cluster)(max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace)) > 10))` + e := syntax.MustParseExpr(q) + + mapper := NewShardMapper(ConstantShards(4), nilShardMetrics, []string{}) + _, _, mapped, err := mapper.Parse(e) + require.NoError(t, err) + got := syntax.Prettify(mapped) + expected := ` downstream> + > + 42 +and + count by (cluster)( + max by (cluster, namespace)( + concat( + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=0_of_4 + > + ++ + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=1_of_4 + > + ++ + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=2_of_4 + > + ++ + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=3_of_4 + > + ) + ) + ) + > + 10` + assert.Equal(t, expected, got) +} diff --git a/pkg/logql/log/parser.go b/pkg/logql/log/parser.go index c03e7c91cb960..90d4a4bebf8ab 100644 --- a/pkg/logql/log/parser.go +++ b/pkg/logql/log/parser.go @@ -6,7 +6,7 @@ import ( "fmt" "unicode/utf8" - "github.com/buger/jsonparser" + "github.com/grafana/jsonparser" "github.com/grafana/loki/pkg/logql/log/jsonexpr" "github.com/grafana/loki/pkg/logql/log/logfmt" diff --git a/pkg/logql/log/parser_test.go b/pkg/logql/log/parser_test.go index bd57603ab8084..f8cf6373a152f 100644 --- a/pkg/logql/log/parser_test.go +++ b/pkg/logql/log/parser_test.go @@ -237,7 +237,7 @@ func (p *fakeParseHints) ShouldContinueParsingLine(_ string, _ *LabelsBuilder) b } func TestJSONExpressionParser(t *testing.T) { - testLine := []byte(`{"app":"foo","field with space":"value","field with ÜFT8👌":"value","null_field":null,"bool_field":false,"namespace":"prod","pod":{"uuid":"foo","deployment":{"ref":"foobar", "params": [1,2,3]}}}`) + testLine := 
[]byte(`{"app":"foo","field with space":"value","field with ÜFT8👌":"value","null_field":null,"bool_field":false,"namespace":"prod","pod":{"uuid":"foo","deployment":{"ref":"foobar", "params": [1,2,3,"string_value"]}}}`) tests := []struct { name string @@ -340,6 +340,16 @@ func TestJSONExpressionParser(t *testing.T) { labels.FromStrings("param", "1"), NoParserHints(), }, + { + "array string element", + testLine, + []LabelExtractionExpr{ + NewLabelExtractionExpr("param", `pod.deployment.params[3]`), + }, + labels.EmptyLabels(), + labels.FromStrings("param", "string_value"), + NoParserHints(), + }, { "full array", testLine, @@ -347,7 +357,7 @@ func TestJSONExpressionParser(t *testing.T) { NewLabelExtractionExpr("params", `pod.deployment.params`), }, labels.EmptyLabels(), - labels.FromStrings("params", "[1,2,3]"), + labels.FromStrings("params", `[1,2,3,"string_value"]`), NoParserHints(), }, { @@ -357,7 +367,7 @@ func TestJSONExpressionParser(t *testing.T) { NewLabelExtractionExpr("deployment", `pod.deployment`), }, labels.EmptyLabels(), - labels.FromStrings("deployment", `{"ref":"foobar", "params": [1,2,3]}`), + labels.FromStrings("deployment", `{"ref":"foobar", "params": [1,2,3,"string_value"]}`), NoParserHints(), }, { diff --git a/pkg/logql/metrics.go b/pkg/logql/metrics.go index 63051e362eae6..40fbece82d87d 100644 --- a/pkg/logql/metrics.go +++ b/pkg/logql/metrics.go @@ -114,13 +114,17 @@ func RecordRangeAndInstantQueryMetrics( } queryTags, _ := ctx.Value(httpreq.QueryTagsHTTPHeader).(string) // it's ok to be empty. + var ( + query = p.QueryString() + hashedQuery = util.HashedQuery(query) + ) logValues := make([]interface{}, 0, 50) logValues = append(logValues, []interface{}{ "latency", latencyType, // this can be used to filter log lines. - "query", p.QueryString(), - "query_hash", util.HashedQuery(p.QueryString()), + "query", query, + "query_hash", hashedQuery, "query_type", queryType, "range_type", rt, "length", p.End().Sub(p.Start()), diff --git a/pkg/logql/shardmapper_test.go b/pkg/logql/shardmapper_test.go index 96955109a9413..0e345291eed3b 100644 --- a/pkg/logql/shardmapper_test.go +++ b/pkg/logql/shardmapper_test.go @@ -1598,3 +1598,32 @@ func TestStringTrimming(t *testing.T) { func float64p(v float64) *float64 { return &v } + +func TestShardTopk(t *testing.T) { + expr := `topk( + 10, + sum by (ip) ( + sum_over_time({job="foo"} | json | unwrap bytes(bytes)[1m]) + ) + )` + m := NewShardMapper(ConstantShards(5), nilShardMetrics, []string{ShardQuantileOverTime}) + _, _, mappedExpr, err := m.Parse(syntax.MustParseExpr(expr)) + require.NoError(t, err) + + expected := `topk( + 10, + sum by (ip)( + concat( + downstream + ++ + downstream + ++ + downstream + ++ + downstream + ++ ... + ) + ) +)` + require.Equal(t, expected, mappedExpr.Pretty(0)) +} diff --git a/pkg/logql/syntax/prettier.go b/pkg/logql/syntax/prettier.go index cf346e26c562f..1b407453858f7 100644 --- a/pkg/logql/syntax/prettier.go +++ b/pkg/logql/syntax/prettier.go @@ -35,8 +35,8 @@ import ( // var ( - // maxCharsPerLine is used to qualify whether some LogQL expressions are worth `splitting` into new lines. - maxCharsPerLine = 100 + // MaxCharsPerLine is used to qualify whether some LogQL expressions are worth `splitting` into new lines. 
+ MaxCharsPerLine = 100 ) func Prettify(e Expr) string { @@ -51,8 +51,8 @@ func (e *MatchersExpr) Pretty(level int) string { // e.g: `{foo="bar"} | logfmt | level="error"` // Here, left = `{foo="bar"}` and multistages would collection of each stage in pipeline, here `logfmt` and `level="error"` func (e *PipelineExpr) Pretty(level int) string { - if !needSplit(e) { - return indent(level) + e.String() + if !NeedSplit(e) { + return Indent(level) + e.String() } s := fmt.Sprintf("%s\n", e.Left.Pretty(level)) @@ -73,8 +73,8 @@ func (e *PipelineExpr) Pretty(level int) string { // e.g: `|= "error" != "memcache" |= ip("192.168.0.1")` // NOTE: here `ip` is Op in this expression. func (e *LineFilterExpr) Pretty(level int) string { - if !needSplit(e) { - return indent(level) + e.String() + if !NeedSplit(e) { + return Indent(level) + e.String() } var s string @@ -90,7 +90,7 @@ func (e *LineFilterExpr) Pretty(level int) string { s += "\n" } - s += indent(level) + s += Indent(level) // We re-use LineFilterExpr's String() implementation to avoid duplication. // We create new LineFilterExpr without `Left`. @@ -153,7 +153,7 @@ func (e *LogfmtExpressionParser) Pretty(level int) string { // e.g: sum_over_time({foo="bar"} | logfmt | unwrap bytes_processed [5m]) func (e *UnwrapExpr) Pretty(level int) string { - s := indent(level) + s := Indent(level) if e.Operation != "" { s += fmt.Sprintf("%s %s %s(%s)", OpPipe, OpUnwrap, e.Operation, e.Identifier) @@ -161,7 +161,7 @@ func (e *UnwrapExpr) Pretty(level int) string { s += fmt.Sprintf("%s %s %s", OpPipe, OpUnwrap, e.Identifier) } for _, f := range e.PostFilters { - s += fmt.Sprintf("\n%s%s %s", indent(level), OpPipe, f) + s += fmt.Sprintf("\n%s%s %s", Indent(level), OpPipe, f) } return s } @@ -200,8 +200,8 @@ func (e *OffsetExpr) Pretty(_ int) string { // e.g: count_over_time({foo="bar"}[5m]) func (e *RangeAggregationExpr) Pretty(level int) string { - s := indent(level) - if !needSplit(e) { + s := Indent(level) + if !NeedSplit(e) { return s + e.String() } @@ -211,13 +211,13 @@ func (e *RangeAggregationExpr) Pretty(level int) string { // print args to the function. if e.Params != nil { - s = fmt.Sprintf("%s%s%s,", s, indent(level+1), fmt.Sprint(*e.Params)) + s = fmt.Sprintf("%s%s%s,", s, Indent(level+1), fmt.Sprint(*e.Params)) s += "\n" } s += e.Left.Pretty(level + 1) - s += "\n" + indent(level) + ")" + s += "\n" + Indent(level) + ")" if e.Grouping != nil { s += e.Grouping.Pretty(level) @@ -236,9 +236,9 @@ func (e *RangeAggregationExpr) Pretty(level int) string { // - vector on which aggregation is done. // [without|by (
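
The prettier changes above export `MaxCharsPerLine`, `NeedSplit`, and `Indent` so that the new `Pretty` implementations on the sharded `Concat*` expressions in `pkg/logql` can reuse them. A rough sketch of driving the pretty-printer from outside the `syntax` package; the query here is arbitrary, and the exact output depends on the value of `syntax.MaxCharsPerLine`:

```go
package main

import (
	"fmt"

	"github.com/grafana/loki/pkg/logql/syntax"
)

func main() {
	// Expressions shorter than syntax.MaxCharsPerLine print on one line;
	// longer ones are split across lines and indented via syntax.Indent.
	expr := syntax.MustParseExpr(`sum by (cluster)(rate({foo="bar"} |= "error" | json [5m]))`)
	fmt.Println(syntax.Prettify(expr))
}
```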
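On the new `Accumulator` plumbing in `pkg/logql/downstream.go`: `Downstreamer.Downstream` now receives an accumulator chosen per AST node (`NewBufferedAccumulator`, `newQuantileSketchAccumulator`, `NewStreamAccumulator`), and results are collected through it. The following is a minimal sketch of a type satisfying the interface, assuming the `int` argument is the index of the downstream query (as `NewBufferedAccumulator(len(queries))` suggests); the type and constructor names are illustrative, not the implementations added by this change:

```go
package logql

import (
	"context"

	"github.com/grafana/loki/pkg/logqlmodel"
)

// indexedAccumulator is a hypothetical Accumulator: it stores each
// downstream result at the slot given by its query index and returns
// the collected slice unchanged. It does no merging, streaming, or
// memory accounting.
type indexedAccumulator struct {
	results []logqlmodel.Result
}

func newIndexedAccumulator(n int) *indexedAccumulator {
	return &indexedAccumulator{results: make([]logqlmodel.Result, n)}
}

// Accumulate records the result of the i-th downstream query.
func (a *indexedAccumulator) Accumulate(_ context.Context, res logqlmodel.Result, i int) error {
	a.results[i] = res
	return nil
}

// Result returns everything accumulated so far.
func (a *indexedAccumulator) Result() []logqlmodel.Result {
	return a.results
}
```

A caller would thread it through the new parameter, e.g. `results, err := ev.Downstream(ctx, queries, newIndexedAccumulator(len(queries)))`, mirroring how the evaluator uses the buffered accumulator.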
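On the parser change: `pkg/logql/log/parser.go` now imports `github.com/grafana/jsonparser` instead of `github.com/buger/jsonparser`, and the new test case covers extracting a string element from a JSON array (`pod.deployment.params[3]`). Assuming the fork keeps the upstream `buger/jsonparser` API, where array elements are addressed with `[idx]` path segments, the underlying lookup looks roughly like this:

```go
package main

import (
	"fmt"

	"github.com/grafana/jsonparser"
)

func main() {
	line := []byte(`{"pod":{"deployment":{"ref":"foobar","params":[1,2,3,"string_value"]}}}`)

	// "[3]" selects the fourth array element; GetString returns an error
	// if the element is not a JSON string.
	v, err := jsonparser.GetString(line, "pod", "deployment", "params", "[3]")
	if err != nil {
		panic(err)
	}
	fmt.Println(v) // string_value
}
```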