diff --git a/.drone/drone.jsonnet b/.drone/drone.jsonnet index 49f67f06861a3..9351f2f693c06 100644 --- a/.drone/drone.jsonnet +++ b/.drone/drone.jsonnet @@ -610,23 +610,6 @@ local build_image_tag = '0.33.0'; 'cd -', ]) { depends_on: ['clone'], when: onPRs }, make('test', container=false) { depends_on: ['clone-target-branch', 'check-generated-files'] }, - run('test-target-branch', commands=['cd ../loki-target-branch && BUILD_IN_CONTAINER=false make test']) { depends_on: ['clone-target-branch'], when: onPRs }, - make('compare-coverage', container=false, args=[ - 'old=../loki-target-branch/test_results.txt', - 'new=test_results.txt', - 'packages=ingester,distributor,querier,querier/queryrange,iter,storage,chunkenc,logql,loki', - '> diff.txt', - ]) { depends_on: ['test', 'test-target-branch'], when: onPRs }, - run('report-coverage', commands=[ - "total_diff=$(sed 's/%//' diff.txt | awk '{sum+=$3;}END{print sum;}')", - 'if [ $total_diff = 0 ]; then exit 0; fi', - "pull=$(echo $CI_COMMIT_REF | awk -F '/' '{print $3}')", - "body=$(jq -Rs '{body: . }' diff.txt)", - 'curl -X POST -u $USER:$TOKEN -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/grafana/loki/issues/$pull/comments -d "$body" > /dev/null', - ], env={ - USER: 'grafanabot', - TOKEN: { from_secret: github_secret.name }, - }) { depends_on: ['compare-coverage'], when: onPRs }, make('lint', container=false) { depends_on: ['check-generated-files'] }, make('check-mod', container=false) { depends_on: ['test', 'lint'] }, { diff --git a/.drone/drone.yml b/.drone/drone.yml index 7a62b621262a8..c33a66998e71c 100644 --- a/.drone/drone.yml +++ b/.drone/drone.yml @@ -212,47 +212,6 @@ steps: environment: {} image: grafana/loki-build-image:0.33.0 name: test -- commands: - - cd ../loki-target-branch && BUILD_IN_CONTAINER=false make test - depends_on: - - clone-target-branch - environment: {} - image: grafana/loki-build-image:0.33.0 - name: test-target-branch - when: - event: - - pull_request -- commands: - - make BUILD_IN_CONTAINER=false compare-coverage old=../loki-target-branch/test_results.txt - new=test_results.txt packages=ingester,distributor,querier,querier/queryrange,iter,storage,chunkenc,logql,loki - > diff.txt - depends_on: - - test - - test-target-branch - environment: {} - image: grafana/loki-build-image:0.33.0 - name: compare-coverage - when: - event: - - pull_request -- commands: - - total_diff=$(sed 's/%//' diff.txt | awk '{sum+=$3;}END{print sum;}') - - if [ $total_diff = 0 ]; then exit 0; fi - - pull=$(echo $CI_COMMIT_REF | awk -F '/' '{print $3}') - - 'body=$(jq -Rs ''{body: . }'' diff.txt)' - - 'curl -X POST -u $USER:$TOKEN -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/grafana/loki/issues/$pull/comments - -d "$body" > /dev/null' - depends_on: - - compare-coverage - environment: - TOKEN: - from_secret: github_token - USER: grafanabot - image: grafana/loki-build-image:0.33.0 - name: report-coverage - when: - event: - - pull_request - commands: - make BUILD_IN_CONTAINER=false lint depends_on: @@ -2113,6 +2072,6 @@ kind: secret name: gpg_private_key --- kind: signature -hmac: 457592d17208477ceb480f81dbdb88f7b95a5ad015c88d9d6fed06c2422a52f9 +hmac: 51861919f0ba5370a152bdb9267828c742f2042819fb01388c6d23bf44e3cbb7 ... 
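For context on the CI steps removed above: the deleted `report-coverage` step summed the per-package coverage deltas in `diff.txt` with `sed 's/%//' | awk '{sum+=$3}END{print sum}'` and, when the total was non-zero, posted the file as a PR comment. The following is only a minimal Go sketch of that summation logic, not code from this repository; the file name `diff.txt` comes from the removed step, everything else (structure, early return) is an illustrative assumption.

```go
// Illustrative sketch of the removed report-coverage summation:
// mirrors `sed 's/%//' diff.txt | awk '{sum+=$3;}END{print sum;}'`.
package main

import (
	"bufio"
	"fmt"
	"log"
	"os"
	"strconv"
	"strings"
)

func main() {
	f, err := os.Open("diff.txt") // per-package coverage diff produced by `make compare-coverage`
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	var total float64
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) < 3 {
			continue
		}
		// Third column holds the coverage delta, e.g. "-0.5%"; strip the percent sign.
		v, err := strconv.ParseFloat(strings.TrimSuffix(fields[2], "%"), 64)
		if err != nil {
			continue
		}
		total += v
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}

	if total == 0 {
		// The CI step exited early here and skipped posting a PR comment.
		fmt.Println("no coverage change")
		return
	}
	fmt.Printf("total coverage delta: %.2f\n", total)
}
```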
diff --git a/clients/cmd/fluent-bit/Dockerfile b/clients/cmd/fluent-bit/Dockerfile index 563614a75f52e..f0dfbc90c36a3 100644 --- a/clients/cmd/fluent-bit/Dockerfile +++ b/clients/cmd/fluent-bit/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.21.3-bullseye AS builder +FROM golang:1.22.0-bullseye AS builder COPY . /src diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md index b675f85157423..e79a2503176fc 100644 --- a/docs/sources/configure/_index.md +++ b/docs/sources/configure/_index.md @@ -2327,27 +2327,26 @@ bloom_shipper: [max_tasks_enqueued_per_tenant: | default = 10000] blocks_cache: - # Whether embedded cache is enabled. - # CLI flag: -blocks-cache.enabled + # Cache for bloom blocks. Whether embedded cache is enabled. + # CLI flag: -bloom.blocks-cache.enabled [enabled: | default = false] - # Maximum memory size of the cache in MB. - # CLI flag: -blocks-cache.max-size-mb + # Cache for bloom blocks. Maximum memory size of the cache in MB. + # CLI flag: -bloom.blocks-cache.max-size-mb [max_size_mb: | default = 100] - # Maximum number of entries in the cache. - # CLI flag: -blocks-cache.max-size-items + # Cache for bloom blocks. Maximum number of entries in the cache. + # CLI flag: -bloom.blocks-cache.max-size-items [max_size_items: | default = 0] - # The time to live for items in the cache before they get purged. - # CLI flag: -blocks-cache.ttl - [ttl: | default = 0s] + # Cache for bloom blocks. The time to live for items in the cache before + # they get purged. + # CLI flag: -bloom.blocks-cache.ttl + [ttl: | default = 24h] - # During this period the process waits until the directory becomes not used - # and only after this it will be deleted. If the timeout is reached, the - # directory is force deleted. - # CLI flag: -blocks-cache.remove-directory-graceful-period - [remove_directory_graceful_period: | default = 5m] + # The cache block configures the cache backend. + # The CLI flags prefix for this block configuration is: bloom.metas-cache + [metas_cache: ] ``` ### chunk_store_config @@ -2650,6 +2649,18 @@ ring: # CLI flag: -bloom-compactor.compaction-interval [compaction_interval: | default = 10m] +# Minimum age of a table before it is considered for compaction. +# CLI flag: -bloom-compactor.min-compaction-age +[min_compaction_age: | default = 24h] + +# Maximum age of a table before it is considered for compaction. +# CLI flag: -bloom-compactor.max-compaction-age +[max_compaction_age: | default = 168h] + +# Number of workers to run in parallel for compaction. +# CLI flag: -bloom-compactor.worker-parallelism +[worker_parallelism: | default = 1] + # Minimum backoff time between retries. # CLI flag: -bloom-compactor.compaction-retries-min-backoff [compaction_retries_min_backoff: | default = 10s] @@ -3129,6 +3140,12 @@ shard_streams: # CLI flag: -bloom-gateway.cache-key-interval [bloom_gateway_cache_key_interval: | default = 15m] +# The maximum bloom block size. A value of 0 sets an unlimited size. Default is +# 200MB. The actual block size might exceed this limit since blooms will be +# added to blocks until the block exceeds the maximum block size. +# CLI flag: -bloom-compactor.max-block-size +[bloom_compactor_max_block_size: | default = 200MB] + # Allow user to send structured metadata in push payload. # CLI flag: -validation.allow-structured-metadata [allow_structured_metadata: | default = false] @@ -4354,6 +4371,7 @@ The TLS configuration. The cache block configures the cache backend. 
The supported CLI flags `` used to reference this configuration block are: - `bloom-gateway-client.cache` +- `bloom.metas-cache` - `frontend` - `frontend.index-stats-results-cache` - `frontend.label-results-cache` diff --git a/docs/sources/get-started/labels/structured-metadata.md b/docs/sources/get-started/labels/structured-metadata.md index 071339cc0bde3..e199402e0b000 100644 --- a/docs/sources/get-started/labels/structured-metadata.md +++ b/docs/sources/get-started/labels/structured-metadata.md @@ -5,10 +5,6 @@ description: Describes how to enable structure metadata for logs and how to quer --- # What is structured metadata -{{% admonition type="warning" %}} -Structured metadata is an experimental feature and is subject to change in future releases of Grafana Loki. This feature is not yet available for Cloud Logs users. -{{% /admonition %}} - {{% admonition type="warning" %}} Structured metadata was added to chunk format V4 which is used if the schema version is greater or equal to `13`. (See [Schema Config]({{< relref "../../storage#schema-config" >}}) for more details about schema versions. ) {{% /admonition %}} diff --git a/docs/sources/operations/query-fairness/_index.md b/docs/sources/operations/query-fairness/_index.md index 39f9ede21fbab..44b3c15f8f9ad 100644 --- a/docs/sources/operations/query-fairness/_index.md +++ b/docs/sources/operations/query-fairness/_index.md @@ -115,7 +115,7 @@ you would usually want to avoid this scenario and control yourself where the hea When using Grafana as the Loki user interface, you can, for example, create multiple data sources with the same tenant, but with a different additional HTTP header -`X-Loki-Scope-Actor` and restrict which Grafana user can use which data source. +`X-Loki-Actor-Path` and restrict which Grafana user can use which data source. Alternatively, if you have a proxy for authentication in front of Loki, you can pass the (hashed) user from the authentication as downstream header to Loki. diff --git a/docs/sources/release-notes/v2-9.md b/docs/sources/release-notes/v2-9.md index 8355dd02abf08..68d3da85bc4dd 100644 --- a/docs/sources/release-notes/v2-9.md +++ b/docs/sources/release-notes/v2-9.md @@ -9,6 +9,8 @@ Grafana Labs is excited to announce the release of Loki 2.9.0 Here's a summary o ## Features and enhancements +- **Structured metadata**: The [Structured Metadata](https://grafana.com/docs/loki/latest/get-started/labels/structured-metadata/) feature, which was introduced as experimental in release 2.9.0, is generally available as of release 2.9.4. + - **Query Language Improvements**: Several improvements to the query language that speed up line parsing and regex matching. [PR #8646](https://github.com/grafana/loki/pull/8646), [PR #8659](https://github.com/grafana/loki/pull/8659), [PR #8724](https://github.com/grafana/loki/pull/8724), [PR #8734](https://github.com/grafana/loki/pull/8734), [PR #8739](https://github.com/grafana/loki/pull/8739), [PR #8763](https://github.com/grafana/loki/pull/8763), [PR #8890](https://github.com/grafana/loki/pull/8890), [PR #8914](https://github.com/grafana/loki/pull/8914) - **Remote rule evaluation**: Rule evaluation can now be handled by queriers to improve speed. 
[PR #8744](https://github.com/grafana/loki/pull/8744) [PR #8848](https://github.com/grafana/loki/pull/8848) @@ -33,13 +35,13 @@ Grafana Labs is excited to announce the release of Loki 2.9.0 Here's a summary o ## Bug fixes -### 2.9.1 (2023-09-14) - -* Update Docker base images to mitigate security vulnerability CVE-2022-48174 -* Fix bugs in indexshipper (`tsdb`, `boltdb-shipper`) that could result in not showing all ingested logs in query results. - ### 2.9.2 (2023-10-16) * Upgrade go to v1.21.3, golang.org/x/net to v0.17.0 and grpc-go to v1.56.3 to patch CVE-2023-39325 / CVE-2023-44487 For a full list of all changes and fixes, look at the [CHANGELOG](https://github.com/grafana/loki/blob/release-2.9.x/CHANGELOG.md). + +### 2.9.1 (2023-09-14) + +* Update Docker base images to mitigate security vulnerability CVE-2022-48174 +* Fix bugs in indexshipper (`tsdb`, `boltdb-shipper`) that could result in not showing all ingested logs in query results. diff --git a/docs/sources/send-data/otel/_index.md b/docs/sources/send-data/otel/_index.md index 84d1226316ecf..12f9cdd0e4af5 100644 --- a/docs/sources/send-data/otel/_index.md +++ b/docs/sources/send-data/otel/_index.md @@ -9,10 +9,6 @@ weight: 250 # Ingesting logs to Loki using OpenTelemetry Collector -{{% admonition type="warning" %}} -OpenTelemetry logs ingestion is an experimental feature and is subject to change in future releases of Grafana Loki. -{{% /admonition %}} - Loki natively supports ingesting OpenTelemetry logs over HTTP. For ingesting logs to Loki using the OpenTelemetry Collector, you must use the [`otlphttp` exporter](https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlphttpexporter). diff --git a/docs/sources/setup/install/docker.md b/docs/sources/setup/install/docker.md index 51df7f9288c85..8e9007c0fdf43 100644 --- a/docs/sources/setup/install/docker.md +++ b/docs/sources/setup/install/docker.md @@ -9,7 +9,7 @@ weight: 400 # Install Loki with Docker or Docker Compose You can install Loki and Promtail with Docker or Docker Compose if you are evaluating, testing, or developing Loki. -For production, we recommend installing with Tanka or Helm. +For production, Grafana recommends installing with Tanka or Helm. The configuration acquired with these installation instructions run Loki as a single binary. 
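The query-fairness doc change above renames the per-request scope header to `X-Loki-Actor-Path`, so that each Grafana data source (or authentication proxy) can identify the actor issuing a query. As a hedged sketch of how a client would set it: the query endpoint and the `X-Scope-OrgID` tenant header follow Loki's documented HTTP API, while the address, tenant, and actor values below are invented placeholders.

```go
// Minimal client-side example of attaching the X-Loki-Actor-Path header to a
// Loki range query. Values such as "tenant-a" and "grafana/team-a" are
// placeholders for illustration only.
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"net/url"
)

func main() {
	q := url.Values{}
	q.Set("query", `{job="example"}`)

	req, err := http.NewRequest(http.MethodGet,
		"http://localhost:3100/loki/api/v1/query_range?"+q.Encode(), nil)
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("X-Scope-OrgID", "tenant-a")           // tenant the query runs against
	req.Header.Set("X-Loki-Actor-Path", "grafana/team-a") // actor used for per-actor queue fairness

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, len(body), "bytes")
}
```

Restricting which Grafana users can use which data source (each carrying a different actor path) is what lets a single tenant be split into fairly scheduled sub-queues, as described in the documentation change.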
diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index 6e2b1e741b02f..2a1ebc2f5d362 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,7 @@ ## Main +- [11869](https://github.com/grafana/loki/pull/11869) **periklis**: Add support for running with Google Workload Identity +- [11868](https://github.com/grafana/loki/pull/11868) **xperimental**: Integrate support for OpenShift-managed credentials in Azure - [11854](https://github.com/grafana/loki/pull/11854) **periklis**: Allow custom audience for managed-auth on STS - [11802](https://github.com/grafana/loki/pull/11802) **xperimental**: Add support for running with Azure Workload Identity - [11824](https://github.com/grafana/loki/pull/11824) **xperimental**: Improve messages for errors in storage secret diff --git a/operator/apis/config/v1/projectconfig_types.go b/operator/apis/config/v1/projectconfig_types.go index ba7cc703c5bb8..06ff8cb090598 100644 --- a/operator/apis/config/v1/projectconfig_types.go +++ b/operator/apis/config/v1/projectconfig_types.go @@ -56,7 +56,9 @@ type OpenShiftFeatureGates struct { ManagedAuthEnv bool } -func (o OpenShiftFeatureGates) ManagedAuthEnabled() bool { +// ManagedAuthEnabled returns true when OpenShift-functions are enabled and the operator has detected that it is +// running with some kind of "workload identity" (AWS STS, Azure WIF) enabled. +func (o *OpenShiftFeatureGates) ManagedAuthEnabled() bool { return o.Enabled && o.ManagedAuthEnv } diff --git a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml index 4b20f814804a2..6854bf38ff661 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:0.5.0 - createdAt: "2024-01-25T11:08:43Z" + createdAt: "2024-01-31T16:48:07Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. features.operators.openshift.io/disconnected: "true" diff --git a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml index 81575be404e82..f8c37162b5a44 100644 --- a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:0.5.0 - createdAt: "2024-01-25T11:08:41Z" + createdAt: "2024-01-31T16:48:04Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. 
operators.operatorframework.io/builder: operator-sdk-unknown diff --git a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml index b79f4ea7a2f49..234ddb423a3aa 100644 --- a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: quay.io/openshift-logging/loki-operator:0.1.0 - createdAt: "2024-01-25T11:08:45Z" + createdAt: "2024-01-31T16:48:10Z" description: | The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging. ## Prerequisites and Requirements @@ -165,7 +165,7 @@ metadata: features.operators.openshift.io/proxy-aware: "true" features.operators.openshift.io/tls-profiles: "true" features.operators.openshift.io/token-auth-aws: "true" - features.operators.openshift.io/token-auth-azure: "false" + features.operators.openshift.io/token-auth-azure: "true" features.operators.openshift.io/token-auth-gcp: "false" olm.skipRange: '>=5.7.0-0 <5.9.0' operatorframework.io/cluster-monitoring: "true" diff --git a/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml b/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml index 0e724292edbb6..48a221736e2dc 100644 --- a/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml +++ b/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml @@ -21,7 +21,7 @@ metadata: features.operators.openshift.io/proxy-aware: "true" features.operators.openshift.io/tls-profiles: "true" features.operators.openshift.io/token-auth-aws: "true" - features.operators.openshift.io/token-auth-azure: "false" + features.operators.openshift.io/token-auth-azure: "true" features.operators.openshift.io/token-auth-gcp: "false" olm.skipRange: '>=5.7.0-0 <5.9.0' operatorframework.io/cluster-monitoring: "true" diff --git a/operator/controllers/loki/credentialsrequests_controller.go b/operator/controllers/loki/credentialsrequests_controller.go index 61d0b58423e90..efd0226c6a340 100644 --- a/operator/controllers/loki/credentialsrequests_controller.go +++ b/operator/controllers/loki/credentialsrequests_controller.go @@ -4,6 +4,7 @@ import ( "context" "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" @@ -46,7 +47,17 @@ func (r *CredentialsRequestsReconciler) Reconcile(ctx context.Context, req ctrl. 
return ctrl.Result{}, nil } - secretRef, err := handlers.CreateCredentialsRequest(ctx, r.Client, req.NamespacedName) + storageSecretName := client.ObjectKey{ + Namespace: req.Namespace, + Name: stack.Spec.Storage.Secret.Name, + } + storageSecret := &corev1.Secret{} + err = r.Client.Get(ctx, storageSecretName, storageSecret) + if err != nil { + return ctrl.Result{}, err + } + + secretRef, err := handlers.CreateCredentialsRequest(ctx, r.Client, req.NamespacedName, storageSecret) if err != nil { return ctrl.Result{}, err } diff --git a/operator/controllers/loki/credentialsrequests_controller_test.go b/operator/controllers/loki/credentialsrequests_controller_test.go index e6738c1d1796e..3c91ee2275e97 100644 --- a/operator/controllers/loki/credentialsrequests_controller_test.go +++ b/operator/controllers/loki/credentialsrequests_controller_test.go @@ -6,6 +6,7 @@ import ( cloudcredentialsv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -81,16 +82,24 @@ func TestCredentialsRequestController_CreateCredentialsRequest_WhenLokiStackNotA ManagementState: lokiv1.ManagementStateManaged, }, } + secret := &corev1.Secret{} // Set managed auth environment t.Setenv("ROLEARN", "a-role-arn") k.GetStub = func(_ context.Context, key types.NamespacedName, out client.Object, _ ...client.GetOption) error { - if key.Name == r.Name && key.Namespace == r.Namespace { - k.SetClientObject(out, &s) + switch out.(type) { + case *lokiv1.LokiStack: + if key.Name == r.Name && key.Namespace == r.Namespace { + k.SetClientObject(out, &s) + return nil + } + return apierrors.NewNotFound(schema.GroupResource{}, "lokistack not found") + case *corev1.Secret: + k.SetClientObject(out, secret) return nil } - return apierrors.NewNotFound(schema.GroupResource{}, "lokistack not found") + return nil } k.CreateStub = func(_ context.Context, o client.Object, _ ...client.CreateOption) error { diff --git a/operator/internal/handlers/credentialsrequest_create.go b/operator/internal/handlers/credentialsrequest_create.go index 477528326b9a5..6074e10b2d5af 100644 --- a/operator/internal/handlers/credentialsrequest_create.go +++ b/operator/internal/handlers/credentialsrequest_create.go @@ -2,23 +2,47 @@ package handlers import ( "context" + "errors" "github.com/ViaQ/logerr/v2/kverrors" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/grafana/loki/operator/internal/external/k8s" "github.com/grafana/loki/operator/internal/manifests/openshift" + "github.com/grafana/loki/operator/internal/manifests/storage" +) + +var ( + errAzureNoSecretFound = errors.New("can not create CredentialsRequest: no azure secret found") + errAzureNoRegion = errors.New("can not create CredentialsRequest: missing secret field: region") ) // CreateCredentialsRequest creates a new CredentialsRequest resource for a Lokistack // to request a cloud credentials Secret resource from the OpenShift cloud-credentials-operator. 
-func CreateCredentialsRequest(ctx context.Context, k k8s.Client, stack client.ObjectKey) (string, error) { +func CreateCredentialsRequest(ctx context.Context, k k8s.Client, stack client.ObjectKey, secret *corev1.Secret) (string, error) { managedAuthEnv := openshift.DiscoverManagedAuthEnv() if managedAuthEnv == nil { return "", nil } + if managedAuthEnv.Azure != nil && managedAuthEnv.Azure.Region == "" { + // Managed environment for Azure does not provide Region, but we need this for the CredentialsRequest. + // This looks like an oversight when creating the UI in OpenShift, but for now we need to pull this data + // from somewhere else -> the Azure Storage Secret + if secret == nil { + return "", errAzureNoSecretFound + } + + region := secret.Data[storage.KeyAzureRegion] + if len(region) == 0 { + return "", errAzureNoRegion + } + + managedAuthEnv.Azure.Region = string(region) + } + opts := openshift.Options{ BuildOpts: openshift.BuildOptions{ LokiStackName: stack.Name, diff --git a/operator/internal/handlers/credentialsrequest_create_test.go b/operator/internal/handlers/credentialsrequest_create_test.go index f6bf9c0f1b526..df903eaec662f 100644 --- a/operator/internal/handlers/credentialsrequest_create_test.go +++ b/operator/internal/handlers/credentialsrequest_create_test.go @@ -4,7 +4,9 @@ import ( "context" "testing" + cloudcredentialv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime/schema" "sigs.k8s.io/controller-runtime/pkg/client" @@ -16,7 +18,7 @@ func TestCreateCredentialsRequest_DoNothing_WhenManagedAuthEnvMissing(t *testing k := &k8sfakes.FakeClient{} key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - secretRef, err := CreateCredentialsRequest(context.Background(), k, key) + secretRef, err := CreateCredentialsRequest(context.Background(), k, key, nil) require.NoError(t, err) require.Empty(t, secretRef) } @@ -27,12 +29,74 @@ func TestCreateCredentialsRequest_CreateNewResource(t *testing.T) { t.Setenv("ROLEARN", "a-role-arn") - secretRef, err := CreateCredentialsRequest(context.Background(), k, key) + secretRef, err := CreateCredentialsRequest(context.Background(), k, key, nil) require.NoError(t, err) require.NotEmpty(t, secretRef) require.Equal(t, 1, k.CreateCallCount()) } +func TestCreateCredentialsRequest_CreateNewResourceAzure(t *testing.T) { + wantRegion := "test-region" + + k := &k8sfakes.FakeClient{} + key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} + secret := &corev1.Secret{ + Data: map[string][]byte{ + "region": []byte(wantRegion), + }, + } + + t.Setenv("CLIENTID", "test-client-id") + t.Setenv("TENANTID", "test-tenant-id") + t.Setenv("SUBSCRIPTIONID", "test-subscription-id") + + secretRef, err := CreateCredentialsRequest(context.Background(), k, key, secret) + require.NoError(t, err) + require.NotEmpty(t, secretRef) + + require.Equal(t, 1, k.CreateCallCount()) + _, obj, _ := k.CreateArgsForCall(0) + credReq, ok := obj.(*cloudcredentialv1.CredentialsRequest) + require.True(t, ok) + + providerSpec := &cloudcredentialv1.AzureProviderSpec{} + require.NoError(t, cloudcredentialv1.Codec.DecodeProviderSpec(credReq.Spec.ProviderSpec, providerSpec)) + + require.Equal(t, wantRegion, providerSpec.AzureRegion) +} + +func TestCreateCredentialsRequest_CreateNewResourceAzure_Errors(t *testing.T) { + k := &k8sfakes.FakeClient{} + key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} + + tt := 
[]struct { + secret *corev1.Secret + wantError string + }{ + { + secret: nil, + wantError: errAzureNoSecretFound.Error(), + }, + { + secret: &corev1.Secret{}, + wantError: errAzureNoRegion.Error(), + }, + } + + for _, tc := range tt { + tc := tc + t.Run(tc.wantError, func(t *testing.T) { + // Not parallel (environment variables) + t.Setenv("CLIENTID", "test-client-id") + t.Setenv("TENANTID", "test-tenant-id") + t.Setenv("SUBSCRIPTIONID", "test-subscription-id") + + _, err := CreateCredentialsRequest(context.Background(), k, key, tc.secret) + require.EqualError(t, err, tc.wantError) + }) + } +} + func TestCreateCredentialsRequest_DoNothing_WhenCredentialsRequestExist(t *testing.T) { k := &k8sfakes.FakeClient{} key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} @@ -43,7 +107,7 @@ func TestCreateCredentialsRequest_DoNothing_WhenCredentialsRequestExist(t *testi return errors.NewAlreadyExists(schema.GroupResource{}, "credentialsrequest exists") } - secretRef, err := CreateCredentialsRequest(context.Background(), k, key) + secretRef, err := CreateCredentialsRequest(context.Background(), k, key, nil) require.NoError(t, err) require.NotEmpty(t, secretRef) require.Equal(t, 1, k.CreateCallCount()) diff --git a/operator/internal/handlers/internal/storage/secrets.go b/operator/internal/handlers/internal/storage/secrets.go index d2d39e5ac8570..21cd58b7c3c25 100644 --- a/operator/internal/handlers/internal/storage/secrets.go +++ b/operator/internal/handlers/internal/storage/secrets.go @@ -3,6 +3,7 @@ package storage import ( "context" "crypto/sha1" + "encoding/json" "errors" "fmt" "sort" @@ -29,10 +30,16 @@ var ( errS3NoAuth = errors.New("missing secret fields for static or sts authentication") - errAzureNoCredentials = errors.New("azure storage secret does contain neither account_key or client_id") - errAzureMixedCredentials = errors.New("azure storage secret can not contain both account_key and client_id") + errAzureNoCredentials = errors.New("azure storage secret does contain neither account_key or client_id") + errAzureMixedCredentials = errors.New("azure storage secret can not contain both account_key and client_id") + errAzureManagedIdentityNoOverride = errors.New("when in managed mode, storage secret can not contain credentials") + + errGCPParseCredentialsFile = errors.New("gcp storage secret cannot be parsed from JSON content") + errGCPWrongCredentialSourceFile = errors.New("credential source in secret needs to point to token file") ) +const gcpAccountTypeExternal = "external_account" + func getSecrets(ctx context.Context, k k8s.Client, stack *lokiv1.LokiStack, fg configv1.FeatureGates) (*corev1.Secret, *corev1.Secret, error) { var ( storageSecret corev1.Secret @@ -110,7 +117,7 @@ func extractSecrets(secretType lokiv1.ObjectStorageSecretType, objStore, managed switch secretType { case lokiv1.ObjectStorageSecretAzure: - storageOpts.Azure, err = extractAzureConfigSecret(objStore) + storageOpts.Azure, err = extractAzureConfigSecret(objStore, fg) case lokiv1.ObjectStorageSecretGCS: storageOpts.GCS, err = extractGCSConfigSecret(objStore) case lokiv1.ObjectStorageSecretS3: @@ -158,41 +165,62 @@ func hashSecretData(s *corev1.Secret) (string, error) { return fmt.Sprintf("%x", h.Sum(nil)), nil } -func extractAzureConfigSecret(s *corev1.Secret) (*storage.AzureStorageConfig, error) { +func extractAzureConfigSecret(s *corev1.Secret, fg configv1.FeatureGates) (*storage.AzureStorageConfig, error) { // Extract and validate mandatory fields env := s.Data[storage.KeyAzureEnvironmentName] if len(env) == 0 
{ return nil, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyAzureEnvironmentName) } + + accountName := s.Data[storage.KeyAzureStorageAccountName] + if len(accountName) == 0 { + return nil, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyAzureStorageAccountName) + } + container := s.Data[storage.KeyAzureStorageContainerName] if len(container) == 0 { return nil, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyAzureStorageContainerName) } - workloadIdentity, err := validateAzureCredentials(s) + + workloadIdentity, err := validateAzureCredentials(s, fg) if err != nil { return nil, err } // Extract and validate optional fields endpointSuffix := s.Data[storage.KeyAzureStorageEndpointSuffix] + audience := s.Data[storage.KeyAzureAudience] + + if !workloadIdentity && len(audience) > 0 { + return nil, fmt.Errorf("%w: %s", errSecretFieldNotAllowed, storage.KeyAzureAudience) + } return &storage.AzureStorageConfig{ Env: string(env), Container: string(container), EndpointSuffix: string(endpointSuffix), + Audience: string(audience), WorkloadIdentity: workloadIdentity, }, nil } -func validateAzureCredentials(s *corev1.Secret) (workloadIdentity bool, err error) { - accountName := s.Data[storage.KeyAzureStorageAccountName] +func validateAzureCredentials(s *corev1.Secret, fg configv1.FeatureGates) (workloadIdentity bool, err error) { accountKey := s.Data[storage.KeyAzureStorageAccountKey] clientID := s.Data[storage.KeyAzureStorageClientID] tenantID := s.Data[storage.KeyAzureStorageTenantID] subscriptionID := s.Data[storage.KeyAzureStorageSubscriptionID] - if len(accountName) == 0 { - return false, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyAzureStorageAccountName) + if fg.OpenShift.ManagedAuthEnabled() { + region := s.Data[storage.KeyAzureRegion] + if len(region) == 0 { + return false, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyAzureRegion) + } + + if len(accountKey) > 0 || len(clientID) > 0 || len(tenantID) > 0 || len(subscriptionID) > 0 { + return false, errAzureManagedIdentityNoOverride + } + + return true, nil } if len(accountKey) == 0 && len(clientID) == 0 { @@ -233,8 +261,36 @@ func extractGCSConfigSecret(s *corev1.Secret) (*storage.GCSStorageConfig, error) return nil, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyGCPServiceAccountKeyFilename) } + credentialsFile := struct { + CredentialsType string `json:"type"` + CredentialsSource struct { + File string `json:"file"` + } `json:"credential_source"` + }{} + + err := json.Unmarshal(keyJSON, &credentialsFile) + if err != nil { + return nil, errGCPParseCredentialsFile + } + + var ( + audience = s.Data[storage.KeyGCPWorkloadIdentityProviderAudience] + isWorkloadIdentity = credentialsFile.CredentialsType == gcpAccountTypeExternal + ) + if isWorkloadIdentity { + if len(audience) == 0 { + return nil, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyGCPWorkloadIdentityProviderAudience) + } + + if credentialsFile.CredentialsSource.File != storage.GCPDefautCredentialsFile { + return nil, fmt.Errorf("%w: %s", errGCPWrongCredentialSourceFile, storage.GCPDefautCredentialsFile) + } + } + return &storage.GCSStorageConfig{ - Bucket: string(bucket), + Bucket: string(bucket), + WorkloadIdentity: isWorkloadIdentity, + Audience: string(audience), }, nil } diff --git a/operator/internal/handlers/internal/storage/secrets_test.go b/operator/internal/handlers/internal/storage/secrets_test.go index 51dc6e15f670e..cc18360232315 100644 --- a/operator/internal/handlers/internal/storage/secrets_test.go +++ 
b/operator/internal/handlers/internal/storage/secrets_test.go @@ -71,9 +71,11 @@ func TestUnknownType(t *testing.T) { func TestAzureExtract(t *testing.T) { type test struct { - name string - secret *corev1.Secret - wantError string + name string + secret *corev1.Secret + managedSecret *corev1.Secret + featureGates configv1.FeatureGates + wantError string } table := []test{ { @@ -82,23 +84,23 @@ func TestAzureExtract(t *testing.T) { wantError: "missing secret field: environment", }, { - name: "missing container", + name: "missing account_name", secret: &corev1.Secret{ Data: map[string][]byte{ "environment": []byte("here"), }, }, - wantError: "missing secret field: container", + wantError: "missing secret field: account_name", }, { - name: "missing account_name", + name: "missing container", secret: &corev1.Secret{ Data: map[string][]byte{ - "environment": []byte("here"), - "container": []byte("this,that"), + "environment": []byte("here"), + "account_name": []byte("id"), }, }, - wantError: "missing secret field: account_name", + wantError: "missing secret field: container", }, { name: "no account_key or client_id", @@ -153,6 +155,64 @@ func TestAzureExtract(t *testing.T) { }, wantError: "missing secret field: subscription_id", }, + { + name: "managed auth - no region", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "environment": []byte("here"), + "account_name": []byte("test-account-name"), + "container": []byte("this,that"), + }, + }, + managedSecret: &corev1.Secret{ + Data: map[string][]byte{}, + }, + featureGates: configv1.FeatureGates{ + OpenShift: configv1.OpenShiftFeatureGates{ + Enabled: true, + ManagedAuthEnv: true, + }, + }, + wantError: "missing secret field: region", + }, + { + name: "managed auth - no auth override", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "environment": []byte("here"), + "account_name": []byte("test-account-name"), + "container": []byte("this,that"), + "region": []byte("test-region"), + "account_key": []byte("test-account-key"), + }, + }, + managedSecret: &corev1.Secret{ + Data: map[string][]byte{}, + }, + featureGates: configv1.FeatureGates{ + OpenShift: configv1.OpenShiftFeatureGates{ + Enabled: true, + ManagedAuthEnv: true, + }, + }, + wantError: errAzureManagedIdentityNoOverride.Error(), + }, + { + name: "audience used with static authentication", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "environment": []byte("here"), + "container": []byte("this,that"), + "account_name": []byte("id"), + "account_key": []byte("secret"), + "audience": []byte("test-audience"), + }, + }, + wantError: "secret field not allowed: audience", + }, { name: "mandatory for normal authentication set", secret: &corev1.Secret{ @@ -180,6 +240,27 @@ func TestAzureExtract(t *testing.T) { }, }, }, + { + name: "mandatory for managed workload-identity set", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "environment": []byte("here"), + "account_name": []byte("test-account-name"), + "container": []byte("this,that"), + "region": []byte("test-region"), + }, + }, + managedSecret: &corev1.Secret{ + Data: map[string][]byte{}, + }, + featureGates: configv1.FeatureGates{ + OpenShift: configv1.OpenShiftFeatureGates{ + Enabled: true, + ManagedAuthEnv: true, + }, + }, + }, { name: "all set including optional", secret: &corev1.Secret{ @@ -199,7 +280,7 @@ func TestAzureExtract(t 
*testing.T) { t.Run(tst.name, func(t *testing.T) { t.Parallel() - opts, err := extractSecrets(lokiv1.ObjectStorageSecretAzure, tst.secret, nil, configv1.FeatureGates{}) + opts, err := extractSecrets(lokiv1.ObjectStorageSecretAzure, tst.secret, tst.managedSecret, tst.featureGates) if tst.wantError == "" { require.NoError(t, err) require.NotEmpty(t, opts.SecretName) @@ -233,13 +314,47 @@ func TestGCSExtract(t *testing.T) { }, wantError: "missing secret field: key.json", }, + { + name: "missing audience", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketname": []byte("here"), + "key.json": []byte("{\"type\": \"external_account\"}"), + }, + }, + wantError: "missing secret field: audience", + }, + { + name: "credential_source file no override", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketname": []byte("here"), + "audience": []byte("test"), + "key.json": []byte("{\"type\": \"external_account\", \"credential_source\": {\"file\": \"/custom/path/to/secret/gcp/serviceaccount/token\"}}"), + }, + }, + wantError: "credential source in secret needs to point to token file: /var/run/secrets/gcp/serviceaccount/token", + }, { name: "all set", secret: &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{Name: "test"}, Data: map[string][]byte{ "bucketname": []byte("here"), - "key.json": []byte("{\"type\": \"SA\"}"), + "key.json": []byte("{\"type\": \"service_account\"}"), + }, + }, + }, + { + name: "mandatory for workload-identity set", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketname": []byte("here"), + "audience": []byte("test"), + "key.json": []byte("{\"type\": \"external_account\", \"credential_source\": {\"file\": \"/var/run/secrets/gcp/serviceaccount/token\"}}"), }, }, }, diff --git a/operator/internal/manifests/openshift/credentialsrequest.go b/operator/internal/manifests/openshift/credentialsrequest.go index d2da20a194534..2962b61d0d1ef 100644 --- a/operator/internal/manifests/openshift/credentialsrequest.go +++ b/operator/internal/manifests/openshift/credentialsrequest.go @@ -74,6 +74,34 @@ func encodeProviderSpec(stackName string, env *ManagedAuthEnv) (*runtime.RawExte STSIAMRoleARN: env.AWS.RoleARN, } secretName = fmt.Sprintf("%s-aws-creds", stackName) + case env.Azure != nil: + azure := env.Azure + + spec = &cloudcredentialv1.AzureProviderSpec{ + Permissions: []string{ + "Microsoft.Storage/storageAccounts/blobServices/read", + "Microsoft.Storage/storageAccounts/blobServices/containers/read", + "Microsoft.Storage/storageAccounts/blobServices/containers/write", + "Microsoft.Storage/storageAccounts/blobServices/generateUserDelegationKey/action", + "Microsoft.Storage/storageAccounts/read", + "Microsoft.Storage/storageAccounts/write", + "Microsoft.Storage/storageAccounts/delete", + "Microsoft.Storage/storageAccounts/listKeys/action", + "Microsoft.Resources/tags/write", + }, + DataPermissions: []string{ + "Microsoft.Storage/storageAccounts/blobServices/containers/blobs/delete", + "Microsoft.Storage/storageAccounts/blobServices/containers/blobs/write", + "Microsoft.Storage/storageAccounts/blobServices/containers/blobs/read", + "Microsoft.Storage/storageAccounts/blobServices/containers/blobs/add/action", + "Microsoft.Storage/storageAccounts/blobServices/containers/blobs/move/action", + }, + AzureClientID: azure.ClientID, + AzureRegion: azure.Region, + AzureSubscriptionID: azure.SubscriptionID, + AzureTenantID: azure.TenantID, 
+ } + secretName = fmt.Sprintf("%s-azure-creds", stackName) } encodedSpec, err := cloudcredentialv1.Codec.EncodeProviderSpec(spec.DeepCopyObject()) @@ -84,6 +112,11 @@ func DiscoverManagedAuthEnv() *ManagedAuthEnv { // AWS roleARN := os.Getenv("ROLEARN") + // Azure + clientID := os.Getenv("CLIENTID") + tenantID := os.Getenv("TENANTID") + subscriptionID := os.Getenv("SUBSCRIPTIONID") + switch { case roleARN != "": return &ManagedAuthEnv{ @@ -91,6 +124,14 @@ func DiscoverManagedAuthEnv() *ManagedAuthEnv { RoleARN: roleARN, }, } + case clientID != "" && tenantID != "" && subscriptionID != "": + return &ManagedAuthEnv{ + Azure: &AzureWIFEnvironment{ + ClientID: clientID, + SubscriptionID: subscriptionID, + TenantID: tenantID, + }, + } } return nil diff --git a/operator/internal/manifests/openshift/options.go b/operator/internal/manifests/openshift/options.go index e5d33a3355269..9bc2e4faae36e 100644 --- a/operator/internal/manifests/openshift/options.go +++ b/operator/internal/manifests/openshift/options.go @@ -59,8 +59,16 @@ type AWSSTSEnv struct { RoleARN string } +type AzureWIFEnvironment struct { + ClientID string + SubscriptionID string + TenantID string + Region string +} + type ManagedAuthEnv struct { - AWS *AWSSTSEnv + AWS *AWSSTSEnv + Azure *AzureWIFEnvironment } // NewOptions returns an openshift options struct. diff --git a/operator/internal/manifests/storage/configure.go b/operator/internal/manifests/storage/configure.go index da5f6970da171..49958ebec7b9c 100644 --- a/operator/internal/manifests/storage/configure.go +++ b/operator/internal/manifests/storage/configure.go @@ -193,6 +193,16 @@ func managedAuthCredentials(opts Options) []corev1.EnvVar { } } case lokiv1.ObjectStorageSecretAzure: + if opts.OpenShift.ManagedAuthEnabled() { + return []corev1.EnvVar{ + envVarFromSecret(EnvAzureStorageAccountName, opts.SecretName, KeyAzureStorageAccountName), + envVarFromSecret(EnvAzureClientID, opts.OpenShift.CloudCredentials.SecretName, azureManagedCredentialKeyClientID), + envVarFromSecret(EnvAzureTenantID, opts.OpenShift.CloudCredentials.SecretName, azureManagedCredentialKeyTenantID), + envVarFromSecret(EnvAzureSubscriptionID, opts.OpenShift.CloudCredentials.SecretName, azureManagedCredentialKeySubscriptionID), + envVarFromValue(EnvAzureFederatedTokenFile, path.Join(azureTokenVolumeDirectory, "token")), + } + } + return []corev1.EnvVar{ envVarFromSecret(EnvAzureStorageAccountName, opts.SecretName, KeyAzureStorageAccountName), envVarFromSecret(EnvAzureClientID, opts.SecretName, KeyAzureStorageClientID), @@ -200,6 +210,10 @@ func managedAuthCredentials(opts Options) []corev1.EnvVar { envVarFromSecret(EnvAzureSubscriptionID, opts.SecretName, KeyAzureStorageSubscriptionID), envVarFromValue(EnvAzureFederatedTokenFile, path.Join(azureTokenVolumeDirectory, "token")), } + case lokiv1.ObjectStorageSecretGCS: + return []corev1.EnvVar{ + envVarFromValue(EnvGoogleApplicationCredentials, path.Join(secretDirectory, KeyGCPServiceAccountKeyFilename)), + } default: return []corev1.EnvVar{} } @@ -280,6 +294,8 @@ func managedAuthEnabled(opts Options) bool { return opts.S3 != nil && opts.S3.STS case lokiv1.ObjectStorageSecretAzure: return opts.Azure != nil && opts.Azure.WorkloadIdentity + case lokiv1.ObjectStorageSecretGCS: + return opts.GCS != nil && opts.GCS.WorkloadIdentity default: return false } @@ -292,6 +308,8 @@ func saTokenVolumeMount(opts Options) corev1.VolumeMount { tokenPath = AWSTokenVolumeDirectory case lokiv1.ObjectStorageSecretAzure: tokenPath = azureTokenVolumeDirectory + case 
lokiv1.ObjectStorageSecretGCS: + tokenPath = gcpTokenVolumeDirectory } return corev1.VolumeMount{ Name: saTokenVolumeName, @@ -310,6 +328,11 @@ func saTokenVolume(opts Options) corev1.Volume { } case lokiv1.ObjectStorageSecretAzure: audience = azureDefaultAudience + if opts.Azure.Audience != "" { + audience = opts.Azure.Audience + } + case lokiv1.ObjectStorageSecretGCS: + audience = opts.GCS.Audience } return corev1.Volume{ Name: saTokenVolumeName, diff --git a/operator/internal/manifests/storage/configure_test.go b/operator/internal/manifests/storage/configure_test.go index 0c0505fbe63e5..f17a9af6c3524 100644 --- a/operator/internal/manifests/storage/configure_test.go +++ b/operator/internal/manifests/storage/configure_test.go @@ -293,10 +293,14 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, { - desc: "object storage GCS", + desc: "object storage Azure with WIF and custom audience", opts: Options{ SecretName: "test", - SharedStore: lokiv1.ObjectStorageSecretGCS, + SharedStore: lokiv1.ObjectStorageSecretAzure, + Azure: &AzureStorageConfig{ + WorkloadIdentity: true, + Audience: "custom-audience", + }, }, dpl: &appsv1.Deployment{ Spec: appsv1.DeploymentSpec{ @@ -324,11 +328,60 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { ReadOnly: false, MountPath: "/etc/storage/secrets", }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: "/var/run/secrets/azure/serviceaccount", + }, }, Env: []corev1.EnvVar{ { - Name: EnvGoogleApplicationCredentials, - Value: "/etc/storage/secrets/key.json", + Name: EnvAzureStorageAccountName, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAzureStorageAccountName, + }, + }, + }, + { + Name: EnvAzureClientID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAzureStorageClientID, + }, + }, + }, + { + Name: EnvAzureTenantID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAzureStorageTenantID, + }, + }, + }, + { + Name: EnvAzureSubscriptionID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAzureStorageSubscriptionID, + }, + }, + }, + { + Name: EnvAzureFederatedTokenFile, + Value: "/var/run/secrets/azure/serviceaccount/token", }, }, }, @@ -342,6 +395,22 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: "custom-audience", + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, }, }, }, @@ -349,10 +418,20 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, { - desc: "object storage S3", + desc: "object storage Azure with WIF and OpenShift Managed Credentials", opts: Options{ SecretName: "test", - SharedStore: lokiv1.ObjectStorageSecretS3, + SharedStore: lokiv1.ObjectStorageSecretAzure, + Azure: &AzureStorageConfig{ + WorkloadIdentity: true, + }, + OpenShift: OpenShiftOptions{ + Enabled: true, + CloudCredentials: CloudCredentials{ + SecretName: "cloud-credentials", + 
SHA1: "deadbeef", + }, + }, }, dpl: &appsv1.Deployment{ Spec: appsv1.DeploymentSpec{ @@ -380,30 +459,65 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { ReadOnly: false, MountPath: "/etc/storage/secrets", }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: "/var/run/secrets/azure/serviceaccount", + }, + { + Name: "cloud-credentials", + MountPath: managedAuthSecretDirectory, + }, }, Env: []corev1.EnvVar{ { - Name: EnvAWSAccessKeyID, + Name: EnvAzureStorageAccountName, ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ Name: "test", }, - Key: KeyAWSAccessKeyID, + Key: KeyAzureStorageAccountName, }, }, }, { - Name: EnvAWSAccessKeySecret, + Name: EnvAzureClientID, ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ - Name: "test", + Name: "cloud-credentials", }, - Key: KeyAWSAccessKeySecret, + Key: azureManagedCredentialKeyClientID, + }, + }, + }, + { + Name: EnvAzureTenantID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "cloud-credentials", + }, + Key: azureManagedCredentialKeyTenantID, }, }, }, + { + Name: EnvAzureSubscriptionID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "cloud-credentials", + }, + Key: azureManagedCredentialKeySubscriptionID, + }, + }, + }, + { + Name: EnvAzureFederatedTokenFile, + Value: "/var/run/secrets/azure/serviceaccount/token", + }, }, }, }, @@ -416,6 +530,30 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: azureDefaultAudience, + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, + { + Name: "cloud-credentials", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "cloud-credentials", + }, + }, + }, }, }, }, @@ -423,14 +561,10 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, { - desc: "object storage S3 in STS Mode", + desc: "object storage GCS", opts: Options{ SecretName: "test", - SharedStore: lokiv1.ObjectStorageSecretS3, - S3: &S3StorageConfig{ - STS: true, - Audience: "test", - }, + SharedStore: lokiv1.ObjectStorageSecretGCS, }, dpl: &appsv1.Deployment{ Spec: appsv1.DeploymentSpec{ @@ -458,27 +592,11 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { ReadOnly: false, MountPath: "/etc/storage/secrets", }, - { - Name: saTokenVolumeName, - ReadOnly: false, - MountPath: "/var/run/secrets/aws/serviceaccount", - }, }, Env: []corev1.EnvVar{ { - Name: EnvAWSRoleArn, - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: "test", - }, - Key: KeyAWSRoleArn, - }, - }, - }, - { - Name: "AWS_WEB_IDENTITY_TOKEN_FILE", - Value: "/var/run/secrets/aws/serviceaccount/token", + Name: EnvGoogleApplicationCredentials, + Value: "/etc/storage/secrets/key.json", }, }, }, @@ -492,22 +610,6 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, - { - Name: saTokenVolumeName, - VolumeSource: corev1.VolumeSource{ - Projected: &corev1.ProjectedVolumeSource{ - Sources: 
[]corev1.VolumeProjection{ - { - ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ - Audience: "test", - ExpirationSeconds: ptr.To[int64](3600), - Path: corev1.ServiceAccountTokenKey, - }, - }, - }, - }, - }, - }, }, }, }, @@ -515,19 +617,13 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, { - desc: "object storage S3 in STS Mode in OpenShift", + desc: "object storage GCS with Workload Identity", opts: Options{ SecretName: "test", - SharedStore: lokiv1.ObjectStorageSecretS3, - S3: &S3StorageConfig{ - STS: true, - }, - OpenShift: OpenShiftOptions{ - Enabled: true, - CloudCredentials: CloudCredentials{ - SecretName: "cloud-credentials", - SHA1: "deadbeef", - }, + SharedStore: lokiv1.ObjectStorageSecretGCS, + GCS: &GCSStorageConfig{ + Audience: "test", + WorkloadIdentity: true, }, }, dpl: &appsv1.Deployment{ @@ -559,22 +655,13 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/aws/serviceaccount", - }, - { - Name: "cloud-credentials", - ReadOnly: false, - MountPath: "/etc/storage/managed-auth", + MountPath: "/var/run/secrets/gcp/serviceaccount", }, }, Env: []corev1.EnvVar{ { - Name: "AWS_SHARED_CREDENTIALS_FILE", - Value: "/etc/storage/managed-auth/credentials", - }, - { - Name: "AWS_SDK_LOAD_CONFIG", - Value: "true", + Name: EnvGoogleApplicationCredentials, + Value: "/etc/storage/secrets/key.json", }, }, }, @@ -595,7 +682,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { Sources: []corev1.VolumeProjection{ { ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ - Audience: awsDefaultAudience, + Audience: "test", ExpirationSeconds: ptr.To[int64](3600), Path: corev1.ServiceAccountTokenKey, }, @@ -604,14 +691,6 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, - { - Name: "cloud-credentials", - VolumeSource: corev1.VolumeSource{ - Secret: &corev1.SecretVolumeSource{ - SecretName: "cloud-credentials", - }, - }, - }, }, }, }, @@ -619,16 +698,10 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, { - desc: "object storage S3 with SSE KMS encryption context", + desc: "object storage S3", opts: Options{ SecretName: "test", SharedStore: lokiv1.ObjectStorageSecretS3, - S3: &S3StorageConfig{ - SSE: S3SSEConfig{ - Type: SSEKMSType, - KMSEncryptionContext: "test", - }, - }, }, dpl: &appsv1.Deployment{ Spec: appsv1.DeploymentSpec{ @@ -680,17 +753,6 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, - { - Name: EnvAWSSseKmsEncryptionContext, - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: "test", - }, - Key: KeyAWSSseKmsEncryptionContext, - }, - }, - }, }, }, }, @@ -710,14 +772,673 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, { - desc: "object storage Swift", + desc: "object storage S3 in STS Mode", opts: Options{ SecretName: "test", - SharedStore: lokiv1.ObjectStorageSecretSwift, - }, - dpl: &appsv1.Deployment{ - Spec: appsv1.DeploymentSpec{ - Template: corev1.PodTemplateSpec{ + SharedStore: lokiv1.ObjectStorageSecretS3, + S3: &S3StorageConfig{ + STS: true, + Audience: "test", + }, + }, + dpl: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: 
corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: "/var/run/secrets/aws/serviceaccount", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvAWSRoleArn, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAWSRoleArn, + }, + }, + }, + { + Name: "AWS_WEB_IDENTITY_TOKEN_FILE", + Value: "/var/run/secrets/aws/serviceaccount/token", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "test", + }, + }, + }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: "test", + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + desc: "object storage S3 in STS Mode in OpenShift", + opts: Options{ + SecretName: "test", + SharedStore: lokiv1.ObjectStorageSecretS3, + S3: &S3StorageConfig{ + STS: true, + }, + OpenShift: OpenShiftOptions{ + Enabled: true, + CloudCredentials: CloudCredentials{ + SecretName: "cloud-credentials", + SHA1: "deadbeef", + }, + }, + }, + dpl: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: "/var/run/secrets/aws/serviceaccount", + }, + { + Name: "cloud-credentials", + ReadOnly: false, + MountPath: "/etc/storage/managed-auth", + }, + }, + Env: []corev1.EnvVar{ + { + Name: "AWS_SHARED_CREDENTIALS_FILE", + Value: "/etc/storage/managed-auth/credentials", + }, + { + Name: "AWS_SDK_LOAD_CONFIG", + Value: "true", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "test", + }, + }, + }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: awsDefaultAudience, + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, + { + Name: "cloud-credentials", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "cloud-credentials", + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + desc: "object storage S3 with SSE KMS encryption context", + opts: Options{ + SecretName: "test", + SharedStore: lokiv1.ObjectStorageSecretS3, + S3: &S3StorageConfig{ + SSE: S3SSEConfig{ + Type: SSEKMSType, + KMSEncryptionContext: "test", + }, + }, + }, + dpl: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + 
Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvAWSAccessKeyID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAWSAccessKeyID, + }, + }, + }, + { + Name: EnvAWSAccessKeySecret, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAWSAccessKeySecret, + }, + }, + }, + { + Name: EnvAWSSseKmsEncryptionContext, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAWSSseKmsEncryptionContext, + }, + }, + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "test", + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + desc: "object storage Swift", + opts: Options{ + SecretName: "test", + SharedStore: lokiv1.ObjectStorageSecretSwift, + }, + dpl: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvSwiftUsername, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeySwiftUsername, + }, + }, + }, + { + Name: EnvSwiftPassword, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeySwiftPassword, + }, + }, + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "test", + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + + for _, tc := range tc { + tc := tc + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + err := ConfigureDeployment(tc.dpl, tc.opts) + require.NoError(t, err) + require.Equal(t, tc.want, tc.dpl) + }) + } +} + +func TestConfigureStatefulSetForStorageType(t *testing.T) { + type tt struct { + desc string + opts Options + sts *appsv1.StatefulSet + want *appsv1.StatefulSet + } + + tc := []tt{ + { + desc: "object storage AlibabaCloud", + opts: Options{ + SecretName: "test", + SharedStore: lokiv1.ObjectStorageSecretAlibabaCloud, + }, + sts: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: 
[]corev1.Container{ + { + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvAlibabaCloudAccessKeyID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAlibabaCloudAccessKeyID, + }, + }, + }, + { + Name: EnvAlibabaCloudAccessKeySecret, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAlibabaCloudSecretAccessKey, + }, + }, + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "test", + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + desc: "object storage Azure", + opts: Options{ + SecretName: "test", + SharedStore: lokiv1.ObjectStorageSecretAzure, + }, + sts: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvAzureStorageAccountName, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAzureStorageAccountName, + }, + }, + }, + { + Name: EnvAzureStorageAccountKey, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAzureStorageAccountKey, + }, + }, + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "test", + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + desc: "object storage Azure with WIF", + opts: Options{ + SecretName: "test", + SharedStore: lokiv1.ObjectStorageSecretAzure, + Azure: &AzureStorageConfig{ + WorkloadIdentity: true, + }, + }, + sts: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: "/var/run/secrets/azure/serviceaccount", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvAzureStorageAccountName, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAzureStorageAccountName, + }, + }, + }, + { + Name: EnvAzureClientID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: 
KeyAzureStorageClientID, + }, + }, + }, + { + Name: EnvAzureTenantID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAzureStorageTenantID, + }, + }, + }, + { + Name: EnvAzureSubscriptionID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAzureStorageSubscriptionID, + }, + }, + }, + { + Name: EnvAzureFederatedTokenFile, + Value: "/var/run/secrets/azure/serviceaccount/token", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "test", + }, + }, + }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: azureDefaultAudience, + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + desc: "object storage Azure with WIF and custom audience", + opts: Options{ + SecretName: "test", + SharedStore: lokiv1.ObjectStorageSecretAzure, + Azure: &AzureStorageConfig{ + WorkloadIdentity: true, + Audience: "custom-audience", + }, + }, + sts: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ Containers: []corev1.Container{ { @@ -728,8 +1449,8 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, - want: &appsv1.Deployment{ - Spec: appsv1.DeploymentSpec{ + want: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ Containers: []corev1.Container{ @@ -741,30 +1462,61 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { ReadOnly: false, MountPath: "/etc/storage/secrets", }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: "/var/run/secrets/azure/serviceaccount", + }, }, Env: []corev1.EnvVar{ { - Name: EnvSwiftUsername, + Name: EnvAzureStorageAccountName, ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ Name: "test", }, - Key: KeySwiftUsername, + Key: KeyAzureStorageAccountName, }, }, }, { - Name: EnvSwiftPassword, + Name: EnvAzureClientID, ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ Name: "test", }, - Key: KeySwiftPassword, + Key: KeyAzureStorageClientID, + }, + }, + }, + { + Name: EnvAzureTenantID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAzureStorageTenantID, }, }, }, + { + Name: EnvAzureSubscriptionID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAzureStorageSubscriptionID, + }, + }, + }, + { + Name: EnvAzureFederatedTokenFile, + Value: "/var/run/secrets/azure/serviceaccount/token", + }, }, }, }, @@ -777,39 +1529,43 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: 
&corev1.ServiceAccountTokenProjection{ + Audience: "custom-audience", + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, }, }, }, }, }, }, - } - - for _, tc := range tc { - tc := tc - t.Run(tc.desc, func(t *testing.T) { - t.Parallel() - err := ConfigureDeployment(tc.dpl, tc.opts) - require.NoError(t, err) - require.Equal(t, tc.want, tc.dpl) - }) - } -} - -func TestConfigureStatefulSetForStorageType(t *testing.T) { - type tt struct { - desc string - opts Options - sts *appsv1.StatefulSet - want *appsv1.StatefulSet - } - - tc := []tt{ { - desc: "object storage AlibabaCloud", + desc: "object storage Azure with WIF and OpenShift Managed Credentials", opts: Options{ SecretName: "test", - SharedStore: lokiv1.ObjectStorageSecretAlibabaCloud, + SharedStore: lokiv1.ObjectStorageSecretAzure, + Azure: &AzureStorageConfig{ + WorkloadIdentity: true, + }, + OpenShift: OpenShiftOptions{ + Enabled: true, + CloudCredentials: CloudCredentials{ + SecretName: "cloud-credentials", + SHA1: "deadbeef", + }, + }, }, sts: &appsv1.StatefulSet{ Spec: appsv1.StatefulSetSpec{ @@ -837,30 +1593,65 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { ReadOnly: false, MountPath: "/etc/storage/secrets", }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: "/var/run/secrets/azure/serviceaccount", + }, + { + Name: "cloud-credentials", + MountPath: managedAuthSecretDirectory, + }, }, Env: []corev1.EnvVar{ { - Name: EnvAlibabaCloudAccessKeyID, + Name: EnvAzureStorageAccountName, ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ Name: "test", }, - Key: KeyAlibabaCloudAccessKeyID, + Key: KeyAzureStorageAccountName, }, }, }, { - Name: EnvAlibabaCloudAccessKeySecret, + Name: EnvAzureClientID, ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ - Name: "test", + Name: "cloud-credentials", }, - Key: KeyAlibabaCloudSecretAccessKey, + Key: azureManagedCredentialKeyClientID, + }, + }, + }, + { + Name: EnvAzureTenantID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "cloud-credentials", + }, + Key: azureManagedCredentialKeyTenantID, + }, + }, + }, + { + Name: EnvAzureSubscriptionID, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "cloud-credentials", + }, + Key: azureManagedCredentialKeySubscriptionID, }, }, }, + { + Name: EnvAzureFederatedTokenFile, + Value: "/var/run/secrets/azure/serviceaccount/token", + }, }, }, }, @@ -873,6 +1664,30 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: azureDefaultAudience, + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, + { + Name: "cloud-credentials", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "cloud-credentials", + }, + }, + }, }, }, }, @@ -880,10 +1695,10 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, { - desc: "object storage Azure", + desc: "object storage GCS", opts: Options{ SecretName: "test", - SharedStore: 
lokiv1.ObjectStorageSecretAzure, + SharedStore: lokiv1.ObjectStorageSecretGCS, }, sts: &appsv1.StatefulSet{ Spec: appsv1.StatefulSetSpec{ @@ -914,26 +1729,8 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, Env: []corev1.EnvVar{ { - Name: EnvAzureStorageAccountName, - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: "test", - }, - Key: KeyAzureStorageAccountName, - }, - }, - }, - { - Name: EnvAzureStorageAccountKey, - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: "test", - }, - Key: KeyAzureStorageAccountKey, - }, - }, + Name: EnvGoogleApplicationCredentials, + Value: "/etc/storage/secrets/key.json", }, }, }, @@ -954,10 +1751,14 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, { - desc: "object storage GCS", + desc: "object storage GCS with Workload Identity", opts: Options{ SecretName: "test", SharedStore: lokiv1.ObjectStorageSecretGCS, + GCS: &GCSStorageConfig{ + Audience: "test", + WorkloadIdentity: true, + }, }, sts: &appsv1.StatefulSet{ Spec: appsv1.StatefulSetSpec{ @@ -985,6 +1786,11 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { ReadOnly: false, MountPath: "/etc/storage/secrets", }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: "/var/run/secrets/gcp/serviceaccount", + }, }, Env: []corev1.EnvVar{ { @@ -1003,6 +1809,22 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: "test", + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, }, }, }, diff --git a/operator/internal/manifests/storage/options.go b/operator/internal/manifests/storage/options.go index 86aa494318519..e525640da6c0c 100644 --- a/operator/internal/manifests/storage/options.go +++ b/operator/internal/manifests/storage/options.go @@ -28,12 +28,15 @@ type AzureStorageConfig struct { Env string Container string EndpointSuffix string + Audience string WorkloadIdentity bool } // GCSStorageConfig for GCS storage config type GCSStorageConfig struct { - Bucket string + Bucket string + Audience string + WorkloadIdentity bool } // S3StorageConfig for S3 storage config diff --git a/operator/internal/manifests/storage/var.go b/operator/internal/manifests/storage/var.go index 6184cff0463da..49ec0b0a16ae1 100644 --- a/operator/internal/manifests/storage/var.go +++ b/operator/internal/manifests/storage/var.go @@ -86,7 +86,13 @@ const ( KeyAzureStorageEndpointSuffix = "endpoint_suffix" // KeyAzureEnvironmentName is the secret data key for the Azure cloud environment name. KeyAzureEnvironmentName = "environment" + // KeyAzureRegion is the secret data key for storing the Azure cloud region. + KeyAzureRegion = "region" + // KeyAzureAudience is the secret data key for customizing the audience used for the ServiceAccount token. + KeyAzureAudience = "audience" + // KeyGCPWorkloadIdentityProviderAudience is the secret data key for the GCP Workload Identity Provider audience. + KeyGCPWorkloadIdentityProviderAudience = "audience" // KeyGCPStorageBucketName is the secret data key for the GCS bucket name. 
KeyGCPStorageBucketName = "bucketname" + // KeyGCPServiceAccountKeyFilename is the service account key filename containing the Google authentication credentials. @@ -136,5 +142,12 @@ const ( azureDefaultAudience = "api://AzureADTokenExchange" azureTokenVolumeDirectory = "/var/run/secrets/azure/serviceaccount" + azureManagedCredentialKeyClientID = "azure_client_id" + azureManagedCredentialKeyTenantID = "azure_tenant_id" + azureManagedCredentialKeySubscriptionID = "azure_subscription_id" + + gcpTokenVolumeDirectory = "/var/run/secrets/gcp/serviceaccount" + GCPDefautCredentialsFile = gcpTokenVolumeDirectory + "/token" + AnnotationCredentialsRequestsSecretRef = "loki.grafana.com/credentials-request-secret-ref" ) diff --git a/pkg/bloomcompactor/bloomcompactor.go b/pkg/bloomcompactor/bloomcompactor.go index cf3b3fafcb6d1..8a3e7c6266c1d 100644 --- a/pkg/bloomcompactor/bloomcompactor.go +++ b/pkg/bloomcompactor/bloomcompactor.go @@ -2,23 +2,25 @@ package bloomcompactor import ( "context" - "fmt" + "math" + "sync" "time" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/backoff" + "github.com/grafana/dskit/concurrency" "github.com/grafana/dskit/multierror" "github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" - "github.com/grafana/loki/pkg/bloomutils" - "github.com/grafana/loki/pkg/compactor" + "github.com/grafana/loki/pkg/storage" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/storage/stores" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" - "github.com/grafana/loki/pkg/util" ) /* @@ -33,38 +35,74 @@ Bloom-compactor regularly runs to check for changes in meta.jsons and runs compa type Compactor struct { services.Service - cfg Config - logger log.Logger - limits Limits + cfg Config + schemaCfg config.SchemaConfig + logger log.Logger + limits Limits - // temporary workaround until store has implemented read/write shipper interface - store bloomshipper.Store + tsdbStore TSDBStore + // TODO(owen-d): ShardingStrategy + controller *SimpleBloomController + + // temporary workaround until bloomStore has implemented read/write shipper interface + bloomStore bloomshipper.Store sharding ShardingStrategy - metrics *metrics + metrics *Metrics btMetrics *v1.Metrics } func New( cfg Config, - store bloomshipper.Store, + schemaCfg config.SchemaConfig, + storeCfg storage.Config, + clientMetrics storage.ClientMetrics, + fetcherProvider stores.ChunkFetcherProvider, sharding ShardingStrategy, limits Limits, logger log.Logger, r prometheus.Registerer, ) (*Compactor, error) { c := &Compactor{ - cfg: cfg, - store: store, - logger: logger, - sharding: sharding, - limits: limits, + cfg: cfg, + schemaCfg: schemaCfg, + logger: logger, + sharding: sharding, + limits: limits, + } + + tsdbStore, err := NewTSDBStores(schemaCfg, storeCfg, clientMetrics) + if err != nil { + return nil, errors.Wrap(err, "failed to create TSDB store") + } + c.tsdbStore = tsdbStore + + // TODO(owen-d): export bloomstore as a dependency that can be reused by the compactor & gateway rather than + bloomStore, err := bloomshipper.NewBloomStore(schemaCfg.Configs, storeCfg, clientMetrics, nil, nil, logger) + if err != nil { + return nil, errors.Wrap(err, "failed to create bloom store") } + c.bloomStore = bloomStore // initialize metrics c.btMetrics = v1.NewMetrics(prometheus.WrapRegistererWithPrefix("loki_bloom_tokenizer", r)) - c.metrics = 
newMetrics(r) + c.metrics = NewMetrics(r, c.btMetrics) + + chunkLoader := NewStoreChunkLoader( + NewFetcherProviderAdapter(fetcherProvider), + c.metrics, + ) + + c.controller = NewSimpleBloomController( + c.tsdbStore, + c.bloomStore, + chunkLoader, + c.limits, + c.metrics, + c.logger, + ) + c.metrics.compactionRunInterval.Set(cfg.CompactionInterval.Seconds()) c.Service = services.NewBasicService(c.starting, c.running, c.stopping) @@ -76,192 +114,193 @@ func (c *Compactor) starting(_ context.Context) (err error) { return err } +func (c *Compactor) stopping(_ error) error { + c.metrics.compactorRunning.Set(0) + return nil +} + func (c *Compactor) running(ctx context.Context) error { - // Run an initial compaction before starting the interval. - if err := c.runCompaction(ctx); err != nil { - level.Error(c.logger).Log("msg", "failed to run compaction", "err", err) + // run once at beginning + if err := c.runOne(ctx); err != nil { + return err } - ticker := time.NewTicker(util.DurationWithJitter(c.cfg.CompactionInterval, 0.05)) + ticker := time.NewTicker(c.cfg.CompactionInterval) defer ticker.Stop() - for { select { - case start := <-ticker.C: - c.metrics.compactionRunsStarted.Inc() - if err := c.runCompaction(ctx); err != nil { - c.metrics.compactionRunsCompleted.WithLabelValues(statusFailure).Inc() - c.metrics.compactionRunTime.WithLabelValues(statusFailure).Observe(time.Since(start).Seconds()) - level.Error(c.logger).Log("msg", "failed to run compaction", "err", err) - continue - } - c.metrics.compactionRunsCompleted.WithLabelValues(statusSuccess).Inc() - c.metrics.compactionRunTime.WithLabelValues(statusSuccess).Observe(time.Since(start).Seconds()) case <-ctx.Done(): - return nil + return ctx.Err() + + case <-ticker.C: + if err := c.runOne(ctx); err != nil { + level.Error(c.logger).Log("msg", "compaction iteration failed", "err", err) + return err + } } } } -func (c *Compactor) stopping(_ error) error { - c.metrics.compactorRunning.Set(0) - return nil -} - -func (c *Compactor) runCompaction(ctx context.Context) error { - var tables []string - // TODO(owen-d): resolve tables +func runWithRetries( + ctx context.Context, + minBackoff, maxBackoff time.Duration, + maxRetries int, + f func(ctx context.Context) error, +) error { + var lastErr error - // process most recent tables first - tablesIntervals := getIntervalsForTables(tables) - compactor.SortTablesByRange(tables) + retries := backoff.New(ctx, backoff.Config{ + MinBackoff: minBackoff, + MaxBackoff: maxBackoff, + MaxRetries: maxRetries, + }) - // TODO(owen-d): parallelize at the bottom level, not the top level. - // Can dispatch to a queue & wait. - for _, table := range tables { - logger := log.With(c.logger, "table", table) - err := c.compactTable(ctx, logger, table, tablesIntervals[table]) - if err != nil { - level.Error(logger).Log("msg", "failed to compact table", "err", err) - return errors.Wrapf(err, "failed to compact table %s", table) + for retries.Ongoing() { + lastErr = f(ctx) + if lastErr == nil { + return nil } + + retries.Wait() } - return nil + + return lastErr +} + +type tenantTable struct { + tenant string + table DayTable + ownershipRange v1.FingerprintBounds } -func (c *Compactor) compactTable(ctx context.Context, logger log.Logger, tableName string, tableInterval model.Interval) error { - // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). 
- if err := ctx.Err(); err != nil { - return fmt.Errorf("interrupting compaction of table: %w", err) +func (c *Compactor) tenants(ctx context.Context, table DayTable) (v1.Iterator[string], error) { + tenants, err := c.tsdbStore.UsersForPeriod(ctx, table) + if err != nil { + return nil, errors.Wrap(err, "getting tenants") } - var tenants []string + return v1.NewSliceIter(tenants), nil +} - level.Info(logger).Log("msg", "discovered tenants from bucket", "users", len(tenants)) - return c.compactUsers(ctx, logger, tableName, tableInterval, tenants) +// TODO(owen-d): implement w/ subrings +func (c *Compactor) ownsTenant(_ string) (ownershipRange v1.FingerprintBounds, owns bool) { + return v1.NewBounds(0, math.MaxUint64), true } -func (c *Compactor) compactUsers(ctx context.Context, logger log.Logger, tableName string, tableInterval model.Interval, tenants []string) error { - // Keep track of tenants owned by this shard, so that we can delete the local files for all other users. - errs := multierror.New() - ownedTenants := make(map[string]struct{}, len(tenants)) - for _, tenant := range tenants { - tenantLogger := log.With(logger, "tenant", tenant) +// runs a single round of compaction for all relevant tenants and tables +func (c *Compactor) runOne(ctx context.Context) error { + var workersErr error + var wg sync.WaitGroup + ch := make(chan tenantTable) + wg.Add(1) + go func() { + workersErr = c.runWorkers(ctx, ch) + wg.Done() + }() + + err := c.loadWork(ctx, ch) + + wg.Wait() + return multierror.New(workersErr, err, ctx.Err()).Err() +} - // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). - if err := ctx.Err(); err != nil { - return fmt.Errorf("interrupting compaction of tenants: %w", err) - } +func (c *Compactor) tables(ts time.Time) *dayRangeIterator { + from := model.TimeFromUnixNano(ts.Add(-c.cfg.MaxCompactionAge).UnixNano() / int64(config.ObjectStorageIndexRequiredPeriod)) + through := model.TimeFromUnixNano(ts.Add(-c.cfg.MinCompactionAge).UnixNano() / int64(config.ObjectStorageIndexRequiredPeriod)) + return newDayRangeIterator(DayTable(from), DayTable(through)) +} - // Skip tenant if compaction is not enabled - if !c.limits.BloomCompactorEnabled(tenant) { - level.Info(tenantLogger).Log("msg", "compaction disabled for tenant. Skipping.") - continue - } +func (c *Compactor) loadWork(ctx context.Context, ch chan<- tenantTable) error { + tables := c.tables(time.Now()) - // Skip this table if it is too old for the tenant limits. - now := model.Now() - tableMaxAge := c.limits.BloomCompactorMaxTableAge(tenant) - if tableMaxAge > 0 && tableInterval.Start.Before(now.Add(-tableMaxAge)) { - level.Debug(tenantLogger).Log("msg", "skipping tenant because table is too old", "table-max-age", tableMaxAge, "table-start", tableInterval.Start, "now", now) - continue - } + for tables.Next() && tables.Err() == nil && ctx.Err() == nil { - // Ensure the tenant ID belongs to our shard. 
- if !c.sharding.OwnsTenant(tenant) { - c.metrics.compactionRunSkippedTenants.Inc() - level.Debug(tenantLogger).Log("msg", "skipping tenant because it is not owned by this shard") - continue + table := tables.At() + tenants, err := c.tenants(ctx, table) + if err != nil { + return errors.Wrap(err, "getting tenants") } - ownedTenants[tenant] = struct{}{} - - start := time.Now() - if err := c.compactTenantWithRetries(ctx, tenantLogger, tableName, tenant); err != nil { - switch { - case errors.Is(err, context.Canceled): - // We don't want to count shutdowns as failed compactions because we will pick up with the rest of the compaction after the restart. - level.Info(tenantLogger).Log("msg", "compaction for tenant was interrupted by a shutdown") - return nil - default: - c.metrics.compactionRunTenantsCompleted.WithLabelValues(statusFailure).Inc() - c.metrics.compactionRunTenantsTime.WithLabelValues(statusFailure).Observe(time.Since(start).Seconds()) - level.Error(tenantLogger).Log("msg", "failed to compact tenant", "err", err) - errs.Add(err) + for tenants.Next() && tenants.Err() == nil && ctx.Err() == nil { + tenant := tenants.At() + ownershipRange, owns := c.ownsTenant(tenant) + if !owns { + continue + } + + select { + case ch <- tenantTable{tenant: tenant, table: table, ownershipRange: ownershipRange}: + case <-ctx.Done(): + return ctx.Err() } - continue } - c.metrics.compactionRunTenantsCompleted.WithLabelValues(statusSuccess).Inc() - c.metrics.compactionRunTenantsTime.WithLabelValues(statusSuccess).Observe(time.Since(start).Seconds()) - level.Info(tenantLogger).Log("msg", "successfully compacted tenant") + if err := tenants.Err(); err != nil { + return errors.Wrap(err, "iterating tenants") + } + } - return errs.Err() + if err := tables.Err(); err != nil { + return errors.Wrap(err, "iterating tables") + } - // TODO: Delete local files for unowned tenants, if there are any. + close(ch) + return ctx.Err() } -func (c *Compactor) compactTenant(ctx context.Context, logger log.Logger, _ string, tenant string) error { - level.Info(logger).Log("msg", "starting compaction of tenant") - - // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). - if err := ctx.Err(); err != nil { - return err - } - - // Tokenizer is not thread-safe so we need one per goroutine. 
- nGramLen := c.limits.BloomNGramLength(tenant) - nGramSkip := c.limits.BloomNGramSkip(tenant) - _ = v1.NewBloomTokenizer(nGramLen, nGramSkip, c.btMetrics) +func (c *Compactor) runWorkers(ctx context.Context, ch <-chan tenantTable) error { + + return concurrency.ForEachJob(ctx, c.cfg.WorkerParallelism, c.cfg.WorkerParallelism, func(ctx context.Context, idx int) error { + + for { + select { + case <-ctx.Done(): + return ctx.Err() + + case tt, ok := <-ch: + if !ok { + return nil + } + + if err := c.compactTenantTable(ctx, tt); err != nil { + return errors.Wrapf( + err, + "compacting tenant table (%s) for tenant (%s) with ownership (%s)", + tt.table, + tt.tenant, + tt.ownershipRange, + ) + } + } + } - rs, err := c.sharding.GetTenantSubRing(tenant).GetAllHealthy(RingOp) - if err != nil { - return err - } - tokenRanges := bloomutils.GetInstanceWithTokenRange(c.cfg.Ring.InstanceID, rs.Instances) - for _, tr := range tokenRanges { - level.Debug(logger).Log("msg", "got token range for instance", "id", tr.Instance.Id, "min", tr.MinToken, "max", tr.MaxToken) - } + }) - // TODO(owen-d): impl - return nil } -func runWithRetries( - ctx context.Context, - minBackoff, maxBackoff time.Duration, - maxRetries int, - f func(ctx context.Context) error, -) error { - var lastErr error +func (c *Compactor) compactTenantTable(ctx context.Context, tt tenantTable) error { + level.Info(c.logger).Log("msg", "compacting", "org_id", tt.tenant, "table", tt.table, "ownership", tt.ownershipRange) + return c.controller.buildBlocks(ctx, tt.table, tt.tenant, tt.ownershipRange) +} - retries := backoff.New(ctx, backoff.Config{ - MinBackoff: minBackoff, - MaxBackoff: maxBackoff, - MaxRetries: maxRetries, - }) +type dayRangeIterator struct { + min, max, cur DayTable +} - for retries.Ongoing() { - lastErr = f(ctx) - if lastErr == nil { - return nil - } +func newDayRangeIterator(min, max DayTable) *dayRangeIterator { + return &dayRangeIterator{min: min, max: max, cur: min.Dec()} +} - retries.Wait() - } +func (r *dayRangeIterator) Next() bool { + r.cur = r.cur.Inc() + return r.cur.Before(r.max) +} - return lastErr +func (r *dayRangeIterator) At() DayTable { + return r.cur } -func (c *Compactor) compactTenantWithRetries(ctx context.Context, logger log.Logger, tableName string, tenant string) error { - return runWithRetries( - ctx, - c.cfg.RetryMinBackoff, - c.cfg.RetryMaxBackoff, - c.cfg.CompactionRetries, - func(ctx context.Context) error { - return c.compactTenant(ctx, logger, tableName, tenant) - }, - ) +func (r *dayRangeIterator) Err() error { + return nil } diff --git a/pkg/bloomcompactor/config.go b/pkg/bloomcompactor/config.go index 884034fdd043d..37aac3310829a 100644 --- a/pkg/bloomcompactor/config.go +++ b/pkg/bloomcompactor/config.go @@ -2,8 +2,13 @@ package bloomcompactor import ( "flag" + "fmt" "time" + "github.com/prometheus/common/model" + + "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/downloads" "github.com/grafana/loki/pkg/util/ring" ) @@ -18,10 +23,12 @@ type Config struct { Enabled bool `yaml:"enabled"` WorkingDirectory string `yaml:"working_directory"` CompactionInterval time.Duration `yaml:"compaction_interval"` - - RetryMinBackoff time.Duration `yaml:"compaction_retries_min_backoff"` - RetryMaxBackoff time.Duration `yaml:"compaction_retries_max_backoff"` - CompactionRetries int `yaml:"compaction_retries"` + MinCompactionAge time.Duration `yaml:"min_compaction_age"` + 
MaxCompactionAge time.Duration `yaml:"max_compaction_age"` + WorkerParallelism int `yaml:"worker_parallelism"` + RetryMinBackoff time.Duration `yaml:"compaction_retries_min_backoff"` + RetryMaxBackoff time.Duration `yaml:"compaction_retries_max_backoff"` + CompactionRetries int `yaml:"compaction_retries"` MaxCompactionParallelism int `yaml:"max_compaction_parallelism"` } @@ -32,6 +39,15 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.BoolVar(&cfg.Enabled, "bloom-compactor.enabled", false, "Flag to enable or disable the usage of the bloom-compactor component.") f.StringVar(&cfg.WorkingDirectory, "bloom-compactor.working-directory", "", "Directory where files can be downloaded for compaction.") f.DurationVar(&cfg.CompactionInterval, "bloom-compactor.compaction-interval", 10*time.Minute, "Interval at which to re-run the compaction operation.") + f.IntVar(&cfg.WorkerParallelism, "bloom-compactor.worker-parallelism", 1, "Number of workers to run in parallel for compaction.") + f.DurationVar(&cfg.MinCompactionAge, "bloom-compactor.min-compaction-age", 24*time.Hour, "Minimum age of a table before it is considered for compaction.") + // TODO(owen-d): ideally we'd set this per tenant based on their `reject_old_samples_max_age` setting, + // but due to how we need to discover tenants, we can't do that yet. Tenant+Period discovery is done by + // iterating the table periods in object storage and looking for tenants within that period. + // In order to have this done dynamically, we'd need to account for tenant specific overrides, which are also + // dynamically reloaded. + // I'm doing it the simple way for now. + f.DurationVar(&cfg.MaxCompactionAge, "bloom-compactor.max-compaction-age", 7*24*time.Hour, "Maximum age of a table before it is considered for compaction.") f.DurationVar(&cfg.RetryMinBackoff, "bloom-compactor.compaction-retries-min-backoff", 10*time.Second, "Minimum backoff time between retries.") f.DurationVar(&cfg.RetryMaxBackoff, "bloom-compactor.compaction-retries-max-backoff", time.Minute, "Maximum backoff time between retries.") f.IntVar(&cfg.CompactionRetries, "bloom-compactor.compaction-retries", 3, "Number of retries to perform when compaction fails.") @@ -47,4 +63,39 @@ type Limits interface { BloomNGramLength(tenantID string) int BloomNGramSkip(tenantID string) int BloomFalsePositiveRate(tenantID string) float64 + BloomCompactorMaxBlockSize(tenantID string) int +} + +// TODO(owen-d): Remove this type in favor of config.DayTime +type DayTable model.Time + +func (d DayTable) String() string { + return fmt.Sprintf("%d", d.ModelTime().Time().UnixNano()/int64(config.ObjectStorageIndexRequiredPeriod)) +} + +func (d DayTable) Inc() DayTable { + return DayTable(d.ModelTime().Add(config.ObjectStorageIndexRequiredPeriod)) +} + +func (d DayTable) Dec() DayTable { + return DayTable(d.ModelTime().Add(-config.ObjectStorageIndexRequiredPeriod)) +} + +func (d DayTable) Before(other DayTable) bool { + return d.ModelTime().Before(model.Time(other)) +} + +func (d DayTable) After(other DayTable) bool { + return d.ModelTime().After(model.Time(other)) +} + +func (d DayTable) ModelTime() model.Time { + return model.Time(d) +} + +func (d DayTable) Bounds() bloomshipper.Interval { + return bloomshipper.Interval{ + Start: model.Time(d), + End: model.Time(d.Inc()), + } } diff --git a/pkg/bloomcompactor/controller.go b/pkg/bloomcompactor/controller.go index c9b3ecae35c2c..cf6fff090f0ae 100644 --- a/pkg/bloomcompactor/controller.go +++ b/pkg/bloomcompactor/controller.go @@ -1,12 +1,14 @@ package 
bloomcompactor import ( + "bytes" "context" "fmt" "sort" "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/grafana/dskit/multierror" "github.com/pkg/errors" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" @@ -15,60 +17,58 @@ import ( ) type SimpleBloomController struct { - ownershipRange v1.FingerprintBounds // ownership range of this controller - tsdbStore TSDBStore - metaStore MetaStore - blockStore BlockStore - chunkLoader ChunkLoader - rwFn func() (v1.BlockWriter, v1.BlockReader) - metrics *Metrics + tsdbStore TSDBStore + bloomStore bloomshipper.Store + chunkLoader ChunkLoader + metrics *Metrics + limits Limits // TODO(owen-d): add metrics logger log.Logger } func NewSimpleBloomController( - ownershipRange v1.FingerprintBounds, tsdbStore TSDBStore, - metaStore MetaStore, - blockStore BlockStore, + blockStore bloomshipper.Store, chunkLoader ChunkLoader, - rwFn func() (v1.BlockWriter, v1.BlockReader), + limits Limits, metrics *Metrics, logger log.Logger, ) *SimpleBloomController { return &SimpleBloomController{ - ownershipRange: ownershipRange, - tsdbStore: tsdbStore, - metaStore: metaStore, - blockStore: blockStore, - chunkLoader: chunkLoader, - rwFn: rwFn, - metrics: metrics, - logger: log.With(logger, "ownership", ownershipRange), + tsdbStore: tsdbStore, + bloomStore: blockStore, + chunkLoader: chunkLoader, + metrics: metrics, + limits: limits, + logger: logger, } } -func (s *SimpleBloomController) do(ctx context.Context) error { +// TODO(owen-d): pool, evaluate if memory-only is the best choice +func (s *SimpleBloomController) rwFn() (v1.BlockWriter, v1.BlockReader) { + indexBuf := bytes.NewBuffer(nil) + bloomsBuf := bytes.NewBuffer(nil) + return v1.NewMemoryBlockWriter(indexBuf, bloomsBuf), v1.NewByteReader(indexBuf, bloomsBuf) +} + +func (s *SimpleBloomController) buildBlocks( + ctx context.Context, + table DayTable, + tenant string, + ownershipRange v1.FingerprintBounds, +) error { + logger := log.With(s.logger, "ownership", ownershipRange, "org_id", tenant, "table", table) + // 1. Resolve TSDBs - tsdbs, err := s.tsdbStore.ResolveTSDBs() + tsdbs, err := s.tsdbStore.ResolveTSDBs(ctx, table, tenant) if err != nil { - level.Error(s.logger).Log("msg", "failed to resolve tsdbs", "err", err) + level.Error(logger).Log("msg", "failed to resolve tsdbs", "err", err) return errors.Wrap(err, "failed to resolve tsdbs") } - // 2. Resolve Metas - metaRefs, err := s.metaStore.ResolveMetas(s.ownershipRange) - if err != nil { - level.Error(s.logger).Log("msg", "failed to resolve metas", "err", err) - return errors.Wrap(err, "failed to resolve metas") - } - - // 3. Fetch metas - metas, err := s.metaStore.GetMetas(metaRefs) - if err != nil { - level.Error(s.logger).Log("msg", "failed to get metas", "err", err) - return errors.Wrap(err, "failed to get metas") + if len(tsdbs) == 0 { + return nil } ids := make([]tsdb.Identifier, 0, len(tsdbs)) @@ -76,26 +76,49 @@ func (s *SimpleBloomController) do(ctx context.Context) error { ids = append(ids, id) } - // 4. Determine which TSDBs have gaps in the ownership range and need to + // 2. Fetch metas + bounds := table.Bounds() + metas, err := s.bloomStore.FetchMetas( + ctx, + bloomshipper.MetaSearchParams{ + TenantID: tenant, + Interval: bloomshipper.Interval{ + Start: bounds.Start, + End: bounds.End, + }, + Keyspace: ownershipRange, + }, + ) + if err != nil { + level.Error(logger).Log("msg", "failed to get metas", "err", err) + return errors.Wrap(err, "failed to get metas") + } + + // 3. 
Determine which TSDBs have gaps in the ownership range and need to // be processed. - tsdbsWithGaps, err := gapsBetweenTSDBsAndMetas(s.ownershipRange, ids, metas) + tsdbsWithGaps, err := gapsBetweenTSDBsAndMetas(ownershipRange, ids, metas) if err != nil { - level.Error(s.logger).Log("msg", "failed to find gaps", "err", err) + level.Error(logger).Log("msg", "failed to find gaps", "err", err) return errors.Wrap(err, "failed to find gaps") } if len(tsdbsWithGaps) == 0 { - level.Debug(s.logger).Log("msg", "blooms exist for all tsdbs") + level.Debug(logger).Log("msg", "blooms exist for all tsdbs") return nil } work, err := blockPlansForGaps(tsdbsWithGaps, metas) if err != nil { - level.Error(s.logger).Log("msg", "failed to create plan", "err", err) + level.Error(logger).Log("msg", "failed to create plan", "err", err) return errors.Wrap(err, "failed to create plan") } - // 5. Generate Blooms + nGramSize := uint64(s.limits.BloomNGramLength(tenant)) + nGramSkip := uint64(s.limits.BloomNGramSkip(tenant)) + maxBlockSize := uint64(s.limits.BloomCompactorMaxBlockSize(tenant)) + blockOpts := v1.NewBlockOptions(nGramSize, nGramSkip, maxBlockSize) + + // 4. Generate Blooms // Now that we have the gaps, we will generate a bloom block for each gap. // We can accelerate this by using existing blocks which may already contain // needed chunks in their blooms, for instance after a new TSDB version is generated @@ -115,61 +138,95 @@ func (s *SimpleBloomController) do(ctx context.Context) error { for _, gap := range plan.gaps { // Fetch blocks that aren't up to date but are in the desired fingerprint range // to try and accelerate bloom creation - seriesItr, preExistingBlocks, err := s.loadWorkForGap(plan.tsdb, gap) + seriesItr, preExistingBlocks, err := s.loadWorkForGap(ctx, table, tenant, plan.tsdb, gap) if err != nil { - level.Error(s.logger).Log("msg", "failed to get series and blocks", "err", err) + level.Error(logger).Log("msg", "failed to get series and blocks", "err", err) return errors.Wrap(err, "failed to get series and blocks") } + // Close all remaining blocks on exit + closePreExistingBlocks := func() { + var closeErrors multierror.MultiError + for _, block := range preExistingBlocks { + closeErrors.Add(block.Close()) + } + if err := closeErrors.Err(); err != nil { + level.Error(s.logger).Log("msg", "failed to close blocks", "err", err) + } + } gen := NewSimpleBloomGenerator( - v1.DefaultBlockOptions, + tenant, + blockOpts, seriesItr, s.chunkLoader, preExistingBlocks, s.rwFn, s.metrics, - log.With(s.logger, "tsdb", plan.tsdb.Name(), "ownership", gap, "blocks", len(preExistingBlocks)), + log.With(logger, "tsdb", plan.tsdb.Name(), "ownership", gap, "blocks", len(preExistingBlocks)), ) _, newBlocks, err := gen.Generate(ctx) if err != nil { // TODO(owen-d): metrics - level.Error(s.logger).Log("msg", "failed to generate bloom", "err", err) + level.Error(logger).Log("msg", "failed to generate bloom", "err", err) + closePreExistingBlocks() return errors.Wrap(err, "failed to generate bloom") } - // TODO(owen-d): dispatch this to a queue for writing, handling retries/backpressure, etc? 
+ client, err := s.bloomStore.Client(table.ModelTime()) + if err != nil { + level.Error(logger).Log("msg", "failed to get client", "err", err) + closePreExistingBlocks() + return errors.Wrap(err, "failed to get client") + } for newBlocks.Next() { blockCt++ blk := newBlocks.At() - if err := s.blockStore.PutBlock(blk); err != nil { - level.Error(s.logger).Log("msg", "failed to write block", "err", err) + + if err := client.PutBlock( + ctx, + bloomshipper.BlockFrom(tenant, table.String(), blk), + ); err != nil { + level.Error(logger).Log("msg", "failed to write block", "err", err) + closePreExistingBlocks() return errors.Wrap(err, "failed to write block") } } if err := newBlocks.Err(); err != nil { // TODO(owen-d): metrics - level.Error(s.logger).Log("msg", "failed to generate bloom", "err", err) + level.Error(logger).Log("msg", "failed to generate bloom", "err", err) + closePreExistingBlocks() return errors.Wrap(err, "failed to generate bloom") } + // Close pre-existing blocks + closePreExistingBlocks() } } - level.Debug(s.logger).Log("msg", "finished bloom generation", "blocks", blockCt, "tsdbs", tsdbCt) + // TODO(owen-d): build meta from blocks + // TODO(owen-d): reap tombstones, old metas + + level.Debug(logger).Log("msg", "finished bloom generation", "blocks", blockCt, "tsdbs", tsdbCt) return nil } -func (s *SimpleBloomController) loadWorkForGap(id tsdb.Identifier, gap gapWithBlocks) (v1.CloseableIterator[*v1.Series], []*v1.Block, error) { +func (s *SimpleBloomController) loadWorkForGap( + ctx context.Context, + table DayTable, + tenant string, + id tsdb.Identifier, + gap gapWithBlocks, +) (v1.CloseableIterator[*v1.Series], []*bloomshipper.CloseableBlockQuerier, error) { // load a series iterator for the gap - seriesItr, err := s.tsdbStore.LoadTSDB(id, gap.bounds) + seriesItr, err := s.tsdbStore.LoadTSDB(ctx, table, tenant, id, gap.bounds) if err != nil { return nil, nil, errors.Wrap(err, "failed to load tsdb") } - blocks, err := s.blockStore.GetBlocks(gap.blocks) + blocks, err := s.bloomStore.FetchBlocks(ctx, gap.blocks) if err != nil { return nil, nil, errors.Wrap(err, "failed to get blocks") } @@ -199,7 +256,7 @@ type blockPlan struct { // blockPlansForGaps groups tsdb gaps we wish to fill with overlapping but out of date blocks. // This allows us to expedite bloom generation by using existing blocks to fill in the gaps // since many will contain the same chunks. 
-func blockPlansForGaps(tsdbs []tsdbGaps, metas []Meta) ([]blockPlan, error) { +func blockPlansForGaps(tsdbs []tsdbGaps, metas []bloomshipper.Meta) ([]blockPlan, error) { plans := make([]blockPlan, 0, len(tsdbs)) for _, idx := range tsdbs { @@ -215,7 +272,7 @@ func blockPlansForGaps(tsdbs []tsdbGaps, metas []Meta) ([]blockPlan, error) { for _, meta := range metas { - if meta.OwnershipRange.Intersection(gap) == nil { + if meta.Bounds.Intersection(gap) == nil { // this meta doesn't overlap the gap, skip continue } @@ -279,7 +336,7 @@ type tsdbGaps struct { func gapsBetweenTSDBsAndMetas( ownershipRange v1.FingerprintBounds, tsdbs []tsdb.Identifier, - metas []Meta, + metas []bloomshipper.Meta, ) (res []tsdbGaps, err error) { for _, db := range tsdbs { id := db.Name() @@ -288,7 +345,7 @@ func gapsBetweenTSDBsAndMetas( for _, meta := range metas { for _, s := range meta.Sources { if s.Name() == id { - relevantMetas = append(relevantMetas, meta.OwnershipRange) + relevantMetas = append(relevantMetas, meta.Bounds) } } } diff --git a/pkg/bloomcompactor/controller_test.go b/pkg/bloomcompactor/controller_test.go index 1f89a0e318efd..0660a5b601eea 100644 --- a/pkg/bloomcompactor/controller_test.go +++ b/pkg/bloomcompactor/controller_test.go @@ -120,10 +120,14 @@ func tsdbID(n int) tsdb.SingleTenantTSDBIdentifier { } } -func genMeta(min, max model.Fingerprint, sources []int, blocks []bloomshipper.BlockRef) Meta { - m := Meta{ - OwnershipRange: v1.NewBounds(min, max), - Blocks: blocks, +func genMeta(min, max model.Fingerprint, sources []int, blocks []bloomshipper.BlockRef) bloomshipper.Meta { + m := bloomshipper.Meta{ + MetaRef: bloomshipper.MetaRef{ + Ref: bloomshipper.Ref{ + Bounds: v1.NewBounds(min, max), + }, + }, + Blocks: blocks, } for _, source := range sources { m.Sources = append(m.Sources, tsdbID(source)) @@ -139,14 +143,14 @@ func Test_gapsBetweenTSDBsAndMetas(t *testing.T) { exp []tsdbGaps ownershipRange v1.FingerprintBounds tsdbs []tsdb.Identifier - metas []Meta + metas []bloomshipper.Meta }{ { desc: "non-overlapping tsdbs and metas", err: true, ownershipRange: v1.NewBounds(0, 10), tsdbs: []tsdb.Identifier{tsdbID(0)}, - metas: []Meta{ + metas: []bloomshipper.Meta{ genMeta(11, 20, []int{0}, nil), }, }, @@ -154,7 +158,7 @@ func Test_gapsBetweenTSDBsAndMetas(t *testing.T) { desc: "single tsdb", ownershipRange: v1.NewBounds(0, 10), tsdbs: []tsdb.Identifier{tsdbID(0)}, - metas: []Meta{ + metas: []bloomshipper.Meta{ genMeta(4, 8, []int{0}, nil), }, exp: []tsdbGaps{ @@ -171,7 +175,7 @@ func Test_gapsBetweenTSDBsAndMetas(t *testing.T) { desc: "multiple tsdbs with separate blocks", ownershipRange: v1.NewBounds(0, 10), tsdbs: []tsdb.Identifier{tsdbID(0), tsdbID(1)}, - metas: []Meta{ + metas: []bloomshipper.Meta{ genMeta(0, 5, []int{0}, nil), genMeta(6, 10, []int{1}, nil), }, @@ -194,7 +198,7 @@ func Test_gapsBetweenTSDBsAndMetas(t *testing.T) { desc: "multiple tsdbs with the same blocks", ownershipRange: v1.NewBounds(0, 10), tsdbs: []tsdb.Identifier{tsdbID(0), tsdbID(1)}, - metas: []Meta{ + metas: []bloomshipper.Meta{ genMeta(0, 5, []int{0, 1}, nil), genMeta(6, 8, []int{1}, nil), }, @@ -239,7 +243,7 @@ func Test_blockPlansForGaps(t *testing.T) { desc string ownershipRange v1.FingerprintBounds tsdbs []tsdb.Identifier - metas []Meta + metas []bloomshipper.Meta err bool exp []blockPlan }{ @@ -247,7 +251,7 @@ func Test_blockPlansForGaps(t *testing.T) { desc: "single overlapping meta+no overlapping block", ownershipRange: v1.NewBounds(0, 10), tsdbs: []tsdb.Identifier{tsdbID(0)}, - metas: []Meta{ + metas: 
[]bloomshipper.Meta{ genMeta(5, 20, []int{1}, []bloomshipper.BlockRef{genBlockRef(11, 20)}), }, exp: []blockPlan{ @@ -265,7 +269,7 @@ func Test_blockPlansForGaps(t *testing.T) { desc: "single overlapping meta+one overlapping block", ownershipRange: v1.NewBounds(0, 10), tsdbs: []tsdb.Identifier{tsdbID(0)}, - metas: []Meta{ + metas: []bloomshipper.Meta{ genMeta(5, 20, []int{1}, []bloomshipper.BlockRef{genBlockRef(9, 20)}), }, exp: []blockPlan{ @@ -287,7 +291,7 @@ func Test_blockPlansForGaps(t *testing.T) { desc: "trims up to date area", ownershipRange: v1.NewBounds(0, 10), tsdbs: []tsdb.Identifier{tsdbID(0)}, - metas: []Meta{ + metas: []bloomshipper.Meta{ genMeta(9, 20, []int{0}, []bloomshipper.BlockRef{genBlockRef(9, 20)}), // block for same tsdb genMeta(9, 20, []int{1}, []bloomshipper.BlockRef{genBlockRef(9, 20)}), // block for different tsdb }, @@ -306,7 +310,7 @@ func Test_blockPlansForGaps(t *testing.T) { desc: "uses old block for overlapping range", ownershipRange: v1.NewBounds(0, 10), tsdbs: []tsdb.Identifier{tsdbID(0)}, - metas: []Meta{ + metas: []bloomshipper.Meta{ genMeta(9, 20, []int{0}, []bloomshipper.BlockRef{genBlockRef(9, 20)}), // block for same tsdb genMeta(5, 20, []int{1}, []bloomshipper.BlockRef{genBlockRef(5, 20)}), // block for different tsdb }, @@ -326,7 +330,7 @@ func Test_blockPlansForGaps(t *testing.T) { desc: "multi case", ownershipRange: v1.NewBounds(0, 10), tsdbs: []tsdb.Identifier{tsdbID(0), tsdbID(1)}, // generate for both tsdbs - metas: []Meta{ + metas: []bloomshipper.Meta{ genMeta(0, 2, []int{0}, []bloomshipper.BlockRef{ genBlockRef(0, 1), genBlockRef(1, 2), @@ -374,7 +378,7 @@ func Test_blockPlansForGaps(t *testing.T) { desc: "dedupes block refs", ownershipRange: v1.NewBounds(0, 10), tsdbs: []tsdb.Identifier{tsdbID(0)}, - metas: []Meta{ + metas: []bloomshipper.Meta{ genMeta(9, 20, []int{1}, []bloomshipper.BlockRef{ genBlockRef(1, 4), genBlockRef(9, 20), diff --git a/pkg/bloomcompactor/meta.go b/pkg/bloomcompactor/meta.go deleted file mode 100644 index c0a333c5c907e..0000000000000 --- a/pkg/bloomcompactor/meta.go +++ /dev/null @@ -1,114 +0,0 @@ -package bloomcompactor - -import ( - "fmt" - "path" - - "github.com/pkg/errors" - - v1 "github.com/grafana/loki/pkg/storage/bloom/v1" - "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" - "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" -) - -const ( - BloomPrefix = "bloom" - MetasPrefix = "metas" -) - -type MetaRef struct { - OwnershipRange v1.FingerprintBounds - Checksum uint32 -} - -// `bloom///metas/--.json` -func (m MetaRef) Address(tenant string, period int) (string, error) { - joined := path.Join( - BloomPrefix, - fmt.Sprintf("%v", period), - tenant, - MetasPrefix, - fmt.Sprintf("%v-%v", m.OwnershipRange, m.Checksum), - ) - - return fmt.Sprintf("%s.json", joined), nil -} - -type Meta struct { - - // The fingerprint range of the block. This is the range _owned_ by the meta and - // is greater than or equal to the range of the actual data in the underlying blocks. - OwnershipRange v1.FingerprintBounds - - // Old blocks which can be deleted in the future. These should be from previous compaction rounds. - Tombstones []bloomshipper.BlockRef - - // The specific TSDB files used to generate the block. 
- Sources []tsdb.SingleTenantTSDBIdentifier - - // A list of blocks that were generated - Blocks []bloomshipper.BlockRef -} - -// Generate MetaRef from Meta -func (m Meta) Ref() (MetaRef, error) { - checksum, err := m.Checksum() - if err != nil { - return MetaRef{}, errors.Wrap(err, "getting checksum") - } - return MetaRef{ - OwnershipRange: m.OwnershipRange, - Checksum: checksum, - }, nil -} - -func (m Meta) Checksum() (uint32, error) { - h := v1.Crc32HashPool.Get() - defer v1.Crc32HashPool.Put(h) - - _, err := h.Write([]byte(m.OwnershipRange.String())) - if err != nil { - return 0, errors.Wrap(err, "writing OwnershipRange") - } - - for _, tombstone := range m.Tombstones { - err = tombstone.Hash(h) - if err != nil { - return 0, errors.Wrap(err, "writing Tombstones") - } - } - - for _, source := range m.Sources { - err = source.Hash(h) - if err != nil { - return 0, errors.Wrap(err, "writing Sources") - } - } - - for _, block := range m.Blocks { - err = block.Hash(h) - if err != nil { - return 0, errors.Wrap(err, "writing Blocks") - } - } - - return h.Sum32(), nil - -} - -type TSDBStore interface { - ResolveTSDBs() ([]*tsdb.SingleTenantTSDBIdentifier, error) - LoadTSDB(id tsdb.Identifier, bounds v1.FingerprintBounds) (v1.CloseableIterator[*v1.Series], error) -} - -type MetaStore interface { - ResolveMetas(bounds v1.FingerprintBounds) ([]MetaRef, error) - GetMetas([]MetaRef) ([]Meta, error) - PutMeta(Meta) error -} - -type BlockStore interface { - // TODO(owen-d): flesh out|integrate against bloomshipper.Client - GetBlocks([]bloomshipper.BlockRef) ([]*v1.Block, error) - PutBlock(interface{}) error -} diff --git a/pkg/bloomcompactor/metrics.go b/pkg/bloomcompactor/metrics.go index ee2f1630ab5ec..b02ac32aca727 100644 --- a/pkg/bloomcompactor/metrics.go +++ b/pkg/bloomcompactor/metrics.go @@ -3,6 +3,8 @@ package bloomcompactor import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" ) const ( @@ -13,7 +15,10 @@ const ( statusFailure = "failure" ) -type metrics struct { +type Metrics struct { + bloomMetrics *v1.Metrics + chunkSize prometheus.Histogram // uncompressed size of all chunks summed per series + compactionRunsStarted prometheus.Counter compactionRunsCompleted *prometheus.CounterVec compactionRunTime *prometheus.HistogramVec @@ -28,8 +33,14 @@ type metrics struct { compactorRunning prometheus.Gauge } -func newMetrics(r prometheus.Registerer) *metrics { - m := metrics{ +func NewMetrics(r prometheus.Registerer, bloomMetrics *v1.Metrics) *Metrics { + m := Metrics{ + bloomMetrics: bloomMetrics, + chunkSize: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ + Name: "bloom_chunk_series_size", + Help: "Uncompressed size of chunks in a series", + Buckets: prometheus.ExponentialBucketsRange(1024, 1073741824, 10), + }), compactionRunsStarted: promauto.With(r).NewCounter(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, diff --git a/pkg/bloomcompactor/spec.go b/pkg/bloomcompactor/spec.go index e0d964e9e9724..70ea71c4e605f 100644 --- a/pkg/bloomcompactor/spec.go +++ b/pkg/bloomcompactor/spec.go @@ -9,8 +9,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" "github.com/grafana/loki/pkg/chunkenc" @@ -18,33 +16,11 @@ import ( logql_log 
"github.com/grafana/loki/pkg/logql/log" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/chunk" + "github.com/grafana/loki/pkg/storage/stores" + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" ) -/* -This file maintains a number of things supporting bloom generation. Most notably, the `BloomGenerator` interface/implementation which builds bloom filters. - -- `BloomGenerator`: Builds blooms. Most other things in this file are supporting this in various ways. -- `SimpleBloomGenerator`: A foundational implementation of `BloomGenerator` which wires up a few different components to generate bloom filters for a set of blocks and handles schema compatibility: -- `chunkLoader`: Loads chunks w/ a specific fingerprint from the store, returns an iterator of chunk iterators. We return iterators rather than chunk implementations mainly for ease of testing. In practice, this will just be an iterator over `MemChunk`s. -*/ - -type Metrics struct { - bloomMetrics *v1.Metrics - chunkSize prometheus.Histogram // uncompressed size of all chunks summed per series -} - -func NewMetrics(r prometheus.Registerer, bloomMetrics *v1.Metrics) *Metrics { - return &Metrics{ - bloomMetrics: bloomMetrics, - chunkSize: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Name: "bloom_chunk_series_size", - Help: "Uncompressed size of chunks in a series", - Buckets: prometheus.ExponentialBucketsRange(1024, 1073741824, 10), - }), - } -} - // inclusive range type Keyspace struct { min, max model.Fingerprint @@ -67,11 +43,12 @@ type BloomGenerator interface { // Simple implementation of a BloomGenerator. type SimpleBloomGenerator struct { + userID string store v1.Iterator[*v1.Series] chunkLoader ChunkLoader // TODO(owen-d): blocks need not be all downloaded prior. Consider implementing // as an iterator of iterators, where each iterator is a batch of overlapping blocks. - blocks []*v1.Block + blocks []*bloomshipper.CloseableBlockQuerier // options to build blocks with opts v1.BlockOptions @@ -89,17 +66,18 @@ type SimpleBloomGenerator struct { // and handles schema compatibility: // Blocks which are incompatible with the schema are skipped and will have their chunks reindexed func NewSimpleBloomGenerator( + userID string, opts v1.BlockOptions, store v1.Iterator[*v1.Series], chunkLoader ChunkLoader, - blocks []*v1.Block, + blocks []*bloomshipper.CloseableBlockQuerier, readWriterFn func() (v1.BlockWriter, v1.BlockReader), metrics *Metrics, logger log.Logger, ) *SimpleBloomGenerator { return &SimpleBloomGenerator{ - opts: opts, - // TODO(owen-d): implement Iterator[Series] against TSDB files to hook in here. 
+ userID: userID, + opts: opts, store: store, chunkLoader: chunkLoader, blocks: blocks, @@ -113,7 +91,7 @@ func NewSimpleBloomGenerator( func (s *SimpleBloomGenerator) populator(ctx context.Context) func(series *v1.Series, bloom *v1.Bloom) error { return func(series *v1.Series, bloom *v1.Bloom) error { - chunkItersWithFP, err := s.chunkLoader.Load(ctx, series) + chunkItersWithFP, err := s.chunkLoader.Load(ctx, s.userID, series) if err != nil { return errors.Wrapf(err, "failed to load chunks for series: %+v", series) } @@ -129,45 +107,115 @@ func (s *SimpleBloomGenerator) populator(ctx context.Context) func(series *v1.Se } -func (s *SimpleBloomGenerator) Generate(ctx context.Context) (skippedBlocks []*v1.Block, results v1.Iterator[*v1.Block], err error) { - - blocksMatchingSchema := make([]v1.PeekingIterator[*v1.SeriesWithBloom], 0, len(s.blocks)) +func (s *SimpleBloomGenerator) Generate(ctx context.Context) (skippedBlocks []v1.BlockMetadata, results v1.Iterator[*v1.Block], err error) { + blocksMatchingSchema := make([]*bloomshipper.CloseableBlockQuerier, 0, len(s.blocks)) for _, block := range s.blocks { - // TODO(owen-d): implement block naming so we can log the affected block in all these calls - logger := log.With(s.logger, "block", fmt.Sprintf("%+v", block)) - schema, err := block.Schema() + logger := log.With(s.logger, "block", block.BlockRef) + md, err := block.Metadata() + schema := md.Options.Schema if err != nil { level.Warn(logger).Log("msg", "failed to get schema for block", "err", err) - skippedBlocks = append(skippedBlocks, block) + skippedBlocks = append(skippedBlocks, md) + continue } if !s.opts.Schema.Compatible(schema) { level.Warn(logger).Log("msg", "block schema incompatible with options", "generator_schema", fmt.Sprintf("%+v", s.opts.Schema), "block_schema", fmt.Sprintf("%+v", schema)) - skippedBlocks = append(skippedBlocks, block) + skippedBlocks = append(skippedBlocks, md) + continue } level.Debug(logger).Log("msg", "adding compatible block to bloom generation inputs") - itr := v1.NewPeekingIter[*v1.SeriesWithBloom](v1.NewBlockQuerier(block)) - blocksMatchingSchema = append(blocksMatchingSchema, itr) + blocksMatchingSchema = append(blocksMatchingSchema, block) } level.Debug(s.logger).Log("msg", "generating bloom filters for blocks", "num_blocks", len(blocksMatchingSchema), "skipped_blocks", len(skippedBlocks), "schema", fmt.Sprintf("%+v", s.opts.Schema)) - // TODO(owen-d): implement bounded block sizes + series := v1.NewPeekingIter(s.store) + blockIter := NewLazyBlockBuilderIterator(ctx, s.opts, s.populator(ctx), s.readWriterFn, series, blocksMatchingSchema) + return skippedBlocks, blockIter, nil +} + +// LazyBlockBuilderIterator is a lazy iterator over blocks that builds +// each block by adding series to them until they are full. 
+type LazyBlockBuilderIterator struct { + ctx context.Context + opts v1.BlockOptions + populate func(*v1.Series, *v1.Bloom) error + readWriterFn func() (v1.BlockWriter, v1.BlockReader) + series v1.PeekingIterator[*v1.Series] + blocks []*bloomshipper.CloseableBlockQuerier + + blocksAsPeekingIter []v1.PeekingIterator[*v1.SeriesWithBloom] + curr *v1.Block + err error +} + +func NewLazyBlockBuilderIterator( + ctx context.Context, + opts v1.BlockOptions, + populate func(*v1.Series, *v1.Bloom) error, + readWriterFn func() (v1.BlockWriter, v1.BlockReader), + series v1.PeekingIterator[*v1.Series], + blocks []*bloomshipper.CloseableBlockQuerier, +) *LazyBlockBuilderIterator { + it := &LazyBlockBuilderIterator{ + ctx: ctx, + opts: opts, + populate: populate, + readWriterFn: readWriterFn, + series: series, + blocks: blocks, + + blocksAsPeekingIter: make([]v1.PeekingIterator[*v1.SeriesWithBloom], len(blocks)), + } + + return it +} - mergeBuilder := v1.NewMergeBuilder(blocksMatchingSchema, s.store, s.populator(ctx)) - writer, reader := s.readWriterFn() - blockBuilder, err := v1.NewBlockBuilder(v1.NewBlockOptionsFromSchema(s.opts.Schema), writer) +func (b *LazyBlockBuilderIterator) Next() bool { + // No more series to process + if _, hasNext := b.series.Peek(); !hasNext { + return false + } + + // reset all the blocks to the start + for i, block := range b.blocks { + if err := block.Reset(); err != nil { + b.err = errors.Wrapf(err, "failed to reset block iterator %d", i) + return false + } + b.blocksAsPeekingIter[i] = v1.NewPeekingIter[*v1.SeriesWithBloom](block) + } + + if err := b.ctx.Err(); err != nil { + b.err = errors.Wrap(err, "context canceled") + return false + } + + mergeBuilder := v1.NewMergeBuilder(b.blocksAsPeekingIter, b.series, b.populate) + writer, reader := b.readWriterFn() + blockBuilder, err := v1.NewBlockBuilder(b.opts, writer) if err != nil { - return skippedBlocks, nil, errors.Wrap(err, "failed to create bloom block builder") + b.err = errors.Wrap(err, "failed to create bloom block builder") + return false } _, err = mergeBuilder.Build(blockBuilder) if err != nil { - return skippedBlocks, nil, errors.Wrap(err, "failed to build bloom block") + b.err = errors.Wrap(err, "failed to build bloom block") + return false } - return skippedBlocks, v1.NewSliceIter[*v1.Block]([]*v1.Block{v1.NewBlock(reader)}), nil + b.curr = v1.NewBlock(reader) + return true +} +func (b *LazyBlockBuilderIterator) At() *v1.Block { + return b.curr +} + +func (b *LazyBlockBuilderIterator) Err() error { + return b.err } // IndexLoader loads an index. 
This helps us do things like @@ -184,7 +232,7 @@ type ChunkItersByFingerprint struct { // ChunkLoader loads chunks from a store type ChunkLoader interface { - Load(context.Context, *v1.Series) (*ChunkItersByFingerprint, error) + Load(ctx context.Context, userID string, series *v1.Series) (*ChunkItersByFingerprint, error) } // interface modeled from `pkg/storage/stores/composite_store.ChunkFetcherProvider` @@ -197,23 +245,37 @@ type chunkFetcher interface { FetchChunks(ctx context.Context, chunks []chunk.Chunk) ([]chunk.Chunk, error) } +// Adapter turning `stores.ChunkFetcherProvider` into `fetcherProvider` +// The former returns a concrete type and is heavily used externally +// while the latter returns an interface for better testing and +// is used internally +type FetcherProviderAdapter struct { + root stores.ChunkFetcherProvider +} + +func NewFetcherProviderAdapter(root stores.ChunkFetcherProvider) *FetcherProviderAdapter { + return &FetcherProviderAdapter{root: root} +} + +func (f *FetcherProviderAdapter) GetChunkFetcher(t model.Time) chunkFetcher { + return f.root.GetChunkFetcher(t) +} + // StoreChunkLoader loads chunks from a store type StoreChunkLoader struct { - userID string fetcherProvider fetcherProvider metrics *Metrics } -func NewStoreChunkLoader(userID string, fetcherProvider fetcherProvider, metrics *Metrics) *StoreChunkLoader { +func NewStoreChunkLoader(fetcherProvider fetcherProvider, metrics *Metrics) *StoreChunkLoader { return &StoreChunkLoader{ - userID: userID, fetcherProvider: fetcherProvider, metrics: metrics, } } -func (s *StoreChunkLoader) Load(ctx context.Context, series *v1.Series) (*ChunkItersByFingerprint, error) { - // TODO(owen-d): This is probalby unnecessary as we should only have one fetcher +func (s *StoreChunkLoader) Load(ctx context.Context, userID string, series *v1.Series) (*ChunkItersByFingerprint, error) { + // NB(owen-d): This is probalby unnecessary as we should only have one fetcher // because we'll only be working on a single index period at a time, but this should protect // us in the case of refactoring/changing this and likely isn't a perf bottleneck. chksByFetcher := make(map[chunkFetcher][]chunk.Chunk) @@ -222,7 +284,7 @@ func (s *StoreChunkLoader) Load(ctx context.Context, series *v1.Series) (*ChunkI chksByFetcher[fetcher] = append(chksByFetcher[fetcher], chunk.Chunk{ ChunkRef: logproto.ChunkRef{ Fingerprint: uint64(series.Fingerprint), - UserID: s.userID, + UserID: userID, From: chk.Start, Through: chk.End, Checksum: chk.Checksum, @@ -304,7 +366,7 @@ func (b *batchedLoader) format(c chunk.Chunk) (v1.ChunkRefWithIter, error) { b.metrics.chunkSize.Observe(float64(chk.UncompressedSize())) itr, err := chk.Iterator( b.ctx, - time.Unix(0, 0), // TODO: Parameterize/better handle the timestamps? 
+ time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, logql_log.NewNoopPipeline().ForStream(c.Metric), diff --git a/pkg/bloomcompactor/spec_test.go b/pkg/bloomcompactor/spec_test.go index 08c722d06e5d4..798d65e2f2bcd 100644 --- a/pkg/bloomcompactor/spec_test.go +++ b/pkg/bloomcompactor/spec_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/require" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" ) func blocksFromSchema(t *testing.T, n int, options v1.BlockOptions) (res []*v1.Block, data []v1.SeriesWithBloom) { @@ -27,7 +28,7 @@ func blocksFromSchemaWithRange(t *testing.T, n int, options v1.BlockOptions, fro numKeysPerSeries := 10000 data, _ = v1.MkBasicSeriesWithBlooms(numSeries, numKeysPerSeries, fromFP, throughFp, 0, 10000) - seriesPerBlock := 100 / n + seriesPerBlock := numSeries / n for i := 0; i < n; i++ { // references for linking in memory reader+writer @@ -55,7 +56,7 @@ func blocksFromSchemaWithRange(t *testing.T, n int, options v1.BlockOptions, fro // doesn't actually load any chunks type dummyChunkLoader struct{} -func (dummyChunkLoader) Load(_ context.Context, series *v1.Series) (*ChunkItersByFingerprint, error) { +func (dummyChunkLoader) Load(_ context.Context, _ string, series *v1.Series) (*ChunkItersByFingerprint, error) { return &ChunkItersByFingerprint{ fp: series.Fingerprint, itr: v1.NewEmptyIter[v1.ChunkRefWithIter](), @@ -63,11 +64,19 @@ func (dummyChunkLoader) Load(_ context.Context, series *v1.Series) (*ChunkItersB } func dummyBloomGen(opts v1.BlockOptions, store v1.Iterator[*v1.Series], blocks []*v1.Block) *SimpleBloomGenerator { + bqs := make([]*bloomshipper.CloseableBlockQuerier, 0, len(blocks)) + for _, b := range blocks { + bqs = append(bqs, &bloomshipper.CloseableBlockQuerier{ + BlockQuerier: v1.NewBlockQuerier(b), + }) + } + return NewSimpleBloomGenerator( + "fake", opts, store, dummyChunkLoader{}, - blocks, + bqs, func() (v1.BlockWriter, v1.BlockReader) { indexBuf := bytes.NewBuffer(nil) bloomsBuf := bytes.NewBuffer(nil) @@ -79,24 +88,35 @@ func dummyBloomGen(opts v1.BlockOptions, store v1.Iterator[*v1.Series], blocks [ } func TestSimpleBloomGenerator(t *testing.T) { + const maxBlockSize = 100 << 20 // 100MB for _, tc := range []struct { - desc string - fromSchema, toSchema v1.BlockOptions - sourceBlocks, numSkipped int + desc string + fromSchema, toSchema v1.BlockOptions + sourceBlocks, numSkipped, outputBlocks int }{ { desc: "SkipsIncompatibleSchemas", - fromSchema: v1.NewBlockOptions(3, 0), - toSchema: v1.NewBlockOptions(4, 0), + fromSchema: v1.NewBlockOptions(3, 0, maxBlockSize), + toSchema: v1.NewBlockOptions(4, 0, maxBlockSize), sourceBlocks: 2, numSkipped: 2, + outputBlocks: 1, }, { desc: "CombinesBlocks", - fromSchema: v1.NewBlockOptions(4, 0), - toSchema: v1.NewBlockOptions(4, 0), + fromSchema: v1.NewBlockOptions(4, 0, maxBlockSize), + toSchema: v1.NewBlockOptions(4, 0, maxBlockSize), sourceBlocks: 2, numSkipped: 0, + outputBlocks: 1, + }, + { + desc: "MaxBlockSize", + fromSchema: v1.NewBlockOptions(4, 0, maxBlockSize), + toSchema: v1.NewBlockOptions(4, 0, 1<<10), // 1KB + sourceBlocks: 2, + numSkipped: 0, + outputBlocks: 3, }, } { t.Run(tc.desc, func(t *testing.T) { @@ -113,22 +133,25 @@ func TestSimpleBloomGenerator(t *testing.T) { require.Nil(t, err) require.Equal(t, tc.numSkipped, len(skipped)) - require.True(t, results.Next()) - block := results.At() - require.False(t, results.Next()) - - refs := v1.PointerSlice[v1.SeriesWithBloom](data) - - 
v1.EqualIterators[*v1.SeriesWithBloom]( - t, - func(a, b *v1.SeriesWithBloom) { - // TODO(owen-d): better equality check - // once chunk fetching is implemented - require.Equal(t, a.Series, b.Series) - }, - v1.NewSliceIter[*v1.SeriesWithBloom](refs), - block.Querier(), - ) + var outputBlocks []*v1.Block + for results.Next() { + outputBlocks = append(outputBlocks, results.At()) + } + require.Equal(t, tc.outputBlocks, len(outputBlocks)) + + // Check all the input series are present in the output blocks. + expectedRefs := v1.PointerSlice(data) + outputRefs := make([]*v1.SeriesWithBloom, 0, len(data)) + for _, block := range outputBlocks { + bq := block.Querier() + for bq.Next() { + outputRefs = append(outputRefs, bq.At()) + } + } + require.Equal(t, len(expectedRefs), len(outputRefs)) + for i := range expectedRefs { + require.Equal(t, expectedRefs[i].Series, outputRefs[i].Series) + } }) } } diff --git a/pkg/bloomcompactor/tsdb.go b/pkg/bloomcompactor/tsdb.go new file mode 100644 index 0000000000000..be45d293f6286 --- /dev/null +++ b/pkg/bloomcompactor/tsdb.go @@ -0,0 +1,295 @@ +package bloomcompactor + +import ( + "context" + "fmt" + "io" + "math" + "path" + "strings" + + "github.com/pkg/errors" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" + + baseStore "github.com/grafana/loki/pkg/storage" + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/storage" + "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" + "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb/index" +) + +const ( + gzipExtension = ".gz" +) + +type TSDBStore interface { + UsersForPeriod(ctx context.Context, table DayTable) ([]string, error) + ResolveTSDBs(ctx context.Context, table DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) + LoadTSDB( + ctx context.Context, + table DayTable, + tenant string, + id tsdb.Identifier, + bounds v1.FingerprintBounds, + ) (v1.CloseableIterator[*v1.Series], error) +} + +// BloomTSDBStore is a wrapper around the storage.Client interface which +// implements the TSDBStore interface for this pkg. 
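Editor's sketch (not part of the patch): how a caller might walk a single day table through the TSDBStore interface defined above. seriesForDay is a hypothetical helper; it assumes, as LoadTSDB's use of id.Name() suggests, that tsdb.SingleTenantTSDBIdentifier satisfies tsdb.Identifier.

func seriesForDay(ctx context.Context, store TSDBStore, table DayTable, tenant string, bounds v1.FingerprintBounds) (int, error) {
	ids, err := store.ResolveTSDBs(ctx, table, tenant)
	if err != nil {
		return 0, err
	}
	var n int
	for _, id := range ids {
		itr, err := store.LoadTSDB(ctx, table, tenant, id, bounds)
		if err != nil {
			return n, err
		}
		for itr.Next() {
			n++ // itr.At() yields a *v1.Series whose fingerprint falls within bounds
		}
		err = itr.Err()
		_ = itr.Close()
		if err != nil {
			return n, err
		}
	}
	return n, nil
}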
+type BloomTSDBStore struct { + storage storage.Client +} + +func NewBloomTSDBStore(storage storage.Client) *BloomTSDBStore { + return &BloomTSDBStore{ + storage: storage, + } +} + +func (b *BloomTSDBStore) UsersForPeriod(ctx context.Context, table DayTable) ([]string, error) { + _, users, err := b.storage.ListFiles(ctx, table.String(), false) + return users, err +} + +func (b *BloomTSDBStore) ResolveTSDBs(ctx context.Context, table DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) { + indices, err := b.storage.ListUserFiles(ctx, table.String(), tenant, false) + if err != nil { + return nil, errors.Wrap(err, "failed to list user files") + } + + ids := make([]tsdb.SingleTenantTSDBIdentifier, 0, len(indices)) + for _, index := range indices { + key := index.Name + if decompress := storage.IsCompressedFile(index.Name); decompress { + key = strings.TrimSuffix(key, gzipExtension) + } + + id, ok := tsdb.ParseSingleTenantTSDBPath(path.Base(key)) + if !ok { + return nil, errors.Errorf("failed to parse single tenant tsdb path: %s", key) + } + + ids = append(ids, id) + + } + return ids, nil +} + +func (b *BloomTSDBStore) LoadTSDB( + ctx context.Context, + table DayTable, + tenant string, + id tsdb.Identifier, + bounds v1.FingerprintBounds, +) (v1.CloseableIterator[*v1.Series], error) { + data, err := b.storage.GetUserFile(ctx, table.String(), tenant, id.Name()) + if err != nil { + return nil, errors.Wrap(err, "failed to get file") + } + + buf, err := io.ReadAll(data) + if err != nil { + return nil, errors.Wrap(err, "failed to read file") + } + _ = data.Close() + + reader, err := index.NewReader(index.RealByteSlice(buf)) + if err != nil { + return nil, errors.Wrap(err, "failed to create index reader") + } + + idx := tsdb.NewTSDBIndex(reader) + + return NewTSDBSeriesIter(ctx, idx, bounds), nil +} + +// TSDBStore is an interface for interacting with the TSDB, +// modeled off a relevant subset of the `tsdb.TSDBIndex` struct +type forSeries interface { + ForSeries( + ctx context.Context, + fpFilter index.FingerprintFilter, + from model.Time, + through model.Time, + fn func(labels.Labels, model.Fingerprint, []index.ChunkMeta), + matchers ...*labels.Matcher, + ) error + Close() error +} + +type TSDBSeriesIter struct { + f forSeries + bounds v1.FingerprintBounds + ctx context.Context + + ch chan *v1.Series + initialized bool + next *v1.Series + err error +} + +func NewTSDBSeriesIter(ctx context.Context, f forSeries, bounds v1.FingerprintBounds) *TSDBSeriesIter { + return &TSDBSeriesIter{ + f: f, + bounds: bounds, + ctx: ctx, + ch: make(chan *v1.Series), + } +} + +func (t *TSDBSeriesIter) Next() bool { + if !t.initialized { + t.initialized = true + t.background() + } + + select { + case <-t.ctx.Done(): + return false + case next, ok := <-t.ch: + t.next = next + return ok + } +} + +func (t *TSDBSeriesIter) At() *v1.Series { + return t.next +} + +func (t *TSDBSeriesIter) Err() error { + if t.err != nil { + return t.err + } + + return t.ctx.Err() +} + +func (t *TSDBSeriesIter) Close() error { + return t.f.Close() +} + +// background iterates over the tsdb file, populating the next +// value via a channel to handle backpressure +func (t *TSDBSeriesIter) background() { + go func() { + t.err = t.f.ForSeries( + t.ctx, + t.bounds, + 0, math.MaxInt64, + func(_ labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) { + + res := &v1.Series{ + Fingerprint: fp, + Chunks: make(v1.ChunkRefs, 0, len(chks)), + } + for _, chk := range chks { + res.Chunks = append(res.Chunks, v1.ChunkRef{ + Start: 
model.Time(chk.MinTime), + End: model.Time(chk.MaxTime), + Checksum: chk.Checksum, + }) + } + + select { + case <-t.ctx.Done(): + return + case t.ch <- res: + } + }, + labels.MustNewMatcher(labels.MatchEqual, "", ""), + ) + close(t.ch) + }() +} + +type TSDBStores struct { + schemaCfg config.SchemaConfig + stores []TSDBStore +} + +func NewTSDBStores( + schemaCfg config.SchemaConfig, + storeCfg baseStore.Config, + clientMetrics baseStore.ClientMetrics, +) (*TSDBStores, error) { + res := &TSDBStores{ + schemaCfg: schemaCfg, + stores: make([]TSDBStore, len(schemaCfg.Configs)), + } + + for i, cfg := range schemaCfg.Configs { + if cfg.IndexType == config.TSDBType { + + c, err := baseStore.NewObjectClient(cfg.ObjectType, storeCfg, clientMetrics) + if err != nil { + return nil, errors.Wrap(err, "failed to create object client") + } + res.stores[i] = NewBloomTSDBStore(storage.NewIndexStorageClient(c, cfg.IndexTables.PathPrefix)) + } + } + + return res, nil +} + +func (s *TSDBStores) storeForPeriod(table DayTable) (TSDBStore, error) { + for i := len(s.schemaCfg.Configs) - 1; i >= 0; i-- { + period := s.schemaCfg.Configs[i] + + if !table.Before(DayTable(period.From.Time)) { + // we have the desired period config + + if s.stores[i] != nil { + // valid: it's of tsdb type + return s.stores[i], nil + } + + // invalid + return nil, errors.Errorf( + "store for period is not of TSDB type (%s) while looking up store for (%v)", + period.IndexType, + table.ModelTime().Time(), + ) + } + + } + + return nil, fmt.Errorf( + "There is no store matching no matching period found for table (%v) -- too early", + table.ModelTime().Time(), + ) +} + +func (s *TSDBStores) UsersForPeriod(ctx context.Context, table DayTable) ([]string, error) { + store, err := s.storeForPeriod(table) + if err != nil { + return nil, err + } + + return store.UsersForPeriod(ctx, table) +} + +func (s *TSDBStores) ResolveTSDBs(ctx context.Context, table DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) { + store, err := s.storeForPeriod(table) + if err != nil { + return nil, err + } + + return store.ResolveTSDBs(ctx, table, tenant) +} + +func (s *TSDBStores) LoadTSDB( + ctx context.Context, + table DayTable, + tenant string, + id tsdb.Identifier, + bounds v1.FingerprintBounds, +) (v1.CloseableIterator[*v1.Series], error) { + store, err := s.storeForPeriod(table) + if err != nil { + return nil, err + } + + return store.LoadTSDB(ctx, table, tenant, id, bounds) +} diff --git a/pkg/bloomcompactor/tsdb_test.go b/pkg/bloomcompactor/tsdb_test.go new file mode 100644 index 0000000000000..08f301758bf53 --- /dev/null +++ b/pkg/bloomcompactor/tsdb_test.go @@ -0,0 +1,86 @@ +package bloomcompactor + +import ( + "context" + "math" + "testing" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/require" + + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb/index" +) + +type forSeriesTestImpl []*v1.Series + +func (f forSeriesTestImpl) ForSeries( + _ context.Context, + _ index.FingerprintFilter, + _ model.Time, + _ model.Time, + fn func(labels.Labels, model.Fingerprint, []index.ChunkMeta), + _ ...*labels.Matcher, +) error { + for i := range f { + unmapped := make([]index.ChunkMeta, 0, len(f[i].Chunks)) + for _, c := range f[i].Chunks { + unmapped = append(unmapped, index.ChunkMeta{ + MinTime: int64(c.Start), + MaxTime: int64(c.End), + Checksum: c.Checksum, + }) + } + + fn(nil, f[i].Fingerprint, unmapped) + 
} + return nil +} + +func (f forSeriesTestImpl) Close() error { + return nil +} + +func TestTSDBSeriesIter(t *testing.T) { + input := []*v1.Series{ + { + Fingerprint: 1, + Chunks: []v1.ChunkRef{ + { + Start: 0, + End: 1, + Checksum: 2, + }, + { + Start: 3, + End: 4, + Checksum: 5, + }, + }, + }, + } + srcItr := v1.NewSliceIter(input) + itr := NewTSDBSeriesIter(context.Background(), forSeriesTestImpl(input), v1.NewBounds(0, math.MaxUint64)) + + v1.EqualIterators[*v1.Series]( + t, + func(a, b *v1.Series) { + require.Equal(t, a, b) + }, + itr, + srcItr, + ) +} + +func TestTSDBSeriesIter_Expiry(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + itr := NewTSDBSeriesIter(ctx, forSeriesTestImpl{ + {}, // a single entry + }, v1.NewBounds(0, math.MaxUint64)) + + require.False(t, itr.Next()) + require.Error(t, itr.Err()) + +} diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index 1e7a54f1d1e33..abecbf6773fd3 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -23,13 +23,15 @@ of line filter expressions. | bloomgateway.Gateway | - queue.RequestQueue + queue.RequestQueue | - bloomgateway.Worker + bloomgateway.Worker | - bloomshipper.Shipper + bloomgateway.Processor | - bloomshipper.BloomFileClient + bloomshipper.Store + | + bloomshipper.Client | ObjectClient | @@ -56,6 +58,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/pkg/logqlmodel/stats" "github.com/grafana/loki/pkg/queue" "github.com/grafana/loki/pkg/storage" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" @@ -170,11 +173,9 @@ type Gateway struct { workerMetrics *workerMetrics queueMetrics *queue.Metrics - queue *queue.RequestQueue - activeUsers *util.ActiveUsersCleanupService - bloomShipper bloomshipper.Interface - - sharding ShardingStrategy + queue *queue.RequestQueue + activeUsers *util.ActiveUsersCleanupService + bloomStore bloomshipper.Store pendingTasks *pendingTasks @@ -193,12 +194,11 @@ func (l *fixedQueueLimits) MaxConsumers(_ string, _ int) int { } // New returns a new instance of the Bloom Gateway. 
-func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, overrides Limits, shardingStrategy ShardingStrategy, cm storage.ClientMetrics, logger log.Logger, reg prometheus.Registerer) (*Gateway, error) { +func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, overrides Limits, cm storage.ClientMetrics, logger log.Logger, reg prometheus.Registerer) (*Gateway, error) { g := &Gateway{ cfg: cfg, logger: logger, metrics: newMetrics(reg, constants.Loki, metricsSubsystem), - sharding: shardingStrategy, pendingTasks: makePendingTasks(pendingTasksInitialCap), workerConfig: workerConfig{ maxItems: 100, @@ -206,25 +206,33 @@ func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, o workerMetrics: newWorkerMetrics(reg, constants.Loki, metricsSubsystem), queueMetrics: queue.NewMetrics(reg, constants.Loki, metricsSubsystem), } + var err error g.queue = queue.NewRequestQueue(cfg.MaxOutstandingPerTenant, time.Minute, &fixedQueueLimits{0}, g.queueMetrics) g.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(g.queueMetrics.Cleanup) - // TODO(chaudum): Plug in cache var metasCache cache.Cache - var blocksCache *cache.EmbeddedCache[string, bloomshipper.BlockDirectory] - store, err := bloomshipper.NewBloomStore(schemaCfg.Configs, storageCfg, cm, metasCache, blocksCache, logger) - if err != nil { - return nil, err + mcCfg := storageCfg.BloomShipperConfig.MetasCache + if cache.IsCacheConfigured(mcCfg) { + metasCache, err = cache.New(mcCfg, reg, logger, stats.BloomMetasCache, constants.Loki) + if err != nil { + return nil, err + } } - bloomShipper, err := bloomshipper.NewShipper(store, storageCfg.BloomShipperConfig, overrides, logger, reg) + var blocksCache cache.TypedCache[string, bloomshipper.BlockDirectory] + bcCfg := storageCfg.BloomShipperConfig.BlocksCache + if bcCfg.IsEnabled() { + blocksCache = bloomshipper.NewBlocksCache(bcCfg, reg, logger) + } + + store, err := bloomshipper.NewBloomStore(schemaCfg.Configs, storageCfg, cm, metasCache, blocksCache, logger) if err != nil { return nil, err } // We need to keep a reference to be able to call Stop() on shutdown of the gateway. - g.bloomShipper = bloomShipper + g.bloomStore = store if err := g.initServices(); err != nil { return nil, err @@ -239,7 +247,7 @@ func (g *Gateway) initServices() error { svcs := []services.Service{g.queue, g.activeUsers} for i := 0; i < g.cfg.WorkerConcurrency; i++ { id := fmt.Sprintf("bloom-query-worker-%d", i) - w := newWorker(id, g.workerConfig, g.queue, g.bloomShipper, g.pendingTasks, g.logger, g.workerMetrics) + w := newWorker(id, g.workerConfig, g.queue, g.bloomStore, g.pendingTasks, g.logger, g.workerMetrics) svcs = append(svcs, w) } g.serviceMngr, err = services.NewManager(svcs...) 
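Editor's note (not part of the patch): with the shipper removed, workers reach blocks through the bloomshipper.Store wired up above, and the processor introduced later in this diff must close every CloseableBlockQuerier it fetches. A minimal sketch of that contract follows; withBlockQueriers is a hypothetical helper, and Close is assumed to release whatever reference the blocks cache holds on the underlying block directory.

func withBlockQueriers(ctx context.Context, store bloomshipper.Store, refs []bloomshipper.BlockRef, fn func(*bloomshipper.CloseableBlockQuerier) error) error {
	bqs, err := store.FetchBlocks(ctx, refs)
	if err != nil {
		return err
	}
	var firstErr error
	for _, bq := range bqs {
		if err := fn(bq); err != nil && firstErr == nil {
			firstErr = err
		}
		// close each querier regardless of outcome, mirroring processor.processBlocks
		bq.Close()
	}
	return firstErr
}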
@@ -291,7 +299,7 @@ func (g *Gateway) running(ctx context.Context) error { } func (g *Gateway) stopping(_ error) error { - g.bloomShipper.Stop() + g.bloomStore.Stop() return services.StopManagerAndAwaitStopped(context.Background(), g.serviceMngr) } diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index c8da44a7c719b..f07e014b84dc3 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -3,6 +3,7 @@ package bloomgateway import ( "context" "fmt" + "math/rand" "os" "testing" "time" @@ -45,8 +46,6 @@ func newLimits() *validation.Overrides { } func TestBloomGateway_StartStopService(t *testing.T) { - - ss := NewNoopStrategy() logger := log.NewNopLogger() reg := prometheus.NewRegistry() limits := newLimits() @@ -96,7 +95,7 @@ func TestBloomGateway_StartStopService(t *testing.T) { MaxOutstandingPerTenant: 1024, } - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -113,8 +112,6 @@ func TestBloomGateway_StartStopService(t *testing.T) { func TestBloomGateway_FilterChunkRefs(t *testing.T) { tenantID := "test" - - ss := NewNoopStrategy() logger := log.NewLogfmtLogger(os.Stderr) reg := prometheus.NewRegistry() limits := newLimits() @@ -165,15 +162,17 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("shipper error is propagated", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) now := mktime("2023-10-03 10:00") - bqs, data := createBlockQueriers(t, 10, now.Add(-24*time.Hour), now, 0, 1000) - mockStore := newMockBloomStore(bqs) - mockStore.err = errors.New("failed to fetch block") - gw.bloomShipper = mockStore + // replace store implementation and re-initialize workers and sub-services + _, metas, queriers, data := createBlocks(t, tenantID, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + mockStore.err = errors.New("request failed") + gw.bloomStore = mockStore err = gw.initServices() require.NoError(t, err) @@ -185,7 +184,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { require.NoError(t, err) }) - chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 10) + chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 100) // saturate workers // then send additional request @@ -204,21 +203,23 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Cleanup(cancelFn) res, err := gw.FilterChunkRefs(ctx, req) - require.ErrorContainsf(t, err, "request failed: failed to fetch block", "%+v", res) + require.ErrorContainsf(t, err, "request failed", "%+v", res) } }) t.Run("request cancellation does not result in channel locking", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) now := mktime("2024-01-25 10:00") - bqs, data := createBlockQueriers(t, 50, now.Add(-24*time.Hour), now, 0, 1024) - mockStore := newMockBloomStore(bqs) - mockStore.delay = 50 * time.Millisecond // delay for each block - 50x50=2500ms - gw.bloomShipper = mockStore + // replace store implementation and re-initialize workers and sub-services + _, metas, 
queriers, data := createBlocks(t, tenantID, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + mockStore.delay = 2000 * time.Millisecond + gw.bloomStore = mockStore err = gw.initServices() require.NoError(t, err) @@ -255,7 +256,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("returns unfiltered chunk refs if no filters provided", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -300,7 +301,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("gateway tracks active users", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -340,14 +341,15 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("use fuse queriers to filter chunks", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) now := mktime("2023-10-03 10:00") // replace store implementation and re-initialize workers and sub-services - bqs, data := createBlockQueriers(t, 5, now.Add(-8*time.Hour), now, 0, 1024) - gw.bloomShipper = newMockBloomStore(bqs) + _, metas, queriers, data := createBlocks(t, tenantID, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + gw.bloomStore = newMockBloomStore(queriers, metas) err = gw.initServices() require.NoError(t, err) @@ -358,7 +360,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { require.NoError(t, err) }) - chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 100) + chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 10) t.Run("no match - return empty response", func(t *testing.T) { inputChunkRefs := groupRefs(t, chunkRefs) @@ -382,27 +384,37 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("match - return filtered", func(t *testing.T) { inputChunkRefs := groupRefs(t, chunkRefs) - // hack to get indexed key for a specific series - // the indexed key range for a series is defined as - // i * keysPerSeries ... 
i * keysPerSeries + keysPerSeries - 1 - // where i is the nth series in a block - // fortunately, i is also used as Checksum for the single chunk of a series - // see mkBasicSeriesWithBlooms() in pkg/storage/bloom/v1/test_util.go - key := inputChunkRefs[0].Refs[0].Checksum*1000 + 500 + // Hack to get search string for a specific series + // see MkBasicSeriesWithBlooms() in pkg/storage/bloom/v1/test_util.go + // each series has 1 chunk + // each chunk has multiple strings, from int(fp) to int(nextFp)-1 + x := rand.Intn(len(inputChunkRefs)) + fp := inputChunkRefs[x].Fingerprint + chks := inputChunkRefs[x].Refs + line := fmt.Sprintf("%04x:%04x", int(fp), 0) // first line + + t.Log("x=", x, "fp=", fp, "line=", line) req := &logproto.FilterChunkRefRequest{ From: now.Add(-8 * time.Hour), Through: now, Refs: inputChunkRefs, Filters: []syntax.LineFilter{ - {Ty: labels.MatchEqual, Match: fmt.Sprintf("series %d", key)}, + {Ty: labels.MatchEqual, Match: line}, }, } ctx := user.InjectOrgID(context.Background(), tenantID) res, err := gw.FilterChunkRefs(ctx, req) require.NoError(t, err) + expectedResponse := &logproto.FilterChunkRefResponse{ - ChunkRefs: inputChunkRefs[:1], + ChunkRefs: []*logproto.GroupedChunkRefs{ + { + Fingerprint: fp, + Refs: chks, + Tenant: tenantID, + }, + }, } require.Equal(t, expectedResponse, res) }) diff --git a/pkg/bloomgateway/client.go b/pkg/bloomgateway/client.go index 6453987b91683..9a75e4e87c26b 100644 --- a/pkg/bloomgateway/client.go +++ b/pkg/bloomgateway/client.go @@ -36,6 +36,10 @@ import ( ) var ( + // BlocksOwnerRead is the operation used to check the authoritative owners of a block + // (replicas included) that are available for queries (a bloom gateway is available for + // queries only when ACTIVE). + BlocksOwnerRead = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, nil) // groupedChunksRefPool pooling slice of logproto.GroupedChunkRefs [64, 128, 256, ..., 65536] groupedChunksRefPool = queue.NewSlicePool[*logproto.GroupedChunkRefs](1<<6, 1<<16, 2) // ringGetBuffersPool pooling for ringGetBuffers to avoid calling ring.MakeBuffersForGet() for each request @@ -226,15 +230,16 @@ func (c *GatewayClient) FilterChunks(ctx context.Context, tenant string, from, t } subRing := GetShuffleShardingSubring(c.ring, tenant, c.limits) - rs, err := subRing.GetAllHealthy(BlocksRead) + rs, err := subRing.GetAllHealthy(BlocksOwnerRead) if err != nil { return nil, errors.Wrap(err, "bloom gateway get healthy instances") } - streamsByInst, err := c.groupFingerprintsByServer(groups, subRing, rs.Instances) + servers, err := serverAddressesWithTokenRanges(subRing, rs.Instances) if err != nil { return nil, err } + streamsByInst := groupFingerprintsByServer(groups, servers) filteredChunkRefs := groupedChunksRefPool.Get(len(groups)) defer groupedChunksRefPool.Put(filteredChunkRefs) @@ -286,13 +291,9 @@ func (c *GatewayClient) doForAddrs(addrs []string, fn func(logproto.BloomGateway return err } -func (c *GatewayClient) groupFingerprintsByServer(groups []*logproto.GroupedChunkRefs, subRing ring.ReadRing, instances []ring.InstanceDesc) ([]instanceWithFingerprints, error) { - servers, err := serverAddressesWithTokenRanges(subRing, instances) - if err != nil { - return nil, err - } +func groupFingerprintsByServer(groups []*logproto.GroupedChunkRefs, servers []addrsWithTokenRange) []instanceWithFingerprints { boundedFingerprints := partitionFingerprintsByAddresses(groups, servers) - return groupByInstance(boundedFingerprints), nil + return groupByInstance(boundedFingerprints) } func 
serverAddressesWithTokenRanges(subRing ring.ReadRing, instances []ring.InstanceDesc) ([]addrsWithTokenRange, error) { @@ -303,7 +304,7 @@ func serverAddressesWithTokenRanges(subRing ring.ReadRing, instances []ring.Inst for it.Next() { // We can use on of the tokens from the token range // to obtain all addresses for that token. - rs, err := subRing.Get(it.At().MaxToken, BlocksRead, bufDescs, bufHosts, bufZones) + rs, err := subRing.Get(it.At().MaxToken, BlocksOwnerRead, bufDescs, bufHosts, bufZones) if err != nil { return nil, errors.Wrap(err, "bloom gateway get ring") } @@ -410,3 +411,21 @@ func groupByInstance(boundedFingerprints []instanceWithFingerprints) []instanceW return result } + +// GetShuffleShardingSubring returns the subring to be used for a given user. +// This function should be used both by index gateway servers and clients in +// order to guarantee the same logic is used. +func GetShuffleShardingSubring(ring ring.ReadRing, tenantID string, limits Limits) ring.ReadRing { + shardSize := limits.BloomGatewayShardSize(tenantID) + + // A shard size of 0 means shuffle sharding is disabled for this specific user, + // so we just return the full ring so that indexes will be sharded across all index gateways. + // Since we set the shard size to replication factor if shard size is 0, this + // can only happen if both the shard size and the replication factor are set + // to 0. + if shardSize <= 0 { + return ring + } + + return ring.ShuffleShard(tenantID, shardSize) +} diff --git a/pkg/bloomgateway/client_test.go b/pkg/bloomgateway/client_test.go index e59fff2306ab9..b1716de8150ea 100644 --- a/pkg/bloomgateway/client_test.go +++ b/pkg/bloomgateway/client_test.go @@ -207,19 +207,6 @@ func TestBloomGatewayClient_ServerAddressesWithTokenRanges(t *testing.T) { } func TestBloomGatewayClient_GroupFingerprintsByServer(t *testing.T) { - - logger := log.NewNopLogger() - reg := prometheus.NewRegistry() - - l, err := validation.NewOverrides(validation.Limits{BloomGatewayShardSize: 1}, nil) - require.NoError(t, err) - - cfg := ClientConfig{} - flagext.DefaultValues(&cfg) - - c, err := NewClient(cfg, nil, l, reg, logger, "loki", nil, false) - require.NoError(t, err) - instances := []ring.InstanceDesc{ {Id: "instance-1", Addr: "10.0.0.1", Tokens: []uint32{2146405214, 1029997044, 678878693}}, {Id: "instance-2", Addr: "10.0.0.2", Tokens: []uint32{296463531, 1697323986, 800258284}}, @@ -339,8 +326,9 @@ func TestBloomGatewayClient_GroupFingerprintsByServer(t *testing.T) { return tc.chunks[i].Fingerprint < tc.chunks[j].Fingerprint }) - res, err := c.groupFingerprintsByServer(tc.chunks, subRing, instances) + servers, err := serverAddressesWithTokenRanges(subRing, instances) require.NoError(t, err) + res := groupFingerprintsByServer(tc.chunks, servers) require.Equal(t, tc.expected, res) }) } diff --git a/pkg/bloomgateway/processor.go b/pkg/bloomgateway/processor.go index 117e736e4f54f..4fe9c38483cbe 100644 --- a/pkg/bloomgateway/processor.go +++ b/pkg/bloomgateway/processor.go @@ -4,6 +4,7 @@ import ( "context" "math" "sort" + "time" "github.com/go-kit/log" "github.com/prometheus/common/model" @@ -17,18 +18,20 @@ type tasksForBlock struct { tasks []Task } -type blockLoader interface { - LoadBlocks(context.Context, []bloomshipper.BlockRef) (v1.Iterator[bloomshipper.BlockQuerierWithFingerprintRange], error) -} - -type store interface { - blockLoader - bloomshipper.Store +func newProcessor(id string, store bloomshipper.Store, logger log.Logger, metrics *workerMetrics) *processor { + return &processor{ + id: 
id, + store: store, + logger: logger, + metrics: metrics, + } } type processor struct { - store store - logger log.Logger + id string + store bloomshipper.Store + logger log.Logger + metrics *workerMetrics } func (p *processor) run(ctx context.Context, tasks []Task) error { @@ -60,6 +63,8 @@ func (p *processor) processTasks(ctx context.Context, tenant string, interval bl if err != nil { return err } + p.metrics.metasFetched.WithLabelValues(p.id).Observe(float64(len(metas))) + blocksRefs := bloomshipper.BlocksForMetas(metas, interval, keyspaces) return p.processBlocks(ctx, partition(tasks, blocksRefs)) } @@ -70,17 +75,21 @@ func (p *processor) processBlocks(ctx context.Context, data []tasksForBlock) err refs = append(refs, block.blockRef) } - blockIter, err := p.store.LoadBlocks(ctx, refs) + bqs, err := p.store.FetchBlocks(ctx, refs) if err != nil { return err } + p.metrics.metasFetched.WithLabelValues(p.id).Observe(float64(len(bqs))) + + blockIter := v1.NewSliceIter(bqs) outer: for blockIter.Next() { bq := blockIter.At() for i, block := range data { - if block.blockRef.Bounds.Equal(bq.FingerprintBounds) { + if block.blockRef.Bounds.Equal(bq.Bounds) { err := p.processBlock(ctx, bq.BlockQuerier, block.tasks) + bq.Close() if err != nil { return err } @@ -88,6 +97,8 @@ outer: continue outer } } + // should not happen, but close anyway + bq.Close() } return nil } @@ -106,7 +117,16 @@ func (p *processor) processBlock(_ context.Context, blockQuerier *v1.BlockQuerie } fq := blockQuerier.Fuse(iters) - return fq.Run() + + start := time.Now() + err = fq.Run() + if err != nil { + p.metrics.blockQueryLatency.WithLabelValues(p.id, labelFailure).Observe(time.Since(start).Seconds()) + } else { + p.metrics.blockQueryLatency.WithLabelValues(p.id, labelSuccess).Observe(time.Since(start).Seconds()) + } + + return err } // getFirstLast returns the first and last item of a fingerprint slice diff --git a/pkg/bloomgateway/processor_test.go b/pkg/bloomgateway/processor_test.go index 33f7513fbf592..c4c8f8457b3a1 100644 --- a/pkg/bloomgateway/processor_test.go +++ b/pkg/bloomgateway/processor_test.go @@ -7,24 +7,40 @@ import ( "testing" "time" + "github.com/go-kit/log" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "go.uber.org/atomic" "github.com/grafana/loki/pkg/logql/syntax" - v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" + "github.com/grafana/loki/pkg/util/constants" ) -var _ store = &dummyStore{} +var _ bloomshipper.Store = &dummyStore{} + +func newMockBloomStore(bqs []*bloomshipper.CloseableBlockQuerier, metas []bloomshipper.Meta) *dummyStore { + return &dummyStore{ + querieres: bqs, + metas: metas, + } +} type dummyStore struct { metas []bloomshipper.Meta - blocks []bloomshipper.BlockRef - querieres []bloomshipper.BlockQuerierWithFingerprintRange + querieres []*bloomshipper.CloseableBlockQuerier + + // mock how long it takes to serve block queriers + delay time.Duration + // mock response error when serving block queriers in ForEach + err error } func (s *dummyStore) ResolveMetas(_ context.Context, _ bloomshipper.MetaSearchParams) ([][]bloomshipper.MetaRef, []*bloomshipper.Fetcher, error) { + time.Sleep(s.delay) + //TODO(chaudum) Filter metas based on search params refs := make([]bloomshipper.MetaRef, 0, len(s.metas)) for _, meta := range s.metas { @@ -38,23 +54,28 @@ func (s *dummyStore) FetchMetas(_ context.Context, _ 
bloomshipper.MetaSearchPara return s.metas, nil } -func (s *dummyStore) FetchBlocks(_ context.Context, _ []bloomshipper.BlockRef) ([]bloomshipper.BlockDirectory, error) { - panic("don't call me") +func (s *dummyStore) Fetcher(_ model.Time) (*bloomshipper.Fetcher, error) { + return nil, nil } -func (s *dummyStore) Fetcher(_ model.Time) *bloomshipper.Fetcher { - return nil +func (s *dummyStore) Client(_ model.Time) (bloomshipper.Client, error) { + return nil, nil } func (s *dummyStore) Stop() { } -func (s *dummyStore) LoadBlocks(_ context.Context, refs []bloomshipper.BlockRef) (v1.Iterator[bloomshipper.BlockQuerierWithFingerprintRange], error) { - result := make([]bloomshipper.BlockQuerierWithFingerprintRange, len(s.querieres)) +func (s *dummyStore) FetchBlocks(_ context.Context, refs []bloomshipper.BlockRef) ([]*bloomshipper.CloseableBlockQuerier, error) { + result := make([]*bloomshipper.CloseableBlockQuerier, 0, len(s.querieres)) + + if s.err != nil { + time.Sleep(s.delay) + return result, s.err + } for _, ref := range refs { for _, bq := range s.querieres { - if ref.Bounds.Equal(bq.FingerprintBounds) { + if ref.Bounds.Equal(bq.Bounds) { result = append(result, bq) } } @@ -64,23 +85,22 @@ func (s *dummyStore) LoadBlocks(_ context.Context, refs []bloomshipper.BlockRef) result[i], result[j] = result[j], result[i] }) - return v1.NewSliceIter(result), nil + time.Sleep(s.delay) + + return result, nil } func TestProcessor(t *testing.T) { ctx := context.Background() tenant := "fake" now := mktime("2024-01-27 12:00") + metrics := newWorkerMetrics(prometheus.NewPedanticRegistry(), constants.Loki, "bloom_gatway") - t.Run("dummy", func(t *testing.T) { - blocks, metas, queriers, data := createBlocks(t, tenant, 10, now.Add(-1*time.Hour), now, 0x0000, 0x1000) - p := &processor{ - store: &dummyStore{ - querieres: queriers, - metas: metas, - blocks: blocks, - }, - } + t.Run("success case", func(t *testing.T) { + _, metas, queriers, data := createBlocks(t, tenant, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + p := newProcessor("worker", mockStore, log.NewNopLogger(), metrics) chunkRefs := createQueryInputFromBlockData(t, tenant, data, 10) swb := seriesWithBounds{ @@ -117,4 +137,48 @@ func TestProcessor(t *testing.T) { require.NoError(t, err) require.Equal(t, int64(len(swb.series)), results.Load()) }) + + t.Run("failure case", func(t *testing.T) { + _, metas, queriers, data := createBlocks(t, tenant, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + mockStore.err = errors.New("store failed") + + p := newProcessor("worker", mockStore, log.NewNopLogger(), metrics) + + chunkRefs := createQueryInputFromBlockData(t, tenant, data, 10) + swb := seriesWithBounds{ + series: groupRefs(t, chunkRefs), + bounds: model.Interval{ + Start: now.Add(-1 * time.Hour), + End: now, + }, + day: truncateDay(now), + } + filters := []syntax.LineFilter{ + {Ty: 0, Match: "no match"}, + } + + t.Log("series", len(swb.series)) + task, _ := NewTask(ctx, "fake", swb, filters) + tasks := []Task{task} + + results := atomic.NewInt64(0) + var wg sync.WaitGroup + for i := range tasks { + wg.Add(1) + go func(ta Task) { + defer wg.Done() + for range ta.resCh { + results.Inc() + } + t.Log("done", results.Load()) + }(tasks[i]) + } + + err := p.run(ctx, tasks) + wg.Wait() + require.Errorf(t, err, "store failed") + require.Equal(t, int64(0), results.Load()) + }) } diff --git a/pkg/bloomgateway/sharding.go b/pkg/bloomgateway/sharding.go deleted 
file mode 100644 index 5dfb9f11732a0..0000000000000 --- a/pkg/bloomgateway/sharding.go +++ /dev/null @@ -1,156 +0,0 @@ -package bloomgateway - -import ( - "context" - - "github.com/go-kit/log" - "github.com/grafana/dskit/ring" - - util_ring "github.com/grafana/loki/pkg/util/ring" -) - -// TODO(chaudum): Replace this placeholder with actual BlockRef struct. -type BlockRef struct { - FromFp, ThroughFp uint64 - FromTs, ThroughTs int64 -} - -var ( - // BlocksOwnerSync is the operation used to check the authoritative owners of a block - // (replicas included). - BlocksOwnerSync = ring.NewOp([]ring.InstanceState{ring.JOINING, ring.ACTIVE, ring.LEAVING}, nil) - - // BlocksOwnerRead is the operation used to check the authoritative owners of a block - // (replicas included) that are available for queries (a bloom gateway is available for - // queries only when ACTIVE). - BlocksOwnerRead = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, nil) - - // BlocksRead is the operation run by the querier to query blocks via the bloom gateway. - BlocksRead = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, func(s ring.InstanceState) bool { - // Blocks can only be queried from ACTIVE instances. However, if the block belongs to - // a non-active instance, then we should extend the replication set and try to query it - // from the next ACTIVE instance in the ring (which is expected to have it because a - // bloom gateway keeps their previously owned blocks until new owners are ACTIVE). - return s != ring.ACTIVE - }) -) - -type ShardingStrategy interface { - // FilterTenants whose indexes should be loaded by the index gateway. - // Returns the list of user IDs that should be synced by the index gateway. - FilterTenants(ctx context.Context, tenantIDs []string) ([]string, error) - FilterBlocks(ctx context.Context, tenantID string, blockRefs []BlockRef) ([]BlockRef, error) -} - -type ShuffleShardingStrategy struct { - util_ring.TenantSharding - r ring.ReadRing - ringLifeCycler *ring.BasicLifecycler - logger log.Logger -} - -func NewShuffleShardingStrategy(r ring.ReadRing, ringLifecycler *ring.BasicLifecycler, limits Limits, logger log.Logger) *ShuffleShardingStrategy { - return &ShuffleShardingStrategy{ - TenantSharding: util_ring.NewTenantShuffleSharding(r, ringLifecycler, limits.BloomGatewayShardSize), - ringLifeCycler: ringLifecycler, - logger: logger, - } -} - -// FilterTenants implements ShardingStrategy. -func (s *ShuffleShardingStrategy) FilterTenants(_ context.Context, tenantIDs []string) ([]string, error) { - // As a protection, ensure the bloom gateway instance is healthy in the ring. It could also be missing - // in the ring if it was failing to heartbeat the ring and it got remove from another healthy bloom gateway - // instance, because of the auto-forget feature. - if set, err := s.r.GetAllHealthy(BlocksOwnerSync); err != nil { - return nil, err - } else if !set.Includes(s.ringLifeCycler.GetInstanceID()) { - return nil, errGatewayUnhealthy - } - - var filteredIDs []string - - for _, tenantID := range tenantIDs { - // Include the user only if it belongs to this bloom gateway shard. - if s.OwnsTenant(tenantID) { - filteredIDs = append(filteredIDs, tenantID) - } - } - - return filteredIDs, nil -} - -// nolint:revive -func getBucket(rangeMin, rangeMax, pos uint64) int { - return 0 -} - -// FilterBlocks implements ShardingStrategy. 
-func (s *ShuffleShardingStrategy) FilterBlocks(_ context.Context, tenantID string, blockRefs []BlockRef) ([]BlockRef, error) { - if !s.OwnsTenant(tenantID) { - return nil, nil - } - - filteredBlockRefs := make([]BlockRef, 0, len(blockRefs)) - - tenantRing := s.GetTenantSubRing(tenantID) - - fpSharding := util_ring.NewFingerprintShuffleSharding(tenantRing, s.ringLifeCycler, BlocksOwnerSync) - for _, blockRef := range blockRefs { - owns, err := fpSharding.OwnsFingerprint(blockRef.FromFp) - if err != nil { - return nil, err - } - if owns { - filteredBlockRefs = append(filteredBlockRefs, blockRef) - continue - } - - owns, err = fpSharding.OwnsFingerprint(blockRef.ThroughFp) - if err != nil { - return nil, err - } - if owns { - filteredBlockRefs = append(filteredBlockRefs, blockRef) - continue - } - } - - return filteredBlockRefs, nil -} - -// GetShuffleShardingSubring returns the subring to be used for a given user. -// This function should be used both by index gateway servers and clients in -// order to guarantee the same logic is used. -func GetShuffleShardingSubring(ring ring.ReadRing, tenantID string, limits Limits) ring.ReadRing { - shardSize := limits.BloomGatewayShardSize(tenantID) - - // A shard size of 0 means shuffle sharding is disabled for this specific user, - // so we just return the full ring so that indexes will be sharded across all index gateways. - // Since we set the shard size to replication factor if shard size is 0, this - // can only happen if both the shard size and the replication factor are set - // to 0. - if shardSize <= 0 { - return ring - } - - return ring.ShuffleShard(tenantID, shardSize) -} - -// NoopStrategy is an implementation of the ShardingStrategy that does not -// filter anything. -type NoopStrategy struct{} - -func NewNoopStrategy() *NoopStrategy { - return &NoopStrategy{} -} - -// FilterTenants implements ShardingStrategy. -func (s *NoopStrategy) FilterTenants(_ context.Context, tenantIDs []string) ([]string, error) { - return tenantIDs, nil -} - -// FilterBlocks implements ShardingStrategy. 
-func (s *NoopStrategy) FilterBlocks(_ context.Context, _ string, blockRefs []BlockRef) ([]BlockRef, error) { - return blockRefs, nil -} diff --git a/pkg/bloomgateway/util_test.go b/pkg/bloomgateway/util_test.go index d60ab5f13a190..8fc37f20bac8e 100644 --- a/pkg/bloomgateway/util_test.go +++ b/pkg/bloomgateway/util_test.go @@ -1,8 +1,6 @@ package bloomgateway import ( - "context" - "math/rand" "testing" "time" @@ -295,35 +293,12 @@ func TestPartitionRequest(t *testing.T) { } -func createBlockQueriers(t *testing.T, numBlocks int, from, through model.Time, minFp, maxFp model.Fingerprint) ([]bloomshipper.BlockQuerierWithFingerprintRange, [][]v1.SeriesWithBloom) { - t.Helper() - step := (maxFp - minFp) / model.Fingerprint(numBlocks) - bqs := make([]bloomshipper.BlockQuerierWithFingerprintRange, 0, numBlocks) - series := make([][]v1.SeriesWithBloom, 0, numBlocks) - for i := 0; i < numBlocks; i++ { - fromFp := minFp + (step * model.Fingerprint(i)) - throughFp := fromFp + step - 1 - // last block needs to include maxFp - if i == numBlocks-1 { - throughFp = maxFp - } - blockQuerier, data := v1.MakeBlockQuerier(t, fromFp, throughFp, from, through) - bq := bloomshipper.BlockQuerierWithFingerprintRange{ - BlockQuerier: blockQuerier, - FingerprintBounds: v1.NewBounds(fromFp, throughFp), - } - bqs = append(bqs, bq) - series = append(series, data) - } - return bqs, series -} - -func createBlocks(t *testing.T, tenant string, n int, from, through model.Time, minFp, maxFp model.Fingerprint) ([]bloomshipper.BlockRef, []bloomshipper.Meta, []bloomshipper.BlockQuerierWithFingerprintRange, [][]v1.SeriesWithBloom) { +func createBlocks(t *testing.T, tenant string, n int, from, through model.Time, minFp, maxFp model.Fingerprint) ([]bloomshipper.BlockRef, []bloomshipper.Meta, []*bloomshipper.CloseableBlockQuerier, [][]v1.SeriesWithBloom) { t.Helper() - blocks := make([]bloomshipper.BlockRef, 0, n) + blockRefs := make([]bloomshipper.BlockRef, 0, n) metas := make([]bloomshipper.Meta, 0, n) - queriers := make([]bloomshipper.BlockQuerierWithFingerprintRange, 0, n) + queriers := make([]*bloomshipper.CloseableBlockQuerier, 0, n) series := make([][]v1.SeriesWithBloom, 0, n) step := (maxFp - minFp) / model.Fingerprint(n) @@ -341,7 +316,7 @@ func createBlocks(t *testing.T, tenant string, n int, from, through model.Time, StartTimestamp: from, EndTimestamp: through, } - block := bloomshipper.BlockRef{ + blockRef := bloomshipper.BlockRef{ Ref: ref, } meta := bloomshipper.Meta{ @@ -349,76 +324,26 @@ func createBlocks(t *testing.T, tenant string, n int, from, through model.Time, Ref: ref, }, Tombstones: []bloomshipper.BlockRef{}, - Blocks: []bloomshipper.BlockRef{block}, + Blocks: []bloomshipper.BlockRef{blockRef}, } - blockQuerier, data := v1.MakeBlockQuerier(t, fromFp, throughFp, from, through) - querier := bloomshipper.BlockQuerierWithFingerprintRange{ - BlockQuerier: blockQuerier, - FingerprintBounds: v1.NewBounds(fromFp, throughFp), + block, data, _ := v1.MakeBlock(t, n, fromFp, throughFp, from, through) + // Printing fingerprints and the log lines of its chunks comes handy for debugging... 
+ // for i := range keys { + // t.Log(data[i].Series.Fingerprint) + // for j := range keys[i] { + // t.Log(i, j, string(keys[i][j])) + // } + // } + querier := &bloomshipper.CloseableBlockQuerier{ + BlockQuerier: v1.NewBlockQuerier(block), + BlockRef: blockRef, } queriers = append(queriers, querier) metas = append(metas, meta) - blocks = append(blocks, block) + blockRefs = append(blockRefs, blockRef) series = append(series, data) } - return blocks, metas, queriers, series -} - -func newMockBloomStore(bqs []bloomshipper.BlockQuerierWithFingerprintRange) *mockBloomStore { - return &mockBloomStore{bqs: bqs} -} - -type mockBloomStore struct { - bqs []bloomshipper.BlockQuerierWithFingerprintRange - // mock how long it takes to serve block queriers - delay time.Duration - // mock response error when serving block queriers in ForEach - err error -} - -var _ bloomshipper.Interface = &mockBloomStore{} - -// GetBlockRefs implements bloomshipper.Interface -func (s *mockBloomStore) GetBlockRefs(_ context.Context, tenant string, _ bloomshipper.Interval) ([]bloomshipper.BlockRef, error) { - time.Sleep(s.delay) - blocks := make([]bloomshipper.BlockRef, 0, len(s.bqs)) - for i := range s.bqs { - blocks = append(blocks, bloomshipper.BlockRef{ - Ref: bloomshipper.Ref{ - Bounds: v1.NewBounds(s.bqs[i].Min, s.bqs[i].Max), - TenantID: tenant, - }, - }) - } - return blocks, nil -} - -// Stop implements bloomshipper.Interface -func (s *mockBloomStore) Stop() {} - -// Fetch implements bloomshipper.Interface -func (s *mockBloomStore) Fetch(_ context.Context, _ string, _ []bloomshipper.BlockRef, callback bloomshipper.ForEachBlockCallback) error { - if s.err != nil { - time.Sleep(s.delay) - return s.err - } - - shuffled := make([]bloomshipper.BlockQuerierWithFingerprintRange, len(s.bqs)) - _ = copy(shuffled, s.bqs) - - rand.Shuffle(len(shuffled), func(i, j int) { - shuffled[i], shuffled[j] = shuffled[j], shuffled[i] - }) - - for _, bq := range shuffled { - // ignore errors in the mock - time.Sleep(s.delay) - err := callback(bq.BlockQuerier, bq.FingerprintBounds) - if err != nil { - return err - } - } - return nil + return blockRefs, metas, queriers, series } func createQueryInputFromBlockData(t *testing.T, tenant string, data [][]v1.SeriesWithBloom, nthSeries int) []*logproto.ChunkRef { @@ -443,7 +368,7 @@ func createQueryInputFromBlockData(t *testing.T, tenant string, data [][]v1.Seri return res } -func createBlockRefsFromBlockData(t *testing.T, tenant string, data []bloomshipper.BlockQuerierWithFingerprintRange) []bloomshipper.BlockRef { +func createBlockRefsFromBlockData(t *testing.T, tenant string, data []*bloomshipper.CloseableBlockQuerier) []bloomshipper.BlockRef { t.Helper() res := make([]bloomshipper.BlockRef, 0) for i := range data { @@ -451,7 +376,7 @@ func createBlockRefsFromBlockData(t *testing.T, tenant string, data []bloomshipp Ref: bloomshipper.Ref{ TenantID: tenant, TableName: "", - Bounds: v1.NewBounds(data[i].Min, data[i].Max), + Bounds: v1.NewBounds(data[i].Bounds.Min, data[i].Bounds.Max), StartTimestamp: 0, EndTimestamp: 0, Checksum: 0, diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index 34a01e50c4354..5c57c0a2e4952 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -10,60 +10,76 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/prometheus/common/model" - "golang.org/x/exp/slices" "github.com/grafana/loki/pkg/queue" - v1 
"github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" ) +const ( + labelSuccess = "success" + labelFailure = "failure" +) + type workerConfig struct { maxItems int } type workerMetrics struct { - dequeuedTasks *prometheus.CounterVec - dequeueErrors *prometheus.CounterVec - dequeueWaitTime *prometheus.SummaryVec - storeAccessLatency *prometheus.HistogramVec - bloomQueryLatency *prometheus.HistogramVec + dequeueDuration *prometheus.HistogramVec + processDuration *prometheus.HistogramVec + metasFetched *prometheus.HistogramVec + blocksFetched *prometheus.HistogramVec + tasksDequeued *prometheus.CounterVec + tasksProcessed *prometheus.CounterVec + blockQueryLatency *prometheus.HistogramVec } func newWorkerMetrics(registerer prometheus.Registerer, namespace, subsystem string) *workerMetrics { labels := []string{"worker"} + r := promauto.With(registerer) return &workerMetrics{ - dequeuedTasks: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + dequeueDuration: r.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "dequeued_tasks_total", - Help: "Total amount of tasks that the worker dequeued from the bloom query queue", + Name: "dequeue_duration_seconds", + Help: "Time spent dequeuing tasks from queue in seconds", }, labels), - dequeueErrors: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + processDuration: r.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "process_duration_seconds", + Help: "Time spent processing tasks in seconds", + }, append(labels, "status")), + metasFetched: r.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "dequeue_errors_total", - Help: "Total amount of failed dequeue operations", + Name: "metas_fetched", + Help: "Amount of metas fetched", }, labels), - dequeueWaitTime: promauto.With(registerer).NewSummaryVec(prometheus.SummaryOpts{ + blocksFetched: r.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "dequeue_wait_time", - Help: "Time spent waiting for dequeuing tasks from queue", + Name: "blocks_fetched", + Help: "Amount of blocks fetched", }, labels), - bloomQueryLatency: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ + tasksDequeued: r.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "bloom_query_latency", - Help: "Latency in seconds of processing bloom blocks", + Name: "tasks_dequeued_total", + Help: "Total amount of tasks that the worker dequeued from the queue", }, append(labels, "status")), - // TODO(chaudum): Move this metric into the bloomshipper - storeAccessLatency: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ + tasksProcessed: r.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "store_latency", - Help: "Latency in seconds of accessing the bloom store component", - }, append(labels, "operation")), + Name: "tasks_processed_total", + Help: "Total amount of tasks that the worker processed", + }, append(labels, "status")), + blockQueryLatency: r.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "block_query_latency", + Help: "Time spent running searches against a bloom block", + }, append(labels, "status")), } } @@ -78,18 +94,18 @@ type worker struct { id string cfg workerConfig queue *queue.RequestQueue - shipper 
bloomshipper.Interface + store bloomshipper.Store pending *pendingTasks logger log.Logger metrics *workerMetrics } -func newWorker(id string, cfg workerConfig, queue *queue.RequestQueue, shipper bloomshipper.Interface, pending *pendingTasks, logger log.Logger, metrics *workerMetrics) *worker { +func newWorker(id string, cfg workerConfig, queue *queue.RequestQueue, store bloomshipper.Store, pending *pendingTasks, logger log.Logger, metrics *workerMetrics) *worker { w := &worker{ id: id, cfg: cfg, queue: queue, - shipper: shipper, + store: store, pending: pending, logger: log.With(logger, "worker", id), metrics: metrics, @@ -107,17 +123,19 @@ func (w *worker) starting(_ context.Context) error { func (w *worker) running(_ context.Context) error { idx := queue.StartIndexWithLocalQueue + p := newProcessor(w.id, w.store, w.logger, w.metrics) + for st := w.State(); st == services.Running || st == services.Stopping; { taskCtx := context.Background() - dequeueStart := time.Now() + start := time.Now() items, newIdx, err := w.queue.DequeueMany(taskCtx, idx, w.id, w.cfg.maxItems) - w.metrics.dequeueWaitTime.WithLabelValues(w.id).Observe(time.Since(dequeueStart).Seconds()) + w.metrics.dequeueDuration.WithLabelValues(w.id).Observe(time.Since(start).Seconds()) if err != nil { // We only return an error if the queue is stopped and dequeuing did not yield any items if err == queue.ErrStopped && len(items) == 0 { return err } - w.metrics.dequeueErrors.WithLabelValues(w.id).Inc() + w.metrics.tasksDequeued.WithLabelValues(w.id, labelFailure).Inc() level.Error(w.logger).Log("msg", "failed to dequeue tasks", "err", err, "items", len(items)) } idx = newIdx @@ -126,10 +144,9 @@ func (w *worker) running(_ context.Context) error { w.queue.ReleaseRequests(items) continue } - w.metrics.dequeuedTasks.WithLabelValues(w.id).Add(float64(len(items))) - - tasksPerDay := make(map[model.Time][]Task) + w.metrics.tasksDequeued.WithLabelValues(w.id, labelSuccess).Add(float64(len(items))) + tasks := make([]Task, 0, len(items)) for _, item := range items { task, ok := item.(Task) if !ok { @@ -139,91 +156,19 @@ func (w *worker) running(_ context.Context) error { } level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID) w.pending.Delete(task.ID) - - tasksPerDay[task.day] = append(tasksPerDay[task.day], task) + tasks = append(tasks, task) } - for day, tasks := range tasksPerDay { - - // Remove tasks that are already cancelled - tasks = slices.DeleteFunc(tasks, func(t Task) bool { - if res := t.ctx.Err(); res != nil { - t.CloseWithError(res) - return true - } - return false - }) - // no tasks to process - // continue with tasks of next day - if len(tasks) == 0 { - continue - } - - // interval is [Start, End) - interval := bloomshipper.NewInterval(day, day.Add(Day)) - logger := log.With(w.logger, "day", day.Time(), "tenant", tasks[0].Tenant) - level.Debug(logger).Log("msg", "process tasks", "tasks", len(tasks)) - - storeFetchStart := time.Now() - blockRefs, err := w.shipper.GetBlockRefs(taskCtx, tasks[0].Tenant, interval) - w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockRefs").Observe(time.Since(storeFetchStart).Seconds()) - if err != nil { - for _, t := range tasks { - t.CloseWithError(err) - } - // continue with tasks of next day - continue - } - if len(tasks) == 0 { - continue - } - - // No blocks found. - // Since there are no blocks for the given tasks, we need to return the - // unfiltered list of chunk refs. 
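
As an aside to the worker changes above: the dequeue/process loop now records durations and task counts under a success/failure status label (the error path further down in this hunk appears to observe processDuration with labelSuccess, which may be unintentional). A minimal, self-contained sketch of that observe-duration-and-count-by-status pattern follows; demoMetrics, processBatch and the metric names are made up for illustration and are not part of Loki.

```go
// Illustrative only: the "observe duration + count by status" pattern used by
// the reworked worker metrics. All names here are hypothetical stand-ins.
package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

const (
	labelSuccess = "success"
	labelFailure = "failure"
)

type demoMetrics struct {
	processDuration *prometheus.HistogramVec
	tasksProcessed  *prometheus.CounterVec
}

func newDemoMetrics(reg prometheus.Registerer) *demoMetrics {
	r := promauto.With(reg)
	return &demoMetrics{
		processDuration: r.NewHistogramVec(prometheus.HistogramOpts{
			Name: "demo_process_duration_seconds",
			Help: "Time spent processing a batch of tasks.",
		}, []string{"worker", "status"}),
		tasksProcessed: r.NewCounterVec(prometheus.CounterOpts{
			Name: "demo_tasks_processed_total",
			Help: "Tasks processed, partitioned by outcome.",
		}, []string{"worker", "status"}),
	}
}

// processBatch times a unit of work and records both duration and task count
// under a status label derived from the outcome.
func processBatch(m *demoMetrics, worker string, tasks int, run func() error) {
	start := time.Now()
	status := labelSuccess
	if err := run(); err != nil {
		status = labelFailure
	}
	m.processDuration.WithLabelValues(worker, status).Observe(time.Since(start).Seconds())
	m.tasksProcessed.WithLabelValues(worker, status).Add(float64(tasks))
}

func main() {
	m := newDemoMetrics(prometheus.NewRegistry())
	processBatch(m, "worker-1", 4, func() error { return nil })
}
```
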
- if len(blockRefs) == 0 { - level.Warn(logger).Log("msg", "no blocks found") - for _, t := range tasks { - t.Close() - } - // continue with tasks of next day - continue - } - - // Remove tasks that are already cancelled - tasks = slices.DeleteFunc(tasks, func(t Task) bool { - if res := t.ctx.Err(); res != nil { - t.CloseWithError(res) - return true - } - return false - }) - // no tasks to process - // continue with tasks of next day - if len(tasks) == 0 { - continue - } - - tasksForBlocks := partitionFingerprintRange(tasks, blockRefs) - blockRefs = blockRefs[:0] - for _, b := range tasksForBlocks { - blockRefs = append(blockRefs, b.blockRef) - } + start = time.Now() + err = p.run(taskCtx, tasks) - err = w.processBlocksWithCallback(taskCtx, tasks[0].Tenant, blockRefs, tasksForBlocks) - if err != nil { - for _, t := range tasks { - t.CloseWithError(err) - } - // continue with tasks of next day - continue - } - - // all tasks for this day are done. - // close them to notify the request handler - for _, task := range tasks { - task.Close() - } + if err != nil { + w.metrics.processDuration.WithLabelValues(w.id, labelSuccess).Observe(time.Since(start).Seconds()) + w.metrics.tasksProcessed.WithLabelValues(w.id, labelFailure).Add(float64(len(tasks))) + level.Error(w.logger).Log("msg", "failed to process tasks", "err", err) + } else { + w.metrics.processDuration.WithLabelValues(w.id, labelSuccess).Observe(time.Since(start).Seconds()) + w.metrics.tasksProcessed.WithLabelValues(w.id, labelSuccess).Add(float64(len(tasks))) } // return dequeued items back to the pool @@ -238,41 +183,3 @@ func (w *worker) stopping(err error) error { w.queue.UnregisterConsumerConnection(w.id) return nil } - -func (w *worker) processBlocksWithCallback(taskCtx context.Context, tenant string, blockRefs []bloomshipper.BlockRef, boundedRefs []boundedTasks) error { - return w.shipper.Fetch(taskCtx, tenant, blockRefs, func(bq *v1.BlockQuerier, bounds v1.FingerprintBounds) error { - for _, b := range boundedRefs { - if b.blockRef.Bounds.Equal(bounds) { - return w.processBlock(bq, b.tasks) - } - } - return nil - }) -} - -func (w *worker) processBlock(blockQuerier *v1.BlockQuerier, tasks []Task) error { - schema, err := blockQuerier.Schema() - if err != nil { - return err - } - - tokenizer := v1.NewNGramTokenizer(schema.NGramLen(), 0) - iters := make([]v1.PeekingIterator[v1.Request], 0, len(tasks)) - for _, task := range tasks { - it := v1.NewPeekingIter(task.RequestIter(tokenizer)) - iters = append(iters, it) - } - fq := blockQuerier.Fuse(iters) - - start := time.Now() - err = fq.Run() - duration := time.Since(start).Seconds() - - if err != nil { - w.metrics.bloomQueryLatency.WithLabelValues(w.id, "failure").Observe(duration) - return err - } - - w.metrics.bloomQueryLatency.WithLabelValues(w.id, "success").Observe(duration) - return nil -} diff --git a/pkg/logql/accumulator.go b/pkg/logql/accumulator.go new file mode 100644 index 0000000000000..9e9784cb037ef --- /dev/null +++ b/pkg/logql/accumulator.go @@ -0,0 +1,379 @@ +package logql + +import ( + "container/heap" + "context" + "fmt" + "sort" + "time" + + "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/pkg/logqlmodel" + "github.com/grafana/loki/pkg/logqlmodel/metadata" + "github.com/grafana/loki/pkg/logqlmodel/stats" + "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase/definitions" + "github.com/grafana/loki/pkg/util/math" +) + +// NewBufferedAccumulator returns an accumulator which aggregates all query +// results in a slice. 
This is useful for metric queries, which are generally +// small payloads and the memory overhead for buffering is negligible. +func NewBufferedAccumulator(n int) *BufferedAccumulator { + return &BufferedAccumulator{ + results: make([]logqlmodel.Result, n), + } +} + +type BufferedAccumulator struct { + results []logqlmodel.Result +} + +func (a *BufferedAccumulator) Accumulate(_ context.Context, acc logqlmodel.Result, i int) error { + a.results[i] = acc + return nil +} + +func (a *BufferedAccumulator) Result() []logqlmodel.Result { + return a.results +} + +type QuantileSketchAccumulator struct { + matrix ProbabilisticQuantileMatrix +} + +// newQuantileSketchAccumulator returns an accumulator for sharded +// probabilistic quantile queries that merges results as they come in. +func newQuantileSketchAccumulator() *QuantileSketchAccumulator { + return &QuantileSketchAccumulator{} +} + +func (a *QuantileSketchAccumulator) Accumulate(_ context.Context, res logqlmodel.Result, _ int) error { + if res.Data.Type() != QuantileSketchMatrixType { + return fmt.Errorf("unexpected matrix data type: got (%s), want (%s)", res.Data.Type(), QuantileSketchMatrixType) + } + data, ok := res.Data.(ProbabilisticQuantileMatrix) + if !ok { + return fmt.Errorf("unexpected matrix type: got (%T), want (ProbabilisticQuantileMatrix)", res.Data) + } + if a.matrix == nil { + a.matrix = data + return nil + } + + var err error + a.matrix, err = a.matrix.Merge(data) + return err +} + +func (a *QuantileSketchAccumulator) Result() []logqlmodel.Result { + return []logqlmodel.Result{{Data: a.matrix}} +} + +// heap impl for keeping only the top n results across m streams +// importantly, AccumulatedStreams is _bounded_, so it will only +// store the top `limit` results across all streams. +// To implement this, we use a min-heap when looking +// for the max values (logproto.FORWARD) +// and vice versa for logproto.BACKWARD. +// This allows us to easily find the 'worst' value +// and replace it with a better one. +// Once we've fully processed all log lines, +// we return the heap in opposite order and then reverse it +// to get the correct order. +// Heap implements container/heap.Interface +// solely to use heap.Interface as a library. +// It is not intended for the heap pkg functions +// to otherwise call this type. +type AccumulatedStreams struct { + count, limit int + labelmap map[string]int + streams []*logproto.Stream + order logproto.Direction + + stats stats.Result // for accumulating statistics from downstream requests + headers map[string][]string // for accumulating headers from downstream requests +} + +// NewStreamAccumulator returns an accumulator for limited log queries. +// Log queries, sharded thousands of times and each returning +// results, can be _considerably_ larger. In this case, we eagerly +// accumulate the results into a logsAccumulator, discarding values +// over the limit to keep memory pressure down while other subqueries +// are executing. 
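
The comment above describes the bounded, reversed-order heap that backs AccumulatedStreams. As a standalone illustration of that idea, here is a small sketch that keeps only the newest K timestamps with container/heap; the topK type and its Offer helper are hypothetical and far simpler than the real accumulator, which additionally tracks streams and labels.

```go
// Keep only the newest K timestamps by storing them in a min-heap and
// evicting the root (the oldest retained value) whenever a better candidate
// arrives. Illustrative sketch only.
package main

import (
	"container/heap"
	"fmt"
	"time"
)

// topK is a min-heap of timestamps bounded to k elements, so the root is
// always the "worst" (oldest) value currently retained.
type topK struct {
	k  int
	ts []time.Time
}

func (h *topK) Len() int           { return len(h.ts) }
func (h *topK) Less(i, j int) bool { return h.ts[i].Before(h.ts[j]) }
func (h *topK) Swap(i, j int)      { h.ts[i], h.ts[j] = h.ts[j], h.ts[i] }
func (h *topK) Push(x any)         { h.ts = append(h.ts, x.(time.Time)) }
func (h *topK) Pop() any {
	old := h.ts
	n := len(old)
	x := old[n-1]
	h.ts = old[:n-1]
	return x
}

// Offer adds t if there is room, or replaces the current worst entry when t
// is newer; otherwise t is discarded, keeping memory bounded at k.
func (h *topK) Offer(t time.Time) {
	if h.Len() < h.k {
		heap.Push(h, t)
		return
	}
	if t.After(h.ts[0]) {
		h.ts[0] = t
		heap.Fix(h, 0)
	}
}

func main() {
	h := &topK{k: 3}
	for i := 0; i < 10; i++ {
		h.Offer(time.Unix(int64(i), 0))
	}
	// The three newest timestamps remain; popping yields them oldest-first.
	for h.Len() > 0 {
		fmt.Println(heap.Pop(h).(time.Time).Unix()) // 7, 8, 9
	}
}
```

The real accumulator applies the same bound across entries from many streams and breaks timestamp ties by labels, but the eviction logic has the same shape.
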
+func NewStreamAccumulator(params Params) *AccumulatedStreams { + // the stream accumulator stores a heap with reversed order + // from the results we expect, so we need to reverse the direction + order := logproto.FORWARD + if params.Direction() == logproto.FORWARD { + order = logproto.BACKWARD + } + + return &AccumulatedStreams{ + labelmap: make(map[string]int), + order: order, + limit: int(params.Limit()), + + headers: make(map[string][]string), + } +} + +// returns the top priority +func (acc *AccumulatedStreams) top() (time.Time, bool) { + if len(acc.streams) > 0 && len(acc.streams[0].Entries) > 0 { + return acc.streams[0].Entries[len(acc.streams[0].Entries)-1].Timestamp, true + } + return time.Time{}, false +} + +func (acc *AccumulatedStreams) Find(labels string) (int, bool) { + i, ok := acc.labelmap[labels] + return i, ok +} + +// number of streams +func (acc *AccumulatedStreams) Len() int { return len(acc.streams) } + +func (acc *AccumulatedStreams) Swap(i, j int) { + // for i=0, j=1 + + // {'a': 0, 'b': 1} + // [a, b] + acc.streams[i], acc.streams[j] = acc.streams[j], acc.streams[i] + // {'a': 0, 'b': 1} + // [b, a] + acc.labelmap[acc.streams[i].Labels] = i + acc.labelmap[acc.streams[j].Labels] = j + // {'a': 1, 'b': 0} + // [b, a] +} + +// first order by timestamp, then by labels +func (acc *AccumulatedStreams) Less(i, j int) bool { + // order by the 'oldest' entry in the stream + if a, b := acc.streams[i].Entries[len(acc.streams[i].Entries)-1].Timestamp, acc.streams[j].Entries[len(acc.streams[j].Entries)-1].Timestamp; !a.Equal(b) { + return acc.less(a, b) + } + return acc.streams[i].Labels <= acc.streams[j].Labels +} + +func (acc *AccumulatedStreams) less(a, b time.Time) bool { + // use after for stable sort + if acc.order == logproto.FORWARD { + return !a.After(b) + } + return !b.After(a) +} + +func (acc *AccumulatedStreams) Push(x any) { + s := x.(*logproto.Stream) + if len(s.Entries) == 0 { + return + } + + if room := acc.limit - acc.count; room >= len(s.Entries) { + if i, ok := acc.Find(s.Labels); ok { + // stream already exists, append entries + + // these are already guaranteed to be sorted + // Reasoning: we shard subrequests so each stream exists on only one + // shard. Therefore, the only time a stream should already exist + // is in successive splits, which are already guaranteed to be ordered + // and we can just append. + acc.appendTo(acc.streams[i], s) + + return + } + + // new stream + acc.addStream(s) + return + } + + // there's not enough room for all the entries, + // so we need to + acc.push(s) +} + +// there's not enough room for all the entries. +// since we store them in a reverse heap relative to what we _want_ +// (i.e. the max value for FORWARD, the min value for BACKWARD), +// we test if the new entry is better than the worst entry, +// swapping them if so. +func (acc *AccumulatedStreams) push(s *logproto.Stream) { + worst, ok := acc.top() + room := math.Min(acc.limit-acc.count, len(s.Entries)) + + if !ok { + if room == 0 { + // special case: limit must be zero since there's no room and no worst entry + return + } + s.Entries = s.Entries[:room] + // special case: there are no entries in the heap. Push entries up to the limit + acc.addStream(s) + return + } + + // since entries are sorted by timestamp from best -> worst, + // we can discard the entire stream if the incoming best entry + // is worse than the worst entry in the heap. 
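
The hunk continues below with a sort.Search call that trims the incoming entries to the prefix still worth pushing. As a standalone illustration of that cutoff computation, assuming entries sorted best to worst and a made-up integer score in place of timestamps:

```go
// Find how many leading entries still beat the worst value currently held by
// the bounded heap. The scores and threshold are invented for the example.
package main

import (
	"fmt"
	"sort"
)

func main() {
	// Scores sorted best -> worst.
	entries := []int{90, 80, 70, 60, 50}
	worst := 65 // worst value currently retained; anything <= this is dropped

	// sort.Search returns the first index where the predicate becomes true,
	// i.e. the first entry that no longer beats `worst`.
	cutoff := sort.Search(len(entries), func(i int) bool {
		return entries[i] <= worst
	})

	entries = entries[:cutoff]
	fmt.Println(entries) // [90 80 70]
}
```
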
+ cutoff := sort.Search(len(s.Entries), func(i int) bool { + // TODO(refactor label comparison -- should be in another fn) + if worst.Equal(s.Entries[i].Timestamp) { + return acc.streams[0].Labels < s.Labels + } + return acc.less(s.Entries[i].Timestamp, worst) + }) + s.Entries = s.Entries[:cutoff] + + for i := 0; i < len(s.Entries) && acc.less(worst, s.Entries[i].Timestamp); i++ { + + // push one entry at a time + room = acc.limit - acc.count + // pop if there's no room to make the heap small enough for an append; + // in the short path of Push() we know that there's room for at least one entry + if room == 0 { + acc.Pop() + } + + cpy := *s + cpy.Entries = []logproto.Entry{s.Entries[i]} + acc.Push(&cpy) + + // update worst + worst, _ = acc.top() + } +} + +func (acc *AccumulatedStreams) addStream(s *logproto.Stream) { + // ensure entries conform to order we expect + // TODO(owen-d): remove? should be unnecessary since we insert in appropriate order + // but it's nice to have the safeguard + sort.Slice(s.Entries, func(i, j int) bool { + return acc.less(s.Entries[j].Timestamp, s.Entries[i].Timestamp) + }) + + acc.streams = append(acc.streams, s) + i := len(acc.streams) - 1 + acc.labelmap[s.Labels] = i + acc.count += len(s.Entries) + heap.Fix(acc, i) +} + +// dst must already exist in acc +func (acc *AccumulatedStreams) appendTo(dst, src *logproto.Stream) { + // these are already guaranteed to be sorted + // Reasoning: we shard subrequests so each stream exists on only one + // shard. Therefore, the only time a stream should already exist + // is in successive splits, which are already guaranteed to be ordered + // and we can just append. + + var needsSort bool + for _, e := range src.Entries { + // sort if order has broken + if len(dst.Entries) > 0 && acc.less(dst.Entries[len(dst.Entries)-1].Timestamp, e.Timestamp) { + needsSort = true + } + dst.Entries = append(dst.Entries, e) + } + + if needsSort { + sort.Slice(dst.Entries, func(i, j int) bool { + // store in reverse order so we can more reliably insert without sorting and pop from end + return acc.less(dst.Entries[j].Timestamp, dst.Entries[i].Timestamp) + }) + } + + acc.count += len(src.Entries) + heap.Fix(acc, acc.labelmap[dst.Labels]) + +} + +// Pop returns a stream with one entry. It pops the first entry of the first stream +func (acc *AccumulatedStreams) Pop() any { + n := acc.Len() + if n == 0 { + return nil + } + + stream := acc.streams[0] + cpy := *stream + cpy.Entries = []logproto.Entry{cpy.Entries[len(stream.Entries)-1]} + stream.Entries = stream.Entries[:len(stream.Entries)-1] + + acc.count-- + + if len(stream.Entries) == 0 { + // remove stream + acc.Swap(0, n-1) + acc.streams[n-1] = nil // avoid leaking reference + delete(acc.labelmap, stream.Labels) + acc.streams = acc.streams[:n-1] + + } + + if acc.Len() > 0 { + heap.Fix(acc, 0) + } + + return &cpy +} + +// Note: can only be called once as it will alter stream ordreing. 
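
Pop above hands back a copy of the stream header with a fresh one-element Entries slice, so callers never alias the accumulator's internal slice. A tiny sketch of that copy-on-pop pattern, using simplified stand-in types:

```go
// Copy-on-pop: return the worst entry wrapped in a copied stream while the
// original stream shrinks in place. Types are simplified stand-ins.
package main

import "fmt"

type entry struct{ ts int64 }

type stream struct {
	labels  string
	entries []entry
}

// popLast removes the last (worst) entry from s and returns it wrapped in a
// copied stream, leaving s free to shrink or be removed by the caller.
func popLast(s *stream) stream {
	cpy := *s // copies labels; the entries slice header is replaced below
	cpy.entries = []entry{s.entries[len(s.entries)-1]}
	s.entries = s.entries[:len(s.entries)-1]
	return cpy
}

func main() {
	s := &stream{labels: `{n="0"}`, entries: []entry{{1}, {2}, {3}}}
	out := popLast(s)
	fmt.Println(out.entries, s.entries) // [{3}] [{1} {2}]
}
```
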
+func (acc *AccumulatedStreams) Result() []logqlmodel.Result { + // sort streams by label + sort.Slice(acc.streams, func(i, j int) bool { + return acc.streams[i].Labels < acc.streams[j].Labels + }) + + streams := make(logqlmodel.Streams, 0, len(acc.streams)) + + for _, s := range acc.streams { + // sort entries by timestamp, inversely based on direction + sort.Slice(s.Entries, func(i, j int) bool { + return acc.less(s.Entries[j].Timestamp, s.Entries[i].Timestamp) + }) + streams = append(streams, *s) + } + + res := logqlmodel.Result{ + // stats & headers are already aggregated in the context + Data: streams, + Statistics: acc.stats, + Headers: make([]*definitions.PrometheusResponseHeader, 0, len(acc.headers)), + } + + for name, vals := range acc.headers { + res.Headers = append( + res.Headers, + &definitions.PrometheusResponseHeader{ + Name: name, + Values: vals, + }, + ) + } + + return []logqlmodel.Result{res} +} + +func (acc *AccumulatedStreams) Accumulate(_ context.Context, x logqlmodel.Result, _ int) error { + // TODO(owen-d/ewelch): Shard counts should be set by the querier + // so we don't have to do it in tricky ways in multiple places. + // See pkg/logql/downstream.go:DownstreamEvaluator.Downstream + // for another example. + if x.Statistics.Summary.Shards == 0 { + x.Statistics.Summary.Shards = 1 + } + acc.stats.Merge(x.Statistics) + metadata.ExtendHeaders(acc.headers, x.Headers) + + switch got := x.Data.(type) { + case logqlmodel.Streams: + for i := range got { + acc.Push(&got[i]) + } + default: + return fmt.Errorf("unexpected response type during response result accumulation. Got (%T), wanted %s", got, logqlmodel.ValueTypeStreams) + } + return nil +} diff --git a/pkg/logql/accumulator_test.go b/pkg/logql/accumulator_test.go new file mode 100644 index 0000000000000..d827e3ea02e71 --- /dev/null +++ b/pkg/logql/accumulator_test.go @@ -0,0 +1,273 @@ +package logql + +import ( + "context" + "fmt" + "math/rand" + "testing" + "time" + + "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/pkg/logql/sketch" + "github.com/grafana/loki/pkg/logqlmodel" +) + +func TestAccumulatedStreams(t *testing.T) { + lim := 30 + nStreams := 10 + start, end := 0, 10 + // for a logproto.BACKWARD query, we use a min heap based on FORWARD + // to store the _earliest_ timestamp of the _latest_ entries, up to `limit` + xs := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, nStreams, logproto.BACKWARD) + acc := NewStreamAccumulator(LiteralParams{ + direction: logproto.BACKWARD, + limit: uint32(lim), + }) + for _, x := range xs { + acc.Push(x) + } + + for i := 0; i < lim; i++ { + got := acc.Pop().(*logproto.Stream) + require.Equal(t, fmt.Sprintf(`{n="%d"}`, i%nStreams), got.Labels) + exp := (nStreams*(end-start) - lim + i) / nStreams + require.Equal(t, time.Unix(int64(exp), 0), got.Entries[0].Timestamp) + } + +} + +func TestDownstreamAccumulatorSimple(t *testing.T) { + lim := 30 + start, end := 0, 10 + direction := logproto.BACKWARD + + streams := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, 10, direction) + x := make(logqlmodel.Streams, 0, len(streams)) + for _, s := range streams { + x = append(x, *s) + } + // dummy params. 
Only need to populate direction & limit + params, err := NewLiteralParams( + `{app="foo"}`, time.Time{}, time.Time{}, 0, 0, direction, uint32(lim), nil, + ) + require.NoError(t, err) + + acc := NewStreamAccumulator(params) + result := logqlmodel.Result{ + Data: x, + } + + require.Nil(t, acc.Accumulate(context.Background(), result, 0)) + + res := acc.Result()[0] + got, ok := res.Data.(logqlmodel.Streams) + require.Equal(t, true, ok) + require.Equal(t, 10, len(got), "correct number of streams") + + // each stream should have the top 3 entries + for i := 0; i < 10; i++ { + require.Equal(t, 3, len(got[i].Entries), "correct number of entries in stream") + for j := 0; j < 3; j++ { + require.Equal(t, time.Unix(int64(9-j), 0), got[i].Entries[j].Timestamp, "correct timestamp") + } + } +} + +// TestDownstreamAccumulatorMultiMerge simulates merging multiple +// sub-results from different queries. +func TestDownstreamAccumulatorMultiMerge(t *testing.T) { + for _, direction := range []logproto.Direction{logproto.BACKWARD, logproto.FORWARD} { + t.Run(direction.String(), func(t *testing.T) { + nQueries := 10 + delta := 10 // 10 entries per stream, 1s apart + streamsPerQuery := 10 + lim := 30 + + payloads := make([]logqlmodel.Streams, 0, nQueries) + for i := 0; i < nQueries; i++ { + start := i * delta + end := start + delta + streams := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, streamsPerQuery, direction) + var res logqlmodel.Streams + for i := range streams { + res = append(res, *streams[i]) + } + payloads = append(payloads, res) + + } + + // queries are always dispatched in the correct order. + // oldest time ranges first in the case of logproto.FORWARD + // and newest time ranges first in the case of logproto.BACKWARD + if direction == logproto.BACKWARD { + for i, j := 0, len(payloads)-1; i < j; i, j = i+1, j-1 { + payloads[i], payloads[j] = payloads[j], payloads[i] + } + } + + // dummy params. Only need to populate direction & limit + params, err := NewLiteralParams( + `{app="foo"}`, time.Time{}, time.Time{}, 0, 0, direction, uint32(lim), nil, + ) + require.NoError(t, err) + + acc := NewStreamAccumulator(params) + for i := 0; i < nQueries; i++ { + err := acc.Accumulate(context.Background(), logqlmodel.Result{ + Data: payloads[i], + }, i) + require.Nil(t, err) + } + + got, ok := acc.Result()[0].Data.(logqlmodel.Streams) + require.Equal(t, true, ok) + require.Equal(t, int64(nQueries), acc.Result()[0].Statistics.Summary.Shards) + + // each stream should have the top 3 entries + for i := 0; i < streamsPerQuery; i++ { + stream := got[i] + require.Equal(t, fmt.Sprintf(`{n="%d"}`, i), stream.Labels, "correct labels") + ln := lim / streamsPerQuery + require.Equal(t, ln, len(stream.Entries), "correct number of entries in stream") + switch direction { + case logproto.BACKWARD: + for i := 0; i < ln; i++ { + offset := delta*nQueries - 1 - i + require.Equal(t, time.Unix(int64(offset), 0), stream.Entries[i].Timestamp, "correct timestamp") + } + default: + for i := 0; i < ln; i++ { + offset := i + require.Equal(t, time.Unix(int64(offset), 0), stream.Entries[i].Timestamp, "correct timestamp") + } + } + } + }) + } +} + +func BenchmarkAccumulator(b *testing.B) { + + // dummy params. 
Only need to populate direction & limit + lim := 30 + params, err := NewLiteralParams( + `{app="foo"}`, time.Time{}, time.Time{}, 0, 0, logproto.BACKWARD, uint32(lim), nil, + ) + require.NoError(b, err) + + for acc, tc := range map[string]struct { + results []logqlmodel.Result + newAcc func(Params, []logqlmodel.Result) Accumulator + params Params + }{ + "streams": { + newStreamResults(), + func(p Params, _ []logqlmodel.Result) Accumulator { + return NewStreamAccumulator(p) + }, + params, + }, + "quantile sketches": { + newQuantileSketchResults(), + func(p Params, _ []logqlmodel.Result) Accumulator { + return newQuantileSketchAccumulator() + }, + params, + }, + } { + b.Run(acc, func(b *testing.B) { + b.ResetTimer() + b.ReportAllocs() + for n := 0; n < b.N; n++ { + + acc := tc.newAcc(params, tc.results) + for i, r := range tc.results { + err := acc.Accumulate(context.Background(), r, i) + require.Nil(b, err) + } + + acc.Result() + } + }) + } +} + +func newStreamResults() []logqlmodel.Result { + nQueries := 50 + delta := 100 // 10 entries per stream, 1s apart + streamsPerQuery := 50 + + results := make([]logqlmodel.Result, nQueries) + for i := 0; i < nQueries; i++ { + start := i * delta + end := start + delta + streams := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, streamsPerQuery, logproto.BACKWARD) + var res logqlmodel.Streams + for i := range streams { + res = append(res, *streams[i]) + } + results[i] = logqlmodel.Result{Data: res} + + } + + return results +} + +func newQuantileSketchResults() []logqlmodel.Result { + results := make([]logqlmodel.Result, 100) + + for r := range results { + vectors := make([]ProbabilisticQuantileVector, 10) + for i := range vectors { + vectors[i] = make(ProbabilisticQuantileVector, 10) + for j := range vectors[i] { + vectors[i][j] = ProbabilisticQuantileSample{ + T: int64(i), + F: newRandomSketch(), + Metric: []labels.Label{{Name: "foo", Value: fmt.Sprintf("bar-%d", j)}}, + } + } + } + results[r] = logqlmodel.Result{Data: ProbabilisticQuantileMatrix(vectors)} + } + + return results +} + +func newStreamWithDirection(start, end time.Time, delta time.Duration, ls string, direction logproto.Direction) *logproto.Stream { + s := &logproto.Stream{ + Labels: ls, + } + for t := start; t.Before(end); t = t.Add(delta) { + s.Entries = append(s.Entries, logproto.Entry{ + Timestamp: t, + Line: fmt.Sprintf("%d", t.Unix()), + }) + } + if direction == logproto.BACKWARD { + // simulate data coming in reverse order (logproto.BACKWARD) + for i, j := 0, len(s.Entries)-1; i < j; i, j = i+1, j-1 { + s.Entries[i], s.Entries[j] = s.Entries[j], s.Entries[i] + } + } + return s +} + +func newStreams(start, end time.Time, delta time.Duration, n int, direction logproto.Direction) (res []*logproto.Stream) { + for i := 0; i < n; i++ { + res = append(res, newStreamWithDirection(start, end, delta, fmt.Sprintf(`{n="%d"}`, i), direction)) + } + return res +} + +func newRandomSketch() sketch.QuantileSketch { + r := rand.New(rand.NewSource(42)) + s := sketch.NewDDSketch() + for i := 0; i < 1000; i++ { + _ = s.Add(r.Float64()) + } + return s +} diff --git a/pkg/logql/downstream.go b/pkg/logql/downstream.go index 76594dc040c22..33d945f11b923 100644 --- a/pkg/logql/downstream.go +++ b/pkg/logql/downstream.go @@ -83,6 +83,29 @@ func (d DownstreamSampleExpr) String() string { return fmt.Sprintf("downstream<%s, shard=%s>", d.SampleExpr.String(), d.shard) } +// The DownstreamSampleExpr is not part of LogQL. In the prettified version it's +// represented as e.g. 
`downstream` +func (d DownstreamSampleExpr) Pretty(level int) string { + s := syntax.Indent(level) + if !syntax.NeedSplit(d) { + return s + d.String() + } + + s += "downstream<\n" + + s += d.SampleExpr.Pretty(level + 1) + s += ",\n" + s += syntax.Indent(level+1) + "shard=" + if d.shard != nil { + s += d.shard.String() + "\n" + } else { + s += "nil\n" + } + + s += syntax.Indent(level) + ">" + return s +} + // DownstreamLogSelectorExpr is a LogSelectorExpr which signals downstream computation type DownstreamLogSelectorExpr struct { shard *astmapper.ShardAnnotation @@ -93,6 +116,29 @@ func (d DownstreamLogSelectorExpr) String() string { return fmt.Sprintf("downstream<%s, shard=%s>", d.LogSelectorExpr.String(), d.shard) } +// The DownstreamLogSelectorExpr is not part of LogQL. In the prettified version it's +// represented as e.g. `downstream<{foo="bar"} |= "error", shard=1_of_3>` +func (d DownstreamLogSelectorExpr) Pretty(level int) string { + s := syntax.Indent(level) + if !syntax.NeedSplit(d) { + return s + d.String() + } + + s += "downstream<\n" + + s += d.LogSelectorExpr.Pretty(level + 1) + s += ",\n" + s += syntax.Indent(level+1) + "shard=" + if d.shard != nil { + s += d.shard.String() + "\n" + } else { + s += "nil\n" + } + + s += syntax.Indent(level) + ">" + return s +} + func (d DownstreamSampleExpr) Walk(f syntax.WalkFn) { f(d) } var defaultMaxDepth = 4 @@ -105,7 +151,7 @@ type ConcatSampleExpr struct { next *ConcatSampleExpr } -func (c ConcatSampleExpr) String() string { +func (c *ConcatSampleExpr) String() string { if c.next == nil { return c.DownstreamSampleExpr.String() } @@ -115,7 +161,7 @@ func (c ConcatSampleExpr) String() string { // in order to not display huge queries with thousands of shards, // we can limit the number of stringified subqueries. -func (c ConcatSampleExpr) string(maxDepth int) string { +func (c *ConcatSampleExpr) string(maxDepth int) string { if c.next == nil { return c.DownstreamSampleExpr.String() } @@ -125,18 +171,46 @@ func (c ConcatSampleExpr) string(maxDepth int) string { return fmt.Sprintf("%s ++ %s", c.DownstreamSampleExpr.String(), c.next.string(maxDepth-1)) } -func (c ConcatSampleExpr) Walk(f syntax.WalkFn) { +func (c *ConcatSampleExpr) Walk(f syntax.WalkFn) { f(c) f(c.next) } +// ConcatSampleExpr has no LogQL repretenstation. It is expressed in in the +// prettified version as e.g. `concat(downstream ++ )` +func (c *ConcatSampleExpr) Pretty(level int) string { + s := syntax.Indent(level) + if !syntax.NeedSplit(c) { + return s + c.String() + } + + s += "concat(\n" + + head := c + for i := 0; i < defaultMaxDepth && head != nil; i++ { + if i > 0 { + s += syntax.Indent(level+1) + "++\n" + } + s += head.DownstreamSampleExpr.Pretty(level + 1) + s += "\n" + head = head.next + } + // There are more downstream samples... + if head != nil { + s += syntax.Indent(level+1) + "++ ...\n" + } + s += syntax.Indent(level) + ")" + + return s +} + // ConcatLogSelectorExpr is an expr for concatenating multiple LogSelectorExpr type ConcatLogSelectorExpr struct { DownstreamLogSelectorExpr next *ConcatLogSelectorExpr } -func (c ConcatLogSelectorExpr) String() string { +func (c *ConcatLogSelectorExpr) String() string { if c.next == nil { return c.DownstreamLogSelectorExpr.String() } @@ -146,7 +220,7 @@ func (c ConcatLogSelectorExpr) String() string { // in order to not display huge queries with thousands of shards, // we can limit the number of stringified subqueries. 
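
Both String and the new Pretty methods for the concat expressions walk the linked list of shards and cap the output at a maximum depth, appending an ellipsis when more shards remain. A generic sketch of that depth-capped traversal, with made-up node and render helpers:

```go
// Walk a linked list of concatenated shards, render at most maxDepth nodes,
// and append an ellipsis if anything remains. Illustrative sketch only.
package main

import (
	"fmt"
	"strings"
)

type node struct {
	label string
	next  *node
}

func render(head *node, maxDepth int) string {
	var parts []string
	for i := 0; i < maxDepth && head != nil; i++ {
		parts = append(parts, head.label)
		head = head.next
	}
	if head != nil {
		parts = append(parts, "...")
	}
	return strings.Join(parts, " ++ ")
}

func main() {
	n3 := &node{label: "shard=2_of_3"}
	n2 := &node{label: "shard=1_of_3", next: n3}
	n1 := &node{label: "shard=0_of_3", next: n2}
	fmt.Println(render(n1, 2)) // shard=0_of_3 ++ shard=1_of_3 ++ ...
}
```
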
-func (c ConcatLogSelectorExpr) string(maxDepth int) string { +func (c *ConcatLogSelectorExpr) string(maxDepth int) string { if c.next == nil { return c.DownstreamLogSelectorExpr.String() } @@ -156,6 +230,34 @@ func (c ConcatLogSelectorExpr) string(maxDepth int) string { return fmt.Sprintf("%s ++ %s", c.DownstreamLogSelectorExpr.String(), c.next.string(maxDepth-1)) } +// ConcatLogSelectorExpr has no representation in LogQL. Its prettified version +// is e.g. `concat(downstream<{foo="bar"} |= "error", shard=1_of_3>)` +func (c *ConcatLogSelectorExpr) Pretty(level int) string { + s := syntax.Indent(level) + if !syntax.NeedSplit(c) { + return s + c.String() + } + + s += "concat(\n" + + head := c + for i := 0; i < defaultMaxDepth && head != nil; i++ { + if i > 0 { + s += syntax.Indent(level+1) + "++\n" + } + s += head.DownstreamLogSelectorExpr.Pretty(level + 1) + s += "\n" + head = head.next + } + // There are more downstream samples... + if head != nil { + s += syntax.Indent(level+1) + "++ ...\n" + } + s += ")" + + return s +} + // QuantileSketchEvalExpr evaluates a quantile sketch to the actual quantile. type QuantileSketchEvalExpr struct { syntax.SampleExpr @@ -244,7 +346,13 @@ type Resp struct { // Downstreamer is an interface for deferring responsibility for query execution. // It is decoupled from but consumed by a downStreamEvaluator to dispatch ASTs. type Downstreamer interface { - Downstream(context.Context, []DownstreamQuery) ([]logqlmodel.Result, error) + Downstream(context.Context, []DownstreamQuery, Accumulator) ([]logqlmodel.Result, error) +} + +// Accumulator is an interface for accumulating query results. +type Accumulator interface { + Accumulate(context.Context, logqlmodel.Result, int) error + Result() []logqlmodel.Result } // DownstreamEvaluator is an evaluator which handles shard aware AST nodes @@ -254,8 +362,8 @@ type DownstreamEvaluator struct { } // Downstream runs queries and collects stats from the embedded Downstreamer -func (ev DownstreamEvaluator) Downstream(ctx context.Context, queries []DownstreamQuery) ([]logqlmodel.Result, error) { - results, err := ev.Downstreamer.Downstream(ctx, queries) +func (ev DownstreamEvaluator) Downstream(ctx context.Context, queries []DownstreamQuery, acc Accumulator) ([]logqlmodel.Result, error) { + results, err := ev.Downstreamer.Downstream(ctx, queries, acc) if err != nil { return nil, err } @@ -314,12 +422,13 @@ func (ev *DownstreamEvaluator) NewStepEvaluator( if e.shard != nil { shards = append(shards, *e.shard) } + acc := NewBufferedAccumulator(1) results, err := ev.Downstream(ctx, []DownstreamQuery{{ Params: ParamsWithShardsOverride{ Params: ParamsWithExpressionOverride{Params: params, ExpressionOverride: e.SampleExpr}, ShardsOverride: Shards(shards).Encode(), }, - }}) + }}, acc) if err != nil { return nil, err } @@ -339,7 +448,8 @@ func (ev *DownstreamEvaluator) NewStepEvaluator( cur = cur.next } - results, err := ev.Downstream(ctx, queries) + acc := NewBufferedAccumulator(len(queries)) + results, err := ev.Downstream(ctx, queries, acc) if err != nil { return nil, err } @@ -379,7 +489,8 @@ func (ev *DownstreamEvaluator) NewStepEvaluator( } } - results, err := ev.Downstream(ctx, queries) + acc := newQuantileSketchAccumulator() + results, err := ev.Downstream(ctx, queries, acc) if err != nil { return nil, err } @@ -413,12 +524,13 @@ func (ev *DownstreamEvaluator) NewIterator( if e.shard != nil { shards = append(shards, *e.shard) } + acc := NewStreamAccumulator(params) results, err := ev.Downstream(ctx, []DownstreamQuery{{ Params: 
ParamsWithShardsOverride{ Params: ParamsWithExpressionOverride{Params: params, ExpressionOverride: e.LogSelectorExpr}, ShardsOverride: shards.Encode(), }, - }}) + }}, acc) if err != nil { return nil, err } @@ -438,7 +550,8 @@ func (ev *DownstreamEvaluator) NewIterator( cur = cur.next } - results, err := ev.Downstream(ctx, queries) + acc := NewStreamAccumulator(params) + results, err := ev.Downstream(ctx, queries, acc) if err != nil { return nil, err } diff --git a/pkg/logql/downstream_test.go b/pkg/logql/downstream_test.go index 426722a554594..ec5f3170468d0 100644 --- a/pkg/logql/downstream_test.go +++ b/pkg/logql/downstream_test.go @@ -8,12 +8,14 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/user" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql/syntax" + "github.com/grafana/loki/pkg/querier/astmapper" ) var nilShardMetrics = NewShardMapperMetrics(nil) @@ -543,3 +545,142 @@ func relativeError(t *testing.T, expected, actual promql.Matrix, alpha float64) require.InEpsilonSlice(t, e, a, alpha) } } + +func TestFormat_ShardedExpr(t *testing.T) { + oldMax := syntax.MaxCharsPerLine + syntax.MaxCharsPerLine = 20 + + oldDefaultDepth := defaultMaxDepth + defaultMaxDepth = 2 + defer func() { + syntax.MaxCharsPerLine = oldMax + defaultMaxDepth = oldDefaultDepth + }() + + cases := []struct { + name string + in syntax.Expr + exp string + }{ + { + name: "ConcatSampleExpr", + in: &ConcatSampleExpr{ + DownstreamSampleExpr: DownstreamSampleExpr{ + shard: &astmapper.ShardAnnotation{ + Shard: 0, + Of: 3, + }, + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeRate, + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")}, + }, + Interval: time.Minute, + }, + }, + }, + next: &ConcatSampleExpr{ + DownstreamSampleExpr: DownstreamSampleExpr{ + shard: &astmapper.ShardAnnotation{ + Shard: 1, + Of: 3, + }, + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeRate, + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")}, + }, + Interval: time.Minute, + }, + }, + }, + next: &ConcatSampleExpr{ + DownstreamSampleExpr: DownstreamSampleExpr{ + shard: &astmapper.ShardAnnotation{ + Shard: 1, + Of: 3, + }, + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeRate, + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")}, + }, + Interval: time.Minute, + }, + }, + }, + next: nil, + }, + }, + }, + exp: `concat( + downstream< + rate( + {foo="bar"} [1m] + ), + shard=0_of_3 + > + ++ + downstream< + rate( + {foo="bar"} [1m] + ), + shard=1_of_3 + > + ++ ... 
+)`, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got := syntax.Prettify(c.in) + assert.Equal(t, c.exp, got) + }) + } +} + +func TestPrettierWithoutShards(t *testing.T) { + q := `((quantile_over_time(0.5,{foo="bar"} | json | unwrap bytes[1d]) by (cluster) > 42) and (count by (cluster)(max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace)) > 10))` + e := syntax.MustParseExpr(q) + + mapper := NewShardMapper(ConstantShards(4), nilShardMetrics, []string{}) + _, _, mapped, err := mapper.Parse(e) + require.NoError(t, err) + got := syntax.Prettify(mapped) + expected := ` downstream> + > + 42 +and + count by (cluster)( + max by (cluster, namespace)( + concat( + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=0_of_4 + > + ++ + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=1_of_4 + > + ++ + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=2_of_4 + > + ++ + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=3_of_4 + > + ) + ) + ) + > + 10` + assert.Equal(t, expected, got) +} diff --git a/pkg/logql/metrics.go b/pkg/logql/metrics.go index 63051e362eae6..40fbece82d87d 100644 --- a/pkg/logql/metrics.go +++ b/pkg/logql/metrics.go @@ -114,13 +114,17 @@ func RecordRangeAndInstantQueryMetrics( } queryTags, _ := ctx.Value(httpreq.QueryTagsHTTPHeader).(string) // it's ok to be empty. + var ( + query = p.QueryString() + hashedQuery = util.HashedQuery(query) + ) logValues := make([]interface{}, 0, 50) logValues = append(logValues, []interface{}{ "latency", latencyType, // this can be used to filter log lines. - "query", p.QueryString(), - "query_hash", util.HashedQuery(p.QueryString()), + "query", query, + "query_hash", hashedQuery, "query_type", queryType, "range_type", rt, "length", p.End().Sub(p.Start()), diff --git a/pkg/logql/shardmapper_test.go b/pkg/logql/shardmapper_test.go index 96955109a9413..0e345291eed3b 100644 --- a/pkg/logql/shardmapper_test.go +++ b/pkg/logql/shardmapper_test.go @@ -1598,3 +1598,32 @@ func TestStringTrimming(t *testing.T) { func float64p(v float64) *float64 { return &v } + +func TestShardTopk(t *testing.T) { + expr := `topk( + 10, + sum by (ip) ( + sum_over_time({job="foo"} | json | unwrap bytes(bytes)[1m]) + ) + )` + m := NewShardMapper(ConstantShards(5), nilShardMetrics, []string{ShardQuantileOverTime}) + _, _, mappedExpr, err := m.Parse(syntax.MustParseExpr(expr)) + require.NoError(t, err) + + expected := `topk( + 10, + sum by (ip)( + concat( + downstream + ++ + downstream + ++ + downstream + ++ + downstream + ++ ... + ) + ) +)` + require.Equal(t, expected, mappedExpr.Pretty(0)) +} diff --git a/pkg/logql/syntax/prettier.go b/pkg/logql/syntax/prettier.go index cf346e26c562f..1b407453858f7 100644 --- a/pkg/logql/syntax/prettier.go +++ b/pkg/logql/syntax/prettier.go @@ -35,8 +35,8 @@ import ( // var ( - // maxCharsPerLine is used to qualify whether some LogQL expressions are worth `splitting` into new lines. - maxCharsPerLine = 100 + // MaxCharsPerLine is used to qualify whether some LogQL expressions are worth `splitting` into new lines. 
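
Exporting MaxCharsPerLine (together with Indent and NeedSplit, renamed below) lets the logql package's new Pretty implementations, and their tests, reuse the same split heuristic: render the one-line form and only break it across indented lines when it exceeds the limit. A simplified stand-alone sketch of that heuristic, with made-up prettify and indent helpers:

```go
// Split heuristic sketch: keep the one-line form when it fits, otherwise emit
// a multi-line, indented concat form. All names here are stand-ins.
package main

import (
	"fmt"
	"strings"
)

var maxCharsPerLine = 100

func indent(level int) string { return strings.Repeat("  ", level) }

func prettify(level int, oneLine string, children []string) string {
	if len(oneLine) <= maxCharsPerLine {
		return indent(level) + oneLine
	}
	var b strings.Builder
	b.WriteString(indent(level) + "concat(\n")
	for i, c := range children {
		if i > 0 {
			b.WriteString(indent(level+1) + "++\n")
		}
		b.WriteString(indent(level+1) + c + "\n")
	}
	b.WriteString(indent(level) + ")")
	return b.String()
}

func main() {
	maxCharsPerLine = 20 // as the test above does, to force the split form
	fmt.Println(prettify(0,
		`downstream<rate({foo="bar"}[1m]), shard=0_of_3>`,
		[]string{`downstream<..., shard=0_of_3>`, `downstream<..., shard=1_of_3>`}))
}
```
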
+ MaxCharsPerLine = 100 ) func Prettify(e Expr) string { @@ -51,8 +51,8 @@ func (e *MatchersExpr) Pretty(level int) string { // e.g: `{foo="bar"} | logfmt | level="error"` // Here, left = `{foo="bar"}` and multistages would collection of each stage in pipeline, here `logfmt` and `level="error"` func (e *PipelineExpr) Pretty(level int) string { - if !needSplit(e) { - return indent(level) + e.String() + if !NeedSplit(e) { + return Indent(level) + e.String() } s := fmt.Sprintf("%s\n", e.Left.Pretty(level)) @@ -73,8 +73,8 @@ func (e *PipelineExpr) Pretty(level int) string { // e.g: `|= "error" != "memcache" |= ip("192.168.0.1")` // NOTE: here `ip` is Op in this expression. func (e *LineFilterExpr) Pretty(level int) string { - if !needSplit(e) { - return indent(level) + e.String() + if !NeedSplit(e) { + return Indent(level) + e.String() } var s string @@ -90,7 +90,7 @@ func (e *LineFilterExpr) Pretty(level int) string { s += "\n" } - s += indent(level) + s += Indent(level) // We re-use LineFilterExpr's String() implementation to avoid duplication. // We create new LineFilterExpr without `Left`. @@ -153,7 +153,7 @@ func (e *LogfmtExpressionParser) Pretty(level int) string { // e.g: sum_over_time({foo="bar"} | logfmt | unwrap bytes_processed [5m]) func (e *UnwrapExpr) Pretty(level int) string { - s := indent(level) + s := Indent(level) if e.Operation != "" { s += fmt.Sprintf("%s %s %s(%s)", OpPipe, OpUnwrap, e.Operation, e.Identifier) @@ -161,7 +161,7 @@ func (e *UnwrapExpr) Pretty(level int) string { s += fmt.Sprintf("%s %s %s", OpPipe, OpUnwrap, e.Identifier) } for _, f := range e.PostFilters { - s += fmt.Sprintf("\n%s%s %s", indent(level), OpPipe, f) + s += fmt.Sprintf("\n%s%s %s", Indent(level), OpPipe, f) } return s } @@ -200,8 +200,8 @@ func (e *OffsetExpr) Pretty(_ int) string { // e.g: count_over_time({foo="bar"}[5m]) func (e *RangeAggregationExpr) Pretty(level int) string { - s := indent(level) - if !needSplit(e) { + s := Indent(level) + if !NeedSplit(e) { return s + e.String() } @@ -211,13 +211,13 @@ func (e *RangeAggregationExpr) Pretty(level int) string { // print args to the function. if e.Params != nil { - s = fmt.Sprintf("%s%s%s,", s, indent(level+1), fmt.Sprint(*e.Params)) + s = fmt.Sprintf("%s%s%s,", s, Indent(level+1), fmt.Sprint(*e.Params)) s += "\n" } s += e.Left.Pretty(level + 1) - s += "\n" + indent(level) + ")" + s += "\n" + Indent(level) + ")" if e.Grouping != nil { s += e.Grouping.Pretty(level) @@ -236,9 +236,9 @@ func (e *RangeAggregationExpr) Pretty(level int) string { // - vector on which aggregation is done. // [without|by (