diff --git a/.drone/drone.jsonnet b/.drone/drone.jsonnet index 49f67f06861a3..9351f2f693c06 100644 --- a/.drone/drone.jsonnet +++ b/.drone/drone.jsonnet @@ -610,23 +610,6 @@ local build_image_tag = '0.33.0'; 'cd -', ]) { depends_on: ['clone'], when: onPRs }, make('test', container=false) { depends_on: ['clone-target-branch', 'check-generated-files'] }, - run('test-target-branch', commands=['cd ../loki-target-branch && BUILD_IN_CONTAINER=false make test']) { depends_on: ['clone-target-branch'], when: onPRs }, - make('compare-coverage', container=false, args=[ - 'old=../loki-target-branch/test_results.txt', - 'new=test_results.txt', - 'packages=ingester,distributor,querier,querier/queryrange,iter,storage,chunkenc,logql,loki', - '> diff.txt', - ]) { depends_on: ['test', 'test-target-branch'], when: onPRs }, - run('report-coverage', commands=[ - "total_diff=$(sed 's/%//' diff.txt | awk '{sum+=$3;}END{print sum;}')", - 'if [ $total_diff = 0 ]; then exit 0; fi', - "pull=$(echo $CI_COMMIT_REF | awk -F '/' '{print $3}')", - "body=$(jq -Rs '{body: . }' diff.txt)", - 'curl -X POST -u $USER:$TOKEN -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/grafana/loki/issues/$pull/comments -d "$body" > /dev/null', - ], env={ - USER: 'grafanabot', - TOKEN: { from_secret: github_secret.name }, - }) { depends_on: ['compare-coverage'], when: onPRs }, make('lint', container=false) { depends_on: ['check-generated-files'] }, make('check-mod', container=false) { depends_on: ['test', 'lint'] }, { diff --git a/.drone/drone.yml b/.drone/drone.yml index 7a62b621262a8..c33a66998e71c 100644 --- a/.drone/drone.yml +++ b/.drone/drone.yml @@ -212,47 +212,6 @@ steps: environment: {} image: grafana/loki-build-image:0.33.0 name: test -- commands: - - cd ../loki-target-branch && BUILD_IN_CONTAINER=false make test - depends_on: - - clone-target-branch - environment: {} - image: grafana/loki-build-image:0.33.0 - name: test-target-branch - when: - event: - - pull_request -- commands: - - make BUILD_IN_CONTAINER=false compare-coverage old=../loki-target-branch/test_results.txt - new=test_results.txt packages=ingester,distributor,querier,querier/queryrange,iter,storage,chunkenc,logql,loki - > diff.txt - depends_on: - - test - - test-target-branch - environment: {} - image: grafana/loki-build-image:0.33.0 - name: compare-coverage - when: - event: - - pull_request -- commands: - - total_diff=$(sed 's/%//' diff.txt | awk '{sum+=$3;}END{print sum;}') - - if [ $total_diff = 0 ]; then exit 0; fi - - pull=$(echo $CI_COMMIT_REF | awk -F '/' '{print $3}') - - 'body=$(jq -Rs ''{body: . }'' diff.txt)' - - 'curl -X POST -u $USER:$TOKEN -H "Accept: application/vnd.github.v3+json" https://api.github.com/repos/grafana/loki/issues/$pull/comments - -d "$body" > /dev/null' - depends_on: - - compare-coverage - environment: - TOKEN: - from_secret: github_token - USER: grafanabot - image: grafana/loki-build-image:0.33.0 - name: report-coverage - when: - event: - - pull_request - commands: - make BUILD_IN_CONTAINER=false lint depends_on: @@ -2113,6 +2072,6 @@ kind: secret name: gpg_private_key --- kind: signature -hmac: 457592d17208477ceb480f81dbdb88f7b95a5ad015c88d9d6fed06c2422a52f9 +hmac: 51861919f0ba5370a152bdb9267828c742f2042819fb01388c6d23bf44e3cbb7 ... 
diff --git a/clients/cmd/fluent-bit/Dockerfile b/clients/cmd/fluent-bit/Dockerfile index 563614a75f52e..f0dfbc90c36a3 100644 --- a/clients/cmd/fluent-bit/Dockerfile +++ b/clients/cmd/fluent-bit/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.21.3-bullseye AS builder +FROM golang:1.22.0-bullseye AS builder COPY . /src diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md index aae02319894f7..382890b5bcab7 100644 --- a/docs/sources/configure/_index.md +++ b/docs/sources/configure/_index.md @@ -2331,27 +2331,26 @@ bloom_shipper: [max_tasks_enqueued_per_tenant: | default = 10000] blocks_cache: - # Whether embedded cache is enabled. - # CLI flag: -blocks-cache.enabled + # Cache for bloom blocks. Whether embedded cache is enabled. + # CLI flag: -bloom.blocks-cache.enabled [enabled: | default = false] - # Maximum memory size of the cache in MB. - # CLI flag: -blocks-cache.max-size-mb + # Cache for bloom blocks. Maximum memory size of the cache in MB. + # CLI flag: -bloom.blocks-cache.max-size-mb [max_size_mb: | default = 100] - # Maximum number of entries in the cache. - # CLI flag: -blocks-cache.max-size-items + # Cache for bloom blocks. Maximum number of entries in the cache. + # CLI flag: -bloom.blocks-cache.max-size-items [max_size_items: | default = 0] - # The time to live for items in the cache before they get purged. - # CLI flag: -blocks-cache.ttl - [ttl: | default = 0s] + # Cache for bloom blocks. The time to live for items in the cache before + # they get purged. + # CLI flag: -bloom.blocks-cache.ttl + [ttl: | default = 24h] - # During this period the process waits until the directory becomes not used - # and only after this it will be deleted. If the timeout is reached, the - # directory is force deleted. - # CLI flag: -blocks-cache.remove-directory-graceful-period - [remove_directory_graceful_period: | default = 5m] + # The cache block configures the cache backend. + # The CLI flags prefix for this block configuration is: bloom.metas-cache + [metas_cache: ] ``` ### chunk_store_config @@ -2646,14 +2645,27 @@ ring: # CLI flag: -bloom-compactor.enabled [enabled: | default = false] -# Directory where files can be downloaded for compaction. -# CLI flag: -bloom-compactor.working-directory -[working_directory: | default = ""] - # Interval at which to re-run the compaction operation. # CLI flag: -bloom-compactor.compaction-interval [compaction_interval: | default = 10m] +# How many index periods (days) to wait before compacting a table. This can be +# used to lower cost by not re-writing data to object storage too frequently +# since recent data changes more often. +# CLI flag: -bloom-compactor.min-table-compaction-period +[min_table_compaction_period: | default = 1] + +# How many index periods (days) to wait before compacting a table. This can be +# used to lower cost by not trying to compact older data which doesn't change. +# This can be optimized by aligning it with the maximum +# `reject_old_samples_max_age` setting of any tenant. +# CLI flag: -bloom-compactor.max-table-compaction-period +[max_table_compaction_period: | default = 7] + +# Number of workers to run in parallel for compaction. +# CLI flag: -bloom-compactor.worker-parallelism +[worker_parallelism: | default = 1] + # Minimum backoff time between retries. 
# CLI flag: -bloom-compactor.compaction-retries-min-backoff [compaction_retries_min_backoff: | default = 10s] @@ -3133,6 +3145,12 @@ shard_streams: # CLI flag: -bloom-gateway.cache-key-interval [bloom_gateway_cache_key_interval: | default = 15m] +# The maximum bloom block size. A value of 0 sets an unlimited size. Default is +# 200MB. The actual block size might exceed this limit since blooms will be +# added to blocks until the block exceeds the maximum block size. +# CLI flag: -bloom-compactor.max-block-size +[bloom_compactor_max_block_size: | default = 200MB] + # Allow user to send structured metadata in push payload. # CLI flag: -validation.allow-structured-metadata [allow_structured_metadata: | default = false] @@ -4358,6 +4376,7 @@ The TLS configuration. The cache block configures the cache backend. The supported CLI flags `` used to reference this configuration block are: - `bloom-gateway-client.cache` +- `bloom.metas-cache` - `frontend` - `frontend.index-stats-results-cache` - `frontend.label-results-cache` diff --git a/integration/cluster/cluster.go b/integration/cluster/cluster.go index 831da46f2cb99..7e978b84eb326 100644 --- a/integration/cluster/cluster.go +++ b/integration/cluster/cluster.go @@ -84,7 +84,6 @@ bloom_gateway: bloom_compactor: enabled: false - working_directory: {{.dataPath}}/bloom-compactor compactor: working_directory: {{.dataPath}}/compactor diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index d978c0c8f423d..59afb29708782 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,7 @@ ## Main +- [11920](https://github.com/grafana/loki/pull/11920) **xperimental**: Refactor handling of credentials in managed-auth mode +- [11869](https://github.com/grafana/loki/pull/11869) **periklis**: Add support for running with Google Workload Identity - [11868](https://github.com/grafana/loki/pull/11868) **xperimental**: Integrate support for OpenShift-managed credentials in Azure - [11854](https://github.com/grafana/loki/pull/11854) **periklis**: Allow custom audience for managed-auth on STS - [11802](https://github.com/grafana/loki/pull/11802) **xperimental**: Add support for running with Azure Workload Identity diff --git a/operator/apis/config/v1/projectconfig_types.go b/operator/apis/config/v1/projectconfig_types.go index 06ff8cb090598..8e510b5d3ab79 100644 --- a/operator/apis/config/v1/projectconfig_types.go +++ b/operator/apis/config/v1/projectconfig_types.go @@ -52,16 +52,11 @@ type OpenShiftFeatureGates struct { // Dashboards enables the loki-mixin dashboards into the OpenShift Console Dashboards bool `json:"dashboards,omitempty"` - // ManagedAuthEnv enabled when the operator installation is on OpenShift STS clusters. + // ManagedAuthEnv is true when OpenShift-functions are enabled and the operator has detected + // that it is running with some kind of "workload identity" (AWS STS, Azure WIF) enabled. ManagedAuthEnv bool } -// ManagedAuthEnabled returns true when OpenShift-functions are enabled and the operator has detected that it is -// running with some kind of "workload identity" (AWS STS, Azure WIF) enabled. -func (o *OpenShiftFeatureGates) ManagedAuthEnabled() bool { - return o.Enabled && o.ManagedAuthEnv -} - // FeatureGates is the supported set of all operator feature gates. 
type FeatureGates struct { // ServiceMonitors enables creating a Prometheus-Operator managed ServiceMonitor diff --git a/operator/apis/loki/v1/lokistack_types.go b/operator/apis/loki/v1/lokistack_types.go index a50fb48b187ea..b652ba0c7a4d9 100644 --- a/operator/apis/loki/v1/lokistack_types.go +++ b/operator/apis/loki/v1/lokistack_types.go @@ -1174,6 +1174,27 @@ type LokiStackComponentStatus struct { Ruler PodStatusMap `json:"ruler,omitempty"` } +// CredentialMode represents the type of authentication used for accessing the object storage. +// +// +kubebuilder:validation:Enum=static;token;managed +type CredentialMode string + +const ( + // CredentialModeStatic represents the usage of static, long-lived credentials stored in a Secret. + // This is the default authentication mode and available for all supported object storage types. + CredentialModeStatic CredentialMode = "static" + // CredentialModeToken represents the usage of short-lived tokens retrieved from a credential source. + // In this mode the static configuration does not contain credentials needed for the object storage. + // Instead, they are generated during runtime using a service, which allows for shorter-lived credentials and + // much more granular control. This authentication mode is not supported for all object storage types. + CredentialModeToken CredentialMode = "token" + // CredentialModeManaged represents the usage of short-lived tokens retrieved from a credential source. + // This mode is similar to CredentialModeToken,but instead of having a user-configured credential source, + // it is configured by the environment, for example the Cloud Credential Operator in OpenShift. + // This mode is only supported for certain object storage types in certain runtime environments. + CredentialModeManaged CredentialMode = "managed" +) + // LokiStackStorageStatus defines the observed state of // the Loki storage configuration. type LokiStackStorageStatus struct { @@ -1183,6 +1204,12 @@ type LokiStackStorageStatus struct { // +optional // +kubebuilder:validation:Optional Schemas []ObjectStorageSchema `json:"schemas,omitempty"` + + // CredentialMode contains the authentication mode used for accessing the object storage. + // + // +optional + // +kubebuilder:validation:Optional + CredentialMode CredentialMode `json:"credentialMode,omitempty"` } // LokiStackStatus defines the observed state of LokiStack diff --git a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml index 6854bf38ff661..ad2b2e1bc93b4 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:0.5.0 - createdAt: "2024-01-31T16:48:07Z" + createdAt: "2024-02-12T14:48:52Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. 
features.operators.openshift.io/disconnected: "true" @@ -1472,6 +1472,7 @@ spec: - delete - get - list + - update - watch - apiGroups: - config.openshift.io diff --git a/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml index a8033e692214e..e1a7e5578965a 100644 --- a/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml @@ -4064,6 +4064,14 @@ spec: description: Storage provides summary of all changes that have occurred to the storage configuration. properties: + credentialMode: + description: CredentialMode contains the authentication mode used + for accessing the object storage. + enum: + - static + - token + - managed + type: string schemas: description: Schemas is a list of schemas which have been applied to the LokiStack. diff --git a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml index f8c37162b5a44..b372a29504e3a 100644 --- a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:0.5.0 - createdAt: "2024-01-31T16:48:04Z" + createdAt: "2024-02-12T14:48:49Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. operators.operatorframework.io/builder: operator-sdk-unknown @@ -1452,6 +1452,7 @@ spec: - delete - get - list + - update - watch - apiGroups: - config.openshift.io diff --git a/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml index 8b86ddfff8bbf..f92665f5095d2 100644 --- a/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml @@ -4064,6 +4064,14 @@ spec: description: Storage provides summary of all changes that have occurred to the storage configuration. properties: + credentialMode: + description: CredentialMode contains the authentication mode used + for accessing the object storage. + enum: + - static + - token + - managed + type: string schemas: description: Schemas is a list of schemas which have been applied to the LokiStack. diff --git a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml index 234ddb423a3aa..8026bbcd0fc4c 100644 --- a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: quay.io/openshift-logging/loki-operator:0.1.0 - createdAt: "2024-01-31T16:48:10Z" + createdAt: "2024-02-12T14:48:55Z" description: | The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging. 
## Prerequisites and Requirements @@ -1457,6 +1457,7 @@ spec: - delete - get - list + - update - watch - apiGroups: - config.openshift.io diff --git a/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml index f121699ec6fb8..3163752ad36f0 100644 --- a/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml @@ -4064,6 +4064,14 @@ spec: description: Storage provides summary of all changes that have occurred to the storage configuration. properties: + credentialMode: + description: CredentialMode contains the authentication mode used + for accessing the object storage. + enum: + - static + - token + - managed + type: string schemas: description: Schemas is a list of schemas which have been applied to the LokiStack. diff --git a/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml b/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml index 4661097811b75..d603ef2a9b644 100644 --- a/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml +++ b/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml @@ -4046,6 +4046,14 @@ spec: description: Storage provides summary of all changes that have occurred to the storage configuration. properties: + credentialMode: + description: CredentialMode contains the authentication mode used + for accessing the object storage. + enum: + - static + - token + - managed + type: string schemas: description: Schemas is a list of schemas which have been applied to the LokiStack. diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml index 766a6d7d191e6..072efd5b99128 100644 --- a/operator/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -56,6 +56,7 @@ rules: - delete - get - list + - update - watch - apiGroups: - config.openshift.io diff --git a/operator/controllers/loki/credentialsrequests_controller.go b/operator/controllers/loki/credentialsrequests_controller.go deleted file mode 100644 index efd0226c6a340..0000000000000 --- a/operator/controllers/loki/credentialsrequests_controller.go +++ /dev/null @@ -1,82 +0,0 @@ -package controllers - -import ( - "context" - - "github.com/go-logr/logr" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - - lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" - "github.com/grafana/loki/operator/controllers/loki/internal/lokistack" - "github.com/grafana/loki/operator/controllers/loki/internal/management/state" - "github.com/grafana/loki/operator/internal/external/k8s" - "github.com/grafana/loki/operator/internal/handlers" -) - -// CredentialsRequestsReconciler reconciles a single CredentialsRequest resource for each LokiStack request. -type CredentialsRequestsReconciler struct { - client.Client - Scheme *runtime.Scheme - Log logr.Logger -} - -// Reconcile creates a single CredentialsRequest per LokiStack for the OpenShift cloud-credentials-operator (CCO) to -// provide a managed cloud credentials Secret. On successful creation, the LokiStack resource is annotated -// with `loki.grafana.com/credentials-request-secret-ref` that refers to the secret provided by CCO. If the LokiStack -// resource is not found its accompanying CredentialsRequest resource is deleted. 
-func (r *CredentialsRequestsReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - var stack lokiv1.LokiStack - if err := r.Client.Get(ctx, req.NamespacedName, &stack); err != nil { - if apierrors.IsNotFound(err) { - return ctrl.Result{}, handlers.DeleteCredentialsRequest(ctx, r.Client, req.NamespacedName) - } - return ctrl.Result{}, err - } - - managed, err := state.IsManaged(ctx, req, r.Client) - if err != nil { - return ctrl.Result{}, err - } - if !managed { - r.Log.Info("Skipping reconciliation for unmanaged LokiStack resource", "name", req.String()) - // Stop requeueing for unmanaged LokiStack custom resources - return ctrl.Result{}, nil - } - - storageSecretName := client.ObjectKey{ - Namespace: req.Namespace, - Name: stack.Spec.Storage.Secret.Name, - } - storageSecret := &corev1.Secret{} - err = r.Client.Get(ctx, storageSecretName, storageSecret) - if err != nil { - return ctrl.Result{}, err - } - - secretRef, err := handlers.CreateCredentialsRequest(ctx, r.Client, req.NamespacedName, storageSecret) - if err != nil { - return ctrl.Result{}, err - } - - if err := lokistack.AnnotateForCredentialsRequest(ctx, r.Client, req.NamespacedName, secretRef); err != nil { - return ctrl.Result{}, err - } - - return ctrl.Result{}, nil -} - -// SetupWithManager sets up the controller with the Manager. -func (r *CredentialsRequestsReconciler) SetupWithManager(mgr ctrl.Manager) error { - b := ctrl.NewControllerManagedBy(mgr) - return r.buildController(k8s.NewCtrlBuilder(b)) -} - -func (r *CredentialsRequestsReconciler) buildController(bld k8s.Builder) error { - return bld. - For(&lokiv1.LokiStack{}). - Complete(r) -} diff --git a/operator/controllers/loki/credentialsrequests_controller_test.go b/operator/controllers/loki/credentialsrequests_controller_test.go deleted file mode 100644 index 3c91ee2275e97..0000000000000 --- a/operator/controllers/loki/credentialsrequests_controller_test.go +++ /dev/null @@ -1,164 +0,0 @@ -package controllers - -import ( - "context" - "testing" - - cloudcredentialsv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - - lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" - "github.com/grafana/loki/operator/internal/external/k8s/k8sfakes" - "github.com/grafana/loki/operator/internal/manifests/storage" -) - -func TestCredentialsRequestController_RegistersCustomResource_WithDefaultPredicates(t *testing.T) { - b := &k8sfakes.FakeBuilder{} - k := &k8sfakes.FakeClient{} - c := &CredentialsRequestsReconciler{Client: k, Scheme: scheme} - - b.ForReturns(b) - b.OwnsReturns(b) - - err := c.buildController(b) - require.NoError(t, err) - - // Require only one For-Call for the custom resource - require.Equal(t, 1, b.ForCallCount()) - - // Require For-call with LokiStack resource - obj, _ := b.ForArgsForCall(0) - require.Equal(t, &lokiv1.LokiStack{}, obj) -} - -func TestCredentialsRequestController_DeleteCredentialsRequest_WhenLokiStackNotFound(t *testing.T) { - k := &k8sfakes.FakeClient{} - c := &CredentialsRequestsReconciler{Client: k, Scheme: scheme} - r := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "my-stack", - Namespace: "ns", - }, - } - - // Set managed auth environment - 
t.Setenv("ROLEARN", "a-role-arn") - - k.GetStub = func(_ context.Context, key types.NamespacedName, _ client.Object, _ ...client.GetOption) error { - if key.Name == r.Name && key.Namespace == r.Namespace { - return apierrors.NewNotFound(schema.GroupResource{}, "lokistack not found") - } - return nil - } - - res, err := c.Reconcile(context.Background(), r) - require.NoError(t, err) - require.Equal(t, ctrl.Result{}, res) - require.Equal(t, 1, k.DeleteCallCount()) -} - -func TestCredentialsRequestController_CreateCredentialsRequest_WhenLokiStackNotAnnotated(t *testing.T) { - k := &k8sfakes.FakeClient{} - c := &CredentialsRequestsReconciler{Client: k, Scheme: scheme} - r := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "my-stack", - Namespace: "ns", - }, - } - s := lokiv1.LokiStack{ - ObjectMeta: metav1.ObjectMeta{ - Name: "my-stack", - Namespace: "ns", - }, - Spec: lokiv1.LokiStackSpec{ - ManagementState: lokiv1.ManagementStateManaged, - }, - } - secret := &corev1.Secret{} - - // Set managed auth environment - t.Setenv("ROLEARN", "a-role-arn") - - k.GetStub = func(_ context.Context, key types.NamespacedName, out client.Object, _ ...client.GetOption) error { - switch out.(type) { - case *lokiv1.LokiStack: - if key.Name == r.Name && key.Namespace == r.Namespace { - k.SetClientObject(out, &s) - return nil - } - return apierrors.NewNotFound(schema.GroupResource{}, "lokistack not found") - case *corev1.Secret: - k.SetClientObject(out, secret) - return nil - } - return nil - } - - k.CreateStub = func(_ context.Context, o client.Object, _ ...client.CreateOption) error { - _, isCredReq := o.(*cloudcredentialsv1.CredentialsRequest) - if !isCredReq { - return apierrors.NewBadRequest("something went wrong creating a credentials request") - } - return nil - } - - k.UpdateStub = func(_ context.Context, o client.Object, _ ...client.UpdateOption) error { - stack, ok := o.(*lokiv1.LokiStack) - if !ok { - return apierrors.NewBadRequest("something went wrong creating a credentials request") - } - - _, hasSecretRef := stack.Annotations[storage.AnnotationCredentialsRequestsSecretRef] - if !hasSecretRef { - return apierrors.NewBadRequest("something went updating the lokistack annotations") - } - return nil - } - - res, err := c.Reconcile(context.Background(), r) - require.NoError(t, err) - require.Equal(t, ctrl.Result{}, res) - require.Equal(t, 1, k.CreateCallCount()) - require.Equal(t, 1, k.UpdateCallCount()) -} - -func TestCredentialsRequestController_SkipsUnmanaged(t *testing.T) { - k := &k8sfakes.FakeClient{} - c := &CredentialsRequestsReconciler{Client: k, Scheme: scheme} - r := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "my-stack", - Namespace: "ns", - }, - } - - s := lokiv1.LokiStack{ - ObjectMeta: metav1.ObjectMeta{ - Name: "my-stack", - Namespace: "ns", - }, - Spec: lokiv1.LokiStackSpec{ - ManagementState: lokiv1.ManagementStateUnmanaged, - }, - } - - k.GetStub = func(_ context.Context, key types.NamespacedName, out client.Object, _ ...client.GetOption) error { - if key.Name == s.Name && key.Namespace == s.Namespace { - k.SetClientObject(out, &s) - return nil - } - return apierrors.NewNotFound(schema.GroupResource{}, "something not found") - } - - res, err := c.Reconcile(context.Background(), r) - require.NoError(t, err) - require.Equal(t, ctrl.Result{}, res) -} diff --git a/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery.go b/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery.go deleted file mode 100644 index 
c911c1196eed4..0000000000000 --- a/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery.go +++ /dev/null @@ -1,30 +0,0 @@ -package lokistack - -import ( - "context" - - "github.com/ViaQ/logerr/v2/kverrors" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/grafana/loki/operator/internal/external/k8s" - "github.com/grafana/loki/operator/internal/manifests/storage" -) - -// AnnotateForCredentialsRequest adds the `loki.grafana.com/credentials-request-secret-ref` annotation -// to the named Lokistack. If no LokiStack is found, then skip reconciliation. Or else return an error. -func AnnotateForCredentialsRequest(ctx context.Context, k k8s.Client, key client.ObjectKey, secretRef string) error { - stack, err := getLokiStack(ctx, k, key) - if stack == nil || err != nil { - return err - } - - if val, ok := stack.Annotations[storage.AnnotationCredentialsRequestsSecretRef]; ok && val == secretRef { - return nil - } - - if err := updateAnnotation(ctx, k, stack, storage.AnnotationCredentialsRequestsSecretRef, secretRef); err != nil { - return kverrors.Wrap(err, "failed to update lokistack `credentialsRequestSecretRef` annotation", "key", key) - } - - return nil -} diff --git a/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery_test.go b/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery_test.go deleted file mode 100644 index ef073ca853ba5..0000000000000 --- a/operator/controllers/loki/internal/lokistack/credentialsrequest_discovery_test.go +++ /dev/null @@ -1,98 +0,0 @@ -package lokistack - -import ( - "context" - "testing" - - "github.com/stretchr/testify/require" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" - - lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" - "github.com/grafana/loki/operator/internal/external/k8s/k8sfakes" - "github.com/grafana/loki/operator/internal/manifests/storage" -) - -func TestAnnotateForCredentialsRequest_ReturnError_WhenLokiStackMissing(t *testing.T) { - k := &k8sfakes.FakeClient{} - annotationVal := "ns-my-stack-aws-creds" - stackKey := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - k.GetStub = func(_ context.Context, _ types.NamespacedName, out client.Object, _ ...client.GetOption) error { - return apierrors.NewBadRequest("failed to get lokistack") - } - - err := AnnotateForCredentialsRequest(context.Background(), k, stackKey, annotationVal) - require.Error(t, err) -} - -func TestAnnotateForCredentialsRequest_DoNothing_WhenAnnotationExists(t *testing.T) { - k := &k8sfakes.FakeClient{} - - annotationVal := "ns-my-stack-aws-creds" - s := &lokiv1.LokiStack{ - ObjectMeta: metav1.ObjectMeta{ - Name: "my-stack", - Namespace: "ns", - Annotations: map[string]string{ - storage.AnnotationCredentialsRequestsSecretRef: annotationVal, - }, - }, - } - stackKey := client.ObjectKeyFromObject(s) - - k.GetStub = func(_ context.Context, key types.NamespacedName, out client.Object, _ ...client.GetOption) error { - if key.Name == stackKey.Name && key.Namespace == stackKey.Namespace { - k.SetClientObject(out, s) - return nil - } - return nil - } - - err := AnnotateForCredentialsRequest(context.Background(), k, stackKey, annotationVal) - require.NoError(t, err) - require.Equal(t, 0, k.UpdateCallCount()) -} - -func TestAnnotateForCredentialsRequest_UpdateLokistack_WhenAnnotationMissing(t *testing.T) { - k := &k8sfakes.FakeClient{} - - annotationVal := 
"ns-my-stack-aws-creds" - s := &lokiv1.LokiStack{ - ObjectMeta: metav1.ObjectMeta{ - Name: "my-stack", - Namespace: "ns", - Annotations: map[string]string{}, - }, - } - stackKey := client.ObjectKeyFromObject(s) - - k.GetStub = func(_ context.Context, key types.NamespacedName, out client.Object, _ ...client.GetOption) error { - if key.Name == stackKey.Name && key.Namespace == stackKey.Namespace { - k.SetClientObject(out, s) - return nil - } - return nil - } - - k.UpdateStub = func(_ context.Context, o client.Object, _ ...client.UpdateOption) error { - stack, ok := o.(*lokiv1.LokiStack) - if !ok { - return apierrors.NewBadRequest("failed conversion to *lokiv1.LokiStack") - } - val, ok := stack.Annotations[storage.AnnotationCredentialsRequestsSecretRef] - if !ok { - return apierrors.NewBadRequest("missing annotation") - } - if val != annotationVal { - return apierrors.NewBadRequest("annotations does not match input") - } - return nil - } - - err := AnnotateForCredentialsRequest(context.Background(), k, stackKey, annotationVal) - require.NoError(t, err) - require.Equal(t, 1, k.UpdateCallCount()) -} diff --git a/operator/controllers/loki/lokistack_controller.go b/operator/controllers/loki/lokistack_controller.go index 40e7691bd1a2b..eb30a1a9bf555 100644 --- a/operator/controllers/loki/lokistack_controller.go +++ b/operator/controllers/loki/lokistack_controller.go @@ -3,7 +3,6 @@ package controllers import ( "context" "errors" - "strings" "time" "github.com/go-logr/logr" @@ -16,7 +15,6 @@ import ( corev1 "k8s.io/api/core/v1" networkingv1 "k8s.io/api/networking/v1" rbacv1 "k8s.io/api/rbac/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" @@ -31,6 +29,7 @@ import ( configv1 "github.com/grafana/loki/operator/apis/config/v1" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" "github.com/grafana/loki/operator/controllers/loki/internal/management/state" + "github.com/grafana/loki/operator/internal/config" "github.com/grafana/loki/operator/internal/external/k8s" "github.com/grafana/loki/operator/internal/handlers" manifestsocp "github.com/grafana/loki/operator/internal/manifests/openshift" @@ -111,6 +110,7 @@ type LokiStackReconciler struct { Log logr.Logger Scheme *runtime.Scheme FeatureGates configv1.FeatureGates + AuthConfig *config.ManagedAuthConfig } // +kubebuilder:rbac:groups=loki.grafana.com,resources=lokistacks,verbs=get;list;watch;create;update;patch;delete @@ -128,7 +128,7 @@ type LokiStackReconciler struct { // +kubebuilder:rbac:groups=policy,resources=poddisruptionbudgets,verbs=get;list;watch;create;update // +kubebuilder:rbac:groups=config.openshift.io,resources=dnses;apiservers;proxies,verbs=get;list;watch // +kubebuilder:rbac:groups=route.openshift.io,resources=routes,verbs=get;list;watch;create;update;delete -// +kubebuilder:rbac:groups=cloudcredential.openshift.io,resources=credentialsrequests,verbs=get;list;watch;create;delete +// +kubebuilder:rbac:groups=cloudcredential.openshift.io,resources=credentialsrequests,verbs=get;list;watch;create;update;delete // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. 
@@ -150,7 +150,7 @@ func (r *LokiStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } var degraded *status.DegradedError - err = r.updateResources(ctx, req) + credentialMode, err := r.updateResources(ctx, req) switch { case errors.As(err, °raded): // degraded errors are handled by status.Refresh below @@ -158,7 +158,7 @@ func (r *LokiStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, err } - err = status.Refresh(ctx, r.Client, req, time.Now(), degraded) + err = status.Refresh(ctx, r.Client, req, time.Now(), credentialMode, degraded) if err != nil { return ctrl.Result{}, err } @@ -172,18 +172,25 @@ func (r *LokiStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, nil } -func (r *LokiStackReconciler) updateResources(ctx context.Context, req ctrl.Request) error { +func (r *LokiStackReconciler) updateResources(ctx context.Context, req ctrl.Request) (lokiv1.CredentialMode, error) { if r.FeatureGates.BuiltInCertManagement.Enabled { if err := handlers.CreateOrRotateCertificates(ctx, r.Log, req, r.Client, r.Scheme, r.FeatureGates); err != nil { - return err + return "", err } } - if err := handlers.CreateOrUpdateLokiStack(ctx, r.Log, req, r.Client, r.Scheme, r.FeatureGates); err != nil { - return err + if r.FeatureGates.OpenShift.ManagedAuthEnv { + if err := handlers.CreateCredentialsRequest(ctx, r.Log, r.Scheme, r.AuthConfig, r.Client, req); err != nil { + return "", err + } + } + + credentialMode, err := handlers.CreateOrUpdateLokiStack(ctx, r.Log, req, r.Client, r.Scheme, r.FeatureGates) + if err != nil { + return "", err } - return nil + return credentialMode, nil } // SetupWithManager sets up the controller with the Manager. @@ -216,7 +223,7 @@ func (r *LokiStackReconciler) buildController(bld k8s.Builder) error { if r.FeatureGates.OpenShift.Enabled { bld = bld. Owns(&routev1.Route{}, updateOrDeleteOnlyPred). 
- Watches(&cloudcredentialv1.CredentialsRequest{}, r.enqueueForCredentialsRequest(), updateOrDeleteOnlyPred) + Owns(&cloudcredentialv1.CredentialsRequest{}, updateOrDeleteOnlyPred) if r.FeatureGates.OpenShift.ClusterTLSPolicy { bld = bld.Watches(&openshiftconfigv1.APIServer{}, r.enqueueAllLokiStacksHandler(), updateOrDeleteOnlyPred) @@ -358,34 +365,3 @@ func (r *LokiStackReconciler) enqueueForStorageCA() handler.EventHandler { return requests }) } - -func (r *LokiStackReconciler) enqueueForCredentialsRequest() handler.EventHandler { - return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { - a := obj.GetAnnotations() - owner, ok := a[manifestsocp.AnnotationCredentialsRequestOwner] - if !ok { - return nil - } - - var ( - ownerParts = strings.Split(owner, "/") - namespace = ownerParts[0] - name = ownerParts[1] - key = client.ObjectKey{Namespace: namespace, Name: name} - ) - - var stack lokiv1.LokiStack - if err := r.Client.Get(ctx, key, &stack); err != nil { - if !apierrors.IsNotFound(err) { - r.Log.Error(err, "failed retrieving CredentialsRequest owning Lokistack", "key", key) - } - return nil - } - - return []reconcile.Request{ - { - NamespacedName: key, - }, - } - }) -} diff --git a/operator/controllers/loki/lokistack_controller_test.go b/operator/controllers/loki/lokistack_controller_test.go index 515d829766aa1..6be22022c19db 100644 --- a/operator/controllers/loki/lokistack_controller_test.go +++ b/operator/controllers/loki/lokistack_controller_test.go @@ -161,7 +161,18 @@ func TestLokiStackController_RegisterOwnedResourcesForUpdateOrDeleteOnly(t *test { obj: &routev1.Route{}, index: 10, - ownCallsCount: 11, + ownCallsCount: 12, + featureGates: configv1.FeatureGates{ + OpenShift: configv1.OpenShiftFeatureGates{ + Enabled: true, + }, + }, + pred: updateOrDeleteOnlyPred, + }, + { + obj: &cloudcredentialv1.CredentialsRequest{}, + index: 11, + ownCallsCount: 12, featureGates: configv1.FeatureGates{ OpenShift: configv1.OpenShiftFeatureGates{ Enabled: true, @@ -203,20 +214,9 @@ func TestLokiStackController_RegisterWatchedResources(t *testing.T) { } table := []test{ { - src: &cloudcredentialv1.CredentialsRequest{}, + src: &openshiftconfigv1.APIServer{}, index: 3, watchesCallsCount: 4, - featureGates: configv1.FeatureGates{ - OpenShift: configv1.OpenShiftFeatureGates{ - Enabled: true, - }, - }, - pred: updateOrDeleteOnlyPred, - }, - { - src: &openshiftconfigv1.APIServer{}, - index: 4, - watchesCallsCount: 5, featureGates: configv1.FeatureGates{ OpenShift: configv1.OpenShiftFeatureGates{ Enabled: true, @@ -227,8 +227,8 @@ func TestLokiStackController_RegisterWatchedResources(t *testing.T) { }, { src: &openshiftconfigv1.Proxy{}, - index: 4, - watchesCallsCount: 5, + index: 3, + watchesCallsCount: 4, featureGates: configv1.FeatureGates{ OpenShift: configv1.OpenShiftFeatureGates{ Enabled: true, diff --git a/operator/docs/operator/api.md b/operator/docs/operator/api.md index 92f93dd970224..48fbe0c8a7e48 100644 --- a/operator/docs/operator/api.md +++ b/operator/docs/operator/api.md @@ -1100,6 +1100,40 @@ string +## CredentialMode { #loki-grafana-com-v1-CredentialMode } +(string alias) +

+(Appears on: LokiStackStorageStatus)
+
+CredentialMode represents the type of authentication used for accessing the object storage.
+
+| Value | Description |
+| --- | --- |
+| `"managed"` | CredentialModeManaged represents the usage of short-lived tokens retrieved from a credential source. This mode is similar to CredentialModeToken, but instead of having a user-configured credential source, it is configured by the environment, for example the Cloud Credential Operator in OpenShift. This mode is only supported for certain object storage types in certain runtime environments. |
+| `"static"` | CredentialModeStatic represents the usage of static, long-lived credentials stored in a Secret. This is the default authentication mode and available for all supported object storage types. |
+| `"token"` | CredentialModeToken represents the usage of short-lived tokens retrieved from a credential source. In this mode the static configuration does not contain credentials needed for the object storage. Instead, they are generated during runtime using a service, which allows for shorter-lived credentials and much more granular control. This authentication mode is not supported for all object storage types. |
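For orientation, and not part of the diff itself: the new `credentialMode` value is reported under `status.storage` of a LokiStack resource. Below is a minimal sketch of how that status fragment might look, assuming a stack running with OpenShift-managed credentials; the `schemas` entry and all concrete values are illustrative.

```yaml
# Hypothetical LokiStack status fragment.
# credentialMode is the field added by this change; schemas is a pre-existing status field.
status:
  storage:
    credentialMode: managed   # one of: static, token, managed
    schemas:
    - version: v13
      effectiveDate: "2024-02-01"
```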
## HashRingSpec { #loki-grafana-com-v1-HashRingSpec }

(Appears on: LokiStackSpec)

@@ -2152,6 +2186,20 @@ the Loki storage configuration.
 to the LokiStack.
+credentialMode (CredentialMode, optional)
+CredentialMode contains the authentication mode used for accessing the object storage.
diff --git a/operator/docs/operator/feature-gates.md b/operator/docs/operator/feature-gates.md
index 34fbdf4b69a4d..189b72e4ddb12 100644
--- a/operator/docs/operator/feature-gates.md
+++ b/operator/docs/operator/feature-gates.md
@@ -417,7 +417,8 @@ bool
-ManagedAuthEnv enabled when the operator installation is on OpenShift STS clusters.
+ManagedAuthEnv is true when OpenShift-functions are enabled and the operator has detected
+that it is running with some kind of “workload identity” (AWS STS, Azure WIF) enabled.
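As context for the gate change above, and not part of the diff: `ManagedAuthEnv` is now derived from environment variables on the operator process (see the new `managed_auth.go` below). A minimal sketch of a container `env` block that would trigger AWS STS detection follows; the role ARN value and the way the variables are injected (for example via the operator Subscription) are assumptions.

```yaml
# Hypothetical operator Deployment env fragment: either ROLEARN (AWS STS) or the
# CLIENTID/TENANTID/SUBSCRIPTIONID trio (Azure WIF) makes ManagedAuthEnv evaluate to true.
env:
- name: ROLEARN
  value: arn:aws:iam::123456789012:role/loki-operator   # placeholder role ARN
# Azure WIF alternative:
# - name: CLIENTID
#   value: <azure-client-id>
# - name: TENANTID
#   value: <azure-tenant-id>
# - name: SUBSCRIPTIONID
#   value: <azure-subscription-id>
```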

diff --git a/operator/internal/config/managed_auth.go b/operator/internal/config/managed_auth.go new file mode 100644 index 0000000000000..73598e7032f8f --- /dev/null +++ b/operator/internal/config/managed_auth.go @@ -0,0 +1,48 @@ +package config + +import "os" + +type AWSEnvironment struct { + RoleARN string +} + +type AzureEnvironment struct { + ClientID string + SubscriptionID string + TenantID string + Region string +} + +type ManagedAuthConfig struct { + AWS *AWSEnvironment + Azure *AzureEnvironment +} + +func discoverManagedAuthConfig() *ManagedAuthConfig { + // AWS + roleARN := os.Getenv("ROLEARN") + + // Azure + clientID := os.Getenv("CLIENTID") + tenantID := os.Getenv("TENANTID") + subscriptionID := os.Getenv("SUBSCRIPTIONID") + + switch { + case roleARN != "": + return &ManagedAuthConfig{ + AWS: &AWSEnvironment{ + RoleARN: roleARN, + }, + } + case clientID != "" && tenantID != "" && subscriptionID != "": + return &ManagedAuthConfig{ + Azure: &AzureEnvironment{ + ClientID: clientID, + SubscriptionID: subscriptionID, + TenantID: tenantID, + }, + } + } + + return nil +} diff --git a/operator/internal/config/options.go b/operator/internal/config/options.go index 7ed9abb526a7b..dc54404f22450 100644 --- a/operator/internal/config/options.go +++ b/operator/internal/config/options.go @@ -17,19 +17,24 @@ import ( // LoadConfig initializes the controller configuration, optionally overriding the defaults // from a provided configuration file. -func LoadConfig(scheme *runtime.Scheme, configFile string) (*configv1.ProjectConfig, ctrl.Options, error) { +func LoadConfig(scheme *runtime.Scheme, configFile string) (*configv1.ProjectConfig, *ManagedAuthConfig, ctrl.Options, error) { options := ctrl.Options{Scheme: scheme} if configFile == "" { - return &configv1.ProjectConfig{}, options, nil + return &configv1.ProjectConfig{}, nil, options, nil } ctrlCfg, err := loadConfigFile(scheme, configFile) if err != nil { - return nil, options, fmt.Errorf("failed to parse controller manager config file: %w", err) + return nil, nil, options, fmt.Errorf("failed to parse controller manager config file: %w", err) + } + + managedAuth := discoverManagedAuthConfig() + if ctrlCfg.Gates.OpenShift.Enabled && managedAuth != nil { + ctrlCfg.Gates.OpenShift.ManagedAuthEnv = true } options = mergeOptionsFromFile(options, ctrlCfg) - return ctrlCfg, options, nil + return ctrlCfg, managedAuth, options, nil } func mergeOptionsFromFile(o manager.Options, cfg *configv1.ProjectConfig) manager.Options { diff --git a/operator/internal/handlers/credentialsrequest_create.go b/operator/internal/handlers/credentialsrequest_create.go index 6074e10b2d5af..50e06375ffd8b 100644 --- a/operator/internal/handlers/credentialsrequest_create.go +++ b/operator/internal/handlers/credentialsrequest_create.go @@ -3,64 +3,102 @@ package handlers import ( "context" "errors" + "fmt" "github.com/ViaQ/logerr/v2/kverrors" + "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" + "github.com/grafana/loki/operator/internal/config" "github.com/grafana/loki/operator/internal/external/k8s" + "github.com/grafana/loki/operator/internal/manifests" "github.com/grafana/loki/operator/internal/manifests/openshift" 
"github.com/grafana/loki/operator/internal/manifests/storage" ) -var ( - errAzureNoSecretFound = errors.New("can not create CredentialsRequest: no azure secret found") - errAzureNoRegion = errors.New("can not create CredentialsRequest: missing secret field: region") -) +var errAzureNoRegion = errors.New("can not create CredentialsRequest: missing secret field: region") // CreateCredentialsRequest creates a new CredentialsRequest resource for a Lokistack // to request a cloud credentials Secret resource from the OpenShift cloud-credentials-operator. -func CreateCredentialsRequest(ctx context.Context, k k8s.Client, stack client.ObjectKey, secret *corev1.Secret) (string, error) { - managedAuthEnv := openshift.DiscoverManagedAuthEnv() - if managedAuthEnv == nil { - return "", nil +func CreateCredentialsRequest(ctx context.Context, log logr.Logger, scheme *runtime.Scheme, managedAuth *config.ManagedAuthConfig, k k8s.Client, req ctrl.Request) error { + ll := log.WithValues("lokistack", req.NamespacedName, "event", "createCredentialsRequest") + + var stack lokiv1.LokiStack + if err := k.Get(ctx, req.NamespacedName, &stack); err != nil { + if apierrors.IsNotFound(err) { + // maybe the user deleted it before we could react? Either way this isn't an issue + ll.Error(err, "could not find the requested LokiStack", "name", req.String()) + return nil + } + return kverrors.Wrap(err, "failed to lookup LokiStack", "name", req.String()) } - if managedAuthEnv.Azure != nil && managedAuthEnv.Azure.Region == "" { + if managedAuth.Azure != nil && managedAuth.Azure.Region == "" { // Managed environment for Azure does not provide Region, but we need this for the CredentialsRequest. // This looks like an oversight when creating the UI in OpenShift, but for now we need to pull this data // from somewhere else -> the Azure Storage Secret - if secret == nil { - return "", errAzureNoSecretFound + storageSecretName := client.ObjectKey{ + Namespace: stack.Namespace, + Name: stack.Spec.Storage.Secret.Name, + } + storageSecret := &corev1.Secret{} + if err := k.Get(ctx, storageSecretName, storageSecret); err != nil { + if apierrors.IsNotFound(err) { + // Skip this error here as it will be picked up by the LokiStack handler instead + ll.Error(err, "could not find secret for LokiStack", "name", req.String()) + return nil + } + return err } - region := secret.Data[storage.KeyAzureRegion] + region := storageSecret.Data[storage.KeyAzureRegion] if len(region) == 0 { - return "", errAzureNoRegion + return errAzureNoRegion } - managedAuthEnv.Azure.Region = string(region) + managedAuth.Azure.Region = string(region) } opts := openshift.Options{ BuildOpts: openshift.BuildOptions{ LokiStackName: stack.Name, LokiStackNamespace: stack.Namespace, + RulerName: manifests.RulerName(stack.Name), }, - ManagedAuthEnv: managedAuthEnv, + ManagedAuth: managedAuth, } credReq, err := openshift.BuildCredentialsRequest(opts) if err != nil { - return "", err + return err } - if err := k.Create(ctx, credReq); err != nil { - if !apierrors.IsAlreadyExists(err) { - return "", kverrors.Wrap(err, "failed to create credentialsrequest", "key", client.ObjectKeyFromObject(credReq)) - } + err = ctrl.SetControllerReference(&stack, credReq, scheme) + if err != nil { + return kverrors.Wrap(err, "failed to set controller owner reference to resource") + } + + desired := credReq.DeepCopyObject().(client.Object) + mutateFn := manifests.MutateFuncFor(credReq, desired, map[string]string{}) + + op, err := ctrl.CreateOrUpdate(ctx, k, credReq, mutateFn) + if err != nil { + 
return kverrors.Wrap(err, "failed to configure CredentialRequest") + } + + msg := fmt.Sprintf("Resource has been %s", op) + switch op { + case ctrlutil.OperationResultNone: + ll.V(1).Info(msg) + default: + ll.Info(msg) } - return credReq.Spec.SecretRef.Name, nil + return nil } diff --git a/operator/internal/handlers/credentialsrequest_create_test.go b/operator/internal/handlers/credentialsrequest_create_test.go index df903eaec662f..626302a113274 100644 --- a/operator/internal/handlers/credentialsrequest_create_test.go +++ b/operator/internal/handlers/credentialsrequest_create_test.go @@ -8,51 +8,108 @@ import ( "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" + "github.com/grafana/loki/operator/internal/config" "github.com/grafana/loki/operator/internal/external/k8s/k8sfakes" ) -func TestCreateCredentialsRequest_DoNothing_WhenManagedAuthEnvMissing(t *testing.T) { +func credentialsRequestFakeClient(cr *cloudcredentialv1.CredentialsRequest, lokistack *lokiv1.LokiStack, secret *corev1.Secret) *k8sfakes.FakeClient { k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} + k.GetStub = func(_ context.Context, name types.NamespacedName, object client.Object, _ ...client.GetOption) error { + switch object.(type) { + case *cloudcredentialv1.CredentialsRequest: + if cr == nil { + return errors.NewNotFound(schema.GroupResource{}, name.Name) + } + k.SetClientObject(object, cr) + case *lokiv1.LokiStack: + if lokistack == nil { + return errors.NewNotFound(schema.GroupResource{}, name.Name) + } + k.SetClientObject(object, lokistack) + case *corev1.Secret: + if secret == nil { + return errors.NewNotFound(schema.GroupResource{}, name.Name) + } + k.SetClientObject(object, secret) + } + return nil + } - secretRef, err := CreateCredentialsRequest(context.Background(), k, key, nil) - require.NoError(t, err) - require.Empty(t, secretRef) + return k } func TestCreateCredentialsRequest_CreateNewResource(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} + wantServiceAccountNames := []string{ + "my-stack", + "my-stack-ruler", + } + + lokistack := &lokiv1.LokiStack{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-stack", + Namespace: "ns", + }, + } - t.Setenv("ROLEARN", "a-role-arn") + k := credentialsRequestFakeClient(nil, lokistack, nil) + req := ctrl.Request{ + NamespacedName: client.ObjectKey{Name: "my-stack", Namespace: "ns"}, + } + + managedAuth := &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ + RoleARN: "a-role-arn", + }, + } - secretRef, err := CreateCredentialsRequest(context.Background(), k, key, nil) + err := CreateCredentialsRequest(context.Background(), logger, scheme, managedAuth, k, req) require.NoError(t, err) - require.NotEmpty(t, secretRef) require.Equal(t, 1, k.CreateCallCount()) + + _, obj, _ := k.CreateArgsForCall(0) + credReq, ok := obj.(*cloudcredentialv1.CredentialsRequest) + require.True(t, ok) + + require.Equal(t, wantServiceAccountNames, credReq.Spec.ServiceAccountNames) } func TestCreateCredentialsRequest_CreateNewResourceAzure(t *testing.T) { wantRegion := "test-region" - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} + lokistack := 
&lokiv1.LokiStack{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-stack", + Namespace: "ns", + }, + } secret := &corev1.Secret{ Data: map[string][]byte{ "region": []byte(wantRegion), }, } - t.Setenv("CLIENTID", "test-client-id") - t.Setenv("TENANTID", "test-tenant-id") - t.Setenv("SUBSCRIPTIONID", "test-subscription-id") + k := credentialsRequestFakeClient(nil, lokistack, secret) + req := ctrl.Request{ + NamespacedName: client.ObjectKey{Name: "my-stack", Namespace: "ns"}, + } - secretRef, err := CreateCredentialsRequest(context.Background(), k, key, secret) + managedAuth := &config.ManagedAuthConfig{ + Azure: &config.AzureEnvironment{ + ClientID: "test-client-id", + SubscriptionID: "test-tenant-id", + TenantID: "test-subscription-id", + }, + } + + err := CreateCredentialsRequest(context.Background(), logger, scheme, managedAuth, k, req) require.NoError(t, err) - require.NotEmpty(t, secretRef) require.Equal(t, 1, k.CreateCallCount()) _, obj, _ := k.CreateArgsForCall(0) @@ -66,17 +123,20 @@ func TestCreateCredentialsRequest_CreateNewResourceAzure(t *testing.T) { } func TestCreateCredentialsRequest_CreateNewResourceAzure_Errors(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} + lokistack := &lokiv1.LokiStack{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-stack", + Namespace: "ns", + }, + } + req := ctrl.Request{ + NamespacedName: client.ObjectKey{Name: "my-stack", Namespace: "ns"}, + } tt := []struct { secret *corev1.Secret wantError string }{ - { - secret: nil, - wantError: errAzureNoSecretFound.Error(), - }, { secret: &corev1.Secret{}, wantError: errAzureNoRegion.Error(), @@ -86,29 +146,52 @@ func TestCreateCredentialsRequest_CreateNewResourceAzure_Errors(t *testing.T) { for _, tc := range tt { tc := tc t.Run(tc.wantError, func(t *testing.T) { - // Not parallel (environment variables) - t.Setenv("CLIENTID", "test-client-id") - t.Setenv("TENANTID", "test-tenant-id") - t.Setenv("SUBSCRIPTIONID", "test-subscription-id") - - _, err := CreateCredentialsRequest(context.Background(), k, key, tc.secret) + t.Parallel() + + managedAuth := &config.ManagedAuthConfig{ + Azure: &config.AzureEnvironment{ + ClientID: "test-client-id", + SubscriptionID: "test-tenant-id", + TenantID: "test-subscription-id", + }, + } + k := credentialsRequestFakeClient(nil, lokistack, tc.secret) + + err := CreateCredentialsRequest(context.Background(), logger, scheme, managedAuth, k, req) require.EqualError(t, err, tc.wantError) }) } } func TestCreateCredentialsRequest_DoNothing_WhenCredentialsRequestExist(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} + req := ctrl.Request{ + NamespacedName: client.ObjectKey{Name: "my-stack", Namespace: "ns"}, + } - t.Setenv("ROLEARN", "a-role-arn") + managedAuth := &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ + RoleARN: "a-role-arn", + }, + } - k.CreateStub = func(_ context.Context, _ client.Object, _ ...client.CreateOption) error { - return errors.NewAlreadyExists(schema.GroupResource{}, "credentialsrequest exists") + cr := &cloudcredentialv1.CredentialsRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-stack", + Namespace: "ns", + }, + } + lokistack := &lokiv1.LokiStack{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-stack", + Namespace: "ns", + }, } - secretRef, err := CreateCredentialsRequest(context.Background(), k, key, nil) + k := credentialsRequestFakeClient(cr, lokistack, nil) + + err := CreateCredentialsRequest(context.Background(), logger, scheme, 
managedAuth, k, req) require.NoError(t, err) - require.NotEmpty(t, secretRef) - require.Equal(t, 1, k.CreateCallCount()) + require.Equal(t, 2, k.GetCallCount()) + require.Equal(t, 0, k.CreateCallCount()) + require.Equal(t, 1, k.UpdateCallCount()) } diff --git a/operator/internal/handlers/credentialsrequest_delete.go b/operator/internal/handlers/credentialsrequest_delete.go deleted file mode 100644 index edf05fcb205d0..0000000000000 --- a/operator/internal/handlers/credentialsrequest_delete.go +++ /dev/null @@ -1,43 +0,0 @@ -package handlers - -import ( - "context" - - "github.com/ViaQ/logerr/v2/kverrors" - "k8s.io/apimachinery/pkg/api/errors" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/grafana/loki/operator/internal/external/k8s" - "github.com/grafana/loki/operator/internal/manifests/openshift" -) - -// DeleteCredentialsRequest deletes a LokiStack's accompanying CredentialsRequest resource -// to trigger the OpenShift cloud-credentials-operator to wipe out any credentials related -// Secret resource on the LokiStack namespace. -func DeleteCredentialsRequest(ctx context.Context, k k8s.Client, stack client.ObjectKey) error { - managedAuthEnv := openshift.DiscoverManagedAuthEnv() - if managedAuthEnv == nil { - return nil - } - - opts := openshift.Options{ - BuildOpts: openshift.BuildOptions{ - LokiStackName: stack.Name, - LokiStackNamespace: stack.Namespace, - }, - ManagedAuthEnv: managedAuthEnv, - } - - credReq, err := openshift.BuildCredentialsRequest(opts) - if err != nil { - return kverrors.Wrap(err, "failed to build credentialsrequest", "key", stack) - } - - if err := k.Delete(ctx, credReq); err != nil { - if !errors.IsNotFound(err) { - return kverrors.Wrap(err, "failed to delete credentialsrequest", "key", client.ObjectKeyFromObject(credReq)) - } - } - - return nil -} diff --git a/operator/internal/handlers/credentialsrequest_delete_test.go b/operator/internal/handlers/credentialsrequest_delete_test.go deleted file mode 100644 index 57f1c005ee706..0000000000000 --- a/operator/internal/handlers/credentialsrequest_delete_test.go +++ /dev/null @@ -1,47 +0,0 @@ -package handlers - -import ( - "context" - "testing" - - "github.com/stretchr/testify/require" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime/schema" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/grafana/loki/operator/internal/external/k8s/k8sfakes" -) - -func TestDeleteCredentialsRequest_DoNothing_WhenManagedAuthEnvMissing(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - err := DeleteCredentialsRequest(context.Background(), k, key) - require.NoError(t, err) -} - -func TestDeleteCredentialsRequest_DeleteExistingResource(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - t.Setenv("ROLEARN", "a-role-arn") - - err := DeleteCredentialsRequest(context.Background(), k, key) - require.NoError(t, err) - require.Equal(t, 1, k.DeleteCallCount()) -} - -func TestDeleteCredentialsRequest_DoNothing_WhenCredentialsRequestNotExists(t *testing.T) { - k := &k8sfakes.FakeClient{} - key := client.ObjectKey{Name: "my-stack", Namespace: "ns"} - - t.Setenv("ROLEARN", "a-role-arn") - - k.DeleteStub = func(_ context.Context, _ client.Object, _ ...client.DeleteOption) error { - return errors.NewNotFound(schema.GroupResource{}, "credentials request not found") - } - - err := DeleteCredentialsRequest(context.Background(), k, key) - require.NoError(t, err) - require.Equal(t, 1, 
k.DeleteCallCount()) -} diff --git a/operator/internal/handlers/internal/storage/secrets.go b/operator/internal/handlers/internal/storage/secrets.go index 6b8275d2d28ae..99bafb911ec26 100644 --- a/operator/internal/handlers/internal/storage/secrets.go +++ b/operator/internal/handlers/internal/storage/secrets.go @@ -3,6 +3,7 @@ package storage import ( "context" "crypto/sha1" + "encoding/json" "errors" "fmt" "sort" @@ -32,8 +33,13 @@ var ( errAzureNoCredentials = errors.New("azure storage secret does contain neither account_key or client_id") errAzureMixedCredentials = errors.New("azure storage secret can not contain both account_key and client_id") errAzureManagedIdentityNoOverride = errors.New("when in managed mode, storage secret can not contain credentials") + + errGCPParseCredentialsFile = errors.New("gcp storage secret cannot be parsed from JSON content") + errGCPWrongCredentialSourceFile = errors.New("credential source in secret needs to point to token file") ) +const gcpAccountTypeExternal = "external_account" + func getSecrets(ctx context.Context, k k8s.Client, stack *lokiv1.LokiStack, fg configv1.FeatureGates) (*corev1.Secret, *corev1.Secret, error) { var ( storageSecret corev1.Secret @@ -53,15 +59,7 @@ func getSecrets(ctx context.Context, k k8s.Client, stack *lokiv1.LokiStack, fg c } if fg.OpenShift.ManagedAuthEnv { - secretName, ok := stack.Annotations[storage.AnnotationCredentialsRequestsSecretRef] - if !ok { - return nil, nil, &status.DegradedError{ - Message: "Missing OpenShift cloud credentials request", - Reason: lokiv1.ReasonMissingCredentialsRequest, - Requeue: true, - } - } - + secretName := storage.ManagedCredentialsSecretName(stack.Name) managedAuthCredsKey := client.ObjectKey{Name: secretName, Namespace: stack.Namespace} if err := k.Get(ctx, managedAuthCredsKey, &managedAuthSecret); err != nil { if apierrors.IsNotFound(err) { @@ -94,7 +92,7 @@ func extractSecrets(secretType lokiv1.ObjectStorageSecretType, objStore, managed SharedStore: secretType, } - if fg.OpenShift.ManagedAuthEnabled() { + if fg.OpenShift.ManagedAuthEnv { var managedAuthHash string managedAuthHash, err = hashSecretData(managedAuth) if err != nil { @@ -184,11 +182,18 @@ func extractAzureConfigSecret(s *corev1.Secret, fg configv1.FeatureGates) (*stor // Extract and validate optional fields endpointSuffix := s.Data[storage.KeyAzureStorageEndpointSuffix] audience := s.Data[storage.KeyAzureAudience] + region := s.Data[storage.KeyAzureRegion] if !workloadIdentity && len(audience) > 0 { return nil, fmt.Errorf("%w: %s", errSecretFieldNotAllowed, storage.KeyAzureAudience) } + if fg.OpenShift.ManagedAuthEnv { + if len(region) == 0 { + return nil, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyAzureRegion) + } + } + return &storage.AzureStorageConfig{ Env: string(env), Container: string(container), @@ -204,12 +209,7 @@ func validateAzureCredentials(s *corev1.Secret, fg configv1.FeatureGates) (workl tenantID := s.Data[storage.KeyAzureStorageTenantID] subscriptionID := s.Data[storage.KeyAzureStorageSubscriptionID] - if fg.OpenShift.ManagedAuthEnabled() { - region := s.Data[storage.KeyAzureRegion] - if len(region) == 0 { - return false, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyAzureRegion) - } - + if fg.OpenShift.ManagedAuthEnv { if len(accountKey) > 0 || len(clientID) > 0 || len(tenantID) > 0 || len(subscriptionID) > 0 { return false, errAzureManagedIdentityNoOverride } @@ -255,8 +255,36 @@ func extractGCSConfigSecret(s *corev1.Secret) (*storage.GCSStorageConfig, error) return nil, 
fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyGCPServiceAccountKeyFilename) } + credentialsFile := struct { + CredentialsType string `json:"type"` + CredentialsSource struct { + File string `json:"file"` + } `json:"credential_source"` + }{} + + err := json.Unmarshal(keyJSON, &credentialsFile) + if err != nil { + return nil, errGCPParseCredentialsFile + } + + var ( + audience = s.Data[storage.KeyGCPWorkloadIdentityProviderAudience] + isWorkloadIdentity = credentialsFile.CredentialsType == gcpAccountTypeExternal + ) + if isWorkloadIdentity { + if len(audience) == 0 { + return nil, fmt.Errorf("%w: %s", errSecretMissingField, storage.KeyGCPWorkloadIdentityProviderAudience) + } + + if credentialsFile.CredentialsSource.File != storage.ServiceAccountTokenFilePath { + return nil, fmt.Errorf("%w: %s", errGCPWrongCredentialSourceFile, storage.ServiceAccountTokenFilePath) + } + } + return &storage.GCSStorageConfig{ - Bucket: string(bucket), + Bucket: string(bucket), + WorkloadIdentity: isWorkloadIdentity, + Audience: string(audience), }, nil } @@ -296,7 +324,7 @@ func extractS3ConfigSecret(s *corev1.Secret, fg configv1.FeatureGates) (*storage ) switch { - case fg.OpenShift.ManagedAuthEnabled(): + case fg.OpenShift.ManagedAuthEnv: cfg.STS = true cfg.Audience = string(audience) // Do not allow users overriding the role arn provided on Loki Operator installation diff --git a/operator/internal/handlers/internal/storage/secrets_test.go b/operator/internal/handlers/internal/storage/secrets_test.go index 94b6ae2e3aaa1..1363cd4a660a6 100644 --- a/operator/internal/handlers/internal/storage/secrets_test.go +++ b/operator/internal/handlers/internal/storage/secrets_test.go @@ -71,11 +71,12 @@ func TestUnknownType(t *testing.T) { func TestAzureExtract(t *testing.T) { type test struct { - name string - secret *corev1.Secret - managedSecret *corev1.Secret - featureGates configv1.FeatureGates - wantError string + name string + secret *corev1.Secret + managedSecret *corev1.Secret + featureGates configv1.FeatureGates + wantError string + wantCredentialMode lokiv1.CredentialMode } table := []test{ { @@ -224,6 +225,7 @@ func TestAzureExtract(t *testing.T) { "account_key": []byte("secret"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, { name: "mandatory for workload-identity set", @@ -239,6 +241,7 @@ func TestAzureExtract(t *testing.T) { "region": []byte("test-region"), }, }, + wantCredentialMode: lokiv1.CredentialModeToken, }, { name: "mandatory for managed workload-identity set", @@ -252,7 +255,14 @@ func TestAzureExtract(t *testing.T) { }, }, managedSecret: &corev1.Secret{ - Data: map[string][]byte{}, + ObjectMeta: metav1.ObjectMeta{ + Name: "managed-secret", + }, + Data: map[string][]byte{ + "azure_client_id": []byte("test-client-id"), + "azure_tenant_id": []byte("test-tenant-id"), + "azure_subscription_id": []byte("test-subscription-id"), + }, }, featureGates: configv1.FeatureGates{ OpenShift: configv1.OpenShiftFeatureGates{ @@ -260,6 +270,7 @@ func TestAzureExtract(t *testing.T) { ManagedAuthEnv: true, }, }, + wantCredentialMode: lokiv1.CredentialModeManaged, }, { name: "all set including optional", @@ -273,6 +284,7 @@ func TestAzureExtract(t *testing.T) { "endpoint_suffix": []byte("suffix"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, } for _, tst := range table { @@ -285,7 +297,8 @@ func TestAzureExtract(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, opts.SecretName) require.NotEmpty(t, opts.SecretSHA1) - require.Equal(t, opts.SharedStore, 
lokiv1.ObjectStorageSecretAzure) + require.Equal(t, lokiv1.ObjectStorageSecretAzure, opts.SharedStore) + require.Equal(t, tst.wantCredentialMode, opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } @@ -295,9 +308,10 @@ func TestAzureExtract(t *testing.T) { func TestGCSExtract(t *testing.T) { type test struct { - name string - secret *corev1.Secret - wantError string + name string + secret *corev1.Secret + wantError string + wantCredentialMode lokiv1.CredentialMode } table := []test{ { @@ -314,15 +328,51 @@ func TestGCSExtract(t *testing.T) { }, wantError: "missing secret field: key.json", }, + { + name: "missing audience", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketname": []byte("here"), + "key.json": []byte("{\"type\": \"external_account\"}"), + }, + }, + wantError: "missing secret field: audience", + }, + { + name: "credential_source file no override", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketname": []byte("here"), + "audience": []byte("test"), + "key.json": []byte("{\"type\": \"external_account\", \"credential_source\": {\"file\": \"/custom/path/to/secret/storage/serviceaccount/token\"}}"), + }, + }, + wantError: "credential source in secret needs to point to token file: /var/run/secrets/storage/serviceaccount/token", + }, { name: "all set", secret: &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{Name: "test"}, Data: map[string][]byte{ "bucketname": []byte("here"), - "key.json": []byte("{\"type\": \"SA\"}"), + "key.json": []byte("{\"type\": \"service_account\"}"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, + }, + { + name: "mandatory for workload-identity set", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketname": []byte("here"), + "audience": []byte("test"), + "key.json": []byte("{\"type\": \"external_account\", \"credential_source\": {\"file\": \"/var/run/secrets/storage/serviceaccount/token\"}}"), + }, + }, + wantCredentialMode: lokiv1.CredentialModeToken, }, } for _, tst := range table { @@ -330,9 +380,10 @@ func TestGCSExtract(t *testing.T) { t.Run(tst.name, func(t *testing.T) { t.Parallel() - _, err := extractSecrets(lokiv1.ObjectStorageSecretGCS, tst.secret, nil, configv1.FeatureGates{}) + opts, err := extractSecrets(lokiv1.ObjectStorageSecretGCS, tst.secret, nil, configv1.FeatureGates{}) if tst.wantError == "" { require.NoError(t, err) + require.Equal(t, tst.wantCredentialMode, opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } @@ -342,9 +393,10 @@ func TestGCSExtract(t *testing.T) { func TestS3Extract(t *testing.T) { type test struct { - name string - secret *corev1.Secret - wantError string + name string + secret *corev1.Secret + wantError string + wantCredentialMode lokiv1.CredentialMode } table := []test{ { @@ -422,6 +474,7 @@ func TestS3Extract(t *testing.T) { "sse_kms_key_id": []byte("kms-key-id"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, { name: "all set with SSE-KMS with encryption context", @@ -437,6 +490,7 @@ func TestS3Extract(t *testing.T) { "sse_kms_encryption_context": []byte("kms-encryption-ctx"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, { name: "all set with SSE-S3", @@ -450,6 +504,7 @@ func TestS3Extract(t *testing.T) { "sse_type": []byte("SSE-S3"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, { name: "all set without SSE", @@ -462,6 +517,7 @@ func 
TestS3Extract(t *testing.T) { "access_key_secret": []byte("secret"), }, }, + wantCredentialMode: lokiv1.CredentialModeStatic, }, { name: "STS missing region", @@ -484,6 +540,7 @@ func TestS3Extract(t *testing.T) { "region": []byte("here"), }, }, + wantCredentialMode: lokiv1.CredentialModeToken, }, { name: "STS all set", @@ -496,6 +553,7 @@ func TestS3Extract(t *testing.T) { "audience": []byte("audience"), }, }, + wantCredentialMode: lokiv1.CredentialModeToken, }, } for _, tst := range table { @@ -508,7 +566,8 @@ func TestS3Extract(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, opts.SecretName) require.NotEmpty(t, opts.SecretSHA1) - require.Equal(t, opts.SharedStore, lokiv1.ObjectStorageSecretS3) + require.Equal(t, lokiv1.ObjectStorageSecretS3, opts.SharedStore) + require.Equal(t, tst.wantCredentialMode, opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } @@ -582,10 +641,11 @@ func TestS3Extract_WithOpenShiftManagedAuth(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, opts.SecretName) require.NotEmpty(t, opts.SecretSHA1) - require.Equal(t, opts.SharedStore, lokiv1.ObjectStorageSecretS3) + require.Equal(t, lokiv1.ObjectStorageSecretS3, opts.SharedStore) require.True(t, opts.S3.STS) - require.Equal(t, opts.OpenShift.CloudCredentials.SecretName, tst.managedAuthSecret.Name) + require.Equal(t, tst.managedAuthSecret.Name, opts.OpenShift.CloudCredentials.SecretName) require.NotEmpty(t, opts.OpenShift.CloudCredentials.SHA1) + require.Equal(t, lokiv1.CredentialModeManaged, opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } @@ -733,7 +793,8 @@ func TestSwiftExtract(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, opts.SecretName) require.NotEmpty(t, opts.SecretSHA1) - require.Equal(t, opts.SharedStore, lokiv1.ObjectStorageSecretSwift) + require.Equal(t, lokiv1.ObjectStorageSecretSwift, opts.SharedStore) + require.Equal(t, lokiv1.CredentialModeStatic, opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } @@ -806,7 +867,8 @@ func TestAlibabaCloudExtract(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, opts.SecretName) require.NotEmpty(t, opts.SecretSHA1) - require.Equal(t, opts.SharedStore, lokiv1.ObjectStorageSecretAlibabaCloud) + require.Equal(t, lokiv1.ObjectStorageSecretAlibabaCloud, opts.SharedStore) + require.Equal(t, lokiv1.CredentialModeStatic, opts.CredentialMode()) } else { require.EqualError(t, err, tst.wantError) } diff --git a/operator/internal/handlers/internal/storage/storage_test.go b/operator/internal/handlers/internal/storage/storage_test.go index 9e041bf99a23a..45f5b0f2865ba 100644 --- a/operator/internal/handlers/internal/storage/storage_test.go +++ b/operator/internal/handlers/internal/storage/storage_test.go @@ -17,7 +17,6 @@ import ( configv1 "github.com/grafana/loki/operator/apis/config/v1" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" "github.com/grafana/loki/operator/internal/external/k8s/k8sfakes" - "github.com/grafana/loki/operator/internal/manifests/storage" "github.com/grafana/loki/operator/internal/status" ) @@ -135,77 +134,6 @@ func TestBuildOptions_WhenMissingSecret_SetDegraded(t *testing.T) { require.Equal(t, degradedErr, err) } -func TestBuildOptions_WhenMissingCloudCredentialsRequest_SetDegraded(t *testing.T) { - sw := &k8sfakes.FakeStatusWriter{} - k := &k8sfakes.FakeClient{} - r := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "my-stack", - Namespace: "some-ns", - }, - } - - fg := configv1.FeatureGates{ - OpenShift: 
configv1.OpenShiftFeatureGates{ - ManagedAuthEnv: true, - }, - } - - degradedErr := &status.DegradedError{ - Message: "Missing OpenShift cloud credentials request", - Reason: lokiv1.ReasonMissingCredentialsRequest, - Requeue: true, - } - - stack := &lokiv1.LokiStack{ - TypeMeta: metav1.TypeMeta{ - Kind: "LokiStack", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "my-stack", - Namespace: "some-ns", - UID: "b23f9a38-9672-499f-8c29-15ede74d3ece", - Annotations: map[string]string{}, - }, - Spec: lokiv1.LokiStackSpec{ - Size: lokiv1.SizeOneXExtraSmall, - Storage: lokiv1.ObjectStorageSpec{ - Schemas: []lokiv1.ObjectStorageSchema{ - { - Version: lokiv1.ObjectStorageSchemaV11, - EffectiveDate: "2020-10-11", - }, - }, - Secret: lokiv1.ObjectStorageSecretSpec{ - Name: defaultManagedAuthSecret.Name, - Type: lokiv1.ObjectStorageSecretS3, - }, - }, - }, - } - - k.GetStub = func(_ context.Context, name types.NamespacedName, object client.Object, _ ...client.GetOption) error { - _, isLokiStack := object.(*lokiv1.LokiStack) - if r.Name == name.Name && r.Namespace == name.Namespace && isLokiStack { - k.SetClientObject(object, stack) - return nil - } - if name.Name == defaultManagedAuthSecret.Name { - k.SetClientObject(object, &defaultManagedAuthSecret) - return nil - } - return apierrors.NewNotFound(schema.GroupResource{}, "something is not found") - } - - k.StatusStub = func() client.StatusWriter { return sw } - - _, err := BuildOptions(context.TODO(), k, stack, fg) - - // make sure error is returned - require.Error(t, err) - require.Equal(t, degradedErr, err) -} - func TestBuildOptions_WhenMissingCloudCredentialsSecret_SetDegraded(t *testing.T) { sw := &k8sfakes.FakeStatusWriter{} k := &k8sfakes.FakeClient{} @@ -236,9 +164,6 @@ func TestBuildOptions_WhenMissingCloudCredentialsSecret_SetDegraded(t *testing.T Name: "my-stack", Namespace: "some-ns", UID: "b23f9a38-9672-499f-8c29-15ede74d3ece", - Annotations: map[string]string{ - storage.AnnotationCredentialsRequestsSecretRef: "my-stack-aws-creds", - }, }, Spec: lokiv1.LokiStackSpec{ Size: lokiv1.SizeOneXExtraSmall, diff --git a/operator/internal/handlers/lokistack_create_or_update.go b/operator/internal/handlers/lokistack_create_or_update.go index 2f78f75d02c5b..47e7a309bf8b9 100644 --- a/operator/internal/handlers/lokistack_create_or_update.go +++ b/operator/internal/handlers/lokistack_create_or_update.go @@ -36,7 +36,7 @@ func CreateOrUpdateLokiStack( k k8s.Client, s *runtime.Scheme, fg configv1.FeatureGates, -) error { +) (lokiv1.CredentialMode, error) { ll := log.WithValues("lokistack", req.NamespacedName, "event", "createOrUpdate") var stack lokiv1.LokiStack @@ -44,9 +44,9 @@ func CreateOrUpdateLokiStack( if apierrors.IsNotFound(err) { // maybe the user deleted it before we could react? 
Either way this isn't an issue ll.Error(err, "could not find the requested loki stack", "name", req.NamespacedName) - return nil + return "", nil } - return kverrors.Wrap(err, "failed to lookup lokistack", "name", req.NamespacedName) + return "", kverrors.Wrap(err, "failed to lookup lokistack", "name", req.NamespacedName) } img := os.Getenv(manifests.EnvRelatedImageLoki) @@ -61,21 +61,21 @@ func CreateOrUpdateLokiStack( objStore, err := storage.BuildOptions(ctx, k, &stack, fg) if err != nil { - return err + return "", err } baseDomain, tenants, err := gateway.BuildOptions(ctx, ll, k, &stack, fg) if err != nil { - return err + return "", err } if err = rules.Cleanup(ctx, ll, k, &stack); err != nil { - return err + return "", err } alertingRules, recordingRules, ruler, ocpOptions, err := rules.BuildOptions(ctx, ll, k, &stack) if err != nil { - return err + return "", err } certRotationRequiredAt := "" @@ -86,7 +86,7 @@ func CreateOrUpdateLokiStack( timeoutConfig, err := manifests.NewTimeoutConfig(stack.Spec.Limits) if err != nil { ll.Error(err, "failed to parse query timeout") - return &status.DegradedError{ + return "", &status.DegradedError{ Message: fmt.Sprintf("Error parsing query timeout: %s", err), Reason: lokiv1.ReasonQueryTimeoutInvalid, Requeue: false, @@ -116,13 +116,13 @@ func CreateOrUpdateLokiStack( if optErr := manifests.ApplyDefaultSettings(&opts); optErr != nil { ll.Error(optErr, "failed to conform options to build settings") - return optErr + return "", optErr } if fg.LokiStackGateway { if optErr := manifests.ApplyGatewayDefaultOptions(&opts); optErr != nil { ll.Error(optErr, "failed to apply defaults options to gateway settings") - return optErr + return "", optErr } } @@ -140,13 +140,13 @@ func CreateOrUpdateLokiStack( if optErr := manifests.ApplyTLSSettings(&opts, tlsProfile); optErr != nil { ll.Error(optErr, "failed to conform options to tls profile settings") - return optErr + return "", optErr } objects, err := manifests.BuildAll(opts) if err != nil { ll.Error(err, "failed to build manifests") - return err + return "", err } ll.Info("manifests built", "count", len(objects)) @@ -158,7 +158,7 @@ func CreateOrUpdateLokiStack( // a user possibly being unable to read logs. 
if err := status.SetStorageSchemaStatus(ctx, k, req, objStore.Schemas); err != nil { ll.Error(err, "failed to set storage schema status") - return err + return "", err } var errCount int32 @@ -182,7 +182,7 @@ func CreateOrUpdateLokiStack( depAnnotations, err := dependentAnnotations(ctx, k, obj) if err != nil { l.Error(err, "failed to set dependent annotations") - return err + return "", err } desired := obj.DeepCopyObject().(client.Object) @@ -205,7 +205,7 @@ func CreateOrUpdateLokiStack( } if errCount > 0 { - return kverrors.New("failed to configure lokistack resources", "name", req.NamespacedName) + return "", kverrors.New("failed to configure lokistack resources", "name", req.NamespacedName) } // 1x.demo is used only for development, so the metrics will not @@ -214,7 +214,7 @@ func CreateOrUpdateLokiStack( metrics.Collect(&opts.Stack, opts.Name) } - return nil + return objStore.CredentialMode(), nil } func dependentAnnotations(ctx context.Context, k k8s.Client, obj client.Object) (map[string]string, error) { diff --git a/operator/internal/handlers/lokistack_create_or_update_test.go b/operator/internal/handlers/lokistack_create_or_update_test.go index 4ba9a9affc369..bef5ffc9efb70 100644 --- a/operator/internal/handlers/lokistack_create_or_update_test.go +++ b/operator/internal/handlers/lokistack_create_or_update_test.go @@ -108,7 +108,7 @@ func TestCreateOrUpdateLokiStack_WhenGetReturnsNotFound_DoesNotError(t *testing. k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) require.NoError(t, err) // make sure create was NOT called because the Get failed @@ -132,7 +132,7 @@ func TestCreateOrUpdateLokiStack_WhenGetReturnsAnErrorOtherThanNotFound_ReturnsT k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) require.Equal(t, badRequestErr, errors.Unwrap(err)) @@ -219,7 +219,7 @@ func TestCreateOrUpdateLokiStack_SetsNamespaceOnAllObjects(t *testing.T) { k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) require.NoError(t, err) // make sure create was called @@ -327,7 +327,7 @@ func TestCreateOrUpdateLokiStack_SetsOwnerRefOnAllObjects(t *testing.T) { k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) require.NoError(t, err) // make sure create was called @@ -387,7 +387,7 @@ func TestCreateOrUpdateLokiStack_WhenSetControllerRefInvalid_ContinueWithOtherOb k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) // make sure error is returned to re-trigger reconciliation require.Error(t, err) @@ -490,7 +490,7 @@ func TestCreateOrUpdateLokiStack_WhenGetReturnsNoError_UpdateObjects(t *testing. 
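The handler now returns the applied credential mode alongside the error; the test call sites below simply discard it because they don't assert on it. A hedged sketch of how a caller can thread that value through to status reporting follows. The wrapper function, its parameter names, and the logr.Logger type of the logger argument are assumptions; only the CreateOrUpdateLokiStack and status.Refresh signatures are taken from this diff.

```go
package lokistacksketch

import (
	"context"
	"errors"
	"time"

	"github.com/go-logr/logr"
	"k8s.io/apimachinery/pkg/runtime"
	ctrl "sigs.k8s.io/controller-runtime"

	configv1 "github.com/grafana/loki/operator/apis/config/v1"
	"github.com/grafana/loki/operator/internal/external/k8s"
	"github.com/grafana/loki/operator/internal/handlers"
	"github.com/grafana/loki/operator/internal/status"
)

// reconcileOnce is a hypothetical wrapper; the real LokiStackReconciler wires
// these two calls together with more elaborate error handling.
func reconcileOnce(
	ctx context.Context,
	log logr.Logger,
	req ctrl.Request,
	k k8s.Client,
	s *runtime.Scheme,
	fg configv1.FeatureGates,
) error {
	// credentialMode is the empty string on every error path of the handler.
	credentialMode, err := handlers.CreateOrUpdateLokiStack(ctx, log, req, k, s, fg)
	if err != nil {
		var degraded *status.DegradedError
		if errors.As(err, &degraded) {
			// Report the degraded condition while still refreshing the status.
			return status.Refresh(ctx, k, req, time.Now(), credentialMode, degraded)
		}
		return err
	}

	// On success the resolved mode (static, token or managed) is written to
	// the LokiStack status by Refresh.
	return status.Refresh(ctx, k, req, time.Now(), credentialMode, nil)
}
```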
k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) require.NoError(t, err) // make sure create not called @@ -556,7 +556,7 @@ func TestCreateOrUpdateLokiStack_WhenCreateReturnsError_ContinueWithOtherObjects k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) // make sure error is returned to re-trigger reconciliation require.Error(t, err) @@ -663,7 +663,7 @@ func TestCreateOrUpdateLokiStack_WhenUpdateReturnsError_ContinueWithOtherObjects k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) // make sure error is returned to re-trigger reconciliation require.Error(t, err) @@ -734,7 +734,7 @@ func TestCreateOrUpdateLokiStack_WhenInvalidQueryTimeout_SetDegraded(t *testing. k.StatusStub = func() client.StatusWriter { return sw } - err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) + _, err := CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, featureGates) // make sure error is returned require.Error(t, err) diff --git a/operator/internal/manifests/mutate.go b/operator/internal/manifests/mutate.go index 27421750bf2cc..63308bb9ceb62 100644 --- a/operator/internal/manifests/mutate.go +++ b/operator/internal/manifests/mutate.go @@ -6,6 +6,7 @@ import ( "github.com/ViaQ/logerr/v2/kverrors" "github.com/imdario/mergo" routev1 "github.com/openshift/api/route/v1" + cloudcredentialv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -123,6 +124,11 @@ func MutateFuncFor(existing, desired client.Object, depAnnotations map[string]st wantRt := desired.(*routev1.Route) mutateRoute(rt, wantRt) + case *cloudcredentialv1.CredentialsRequest: + cr := existing.(*cloudcredentialv1.CredentialsRequest) + wantCr := desired.(*cloudcredentialv1.CredentialsRequest) + mutateCredentialRequest(cr, wantCr) + case *monitoringv1.PrometheusRule: pr := existing.(*monitoringv1.PrometheusRule) wantPr := desired.(*monitoringv1.PrometheusRule) @@ -213,6 +219,10 @@ func mutateRoute(existing, desired *routev1.Route) { existing.Spec = desired.Spec } +func mutateCredentialRequest(existing, desired *cloudcredentialv1.CredentialsRequest) { + existing.Spec = desired.Spec +} + func mutatePrometheusRule(existing, desired *monitoringv1.PrometheusRule) { existing.Annotations = desired.Annotations existing.Labels = desired.Labels diff --git a/operator/internal/manifests/openshift/credentialsrequest.go b/operator/internal/manifests/openshift/credentialsrequest.go index 2962b61d0d1ef..0e97dd97c2b19 100644 --- a/operator/internal/manifests/openshift/credentialsrequest.go +++ b/operator/internal/manifests/openshift/credentialsrequest.go @@ -1,10 +1,6 @@ package openshift import ( - "fmt" - "os" - "path" - "github.com/ViaQ/logerr/v2/kverrors" cloudcredentialv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1" corev1 "k8s.io/api/core/v1" @@ -12,32 +8,26 @@ import ( 
"k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/grafana/loki/operator/internal/config" "github.com/grafana/loki/operator/internal/manifests/storage" ) -const ( - ccoNamespace = "openshift-cloud-credential-operator" -) - func BuildCredentialsRequest(opts Options) (*cloudcredentialv1.CredentialsRequest, error) { stack := client.ObjectKey{Name: opts.BuildOpts.LokiStackName, Namespace: opts.BuildOpts.LokiStackNamespace} - providerSpec, secretName, err := encodeProviderSpec(opts.BuildOpts.LokiStackName, opts.ManagedAuthEnv) + providerSpec, err := encodeProviderSpec(opts.ManagedAuth) if err != nil { return nil, kverrors.Wrap(err, "failed encoding credentialsrequest provider spec") } return &cloudcredentialv1.CredentialsRequest{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", stack.Namespace, secretName), - Namespace: ccoNamespace, - Annotations: map[string]string{ - AnnotationCredentialsRequestOwner: stack.String(), - }, + Name: stack.Name, + Namespace: stack.Namespace, }, Spec: cloudcredentialv1.CredentialsRequestSpec{ SecretRef: corev1.ObjectReference{ - Name: secretName, + Name: storage.ManagedCredentialsSecretName(stack.Name), Namespace: stack.Namespace, }, ProviderSpec: providerSpec, @@ -45,16 +35,13 @@ func BuildCredentialsRequest(opts Options) (*cloudcredentialv1.CredentialsReques stack.Name, rulerServiceAccountName(opts), }, - CloudTokenPath: path.Join(storage.AWSTokenVolumeDirectory, "token"), + CloudTokenPath: storage.ServiceAccountTokenFilePath, }, }, nil } -func encodeProviderSpec(stackName string, env *ManagedAuthEnv) (*runtime.RawExtension, string, error) { - var ( - spec runtime.Object - secretName string - ) +func encodeProviderSpec(env *config.ManagedAuthConfig) (*runtime.RawExtension, error) { + var spec runtime.Object switch { case env.AWS != nil: @@ -73,7 +60,6 @@ func encodeProviderSpec(stackName string, env *ManagedAuthEnv) (*runtime.RawExte }, STSIAMRoleARN: env.AWS.RoleARN, } - secretName = fmt.Sprintf("%s-aws-creds", stackName) case env.Azure != nil: azure := env.Azure @@ -101,38 +87,8 @@ func encodeProviderSpec(stackName string, env *ManagedAuthEnv) (*runtime.RawExte AzureSubscriptionID: azure.SubscriptionID, AzureTenantID: azure.TenantID, } - secretName = fmt.Sprintf("%s-azure-creds", stackName) } encodedSpec, err := cloudcredentialv1.Codec.EncodeProviderSpec(spec.DeepCopyObject()) - return encodedSpec, secretName, err -} - -func DiscoverManagedAuthEnv() *ManagedAuthEnv { - // AWS - roleARN := os.Getenv("ROLEARN") - - // Azure - clientID := os.Getenv("CLIENTID") - tenantID := os.Getenv("TENANTID") - subscriptionID := os.Getenv("SUBSCRIPTIONID") - - switch { - case roleARN != "": - return &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ - RoleARN: roleARN, - }, - } - case clientID != "" && tenantID != "" && subscriptionID != "": - return &ManagedAuthEnv{ - Azure: &AzureWIFEnvironment{ - ClientID: clientID, - SubscriptionID: subscriptionID, - TenantID: tenantID, - }, - } - } - - return nil + return encodedSpec, err } diff --git a/operator/internal/manifests/openshift/credentialsrequest_test.go b/operator/internal/manifests/openshift/credentialsrequest_test.go index 21b193c8c7d7e..36c6e2331f7e5 100644 --- a/operator/internal/manifests/openshift/credentialsrequest_test.go +++ b/operator/internal/manifests/openshift/credentialsrequest_test.go @@ -1,40 +1,22 @@ package openshift import ( - "strings" "testing" "github.com/stretchr/testify/require" + "github.com/grafana/loki/operator/internal/config" 
"github.com/grafana/loki/operator/internal/manifests/storage" ) -func TestBuildCredentialsRequest_HasOwnerAnnotation(t *testing.T) { - opts := Options{ - BuildOpts: BuildOptions{ - LokiStackName: "a-stack", - LokiStackNamespace: "ns", - }, - ManagedAuthEnv: &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ - RoleARN: "role-arn", - }, - }, - } - - credReq, err := BuildCredentialsRequest(opts) - require.NoError(t, err) - require.Contains(t, credReq.Annotations, AnnotationCredentialsRequestOwner) -} - func TestBuildCredentialsRequest_HasSecretRef_MatchingLokiStackNamespace(t *testing.T) { opts := Options{ BuildOpts: BuildOptions{ LokiStackName: "a-stack", LokiStackNamespace: "ns", }, - ManagedAuthEnv: &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ + ManagedAuth: &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ RoleARN: "role-arn", }, }, @@ -51,8 +33,8 @@ func TestBuildCredentialsRequest_HasServiceAccountNames_ContainsAllLokiStackServ LokiStackName: "a-stack", LokiStackNamespace: "ns", }, - ManagedAuthEnv: &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ + ManagedAuth: &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ RoleARN: "role-arn", }, }, @@ -70,8 +52,8 @@ func TestBuildCredentialsRequest_CloudTokenPath_MatchinOpenShiftSADirectory(t *t LokiStackName: "a-stack", LokiStackNamespace: "ns", }, - ManagedAuthEnv: &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ + ManagedAuth: &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ RoleARN: "role-arn", }, }, @@ -79,7 +61,7 @@ func TestBuildCredentialsRequest_CloudTokenPath_MatchinOpenShiftSADirectory(t *t credReq, err := BuildCredentialsRequest(opts) require.NoError(t, err) - require.True(t, strings.HasPrefix(credReq.Spec.CloudTokenPath, storage.AWSTokenVolumeDirectory)) + require.Equal(t, storage.ServiceAccountTokenFilePath, credReq.Spec.CloudTokenPath) } func TestBuildCredentialsRequest_FollowsNamingConventions(t *testing.T) { @@ -96,14 +78,14 @@ func TestBuildCredentialsRequest_FollowsNamingConventions(t *testing.T) { LokiStackName: "a-stack", LokiStackNamespace: "ns", }, - ManagedAuthEnv: &ManagedAuthEnv{ - AWS: &AWSSTSEnv{ + ManagedAuth: &config.ManagedAuthConfig{ + AWS: &config.AWSEnvironment{ RoleARN: "role-arn", }, }, }, - wantName: "ns-a-stack-aws-creds", - wantSecretName: "a-stack-aws-creds", + wantName: "a-stack", + wantSecretName: "a-stack-managed-credentials", }, } for _, test := range tests { diff --git a/operator/internal/manifests/openshift/options.go b/operator/internal/manifests/openshift/options.go index 9bc2e4faae36e..572db7fe64453 100644 --- a/operator/internal/manifests/openshift/options.go +++ b/operator/internal/manifests/openshift/options.go @@ -6,6 +6,7 @@ import ( "time" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" + "github.com/grafana/loki/operator/internal/config" ) // Options is the set of internal template options for rendering @@ -14,7 +15,7 @@ type Options struct { BuildOpts BuildOptions Authentication []AuthenticationSpec Authorization AuthorizationSpec - ManagedAuthEnv *ManagedAuthEnv + ManagedAuth *config.ManagedAuthConfig } // AuthenticationSpec describes the authentication specification @@ -55,22 +56,6 @@ type TenantData struct { CookieSecret string } -type AWSSTSEnv struct { - RoleARN string -} - -type AzureWIFEnvironment struct { - ClientID string - SubscriptionID string - TenantID string - Region string -} - -type ManagedAuthEnv struct { - AWS *AWSSTSEnv - Azure *AzureWIFEnvironment -} - // NewOptions returns an openshift options struct. 
func NewOptions( stackName, stackNamespace string, diff --git a/operator/internal/manifests/openshift/var.go b/operator/internal/manifests/openshift/var.go index 84928c48d7e28..5e3ac6300e3eb 100644 --- a/operator/internal/manifests/openshift/var.go +++ b/operator/internal/manifests/openshift/var.go @@ -48,8 +48,6 @@ var ( MonitoringSVCUserWorkload = "alertmanager-user-workload" MonitoringUserWorkloadNS = "openshift-user-workload-monitoring" - - AnnotationCredentialsRequestOwner = "loki.grafana.com/credentialsrequest-owner" ) func authorizerRbacName(componentName string) string { diff --git a/operator/internal/manifests/storage/configure.go b/operator/internal/manifests/storage/configure.go index f3fd86ebbaa1c..ede098425323d 100644 --- a/operator/internal/manifests/storage/configure.go +++ b/operator/internal/manifests/storage/configure.go @@ -13,6 +13,18 @@ import ( lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" ) +var ( + managedAuthConfigVolumeMount = corev1.VolumeMount{ + Name: managedAuthConfigVolumeName, + MountPath: managedAuthConfigDirectory, + } + + saTokenVolumeMount = corev1.VolumeMount{ + Name: saTokenVolumeName, + MountPath: saTokenVolumeMountPath, + } +) + // ConfigureDeployment appends additional pod volumes and container env vars, args, volume mounts // based on the object storage type. Currently supported amendments: // - All: Ensure object storage secret mounted and auth projected as env vars. @@ -127,11 +139,11 @@ func ensureObjectStoreCredentials(p *corev1.PodSpec, opts Options) corev1.PodSpe if managedAuthEnabled(opts) { container.Env = append(container.Env, managedAuthCredentials(opts)...) volumes = append(volumes, saTokenVolume(opts)) - container.VolumeMounts = append(container.VolumeMounts, saTokenVolumeMount(opts)) + container.VolumeMounts = append(container.VolumeMounts, saTokenVolumeMount) - if opts.OpenShift.ManagedAuthEnabled() { - volumes = append(volumes, managedAuthVolume(opts)) - container.VolumeMounts = append(container.VolumeMounts, managedAuthVolumeMount(opts)) + if opts.OpenShift.ManagedAuthEnabled() && opts.S3 != nil && opts.S3.STS { + volumes = append(volumes, managedAuthConfigVolume(opts)) + container.VolumeMounts = append(container.VolumeMounts, managedAuthConfigVolumeMount) } } else { container.Env = append(container.Env, staticAuthCredentials(opts)...) 
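The provider-specific token directories give way to a single projected ServiceAccount token mount, as the updated test expectations later in this diff spell out. Below is a standalone sketch of that volume shape, with values copied from those expectations; the program itself and the hard-coded audience are only illustrative.

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/utils/ptr"
)

func main() {
	// saTokenVolumeMountPath in this diff; shared by S3 STS, Azure WIF and GCS WIF.
	const mountPath = "/var/run/secrets/storage/serviceaccount"

	vol := corev1.Volume{
		Name: "bound-sa-token",
		VolumeSource: corev1.VolumeSource{
			Projected: &corev1.ProjectedVolumeSource{
				Sources: []corev1.VolumeProjection{{
					ServiceAccountToken: &corev1.ServiceAccountTokenProjection{
						Audience:          "sts.amazonaws.com", // provider-specific audience
						ExpirationSeconds: ptr.To[int64](3600),
						Path:              corev1.ServiceAccountTokenKey,
					},
				}},
			},
		},
	}
	mount := corev1.VolumeMount{Name: vol.Name, MountPath: mountPath}

	// The token ends up at <mountPath>/token, which is what
	// ServiceAccountTokenFilePath resolves to and what EnvAWSWebIdentityTokenFile,
	// EnvAzureFederatedTokenFile and the GCS credential_source file point at.
	fmt.Println(mount.MountPath + "/" + corev1.ServiceAccountTokenKey)
}
```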
@@ -183,13 +195,13 @@ func managedAuthCredentials(opts Options) []corev1.EnvVar { case lokiv1.ObjectStorageSecretS3: if opts.OpenShift.ManagedAuthEnabled() { return []corev1.EnvVar{ - envVarFromValue(EnvAWSCredentialsFile, path.Join(managedAuthSecretDirectory, KeyAWSCredentialsFilename)), + envVarFromValue(EnvAWSCredentialsFile, path.Join(managedAuthConfigDirectory, KeyAWSCredentialsFilename)), envVarFromValue(EnvAWSSdkLoadConfig, "true"), } } else { return []corev1.EnvVar{ envVarFromSecret(EnvAWSRoleArn, opts.SecretName, KeyAWSRoleArn), - envVarFromValue(EnvAWSWebIdentityTokenFile, path.Join(AWSTokenVolumeDirectory, "token")), + envVarFromValue(EnvAWSWebIdentityTokenFile, ServiceAccountTokenFilePath), } } case lokiv1.ObjectStorageSecretAzure: @@ -199,7 +211,7 @@ func managedAuthCredentials(opts Options) []corev1.EnvVar { envVarFromSecret(EnvAzureClientID, opts.OpenShift.CloudCredentials.SecretName, azureManagedCredentialKeyClientID), envVarFromSecret(EnvAzureTenantID, opts.OpenShift.CloudCredentials.SecretName, azureManagedCredentialKeyTenantID), envVarFromSecret(EnvAzureSubscriptionID, opts.OpenShift.CloudCredentials.SecretName, azureManagedCredentialKeySubscriptionID), - envVarFromValue(EnvAzureFederatedTokenFile, path.Join(azureTokenVolumeDirectory, "token")), + envVarFromValue(EnvAzureFederatedTokenFile, ServiceAccountTokenFilePath), } } @@ -208,7 +220,11 @@ func managedAuthCredentials(opts Options) []corev1.EnvVar { envVarFromSecret(EnvAzureClientID, opts.SecretName, KeyAzureStorageClientID), envVarFromSecret(EnvAzureTenantID, opts.SecretName, KeyAzureStorageTenantID), envVarFromSecret(EnvAzureSubscriptionID, opts.SecretName, KeyAzureStorageSubscriptionID), - envVarFromValue(EnvAzureFederatedTokenFile, path.Join(azureTokenVolumeDirectory, "token")), + envVarFromValue(EnvAzureFederatedTokenFile, ServiceAccountTokenFilePath), + } + case lokiv1.ObjectStorageSecretGCS: + return []corev1.EnvVar{ + envVarFromValue(EnvGoogleApplicationCredentials, path.Join(secretDirectory, KeyGCPServiceAccountKeyFilename)), } default: return []corev1.EnvVar{} @@ -290,25 +306,13 @@ func managedAuthEnabled(opts Options) bool { return opts.S3 != nil && opts.S3.STS case lokiv1.ObjectStorageSecretAzure: return opts.Azure != nil && opts.Azure.WorkloadIdentity + case lokiv1.ObjectStorageSecretGCS: + return opts.GCS != nil && opts.GCS.WorkloadIdentity default: return false } } -func saTokenVolumeMount(opts Options) corev1.VolumeMount { - var tokenPath string - switch opts.SharedStore { - case lokiv1.ObjectStorageSecretS3: - tokenPath = AWSTokenVolumeDirectory - case lokiv1.ObjectStorageSecretAzure: - tokenPath = azureTokenVolumeDirectory - } - return corev1.VolumeMount{ - Name: saTokenVolumeName, - MountPath: tokenPath, - } -} - func saTokenVolume(opts Options) corev1.Volume { var audience string storeType := opts.SharedStore @@ -323,6 +327,8 @@ func saTokenVolume(opts Options) corev1.Volume { if opts.Azure.Audience != "" { audience = opts.Azure.Audience } + case lokiv1.ObjectStorageSecretGCS: + audience = opts.GCS.Audience } return corev1.Volume{ Name: saTokenVolumeName, @@ -342,16 +348,9 @@ func saTokenVolume(opts Options) corev1.Volume { } } -func managedAuthVolumeMount(opts Options) corev1.VolumeMount { - return corev1.VolumeMount{ - Name: opts.OpenShift.CloudCredentials.SecretName, - MountPath: managedAuthSecretDirectory, - } -} - -func managedAuthVolume(opts Options) corev1.Volume { +func managedAuthConfigVolume(opts Options) corev1.Volume { return corev1.Volume{ - Name: 
opts.OpenShift.CloudCredentials.SecretName, + Name: managedAuthConfigVolumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ SecretName: opts.OpenShift.CloudCredentials.SecretName, diff --git a/operator/internal/manifests/storage/configure_test.go b/operator/internal/manifests/storage/configure_test.go index 03e22682f4028..2cd7b079a4b4a 100644 --- a/operator/internal/manifests/storage/configure_test.go +++ b/operator/internal/manifests/storage/configure_test.go @@ -206,7 +206,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/azure/serviceaccount", + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -256,7 +256,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -331,7 +331,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/azure/serviceaccount", + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -381,7 +381,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -462,11 +462,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/azure/serviceaccount", - }, - { - Name: "cloud-credentials", - MountPath: managedAuthSecretDirectory, + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -516,7 +512,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -546,11 +542,59 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, + }, + }, + }, + }, + }, + }, + { + desc: "object storage GCS", + opts: Options{ + SecretName: "test", + SharedStore: lokiv1.ObjectStorageSecretGCS, + }, + dpl: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ { - Name: "cloud-credentials", + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvGoogleApplicationCredentials, + Value: "/etc/storage/secrets/key.json", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: "cloud-credentials", + SecretName: "test", }, }, }, @@ -561,10 +605,14 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, { - desc: "object storage GCS", + desc: "object storage GCS with Workload Identity", opts: Options{ SecretName: "test", SharedStore: lokiv1.ObjectStorageSecretGCS, + GCS: &GCSStorageConfig{ + Audience: "test", + WorkloadIdentity: true, + }, }, dpl: &appsv1.Deployment{ Spec: appsv1.DeploymentSpec{ @@ -592,6 +640,11 @@ func 
TestConfigureDeploymentForStorageType(t *testing.T) { ReadOnly: false, MountPath: "/etc/storage/secrets", }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: saTokenVolumeMountPath, + }, }, Env: []corev1.EnvVar{ { @@ -610,6 +663,22 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: "test", + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, }, }, }, @@ -729,7 +798,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/aws/serviceaccount", + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -746,7 +815,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, { Name: "AWS_WEB_IDENTITY_TOKEN_FILE", - Value: "/var/run/secrets/aws/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -827,13 +896,9 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/aws/serviceaccount", - }, - { - Name: "cloud-credentials", - ReadOnly: false, - MountPath: "/etc/storage/managed-auth", + MountPath: saTokenVolumeMountPath, }, + managedAuthConfigVolumeMount, }, Env: []corev1.EnvVar{ { @@ -873,7 +938,7 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, { - Name: "cloud-credentials", + Name: managedAuthConfigVolumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ SecretName: "cloud-credentials", @@ -1259,7 +1324,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/azure/serviceaccount", + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -1309,7 +1374,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -1384,7 +1449,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/azure/serviceaccount", + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -1434,7 +1499,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -1515,11 +1580,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/azure/serviceaccount", - }, - { - Name: "cloud-credentials", - MountPath: managedAuthSecretDirectory, + MountPath: saTokenVolumeMountPath, }, }, Env: []corev1.EnvVar{ @@ -1569,7 +1630,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, { Name: EnvAzureFederatedTokenFile, - Value: "/var/run/secrets/azure/serviceaccount/token", + Value: "/var/run/secrets/storage/serviceaccount/token", }, }, }, @@ -1599,11 +1660,59 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, }, + }, + }, + }, + }, + }, + }, + { + desc: "object storage GCS", + opts: Options{ + SecretName: "test", + SharedStore: 
lokiv1.ObjectStorageSecretGCS, + }, + sts: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ { - Name: "cloud-credentials", + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvGoogleApplicationCredentials, + Value: "/etc/storage/secrets/key.json", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: "cloud-credentials", + SecretName: "test", }, }, }, @@ -1614,10 +1723,14 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, { - desc: "object storage GCS", + desc: "object storage GCS with Workload Identity", opts: Options{ SecretName: "test", SharedStore: lokiv1.ObjectStorageSecretGCS, + GCS: &GCSStorageConfig{ + Audience: "test", + WorkloadIdentity: true, + }, }, sts: &appsv1.StatefulSet{ Spec: appsv1.StatefulSetSpec{ @@ -1645,6 +1758,11 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { ReadOnly: false, MountPath: "/etc/storage/secrets", }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: saTokenVolumeMountPath, + }, }, Env: []corev1.EnvVar{ { @@ -1663,6 +1781,22 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: "test", + ExpirationSeconds: ptr.To[int64](3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, }, }, }, @@ -1788,13 +1922,9 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { { Name: saTokenVolumeName, ReadOnly: false, - MountPath: "/var/run/secrets/aws/serviceaccount", - }, - { - Name: "cloud-credentials", - ReadOnly: false, - MountPath: "/etc/storage/managed-auth", + MountPath: saTokenVolumeMountPath, }, + managedAuthConfigVolumeMount, }, Env: []corev1.EnvVar{ { @@ -1834,7 +1964,7 @@ func TestConfigureStatefulSetForStorageType(t *testing.T) { }, }, { - Name: "cloud-credentials", + Name: managedAuthConfigVolumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ SecretName: "cloud-credentials", diff --git a/operator/internal/manifests/storage/options.go b/operator/internal/manifests/storage/options.go index 4c62ce7513755..6693d2261e978 100644 --- a/operator/internal/manifests/storage/options.go +++ b/operator/internal/manifests/storage/options.go @@ -23,18 +23,55 @@ type Options struct { OpenShift OpenShiftOptions } +// CredentialMode returns which mode is used by the current storage configuration. +// This defaults to CredentialModeStatic, but can be CredentialModeToken +// or CredentialModeManaged depending on the object storage provide, the provided +// secret and whether the operator is running in a managed-auth cluster. 
+func (o Options) CredentialMode() lokiv1.CredentialMode { + if o.Azure != nil { + if o.OpenShift.ManagedAuthEnabled() { + return lokiv1.CredentialModeManaged + } + + if o.Azure.WorkloadIdentity { + return lokiv1.CredentialModeToken + } + } + + if o.GCS != nil { + if o.GCS.WorkloadIdentity { + return lokiv1.CredentialModeToken + } + } + + if o.S3 != nil { + if o.OpenShift.ManagedAuthEnabled() { + return lokiv1.CredentialModeManaged + } + + if o.S3.STS { + return lokiv1.CredentialModeToken + } + } + + return lokiv1.CredentialModeStatic +} + // AzureStorageConfig for Azure storage config type AzureStorageConfig struct { Env string Container string EndpointSuffix string Audience string + Region string WorkloadIdentity bool } // GCSStorageConfig for GCS storage config type GCSStorageConfig struct { - Bucket string + Bucket string + Audience string + WorkloadIdentity bool } // S3StorageConfig for S3 storage config diff --git a/operator/internal/manifests/storage/var.go b/operator/internal/manifests/storage/var.go index 418fb27152bd3..cbd944a821c34 100644 --- a/operator/internal/manifests/storage/var.go +++ b/operator/internal/manifests/storage/var.go @@ -1,5 +1,7 @@ package storage +import "fmt" + const ( // EnvAlibabaCloudAccessKeyID is the environment variable to specify the AlibabaCloud client id to access S3. EnvAlibabaCloudAccessKeyID = "ALIBABA_CLOUD_ACCESS_KEY_ID" @@ -91,6 +93,8 @@ const ( // KeyAzureAudience is the secret data key for customizing the audience used for the ServiceAccount token. KeyAzureAudience = "audience" + // KeyGCPWorkloadIdentityProviderAudience is the secret data key for the GCP Workload Identity Provider audience. + KeyGCPWorkloadIdentityProviderAudience = "audience" // KeyGCPStorageBucketName is the secret data key for the GCS bucket name. KeyGCPStorageBucketName = "bucketname" // KeyGCPServiceAccountKeyFilename is the service account key filename containing the Google authentication credentials. @@ -125,24 +129,29 @@ const ( // KeySwiftUsername is the secret data key for the OpenStack Swift password. KeySwiftUsername = "username" - saTokenVolumeK8sDirectory = "/var/run/secrets/kubernetes.io/serviceaccount" - saTokenVolumeName = "bound-sa-token" - saTokenExpiration int64 = 3600 + saTokenVolumeName = "bound-sa-token" + saTokenExpiration int64 = 3600 + saTokenVolumeMountPath = "/var/run/secrets/storage/serviceaccount" + + ServiceAccountTokenFilePath = saTokenVolumeMountPath + "/token" + + secretDirectory = "/etc/storage/secrets" + storageTLSVolume = "storage-tls" + caDirectory = "/etc/storage/ca" - secretDirectory = "/etc/storage/secrets" - managedAuthSecretDirectory = "/etc/storage/managed-auth" - storageTLSVolume = "storage-tls" - caDirectory = "/etc/storage/ca" + managedAuthConfigVolumeName = "managed-auth-config" + managedAuthConfigDirectory = "/etc/storage/managed-auth" - awsDefaultAudience = "sts.amazonaws.com" - AWSTokenVolumeDirectory = "/var/run/secrets/aws/serviceaccount" + awsDefaultAudience = "sts.amazonaws.com" - azureDefaultAudience = "api://AzureADTokenExchange" - azureTokenVolumeDirectory = "/var/run/secrets/azure/serviceaccount" + azureDefaultAudience = "api://AzureADTokenExchange" azureManagedCredentialKeyClientID = "azure_client_id" azureManagedCredentialKeyTenantID = "azure_tenant_id" azureManagedCredentialKeySubscriptionID = "azure_subscription_id" - - AnnotationCredentialsRequestsSecretRef = "loki.grafana.com/credentials-request-secret-ref" ) + +// ManagedCredentialsSecretName returns the name of the secret holding the managed credentials. 
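Spelling out the precedence that CredentialMode() above encodes: OpenShift managed auth wins over token-based auth for Azure and S3, GCS only distinguishes static from workload-identity token, and everything else falls back to static. Below is a simplified standalone mirror of that decision; the plain strings and booleans stand in for the operator's Options and OpenShiftOptions types.

```go
package main

import "fmt"

// credentialMode mirrors Options.CredentialMode() above with plain values
// instead of the operator's Options/OpenShiftOptions types.
func credentialMode(provider string, workloadIdentityOrSTS, openshiftManagedAuth bool) string {
	switch provider {
	case "azure", "s3":
		if openshiftManagedAuth {
			return "managed"
		}
		if workloadIdentityOrSTS {
			return "token"
		}
	case "gcs":
		if workloadIdentityOrSTS {
			return "token"
		}
	}
	return "static"
}

func main() {
	fmt.Println(credentialMode("s3", false, false)) // static: access key / secret key
	fmt.Println(credentialMode("s3", true, false))  // token: plain STS secret
	fmt.Println(credentialMode("s3", true, true))   // managed: CredentialsRequest-provisioned secret
	fmt.Println(credentialMode("gcs", true, false)) // token: external_account key.json
}
```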
+func ManagedCredentialsSecretName(stackName string) string { + return fmt.Sprintf("%s-managed-credentials", stackName) +} diff --git a/operator/internal/status/status.go b/operator/internal/status/status.go index 281a167355c37..c544695d3d2ea 100644 --- a/operator/internal/status/status.go +++ b/operator/internal/status/status.go @@ -17,7 +17,7 @@ import ( // Refresh executes an aggregate update of the LokiStack Status struct, i.e. // - It recreates the Status.Components pod status map per component. // - It sets the appropriate Status.Condition to true that matches the pod status maps. -func Refresh(ctx context.Context, k k8s.Client, req ctrl.Request, now time.Time, degradedErr *DegradedError) error { +func Refresh(ctx context.Context, k k8s.Client, req ctrl.Request, now time.Time, credentialMode lokiv1.CredentialMode, degradedErr *DegradedError) error { var stack lokiv1.LokiStack if err := k.Get(ctx, req.NamespacedName, &stack); err != nil { if apierrors.IsNotFound(err) { @@ -45,6 +45,7 @@ func Refresh(ctx context.Context, k k8s.Client, req ctrl.Request, now time.Time, statusUpdater := func(stack *lokiv1.LokiStack) { stack.Status.Components = *cs stack.Status.Conditions = mergeConditions(stack.Status.Conditions, activeConditions, metaTime) + stack.Status.Storage.CredentialMode = credentialMode } statusUpdater(&stack) diff --git a/operator/internal/status/status_test.go b/operator/internal/status/status_test.go index c7895cbe8020e..32ef892ed1bde 100644 --- a/operator/internal/status/status_test.go +++ b/operator/internal/status/status_test.go @@ -54,7 +54,9 @@ func TestRefreshSuccess(t *testing.T) { Gateway: map[corev1.PodPhase][]string{corev1.PodRunning: {"lokistack-gateway-pod-0"}}, Ruler: map[corev1.PodPhase][]string{corev1.PodRunning: {"ruler-pod-0"}}, }, - Storage: lokiv1.LokiStackStorageStatus{}, + Storage: lokiv1.LokiStackStorageStatus{ + CredentialMode: lokiv1.CredentialModeStatic, + }, Conditions: []metav1.Condition{ { Type: string(lokiv1.ConditionReady), @@ -68,7 +70,7 @@ func TestRefreshSuccess(t *testing.T) { k, sw := setupListClient(t, stack, componentPods) - err := Refresh(context.Background(), k, req, now, nil) + err := Refresh(context.Background(), k, req, now, lokiv1.CredentialModeStatic, nil) require.NoError(t, err) require.Equal(t, 1, k.GetCallCount()) @@ -130,7 +132,7 @@ func TestRefreshSuccess_ZoneAwarePendingPod(t *testing.T) { return nil } - err := Refresh(context.Background(), k, req, now, nil) + err := Refresh(context.Background(), k, req, now, lokiv1.CredentialModeStatic, nil) require.NoError(t, err) require.Equal(t, 1, k.GetCallCount()) diff --git a/operator/main.go b/operator/main.go index a88a857bcee44..e212c268cbad8 100644 --- a/operator/main.go +++ b/operator/main.go @@ -21,7 +21,6 @@ import ( lokiv1beta1 "github.com/grafana/loki/operator/apis/loki/v1beta1" lokictrl "github.com/grafana/loki/operator/controllers/loki" "github.com/grafana/loki/operator/internal/config" - manifestsocp "github.com/grafana/loki/operator/internal/manifests/openshift" "github.com/grafana/loki/operator/internal/metrics" "github.com/grafana/loki/operator/internal/operator" "github.com/grafana/loki/operator/internal/validation" @@ -60,12 +59,16 @@ func main() { var err error - ctrlCfg, options, err := config.LoadConfig(scheme, configFile) + ctrlCfg, managedAuth, options, err := config.LoadConfig(scheme, configFile) if err != nil { logger.Error(err, "failed to load operator configuration") os.Exit(1) } + if managedAuth != nil { + logger.Info("Discovered OpenShift Cluster within a 
managed authentication environment") + } + if ctrlCfg.Gates.LokiStackAlerts && !ctrlCfg.Gates.ServiceMonitors { logger.Error(kverrors.New("LokiStackAlerts flag requires ServiceMonitors"), "") os.Exit(1) @@ -95,16 +98,12 @@ func main() { os.Exit(1) } - if ctrlCfg.Gates.OpenShift.Enabled && manifestsocp.DiscoverManagedAuthEnv() != nil { - logger.Info("discovered OpenShift Cluster within a managed authentication environment") - ctrlCfg.Gates.OpenShift.ManagedAuthEnv = true - } - if err = (&lokictrl.LokiStackReconciler{ Client: mgr.GetClient(), Log: logger.WithName("controllers").WithName("lokistack"), Scheme: mgr.GetScheme(), FeatureGates: ctrlCfg.Gates, + AuthConfig: managedAuth, }).SetupWithManager(mgr); err != nil { logger.Error(err, "unable to create controller", "controller", "lokistack") os.Exit(1) @@ -129,17 +128,6 @@ func main() { } } - if ctrlCfg.Gates.OpenShift.ManagedAuthEnabled() { - if err = (&lokictrl.CredentialsRequestsReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Log: logger.WithName("controllers").WithName("lokistack-credentialsrequest"), - }).SetupWithManager(mgr); err != nil { - logger.Error(err, "unable to create controller", "controller", "lokistack-credentialsrequest") - os.Exit(1) - } - } - if ctrlCfg.Gates.LokiStackWebhook { v := &validation.LokiStackValidator{} if err = v.SetupWebhookWithManager(mgr); err != nil { diff --git a/pkg/bloomcompactor/batch.go b/pkg/bloomcompactor/batch.go new file mode 100644 index 0000000000000..2d43f83219df9 --- /dev/null +++ b/pkg/bloomcompactor/batch.go @@ -0,0 +1,95 @@ +package bloomcompactor + +import ( + "context" + + "github.com/grafana/dskit/multierror" + + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" +) + +// interface modeled from `pkg/storage/stores/shipper/bloomshipper.Fetcher` +type blocksFetcher interface { + FetchBlocks(context.Context, []bloomshipper.BlockRef) ([]*bloomshipper.CloseableBlockQuerier, error) +} + +func newBatchedBlockLoader(ctx context.Context, fetcher blocksFetcher, blocks []bloomshipper.BlockRef) (*batchedBlockLoader, error) { + return &batchedBlockLoader{ + ctx: ctx, + batchSize: 10, // make configurable? + source: blocks, + fetcher: fetcher, + }, nil +} + +type batchedBlockLoader struct { + ctx context.Context + batchSize int + + source []bloomshipper.BlockRef + fetcher blocksFetcher + + batch []*bloomshipper.CloseableBlockQuerier + cur *bloomshipper.CloseableBlockQuerier + err error +} + +// At implements v1.CloseableIterator. +func (b *batchedBlockLoader) At() *bloomshipper.CloseableBlockQuerier { + return b.cur +} + +// Close implements v1.CloseableIterator. +func (b *batchedBlockLoader) Close() error { + if b.cur != nil { + return b.cur.Close() + } + return nil +} + +// CloseBatch closes the remaining items from the current batch +func (b *batchedBlockLoader) CloseBatch() error { + var err multierror.MultiError + for _, cur := range b.batch { + err.Add(cur.Close()) + } + if len(b.batch) > 0 { + b.batch = b.batch[:0] + } + return err.Err() +} + +// Err implements v1.CloseableIterator. +func (b *batchedBlockLoader) Err() error { + return b.err +} + +// Next implements v1.CloseableIterator. 
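One behavior worth noting before Next(): the loader issues one FetchBlocks call per batch, i.e. the ceiling of len(source)/batchSize calls overall, which is what the 25-block unit test further down exercises. A tiny standalone check of that arithmetic; the helper is hypothetical and not part of this diff.

```go
package main

import "fmt"

// expectedFetches is the number of FetchBlocks calls a batchedBlockLoader
// makes for n source refs at the given batch size (ceiling division).
func expectedFetches(n, batchSize int) int {
	return (n + batchSize - 1) / batchSize
}

func main() {
	fmt.Println(expectedFetches(25, 10)) // 3, matches the 25-block test below
	fmt.Println(expectedFetches(20, 10)) // 2, an exact multiple needs no extra call
}
```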
+func (b *batchedBlockLoader) Next() bool { + if len(b.batch) > 0 { + return b.setNext() + } + + if len(b.source) == 0 { + return false + } + + // setup next batch + batchSize := min(b.batchSize, len(b.source)) + toFetch := b.source[:batchSize] + + // update source + b.source = b.source[batchSize:] + + b.batch, b.err = b.fetcher.FetchBlocks(b.ctx, toFetch) + if b.err != nil { + return false + } + return b.setNext() +} + +func (b *batchedBlockLoader) setNext() bool { + b.cur, b.err = b.batch[0], nil + b.batch = b.batch[1:] + return true +} diff --git a/pkg/bloomcompactor/batch_test.go b/pkg/bloomcompactor/batch_test.go new file mode 100644 index 0000000000000..a1922bf931b86 --- /dev/null +++ b/pkg/bloomcompactor/batch_test.go @@ -0,0 +1,37 @@ +package bloomcompactor + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "go.uber.org/atomic" + + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" +) + +type dummyBlocksFetcher struct { + count *atomic.Int32 +} + +func (f *dummyBlocksFetcher) FetchBlocks(_ context.Context, blocks []bloomshipper.BlockRef) ([]*bloomshipper.CloseableBlockQuerier, error) { + f.count.Inc() + return make([]*bloomshipper.CloseableBlockQuerier, len(blocks)), nil +} + +func TestBatchedBlockLoader(t *testing.T) { + ctx := context.Background() + f := &dummyBlocksFetcher{count: atomic.NewInt32(0)} + + blocks := make([]bloomshipper.BlockRef, 25) + blocksIter, err := newBatchedBlockLoader(ctx, f, blocks) + require.NoError(t, err) + + var count int + for blocksIter.Next() && blocksIter.Err() == nil { + count++ + } + + require.Equal(t, len(blocks), count) + require.Equal(t, int32(len(blocks)/blocksIter.batchSize+1), f.count.Load()) +} diff --git a/pkg/bloomcompactor/bloomcompactor.go b/pkg/bloomcompactor/bloomcompactor.go index cf3b3fafcb6d1..ed1f50ae72582 100644 --- a/pkg/bloomcompactor/bloomcompactor.go +++ b/pkg/bloomcompactor/bloomcompactor.go @@ -2,23 +2,25 @@ package bloomcompactor import ( "context" - "fmt" + "math" + "sync" "time" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/backoff" + "github.com/grafana/dskit/concurrency" "github.com/grafana/dskit/multierror" "github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" - "github.com/grafana/loki/pkg/bloomutils" - "github.com/grafana/loki/pkg/compactor" + "github.com/grafana/loki/pkg/storage" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/storage/stores" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" - "github.com/grafana/loki/pkg/util" ) /* @@ -33,38 +35,74 @@ Bloom-compactor regularly runs to check for changes in meta.jsons and runs compa type Compactor struct { services.Service - cfg Config - logger log.Logger - limits Limits + cfg Config + schemaCfg config.SchemaConfig + logger log.Logger + limits Limits - // temporary workaround until store has implemented read/write shipper interface - store bloomshipper.Store + tsdbStore TSDBStore + // TODO(owen-d): ShardingStrategy + controller *SimpleBloomController + + // temporary workaround until bloomStore has implemented read/write shipper interface + bloomStore bloomshipper.Store sharding ShardingStrategy - metrics *metrics + metrics *Metrics btMetrics *v1.Metrics } func New( cfg Config, - store bloomshipper.Store, + schemaCfg config.SchemaConfig, + storeCfg storage.Config, + clientMetrics storage.ClientMetrics, + 
fetcherProvider stores.ChunkFetcherProvider, sharding ShardingStrategy, limits Limits, logger log.Logger, r prometheus.Registerer, ) (*Compactor, error) { c := &Compactor{ - cfg: cfg, - store: store, - logger: logger, - sharding: sharding, - limits: limits, + cfg: cfg, + schemaCfg: schemaCfg, + logger: logger, + sharding: sharding, + limits: limits, + } + + tsdbStore, err := NewTSDBStores(schemaCfg, storeCfg, clientMetrics) + if err != nil { + return nil, errors.Wrap(err, "failed to create TSDB store") + } + c.tsdbStore = tsdbStore + + // TODO(owen-d): export bloomstore as a dependency that can be reused by the compactor & gateway rather that + bloomStore, err := bloomshipper.NewBloomStore(schemaCfg.Configs, storeCfg, clientMetrics, nil, nil, logger) + if err != nil { + return nil, errors.Wrap(err, "failed to create bloom store") } + c.bloomStore = bloomStore // initialize metrics c.btMetrics = v1.NewMetrics(prometheus.WrapRegistererWithPrefix("loki_bloom_tokenizer", r)) - c.metrics = newMetrics(r) + c.metrics = NewMetrics(r, c.btMetrics) + + chunkLoader := NewStoreChunkLoader( + fetcherProvider, + c.metrics, + ) + + c.controller = NewSimpleBloomController( + c.tsdbStore, + c.bloomStore, + chunkLoader, + c.limits, + c.metrics, + c.logger, + ) + c.metrics.compactionRunInterval.Set(cfg.CompactionInterval.Seconds()) c.Service = services.NewBasicService(c.starting, c.running, c.stopping) @@ -76,192 +114,203 @@ func (c *Compactor) starting(_ context.Context) (err error) { return err } +func (c *Compactor) stopping(_ error) error { + c.metrics.compactorRunning.Set(0) + return nil +} + func (c *Compactor) running(ctx context.Context) error { - // Run an initial compaction before starting the interval. - if err := c.runCompaction(ctx); err != nil { - level.Error(c.logger).Log("msg", "failed to run compaction", "err", err) + // run once at beginning + if err := c.runOne(ctx); err != nil { + return err } - ticker := time.NewTicker(util.DurationWithJitter(c.cfg.CompactionInterval, 0.05)) + ticker := time.NewTicker(c.cfg.CompactionInterval) defer ticker.Stop() - for { select { - case start := <-ticker.C: - c.metrics.compactionRunsStarted.Inc() - if err := c.runCompaction(ctx); err != nil { - c.metrics.compactionRunsCompleted.WithLabelValues(statusFailure).Inc() - c.metrics.compactionRunTime.WithLabelValues(statusFailure).Observe(time.Since(start).Seconds()) - level.Error(c.logger).Log("msg", "failed to run compaction", "err", err) - continue - } - c.metrics.compactionRunsCompleted.WithLabelValues(statusSuccess).Inc() - c.metrics.compactionRunTime.WithLabelValues(statusSuccess).Observe(time.Since(start).Seconds()) case <-ctx.Done(): - return nil + return ctx.Err() + + case <-ticker.C: + if err := c.runOne(ctx); err != nil { + level.Error(c.logger).Log("msg", "compaction iteration failed", "err", err) + return err + } } } } -func (c *Compactor) stopping(_ error) error { - c.metrics.compactorRunning.Set(0) - return nil -} - -func (c *Compactor) runCompaction(ctx context.Context) error { - var tables []string - // TODO(owen-d): resolve tables +func runWithRetries( + ctx context.Context, + minBackoff, maxBackoff time.Duration, + maxRetries int, + f func(ctx context.Context) error, +) error { + var lastErr error - // process most recent tables first - tablesIntervals := getIntervalsForTables(tables) - compactor.SortTablesByRange(tables) + retries := backoff.New(ctx, backoff.Config{ + MinBackoff: minBackoff, + MaxBackoff: maxBackoff, + MaxRetries: maxRetries, + }) - // TODO(owen-d): parallelize at the bottom 
level, not the top level. - // Can dispatch to a queue & wait. - for _, table := range tables { - logger := log.With(c.logger, "table", table) - err := c.compactTable(ctx, logger, table, tablesIntervals[table]) - if err != nil { - level.Error(logger).Log("msg", "failed to compact table", "err", err) - return errors.Wrapf(err, "failed to compact table %s", table) + for retries.Ongoing() { + lastErr = f(ctx) + if lastErr == nil { + return nil } + + retries.Wait() } - return nil + + return lastErr } -func (c *Compactor) compactTable(ctx context.Context, logger log.Logger, tableName string, tableInterval model.Interval) error { - // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). - if err := ctx.Err(); err != nil { - return fmt.Errorf("interrupting compaction of table: %w", err) +type tenantTable struct { + tenant string + table DayTable + ownershipRange v1.FingerprintBounds +} + +func (c *Compactor) tenants(ctx context.Context, table DayTable) (v1.Iterator[string], error) { + tenants, err := c.tsdbStore.UsersForPeriod(ctx, table) + if err != nil { + return nil, errors.Wrap(err, "getting tenants") } - var tenants []string + return v1.NewSliceIter(tenants), nil +} - level.Info(logger).Log("msg", "discovered tenants from bucket", "users", len(tenants)) - return c.compactUsers(ctx, logger, tableName, tableInterval, tenants) +// TODO(owen-d): implement w/ subrings +func (c *Compactor) ownsTenant(_ string) (ownershipRange v1.FingerprintBounds, owns bool) { + return v1.NewBounds(0, math.MaxUint64), true } -func (c *Compactor) compactUsers(ctx context.Context, logger log.Logger, tableName string, tableInterval model.Interval, tenants []string) error { - // Keep track of tenants owned by this shard, so that we can delete the local files for all other users. - errs := multierror.New() - ownedTenants := make(map[string]struct{}, len(tenants)) - for _, tenant := range tenants { - tenantLogger := log.With(logger, "tenant", tenant) +// runs a single round of compaction for all relevant tenants and tables +func (c *Compactor) runOne(ctx context.Context) error { + var workersErr error + var wg sync.WaitGroup + ch := make(chan tenantTable) + wg.Add(1) + go func() { + workersErr = c.runWorkers(ctx, ch) + wg.Done() + }() + + err := c.loadWork(ctx, ch) + + wg.Wait() + return multierror.New(workersErr, err, ctx.Err()).Err() +} - // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). - if err := ctx.Err(); err != nil { - return fmt.Errorf("interrupting compaction of tenants: %w", err) - } +func (c *Compactor) tables(ts time.Time) *dayRangeIterator { + // adjust the minimum by one to make it inclusive, which is more intuitive + // for a configuration variable + adjustedMin := min(c.cfg.MinTableCompactionPeriod - 1) + minCompactionPeriod := time.Duration(adjustedMin) * config.ObjectStorageIndexRequiredPeriod + maxCompactionPeriod := time.Duration(c.cfg.MaxTableCompactionPeriod) * config.ObjectStorageIndexRequiredPeriod - // Skip tenant if compaction is not enabled - if !c.limits.BloomCompactorEnabled(tenant) { - level.Info(tenantLogger).Log("msg", "compaction disabled for tenant. 
Skipping.") - continue - } + from := ts.Add(-maxCompactionPeriod).UnixNano() / int64(config.ObjectStorageIndexRequiredPeriod) * int64(config.ObjectStorageIndexRequiredPeriod) + through := ts.Add(-minCompactionPeriod).UnixNano() / int64(config.ObjectStorageIndexRequiredPeriod) * int64(config.ObjectStorageIndexRequiredPeriod) - // Skip this table if it is too old for the tenant limits. - now := model.Now() - tableMaxAge := c.limits.BloomCompactorMaxTableAge(tenant) - if tableMaxAge > 0 && tableInterval.Start.Before(now.Add(-tableMaxAge)) { - level.Debug(tenantLogger).Log("msg", "skipping tenant because table is too old", "table-max-age", tableMaxAge, "table-start", tableInterval.Start, "now", now) - continue - } + fromDay := DayTable(model.TimeFromUnixNano(from)) + throughDay := DayTable(model.TimeFromUnixNano(through)) + return newDayRangeIterator(fromDay, throughDay) + +} - // Ensure the tenant ID belongs to our shard. - if !c.sharding.OwnsTenant(tenant) { - c.metrics.compactionRunSkippedTenants.Inc() - level.Debug(tenantLogger).Log("msg", "skipping tenant because it is not owned by this shard") - continue +func (c *Compactor) loadWork(ctx context.Context, ch chan<- tenantTable) error { + tables := c.tables(time.Now()) + + for tables.Next() && tables.Err() == nil && ctx.Err() == nil { + + table := tables.At() + tenants, err := c.tenants(ctx, table) + if err != nil { + return errors.Wrap(err, "getting tenants") } - ownedTenants[tenant] = struct{}{} - - start := time.Now() - if err := c.compactTenantWithRetries(ctx, tenantLogger, tableName, tenant); err != nil { - switch { - case errors.Is(err, context.Canceled): - // We don't want to count shutdowns as failed compactions because we will pick up with the rest of the compaction after the restart. - level.Info(tenantLogger).Log("msg", "compaction for tenant was interrupted by a shutdown") - return nil - default: - c.metrics.compactionRunTenantsCompleted.WithLabelValues(statusFailure).Inc() - c.metrics.compactionRunTenantsTime.WithLabelValues(statusFailure).Observe(time.Since(start).Seconds()) - level.Error(tenantLogger).Log("msg", "failed to compact tenant", "err", err) - errs.Add(err) + for tenants.Next() && tenants.Err() == nil && ctx.Err() == nil { + tenant := tenants.At() + ownershipRange, owns := c.ownsTenant(tenant) + if !owns { + continue + } + + select { + case ch <- tenantTable{tenant: tenant, table: table, ownershipRange: ownershipRange}: + case <-ctx.Done(): + return ctx.Err() } - continue } - c.metrics.compactionRunTenantsCompleted.WithLabelValues(statusSuccess).Inc() - c.metrics.compactionRunTenantsTime.WithLabelValues(statusSuccess).Observe(time.Since(start).Seconds()) - level.Info(tenantLogger).Log("msg", "successfully compacted tenant") + if err := tenants.Err(); err != nil { + return errors.Wrap(err, "iterating tenants") + } + } - return errs.Err() + if err := tables.Err(); err != nil { + return errors.Wrap(err, "iterating tables") + } - // TODO: Delete local files for unowned tenants, if there are any. + close(ch) + return ctx.Err() } -func (c *Compactor) compactTenant(ctx context.Context, logger log.Logger, _ string, tenant string) error { - level.Info(logger).Log("msg", "starting compaction of tenant") - - // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). - if err := ctx.Err(); err != nil { - return err - } - - // Tokenizer is not thread-safe so we need one per goroutine. 
- nGramLen := c.limits.BloomNGramLength(tenant) - nGramSkip := c.limits.BloomNGramSkip(tenant) - _ = v1.NewBloomTokenizer(nGramLen, nGramSkip, c.btMetrics) +func (c *Compactor) runWorkers(ctx context.Context, ch <-chan tenantTable) error { + + return concurrency.ForEachJob(ctx, c.cfg.WorkerParallelism, c.cfg.WorkerParallelism, func(ctx context.Context, idx int) error { + + for { + select { + case <-ctx.Done(): + return ctx.Err() + + case tt, ok := <-ch: + if !ok { + return nil + } + + if err := c.compactTenantTable(ctx, tt); err != nil { + return errors.Wrapf( + err, + "compacting tenant table (%s) for tenant (%s) with ownership (%s)", + tt.table, + tt.tenant, + tt.ownershipRange, + ) + } + } + } - rs, err := c.sharding.GetTenantSubRing(tenant).GetAllHealthy(RingOp) - if err != nil { - return err - } - tokenRanges := bloomutils.GetInstanceWithTokenRange(c.cfg.Ring.InstanceID, rs.Instances) - for _, tr := range tokenRanges { - level.Debug(logger).Log("msg", "got token range for instance", "id", tr.Instance.Id, "min", tr.MinToken, "max", tr.MaxToken) - } + }) - // TODO(owen-d): impl - return nil } -func runWithRetries( - ctx context.Context, - minBackoff, maxBackoff time.Duration, - maxRetries int, - f func(ctx context.Context) error, -) error { - var lastErr error +func (c *Compactor) compactTenantTable(ctx context.Context, tt tenantTable) error { + level.Info(c.logger).Log("msg", "compacting", "org_id", tt.tenant, "table", tt.table, "ownership", tt.ownershipRange) + return c.controller.buildBlocks(ctx, tt.table, tt.tenant, tt.ownershipRange) +} - retries := backoff.New(ctx, backoff.Config{ - MinBackoff: minBackoff, - MaxBackoff: maxBackoff, - MaxRetries: maxRetries, - }) +type dayRangeIterator struct { + min, max, cur DayTable +} - for retries.Ongoing() { - lastErr = f(ctx) - if lastErr == nil { - return nil - } +func newDayRangeIterator(min, max DayTable) *dayRangeIterator { + return &dayRangeIterator{min: min, max: max, cur: min.Dec()} +} - retries.Wait() - } +func (r *dayRangeIterator) Next() bool { + r.cur = r.cur.Inc() + return r.cur.Before(r.max) +} - return lastErr +func (r *dayRangeIterator) At() DayTable { + return r.cur } -func (c *Compactor) compactTenantWithRetries(ctx context.Context, logger log.Logger, tableName string, tenant string) error { - return runWithRetries( - ctx, - c.cfg.RetryMinBackoff, - c.cfg.RetryMaxBackoff, - c.cfg.CompactionRetries, - func(ctx context.Context) error { - return c.compactTenant(ctx, logger, tableName, tenant) - }, - ) +func (r *dayRangeIterator) Err() error { + return nil } diff --git a/pkg/bloomcompactor/config.go b/pkg/bloomcompactor/config.go index 884034fdd043d..dd821d81c906b 100644 --- a/pkg/bloomcompactor/config.go +++ b/pkg/bloomcompactor/config.go @@ -2,8 +2,13 @@ package bloomcompactor import ( "flag" + "fmt" "time" + "github.com/prometheus/common/model" + + "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/downloads" "github.com/grafana/loki/pkg/util/ring" ) @@ -15,13 +20,14 @@ type Config struct { // section and the ingester configuration by default). Ring ring.RingConfig `yaml:"ring,omitempty" doc:"description=Defines the ring to be used by the bloom-compactor servers. 
In case this isn't configured, this block supports inheriting configuration from the common ring section."` // Enabled configures whether bloom-compactors should be used to compact index values into bloomfilters - Enabled bool `yaml:"enabled"` - WorkingDirectory string `yaml:"working_directory"` - CompactionInterval time.Duration `yaml:"compaction_interval"` - - RetryMinBackoff time.Duration `yaml:"compaction_retries_min_backoff"` - RetryMaxBackoff time.Duration `yaml:"compaction_retries_max_backoff"` - CompactionRetries int `yaml:"compaction_retries"` + Enabled bool `yaml:"enabled"` + CompactionInterval time.Duration `yaml:"compaction_interval"` + MinTableCompactionPeriod int `yaml:"min_table_compaction_period"` + MaxTableCompactionPeriod int `yaml:"max_table_compaction_period"` + WorkerParallelism int `yaml:"worker_parallelism"` + RetryMinBackoff time.Duration `yaml:"compaction_retries_min_backoff"` + RetryMaxBackoff time.Duration `yaml:"compaction_retries_max_backoff"` + CompactionRetries int `yaml:"compaction_retries"` MaxCompactionParallelism int `yaml:"max_compaction_parallelism"` } @@ -30,14 +36,29 @@ type Config struct { func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.Ring.RegisterFlagsWithPrefix("bloom-compactor.", "collectors/", f) f.BoolVar(&cfg.Enabled, "bloom-compactor.enabled", false, "Flag to enable or disable the usage of the bloom-compactor component.") - f.StringVar(&cfg.WorkingDirectory, "bloom-compactor.working-directory", "", "Directory where files can be downloaded for compaction.") f.DurationVar(&cfg.CompactionInterval, "bloom-compactor.compaction-interval", 10*time.Minute, "Interval at which to re-run the compaction operation.") + f.IntVar(&cfg.WorkerParallelism, "bloom-compactor.worker-parallelism", 1, "Number of workers to run in parallel for compaction.") + f.IntVar(&cfg.MinTableCompactionPeriod, "bloom-compactor.min-table-compaction-period", 1, "How many index periods (days) to wait before compacting a table. This can be used to lower cost by not re-writing data to object storage too frequently since recent data changes more often.") + // TODO(owen-d): ideally we'd set this per tenant based on their `reject_old_samples_max_age` setting, + // but due to how we need to discover tenants, we can't do that yet. Tenant+Period discovery is done by + // iterating the table periods in object storage and looking for tenants within that period. + // In order to have this done dynamically, we'd need to account for tenant specific overrides, which are also + // dynamically reloaded. + // I'm doing it the simple way for now. + f.IntVar(&cfg.MaxTableCompactionPeriod, "bloom-compactor.max-table-compaction-period", 7, "How many index periods (days) to wait before compacting a table. This can be used to lower cost by not trying to compact older data which doesn't change. This can be optimized by aligning it with the maximum `reject_old_samples_max_age` setting of any tenant.") f.DurationVar(&cfg.RetryMinBackoff, "bloom-compactor.compaction-retries-min-backoff", 10*time.Second, "Minimum backoff time between retries.") f.DurationVar(&cfg.RetryMaxBackoff, "bloom-compactor.compaction-retries-max-backoff", time.Minute, "Maximum backoff time between retries.") f.IntVar(&cfg.CompactionRetries, "bloom-compactor.compaction-retries", 3, "Number of retries to perform when compaction fails.") f.IntVar(&cfg.MaxCompactionParallelism, "bloom-compactor.max-compaction-parallelism", 1, "Maximum number of tables to compact in parallel. 
While increasing this value, please make sure compactor has enough disk space allocated to be able to store and compact as many tables.")
 }
 
+func (cfg *Config) Validate() error {
+	if cfg.MinTableCompactionPeriod > cfg.MaxTableCompactionPeriod {
+		return fmt.Errorf("min_table_compaction_period must be less than or equal to max_table_compaction_period")
+	}
+	return nil
+}
+
 type Limits interface {
 	downloads.Limits
 	BloomCompactorShardSize(tenantID string) int
@@ -47,4 +68,39 @@ type Limits interface {
 	BloomNGramLength(tenantID string) int
 	BloomNGramSkip(tenantID string) int
 	BloomFalsePositiveRate(tenantID string) float64
+	BloomCompactorMaxBlockSize(tenantID string) int
+}
+
+// TODO(owen-d): Remove this type in favor of config.DayTime
+type DayTable model.Time
+
+func (d DayTable) String() string {
+	return fmt.Sprintf("%d", d.ModelTime().Time().UnixNano()/int64(config.ObjectStorageIndexRequiredPeriod))
+}
+
+func (d DayTable) Inc() DayTable {
+	return DayTable(d.ModelTime().Add(config.ObjectStorageIndexRequiredPeriod))
+}
+
+func (d DayTable) Dec() DayTable {
+	return DayTable(d.ModelTime().Add(-config.ObjectStorageIndexRequiredPeriod))
+}
+
+func (d DayTable) Before(other DayTable) bool {
+	return d.ModelTime().Before(model.Time(other))
+}
+
+func (d DayTable) After(other DayTable) bool {
+	return d.ModelTime().After(model.Time(other))
+}
+
+func (d DayTable) ModelTime() model.Time {
+	return model.Time(d)
+}
+
+func (d DayTable) Bounds() bloomshipper.Interval {
+	return bloomshipper.Interval{
+		Start: model.Time(d),
+		End:   model.Time(d.Inc()),
+	}
 }
diff --git a/pkg/bloomcompactor/controller.go b/pkg/bloomcompactor/controller.go
index 2002d8ce2a8bc..47d9627d92e1a 100644
--- a/pkg/bloomcompactor/controller.go
+++ b/pkg/bloomcompactor/controller.go
@@ -1,12 +1,15 @@
 package bloomcompactor
 
 import (
+	"bytes"
 	"context"
 	"fmt"
+	"io"
 	"sort"
 
 	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
+	"github.com/grafana/dskit/multierror"
 	"github.com/pkg/errors"
 
 	v1 "github.com/grafana/loki/pkg/storage/bloom/v1"
@@ -14,57 +17,54 @@ import (
 	"github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb"
 )
 
-type uploader interface {
-	PutBlock(ctx context.Context, block bloomshipper.Block) error
-	PutMeta(ctx context.Context, meta bloomshipper.Meta) error
-}
-
 type SimpleBloomController struct {
-	// TODO(owen-d): consider making tenant+table dynamic (not 1 struct per combination)
-	tenant         string
-	table          string
-	ownershipRange v1.FingerprintBounds // ownership range of this controller
-	tsdbStore      TSDBStore
-	bloomStore     bloomshipper.Store
-	uploader       uploader
-	chunkLoader    ChunkLoader
-	rwFn           func() (v1.BlockWriter, v1.BlockReader)
-	metrics        *Metrics
+	tsdbStore   TSDBStore
+	bloomStore  bloomshipper.Store
+	chunkLoader ChunkLoader
+	metrics     *Metrics
+	limits      Limits
 
 	// TODO(owen-d): add metrics
 	logger log.Logger
 }
 
 func NewSimpleBloomController(
-	tenant, table string,
-	ownershipRange v1.FingerprintBounds,
 	tsdbStore TSDBStore,
 	blockStore bloomshipper.Store,
-	uploader uploader,
 	chunkLoader ChunkLoader,
-	rwFn func() (v1.BlockWriter, v1.BlockReader),
+	limits Limits,
 	metrics *Metrics,
 	logger log.Logger,
 ) *SimpleBloomController {
 	return &SimpleBloomController{
-		tenant:         tenant,
-		table:          table,
-		ownershipRange: ownershipRange,
-		tsdbStore:      tsdbStore,
-		bloomStore:     blockStore,
-		uploader:       uploader,
-		chunkLoader:    chunkLoader,
-		rwFn:           rwFn,
-		metrics:        metrics,
-		logger:         log.With(logger, "ownership", ownershipRange),
+		tsdbStore:   tsdbStore,
+		bloomStore:  blockStore,
+		chunkLoader: chunkLoader,
+		metrics:     metrics,
+
limits: limits, + logger: logger, } } -func (s *SimpleBloomController) do(ctx context.Context) error { +// TODO(owen-d): pool, evaluate if memory-only is the best choice +func (s *SimpleBloomController) rwFn() (v1.BlockWriter, v1.BlockReader) { + indexBuf := bytes.NewBuffer(nil) + bloomsBuf := bytes.NewBuffer(nil) + return v1.NewMemoryBlockWriter(indexBuf, bloomsBuf), v1.NewByteReader(indexBuf, bloomsBuf) +} + +func (s *SimpleBloomController) buildBlocks( + ctx context.Context, + table DayTable, + tenant string, + ownershipRange v1.FingerprintBounds, +) error { + logger := log.With(s.logger, "ownership", ownershipRange, "org_id", tenant, "table", table) + // 1. Resolve TSDBs - tsdbs, err := s.tsdbStore.ResolveTSDBs() + tsdbs, err := s.tsdbStore.ResolveTSDBs(ctx, table, tenant) if err != nil { - level.Error(s.logger).Log("msg", "failed to resolve tsdbs", "err", err) + level.Error(logger).Log("msg", "failed to resolve tsdbs", "err", err) return errors.Wrap(err, "failed to resolve tsdbs") } @@ -78,38 +78,47 @@ func (s *SimpleBloomController) do(ctx context.Context) error { } // 2. Fetch metas + bounds := table.Bounds() metas, err := s.bloomStore.FetchMetas( ctx, bloomshipper.MetaSearchParams{ - TenantID: s.tenant, - Interval: bloomshipper.Interval{}, // TODO(owen-d): gen interval - Keyspace: s.ownershipRange, + TenantID: tenant, + Interval: bloomshipper.Interval{ + Start: bounds.Start, + End: bounds.End, + }, + Keyspace: ownershipRange, }, ) if err != nil { - level.Error(s.logger).Log("msg", "failed to get metas", "err", err) + level.Error(logger).Log("msg", "failed to get metas", "err", err) return errors.Wrap(err, "failed to get metas") } // 3. Determine which TSDBs have gaps in the ownership range and need to // be processed. - tsdbsWithGaps, err := gapsBetweenTSDBsAndMetas(s.ownershipRange, ids, metas) + tsdbsWithGaps, err := gapsBetweenTSDBsAndMetas(ownershipRange, ids, metas) if err != nil { - level.Error(s.logger).Log("msg", "failed to find gaps", "err", err) + level.Error(logger).Log("msg", "failed to find gaps", "err", err) return errors.Wrap(err, "failed to find gaps") } if len(tsdbsWithGaps) == 0 { - level.Debug(s.logger).Log("msg", "blooms exist for all tsdbs") + level.Debug(logger).Log("msg", "blooms exist for all tsdbs") return nil } work, err := blockPlansForGaps(tsdbsWithGaps, metas) if err != nil { - level.Error(s.logger).Log("msg", "failed to create plan", "err", err) + level.Error(logger).Log("msg", "failed to create plan", "err", err) return errors.Wrap(err, "failed to create plan") } + nGramSize := uint64(s.limits.BloomNGramLength(tenant)) + nGramSkip := uint64(s.limits.BloomNGramSkip(tenant)) + maxBlockSize := uint64(s.limits.BloomCompactorMaxBlockSize(tenant)) + blockOpts := v1.NewBlockOptions(nGramSize, nGramSkip, maxBlockSize) + // 4. Generate Blooms // Now that we have the gaps, we will generate a bloom block for each gap. 
// We can accelerate this by using existing blocks which may already contain @@ -130,73 +139,128 @@ func (s *SimpleBloomController) do(ctx context.Context) error { for _, gap := range plan.gaps { // Fetch blocks that aren't up to date but are in the desired fingerprint range // to try and accelerate bloom creation - seriesItr, preExistingBlocks, err := s.loadWorkForGap(ctx, plan.tsdb, gap) + seriesItr, blocksIter, err := s.loadWorkForGap(ctx, table, tenant, plan.tsdb, gap) if err != nil { - level.Error(s.logger).Log("msg", "failed to get series and blocks", "err", err) + level.Error(logger).Log("msg", "failed to get series and blocks", "err", err) return errors.Wrap(err, "failed to get series and blocks") } gen := NewSimpleBloomGenerator( - v1.DefaultBlockOptions, + tenant, + blockOpts, seriesItr, s.chunkLoader, - preExistingBlocks, + blocksIter, s.rwFn, s.metrics, - log.With(s.logger, "tsdb", plan.tsdb.Name(), "ownership", gap, "blocks", len(preExistingBlocks)), + log.With(logger, "tsdb", plan.tsdb.Name(), "ownership", gap), ) - _, newBlocks, err := gen.Generate(ctx) + _, loaded, newBlocks, err := gen.Generate(ctx) + if err != nil { // TODO(owen-d): metrics - level.Error(s.logger).Log("msg", "failed to generate bloom", "err", err) + level.Error(logger).Log("msg", "failed to generate bloom", "err", err) + s.closeLoadedBlocks(loaded, blocksIter) return errors.Wrap(err, "failed to generate bloom") } - // TODO(owen-d): dispatch this to a queue for writing, handling retries/backpressure, etc? - for newBlocks.Next() { + client, err := s.bloomStore.Client(table.ModelTime()) + if err != nil { + level.Error(logger).Log("msg", "failed to get client", "err", err) + s.closeLoadedBlocks(loaded, blocksIter) + return errors.Wrap(err, "failed to get client") + } + + for newBlocks.Next() && newBlocks.Err() == nil { blockCt++ blk := newBlocks.At() - if err := s.uploader.PutBlock( + built, err := bloomshipper.BlockFrom(tenant, table.String(), blk) + if err != nil { + level.Error(logger).Log("msg", "failed to build block", "err", err) + return errors.Wrap(err, "failed to build block") + } + + if err := client.PutBlock( ctx, - bloomshipper.BlockFrom(s.tenant, s.table, blk), + built, ); err != nil { - level.Error(s.logger).Log("msg", "failed to write block", "err", err) + level.Error(logger).Log("msg", "failed to write block", "err", err) + s.closeLoadedBlocks(loaded, blocksIter) return errors.Wrap(err, "failed to write block") } } if err := newBlocks.Err(); err != nil { // TODO(owen-d): metrics - level.Error(s.logger).Log("msg", "failed to generate bloom", "err", err) + level.Error(logger).Log("msg", "failed to generate bloom", "err", err) + s.closeLoadedBlocks(loaded, blocksIter) return errors.Wrap(err, "failed to generate bloom") } + // Close pre-existing blocks + s.closeLoadedBlocks(loaded, blocksIter) } } // TODO(owen-d): build meta from blocks // TODO(owen-d): reap tombstones, old metas - level.Debug(s.logger).Log("msg", "finished bloom generation", "blocks", blockCt, "tsdbs", tsdbCt) + level.Debug(logger).Log("msg", "finished bloom generation", "blocks", blockCt, "tsdbs", tsdbCt) return nil } -func (s *SimpleBloomController) loadWorkForGap(ctx context.Context, id tsdb.Identifier, gap gapWithBlocks) (v1.CloseableIterator[*v1.Series], []*bloomshipper.CloseableBlockQuerier, error) { +func (s *SimpleBloomController) loadWorkForGap( + ctx context.Context, + table DayTable, + tenant string, + id tsdb.Identifier, + gap gapWithBlocks, +) (v1.CloseableIterator[*v1.Series], 
v1.CloseableIterator[*bloomshipper.CloseableBlockQuerier], error) { // load a series iterator for the gap - seriesItr, err := s.tsdbStore.LoadTSDB(id, gap.bounds) + seriesItr, err := s.tsdbStore.LoadTSDB(ctx, table, tenant, id, gap.bounds) if err != nil { return nil, nil, errors.Wrap(err, "failed to load tsdb") } - blocks, err := s.bloomStore.FetchBlocks(ctx, gap.blocks) + // load a blocks iterator for the gap + fetcher, err := s.bloomStore.Fetcher(table.ModelTime()) + if err != nil { + return nil, nil, errors.Wrap(err, "failed to get fetcher") + } + + blocksIter, err := newBatchedBlockLoader(ctx, fetcher, gap.blocks) if err != nil { - return nil, nil, errors.Wrap(err, "failed to get blocks") + return nil, nil, errors.Wrap(err, "failed to load blocks") + } + + return seriesItr, blocksIter, nil +} + +func (s *SimpleBloomController) closeLoadedBlocks(toClose []io.Closer, it v1.CloseableIterator[*bloomshipper.CloseableBlockQuerier]) { + // close loaded blocks + var err multierror.MultiError + for _, closer := range toClose { + err.Add(closer.Close()) + } + + switch itr := it.(type) { + case *batchedBlockLoader: + // close remaining loaded blocks from batch + err.Add(itr.CloseBatch()) + default: + // close remaining loaded blocks + for itr.Next() && itr.Err() == nil { + err.Add(itr.At().Close()) + } } - return seriesItr, blocks, nil + // log error + if err.Err() != nil { + level.Error(s.logger).Log("msg", "failed to close blocks", "err", err) + } } type gapWithBlocks struct { diff --git a/pkg/bloomcompactor/meta.go b/pkg/bloomcompactor/meta.go deleted file mode 100644 index 2f2c2cd9de16e..0000000000000 --- a/pkg/bloomcompactor/meta.go +++ /dev/null @@ -1,16 +0,0 @@ -package bloomcompactor - -import ( - v1 "github.com/grafana/loki/pkg/storage/bloom/v1" - "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" -) - -const ( - BloomPrefix = "bloom" - MetasPrefix = "metas" -) - -type TSDBStore interface { - ResolveTSDBs() ([]*tsdb.SingleTenantTSDBIdentifier, error) - LoadTSDB(id tsdb.Identifier, bounds v1.FingerprintBounds) (v1.CloseableIterator[*v1.Series], error) -} diff --git a/pkg/bloomcompactor/metrics.go b/pkg/bloomcompactor/metrics.go index ee2f1630ab5ec..b02ac32aca727 100644 --- a/pkg/bloomcompactor/metrics.go +++ b/pkg/bloomcompactor/metrics.go @@ -3,6 +3,8 @@ package bloomcompactor import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" ) const ( @@ -13,7 +15,10 @@ const ( statusFailure = "failure" ) -type metrics struct { +type Metrics struct { + bloomMetrics *v1.Metrics + chunkSize prometheus.Histogram // uncompressed size of all chunks summed per series + compactionRunsStarted prometheus.Counter compactionRunsCompleted *prometheus.CounterVec compactionRunTime *prometheus.HistogramVec @@ -28,8 +33,14 @@ type metrics struct { compactorRunning prometheus.Gauge } -func newMetrics(r prometheus.Registerer) *metrics { - m := metrics{ +func NewMetrics(r prometheus.Registerer, bloomMetrics *v1.Metrics) *Metrics { + m := Metrics{ + bloomMetrics: bloomMetrics, + chunkSize: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ + Name: "bloom_chunk_series_size", + Help: "Uncompressed size of chunks in a series", + Buckets: prometheus.ExponentialBucketsRange(1024, 1073741824, 10), + }), compactionRunsStarted: promauto.With(r).NewCounter(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: metricsSubsystem, diff --git a/pkg/bloomcompactor/spec.go 
b/pkg/bloomcompactor/spec.go index bf9a0a02387b4..4a1125082ca54 100644 --- a/pkg/bloomcompactor/spec.go +++ b/pkg/bloomcompactor/spec.go @@ -3,51 +3,26 @@ package bloomcompactor import ( "context" "fmt" + "io" "math" "time" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" - "github.com/grafana/dskit/multierror" - "github.com/grafana/loki/pkg/chunkenc" "github.com/grafana/loki/pkg/logproto" logql_log "github.com/grafana/loki/pkg/logql/log" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/chunk" + "github.com/grafana/loki/pkg/storage/chunk/fetcher" + "github.com/grafana/loki/pkg/storage/stores" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" ) -/* -This file maintains a number of things supporting bloom generation. Most notably, the `BloomGenerator` interface/implementation which builds bloom filters. - -- `BloomGenerator`: Builds blooms. Most other things in this file are supporting this in various ways. -- `SimpleBloomGenerator`: A foundational implementation of `BloomGenerator` which wires up a few different components to generate bloom filters for a set of blocks and handles schema compatibility: -- `chunkLoader`: Loads chunks w/ a specific fingerprint from the store, returns an iterator of chunk iterators. We return iterators rather than chunk implementations mainly for ease of testing. In practice, this will just be an iterator over `MemChunk`s. -*/ - -type Metrics struct { - bloomMetrics *v1.Metrics - chunkSize prometheus.Histogram // uncompressed size of all chunks summed per series -} - -func NewMetrics(r prometheus.Registerer, bloomMetrics *v1.Metrics) *Metrics { - return &Metrics{ - bloomMetrics: bloomMetrics, - chunkSize: promauto.With(r).NewHistogram(prometheus.HistogramOpts{ - Name: "bloom_chunk_series_size", - Help: "Uncompressed size of chunks in a series", - Buckets: prometheus.ExponentialBucketsRange(1024, 1073741824, 10), - }), - } -} - // inclusive range type Keyspace struct { min, max model.Fingerprint @@ -65,16 +40,15 @@ func (k Keyspace) Cmp(other Keyspace) v1.BoundsCheck { // Store is likely bound within. This allows specifying impls like ShardedStore // to only request the shard-range needed from the existing store. type BloomGenerator interface { - Generate(ctx context.Context) (skippedBlocks []*v1.Block, results v1.Iterator[*v1.Block], err error) + Generate(ctx context.Context) (skippedBlocks []v1.BlockMetadata, toClose []io.Closer, results v1.Iterator[*v1.Block], err error) } // Simple implementation of a BloomGenerator. type SimpleBloomGenerator struct { + userID string store v1.Iterator[*v1.Series] chunkLoader ChunkLoader - // TODO(owen-d): blocks need not be all downloaded prior. Consider implementing - // as an iterator of iterators, where each iterator is a batch of overlapping blocks. 
- blocks []*bloomshipper.CloseableBlockQuerier + blocksIter v1.CloseableIterator[*bloomshipper.CloseableBlockQuerier] // options to build blocks with opts v1.BlockOptions @@ -92,20 +66,21 @@ type SimpleBloomGenerator struct { // and handles schema compatibility: // Blocks which are incompatible with the schema are skipped and will have their chunks reindexed func NewSimpleBloomGenerator( + userID string, opts v1.BlockOptions, store v1.Iterator[*v1.Series], chunkLoader ChunkLoader, - blocks []*bloomshipper.CloseableBlockQuerier, + blocksIter v1.CloseableIterator[*bloomshipper.CloseableBlockQuerier], readWriterFn func() (v1.BlockWriter, v1.BlockReader), metrics *Metrics, logger log.Logger, ) *SimpleBloomGenerator { return &SimpleBloomGenerator{ - opts: opts, - // TODO(owen-d): implement Iterator[Series] against TSDB files to hook in here. + userID: userID, + opts: opts, store: store, chunkLoader: chunkLoader, - blocks: blocks, + blocksIter: blocksIter, logger: log.With(logger, "component", "bloom_generator"), readWriterFn: readWriterFn, metrics: metrics, @@ -116,7 +91,7 @@ func NewSimpleBloomGenerator( func (s *SimpleBloomGenerator) populator(ctx context.Context) func(series *v1.Series, bloom *v1.Bloom) error { return func(series *v1.Series, bloom *v1.Bloom) error { - chunkItersWithFP, err := s.chunkLoader.Load(ctx, series) + chunkItersWithFP, err := s.chunkLoader.Load(ctx, s.userID, series) if err != nil { return errors.Wrapf(err, "failed to load chunks for series: %+v", series) } @@ -132,69 +107,126 @@ func (s *SimpleBloomGenerator) populator(ctx context.Context) func(series *v1.Se } -func (s *SimpleBloomGenerator) Generate(ctx context.Context) (skippedBlocks []v1.BlockMetadata, results v1.Iterator[*v1.Block], err error) { +func (s *SimpleBloomGenerator) Generate(ctx context.Context) ([]v1.BlockMetadata, []io.Closer, v1.Iterator[*v1.Block], error) { + skippedBlocks := make([]v1.BlockMetadata, 0) + toClose := make([]io.Closer, 0) + blocksMatchingSchema := make([]*bloomshipper.CloseableBlockQuerier, 0) - var closeErrors multierror.MultiError - blocksMatchingSchema := make([]v1.PeekingIterator[*v1.SeriesWithBloom], 0, len(s.blocks)) - toClose := make([]*bloomshipper.CloseableBlockQuerier, 0, len(s.blocks)) - // Close all remaining blocks on exit - defer func() { - for _, block := range toClose { - closeErrors.Add(block.Close()) - } - if err := closeErrors.Err(); err != nil { - level.Error(s.logger).Log("msg", "failed to close blocks", "err", err) - } - }() + for s.blocksIter.Next() && s.blocksIter.Err() == nil { + block := s.blocksIter.At() + toClose = append(toClose, block) - for _, block := range s.blocks { - // TODO(owen-d): implement block naming so we can log the affected block in all these calls - logger := log.With(s.logger, "block", fmt.Sprintf("%+v", block)) + logger := log.With(s.logger, "block", block.BlockRef) md, err := block.Metadata() schema := md.Options.Schema if err != nil { level.Warn(logger).Log("msg", "failed to get schema for block", "err", err) skippedBlocks = append(skippedBlocks, md) - - // Close unneeded block - closeErrors.Add(block.Close()) continue } if !s.opts.Schema.Compatible(schema) { level.Warn(logger).Log("msg", "block schema incompatible with options", "generator_schema", fmt.Sprintf("%+v", s.opts.Schema), "block_schema", fmt.Sprintf("%+v", schema)) skippedBlocks = append(skippedBlocks, md) - - // Close unneeded block - closeErrors.Add(block.Close()) continue } level.Debug(logger).Log("msg", "adding compatible block to bloom generation inputs") - itr := 
v1.NewPeekingIter[*v1.SeriesWithBloom](block) - blocksMatchingSchema = append(blocksMatchingSchema, itr) - // append needed block to close list (when finished) - toClose = append(toClose, block) + blocksMatchingSchema = append(blocksMatchingSchema, block) + } + + if s.blocksIter.Err() != nil { + // should we ignore the error and continue with the blocks we got? + return skippedBlocks, toClose, v1.NewSliceIter([]*v1.Block{}), s.blocksIter.Err() } level.Debug(s.logger).Log("msg", "generating bloom filters for blocks", "num_blocks", len(blocksMatchingSchema), "skipped_blocks", len(skippedBlocks), "schema", fmt.Sprintf("%+v", s.opts.Schema)) - // TODO(owen-d): implement bounded block sizes - mergeBuilder := v1.NewMergeBuilder(blocksMatchingSchema, s.store, s.populator(ctx)) - writer, reader := s.readWriterFn() + series := v1.NewPeekingIter(s.store) + blockIter := NewLazyBlockBuilderIterator(ctx, s.opts, s.populator(ctx), s.readWriterFn, series, blocksMatchingSchema) + return skippedBlocks, toClose, blockIter, nil +} - blockBuilder, err := v1.NewBlockBuilder(v1.NewBlockOptionsFromSchema(s.opts.Schema), writer) - if err != nil { - return skippedBlocks, nil, errors.Wrap(err, "failed to create bloom block builder") +// LazyBlockBuilderIterator is a lazy iterator over blocks that builds +// each block by adding series to them until they are full. +type LazyBlockBuilderIterator struct { + ctx context.Context + opts v1.BlockOptions + populate func(*v1.Series, *v1.Bloom) error + readWriterFn func() (v1.BlockWriter, v1.BlockReader) + series v1.PeekingIterator[*v1.Series] + blocks []*bloomshipper.CloseableBlockQuerier + + blocksAsPeekingIter []v1.PeekingIterator[*v1.SeriesWithBloom] + curr *v1.Block + err error +} + +func NewLazyBlockBuilderIterator( + ctx context.Context, + opts v1.BlockOptions, + populate func(*v1.Series, *v1.Bloom) error, + readWriterFn func() (v1.BlockWriter, v1.BlockReader), + series v1.PeekingIterator[*v1.Series], + blocks []*bloomshipper.CloseableBlockQuerier, +) *LazyBlockBuilderIterator { + it := &LazyBlockBuilderIterator{ + ctx: ctx, + opts: opts, + populate: populate, + readWriterFn: readWriterFn, + series: series, + blocks: blocks, + + blocksAsPeekingIter: make([]v1.PeekingIterator[*v1.SeriesWithBloom], len(blocks)), + } + + return it +} + +func (b *LazyBlockBuilderIterator) Next() bool { + // No more series to process + if _, hasNext := b.series.Peek(); !hasNext { + return false + } + + // reset all the blocks to the start + for i, block := range b.blocks { + if err := block.Reset(); err != nil { + b.err = errors.Wrapf(err, "failed to reset block iterator %d", i) + return false + } + b.blocksAsPeekingIter[i] = v1.NewPeekingIter[*v1.SeriesWithBloom](block) + } + + if err := b.ctx.Err(); err != nil { + b.err = errors.Wrap(err, "context canceled") + return false } + mergeBuilder := v1.NewMergeBuilder(b.blocksAsPeekingIter, b.series, b.populate) + writer, reader := b.readWriterFn() + blockBuilder, err := v1.NewBlockBuilder(b.opts, writer) + if err != nil { + b.err = errors.Wrap(err, "failed to create bloom block builder") + return false + } _, err = mergeBuilder.Build(blockBuilder) if err != nil { - return skippedBlocks, nil, errors.Wrap(err, "failed to build bloom block") + b.err = errors.Wrap(err, "failed to build bloom block") + return false } - return skippedBlocks, v1.NewSliceIter[*v1.Block]([]*v1.Block{v1.NewBlock(reader)}), nil + b.curr = v1.NewBlock(reader) + return true +} + +func (b *LazyBlockBuilderIterator) At() *v1.Block { + return b.curr +} +func (b 
*LazyBlockBuilderIterator) Err() error { + return b.err } // IndexLoader loads an index. This helps us do things like @@ -211,45 +243,33 @@ type ChunkItersByFingerprint struct { // ChunkLoader loads chunks from a store type ChunkLoader interface { - Load(context.Context, *v1.Series) (*ChunkItersByFingerprint, error) -} - -// interface modeled from `pkg/storage/stores/composite_store.ChunkFetcherProvider` -type fetcherProvider interface { - GetChunkFetcher(model.Time) chunkFetcher -} - -// interface modeled from `pkg/storage/chunk/fetcher.Fetcher` -type chunkFetcher interface { - FetchChunks(ctx context.Context, chunks []chunk.Chunk) ([]chunk.Chunk, error) + Load(ctx context.Context, userID string, series *v1.Series) (*ChunkItersByFingerprint, error) } // StoreChunkLoader loads chunks from a store type StoreChunkLoader struct { - userID string - fetcherProvider fetcherProvider + fetcherProvider stores.ChunkFetcherProvider metrics *Metrics } -func NewStoreChunkLoader(userID string, fetcherProvider fetcherProvider, metrics *Metrics) *StoreChunkLoader { +func NewStoreChunkLoader(fetcherProvider stores.ChunkFetcherProvider, metrics *Metrics) *StoreChunkLoader { return &StoreChunkLoader{ - userID: userID, fetcherProvider: fetcherProvider, metrics: metrics, } } -func (s *StoreChunkLoader) Load(ctx context.Context, series *v1.Series) (*ChunkItersByFingerprint, error) { - // TODO(owen-d): This is probalby unnecessary as we should only have one fetcher +func (s *StoreChunkLoader) Load(ctx context.Context, userID string, series *v1.Series) (*ChunkItersByFingerprint, error) { + // NB(owen-d): This is probably unnecessary as we should only have one fetcher // because we'll only be working on a single index period at a time, but this should protect // us in the case of refactoring/changing this and likely isn't a perf bottleneck. 
- chksByFetcher := make(map[chunkFetcher][]chunk.Chunk) + chksByFetcher := make(map[*fetcher.Fetcher][]chunk.Chunk) for _, chk := range series.Chunks { fetcher := s.fetcherProvider.GetChunkFetcher(chk.Start) chksByFetcher[fetcher] = append(chksByFetcher[fetcher], chunk.Chunk{ ChunkRef: logproto.ChunkRef{ Fingerprint: uint64(series.Fingerprint), - UserID: s.userID, + UserID: userID, From: chk.Start, Through: chk.End, Checksum: chk.Checksum, @@ -257,104 +277,152 @@ func (s *StoreChunkLoader) Load(ctx context.Context, series *v1.Series) (*ChunkI }) } - work := make([]chunkWork, 0, len(chksByFetcher)) + var ( + fetchers = make([]Fetcher[chunk.Chunk, chunk.Chunk], 0, len(chksByFetcher)) + inputs = make([][]chunk.Chunk, 0, len(chksByFetcher)) + ) for fetcher, chks := range chksByFetcher { - work = append(work, chunkWork{ - fetcher: fetcher, - chks: chks, - }) + fn := FetchFunc[chunk.Chunk, chunk.Chunk](fetcher.FetchChunks) + fetchers = append(fetchers, fn) + inputs = append(inputs, chks) } return &ChunkItersByFingerprint{ fp: series.Fingerprint, - itr: newBatchedLoader(ctx, work, batchedLoaderDefaultBatchSize, s.metrics), + itr: newBatchedChunkLoader(ctx, fetchers, inputs, s.metrics, batchedLoaderDefaultBatchSize), }, nil } -type chunkWork struct { - fetcher chunkFetcher - chks []chunk.Chunk +type Fetcher[A, B any] interface { + Fetch(ctx context.Context, inputs []A) ([]B, error) +} + +type FetchFunc[A, B any] func(ctx context.Context, inputs []A) ([]B, error) + +func (f FetchFunc[A, B]) Fetch(ctx context.Context, inputs []A) ([]B, error) { + return f(ctx, inputs) } // batchedLoader implements `v1.Iterator[v1.ChunkRefWithIter]` in batches // to ensure memory is bounded while loading chunks // TODO(owen-d): testware -type batchedLoader struct { +type batchedLoader[A, B, C any] struct { metrics *Metrics batchSize int ctx context.Context - work []chunkWork + fetchers []Fetcher[A, B] + work [][]A - cur v1.ChunkRefWithIter - batch []chunk.Chunk - err error + mapper func(B) (C, error) + cur C + batch []B + err error } const batchedLoaderDefaultBatchSize = 50 -func newBatchedLoader(ctx context.Context, work []chunkWork, batchSize int, metrics *Metrics) *batchedLoader { - return &batchedLoader{ - metrics: metrics, - batchSize: batchSize, +func newBatchedLoader[A, B, C any]( + ctx context.Context, + fetchers []Fetcher[A, B], + inputs [][]A, + mapper func(B) (C, error), + batchSize int, +) *batchedLoader[A, B, C] { + return &batchedLoader[A, B, C]{ + batchSize: max(batchSize, 1), ctx: ctx, - work: work, + fetchers: fetchers, + work: inputs, + mapper: mapper, } } -func (b *batchedLoader) Next() bool { - if len(b.batch) > 0 { - b.cur, b.err = b.format(b.batch[0]) - b.batch = b.batch[1:] - return b.err == nil - } +func (b *batchedLoader[A, B, C]) Next() bool { - if len(b.work) == 0 { - return false - } + // iterate work until we have non-zero length batch + for len(b.batch) == 0 { - // setup next batch - next := b.work[0] - batchSize := min(b.batchSize, len(next.chks)) - toFetch := next.chks[:batchSize] - // update work - b.work[0].chks = next.chks[batchSize:] - if len(b.work[0].chks) == 0 { - b.work = b.work[1:] + // empty batch + no work remaining = we're done + if len(b.work) == 0 { + return false + } + + // setup next batch + next := b.work[0] + batchSize := min(b.batchSize, len(next)) + toFetch := next[:batchSize] + fetcher := b.fetchers[0] + + // update work + b.work[0] = b.work[0][batchSize:] + if len(b.work[0]) == 0 { + // if we've exhausted work from this set of inputs, + // set pointer to next set 
of inputs + // and their respective fetcher + b.work = b.work[1:] + b.fetchers = b.fetchers[1:] + } + + // there was no work in this batch; continue (should not happen) + if len(toFetch) == 0 { + continue + } + + b.batch, b.err = fetcher.Fetch(b.ctx, toFetch) + // error fetching, short-circuit iteration + if b.err != nil { + return false + } } - b.batch, b.err = next.fetcher.FetchChunks(b.ctx, toFetch) + return b.prepNext() +} + +func (b *batchedLoader[_, B, C]) prepNext() bool { + b.cur, b.err = b.mapper(b.batch[0]) + b.batch = b.batch[1:] return b.err == nil } -func (b *batchedLoader) format(c chunk.Chunk) (v1.ChunkRefWithIter, error) { - chk := c.Data.(*chunkenc.Facade).LokiChunk() - b.metrics.chunkSize.Observe(float64(chk.UncompressedSize())) - itr, err := chk.Iterator( - b.ctx, - time.Unix(0, 0), // TODO: Parameterize/better handle the timestamps? - time.Unix(0, math.MaxInt64), - logproto.FORWARD, - logql_log.NewNoopPipeline().ForStream(c.Metric), - ) +func newBatchedChunkLoader( + ctx context.Context, + fetchers []Fetcher[chunk.Chunk, chunk.Chunk], + inputs [][]chunk.Chunk, + metrics *Metrics, + batchSize int, +) *batchedLoader[chunk.Chunk, chunk.Chunk, v1.ChunkRefWithIter] { + + mapper := func(c chunk.Chunk) (v1.ChunkRefWithIter, error) { + chk := c.Data.(*chunkenc.Facade).LokiChunk() + metrics.chunkSize.Observe(float64(chk.UncompressedSize())) + itr, err := chk.Iterator( + ctx, + time.Unix(0, 0), + time.Unix(0, math.MaxInt64), + logproto.FORWARD, + logql_log.NewNoopPipeline().ForStream(c.Metric), + ) - if err != nil { - return v1.ChunkRefWithIter{}, err - } + if err != nil { + return v1.ChunkRefWithIter{}, err + } - return v1.ChunkRefWithIter{ - Ref: v1.ChunkRef{ - Start: c.From, - End: c.Through, - Checksum: c.Checksum, - }, - Itr: itr, - }, nil + return v1.ChunkRefWithIter{ + Ref: v1.ChunkRef{ + Start: c.From, + End: c.Through, + Checksum: c.Checksum, + }, + Itr: itr, + }, nil + } + return newBatchedLoader(ctx, fetchers, inputs, mapper, batchSize) } -func (b *batchedLoader) At() v1.ChunkRefWithIter { +func (b *batchedLoader[_, _, C]) At() C { return b.cur } -func (b *batchedLoader) Err() error { +func (b *batchedLoader[_, _, _]) Err() error { return b.err } diff --git a/pkg/bloomcompactor/spec_test.go b/pkg/bloomcompactor/spec_test.go index c43a4b715a1e7..bb4fde6cc2359 100644 --- a/pkg/bloomcompactor/spec_test.go +++ b/pkg/bloomcompactor/spec_test.go @@ -3,6 +3,7 @@ package bloomcompactor import ( "bytes" "context" + "errors" "testing" "github.com/go-kit/log" @@ -28,7 +29,7 @@ func blocksFromSchemaWithRange(t *testing.T, n int, options v1.BlockOptions, fro numKeysPerSeries := 10000 data, _ = v1.MkBasicSeriesWithBlooms(numSeries, numKeysPerSeries, fromFP, throughFp, 0, 10000) - seriesPerBlock := 100 / n + seriesPerBlock := numSeries / n for i := 0; i < n; i++ { // references for linking in memory reader+writer @@ -56,7 +57,7 @@ func blocksFromSchemaWithRange(t *testing.T, n int, options v1.BlockOptions, fro // doesn't actually load any chunks type dummyChunkLoader struct{} -func (dummyChunkLoader) Load(_ context.Context, series *v1.Series) (*ChunkItersByFingerprint, error) { +func (dummyChunkLoader) Load(_ context.Context, _ string, series *v1.Series) (*ChunkItersByFingerprint, error) { return &ChunkItersByFingerprint{ fp: series.Fingerprint, itr: v1.NewEmptyIter[v1.ChunkRefWithIter](), @@ -70,12 +71,14 @@ func dummyBloomGen(opts v1.BlockOptions, store v1.Iterator[*v1.Series], blocks [ BlockQuerier: v1.NewBlockQuerier(b), }) } + blocksIter := 
v1.NewCloseableIterator(v1.NewSliceIter(bqs)) return NewSimpleBloomGenerator( + "fake", opts, store, dummyChunkLoader{}, - bqs, + blocksIter, func() (v1.BlockWriter, v1.BlockReader) { indexBuf := bytes.NewBuffer(nil) bloomsBuf := bytes.NewBuffer(nil) @@ -87,24 +90,35 @@ func dummyBloomGen(opts v1.BlockOptions, store v1.Iterator[*v1.Series], blocks [ } func TestSimpleBloomGenerator(t *testing.T) { + const maxBlockSize = 100 << 20 // 100MB for _, tc := range []struct { - desc string - fromSchema, toSchema v1.BlockOptions - sourceBlocks, numSkipped int + desc string + fromSchema, toSchema v1.BlockOptions + sourceBlocks, numSkipped, outputBlocks int }{ { desc: "SkipsIncompatibleSchemas", - fromSchema: v1.NewBlockOptions(3, 0), - toSchema: v1.NewBlockOptions(4, 0), + fromSchema: v1.NewBlockOptions(3, 0, maxBlockSize), + toSchema: v1.NewBlockOptions(4, 0, maxBlockSize), sourceBlocks: 2, numSkipped: 2, + outputBlocks: 1, }, { desc: "CombinesBlocks", - fromSchema: v1.NewBlockOptions(4, 0), - toSchema: v1.NewBlockOptions(4, 0), + fromSchema: v1.NewBlockOptions(4, 0, maxBlockSize), + toSchema: v1.NewBlockOptions(4, 0, maxBlockSize), sourceBlocks: 2, numSkipped: 0, + outputBlocks: 1, + }, + { + desc: "MaxBlockSize", + fromSchema: v1.NewBlockOptions(4, 0, maxBlockSize), + toSchema: v1.NewBlockOptions(4, 0, 1<<10), // 1KB + sourceBlocks: 2, + numSkipped: 0, + outputBlocks: 3, }, } { t.Run(tc.desc, func(t *testing.T) { @@ -117,26 +131,155 @@ func TestSimpleBloomGenerator(t *testing.T) { ) gen := dummyBloomGen(tc.toSchema, storeItr, sourceBlocks) - skipped, results, err := gen.Generate(context.Background()) + skipped, _, results, err := gen.Generate(context.Background()) require.Nil(t, err) require.Equal(t, tc.numSkipped, len(skipped)) - require.True(t, results.Next()) - block := results.At() - require.False(t, results.Next()) + var outputBlocks []*v1.Block + for results.Next() { + outputBlocks = append(outputBlocks, results.At()) + } + require.Equal(t, tc.outputBlocks, len(outputBlocks)) - refs := v1.PointerSlice[v1.SeriesWithBloom](data) + // Check all the input series are present in the output blocks. 
+ expectedRefs := v1.PointerSlice(data) + outputRefs := make([]*v1.SeriesWithBloom, 0, len(data)) + for _, block := range outputBlocks { + bq := block.Querier() + for bq.Next() { + outputRefs = append(outputRefs, bq.At()) + } + } + require.Equal(t, len(expectedRefs), len(outputRefs)) + for i := range expectedRefs { + require.Equal(t, expectedRefs[i].Series, outputRefs[i].Series) + } + }) + } +} - v1.EqualIterators[*v1.SeriesWithBloom]( - t, - func(a, b *v1.SeriesWithBloom) { - // TODO(owen-d): better equality check - // once chunk fetching is implemented - require.Equal(t, a.Series, b.Series) - }, - v1.NewSliceIter[*v1.SeriesWithBloom](refs), - block.Querier(), +func TestBatchedLoader(t *testing.T) { + errMapper := func(i int) (int, error) { + return 0, errors.New("bzzt") + } + successMapper := func(i int) (int, error) { + return i, nil + } + + expired, cancel := context.WithCancel(context.Background()) + cancel() + + for _, tc := range []struct { + desc string + ctx context.Context + batchSize int + mapper func(int) (int, error) + err bool + inputs [][]int + exp []int + }{ + { + desc: "OneBatch", + ctx: context.Background(), + batchSize: 2, + mapper: successMapper, + err: false, + inputs: [][]int{{0, 1}}, + exp: []int{0, 1}, + }, + { + desc: "ZeroBatchSizeStillWorks", + ctx: context.Background(), + batchSize: 0, + mapper: successMapper, + err: false, + inputs: [][]int{{0, 1}}, + exp: []int{0, 1}, + }, + { + desc: "OneBatchLessThanFull", + ctx: context.Background(), + batchSize: 2, + mapper: successMapper, + err: false, + inputs: [][]int{{0}}, + exp: []int{0}, + }, + { + desc: "TwoBatches", + ctx: context.Background(), + batchSize: 2, + mapper: successMapper, + err: false, + inputs: [][]int{{0, 1, 2, 3}}, + exp: []int{0, 1, 2, 3}, + }, + { + desc: "MultipleBatchesMultipleLoaders", + ctx: context.Background(), + batchSize: 2, + mapper: successMapper, + err: false, + inputs: [][]int{{0, 1}, {2}, {3, 4, 5}}, + exp: []int{0, 1, 2, 3, 4, 5}, + }, + { + desc: "HandlesEmptyInputs", + ctx: context.Background(), + batchSize: 2, + mapper: successMapper, + err: false, + inputs: [][]int{{0, 1, 2, 3}, nil, {4}}, + exp: []int{0, 1, 2, 3, 4}, + }, + { + desc: "Timeout", + ctx: expired, + batchSize: 2, + mapper: successMapper, + err: true, + inputs: [][]int{{0}}, + }, + { + desc: "MappingFailure", + ctx: context.Background(), + batchSize: 2, + mapper: errMapper, + err: true, + inputs: [][]int{{0}}, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + fetchers := make([]Fetcher[int, int], 0, len(tc.inputs)) + for range tc.inputs { + fetchers = append( + fetchers, + FetchFunc[int, int](func(ctx context.Context, xs []int) ([]int, error) { + if ctx.Err() != nil { + return nil, ctx.Err() + } + return xs, nil + }), + ) + } + + loader := newBatchedLoader[int, int, int]( + tc.ctx, + fetchers, + tc.inputs, + tc.mapper, + tc.batchSize, ) + + got, err := v1.Collect[int](loader) + if tc.err { + require.Error(t, err) + return + } + require.NoError(t, err) + require.Equal(t, tc.exp, got) + }) } + } diff --git a/pkg/bloomcompactor/tsdb.go b/pkg/bloomcompactor/tsdb.go index bb4383cc84f60..e6fd92961c46c 100644 --- a/pkg/bloomcompactor/tsdb.go +++ b/pkg/bloomcompactor/tsdb.go @@ -2,15 +2,119 @@ package bloomcompactor import ( "context" + "fmt" + "io" "math" + "path" + "strings" + "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" + "github.com/grafana/loki/pkg/chunkenc" + baseStore "github.com/grafana/loki/pkg/storage" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" 
+ "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/storage" + "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb/index" ) +const ( + gzipExtension = ".gz" +) + +type TSDBStore interface { + UsersForPeriod(ctx context.Context, table DayTable) ([]string, error) + ResolveTSDBs(ctx context.Context, table DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) + LoadTSDB( + ctx context.Context, + table DayTable, + tenant string, + id tsdb.Identifier, + bounds v1.FingerprintBounds, + ) (v1.CloseableIterator[*v1.Series], error) +} + +// BloomTSDBStore is a wrapper around the storage.Client interface which +// implements the TSDBStore interface for this pkg. +type BloomTSDBStore struct { + storage storage.Client +} + +func NewBloomTSDBStore(storage storage.Client) *BloomTSDBStore { + return &BloomTSDBStore{ + storage: storage, + } +} + +func (b *BloomTSDBStore) UsersForPeriod(ctx context.Context, table DayTable) ([]string, error) { + _, users, err := b.storage.ListFiles(ctx, table.String(), true) // bypass cache for ease of testing + return users, err +} + +func (b *BloomTSDBStore) ResolveTSDBs(ctx context.Context, table DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) { + indices, err := b.storage.ListUserFiles(ctx, table.String(), tenant, true) // bypass cache for ease of testing + if err != nil { + return nil, errors.Wrap(err, "failed to list user files") + } + + ids := make([]tsdb.SingleTenantTSDBIdentifier, 0, len(indices)) + for _, index := range indices { + key := index.Name + if decompress := storage.IsCompressedFile(index.Name); decompress { + key = strings.TrimSuffix(key, gzipExtension) + } + + id, ok := tsdb.ParseSingleTenantTSDBPath(path.Base(key)) + if !ok { + return nil, errors.Errorf("failed to parse single tenant tsdb path: %s", key) + } + + ids = append(ids, id) + + } + return ids, nil +} + +func (b *BloomTSDBStore) LoadTSDB( + ctx context.Context, + table DayTable, + tenant string, + id tsdb.Identifier, + bounds v1.FingerprintBounds, +) (v1.CloseableIterator[*v1.Series], error) { + withCompression := id.Name() + gzipExtension + + data, err := b.storage.GetUserFile(ctx, table.String(), tenant, withCompression) + if err != nil { + return nil, errors.Wrap(err, "failed to get file") + } + defer data.Close() + + decompressorPool := chunkenc.GetReaderPool(chunkenc.EncGZIP) + decompressor, err := decompressorPool.GetReader(data) + if err != nil { + return nil, errors.Wrap(err, "failed to get decompressor") + } + defer decompressorPool.PutReader(decompressor) + + buf, err := io.ReadAll(decompressor) + if err != nil { + return nil, errors.Wrap(err, "failed to read file") + } + + reader, err := index.NewReader(index.RealByteSlice(buf)) + if err != nil { + return nil, errors.Wrap(err, "failed to create index reader") + } + + idx := tsdb.NewTSDBIndex(reader) + + return NewTSDBSeriesIter(ctx, idx, bounds), nil +} + // TSDBStore is an interface for interacting with the TSDB, // modeled off a relevant subset of the `tsdb.TSDBIndex` struct type forSeries interface { @@ -109,3 +213,94 @@ func (t *TSDBSeriesIter) background() { close(t.ch) }() } + +type TSDBStores struct { + schemaCfg config.SchemaConfig + stores []TSDBStore +} + +func NewTSDBStores( + schemaCfg config.SchemaConfig, + storeCfg baseStore.Config, + clientMetrics baseStore.ClientMetrics, +) (*TSDBStores, error) { + res := &TSDBStores{ + schemaCfg: 
schemaCfg,
+ stores: make([]TSDBStore, len(schemaCfg.Configs)),
+ }
+
+ for i, cfg := range schemaCfg.Configs {
+ if cfg.IndexType == config.TSDBType {
+
+ c, err := baseStore.NewObjectClient(cfg.ObjectType, storeCfg, clientMetrics)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to create object client")
+ }
+ prefix := path.Join(cfg.IndexTables.PathPrefix, cfg.IndexTables.Prefix)
+ res.stores[i] = NewBloomTSDBStore(storage.NewIndexStorageClient(c, prefix))
+ }
+ }
+
+ return res, nil
+}
+
+func (s *TSDBStores) storeForPeriod(table DayTable) (TSDBStore, error) {
+ for i := len(s.schemaCfg.Configs) - 1; i >= 0; i-- {
+ period := s.schemaCfg.Configs[i]
+
+ if !table.Before(DayTable(period.From.Time)) {
+ // we have the desired period config
+
+ if s.stores[i] != nil {
+ // valid: it's of tsdb type
+ return s.stores[i], nil
+ }
+
+ // invalid
+ return nil, errors.Errorf(
+ "store for period is not of TSDB type (%s) while looking up store for (%v)",
+ period.IndexType,
+ table.ModelTime().Time(),
+ )
+ }
+
+ }
+
+ return nil, fmt.Errorf(
+ "no matching period found for table (%v) -- too early",
+ table.ModelTime().Time(),
+ )
+}
+
+func (s *TSDBStores) UsersForPeriod(ctx context.Context, table DayTable) ([]string, error) {
+ store, err := s.storeForPeriod(table)
+ if err != nil {
+ return nil, err
+ }
+
+ return store.UsersForPeriod(ctx, table)
+}
+
+func (s *TSDBStores) ResolveTSDBs(ctx context.Context, table DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) {
+ store, err := s.storeForPeriod(table)
+ if err != nil {
+ return nil, err
+ }
+
+ return store.ResolveTSDBs(ctx, table, tenant)
+}
+
+func (s *TSDBStores) LoadTSDB(
+ ctx context.Context,
+ table DayTable,
+ tenant string,
+ id tsdb.Identifier,
+ bounds v1.FingerprintBounds,
+) (v1.CloseableIterator[*v1.Series], error) {
+ store, err := s.storeForPeriod(table)
+ if err != nil {
+ return nil, err
+ }
+
+ return store.LoadTSDB(ctx, table, tenant, id, bounds)
+}
diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go
index 1e7a54f1d1e33..abecbf6773fd3 100644
--- a/pkg/bloomgateway/bloomgateway.go
+++ b/pkg/bloomgateway/bloomgateway.go
@@ -23,13 +23,15 @@ of line filter expressions.
 |
 bloomgateway.Gateway
 |
- queue.RequestQueue
+ queue.RequestQueue
 |
- bloomgateway.Worker
+ bloomgateway.Worker
 |
- bloomshipper.Shipper
+ bloomgateway.Processor
 |
- bloomshipper.BloomFileClient
+ bloomshipper.Store
+ |
+ bloomshipper.Client
 |
 ObjectClient
 |
@@ -56,6 +58,7 @@ import (
 "github.com/prometheus/client_golang/prometheus/promauto"
 "github.com/grafana/loki/pkg/logproto"
+ "github.com/grafana/loki/pkg/logqlmodel/stats"
 "github.com/grafana/loki/pkg/queue"
 "github.com/grafana/loki/pkg/storage"
 v1 "github.com/grafana/loki/pkg/storage/bloom/v1"
@@ -170,11 +173,9 @@ type Gateway struct {
 workerMetrics *workerMetrics
 queueMetrics *queue.Metrics
- queue *queue.RequestQueue
- activeUsers *util.ActiveUsersCleanupService
- bloomShipper bloomshipper.Interface
-
- sharding ShardingStrategy
+ queue *queue.RequestQueue
+ activeUsers *util.ActiveUsersCleanupService
+ bloomStore bloomshipper.Store
 pendingTasks *pendingTasks
@@ -193,12 +194,11 @@ func (l *fixedQueueLimits) MaxConsumers(_ string, _ int) int {
 }
 // New returns a new instance of the Bloom Gateway.
-func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, overrides Limits, shardingStrategy ShardingStrategy, cm storage.ClientMetrics, logger log.Logger, reg prometheus.Registerer) (*Gateway, error) { +func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, overrides Limits, cm storage.ClientMetrics, logger log.Logger, reg prometheus.Registerer) (*Gateway, error) { g := &Gateway{ cfg: cfg, logger: logger, metrics: newMetrics(reg, constants.Loki, metricsSubsystem), - sharding: shardingStrategy, pendingTasks: makePendingTasks(pendingTasksInitialCap), workerConfig: workerConfig{ maxItems: 100, @@ -206,25 +206,33 @@ func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, o workerMetrics: newWorkerMetrics(reg, constants.Loki, metricsSubsystem), queueMetrics: queue.NewMetrics(reg, constants.Loki, metricsSubsystem), } + var err error g.queue = queue.NewRequestQueue(cfg.MaxOutstandingPerTenant, time.Minute, &fixedQueueLimits{0}, g.queueMetrics) g.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(g.queueMetrics.Cleanup) - // TODO(chaudum): Plug in cache var metasCache cache.Cache - var blocksCache *cache.EmbeddedCache[string, bloomshipper.BlockDirectory] - store, err := bloomshipper.NewBloomStore(schemaCfg.Configs, storageCfg, cm, metasCache, blocksCache, logger) - if err != nil { - return nil, err + mcCfg := storageCfg.BloomShipperConfig.MetasCache + if cache.IsCacheConfigured(mcCfg) { + metasCache, err = cache.New(mcCfg, reg, logger, stats.BloomMetasCache, constants.Loki) + if err != nil { + return nil, err + } } - bloomShipper, err := bloomshipper.NewShipper(store, storageCfg.BloomShipperConfig, overrides, logger, reg) + var blocksCache cache.TypedCache[string, bloomshipper.BlockDirectory] + bcCfg := storageCfg.BloomShipperConfig.BlocksCache + if bcCfg.IsEnabled() { + blocksCache = bloomshipper.NewBlocksCache(bcCfg, reg, logger) + } + + store, err := bloomshipper.NewBloomStore(schemaCfg.Configs, storageCfg, cm, metasCache, blocksCache, logger) if err != nil { return nil, err } // We need to keep a reference to be able to call Stop() on shutdown of the gateway. - g.bloomShipper = bloomShipper + g.bloomStore = store if err := g.initServices(); err != nil { return nil, err @@ -239,7 +247,7 @@ func (g *Gateway) initServices() error { svcs := []services.Service{g.queue, g.activeUsers} for i := 0; i < g.cfg.WorkerConcurrency; i++ { id := fmt.Sprintf("bloom-query-worker-%d", i) - w := newWorker(id, g.workerConfig, g.queue, g.bloomShipper, g.pendingTasks, g.logger, g.workerMetrics) + w := newWorker(id, g.workerConfig, g.queue, g.bloomStore, g.pendingTasks, g.logger, g.workerMetrics) svcs = append(svcs, w) } g.serviceMngr, err = services.NewManager(svcs...) 
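For orientation, the hunks above replace the shipper-backed worker path with a store-backed processor: the gateway still enqueues tasks per tenant, but each worker now dequeues a batch of tasks and hands the whole batch to a processor built around bloomshipper.Store. Below is a minimal, self-contained sketch of that dequeue-and-delegate shape; task, processor, and workerLoop are illustrative stand-ins, not the actual Loki types.

package main

import (
	"context"
	"fmt"
)

// Illustrative stand-ins; the real counterparts are queue.RequestQueue, Task,
// bloomshipper.Store, and the processor type introduced in this diff.
type task struct{ id string }

type processor struct{}

// run stands in for processor.run(ctx, tasks): resolve metas and blocks from
// the bloom store and execute the fused bloom queries for the whole batch.
func (p *processor) run(_ context.Context, tasks []task) error {
	for _, t := range tasks {
		fmt.Println("processed", t.id)
	}
	return nil
}

// workerLoop mirrors the new worker flow: dequeue a batch, delegate the whole
// batch to the processor, and record success or failure per batch.
func workerLoop(ctx context.Context, batches <-chan []task) {
	p := &processor{}
	for batch := range batches {
		if err := p.run(ctx, batch); err != nil {
			fmt.Println("batch failed:", err) // real code: tasksProcessed{status="failure"}
			continue
		}
		fmt.Println("batch ok:", len(batch)) // real code: tasksProcessed{status="success"}
	}
}

func main() {
	batches := make(chan []task, 1)
	batches <- []task{{id: "t0"}, {id: "t1"}}
	close(batches)
	workerLoop(context.Background(), batches)
}

In the real worker (see the worker.go hunks later in this diff) the batch comes from queue.DequeueMany bounded by cfg.maxItems, and per-batch outcomes are recorded in the tasksProcessed and processDuration metrics before the dequeued items are released back to the queue's pool.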
@@ -291,7 +299,7 @@ func (g *Gateway) running(ctx context.Context) error { } func (g *Gateway) stopping(_ error) error { - g.bloomShipper.Stop() + g.bloomStore.Stop() return services.StopManagerAndAwaitStopped(context.Background(), g.serviceMngr) } diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index c8da44a7c719b..f07e014b84dc3 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -3,6 +3,7 @@ package bloomgateway import ( "context" "fmt" + "math/rand" "os" "testing" "time" @@ -45,8 +46,6 @@ func newLimits() *validation.Overrides { } func TestBloomGateway_StartStopService(t *testing.T) { - - ss := NewNoopStrategy() logger := log.NewNopLogger() reg := prometheus.NewRegistry() limits := newLimits() @@ -96,7 +95,7 @@ func TestBloomGateway_StartStopService(t *testing.T) { MaxOutstandingPerTenant: 1024, } - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -113,8 +112,6 @@ func TestBloomGateway_StartStopService(t *testing.T) { func TestBloomGateway_FilterChunkRefs(t *testing.T) { tenantID := "test" - - ss := NewNoopStrategy() logger := log.NewLogfmtLogger(os.Stderr) reg := prometheus.NewRegistry() limits := newLimits() @@ -165,15 +162,17 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("shipper error is propagated", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) now := mktime("2023-10-03 10:00") - bqs, data := createBlockQueriers(t, 10, now.Add(-24*time.Hour), now, 0, 1000) - mockStore := newMockBloomStore(bqs) - mockStore.err = errors.New("failed to fetch block") - gw.bloomShipper = mockStore + // replace store implementation and re-initialize workers and sub-services + _, metas, queriers, data := createBlocks(t, tenantID, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + mockStore.err = errors.New("request failed") + gw.bloomStore = mockStore err = gw.initServices() require.NoError(t, err) @@ -185,7 +184,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { require.NoError(t, err) }) - chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 10) + chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 100) // saturate workers // then send additional request @@ -204,21 +203,23 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Cleanup(cancelFn) res, err := gw.FilterChunkRefs(ctx, req) - require.ErrorContainsf(t, err, "request failed: failed to fetch block", "%+v", res) + require.ErrorContainsf(t, err, "request failed", "%+v", res) } }) t.Run("request cancellation does not result in channel locking", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) now := mktime("2024-01-25 10:00") - bqs, data := createBlockQueriers(t, 50, now.Add(-24*time.Hour), now, 0, 1024) - mockStore := newMockBloomStore(bqs) - mockStore.delay = 50 * time.Millisecond // delay for each block - 50x50=2500ms - gw.bloomShipper = mockStore + // replace store implementation and re-initialize workers and sub-services + _, metas, 
queriers, data := createBlocks(t, tenantID, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + mockStore.delay = 2000 * time.Millisecond + gw.bloomStore = mockStore err = gw.initServices() require.NoError(t, err) @@ -255,7 +256,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("returns unfiltered chunk refs if no filters provided", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -300,7 +301,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("gateway tracks active users", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -340,14 +341,15 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("use fuse queriers to filter chunks", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, cm, logger, reg) require.NoError(t, err) now := mktime("2023-10-03 10:00") // replace store implementation and re-initialize workers and sub-services - bqs, data := createBlockQueriers(t, 5, now.Add(-8*time.Hour), now, 0, 1024) - gw.bloomShipper = newMockBloomStore(bqs) + _, metas, queriers, data := createBlocks(t, tenantID, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + gw.bloomStore = newMockBloomStore(queriers, metas) err = gw.initServices() require.NoError(t, err) @@ -358,7 +360,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { require.NoError(t, err) }) - chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 100) + chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 10) t.Run("no match - return empty response", func(t *testing.T) { inputChunkRefs := groupRefs(t, chunkRefs) @@ -382,27 +384,37 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("match - return filtered", func(t *testing.T) { inputChunkRefs := groupRefs(t, chunkRefs) - // hack to get indexed key for a specific series - // the indexed key range for a series is defined as - // i * keysPerSeries ... 
i * keysPerSeries + keysPerSeries - 1 - // where i is the nth series in a block - // fortunately, i is also used as Checksum for the single chunk of a series - // see mkBasicSeriesWithBlooms() in pkg/storage/bloom/v1/test_util.go - key := inputChunkRefs[0].Refs[0].Checksum*1000 + 500 + // Hack to get search string for a specific series + // see MkBasicSeriesWithBlooms() in pkg/storage/bloom/v1/test_util.go + // each series has 1 chunk + // each chunk has multiple strings, from int(fp) to int(nextFp)-1 + x := rand.Intn(len(inputChunkRefs)) + fp := inputChunkRefs[x].Fingerprint + chks := inputChunkRefs[x].Refs + line := fmt.Sprintf("%04x:%04x", int(fp), 0) // first line + + t.Log("x=", x, "fp=", fp, "line=", line) req := &logproto.FilterChunkRefRequest{ From: now.Add(-8 * time.Hour), Through: now, Refs: inputChunkRefs, Filters: []syntax.LineFilter{ - {Ty: labels.MatchEqual, Match: fmt.Sprintf("series %d", key)}, + {Ty: labels.MatchEqual, Match: line}, }, } ctx := user.InjectOrgID(context.Background(), tenantID) res, err := gw.FilterChunkRefs(ctx, req) require.NoError(t, err) + expectedResponse := &logproto.FilterChunkRefResponse{ - ChunkRefs: inputChunkRefs[:1], + ChunkRefs: []*logproto.GroupedChunkRefs{ + { + Fingerprint: fp, + Refs: chks, + Tenant: tenantID, + }, + }, } require.Equal(t, expectedResponse, res) }) diff --git a/pkg/bloomgateway/client.go b/pkg/bloomgateway/client.go index 6453987b91683..9a75e4e87c26b 100644 --- a/pkg/bloomgateway/client.go +++ b/pkg/bloomgateway/client.go @@ -36,6 +36,10 @@ import ( ) var ( + // BlocksOwnerRead is the operation used to check the authoritative owners of a block + // (replicas included) that are available for queries (a bloom gateway is available for + // queries only when ACTIVE). + BlocksOwnerRead = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, nil) // groupedChunksRefPool pooling slice of logproto.GroupedChunkRefs [64, 128, 256, ..., 65536] groupedChunksRefPool = queue.NewSlicePool[*logproto.GroupedChunkRefs](1<<6, 1<<16, 2) // ringGetBuffersPool pooling for ringGetBuffers to avoid calling ring.MakeBuffersForGet() for each request @@ -226,15 +230,16 @@ func (c *GatewayClient) FilterChunks(ctx context.Context, tenant string, from, t } subRing := GetShuffleShardingSubring(c.ring, tenant, c.limits) - rs, err := subRing.GetAllHealthy(BlocksRead) + rs, err := subRing.GetAllHealthy(BlocksOwnerRead) if err != nil { return nil, errors.Wrap(err, "bloom gateway get healthy instances") } - streamsByInst, err := c.groupFingerprintsByServer(groups, subRing, rs.Instances) + servers, err := serverAddressesWithTokenRanges(subRing, rs.Instances) if err != nil { return nil, err } + streamsByInst := groupFingerprintsByServer(groups, servers) filteredChunkRefs := groupedChunksRefPool.Get(len(groups)) defer groupedChunksRefPool.Put(filteredChunkRefs) @@ -286,13 +291,9 @@ func (c *GatewayClient) doForAddrs(addrs []string, fn func(logproto.BloomGateway return err } -func (c *GatewayClient) groupFingerprintsByServer(groups []*logproto.GroupedChunkRefs, subRing ring.ReadRing, instances []ring.InstanceDesc) ([]instanceWithFingerprints, error) { - servers, err := serverAddressesWithTokenRanges(subRing, instances) - if err != nil { - return nil, err - } +func groupFingerprintsByServer(groups []*logproto.GroupedChunkRefs, servers []addrsWithTokenRange) []instanceWithFingerprints { boundedFingerprints := partitionFingerprintsByAddresses(groups, servers) - return groupByInstance(boundedFingerprints), nil + return groupByInstance(boundedFingerprints) } func 
serverAddressesWithTokenRanges(subRing ring.ReadRing, instances []ring.InstanceDesc) ([]addrsWithTokenRange, error) { @@ -303,7 +304,7 @@ func serverAddressesWithTokenRanges(subRing ring.ReadRing, instances []ring.Inst for it.Next() { // We can use on of the tokens from the token range // to obtain all addresses for that token. - rs, err := subRing.Get(it.At().MaxToken, BlocksRead, bufDescs, bufHosts, bufZones) + rs, err := subRing.Get(it.At().MaxToken, BlocksOwnerRead, bufDescs, bufHosts, bufZones) if err != nil { return nil, errors.Wrap(err, "bloom gateway get ring") } @@ -410,3 +411,21 @@ func groupByInstance(boundedFingerprints []instanceWithFingerprints) []instanceW return result } + +// GetShuffleShardingSubring returns the subring to be used for a given user. +// This function should be used both by index gateway servers and clients in +// order to guarantee the same logic is used. +func GetShuffleShardingSubring(ring ring.ReadRing, tenantID string, limits Limits) ring.ReadRing { + shardSize := limits.BloomGatewayShardSize(tenantID) + + // A shard size of 0 means shuffle sharding is disabled for this specific user, + // so we just return the full ring so that indexes will be sharded across all index gateways. + // Since we set the shard size to replication factor if shard size is 0, this + // can only happen if both the shard size and the replication factor are set + // to 0. + if shardSize <= 0 { + return ring + } + + return ring.ShuffleShard(tenantID, shardSize) +} diff --git a/pkg/bloomgateway/client_test.go b/pkg/bloomgateway/client_test.go index e59fff2306ab9..b1716de8150ea 100644 --- a/pkg/bloomgateway/client_test.go +++ b/pkg/bloomgateway/client_test.go @@ -207,19 +207,6 @@ func TestBloomGatewayClient_ServerAddressesWithTokenRanges(t *testing.T) { } func TestBloomGatewayClient_GroupFingerprintsByServer(t *testing.T) { - - logger := log.NewNopLogger() - reg := prometheus.NewRegistry() - - l, err := validation.NewOverrides(validation.Limits{BloomGatewayShardSize: 1}, nil) - require.NoError(t, err) - - cfg := ClientConfig{} - flagext.DefaultValues(&cfg) - - c, err := NewClient(cfg, nil, l, reg, logger, "loki", nil, false) - require.NoError(t, err) - instances := []ring.InstanceDesc{ {Id: "instance-1", Addr: "10.0.0.1", Tokens: []uint32{2146405214, 1029997044, 678878693}}, {Id: "instance-2", Addr: "10.0.0.2", Tokens: []uint32{296463531, 1697323986, 800258284}}, @@ -339,8 +326,9 @@ func TestBloomGatewayClient_GroupFingerprintsByServer(t *testing.T) { return tc.chunks[i].Fingerprint < tc.chunks[j].Fingerprint }) - res, err := c.groupFingerprintsByServer(tc.chunks, subRing, instances) + servers, err := serverAddressesWithTokenRanges(subRing, instances) require.NoError(t, err) + res := groupFingerprintsByServer(tc.chunks, servers) require.Equal(t, tc.expected, res) }) } diff --git a/pkg/bloomgateway/processor.go b/pkg/bloomgateway/processor.go index 26895bc43eda5..4fe9c38483cbe 100644 --- a/pkg/bloomgateway/processor.go +++ b/pkg/bloomgateway/processor.go @@ -4,6 +4,7 @@ import ( "context" "math" "sort" + "time" "github.com/go-kit/log" "github.com/prometheus/common/model" @@ -17,9 +18,20 @@ type tasksForBlock struct { tasks []Task } +func newProcessor(id string, store bloomshipper.Store, logger log.Logger, metrics *workerMetrics) *processor { + return &processor{ + id: id, + store: store, + logger: logger, + metrics: metrics, + } +} + type processor struct { - store bloomshipper.Store - logger log.Logger + id string + store bloomshipper.Store + logger log.Logger + metrics 
*workerMetrics
 }
 func (p *processor) run(ctx context.Context, tasks []Task) error {
@@ -51,6 +63,8 @@ func (p *processor) processTasks(ctx context.Context, tenant string, interval bl
 if err != nil {
 return err
 }
+ p.metrics.metasFetched.WithLabelValues(p.id).Observe(float64(len(metas)))
+
 blocksRefs := bloomshipper.BlocksForMetas(metas, interval, keyspaces)
 return p.processBlocks(ctx, partition(tasks, blocksRefs))
 }
@@ -65,6 +79,7 @@ func (p *processor) processBlocks(ctx context.Context, data []tasksForBlock) err
 if err != nil {
 return err
 }
+ p.metrics.blocksFetched.WithLabelValues(p.id).Observe(float64(len(bqs)))
 blockIter := v1.NewSliceIter(bqs)
@@ -102,7 +117,16 @@ func (p *processor) processBlock(_ context.Context, blockQuerier *v1.BlockQuerie
 }
 fq := blockQuerier.Fuse(iters)
- return fq.Run()
+
+ start := time.Now()
+ err = fq.Run()
+ if err != nil {
+ p.metrics.blockQueryLatency.WithLabelValues(p.id, labelFailure).Observe(time.Since(start).Seconds())
+ } else {
+ p.metrics.blockQueryLatency.WithLabelValues(p.id, labelSuccess).Observe(time.Since(start).Seconds())
+ }
+
+ return err
 }
 // getFirstLast returns the first and last item of a fingerprint slice
diff --git a/pkg/bloomgateway/processor_test.go b/pkg/bloomgateway/processor_test.go
index d39ba61a89613..c4c8f8457b3a1 100644
--- a/pkg/bloomgateway/processor_test.go
+++ b/pkg/bloomgateway/processor_test.go
@@ -7,23 +7,40 @@ import (
 "testing"
 "time"
+ "github.com/go-kit/log"
+ "github.com/pkg/errors"
+ "github.com/prometheus/client_golang/prometheus"
 "github.com/prometheus/common/model"
 "github.com/stretchr/testify/require"
 "go.uber.org/atomic"
 "github.com/grafana/loki/pkg/logql/syntax"
 "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper"
+ "github.com/grafana/loki/pkg/util/constants"
 )
 var _ bloomshipper.Store = &dummyStore{}
+func newMockBloomStore(bqs []*bloomshipper.CloseableBlockQuerier, metas []bloomshipper.Meta) *dummyStore {
+ return &dummyStore{
+ querieres: bqs,
+ metas: metas,
+ }
+}
+
 type dummyStore struct {
 metas []bloomshipper.Meta
- blocks []bloomshipper.BlockRef
 querieres []*bloomshipper.CloseableBlockQuerier
+
+ // mock how long it takes to serve block queriers
+ delay time.Duration
+ // mock response error when serving block queriers in ForEach
+ err error
 }
 func (s *dummyStore) ResolveMetas(_ context.Context, _ bloomshipper.MetaSearchParams) ([][]bloomshipper.MetaRef, []*bloomshipper.Fetcher, error) {
+ time.Sleep(s.delay)
+
 //TODO(chaudum) Filter metas based on search params
 refs := make([]bloomshipper.MetaRef, 0, len(s.metas))
 for _, meta := range s.metas {
@@ -51,6 +68,11 @@ func (s *dummyStore) Stop() {
 func (s *dummyStore) FetchBlocks(_ context.Context, refs []bloomshipper.BlockRef) ([]*bloomshipper.CloseableBlockQuerier, error) {
 result := make([]*bloomshipper.CloseableBlockQuerier, 0, len(s.querieres))
+ if s.err != nil {
+ time.Sleep(s.delay)
+ return result, s.err
+ }
+
 for _, ref := range refs {
 for _, bq := range s.querieres {
 if ref.Bounds.Equal(bq.Bounds) {
@@ -63,6 +85,8 @@ func (s *dummyStore) FetchBlocks(_ context.Context, refs []bloomshipper.BlockRef
 result[i], result[j] = result[j], result[i]
 })
+ time.Sleep(s.delay)
+
 return result, nil
 }
@@ -70,16 +94,13 @@ func TestProcessor(t *testing.T) {
 ctx := context.Background()
 tenant := "fake"
 now := mktime("2024-01-27 12:00")
+ metrics := newWorkerMetrics(prometheus.NewPedanticRegistry(), constants.Loki, "bloom_gateway")
- t.Run("dummy", func(t *testing.T) {
- blocks, metas, queriers, data := createBlocks(t, tenant, 10,
now.Add(-1*time.Hour), now, 0x0000, 0x1000) - p := &processor{ - store: &dummyStore{ - querieres: queriers, - metas: metas, - blocks: blocks, - }, - } + t.Run("success case", func(t *testing.T) { + _, metas, queriers, data := createBlocks(t, tenant, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + p := newProcessor("worker", mockStore, log.NewNopLogger(), metrics) chunkRefs := createQueryInputFromBlockData(t, tenant, data, 10) swb := seriesWithBounds{ @@ -116,4 +137,48 @@ func TestProcessor(t *testing.T) { require.NoError(t, err) require.Equal(t, int64(len(swb.series)), results.Load()) }) + + t.Run("failure case", func(t *testing.T) { + _, metas, queriers, data := createBlocks(t, tenant, 10, now.Add(-1*time.Hour), now, 0x0000, 0x0fff) + + mockStore := newMockBloomStore(queriers, metas) + mockStore.err = errors.New("store failed") + + p := newProcessor("worker", mockStore, log.NewNopLogger(), metrics) + + chunkRefs := createQueryInputFromBlockData(t, tenant, data, 10) + swb := seriesWithBounds{ + series: groupRefs(t, chunkRefs), + bounds: model.Interval{ + Start: now.Add(-1 * time.Hour), + End: now, + }, + day: truncateDay(now), + } + filters := []syntax.LineFilter{ + {Ty: 0, Match: "no match"}, + } + + t.Log("series", len(swb.series)) + task, _ := NewTask(ctx, "fake", swb, filters) + tasks := []Task{task} + + results := atomic.NewInt64(0) + var wg sync.WaitGroup + for i := range tasks { + wg.Add(1) + go func(ta Task) { + defer wg.Done() + for range ta.resCh { + results.Inc() + } + t.Log("done", results.Load()) + }(tasks[i]) + } + + err := p.run(ctx, tasks) + wg.Wait() + require.Errorf(t, err, "store failed") + require.Equal(t, int64(0), results.Load()) + }) } diff --git a/pkg/bloomgateway/sharding.go b/pkg/bloomgateway/sharding.go deleted file mode 100644 index 5dfb9f11732a0..0000000000000 --- a/pkg/bloomgateway/sharding.go +++ /dev/null @@ -1,156 +0,0 @@ -package bloomgateway - -import ( - "context" - - "github.com/go-kit/log" - "github.com/grafana/dskit/ring" - - util_ring "github.com/grafana/loki/pkg/util/ring" -) - -// TODO(chaudum): Replace this placeholder with actual BlockRef struct. -type BlockRef struct { - FromFp, ThroughFp uint64 - FromTs, ThroughTs int64 -} - -var ( - // BlocksOwnerSync is the operation used to check the authoritative owners of a block - // (replicas included). - BlocksOwnerSync = ring.NewOp([]ring.InstanceState{ring.JOINING, ring.ACTIVE, ring.LEAVING}, nil) - - // BlocksOwnerRead is the operation used to check the authoritative owners of a block - // (replicas included) that are available for queries (a bloom gateway is available for - // queries only when ACTIVE). - BlocksOwnerRead = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, nil) - - // BlocksRead is the operation run by the querier to query blocks via the bloom gateway. - BlocksRead = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, func(s ring.InstanceState) bool { - // Blocks can only be queried from ACTIVE instances. However, if the block belongs to - // a non-active instance, then we should extend the replication set and try to query it - // from the next ACTIVE instance in the ring (which is expected to have it because a - // bloom gateway keeps their previously owned blocks until new owners are ACTIVE). - return s != ring.ACTIVE - }) -) - -type ShardingStrategy interface { - // FilterTenants whose indexes should be loaded by the index gateway. - // Returns the list of user IDs that should be synced by the index gateway. 
- FilterTenants(ctx context.Context, tenantIDs []string) ([]string, error) - FilterBlocks(ctx context.Context, tenantID string, blockRefs []BlockRef) ([]BlockRef, error) -} - -type ShuffleShardingStrategy struct { - util_ring.TenantSharding - r ring.ReadRing - ringLifeCycler *ring.BasicLifecycler - logger log.Logger -} - -func NewShuffleShardingStrategy(r ring.ReadRing, ringLifecycler *ring.BasicLifecycler, limits Limits, logger log.Logger) *ShuffleShardingStrategy { - return &ShuffleShardingStrategy{ - TenantSharding: util_ring.NewTenantShuffleSharding(r, ringLifecycler, limits.BloomGatewayShardSize), - ringLifeCycler: ringLifecycler, - logger: logger, - } -} - -// FilterTenants implements ShardingStrategy. -func (s *ShuffleShardingStrategy) FilterTenants(_ context.Context, tenantIDs []string) ([]string, error) { - // As a protection, ensure the bloom gateway instance is healthy in the ring. It could also be missing - // in the ring if it was failing to heartbeat the ring and it got remove from another healthy bloom gateway - // instance, because of the auto-forget feature. - if set, err := s.r.GetAllHealthy(BlocksOwnerSync); err != nil { - return nil, err - } else if !set.Includes(s.ringLifeCycler.GetInstanceID()) { - return nil, errGatewayUnhealthy - } - - var filteredIDs []string - - for _, tenantID := range tenantIDs { - // Include the user only if it belongs to this bloom gateway shard. - if s.OwnsTenant(tenantID) { - filteredIDs = append(filteredIDs, tenantID) - } - } - - return filteredIDs, nil -} - -// nolint:revive -func getBucket(rangeMin, rangeMax, pos uint64) int { - return 0 -} - -// FilterBlocks implements ShardingStrategy. -func (s *ShuffleShardingStrategy) FilterBlocks(_ context.Context, tenantID string, blockRefs []BlockRef) ([]BlockRef, error) { - if !s.OwnsTenant(tenantID) { - return nil, nil - } - - filteredBlockRefs := make([]BlockRef, 0, len(blockRefs)) - - tenantRing := s.GetTenantSubRing(tenantID) - - fpSharding := util_ring.NewFingerprintShuffleSharding(tenantRing, s.ringLifeCycler, BlocksOwnerSync) - for _, blockRef := range blockRefs { - owns, err := fpSharding.OwnsFingerprint(blockRef.FromFp) - if err != nil { - return nil, err - } - if owns { - filteredBlockRefs = append(filteredBlockRefs, blockRef) - continue - } - - owns, err = fpSharding.OwnsFingerprint(blockRef.ThroughFp) - if err != nil { - return nil, err - } - if owns { - filteredBlockRefs = append(filteredBlockRefs, blockRef) - continue - } - } - - return filteredBlockRefs, nil -} - -// GetShuffleShardingSubring returns the subring to be used for a given user. -// This function should be used both by index gateway servers and clients in -// order to guarantee the same logic is used. -func GetShuffleShardingSubring(ring ring.ReadRing, tenantID string, limits Limits) ring.ReadRing { - shardSize := limits.BloomGatewayShardSize(tenantID) - - // A shard size of 0 means shuffle sharding is disabled for this specific user, - // so we just return the full ring so that indexes will be sharded across all index gateways. - // Since we set the shard size to replication factor if shard size is 0, this - // can only happen if both the shard size and the replication factor are set - // to 0. - if shardSize <= 0 { - return ring - } - - return ring.ShuffleShard(tenantID, shardSize) -} - -// NoopStrategy is an implementation of the ShardingStrategy that does not -// filter anything. 
-type NoopStrategy struct{} - -func NewNoopStrategy() *NoopStrategy { - return &NoopStrategy{} -} - -// FilterTenants implements ShardingStrategy. -func (s *NoopStrategy) FilterTenants(_ context.Context, tenantIDs []string) ([]string, error) { - return tenantIDs, nil -} - -// FilterBlocks implements ShardingStrategy. -func (s *NoopStrategy) FilterBlocks(_ context.Context, _ string, blockRefs []BlockRef) ([]BlockRef, error) { - return blockRefs, nil -} diff --git a/pkg/bloomgateway/util_test.go b/pkg/bloomgateway/util_test.go index f19564b43ef59..8fc37f20bac8e 100644 --- a/pkg/bloomgateway/util_test.go +++ b/pkg/bloomgateway/util_test.go @@ -1,8 +1,6 @@ package bloomgateway import ( - "context" - "math/rand" "testing" "time" @@ -295,39 +293,10 @@ func TestPartitionRequest(t *testing.T) { } -func createBlockQueriers(t *testing.T, numBlocks int, from, through model.Time, minFp, maxFp model.Fingerprint) ([]*bloomshipper.CloseableBlockQuerier, [][]v1.SeriesWithBloom) { - t.Helper() - step := (maxFp - minFp) / model.Fingerprint(numBlocks) - bqs := make([]*bloomshipper.CloseableBlockQuerier, 0, numBlocks) - series := make([][]v1.SeriesWithBloom, 0, numBlocks) - for i := 0; i < numBlocks; i++ { - fromFp := minFp + (step * model.Fingerprint(i)) - throughFp := fromFp + step - 1 - // last block needs to include maxFp - if i == numBlocks-1 { - throughFp = maxFp - } - blockQuerier, data := v1.MakeBlockQuerier(t, fromFp, throughFp, from, through) - bq := &bloomshipper.CloseableBlockQuerier{ - BlockQuerier: blockQuerier, - BlockRef: bloomshipper.BlockRef{ - Ref: bloomshipper.Ref{ - Bounds: v1.NewBounds(fromFp, throughFp), - StartTimestamp: from, - EndTimestamp: through, - }, - }, - } - bqs = append(bqs, bq) - series = append(series, data) - } - return bqs, series -} - func createBlocks(t *testing.T, tenant string, n int, from, through model.Time, minFp, maxFp model.Fingerprint) ([]bloomshipper.BlockRef, []bloomshipper.Meta, []*bloomshipper.CloseableBlockQuerier, [][]v1.SeriesWithBloom) { t.Helper() - blocks := make([]bloomshipper.BlockRef, 0, n) + blockRefs := make([]bloomshipper.BlockRef, 0, n) metas := make([]bloomshipper.Meta, 0, n) queriers := make([]*bloomshipper.CloseableBlockQuerier, 0, n) series := make([][]v1.SeriesWithBloom, 0, n) @@ -347,7 +316,7 @@ func createBlocks(t *testing.T, tenant string, n int, from, through model.Time, StartTimestamp: from, EndTimestamp: through, } - block := bloomshipper.BlockRef{ + blockRef := bloomshipper.BlockRef{ Ref: ref, } meta := bloomshipper.Meta{ @@ -355,71 +324,26 @@ func createBlocks(t *testing.T, tenant string, n int, from, through model.Time, Ref: ref, }, Tombstones: []bloomshipper.BlockRef{}, - Blocks: []bloomshipper.BlockRef{block}, + Blocks: []bloomshipper.BlockRef{blockRef}, } - blockQuerier, data := v1.MakeBlockQuerier(t, fromFp, throughFp, from, through) + block, data, _ := v1.MakeBlock(t, n, fromFp, throughFp, from, through) + // Printing fingerprints and the log lines of its chunks comes handy for debugging... 
+ // for i := range keys { + // t.Log(data[i].Series.Fingerprint) + // for j := range keys[i] { + // t.Log(i, j, string(keys[i][j])) + // } + // } querier := &bloomshipper.CloseableBlockQuerier{ - BlockQuerier: blockQuerier, - BlockRef: block, + BlockQuerier: v1.NewBlockQuerier(block), + BlockRef: blockRef, } queriers = append(queriers, querier) metas = append(metas, meta) - blocks = append(blocks, block) + blockRefs = append(blockRefs, blockRef) series = append(series, data) } - return blocks, metas, queriers, series -} - -func newMockBloomStore(bqs []*bloomshipper.CloseableBlockQuerier) *mockBloomStore { - return &mockBloomStore{bqs: bqs} -} - -type mockBloomStore struct { - bqs []*bloomshipper.CloseableBlockQuerier - // mock how long it takes to serve block queriers - delay time.Duration - // mock response error when serving block queriers in ForEach - err error -} - -var _ bloomshipper.Interface = &mockBloomStore{} - -// GetBlockRefs implements bloomshipper.Interface -func (s *mockBloomStore) GetBlockRefs(_ context.Context, _ string, _ bloomshipper.Interval) ([]bloomshipper.BlockRef, error) { - time.Sleep(s.delay) - blocks := make([]bloomshipper.BlockRef, 0, len(s.bqs)) - for i := range s.bqs { - blocks = append(blocks, s.bqs[i].BlockRef) - } - return blocks, nil -} - -// Stop implements bloomshipper.Interface -func (s *mockBloomStore) Stop() {} - -// ForEach implements bloomshipper.Interface -func (s *mockBloomStore) ForEach(_ context.Context, _ string, _ []bloomshipper.BlockRef, callback bloomshipper.ForEachBlockCallback) error { - if s.err != nil { - time.Sleep(s.delay) - return s.err - } - - shuffled := make([]*bloomshipper.CloseableBlockQuerier, len(s.bqs)) - _ = copy(shuffled, s.bqs) - - rand.Shuffle(len(shuffled), func(i, j int) { - shuffled[i], shuffled[j] = shuffled[j], shuffled[i] - }) - - for _, bq := range shuffled { - // ignore errors in the mock - time.Sleep(s.delay) - err := callback(bq.BlockQuerier, bq.Bounds) - if err != nil { - return err - } - } - return nil + return blockRefs, metas, queriers, series } func createQueryInputFromBlockData(t *testing.T, tenant string, data [][]v1.SeriesWithBloom, nthSeries int) []*logproto.ChunkRef { diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index 0f7db8a9ca586..5c57c0a2e4952 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -10,60 +10,76 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/prometheus/common/model" - "golang.org/x/exp/slices" "github.com/grafana/loki/pkg/queue" - v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" ) +const ( + labelSuccess = "success" + labelFailure = "failure" +) + type workerConfig struct { maxItems int } type workerMetrics struct { - dequeuedTasks *prometheus.CounterVec - dequeueErrors *prometheus.CounterVec - dequeueWaitTime *prometheus.SummaryVec - storeAccessLatency *prometheus.HistogramVec - bloomQueryLatency *prometheus.HistogramVec + dequeueDuration *prometheus.HistogramVec + processDuration *prometheus.HistogramVec + metasFetched *prometheus.HistogramVec + blocksFetched *prometheus.HistogramVec + tasksDequeued *prometheus.CounterVec + tasksProcessed *prometheus.CounterVec + blockQueryLatency *prometheus.HistogramVec } func newWorkerMetrics(registerer prometheus.Registerer, namespace, subsystem string) *workerMetrics { labels := []string{"worker"} + r := 
promauto.With(registerer) return &workerMetrics{ - dequeuedTasks: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + dequeueDuration: r.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "dequeued_tasks_total", - Help: "Total amount of tasks that the worker dequeued from the bloom query queue", + Name: "dequeue_duration_seconds", + Help: "Time spent dequeuing tasks from queue in seconds", }, labels), - dequeueErrors: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + processDuration: r.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "process_duration_seconds", + Help: "Time spent processing tasks in seconds", + }, append(labels, "status")), + metasFetched: r.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "dequeue_errors_total", - Help: "Total amount of failed dequeue operations", + Name: "metas_fetched", + Help: "Amount of metas fetched", }, labels), - dequeueWaitTime: promauto.With(registerer).NewSummaryVec(prometheus.SummaryOpts{ + blocksFetched: r.NewHistogramVec(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "dequeue_wait_time", - Help: "Time spent waiting for dequeuing tasks from queue", + Name: "blocks_fetched", + Help: "Amount of blocks fetched", }, labels), - bloomQueryLatency: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ + tasksDequeued: r.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "bloom_query_latency", - Help: "Latency in seconds of processing bloom blocks", + Name: "tasks_dequeued_total", + Help: "Total amount of tasks that the worker dequeued from the queue", }, append(labels, "status")), - // TODO(chaudum): Move this metric into the bloomshipper - storeAccessLatency: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ + tasksProcessed: r.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "store_latency", - Help: "Latency in seconds of accessing the bloom store component", - }, append(labels, "operation")), + Name: "tasks_processed_total", + Help: "Total amount of tasks that the worker processed", + }, append(labels, "status")), + blockQueryLatency: r.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "block_query_latency", + Help: "Time spent running searches against a bloom block", + }, append(labels, "status")), } } @@ -78,18 +94,18 @@ type worker struct { id string cfg workerConfig queue *queue.RequestQueue - shipper bloomshipper.Interface + store bloomshipper.Store pending *pendingTasks logger log.Logger metrics *workerMetrics } -func newWorker(id string, cfg workerConfig, queue *queue.RequestQueue, shipper bloomshipper.Interface, pending *pendingTasks, logger log.Logger, metrics *workerMetrics) *worker { +func newWorker(id string, cfg workerConfig, queue *queue.RequestQueue, store bloomshipper.Store, pending *pendingTasks, logger log.Logger, metrics *workerMetrics) *worker { w := &worker{ id: id, cfg: cfg, queue: queue, - shipper: shipper, + store: store, pending: pending, logger: log.With(logger, "worker", id), metrics: metrics, @@ -107,17 +123,19 @@ func (w *worker) starting(_ context.Context) error { func (w *worker) running(_ context.Context) error { idx := queue.StartIndexWithLocalQueue + p := newProcessor(w.id, w.store, w.logger, w.metrics) + for st := w.State(); st == services.Running || st == 
services.Stopping; { taskCtx := context.Background() - dequeueStart := time.Now() + start := time.Now() items, newIdx, err := w.queue.DequeueMany(taskCtx, idx, w.id, w.cfg.maxItems) - w.metrics.dequeueWaitTime.WithLabelValues(w.id).Observe(time.Since(dequeueStart).Seconds()) + w.metrics.dequeueDuration.WithLabelValues(w.id).Observe(time.Since(start).Seconds()) if err != nil { // We only return an error if the queue is stopped and dequeuing did not yield any items if err == queue.ErrStopped && len(items) == 0 { return err } - w.metrics.dequeueErrors.WithLabelValues(w.id).Inc() + w.metrics.tasksDequeued.WithLabelValues(w.id, labelFailure).Inc() level.Error(w.logger).Log("msg", "failed to dequeue tasks", "err", err, "items", len(items)) } idx = newIdx @@ -126,10 +144,9 @@ func (w *worker) running(_ context.Context) error { w.queue.ReleaseRequests(items) continue } - w.metrics.dequeuedTasks.WithLabelValues(w.id).Add(float64(len(items))) - - tasksPerDay := make(map[model.Time][]Task) + w.metrics.tasksDequeued.WithLabelValues(w.id, labelSuccess).Add(float64(len(items))) + tasks := make([]Task, 0, len(items)) for _, item := range items { task, ok := item.(Task) if !ok { @@ -139,91 +156,19 @@ func (w *worker) running(_ context.Context) error { } level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID) w.pending.Delete(task.ID) - - tasksPerDay[task.day] = append(tasksPerDay[task.day], task) + tasks = append(tasks, task) } - for day, tasks := range tasksPerDay { - - // Remove tasks that are already cancelled - tasks = slices.DeleteFunc(tasks, func(t Task) bool { - if res := t.ctx.Err(); res != nil { - t.CloseWithError(res) - return true - } - return false - }) - // no tasks to process - // continue with tasks of next day - if len(tasks) == 0 { - continue - } - - // interval is [Start, End) - interval := bloomshipper.NewInterval(day, day.Add(Day)) - logger := log.With(w.logger, "day", day.Time(), "tenant", tasks[0].Tenant) - level.Debug(logger).Log("msg", "process tasks", "tasks", len(tasks)) - - storeFetchStart := time.Now() - blockRefs, err := w.shipper.GetBlockRefs(taskCtx, tasks[0].Tenant, interval) - w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockRefs").Observe(time.Since(storeFetchStart).Seconds()) - if err != nil { - for _, t := range tasks { - t.CloseWithError(err) - } - // continue with tasks of next day - continue - } - if len(tasks) == 0 { - continue - } - - // No blocks found. - // Since there are no blocks for the given tasks, we need to return the - // unfiltered list of chunk refs. - if len(blockRefs) == 0 { - level.Warn(logger).Log("msg", "no blocks found") - for _, t := range tasks { - t.Close() - } - // continue with tasks of next day - continue - } - - // Remove tasks that are already cancelled - tasks = slices.DeleteFunc(tasks, func(t Task) bool { - if res := t.ctx.Err(); res != nil { - t.CloseWithError(res) - return true - } - return false - }) - // no tasks to process - // continue with tasks of next day - if len(tasks) == 0 { - continue - } - - tasksForBlocks := partitionFingerprintRange(tasks, blockRefs) - blockRefs = blockRefs[:0] - for _, b := range tasksForBlocks { - blockRefs = append(blockRefs, b.blockRef) - } + start = time.Now() + err = p.run(taskCtx, tasks) - err = w.processBlocksWithCallback(taskCtx, tasks[0].Tenant, blockRefs, tasksForBlocks) - if err != nil { - for _, t := range tasks { - t.CloseWithError(err) - } - // continue with tasks of next day - continue - } - - // all tasks for this day are done. 
- // close them to notify the request handler
- for _, task := range tasks {
- task.Close()
- }
+ if err != nil {
+ w.metrics.processDuration.WithLabelValues(w.id, labelFailure).Observe(time.Since(start).Seconds())
+ w.metrics.tasksProcessed.WithLabelValues(w.id, labelFailure).Add(float64(len(tasks)))
+ level.Error(w.logger).Log("msg", "failed to process tasks", "err", err)
+ } else {
+ w.metrics.processDuration.WithLabelValues(w.id, labelSuccess).Observe(time.Since(start).Seconds())
+ w.metrics.tasksProcessed.WithLabelValues(w.id, labelSuccess).Add(float64(len(tasks)))
 }
 // return dequeued items back to the pool
@@ -238,41 +183,3 @@ func (w *worker) stopping(err error) error {
 w.queue.UnregisterConsumerConnection(w.id)
 return nil
 }
-
-func (w *worker) processBlocksWithCallback(taskCtx context.Context, tenant string, blockRefs []bloomshipper.BlockRef, boundedRefs []boundedTasks) error {
- return w.shipper.ForEach(taskCtx, tenant, blockRefs, func(bq *v1.BlockQuerier, bounds v1.FingerprintBounds) error {
- for _, b := range boundedRefs {
- if b.blockRef.Bounds.Equal(bounds) {
- return w.processBlock(bq, b.tasks)
- }
- }
- return nil
- })
-}
-
-func (w *worker) processBlock(blockQuerier *v1.BlockQuerier, tasks []Task) error {
- schema, err := blockQuerier.Schema()
- if err != nil {
- return err
- }
-
- tokenizer := v1.NewNGramTokenizer(schema.NGramLen(), 0)
- iters := make([]v1.PeekingIterator[v1.Request], 0, len(tasks))
- for _, task := range tasks {
- it := v1.NewPeekingIter(task.RequestIter(tokenizer))
- iters = append(iters, it)
- }
- fq := blockQuerier.Fuse(iters)
-
- start := time.Now()
- err = fq.Run()
- duration := time.Since(start).Seconds()
-
- if err != nil {
- w.metrics.bloomQueryLatency.WithLabelValues(w.id, "failure").Observe(duration)
- return err
- }
-
- w.metrics.bloomQueryLatency.WithLabelValues(w.id, "success").Observe(duration)
- return nil
-}
diff --git a/pkg/logql/accumulator.go b/pkg/logql/accumulator.go
new file mode 100644
index 0000000000000..9e9784cb037ef
--- /dev/null
+++ b/pkg/logql/accumulator.go
@@ -0,0 +1,379 @@
+package logql
+
+import (
+ "container/heap"
+ "context"
+ "fmt"
+ "sort"
+ "time"
+
+ "github.com/grafana/loki/pkg/logproto"
+ "github.com/grafana/loki/pkg/logqlmodel"
+ "github.com/grafana/loki/pkg/logqlmodel/metadata"
+ "github.com/grafana/loki/pkg/logqlmodel/stats"
+ "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase/definitions"
+ "github.com/grafana/loki/pkg/util/math"
+)
+
+// NewBufferedAccumulator returns an accumulator which aggregates all query
+// results in a slice. This is useful for metric queries, which are generally
+// small payloads and the memory overhead for buffering is negligible.
+func NewBufferedAccumulator(n int) *BufferedAccumulator {
+ return &BufferedAccumulator{
+ results: make([]logqlmodel.Result, n),
+ }
+}
+
+type BufferedAccumulator struct {
+ results []logqlmodel.Result
+}
+
+func (a *BufferedAccumulator) Accumulate(_ context.Context, acc logqlmodel.Result, i int) error {
+ a.results[i] = acc
+ return nil
+}
+
+func (a *BufferedAccumulator) Result() []logqlmodel.Result {
+ return a.results
+}
+
+type QuantileSketchAccumulator struct {
+ matrix ProbabilisticQuantileMatrix
+}
+
+// newQuantileSketchAccumulator returns an accumulator for sharded
+// probabilistic quantile queries that merges results as they come in.
+func newQuantileSketchAccumulator() *QuantileSketchAccumulator { + return &QuantileSketchAccumulator{} +} + +func (a *QuantileSketchAccumulator) Accumulate(_ context.Context, res logqlmodel.Result, _ int) error { + if res.Data.Type() != QuantileSketchMatrixType { + return fmt.Errorf("unexpected matrix data type: got (%s), want (%s)", res.Data.Type(), QuantileSketchMatrixType) + } + data, ok := res.Data.(ProbabilisticQuantileMatrix) + if !ok { + return fmt.Errorf("unexpected matrix type: got (%T), want (ProbabilisticQuantileMatrix)", res.Data) + } + if a.matrix == nil { + a.matrix = data + return nil + } + + var err error + a.matrix, err = a.matrix.Merge(data) + return err +} + +func (a *QuantileSketchAccumulator) Result() []logqlmodel.Result { + return []logqlmodel.Result{{Data: a.matrix}} +} + +// heap impl for keeping only the top n results across m streams +// importantly, AccumulatedStreams is _bounded_, so it will only +// store the top `limit` results across all streams. +// To implement this, we use a min-heap when looking +// for the max values (logproto.FORWARD) +// and vice versa for logproto.BACKWARD. +// This allows us to easily find the 'worst' value +// and replace it with a better one. +// Once we've fully processed all log lines, +// we return the heap in opposite order and then reverse it +// to get the correct order. +// Heap implements container/heap.Interface +// solely to use heap.Interface as a library. +// It is not intended for the heap pkg functions +// to otherwise call this type. +type AccumulatedStreams struct { + count, limit int + labelmap map[string]int + streams []*logproto.Stream + order logproto.Direction + + stats stats.Result // for accumulating statistics from downstream requests + headers map[string][]string // for accumulating headers from downstream requests +} + +// NewStreamAccumulator returns an accumulator for limited log queries. +// Log queries, sharded thousands of times and each returning +// results, can be _considerably_ larger. In this case, we eagerly +// accumulate the results into a logsAccumulator, discarding values +// over the limit to keep memory pressure down while other subqueries +// are executing. 
+func NewStreamAccumulator(params Params) *AccumulatedStreams { + // the stream accumulator stores a heap with reversed order + // from the results we expect, so we need to reverse the direction + order := logproto.FORWARD + if params.Direction() == logproto.FORWARD { + order = logproto.BACKWARD + } + + return &AccumulatedStreams{ + labelmap: make(map[string]int), + order: order, + limit: int(params.Limit()), + + headers: make(map[string][]string), + } +} + +// returns the top priority +func (acc *AccumulatedStreams) top() (time.Time, bool) { + if len(acc.streams) > 0 && len(acc.streams[0].Entries) > 0 { + return acc.streams[0].Entries[len(acc.streams[0].Entries)-1].Timestamp, true + } + return time.Time{}, false +} + +func (acc *AccumulatedStreams) Find(labels string) (int, bool) { + i, ok := acc.labelmap[labels] + return i, ok +} + +// number of streams +func (acc *AccumulatedStreams) Len() int { return len(acc.streams) } + +func (acc *AccumulatedStreams) Swap(i, j int) { + // for i=0, j=1 + + // {'a': 0, 'b': 1} + // [a, b] + acc.streams[i], acc.streams[j] = acc.streams[j], acc.streams[i] + // {'a': 0, 'b': 1} + // [b, a] + acc.labelmap[acc.streams[i].Labels] = i + acc.labelmap[acc.streams[j].Labels] = j + // {'a': 1, 'b': 0} + // [b, a] +} + +// first order by timestamp, then by labels +func (acc *AccumulatedStreams) Less(i, j int) bool { + // order by the 'oldest' entry in the stream + if a, b := acc.streams[i].Entries[len(acc.streams[i].Entries)-1].Timestamp, acc.streams[j].Entries[len(acc.streams[j].Entries)-1].Timestamp; !a.Equal(b) { + return acc.less(a, b) + } + return acc.streams[i].Labels <= acc.streams[j].Labels +} + +func (acc *AccumulatedStreams) less(a, b time.Time) bool { + // use after for stable sort + if acc.order == logproto.FORWARD { + return !a.After(b) + } + return !b.After(a) +} + +func (acc *AccumulatedStreams) Push(x any) { + s := x.(*logproto.Stream) + if len(s.Entries) == 0 { + return + } + + if room := acc.limit - acc.count; room >= len(s.Entries) { + if i, ok := acc.Find(s.Labels); ok { + // stream already exists, append entries + + // these are already guaranteed to be sorted + // Reasoning: we shard subrequests so each stream exists on only one + // shard. Therefore, the only time a stream should already exist + // is in successive splits, which are already guaranteed to be ordered + // and we can just append. + acc.appendTo(acc.streams[i], s) + + return + } + + // new stream + acc.addStream(s) + return + } + + // there's not enough room for all the entries, + // so we need to + acc.push(s) +} + +// there's not enough room for all the entries. +// since we store them in a reverse heap relative to what we _want_ +// (i.e. the max value for FORWARD, the min value for BACKWARD), +// we test if the new entry is better than the worst entry, +// swapping them if so. +func (acc *AccumulatedStreams) push(s *logproto.Stream) { + worst, ok := acc.top() + room := math.Min(acc.limit-acc.count, len(s.Entries)) + + if !ok { + if room == 0 { + // special case: limit must be zero since there's no room and no worst entry + return + } + s.Entries = s.Entries[:room] + // special case: there are no entries in the heap. Push entries up to the limit + acc.addStream(s) + return + } + + // since entries are sorted by timestamp from best -> worst, + // we can discard the entire stream if the incoming best entry + // is worse than the worst entry in the heap. 
+ cutoff := sort.Search(len(s.Entries), func(i int) bool {
+ // TODO(refactor label comparison -- should be in another fn)
+ if worst.Equal(s.Entries[i].Timestamp) {
+ return acc.streams[0].Labels < s.Labels
+ }
+ return acc.less(s.Entries[i].Timestamp, worst)
+ })
+ s.Entries = s.Entries[:cutoff]
+
+ for i := 0; i < len(s.Entries) && acc.less(worst, s.Entries[i].Timestamp); i++ {
+
+ // push one entry at a time
+ room = acc.limit - acc.count
+ // pop if there's no room to make the heap small enough for an append;
+ // in the short path of Push() we know that there's room for at least one entry
+ if room == 0 {
+ acc.Pop()
+ }
+
+ cpy := *s
+ cpy.Entries = []logproto.Entry{s.Entries[i]}
+ acc.Push(&cpy)
+
+ // update worst
+ worst, _ = acc.top()
+ }
+}
+
+func (acc *AccumulatedStreams) addStream(s *logproto.Stream) {
+ // ensure entries conform to order we expect
+ // TODO(owen-d): remove? should be unnecessary since we insert in appropriate order
+ // but it's nice to have the safeguard
+ sort.Slice(s.Entries, func(i, j int) bool {
+ return acc.less(s.Entries[j].Timestamp, s.Entries[i].Timestamp)
+ })
+
+ acc.streams = append(acc.streams, s)
+ i := len(acc.streams) - 1
+ acc.labelmap[s.Labels] = i
+ acc.count += len(s.Entries)
+ heap.Fix(acc, i)
+}
+
+// dst must already exist in acc
+func (acc *AccumulatedStreams) appendTo(dst, src *logproto.Stream) {
+ // these are already guaranteed to be sorted
+ // Reasoning: we shard subrequests so each stream exists on only one
+ // shard. Therefore, the only time a stream should already exist
+ // is in successive splits, which are already guaranteed to be ordered
+ // and we can just append.
+
+ var needsSort bool
+ for _, e := range src.Entries {
+ // sort if order has broken
+ if len(dst.Entries) > 0 && acc.less(dst.Entries[len(dst.Entries)-1].Timestamp, e.Timestamp) {
+ needsSort = true
+ }
+ dst.Entries = append(dst.Entries, e)
+ }
+
+ if needsSort {
+ sort.Slice(dst.Entries, func(i, j int) bool {
+ // store in reverse order so we can more reliably insert without sorting and pop from end
+ return acc.less(dst.Entries[j].Timestamp, dst.Entries[i].Timestamp)
+ })
+ }
+
+ acc.count += len(src.Entries)
+ heap.Fix(acc, acc.labelmap[dst.Labels])
+
+}
+
+// Pop returns a stream with one entry. It pops the first entry of the first stream
+func (acc *AccumulatedStreams) Pop() any {
+ n := acc.Len()
+ if n == 0 {
+ return nil
+ }
+
+ stream := acc.streams[0]
+ cpy := *stream
+ cpy.Entries = []logproto.Entry{cpy.Entries[len(stream.Entries)-1]}
+ stream.Entries = stream.Entries[:len(stream.Entries)-1]
+
+ acc.count--
+
+ if len(stream.Entries) == 0 {
+ // remove stream
+ acc.Swap(0, n-1)
+ acc.streams[n-1] = nil // avoid leaking reference
+ delete(acc.labelmap, stream.Labels)
+ acc.streams = acc.streams[:n-1]
+
+ }
+
+ if acc.Len() > 0 {
+ heap.Fix(acc, 0)
+ }
+
+ return &cpy
+}
+
+// Note: can only be called once as it will alter stream ordering.
+func (acc *AccumulatedStreams) Result() []logqlmodel.Result { + // sort streams by label + sort.Slice(acc.streams, func(i, j int) bool { + return acc.streams[i].Labels < acc.streams[j].Labels + }) + + streams := make(logqlmodel.Streams, 0, len(acc.streams)) + + for _, s := range acc.streams { + // sort entries by timestamp, inversely based on direction + sort.Slice(s.Entries, func(i, j int) bool { + return acc.less(s.Entries[j].Timestamp, s.Entries[i].Timestamp) + }) + streams = append(streams, *s) + } + + res := logqlmodel.Result{ + // stats & headers are already aggregated in the context + Data: streams, + Statistics: acc.stats, + Headers: make([]*definitions.PrometheusResponseHeader, 0, len(acc.headers)), + } + + for name, vals := range acc.headers { + res.Headers = append( + res.Headers, + &definitions.PrometheusResponseHeader{ + Name: name, + Values: vals, + }, + ) + } + + return []logqlmodel.Result{res} +} + +func (acc *AccumulatedStreams) Accumulate(_ context.Context, x logqlmodel.Result, _ int) error { + // TODO(owen-d/ewelch): Shard counts should be set by the querier + // so we don't have to do it in tricky ways in multiple places. + // See pkg/logql/downstream.go:DownstreamEvaluator.Downstream + // for another example. + if x.Statistics.Summary.Shards == 0 { + x.Statistics.Summary.Shards = 1 + } + acc.stats.Merge(x.Statistics) + metadata.ExtendHeaders(acc.headers, x.Headers) + + switch got := x.Data.(type) { + case logqlmodel.Streams: + for i := range got { + acc.Push(&got[i]) + } + default: + return fmt.Errorf("unexpected response type during response result accumulation. Got (%T), wanted %s", got, logqlmodel.ValueTypeStreams) + } + return nil +} diff --git a/pkg/logql/accumulator_test.go b/pkg/logql/accumulator_test.go new file mode 100644 index 0000000000000..d827e3ea02e71 --- /dev/null +++ b/pkg/logql/accumulator_test.go @@ -0,0 +1,273 @@ +package logql + +import ( + "context" + "fmt" + "math/rand" + "testing" + "time" + + "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/pkg/logql/sketch" + "github.com/grafana/loki/pkg/logqlmodel" +) + +func TestAccumulatedStreams(t *testing.T) { + lim := 30 + nStreams := 10 + start, end := 0, 10 + // for a logproto.BACKWARD query, we use a min heap based on FORWARD + // to store the _earliest_ timestamp of the _latest_ entries, up to `limit` + xs := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, nStreams, logproto.BACKWARD) + acc := NewStreamAccumulator(LiteralParams{ + direction: logproto.BACKWARD, + limit: uint32(lim), + }) + for _, x := range xs { + acc.Push(x) + } + + for i := 0; i < lim; i++ { + got := acc.Pop().(*logproto.Stream) + require.Equal(t, fmt.Sprintf(`{n="%d"}`, i%nStreams), got.Labels) + exp := (nStreams*(end-start) - lim + i) / nStreams + require.Equal(t, time.Unix(int64(exp), 0), got.Entries[0].Timestamp) + } + +} + +func TestDownstreamAccumulatorSimple(t *testing.T) { + lim := 30 + start, end := 0, 10 + direction := logproto.BACKWARD + + streams := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, 10, direction) + x := make(logqlmodel.Streams, 0, len(streams)) + for _, s := range streams { + x = append(x, *s) + } + // dummy params. 
Only need to populate direction & limit + params, err := NewLiteralParams( + `{app="foo"}`, time.Time{}, time.Time{}, 0, 0, direction, uint32(lim), nil, + ) + require.NoError(t, err) + + acc := NewStreamAccumulator(params) + result := logqlmodel.Result{ + Data: x, + } + + require.Nil(t, acc.Accumulate(context.Background(), result, 0)) + + res := acc.Result()[0] + got, ok := res.Data.(logqlmodel.Streams) + require.Equal(t, true, ok) + require.Equal(t, 10, len(got), "correct number of streams") + + // each stream should have the top 3 entries + for i := 0; i < 10; i++ { + require.Equal(t, 3, len(got[i].Entries), "correct number of entries in stream") + for j := 0; j < 3; j++ { + require.Equal(t, time.Unix(int64(9-j), 0), got[i].Entries[j].Timestamp, "correct timestamp") + } + } +} + +// TestDownstreamAccumulatorMultiMerge simulates merging multiple +// sub-results from different queries. +func TestDownstreamAccumulatorMultiMerge(t *testing.T) { + for _, direction := range []logproto.Direction{logproto.BACKWARD, logproto.FORWARD} { + t.Run(direction.String(), func(t *testing.T) { + nQueries := 10 + delta := 10 // 10 entries per stream, 1s apart + streamsPerQuery := 10 + lim := 30 + + payloads := make([]logqlmodel.Streams, 0, nQueries) + for i := 0; i < nQueries; i++ { + start := i * delta + end := start + delta + streams := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, streamsPerQuery, direction) + var res logqlmodel.Streams + for i := range streams { + res = append(res, *streams[i]) + } + payloads = append(payloads, res) + + } + + // queries are always dispatched in the correct order. + // oldest time ranges first in the case of logproto.FORWARD + // and newest time ranges first in the case of logproto.BACKWARD + if direction == logproto.BACKWARD { + for i, j := 0, len(payloads)-1; i < j; i, j = i+1, j-1 { + payloads[i], payloads[j] = payloads[j], payloads[i] + } + } + + // dummy params. Only need to populate direction & limit + params, err := NewLiteralParams( + `{app="foo"}`, time.Time{}, time.Time{}, 0, 0, direction, uint32(lim), nil, + ) + require.NoError(t, err) + + acc := NewStreamAccumulator(params) + for i := 0; i < nQueries; i++ { + err := acc.Accumulate(context.Background(), logqlmodel.Result{ + Data: payloads[i], + }, i) + require.Nil(t, err) + } + + got, ok := acc.Result()[0].Data.(logqlmodel.Streams) + require.Equal(t, true, ok) + require.Equal(t, int64(nQueries), acc.Result()[0].Statistics.Summary.Shards) + + // each stream should have the top 3 entries + for i := 0; i < streamsPerQuery; i++ { + stream := got[i] + require.Equal(t, fmt.Sprintf(`{n="%d"}`, i), stream.Labels, "correct labels") + ln := lim / streamsPerQuery + require.Equal(t, ln, len(stream.Entries), "correct number of entries in stream") + switch direction { + case logproto.BACKWARD: + for i := 0; i < ln; i++ { + offset := delta*nQueries - 1 - i + require.Equal(t, time.Unix(int64(offset), 0), stream.Entries[i].Timestamp, "correct timestamp") + } + default: + for i := 0; i < ln; i++ { + offset := i + require.Equal(t, time.Unix(int64(offset), 0), stream.Entries[i].Timestamp, "correct timestamp") + } + } + } + }) + } +} + +func BenchmarkAccumulator(b *testing.B) { + + // dummy params. 
Only need to populate direction & limit
+	lim := 30
+	params, err := NewLiteralParams(
+		`{app="foo"}`, time.Time{}, time.Time{}, 0, 0, logproto.BACKWARD, uint32(lim), nil,
+	)
+	require.NoError(b, err)
+
+	for name, tc := range map[string]struct {
+		results []logqlmodel.Result
+		newAcc  func(Params, []logqlmodel.Result) Accumulator
+		params  Params
+	}{
+		"streams": {
+			newStreamResults(),
+			func(p Params, _ []logqlmodel.Result) Accumulator {
+				return NewStreamAccumulator(p)
+			},
+			params,
+		},
+		"quantile sketches": {
+			newQuantileSketchResults(),
+			func(p Params, _ []logqlmodel.Result) Accumulator {
+				return newQuantileSketchAccumulator()
+			},
+			params,
+		},
+	} {
+		b.Run(name, func(b *testing.B) {
+			b.ResetTimer()
+			b.ReportAllocs()
+			for n := 0; n < b.N; n++ {
+
+				acc := tc.newAcc(params, tc.results)
+				for i, r := range tc.results {
+					err := acc.Accumulate(context.Background(), r, i)
+					require.Nil(b, err)
+				}
+
+				acc.Result()
+			}
+		})
+	}
+}
+
+func newStreamResults() []logqlmodel.Result {
+	nQueries := 50
+	delta := 100 // 100 entries per stream, 1s apart
+	streamsPerQuery := 50
+
+	results := make([]logqlmodel.Result, nQueries)
+	for i := 0; i < nQueries; i++ {
+		start := i * delta
+		end := start + delta
+		streams := newStreams(time.Unix(int64(start), 0), time.Unix(int64(end), 0), time.Second, streamsPerQuery, logproto.BACKWARD)
+		var res logqlmodel.Streams
+		for i := range streams {
+			res = append(res, *streams[i])
+		}
+		results[i] = logqlmodel.Result{Data: res}
+
+	}
+
+	return results
+}
+
+func newQuantileSketchResults() []logqlmodel.Result {
+	results := make([]logqlmodel.Result, 100)
+
+	for r := range results {
+		vectors := make([]ProbabilisticQuantileVector, 10)
+		for i := range vectors {
+			vectors[i] = make(ProbabilisticQuantileVector, 10)
+			for j := range vectors[i] {
+				vectors[i][j] = ProbabilisticQuantileSample{
+					T:      int64(i),
+					F:      newRandomSketch(),
+					Metric: []labels.Label{{Name: "foo", Value: fmt.Sprintf("bar-%d", j)}},
+				}
+			}
+		}
+		results[r] = logqlmodel.Result{Data: ProbabilisticQuantileMatrix(vectors)}
+	}
+
+	return results
+}
+
+func newStreamWithDirection(start, end time.Time, delta time.Duration, ls string, direction logproto.Direction) *logproto.Stream {
+	s := &logproto.Stream{
+		Labels: ls,
+	}
+	for t := start; t.Before(end); t = t.Add(delta) {
+		s.Entries = append(s.Entries, logproto.Entry{
+			Timestamp: t,
+			Line:      fmt.Sprintf("%d", t.Unix()),
+		})
+	}
+	if direction == logproto.BACKWARD {
+		// simulate data coming in reverse order (logproto.BACKWARD)
+		for i, j := 0, len(s.Entries)-1; i < j; i, j = i+1, j-1 {
+			s.Entries[i], s.Entries[j] = s.Entries[j], s.Entries[i]
+		}
+	}
+	return s
+}
+
+func newStreams(start, end time.Time, delta time.Duration, n int, direction logproto.Direction) (res []*logproto.Stream) {
+	for i := 0; i < n; i++ {
+		res = append(res, newStreamWithDirection(start, end, delta, fmt.Sprintf(`{n="%d"}`, i), direction))
+	}
+	return res
+}
+
+func newRandomSketch() sketch.QuantileSketch {
+	r := rand.New(rand.NewSource(42))
+	s := sketch.NewDDSketch()
+	for i := 0; i < 1000; i++ {
+		_ = s.Add(r.Float64())
+	}
+	return s
+}
diff --git a/pkg/logql/downstream.go b/pkg/logql/downstream.go
index 76594dc040c22..33d945f11b923 100644
--- a/pkg/logql/downstream.go
+++ b/pkg/logql/downstream.go
@@ -83,6 +83,29 @@ func (d DownstreamSampleExpr) String() string {
 	return fmt.Sprintf("downstream<%s, shard=%s>", d.SampleExpr.String(), d.shard)
 }
 
+// The DownstreamSampleExpr is not part of LogQL. In the prettified version it's
+// represented as e.g. 
`downstream`
+func (d DownstreamSampleExpr) Pretty(level int) string {
+	s := syntax.Indent(level)
+	if !syntax.NeedSplit(d) {
+		return s + d.String()
+	}
+
+	s += "downstream<\n"
+
+	s += d.SampleExpr.Pretty(level + 1)
+	s += ",\n"
+	s += syntax.Indent(level+1) + "shard="
+	if d.shard != nil {
+		s += d.shard.String() + "\n"
+	} else {
+		s += "nil\n"
+	}
+
+	s += syntax.Indent(level) + ">"
+	return s
+}
+
 // DownstreamLogSelectorExpr is a LogSelectorExpr which signals downstream computation
 type DownstreamLogSelectorExpr struct {
 	shard *astmapper.ShardAnnotation
@@ -93,6 +116,29 @@ func (d DownstreamLogSelectorExpr) String() string {
 	return fmt.Sprintf("downstream<%s, shard=%s>", d.LogSelectorExpr.String(), d.shard)
 }
 
+// The DownstreamLogSelectorExpr is not part of LogQL. In the prettified version it's
+// represented as e.g. `downstream<{foo="bar"} |= "error", shard=1_of_3>`
+func (d DownstreamLogSelectorExpr) Pretty(level int) string {
+	s := syntax.Indent(level)
+	if !syntax.NeedSplit(d) {
+		return s + d.String()
+	}
+
+	s += "downstream<\n"
+
+	s += d.LogSelectorExpr.Pretty(level + 1)
+	s += ",\n"
+	s += syntax.Indent(level+1) + "shard="
+	if d.shard != nil {
+		s += d.shard.String() + "\n"
+	} else {
+		s += "nil\n"
+	}
+
+	s += syntax.Indent(level) + ">"
+	return s
+}
+
 func (d DownstreamSampleExpr) Walk(f syntax.WalkFn) { f(d) }
 
 var defaultMaxDepth = 4
@@ -105,7 +151,7 @@ type ConcatSampleExpr struct {
 	next *ConcatSampleExpr
 }
 
-func (c ConcatSampleExpr) String() string {
+func (c *ConcatSampleExpr) String() string {
 	if c.next == nil {
 		return c.DownstreamSampleExpr.String()
 	}
@@ -115,7 +161,7 @@ func (c ConcatSampleExpr) String() string {
 
 // in order to not display huge queries with thousands of shards,
 // we can limit the number of stringified subqueries.
-func (c ConcatSampleExpr) string(maxDepth int) string {
+func (c *ConcatSampleExpr) string(maxDepth int) string {
 	if c.next == nil {
 		return c.DownstreamSampleExpr.String()
 	}
@@ -125,18 +171,46 @@ func (c ConcatSampleExpr) string(maxDepth int) string {
 	return fmt.Sprintf("%s ++ %s", c.DownstreamSampleExpr.String(), c.next.string(maxDepth-1))
 }
 
-func (c ConcatSampleExpr) Walk(f syntax.WalkFn) {
+func (c *ConcatSampleExpr) Walk(f syntax.WalkFn) {
 	f(c)
 	f(c.next)
 }
 
+// ConcatSampleExpr has no LogQL representation. It is expressed in the
+// prettified version as e.g. `concat(downstream ++ )`
+func (c *ConcatSampleExpr) Pretty(level int) string {
+	s := syntax.Indent(level)
+	if !syntax.NeedSplit(c) {
+		return s + c.String()
+	}
+
+	s += "concat(\n"
+
+	head := c
+	for i := 0; i < defaultMaxDepth && head != nil; i++ {
+		if i > 0 {
+			s += syntax.Indent(level+1) + "++\n"
+		}
+		s += head.DownstreamSampleExpr.Pretty(level + 1)
+		s += "\n"
+		head = head.next
+	}
+	// There are more downstream samples...
+	if head != nil {
+		s += syntax.Indent(level+1) + "++ ...\n"
+	}
+	s += syntax.Indent(level) + ")"
+
+	return s
+}
+
 // ConcatLogSelectorExpr is an expr for concatenating multiple LogSelectorExpr
 type ConcatLogSelectorExpr struct {
 	DownstreamLogSelectorExpr
 	next *ConcatLogSelectorExpr
 }
 
-func (c ConcatLogSelectorExpr) String() string {
+func (c *ConcatLogSelectorExpr) String() string {
 	if c.next == nil {
 		return c.DownstreamLogSelectorExpr.String()
 	}
@@ -146,7 +220,7 @@ func (c ConcatLogSelectorExpr) String() string {
 
 // in order to not display huge queries with thousands of shards,
 // we can limit the number of stringified subqueries. 
-func (c ConcatLogSelectorExpr) string(maxDepth int) string { +func (c *ConcatLogSelectorExpr) string(maxDepth int) string { if c.next == nil { return c.DownstreamLogSelectorExpr.String() } @@ -156,6 +230,34 @@ func (c ConcatLogSelectorExpr) string(maxDepth int) string { return fmt.Sprintf("%s ++ %s", c.DownstreamLogSelectorExpr.String(), c.next.string(maxDepth-1)) } +// ConcatLogSelectorExpr has no representation in LogQL. Its prettified version +// is e.g. `concat(downstream<{foo="bar"} |= "error", shard=1_of_3>)` +func (c *ConcatLogSelectorExpr) Pretty(level int) string { + s := syntax.Indent(level) + if !syntax.NeedSplit(c) { + return s + c.String() + } + + s += "concat(\n" + + head := c + for i := 0; i < defaultMaxDepth && head != nil; i++ { + if i > 0 { + s += syntax.Indent(level+1) + "++\n" + } + s += head.DownstreamLogSelectorExpr.Pretty(level + 1) + s += "\n" + head = head.next + } + // There are more downstream samples... + if head != nil { + s += syntax.Indent(level+1) + "++ ...\n" + } + s += ")" + + return s +} + // QuantileSketchEvalExpr evaluates a quantile sketch to the actual quantile. type QuantileSketchEvalExpr struct { syntax.SampleExpr @@ -244,7 +346,13 @@ type Resp struct { // Downstreamer is an interface for deferring responsibility for query execution. // It is decoupled from but consumed by a downStreamEvaluator to dispatch ASTs. type Downstreamer interface { - Downstream(context.Context, []DownstreamQuery) ([]logqlmodel.Result, error) + Downstream(context.Context, []DownstreamQuery, Accumulator) ([]logqlmodel.Result, error) +} + +// Accumulator is an interface for accumulating query results. +type Accumulator interface { + Accumulate(context.Context, logqlmodel.Result, int) error + Result() []logqlmodel.Result } // DownstreamEvaluator is an evaluator which handles shard aware AST nodes @@ -254,8 +362,8 @@ type DownstreamEvaluator struct { } // Downstream runs queries and collects stats from the embedded Downstreamer -func (ev DownstreamEvaluator) Downstream(ctx context.Context, queries []DownstreamQuery) ([]logqlmodel.Result, error) { - results, err := ev.Downstreamer.Downstream(ctx, queries) +func (ev DownstreamEvaluator) Downstream(ctx context.Context, queries []DownstreamQuery, acc Accumulator) ([]logqlmodel.Result, error) { + results, err := ev.Downstreamer.Downstream(ctx, queries, acc) if err != nil { return nil, err } @@ -314,12 +422,13 @@ func (ev *DownstreamEvaluator) NewStepEvaluator( if e.shard != nil { shards = append(shards, *e.shard) } + acc := NewBufferedAccumulator(1) results, err := ev.Downstream(ctx, []DownstreamQuery{{ Params: ParamsWithShardsOverride{ Params: ParamsWithExpressionOverride{Params: params, ExpressionOverride: e.SampleExpr}, ShardsOverride: Shards(shards).Encode(), }, - }}) + }}, acc) if err != nil { return nil, err } @@ -339,7 +448,8 @@ func (ev *DownstreamEvaluator) NewStepEvaluator( cur = cur.next } - results, err := ev.Downstream(ctx, queries) + acc := NewBufferedAccumulator(len(queries)) + results, err := ev.Downstream(ctx, queries, acc) if err != nil { return nil, err } @@ -379,7 +489,8 @@ func (ev *DownstreamEvaluator) NewStepEvaluator( } } - results, err := ev.Downstream(ctx, queries) + acc := newQuantileSketchAccumulator() + results, err := ev.Downstream(ctx, queries, acc) if err != nil { return nil, err } @@ -413,12 +524,13 @@ func (ev *DownstreamEvaluator) NewIterator( if e.shard != nil { shards = append(shards, *e.shard) } + acc := NewStreamAccumulator(params) results, err := ev.Downstream(ctx, []DownstreamQuery{{ Params: 
ParamsWithShardsOverride{ Params: ParamsWithExpressionOverride{Params: params, ExpressionOverride: e.LogSelectorExpr}, ShardsOverride: shards.Encode(), }, - }}) + }}, acc) if err != nil { return nil, err } @@ -438,7 +550,8 @@ func (ev *DownstreamEvaluator) NewIterator( cur = cur.next } - results, err := ev.Downstream(ctx, queries) + acc := NewStreamAccumulator(params) + results, err := ev.Downstream(ctx, queries, acc) if err != nil { return nil, err } diff --git a/pkg/logql/downstream_test.go b/pkg/logql/downstream_test.go index 426722a554594..ec5f3170468d0 100644 --- a/pkg/logql/downstream_test.go +++ b/pkg/logql/downstream_test.go @@ -8,12 +8,14 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/user" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql/syntax" + "github.com/grafana/loki/pkg/querier/astmapper" ) var nilShardMetrics = NewShardMapperMetrics(nil) @@ -543,3 +545,142 @@ func relativeError(t *testing.T, expected, actual promql.Matrix, alpha float64) require.InEpsilonSlice(t, e, a, alpha) } } + +func TestFormat_ShardedExpr(t *testing.T) { + oldMax := syntax.MaxCharsPerLine + syntax.MaxCharsPerLine = 20 + + oldDefaultDepth := defaultMaxDepth + defaultMaxDepth = 2 + defer func() { + syntax.MaxCharsPerLine = oldMax + defaultMaxDepth = oldDefaultDepth + }() + + cases := []struct { + name string + in syntax.Expr + exp string + }{ + { + name: "ConcatSampleExpr", + in: &ConcatSampleExpr{ + DownstreamSampleExpr: DownstreamSampleExpr{ + shard: &astmapper.ShardAnnotation{ + Shard: 0, + Of: 3, + }, + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeRate, + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")}, + }, + Interval: time.Minute, + }, + }, + }, + next: &ConcatSampleExpr{ + DownstreamSampleExpr: DownstreamSampleExpr{ + shard: &astmapper.ShardAnnotation{ + Shard: 1, + Of: 3, + }, + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeRate, + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")}, + }, + Interval: time.Minute, + }, + }, + }, + next: &ConcatSampleExpr{ + DownstreamSampleExpr: DownstreamSampleExpr{ + shard: &astmapper.ShardAnnotation{ + Shard: 1, + Of: 3, + }, + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeRate, + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")}, + }, + Interval: time.Minute, + }, + }, + }, + next: nil, + }, + }, + }, + exp: `concat( + downstream< + rate( + {foo="bar"} [1m] + ), + shard=0_of_3 + > + ++ + downstream< + rate( + {foo="bar"} [1m] + ), + shard=1_of_3 + > + ++ ... 
+)`, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got := syntax.Prettify(c.in) + assert.Equal(t, c.exp, got) + }) + } +} + +func TestPrettierWithoutShards(t *testing.T) { + q := `((quantile_over_time(0.5,{foo="bar"} | json | unwrap bytes[1d]) by (cluster) > 42) and (count by (cluster)(max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace)) > 10))` + e := syntax.MustParseExpr(q) + + mapper := NewShardMapper(ConstantShards(4), nilShardMetrics, []string{}) + _, _, mapped, err := mapper.Parse(e) + require.NoError(t, err) + got := syntax.Prettify(mapped) + expected := ` downstream> + > + 42 +and + count by (cluster)( + max by (cluster, namespace)( + concat( + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=0_of_4 + > + ++ + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=1_of_4 + > + ++ + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=2_of_4 + > + ++ + downstream< + max_over_time({foo="baz"} |= "error" | json | unwrap bytes[1d]) by (cluster,namespace), + shard=3_of_4 + > + ) + ) + ) + > + 10` + assert.Equal(t, expected, got) +} diff --git a/pkg/logql/metrics.go b/pkg/logql/metrics.go index 63051e362eae6..40fbece82d87d 100644 --- a/pkg/logql/metrics.go +++ b/pkg/logql/metrics.go @@ -114,13 +114,17 @@ func RecordRangeAndInstantQueryMetrics( } queryTags, _ := ctx.Value(httpreq.QueryTagsHTTPHeader).(string) // it's ok to be empty. + var ( + query = p.QueryString() + hashedQuery = util.HashedQuery(query) + ) logValues := make([]interface{}, 0, 50) logValues = append(logValues, []interface{}{ "latency", latencyType, // this can be used to filter log lines. - "query", p.QueryString(), - "query_hash", util.HashedQuery(p.QueryString()), + "query", query, + "query_hash", hashedQuery, "query_type", queryType, "range_type", rt, "length", p.End().Sub(p.Start()), diff --git a/pkg/logql/shardmapper_test.go b/pkg/logql/shardmapper_test.go index 96955109a9413..0e345291eed3b 100644 --- a/pkg/logql/shardmapper_test.go +++ b/pkg/logql/shardmapper_test.go @@ -1598,3 +1598,32 @@ func TestStringTrimming(t *testing.T) { func float64p(v float64) *float64 { return &v } + +func TestShardTopk(t *testing.T) { + expr := `topk( + 10, + sum by (ip) ( + sum_over_time({job="foo"} | json | unwrap bytes(bytes)[1m]) + ) + )` + m := NewShardMapper(ConstantShards(5), nilShardMetrics, []string{ShardQuantileOverTime}) + _, _, mappedExpr, err := m.Parse(syntax.MustParseExpr(expr)) + require.NoError(t, err) + + expected := `topk( + 10, + sum by (ip)( + concat( + downstream + ++ + downstream + ++ + downstream + ++ + downstream + ++ ... + ) + ) +)` + require.Equal(t, expected, mappedExpr.Pretty(0)) +} diff --git a/pkg/logql/syntax/prettier.go b/pkg/logql/syntax/prettier.go index cf346e26c562f..1b407453858f7 100644 --- a/pkg/logql/syntax/prettier.go +++ b/pkg/logql/syntax/prettier.go @@ -35,8 +35,8 @@ import ( // var ( - // maxCharsPerLine is used to qualify whether some LogQL expressions are worth `splitting` into new lines. - maxCharsPerLine = 100 + // MaxCharsPerLine is used to qualify whether some LogQL expressions are worth `splitting` into new lines. 
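+	// It is exported so that callers (for example, the sharding tests) can
+	// lower the threshold and force expressions to be split.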
+ MaxCharsPerLine = 100 ) func Prettify(e Expr) string { @@ -51,8 +51,8 @@ func (e *MatchersExpr) Pretty(level int) string { // e.g: `{foo="bar"} | logfmt | level="error"` // Here, left = `{foo="bar"}` and multistages would collection of each stage in pipeline, here `logfmt` and `level="error"` func (e *PipelineExpr) Pretty(level int) string { - if !needSplit(e) { - return indent(level) + e.String() + if !NeedSplit(e) { + return Indent(level) + e.String() } s := fmt.Sprintf("%s\n", e.Left.Pretty(level)) @@ -73,8 +73,8 @@ func (e *PipelineExpr) Pretty(level int) string { // e.g: `|= "error" != "memcache" |= ip("192.168.0.1")` // NOTE: here `ip` is Op in this expression. func (e *LineFilterExpr) Pretty(level int) string { - if !needSplit(e) { - return indent(level) + e.String() + if !NeedSplit(e) { + return Indent(level) + e.String() } var s string @@ -90,7 +90,7 @@ func (e *LineFilterExpr) Pretty(level int) string { s += "\n" } - s += indent(level) + s += Indent(level) // We re-use LineFilterExpr's String() implementation to avoid duplication. // We create new LineFilterExpr without `Left`. @@ -153,7 +153,7 @@ func (e *LogfmtExpressionParser) Pretty(level int) string { // e.g: sum_over_time({foo="bar"} | logfmt | unwrap bytes_processed [5m]) func (e *UnwrapExpr) Pretty(level int) string { - s := indent(level) + s := Indent(level) if e.Operation != "" { s += fmt.Sprintf("%s %s %s(%s)", OpPipe, OpUnwrap, e.Operation, e.Identifier) @@ -161,7 +161,7 @@ func (e *UnwrapExpr) Pretty(level int) string { s += fmt.Sprintf("%s %s %s", OpPipe, OpUnwrap, e.Identifier) } for _, f := range e.PostFilters { - s += fmt.Sprintf("\n%s%s %s", indent(level), OpPipe, f) + s += fmt.Sprintf("\n%s%s %s", Indent(level), OpPipe, f) } return s } @@ -200,8 +200,8 @@ func (e *OffsetExpr) Pretty(_ int) string { // e.g: count_over_time({foo="bar"}[5m]) func (e *RangeAggregationExpr) Pretty(level int) string { - s := indent(level) - if !needSplit(e) { + s := Indent(level) + if !NeedSplit(e) { return s + e.String() } @@ -211,13 +211,13 @@ func (e *RangeAggregationExpr) Pretty(level int) string { // print args to the function. if e.Params != nil { - s = fmt.Sprintf("%s%s%s,", s, indent(level+1), fmt.Sprint(*e.Params)) + s = fmt.Sprintf("%s%s%s,", s, Indent(level+1), fmt.Sprint(*e.Params)) s += "\n" } s += e.Left.Pretty(level + 1) - s += "\n" + indent(level) + ")" + s += "\n" + Indent(level) + ")" if e.Grouping != nil { s += e.Grouping.Pretty(level) @@ -236,9 +236,9 @@ func (e *RangeAggregationExpr) Pretty(level int) string { // - vector on which aggregation is done. // [without|by (