From 4ed3b117d2a3eacd8b0328fef6d66850b3e77117 Mon Sep 17 00:00:00 2001 From: Karsten Jeschkies Date: Thu, 2 Nov 2023 11:06:29 +0100 Subject: [PATCH 01/14] Use newest build image 0.31.2 with golangci-lint update. (#11118) **What this PR does / why we need it**: This just updates the Loki build image changed in #11114. **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15) --- .drone/drone.yml | 42 +++++++++---------- .golangci.yml | 5 ++- Makefile | 2 +- clients/pkg/logentry/stages/metrics.go | 1 + clients/pkg/logentry/stages/pack_test.go | 1 + integration/cluster/cluster.go | 1 + pkg/bloomgateway/querier_test.go | 2 +- pkg/bloomgateway/sharding.go | 7 ++-- pkg/logcli/client/client.go | 1 + pkg/logcli/query/part_file.go | 2 +- pkg/loghttp/push/otlp_test.go | 2 +- pkg/logql/engine.go | 4 +- pkg/logql/evaluator.go | 4 +- pkg/loki/config_wrapper.go | 8 ++-- pkg/loki/loki.go | 2 +- pkg/loki/modules.go | 2 +- pkg/lokifrontend/frontend/v1/frontend_test.go | 2 +- pkg/querier/http.go | 6 +-- pkg/querier/ingester_querier.go | 6 +-- pkg/querier/ingester_querier_test.go | 6 --- pkg/querier/querier_test.go | 3 +- pkg/querier/queryrange/codec.go | 1 + pkg/querier/queryrange/querysharding.go | 4 +- pkg/querier/queryrange/roundtrip.go | 3 +- pkg/querier/worker_service_test.go | 2 +- pkg/storage/bloom/v1/bloom_tokenizer.go | 2 +- pkg/storage/bloom/v1/filter/buckets.go | 2 +- pkg/storage/bloom/v1/filter/partitioned.go | 2 +- pkg/storage/bloom/v1/filter/scalable.go | 2 +- pkg/storage/bloom/v1/fuse_test.go | 1 + pkg/storage/chunk/client/congestion/config.go | 1 + pkg/storage/config/store.go | 3 +- pkg/storage/factory.go | 2 +- pkg/storage/store.go | 2 +- .../stores/shipper/bloomshipper/store_test.go | 4 +- .../gatewayclient/gateway_client.go | 2 +- .../gatewayclient/gateway_client_test.go | 11 +++-- tools/deprecated-config-checker/main.go | 1 + tools/doc-generator/writer.go | 1 + tools/tsdb/bloom-tester/concurrent.go | 3 +- tools/tsdb/bloom-tester/lib.go | 2 +- tools/tsdb/bloom-tester/lrucache_test.go | 3 +- tools/tsdb/bloom-tester/main.go | 6 ++- tools/tsdb/bloom-tester/metrics.go | 7 ++-- tools/tsdb/bloom-tester/readlib.go | 1 + tools/tsdb/bloom-tester/readlib_test.go | 3 +- 46 files changed, 95 insertions(+), 85 deletions(-) diff --git a/.drone/drone.yml b/.drone/drone.yml index 5c526995328b0..ca0c09c4c7391 100644 --- a/.drone/drone.yml +++ b/.drone/drone.yml @@ -93,14 +93,14 @@ steps: depends_on: - clone environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: check-drone-drift - commands: - make BUILD_IN_CONTAINER=false
check-generated-files depends_on: - clone environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: check-generated-files - commands: - cd .. @@ -110,7 +110,7 @@ steps: depends_on: - clone environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: clone-target-branch when: event: @@ -121,14 +121,14 @@ steps: - clone-target-branch - check-generated-files environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: test - commands: - cd ../loki-target-branch && BUILD_IN_CONTAINER=false make test depends_on: - clone-target-branch environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: test-target-branch when: event: @@ -141,7 +141,7 @@ steps: - test - test-target-branch environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: compare-coverage when: event: @@ -159,7 +159,7 @@ steps: TOKEN: from_secret: github_token USER: grafanabot - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: report-coverage when: event: @@ -169,7 +169,7 @@ steps: depends_on: - check-generated-files environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: lint - commands: - make BUILD_IN_CONTAINER=false check-mod @@ -177,7 +177,7 @@ steps: - test - lint environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: check-mod - commands: - apk add make bash && make lint-scripts @@ -188,21 +188,21 @@ steps: depends_on: - check-generated-files environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: loki - commands: - make BUILD_IN_CONTAINER=false check-doc depends_on: - loki environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: check-doc - commands: - make BUILD_IN_CONTAINER=false check-format GIT_TARGET_BRANCH="$DRONE_TARGET_BRANCH" depends_on: - loki environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: check-format when: event: @@ -212,14 +212,14 @@ steps: depends_on: - loki environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: validate-example-configs - commands: - make BUILD_IN_CONTAINER=false check-example-config-doc depends_on: - clone environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: check-example-config-doc - commands: - mkdir -p /hugo/content/docs/loki/latest @@ -252,7 +252,7 @@ steps: depends_on: - clone environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: loki-mixin-check when: event: @@ -277,7 +277,7 @@ steps: depends_on: - clone environment: {} - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: documentation-helm-reference-check trigger: ref: @@ -1683,7 +1683,7 @@ steps: NFPM_SIGNING_KEY: from_secret: gpg_private_key NFPM_SIGNING_KEY_FILE: /drone/src/private-key.key - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: write-key - commands: - make BUILD_IN_CONTAINER=false packages @@ -1691,7 +1691,7 @@ steps: NFPM_PASSPHRASE: from_secret: gpg_passphrase NFPM_SIGNING_KEY_FILE: /drone/src/private-key.key - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: test 
packaging - commands: - ./tools/packaging/verify-deb-install.sh @@ -1717,7 +1717,7 @@ steps: NFPM_PASSPHRASE: from_secret: gpg_passphrase NFPM_SIGNING_KEY_FILE: /drone/src/private-key.key - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: publish when: event: @@ -1752,7 +1752,7 @@ steps: from_secret: docker_password DOCKER_USERNAME: from_secret: docker_username - image: grafana/loki-build-image:0.30.1 + image: grafana/loki-build-image:0.31.2 name: build and push privileged: true volumes: @@ -2017,6 +2017,6 @@ kind: secret name: gpg_private_key --- kind: signature -hmac: 27257b795645c64fe82deb850f6efdf73fec2e0e2217e86ac52ae6bf434a92b5 +hmac: a68ce0151ff769aa0731f120437450f0d9685c843cb3c5b046d4991f910aadd7 ... diff --git a/.golangci.yml b/.golangci.yml index f1d4093919a59..8f8d3b483e86f 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -88,5 +88,8 @@ issues: - Error return value of .*log\.Logger\)\.Log\x60 is not checked - Error return value of .*.Log.* is not checked - Error return value of `` is not checked - + exclude-rules: + - path: '(.+)_test\.go' + linters: + - goconst fix: true diff --git a/Makefile b/Makefile index dace8181353f3..2fb1b515dcdc8 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ DOCKER_IMAGE_DIRS := $(patsubst %/Dockerfile,%,$(DOCKERFILES)) BUILD_IN_CONTAINER ?= true # ensure you run `make drone` after changing this -BUILD_IMAGE_VERSION := 0.30.1 +BUILD_IMAGE_VERSION ?= 0.31.2 # Docker image info IMAGE_PREFIX ?= grafana diff --git a/clients/pkg/logentry/stages/metrics.go b/clients/pkg/logentry/stages/metrics.go index d54f8dce46f7c..14386e3b43a40 100644 --- a/clients/pkg/logentry/stages/metrics.go +++ b/clients/pkg/logentry/stages/metrics.go @@ -179,6 +179,7 @@ func (m *metricStage) Name() string { } // recordCounter will update a counter metric +// nolint:goconst func (m *metricStage) recordCounter(name string, counter *metric.Counters, labels model.LabelSet, v interface{}) { // If value matching is defined, make sure value matches. if counter.Cfg.Value != nil { diff --git a/clients/pkg/logentry/stages/pack_test.go b/clients/pkg/logentry/stages/pack_test.go index 32bf75053b7a1..c1bf7814f636a 100644 --- a/clients/pkg/logentry/stages/pack_test.go +++ b/clients/pkg/logentry/stages/pack_test.go @@ -1,3 +1,4 @@ +// nolint:goconst package stages import ( diff --git a/integration/cluster/cluster.go b/integration/cluster/cluster.go index bc6d071e589f0..8ddeac00f1782 100644 --- a/integration/cluster/cluster.go +++ b/integration/cluster/cluster.go @@ -24,6 +24,7 @@ import ( "gopkg.in/yaml.v2" "github.com/grafana/loki/integration/util" + "github.com/grafana/loki/pkg/loki" "github.com/grafana/loki/pkg/storage" "github.com/grafana/loki/pkg/storage/config" diff --git a/pkg/bloomgateway/querier_test.go b/pkg/bloomgateway/querier_test.go index 1e40eb2994c04..abfd7c3afe5dc 100644 --- a/pkg/bloomgateway/querier_test.go +++ b/pkg/bloomgateway/querier_test.go @@ -19,7 +19,7 @@ type noopClient struct { } // FilterChunks implements Client. 
-func (c *noopClient) FilterChunks(ctx context.Context, tenant string, from, through model.Time, groups []*logproto.GroupedChunkRefs, filters ...*logproto.LineFilterExpression) ([]*logproto.GroupedChunkRefs, error) { +func (c *noopClient) FilterChunks(ctx context.Context, tenant string, from, through model.Time, groups []*logproto.GroupedChunkRefs, filters ...*logproto.LineFilterExpression) ([]*logproto.GroupedChunkRefs, error) { // nolint:revive c.callCount++ return groups, c.err } diff --git a/pkg/bloomgateway/sharding.go b/pkg/bloomgateway/sharding.go index c7bfdc7af92c4..95cf4f05ab3a6 100644 --- a/pkg/bloomgateway/sharding.go +++ b/pkg/bloomgateway/sharding.go @@ -87,12 +87,13 @@ func (s *ShuffleShardingStrategy) FilterTenants(_ context.Context, tenantIDs []s return filteredIDs, nil } +// nolint:revive func getBucket(rangeMin, rangeMax, pos uint64) int { return 0 } // FilterBlocks implements ShardingStrategy. -func (s *ShuffleShardingStrategy) FilterBlocks(ctx context.Context, tenantID string, blockRefs []BlockRef) ([]BlockRef, error) { +func (s *ShuffleShardingStrategy) FilterBlocks(_ context.Context, tenantID string, blockRefs []BlockRef) ([]BlockRef, error) { filteredBlockRefs := make([]BlockRef, 0, len(blockRefs)) subRing := GetShuffleShardingSubring(s.r, tenantID, s.limits) @@ -152,11 +153,11 @@ func NewNoopStrategy() *NoopStrategy { } // FilterTenants implements ShardingStrategy. -func (s *NoopStrategy) FilterTenants(ctx context.Context, tenantIDs []string) ([]string, error) { +func (s *NoopStrategy) FilterTenants(_ context.Context, tenantIDs []string) ([]string, error) { return tenantIDs, nil } // FilterBlocks implements ShardingStrategy. -func (s *NoopStrategy) FilterBlocks(ctx context.Context, tenantID string, blockRefs []BlockRef) ([]BlockRef, error) { +func (s *NoopStrategy) FilterBlocks(_ context.Context, _ string, blockRefs []BlockRef) ([]BlockRef, error) { return blockRefs, nil } diff --git a/pkg/logcli/client/client.go b/pkg/logcli/client/client.go index 7fe3233ec4951..964abc13d30bd 100644 --- a/pkg/logcli/client/client.go +++ b/pkg/logcli/client/client.go @@ -324,6 +324,7 @@ func (c *DefaultClient) doRequest(path, query string, quiet bool, out interface{ return json.NewDecoder(resp.Body).Decode(out) } +// nolint:goconst func (c *DefaultClient) getHTTPRequestHeader() (http.Header, error) { h := make(http.Header) diff --git a/pkg/logcli/query/part_file.go b/pkg/logcli/query/part_file.go index 8cb450c1e3f4f..cb0658964b9aa 100644 --- a/pkg/logcli/query/part_file.go +++ b/pkg/logcli/query/part_file.go @@ -36,7 +36,7 @@ func (f *PartFile) Exists() (bool, error) { } else if errors.Is(err, os.ErrNotExist) { // File does not exist. return false, nil - } else { + } else { // nolint:revive // Unclear if file exists or not, we cannot stat it. 
return false, fmt.Errorf("failed to check if part file exists: %s: %s", f.finalName, err) } diff --git a/pkg/loghttp/push/otlp_test.go b/pkg/loghttp/push/otlp_test.go index 8018fbd5a1ae6..8d02485833775 100644 --- a/pkg/loghttp/push/otlp_test.go +++ b/pkg/loghttp/push/otlp_test.go @@ -439,6 +439,6 @@ func TestAttributesToLabels(t *testing.T) { type fakeRetention struct{} -func (f fakeRetention) RetentionPeriodFor(userID string, lbs labels.Labels) time.Duration { +func (f fakeRetention) RetentionPeriodFor(_ string, _ labels.Labels) time.Duration { return time.Hour } diff --git a/pkg/logql/engine.go b/pkg/logql/engine.go index 1edf86da3ed58..f830bd194ddf3 100644 --- a/pkg/logql/engine.go +++ b/pkg/logql/engine.go @@ -298,7 +298,7 @@ func (q *query) Eval(ctx context.Context) (promql_parser.Value, error) { } defer util.LogErrorWithContext(ctx, "closing iterator", itr.Close) - streams, err := readStreams(itr, q.params.Limit(), q.params.Direction(), q.params.Interval(), true) + streams, err := readStreams(itr, q.params.Limit(), q.params.Direction(), q.params.Interval()) return streams, err default: return nil, fmt.Errorf("unexpected type (%T): cannot evaluate", e) @@ -508,7 +508,7 @@ func PopulateMatrixFromScalar(data promql.Scalar, params Params) promql.Matrix { // If categorizeLabels is true, the stream labels contains just the stream labels and entries inside each stream have their // structuredMetadata and parsed fields populated with structured metadata labels plus the parsed labels respectively. // Otherwise, the stream labels are the whole series labels including the stream labels, structured metadata labels and parsed labels. -func readStreams(i iter.EntryIterator, size uint32, dir logproto.Direction, interval time.Duration, categorizeLabels bool) (logqlmodel.Streams, error) { +func readStreams(i iter.EntryIterator, size uint32, dir logproto.Direction, interval time.Duration) (logqlmodel.Streams, error) { streams := map[string]*logproto.Stream{} respSize := uint32(0) // lastEntry should be a really old time so that the first comparison is always true, we use a negative diff --git a/pkg/logql/evaluator.go b/pkg/logql/evaluator.go index 07a056e4ffe58..c132870b6c063 100644 --- a/pkg/logql/evaluator.go +++ b/pkg/logql/evaluator.go @@ -739,9 +739,9 @@ func matchingSignature(sample promql.Sample, opts *syntax.BinOpOptions) uint64 { return sample.Metric.Hash() } else if opts.VectorMatching.On { return labels.NewBuilder(sample.Metric).Keep(opts.VectorMatching.MatchingLabels...).Labels().Hash() - } else { - return labels.NewBuilder(sample.Metric).Del(opts.VectorMatching.MatchingLabels...).Labels().Hash() } + + return labels.NewBuilder(sample.Metric).Del(opts.VectorMatching.MatchingLabels...).Labels().Hash() } func vectorBinop(op string, opts *syntax.BinOpOptions, lhs, rhs promql.Vector, lsigs, rsigs []uint64) (promql.Vector, error) { diff --git a/pkg/loki/config_wrapper.go b/pkg/loki/config_wrapper.go index 796f7c8faab58..41a87775a9ecc 100644 --- a/pkg/loki/config_wrapper.go +++ b/pkg/loki/config_wrapper.go @@ -113,11 +113,11 @@ func (c *ConfigWrapper) ApplyDynamicConfig() cfg.Source { } if i := lastBoltdbShipperConfig(r.SchemaConfig.Configs); i != len(r.SchemaConfig.Configs) { - betterBoltdbShipperDefaults(r, &defaults, r.SchemaConfig.Configs[i]) + betterBoltdbShipperDefaults(r) } if i := lastTSDBConfig(r.SchemaConfig.Configs); i != len(r.SchemaConfig.Configs) { - betterTSDBShipperDefaults(r, &defaults, r.SchemaConfig.Configs[i]) + betterTSDBShipperDefaults(r) } applyEmbeddedCacheConfig(r) @@ 
-575,7 +575,7 @@ func applyStorageConfig(cfg, defaults *ConfigWrapper) error { return nil } -func betterBoltdbShipperDefaults(cfg, defaults *ConfigWrapper, period config.PeriodConfig) { +func betterBoltdbShipperDefaults(cfg *ConfigWrapper) { if cfg.Common.PathPrefix != "" { prefix := strings.TrimSuffix(cfg.Common.PathPrefix, "/") @@ -589,7 +589,7 @@ func betterBoltdbShipperDefaults(cfg, defaults *ConfigWrapper, period config.Per } } -func betterTSDBShipperDefaults(cfg, defaults *ConfigWrapper, period config.PeriodConfig) { +func betterTSDBShipperDefaults(cfg *ConfigWrapper) { if cfg.Common.PathPrefix != "" { prefix := strings.TrimSuffix(cfg.Common.PathPrefix, "/") diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index 54a0a52275dcc..e84bdcbe68613 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -250,7 +250,7 @@ func (c *Config) Validate() error { if err := c.CompactorConfig.Validate(); err != nil { return errors.Wrap(err, "invalid compactor config") } - if err := c.ChunkStoreConfig.Validate(util_log.Logger); err != nil { + if err := c.ChunkStoreConfig.Validate(); err != nil { return errors.Wrap(err, "invalid chunk store config") } if err := c.QueryRange.Validate(); err != nil { diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index b31fdecc1165b..0d6d90da91799 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -782,7 +782,7 @@ func (t *Loki) setupAsyncStore() error { } func (t *Loki) initIngesterQuerier() (_ services.Service, err error) { - t.ingesterQuerier, err = querier.NewIngesterQuerier(t.Cfg.IngesterClient, t.ring, t.Cfg.Querier.ExtraQueryDelay, t.Cfg.MetricsNamespace) + t.ingesterQuerier, err = querier.NewIngesterQuerier(t.Cfg.IngesterClient, t.ring, t.Cfg.Querier.ExtraQueryDelay) if err != nil { return nil, err } diff --git a/pkg/lokifrontend/frontend/v1/frontend_test.go b/pkg/lokifrontend/frontend/v1/frontend_test.go index 6cfc1964d9d4b..f715d3e8f5fd0 100644 --- a/pkg/lokifrontend/frontend/v1/frontend_test.go +++ b/pkg/lokifrontend/frontend/v1/frontend_test.go @@ -228,7 +228,7 @@ func TestFrontendMetricsCleanup(t *testing.T) { } } -func testFrontend(t *testing.T, config Config, handler queryrangebase.Handler, test func(addr string, frontend *Frontend), matchMaxConcurrency bool, reg prometheus.Registerer) { +func testFrontend(t *testing.T, config Config, handler queryrangebase.Handler, test func(addr string, frontend *Frontend), _ bool, reg prometheus.Registerer) { logger := log.NewNopLogger() var workerConfig querier_worker.Config diff --git a/pkg/querier/http.go b/pkg/querier/http.go index b6ba4750aec40..1ecde15626ecb 100644 --- a/pkg/querier/http.go +++ b/pkg/querier/http.go @@ -201,10 +201,10 @@ func (q *QuerierAPI) TailHandler(w http.ResponseWriter, r *http.Request) { break } else if tailer.stopped { return - } else { - level.Error(logger).Log("msg", "Unexpected error from client", "err", err) - break } + + level.Error(logger).Log("msg", "Unexpected error from client", "err", err) + break } } doneChan <- struct{}{} diff --git a/pkg/querier/ingester_querier.go b/pkg/querier/ingester_querier.go index 4bdd21cb24f26..1312cf7168ea6 100644 --- a/pkg/querier/ingester_querier.go +++ b/pkg/querier/ingester_querier.go @@ -41,17 +41,17 @@ type IngesterQuerier struct { extraQueryDelay time.Duration } -func NewIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration, metricsNamespace string) (*IngesterQuerier, error) { +func NewIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration) 
(*IngesterQuerier, error) { factory := func(addr string) (ring_client.PoolClient, error) { return client.New(clientCfg, addr) } - return newIngesterQuerier(clientCfg, ring, extraQueryDelay, ring_client.PoolAddrFunc(factory), metricsNamespace) + return newIngesterQuerier(clientCfg, ring, extraQueryDelay, ring_client.PoolAddrFunc(factory)) } // newIngesterQuerier creates a new IngesterQuerier and allows to pass a custom ingester client factory // used for testing purposes -func newIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration, clientFactory ring_client.PoolFactory, metricsNamespace string) (*IngesterQuerier, error) { +func newIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration, clientFactory ring_client.PoolFactory) (*IngesterQuerier, error) { iq := IngesterQuerier{ ring: ring, pool: clientpool.NewPool("ingester", clientCfg.PoolConfig, ring, clientFactory, util_log.Logger), diff --git a/pkg/querier/ingester_querier_test.go b/pkg/querier/ingester_querier_test.go index a5cfd9a54dd8d..a3eb6d3cc3b0b 100644 --- a/pkg/querier/ingester_querier_test.go +++ b/pkg/querier/ingester_querier_test.go @@ -19,7 +19,6 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql" - "github.com/grafana/loki/pkg/util/constants" ) func TestIngesterQuerier_earlyExitOnQuorum(t *testing.T) { @@ -105,7 +104,6 @@ func TestIngesterQuerier_earlyExitOnQuorum(t *testing.T) { newReadRingMock(ringIngesters, 1), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), - constants.Loki, ) require.NoError(t, err) @@ -205,7 +203,6 @@ func TestIngesterQuerier_earlyExitOnQuorum(t *testing.T) { newReadRingMock(ringIngesters, 1), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), - constants.Loki, ) require.NoError(t, err) @@ -303,7 +300,6 @@ func TestQuerier_tailDisconnectedIngesters(t *testing.T) { newReadRingMock(testData.ringIngesters, 0), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), - constants.Loki, ) require.NoError(t, err) @@ -366,7 +362,6 @@ func TestIngesterQuerier_Volume(t *testing.T) { newReadRingMock([]ring.InstanceDesc{mockInstanceDesc("1.1.1.1", ring.ACTIVE), mockInstanceDesc("3.3.3.3", ring.ACTIVE)}, 0), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), - constants.Loki, ) require.NoError(t, err) @@ -387,7 +382,6 @@ func TestIngesterQuerier_Volume(t *testing.T) { newReadRingMock([]ring.InstanceDesc{mockInstanceDesc("1.1.1.1", ring.ACTIVE), mockInstanceDesc("3.3.3.3", ring.ACTIVE)}, 0), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), - constants.Loki, ) require.NoError(t, err) diff --git a/pkg/querier/querier_test.go b/pkg/querier/querier_test.go index fd21ee47d063b..4bbaa012bc12b 100644 --- a/pkg/querier/querier_test.go +++ b/pkg/querier/querier_test.go @@ -24,7 +24,6 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql" "github.com/grafana/loki/pkg/storage" - "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -1287,7 +1286,7 @@ func TestQuerier_SelectSamplesWithDeletes(t *testing.T) { } func newQuerier(cfg Config, clientCfg client.Config, clientFactory ring_client.PoolFactory, ring ring.ReadRing, dg *mockDeleteGettter, store storage.Store, limits *validation.Overrides) (*SingleTenantQuerier, error) { - iq, err := newIngesterQuerier(clientCfg, ring, cfg.ExtraQueryDelay, 
clientFactory, constants.Loki) + iq, err := newIngesterQuerier(clientCfg, ring, cfg.ExtraQueryDelay, clientFactory) if err != nil { return nil, err } diff --git a/pkg/querier/queryrange/codec.go b/pkg/querier/queryrange/codec.go index 7a604780219cc..2f37aaecf7530 100644 --- a/pkg/querier/queryrange/codec.go +++ b/pkg/querier/queryrange/codec.go @@ -706,6 +706,7 @@ func (c Codec) EncodeRequest(ctx context.Context, r queryrangebase.Request) (*ht } } +// nolint:goconst func (c Codec) Path(r queryrangebase.Request) string { switch request := r.(type) { case *LokiRequest: diff --git a/pkg/querier/queryrange/querysharding.go b/pkg/querier/queryrange/querysharding.go index 143174439c159..26ec924ce5c4f 100644 --- a/pkg/querier/queryrange/querysharding.go +++ b/pkg/querier/queryrange/querysharding.go @@ -333,9 +333,9 @@ func (confs ShardingConfigs) ValidRange(start, end int64) (config.PeriodConfig, } else if end < int64(confs[i+1].From.Time) { // The request is entirely scoped into this shard config return conf, nil - } else { - continue } + + continue } return config.PeriodConfig{}, errInvalidShardingRange diff --git a/pkg/querier/queryrange/roundtrip.go b/pkg/querier/queryrange/roundtrip.go index 5442fcee42b9c..ff61d6b671fbd 100644 --- a/pkg/querier/queryrange/roundtrip.go +++ b/pkg/querier/queryrange/roundtrip.go @@ -189,7 +189,7 @@ func NewMiddleware( return nil, nil, err } - instantMetricTripperware, err := NewInstantMetricTripperware(cfg, engineOpts, log, limits, schema, codec, metrics, indexStatsTripperware, metricsNamespace) + instantMetricTripperware, err := NewInstantMetricTripperware(cfg, engineOpts, log, limits, schema, metrics, indexStatsTripperware, metricsNamespace) if err != nil { return nil, nil, err } @@ -704,7 +704,6 @@ func NewInstantMetricTripperware( log log.Logger, limits Limits, schema config.SchemaConfig, - merger base.Merger, metrics *Metrics, indexStatsTripperware base.Middleware, metricsNamespace string, diff --git a/pkg/querier/worker_service_test.go b/pkg/querier/worker_service_test.go index a1fd9c0db34cf..ac18c83350ae3 100644 --- a/pkg/querier/worker_service_test.go +++ b/pkg/querier/worker_service_test.go @@ -16,7 +16,7 @@ import ( querier_worker "github.com/grafana/loki/pkg/querier/worker" */) -func Test_InitQuerierService(t *testing.T) { +func Test_InitQuerierService(_ *testing.T) { // TODO: use in modules test /* diff --git a/pkg/storage/bloom/v1/bloom_tokenizer.go b/pkg/storage/bloom/v1/bloom_tokenizer.go index e2659180b4eac..99b804851148d 100644 --- a/pkg/storage/bloom/v1/bloom_tokenizer.go +++ b/pkg/storage/bloom/v1/bloom_tokenizer.go @@ -58,7 +58,7 @@ func (bt *BloomTokenizer) SetLineTokenizer(t Tokenizer) { } // TODO: Something real here with metrics -func newMetrics(r prometheus.Registerer) *metrics { +func newMetrics(_ prometheus.Registerer) *metrics { return &metrics{} } diff --git a/pkg/storage/bloom/v1/filter/buckets.go b/pkg/storage/bloom/v1/filter/buckets.go index 36dfeafc9a1ee..95c55f394e1fa 100644 --- a/pkg/storage/bloom/v1/filter/buckets.go +++ b/pkg/storage/bloom/v1/filter/buckets.go @@ -184,7 +184,7 @@ func (b *Buckets) ReadFrom(stream io.Reader) (int64, error) { return 0, err } - var len uint64 + var len uint64 // nolint:revive err = binary.Read(stream, binary.BigEndian, &len) if err != nil { return 0, err diff --git a/pkg/storage/bloom/v1/filter/partitioned.go b/pkg/storage/bloom/v1/filter/partitioned.go index 916275cff01cd..c3eb949840ed7 100644 --- a/pkg/storage/bloom/v1/filter/partitioned.go +++ b/pkg/storage/bloom/v1/filter/partitioned.go @@ 
-286,7 +286,7 @@ func (p *PartitionedBloomFilter) ReadFrom(stream io.Reader) (int64, error) { return 0, err } - var len uint64 + var len uint64 // nolint:revive err = binary.Read(stream, binary.BigEndian, &len) if err != nil { return 0, err diff --git a/pkg/storage/bloom/v1/filter/scalable.go b/pkg/storage/bloom/v1/filter/scalable.go index d50beecc7e78e..74db6748c7bc5 100644 --- a/pkg/storage/bloom/v1/filter/scalable.go +++ b/pkg/storage/bloom/v1/filter/scalable.go @@ -309,7 +309,7 @@ func (s *ScalableBloomFilter) ReadFrom(stream io.Reader) (int64, error) { return 0, err } - var len uint64 + var len uint64 // nolint:revive err = binary.Read(stream, binary.BigEndian, &len) if err != nil { return 0, err diff --git a/pkg/storage/bloom/v1/fuse_test.go b/pkg/storage/bloom/v1/fuse_test.go index 38554463e9214..e355804574eb3 100644 --- a/pkg/storage/bloom/v1/fuse_test.go +++ b/pkg/storage/bloom/v1/fuse_test.go @@ -178,6 +178,7 @@ func BenchmarkBlockQuerying(b *testing.B) { context.Background(), len(requestChains), len(requestChains), func(_ context.Context, idx int) error { + // nolint:revive for range requestChains[idx][0].response { } return nil diff --git a/pkg/storage/chunk/client/congestion/config.go b/pkg/storage/chunk/client/congestion/config.go index c08cbf984aa5c..47d86646ad25b 100644 --- a/pkg/storage/chunk/client/congestion/config.go +++ b/pkg/storage/chunk/client/congestion/config.go @@ -38,6 +38,7 @@ func (c *ControllerConfig) RegisterFlags(f *flag.FlagSet) { c.RegisterFlagsWithPrefix("", f) } +// nolint:goconst func (c *ControllerConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { f.StringVar(&c.Strategy, prefix+"strategy", "", "Congestion control strategy to use (default: none, options: 'aimd').") f.UintVar(&c.AIMD.Start, prefix+"strategy.aimd.start", 2000, "AIMD starting throughput window size: how many requests can be sent per second (default: 2000).") diff --git a/pkg/storage/config/store.go b/pkg/storage/config/store.go index 9fe276b47614c..75bdaa2ace8dc 100644 --- a/pkg/storage/config/store.go +++ b/pkg/storage/config/store.go @@ -4,7 +4,6 @@ import ( "flag" "time" - "github.com/go-kit/log" "github.com/prometheus/common/model" "github.com/grafana/loki/pkg/storage/chunk/cache" @@ -43,6 +42,6 @@ func (cfg *ChunkStoreConfig) RegisterFlags(f *flag.FlagSet) { f.Var(&cfg.CacheLookupsOlderThan, "store.cache-lookups-older-than", "Cache index entries older than this period. 
0 to disable.") } -func (cfg *ChunkStoreConfig) Validate(logger log.Logger) error { +func (cfg *ChunkStoreConfig) Validate() error { return nil } diff --git a/pkg/storage/factory.go b/pkg/storage/factory.go index 8c469e03590c0..cd61e8054c621 100644 --- a/pkg/storage/factory.go +++ b/pkg/storage/factory.go @@ -420,7 +420,7 @@ func NewIndexClient(periodCfg config.PeriodConfig, tableRange config.TableRange, return indexGatewayClient, nil } - gateway, err := gatewayclient.NewGatewayClient(cfg.BoltDBShipperConfig.IndexGatewayClientConfig, registerer, limits, logger, metricsNamespace) + gateway, err := gatewayclient.NewGatewayClient(cfg.BoltDBShipperConfig.IndexGatewayClientConfig, registerer, limits, logger) if err != nil { return nil, err } diff --git a/pkg/storage/store.go b/pkg/storage/store.go index 0bd679a361984..0ebdc59e992c6 100644 --- a/pkg/storage/store.go +++ b/pkg/storage/store.go @@ -266,7 +266,7 @@ func (s *LokiStore) storeForPeriod(p config.PeriodConfig, tableRange config.Tabl if p.IndexType == config.TSDBType { if shouldUseIndexGatewayClient(s.cfg.TSDBShipperConfig.Config) { // inject the index-gateway client into the index store - gw, err := gatewayclient.NewGatewayClient(s.cfg.TSDBShipperConfig.IndexGatewayClientConfig, indexClientReg, s.limits, indexClientLogger, s.metricsNamespace) + gw, err := gatewayclient.NewGatewayClient(s.cfg.TSDBShipperConfig.IndexGatewayClientConfig, indexClientReg, s.limits, indexClientLogger) if err != nil { return nil, nil, nil, err } diff --git a/pkg/storage/stores/shipper/bloomshipper/store_test.go b/pkg/storage/stores/shipper/bloomshipper/store_test.go index 8048102c156ba..ec48f7caa0405 100644 --- a/pkg/storage/stores/shipper/bloomshipper/store_test.go +++ b/pkg/storage/stores/shipper/bloomshipper/store_test.go @@ -4,8 +4,8 @@ import ( "testing" ) -func TestBloomShipper(t *testing.T) { +func TestBloomShipper(_ *testing.T) { } -func TestBloomStore(t *testing.T) { +func TestBloomStore(_ *testing.T) { } diff --git a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go index cc0eb791ba780..c6bc474e322e9 100644 --- a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go +++ b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go @@ -109,7 +109,7 @@ type GatewayClient struct { // // If it is configured to be in ring mode, a pool of GRPC connections to all Index Gateway instances is created using a ring. // Otherwise, it creates a GRPC connection pool to as many addresses as can be resolved from the given address. 
-func NewGatewayClient(cfg IndexGatewayClientConfig, r prometheus.Registerer, limits indexgateway.Limits, logger log.Logger, metricsNamespace string) (*GatewayClient, error) { +func NewGatewayClient(cfg IndexGatewayClientConfig, r prometheus.Registerer, limits indexgateway.Limits, logger log.Logger) (*GatewayClient, error) { latency := prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: constants.Loki, Name: "index_gateway_request_duration_seconds", diff --git a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go index 0ec6e81c17754..bb96a68e24bb4 100644 --- a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go +++ b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go @@ -24,7 +24,6 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/storage/stores/series/index" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/indexgateway" - "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -192,7 +191,7 @@ func TestGatewayClient_RingMode(t *testing.T) { cfg.Mode = indexgateway.RingMode cfg.Ring = igwRing - c, err := NewGatewayClient(cfg, nil, o, logger, constants.Loki) + c, err := NewGatewayClient(cfg, nil, o, logger) require.NoError(t, err) require.NotNil(t, c) @@ -223,7 +222,7 @@ func TestGatewayClient_RingMode(t *testing.T) { cfg.Mode = indexgateway.RingMode cfg.Ring = igwRing - c, err := NewGatewayClient(cfg, nil, o, logger, constants.Loki) + c, err := NewGatewayClient(cfg, nil, o, logger) require.NoError(t, err) require.NotNil(t, c) @@ -254,7 +253,7 @@ func TestGatewayClient(t *testing.T) { cfg.PoolConfig = clientpool.PoolConfig{ClientCleanupPeriod: 500 * time.Millisecond} overrides, _ := validation.NewOverrides(validation.Limits{}, nil) - gatewayClient, err := NewGatewayClient(cfg, prometheus.DefaultRegisterer, overrides, logger, constants.Loki) + gatewayClient, err := NewGatewayClient(cfg, prometheus.DefaultRegisterer, overrides, logger) require.NoError(t, err) ctx := user.InjectOrgID(context.Background(), "fake") @@ -441,11 +440,11 @@ func TestDoubleRegistration(t *testing.T) { Address: "my-store-address:1234", } - client, err := NewGatewayClient(clientCfg, r, o, logger, constants.Loki) + client, err := NewGatewayClient(clientCfg, r, o, logger) require.NoError(t, err) defer client.Stop() - client, err = NewGatewayClient(clientCfg, r, o, logger, constants.Loki) + client, err = NewGatewayClient(clientCfg, r, o, logger) require.NoError(t, err) defer client.Stop() } diff --git a/tools/deprecated-config-checker/main.go b/tools/deprecated-config-checker/main.go index fc2f5cfda4f25..6ce387c1eec88 100644 --- a/tools/deprecated-config-checker/main.go +++ b/tools/deprecated-config-checker/main.go @@ -6,6 +6,7 @@ import ( "os" "github.com/fatih/color" + "github.com/grafana/loki/tools/deprecated-config-checker/checker" ) diff --git a/tools/doc-generator/writer.go b/tools/doc-generator/writer.go index b73877fc45f86..a13613c7fbffd 100644 --- a/tools/doc-generator/writer.go +++ b/tools/doc-generator/writer.go @@ -37,6 +37,7 @@ func (w *specWriter) writeConfigBlock(b *parse.ConfigBlock, indent int) { } } +// nolint:goconst func (w *specWriter) writeConfigEntry(e *parse.ConfigEntry, indent int) { if e.Kind == parse.KindBlock { // If the block is a root block it will have its dedicated section in the doc, diff --git a/tools/tsdb/bloom-tester/concurrent.go 
b/tools/tsdb/bloom-tester/concurrent.go index 9f7fd8c1a0bbf..c42d403809aea 100644 --- a/tools/tsdb/bloom-tester/concurrent.go +++ b/tools/tsdb/bloom-tester/concurrent.go @@ -1,9 +1,10 @@ package main import ( - "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb/index" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" + + "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb/index" ) type pool struct { diff --git a/tools/tsdb/bloom-tester/lib.go b/tools/tsdb/bloom-tester/lib.go index 18ceb14b6b611..433e8d6badb8b 100644 --- a/tools/tsdb/bloom-tester/lib.go +++ b/tools/tsdb/bloom-tester/lib.go @@ -507,7 +507,7 @@ func writeSBFToFile(sbf *filter.ScalableBloomFilter, filename string) error { return err } -func writeSBFToObjectStorage(sbf *filter.ScalableBloomFilter, objectStorageFilename, localFilename string, objectClient client.ObjectClient) { +func writeSBFToObjectStorage(_ *filter.ScalableBloomFilter, objectStorageFilename, localFilename string, objectClient client.ObjectClient) { // Probably a better way to do this than to reopen the file, but it's late file, err := os.Open(localFilename) if err != nil { diff --git a/tools/tsdb/bloom-tester/lrucache_test.go b/tools/tsdb/bloom-tester/lrucache_test.go index c1125af01132c..dec5f85688664 100644 --- a/tools/tsdb/bloom-tester/lrucache_test.go +++ b/tools/tsdb/bloom-tester/lrucache_test.go @@ -2,9 +2,10 @@ package main import ( "encoding/binary" - "github.com/stretchr/testify/require" "strconv" "testing" + + "github.com/stretchr/testify/require" ) var num = 1000000 diff --git a/tools/tsdb/bloom-tester/main.go b/tools/tsdb/bloom-tester/main.go index 410babf42d364..916796b917042 100644 --- a/tools/tsdb/bloom-tester/main.go +++ b/tools/tsdb/bloom-tester/main.go @@ -2,10 +2,12 @@ package main import ( "fmt" - "github.com/go-kit/log/level" - util_log "github.com/grafana/loki/pkg/util/log" "os" "strings" + + "github.com/go-kit/log/level" + + util_log "github.com/grafana/loki/pkg/util/log" ) // go build ./tools/tsdb/bloom-tester && HOSTNAME="bloom-tester-121" NUM_TESTERS="128" BUCKET="19625" DIR=/Users/progers/dev/bloom WRITE_MODE="false" BUCKET_PREFIX="new-experiments" ./tools/tsdb/bloom-tester/bloom-tester --config.file=/Users/progers/dev/bloom/config.yaml diff --git a/tools/tsdb/bloom-tester/metrics.go b/tools/tsdb/bloom-tester/metrics.go index c330d7edb8d23..193f829063db8 100644 --- a/tools/tsdb/bloom-tester/metrics.go +++ b/tools/tsdb/bloom-tester/metrics.go @@ -1,10 +1,11 @@ package main import ( - bt "github.com/grafana/loki/pkg/storage/bloom/v1" - "github.com/grafana/loki/pkg/storage/bloom/v1/filter" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + bt "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/bloom/v1/filter" ) type Experiment struct { @@ -36,7 +37,7 @@ func NewQueryExperiment(name string, searchString string) QueryExperiment { const ExperimentLabel = "experiment" const QueryExperimentLabel = "query_experiment" const LookupResultType = "lookup_result_type" -const FalsePositive = "false_postive" +const FalsePositive = "false_positive" const FalseNegative = "false_negative" const TruePositive = "true_positive" const TrueNegative = "true_negative" diff --git a/tools/tsdb/bloom-tester/readlib.go b/tools/tsdb/bloom-tester/readlib.go index 8542b95401a9d..77e0e74167ea2 100644 --- a/tools/tsdb/bloom-tester/readlib.go +++ b/tools/tsdb/bloom-tester/readlib.go @@ -6,6 +6,7 @@ import 
( "fmt" "github.com/grafana/dskit/services" + "github.com/grafana/loki/pkg/chunkenc" "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql/log" diff --git a/tools/tsdb/bloom-tester/readlib_test.go b/tools/tsdb/bloom-tester/readlib_test.go index c37af7c22f409..ad7b7f0b732a0 100644 --- a/tools/tsdb/bloom-tester/readlib_test.go +++ b/tools/tsdb/bloom-tester/readlib_test.go @@ -1,8 +1,9 @@ package main import ( - "github.com/stretchr/testify/require" "testing" + + "github.com/stretchr/testify/require" ) func TestSearchSbf(t *testing.T) { From 1c56aa91eff912a8b4db9faaaebdd18824ba1d63 Mon Sep 17 00:00:00 2001 From: Karsten Jeschkies Date: Thu, 2 Nov 2023 11:53:54 +0100 Subject: [PATCH 02/14] Check switches on syntax.Expr for exhaustiveness. (#11113) **What this PR does / why we need it**: In preparation for serializing the LogQL AST as JSON, we are declaring `syntax.Expr` as a "sum type" and testing all switch statements on it for exhaustiveness. **Special notes for your reviewer**: **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory.
[Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15) --- .golangci.yml | 1 + pkg/logql/engine.go | 2 +- pkg/logql/evaluator.go | 2 +- pkg/logql/optimize.go | 4 ++-- pkg/logql/rangemapper.go | 8 ++++---- pkg/logql/syntax/ast.go | 6 +++++- pkg/logql/syntax/walk.go | 2 +- pkg/logql/syntax/walk_test.go | 4 ++-- pkg/querier/multi_tenant_querier.go | 2 +- pkg/querier/queryrange/split_by_interval.go | 2 +- 10 files changed, 19 insertions(+), 14 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 8f8d3b483e86f..0653fbf6b5dfa 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -79,6 +79,7 @@ linters: - goimports - gosimple - staticcheck + - gochecksumtype disable: - unused - unparam diff --git a/pkg/logql/engine.go b/pkg/logql/engine.go index f830bd194ddf3..e180dbf054b2d 100644 --- a/pkg/logql/engine.go +++ b/pkg/logql/engine.go @@ -430,7 +430,7 @@ func (q *query) evalSample(ctx context.Context, expr syntax.SampleExpr) (promql_ func (q *query) checkIntervalLimit(expr syntax.SampleExpr, limit time.Duration) error { var err error - expr.Walk(func(e interface{}) { + expr.Walk(func(e syntax.Expr) { switch e := e.(type) { case *syntax.RangeAggregationExpr: if e.Left == nil || e.Left.Interval <= limit { diff --git a/pkg/logql/evaluator.go b/pkg/logql/evaluator.go index c132870b6c063..0c0dba2cad3d5 100644 --- a/pkg/logql/evaluator.go +++ b/pkg/logql/evaluator.go @@ -112,7 +112,7 @@ func Sortable(q Params) (bool, error) { if err != nil { return false, err } - expr.Walk(func(e interface{}) { + expr.Walk(func(e syntax.Expr) { rangeExpr, ok := e.(*syntax.VectorAggregationExpr) if !ok { return diff --git a/pkg/logql/optimize.go b/pkg/logql/optimize.go index de15bce40e200..1f00153e18b87 100644 --- a/pkg/logql/optimize.go +++ b/pkg/logql/optimize.go @@ -6,7 +6,7 @@ import "github.com/grafana/loki/pkg/logql/syntax" func optimizeSampleExpr(expr syntax.SampleExpr) (syntax.SampleExpr, error) { var skip bool // we skip sharding AST for now, it's not easy to clone them since they are not part of the language. - expr.Walk(func(e interface{}) { + expr.Walk(func(e syntax.Expr) { switch e.(type) { case *ConcatSampleExpr, *DownstreamSampleExpr: skip = true @@ -28,7 +28,7 @@ func optimizeSampleExpr(expr syntax.SampleExpr) (syntax.SampleExpr, error) { // removeLineformat removes unnecessary line_format within a SampleExpr. func removeLineformat(expr syntax.SampleExpr) { - expr.Walk(func(e interface{}) { + expr.Walk(func(e syntax.Expr) { rangeExpr, ok := e.(*syntax.RangeAggregationExpr) if !ok { return diff --git a/pkg/logql/rangemapper.go b/pkg/logql/rangemapper.go index 4cb3965ee4910..cc63944bc07e9 100644 --- a/pkg/logql/rangemapper.go +++ b/pkg/logql/rangemapper.go @@ -177,7 +177,7 @@ func (m RangeMapper) Map(expr syntax.SampleExpr, vectorAggrPushdown *syntax.Vect // Example: expression `count_over_time({app="foo"}[10m])` returns 10m func getRangeInterval(expr syntax.SampleExpr) time.Duration { var rangeInterval time.Duration - expr.Walk(func(e interface{}) { + expr.Walk(func(e syntax.Expr) { switch concrete := e.(type) { case *syntax.RangeAggregationExpr: rangeInterval = concrete.Left.Interval @@ -190,7 +190,7 @@ func getRangeInterval(expr syntax.SampleExpr) time.Duration { // such as `| json` or `| logfmt`, that would result in an exploding amount of series in downstream queries. 
func hasLabelExtractionStage(expr syntax.SampleExpr) bool { found := false - expr.Walk(func(e interface{}) { + expr.Walk(func(e syntax.Expr) { switch concrete := e.(type) { case *syntax.LogfmtParserExpr: found = true @@ -278,7 +278,7 @@ func (m RangeMapper) vectorAggrWithRangeDownstreams(expr *syntax.RangeAggregatio // Returns the updated downstream ConcatSampleExpr. func appendDownstream(downstreams *ConcatSampleExpr, expr syntax.SampleExpr, interval time.Duration, offset time.Duration) *ConcatSampleExpr { sampleExpr := clone(expr) - sampleExpr.Walk(func(e interface{}) { + sampleExpr.Walk(func(e syntax.Expr) { switch concrete := e.(type) { case *syntax.RangeAggregationExpr: concrete.Left.Interval = interval @@ -300,7 +300,7 @@ func getOffsets(expr syntax.SampleExpr) []time.Duration { // Expect to always find at most 1 offset, so preallocate it accordingly offsets := make([]time.Duration, 0, 1) - expr.Walk(func(e interface{}) { + expr.Walk(func(e syntax.Expr) { switch concrete := e.(type) { case *syntax.RangeAggregationExpr: offsets = append(offsets, concrete.Left.Offset) diff --git a/pkg/logql/syntax/ast.go b/pkg/logql/syntax/ast.go index aa4aa7fa80617..4e251022860e5 100644 --- a/pkg/logql/syntax/ast.go +++ b/pkg/logql/syntax/ast.go @@ -22,6 +22,8 @@ import ( ) // Expr is the root expression which can be a SampleExpr or LogSelectorExpr +// +//sumtype:decl type Expr interface { logQLExpr() // ensure it's not implemented accidentally Shardable() bool // A recursive check on the AST to see if it's shardable. @@ -2106,7 +2108,7 @@ func (e *VectorExpr) MatcherGroups() ([]MatcherRange, error) { return nil, e.er func (e *VectorExpr) Extractor() (log.SampleExtractor, error) { return nil, nil } func ReducesLabels(e Expr) (conflict bool) { - e.Walk(func(e interface{}) { + e.Walk(func(e Expr) { switch expr := e.(type) { case *RangeAggregationExpr: if groupingReducesLabels(expr.Grouping) { @@ -2135,6 +2137,8 @@ func ReducesLabels(e Expr) (conflict bool) { break } } + default: + return } }) return diff --git a/pkg/logql/syntax/walk.go b/pkg/logql/syntax/walk.go index 3a8b85d92d0b2..291ec8b31036f 100644 --- a/pkg/logql/syntax/walk.go +++ b/pkg/logql/syntax/walk.go @@ -1,6 +1,6 @@ package syntax -type WalkFn = func(e interface{}) +type WalkFn = func(e Expr) func walkAll(f WalkFn, xs ...Walkable) { for _, x := range xs { diff --git a/pkg/logql/syntax/walk_test.go b/pkg/logql/syntax/walk_test.go index 678e89df99c48..3350515b9c461 100644 --- a/pkg/logql/syntax/walk_test.go +++ b/pkg/logql/syntax/walk_test.go @@ -32,7 +32,7 @@ func Test_Walkable(t *testing.T) { require.Nil(t, err) var cnt int - expr.Walk(func(_ interface{}) { cnt++ }) + expr.Walk(func(_ Expr) { cnt++ }) require.Equal(t, test.want, cnt) }) } @@ -77,7 +77,7 @@ func Test_AppendMatchers(t *testing.T) { expr, err := ParseExpr(test.expr) require.NoError(t, err) - expr.Walk(func(e interface{}) { + expr.Walk(func(e Expr) { switch me := e.(type) { case *MatchersExpr: me.AppendMatchers(test.matchers) diff --git a/pkg/querier/multi_tenant_querier.go b/pkg/querier/multi_tenant_querier.go index c9b1b56b8b284..f4881df48a6d7 100644 --- a/pkg/querier/multi_tenant_querier.go +++ b/pkg/querier/multi_tenant_querier.go @@ -227,7 +227,7 @@ func removeTenantSelector(params logql.SelectSampleParams, tenantIDs []string) ( // replaceMatchers traverses the passed expression and replaces all matchers. 
func replaceMatchers(expr syntax.Expr, matchers []*labels.Matcher) syntax.Expr { expr, _ = syntax.Clone(expr) - expr.Walk(func(e interface{}) { + expr.Walk(func(e syntax.Expr) { switch concrete := e.(type) { case *syntax.MatchersExpr: concrete.Mts = matchers diff --git a/pkg/querier/queryrange/split_by_interval.go b/pkg/querier/queryrange/split_by_interval.go index f3f2c13d60042..da8326a678ec5 100644 --- a/pkg/querier/queryrange/split_by_interval.go +++ b/pkg/querier/queryrange/split_by_interval.go @@ -322,7 +322,7 @@ func maxRangeVectorAndOffsetDuration(q string) (time.Duration, time.Duration, er } var maxRVDuration, maxOffset time.Duration - expr.Walk(func(e interface{}) { + expr.Walk(func(e syntax.Expr) { if r, ok := e.(*syntax.LogRange); ok { if r.Interval > maxRVDuration { maxRVDuration = r.Interval From a59616119a89f30eefc928074096b221de5bce19 Mon Sep 17 00:00:00 2001 From: Ashwanth Date: Thu, 2 Nov 2023 16:59:50 +0530 Subject: [PATCH 03/14] docs: add migration guide for tsdb (#10978) **What this PR does / why we need it**: Adds a migration guide for moving from any of the older indexes to TSDB. **Special notes for your reviewer**: **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [x] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) --- docs/sources/setup/migrate/_index.md | 1 + .../setup/migrate/migrate-to-tsdb/_index.md | 74 +++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 docs/sources/setup/migrate/migrate-to-tsdb/_index.md diff --git a/docs/sources/setup/migrate/_index.md b/docs/sources/setup/migrate/_index.md index f8158b199f046..034791b38040d 100644 --- a/docs/sources/setup/migrate/_index.md +++ b/docs/sources/setup/migrate/_index.md @@ -9,5 +9,6 @@ weight: 300 This section contains instructions for migrating from one Loki implementation to another. +- [Migrate]({{< relref "./migrate-to-tsdb" >}}) to TSDB index. - [Migrate]({{< relref "./migrate-from-distributed" >}}) from the `Loki-distributed` Helm chart to the `loki` Helm chart. - [Migrate]({{< relref "./migrate-to-three-scalable-targets" >}}) from the two target Helm chart to the three target scalable configuration Helm chart. diff --git a/docs/sources/setup/migrate/migrate-to-tsdb/_index.md b/docs/sources/setup/migrate/migrate-to-tsdb/_index.md new file mode 100644 index 0000000000000..59458e700af57 --- /dev/null +++ b/docs/sources/setup/migrate/migrate-to-tsdb/_index.md @@ -0,0 +1,74 @@ +--- +title: Migrate to TSDB +menuTitle: Migrate to TSDB +description: Migration guide for moving from any of the older indexes to TSDB +weight: 100 +keywords: + - migrate + - tsdb +--- + +# Migrate to TSDB + +[TSDB]({{< relref "../../../operations/storage/tsdb" >}}) is the recommended index type for Loki and is where the current development lies.
+If you are running Loki with [boltdb-shipper]({{< relref "../../../operations/storage/boltdb-shipper" >}}) or any of the [legacy index types]({{< relref "../../../storage#index-storage" >}}) that have been deprecated, +we strongly recommend migrating to TSDB. + + +### Configure TSDB index for an upcoming period + +To begin the migration, add a new [period_config]({{< relref "../../../configure#period_config" >}}) entry in your [schema_config]({{< relref "../../../configure#schema_config" >}}). +You can read more about schema config [here]({{< relref "../../../storage#schema-config" >}}). + +{{% admonition type="note" %}} +You must roll out the new `period_config` change to all Loki components in order for it to take effect. +{{% /admonition %}} + +This example adds a new `period_config` which configures Loki to start using the TSDB index for the data ingested starting from `2023-10-20`. + +``` +schema_config: + configs: + - from: 2023-01-01 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + - from: 2023-10-20 ① + store: tsdb ② + object_store: filesystem ③ + schema: v12 ④ + index: + prefix: index_ + period: 24h +``` + +① You must set the new period `from` to a date in the future. + +② Update the new period to use TSDB as the index type by setting `store: tsdb`. + +③ This sample configuration uses filesystem as the storage in both periods. If you want to use a different storage for the TSDB index and chunks, you can specify a different `object_store` in the new period. + +④ Update the schema to v12, which is the recommended version at the time of writing. Please refer to the [configure page]({{< relref "../../../configure#period_config" >}}) for the current recommended version. + +### Configure TSDB shipper + +It's also important that you configure the `tsdb_shipper` block in [storage_config]({{< relref "../../../configure#storage_config" >}}). Specifically, the following options: +- `active_index_directory`: directory where ingesters write index files, which are then uploaded by the shipper to the configured storage. +- `cache_location`: cache location for downloading index files from the storage for use in the query path. + +``` +storage_config: + tsdb_shipper: + active_index_directory: /data/tsdb-index + cache_location: /data/tsdb-cache +``` + +### Run compactor + +We strongly recommend running the [compactor]({{< relref "../../../operations/storage/retention#compactor" >}}) when using the TSDB index. It is responsible for running compaction and retention on the TSDB index. +Not running index compaction will result in sub-optimal query performance. + +Please refer to the [compactor section]({{< relref "../../../operations/storage/retention#compactor" >}}) for more information and configuration examples. From e7ecb41331c9233fc416211a4a0ba43d70fb80a2 Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Thu, 2 Nov 2023 17:21:08 +0530 Subject: [PATCH 04/14] add documentation on how to use otel collector for ingesting logs to loki (#11026) **What this PR does / why we need it**: In PR #10727, we added support for ingesting logs to Loki in OTLP format. This PR adds documentation on how to configure the OpenTelemetry Collector and how the data is mapped from OTLP format to Loki format.
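For reference, a minimal collector configuration that sends logs over this path could look like the sketch below. It is only an illustration: the `otlp` receiver and the `loki:3100` host are placeholder assumptions, and the documentation added in this PR covers the full set of options.

```yaml
receivers:
  otlp:
    protocols:
      grpc: # accept OTLP logs over gRPC from instrumented apps

exporters:
  otlphttp:
    # Loki's OTLP ingestion endpoint; "loki:3100" is a placeholder host.
    endpoint: http://loki:3100/otlp

service:
  pipelines:
    logs: # log data must flow through a logs pipeline
      receivers: [otlp]
      exporters: [otlphttp]
```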
**Checklist** - [x] Documentation added --------- Co-authored-by: J Stickler --- docs/sources/send-data/_index.md | 5 ++ docs/sources/send-data/otel/_index.md | 118 ++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 docs/sources/send-data/otel/_index.md diff --git a/docs/sources/send-data/_index.md b/docs/sources/send-data/_index.md index 4db28dab432f3..981d98fe1e12c 100644 --- a/docs/sources/send-data/_index.md +++ b/docs/sources/send-data/_index.md @@ -22,6 +22,11 @@ Promtail is also the client of choice on bare-metal since it can be configured t Lastly, Promtail works well if you want to extract metrics from logs such as counting the occurrences of a particular message. - [xk6-loki extension](https://github.com/grafana/xk6-loki) - The k6-loki extension lets you perform [load testing on Loki]({{< relref "./k6" >}}). +## OpenTelemetry Collector + +Loki natively supports ingesting OpenTelemetry logs over HTTP. +See [Ingesting logs to Loki using OpenTelemetry Collector]({{< relref "./otel" >}}) for more details. + ## Third-party clients The following clients have been developed by the Loki community or other third-parties and can be used to send log data to Loki. diff --git a/docs/sources/send-data/otel/_index.md b/docs/sources/send-data/otel/_index.md new file mode 100644 index 0000000000000..8b6729850fb2a --- /dev/null +++ b/docs/sources/send-data/otel/_index.md @@ -0,0 +1,118 @@ +--- +title: Ingesting logs to Loki using OpenTelemetry Collector +menuTitle: Ingesting OpenTelemetry logs to Loki +description: Ingesting logs to Loki using OpenTelemetry Collector +aliases: +- ../clients/k6/ +weight: 250 +--- + +# Ingesting logs to Loki using OpenTelemetry Collector + +{{% admonition type="warning" %}} +OpenTelemetry logs ingestion is an experimental feature and is subject to change in future releases of Grafana Loki. +{{% /admonition %}} + +Loki natively supports ingesting OpenTelemetry logs over HTTP. +For ingesting logs to Loki using the OpenTelemetry Collector, you must use the [`otlphttp` exporter](https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlphttpexporter). + +## Loki configuration + +When logs are ingested by Loki using an OpenTelemetry protocol (OTLP) ingestion endpoint, some of the data is stored as [Structured Metadata]({{< relref "../../get-started/labels/structured-metadata" >}}). +Since Structured Metadata is still an experimental feature, Loki by default rejects any writes using that feature. +To start ingesting logs in OpenTelemetry format, you need to enable `allow_structured_metadata` in the per-tenant configuration (in the `limits_config`). + +## Configure the OpenTelemetry Collector to write logs into Loki + +You need to make the following changes to the [OpenTelemetry Collector config](https://opentelemetry.io/docs/collector/configuration/) to write logs to Loki on its OTLP ingestion endpoint. + +```yaml +exporters: + otlphttp: + endpoint: http:///otlp +``` + +And enable it in `service.pipelines`: + +```yaml +service: + pipelines: + logs: + receivers: [...] + processors: [...] + exporters: [..., otlphttp] +``` + +If you want to authenticate using basic auth, we recommend the [`basicauth` extension](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/basicauthextension).
+
+```yaml
+extensions:
+  basicauth/otlp:
+    client_auth:
+      username: username
+      password: password
+
+exporters:
+  otlphttp:
+    auth:
+      authenticator: basicauth/otlp
+    endpoint: http://<loki-addr>:3100/otlp
+
+service:
+  extensions: [basicauth/otlp]
+  pipelines:
+    logs:
+      receivers: [...]
+      processors: [...]
+      exporters: [..., otlphttp]
+```
+
+## Format considerations
+
+Since the OpenTelemetry protocol differs from the Loki storage model, here is how data in the OpenTelemetry format will be mapped to the Loki data model during ingestion:
+
+- Index labels: Resource attributes map well to index labels in Loki, since both usually identify the source of the logs. Because Loki has a limit of 30 index labels, we have selected the following resource attributes to be stored as index labels, while the remaining attributes are stored as [Structured Metadata]({{< relref "../../get-started/labels/structured-metadata" >}}) with each log entry:
+  - cloud.availability_zone
+  - cloud.region
+  - container.name
+  - deployment.environment
+  - k8s.cluster.name
+  - k8s.container.name
+  - k8s.cronjob.name
+  - k8s.daemonset.name
+  - k8s.deployment.name
+  - k8s.job.name
+  - k8s.namespace.name
+  - k8s.pod.name
+  - k8s.replicaset.name
+  - k8s.statefulset.name
+  - service.instance.id
+  - service.name
+  - service.namespace
+
+- Timestamp: One of `LogRecord.TimeUnixNano` or `LogRecord.ObservedTimestamp`, whichever is set. If neither is set, the ingestion timestamp will be used.
+
+- LogLine: `LogRecord.Body` holds the body of the log. However, since Loki only supports log bodies in string format, we will stringify non-string values using the [AsString method from the OTEL collector lib](https://github.com/open-telemetry/opentelemetry-collector/blob/ab3d6c5b64701e690aaa340b0a63f443ff22c1f0/pdata/pcommon/value.go#L353).
+
+- [Structured Metadata]({{< relref "../../get-started/labels/structured-metadata" >}}): Anything which can’t be stored in Index labels or the LogLine is stored as Structured Metadata. Here is a non-exhaustive list of what is stored in Structured Metadata to give a sense of what it will hold:
+  - Resource Attributes not stored as Index labels are replicated and stored with each log entry.
+  - Everything under InstrumentationScope is replicated and stored with each log entry.
+  - Everything under LogRecord except `LogRecord.Body`, `LogRecord.TimeUnixNano` and sometimes `LogRecord.ObservedTimestamp`.
+
+Things to note before ingesting OpenTelemetry logs to Loki:
+
+- Dots (.) are converted to underscores (_).
+
+  Loki does not support `.` or special characters other than `_` in label names. The unsupported characters are replaced with an `_` while converting Attributes to Index Labels or Structured Metadata.
+  Also note that you must use the normalized format when writing queries, that is, use `_` instead of special characters when querying data using OTEL Attributes.
+
+  For example, `service.name` in OTLP would become `service_name` in Loki.
+
+- Flattening of nested Attributes
+
+  While converting Attributes in OTLP to Index labels or Structured Metadata, any nested attribute values are flattened out using `_` as a separator.
+  This is done in a similar way to the [LogQL json parser](/docs/loki/latest/query/log_queries/#json).
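+
+  For example (a hypothetical attribute, shown purely for illustration), a nested attribute `pod` with the value `{"labels": {"app": "loki"}}` would be flattened and stored as `pod_labels_app=loki`.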
+
+- Stringification of non-string Attribute values
+
+  While converting Attribute values in OTLP to Index label values or Structured Metadata, any non-string values are converted to string using [AsString method from the OTEL collector lib](https://github.com/open-telemetry/opentelemetry-collector/blob/ab3d6c5b64701e690aaa340b0a63f443ff22c1f0/pdata/pcommon/value.go#L353).

From 4d61d0a90124688d0bd66937acdabc1dfd8f7080 Mon Sep 17 00:00:00 2001
From: J Stickler
Date: Thu, 2 Nov 2023 13:42:14 -0400
Subject: [PATCH 05/14] [Style] Data source as two words. (#11124)

**What this PR does / why we need it**:
Updating "datasource" to be "data source" per recent decision by Style Council.
---
 docs/sources/configure/examples/query-frontend.md | 2 +-
 docs/sources/get-started/_index.md                | 4 ++--
 docs/sources/operations/query-fairness/_index.md  | 4 ++--
 docs/sources/operations/troubleshooting.md        | 4 ++--
 docs/sources/send-data/k6/query-scenario.md       | 2 +-
 docs/sources/visualize/grafana.md                 | 4 ++--
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/sources/configure/examples/query-frontend.md b/docs/sources/configure/examples/query-frontend.md
index af811048981b8..838463bfe9efe 100644
--- a/docs/sources/configure/examples/query-frontend.md
+++ b/docs/sources/configure/examples/query-frontend.md
@@ -144,7 +144,7 @@ spec:
 
 ### Grafana
 
-Once you've deployed these, point your Grafana datasource to the new frontend service. The service is available within the cluster at `http://query-frontend.<namespace>.svc.cluster.local:3100`.
+Once you've deployed these, point your Grafana data source to the new frontend service. The service is available within the cluster at `http://query-frontend.<namespace>.svc.cluster.local:3100`.
 
 ### GRPC Mode (Pull model)
 
diff --git a/docs/sources/get-started/_index.md b/docs/sources/get-started/_index.md
index 96fe90e868acd..c01a2cbc7a35b 100644
--- a/docs/sources/get-started/_index.md
+++ b/docs/sources/get-started/_index.md
@@ -63,11 +63,11 @@ The write component returns `ready` when you point a web browser at http://local
 ## Use Grafana and the test environment
 
 Use [Grafana](/docs/grafana/latest/) to query and observe the log lines captured in the Loki cluster by navigating a browser to http://localhost:3000.
-The Grafana instance has Loki configured as a [datasource](/docs/grafana/latest/datasources/loki/).
+The Grafana instance has Loki configured as a [data source](/docs/grafana/latest/datasources/loki/).
 
 Click on the Grafana instance's [Explore](/docs/grafana/latest/explore/) icon to bring up the explore pane.
 
-Use the Explore dropdown menu to choose the Loki datasource and bring up the Loki query browser.
+Use the Explore dropdown menu to choose the Loki data source and bring up the Loki query browser.
 
 Try some queries. Enter your query into the **Log browser** box, and click on the blue **Run query** button.
 
diff --git a/docs/sources/operations/query-fairness/_index.md b/docs/sources/operations/query-fairness/_index.md
index 26d486880207e..205080c19b7cb 100644
--- a/docs/sources/operations/query-fairness/_index.md
+++ b/docs/sources/operations/query-fairness/_index.md
@@ -113,9 +113,9 @@ In the examples above the client that invoked the query directly against Loki al
 HTTP header that controls where in the queue tree the sub-queries are enqueued.
 However, as an operator, you would usually want to avoid this scenario
 and control yourself where the header is set.
-When using Grafana as the Loki user interface, you can, for example, create multiple datasources
+When using Grafana as the Loki user interface, you can, for example, create multiple data sources
 with the same tenant, but with a different additional HTTP header
-`X-Loki-Scope-Actor` and restrict which Grafana user can use which datasource.
+`X-Loki-Scope-Actor` and restrict which Grafana user can use which data source.
 Alternatively, if you have a proxy for authentication in front of Loki, you can pass the
 (hashed) user from the authentication as downstream header to Loki.
 
diff --git a/docs/sources/operations/troubleshooting.md b/docs/sources/operations/troubleshooting.md
index d4179ebbbe218..fd65e9a4d9a97 100644
--- a/docs/sources/operations/troubleshooting.md
+++ b/docs/sources/operations/troubleshooting.md
@@ -11,7 +11,7 @@ aliases:
 ## "Loki: Bad Gateway. 502"
 
 This error can appear in Grafana when Grafana Loki is added as a
-datasource, indicating that Grafana in unable to connect to Loki. There may
+data source, indicating that Grafana is unable to connect to Loki. There may be
 one of many root causes:
 
 - If Loki is deployed with Docker, and Grafana and Loki are not running in the
@@ -24,7 +24,7 @@ one of many root causes:
 
 ## "Data source connected, but no labels received. Verify that Loki and Promtail is configured properly."
 
-This error can appear in Grafana when Loki is added as a datasource, indicating
+This error can appear in Grafana when Loki is added as a data source, indicating
 that although Grafana has connected to Loki, Loki hasn't received any logs from
 Promtail yet. There may be one of many root causes:
 
diff --git a/docs/sources/send-data/k6/query-scenario.md b/docs/sources/send-data/k6/query-scenario.md
index 5eac53a0c6aaf..6a27c7a0dcdbb 100644
--- a/docs/sources/send-data/k6/query-scenario.md
+++ b/docs/sources/send-data/k6/query-scenario.md
@@ -22,7 +22,7 @@ Loki has 5 types of queries:
 * series query
 
 In a real-world use-case, such as querying Loki using it as a Grafana
-datasource, all of these queries are used. Each of them has a different
+data source, all of these queries are used. Each of them has a different
 [API]({{< relref "../../reference/api.md" >}}) endpoint. The xk6-loki extension
 provides a [Javascript API](https://github.com/grafana/xk6-loki#javascript-api)
 for all these query types.
diff --git a/docs/sources/visualize/grafana.md b/docs/sources/visualize/grafana.md
index 949928398f92d..3715ac62c8f89 100644
--- a/docs/sources/visualize/grafana.md
+++ b/docs/sources/visualize/grafana.md
@@ -32,7 +32,7 @@ recent version to take advantage of [LogQL]({{< relref "../query/_index.md" >}})
    On Mac: `docker.for.mac.localhost` \
    On Windows: `docker.for.win.localhost`
 1. To see the logs, click Explore on the sidebar, select the Loki
-   datasource in the top-left dropdown, and then choose a log stream using the
+   data source in the top-left dropdown, and then choose a log stream using the
    Log labels button.
 1. Learn more about querying by reading about Loki's query language
    [LogQL]({{< relref "../query/_index.md" >}}).
@@ -40,6 +40,6 @@ Read more about Grafana's Explore feature in the
 [Grafana documentation](http://docs.grafana.org/features/explore) and on how
 to search and filter for logs with Loki.
 
-To configure Loki as a datasource via provisioning, see [Configuring Grafana via
+To configure Loki as a data source via provisioning, see [Configuring Grafana via
 Provisioning](http://docs.grafana.org/features/datasources/loki/#configure-the-datasource-with-provisioning).
Set the URL in the provisioning. From 01f88af75d8e275664c15c3272790cc0e7418875 Mon Sep 17 00:00:00 2001 From: J Stickler Date: Thu, 2 Nov 2023 16:26:00 -0400 Subject: [PATCH 06/14] [Style] Update timeseries to time series. (#11125) **What this PR does / why we need it**: Small update to conform to recent Style decision re: spelling of `time series`. https://grafana.com/docs/writers-toolkit/write/style-guide/word-list/#t --- docs/sources/query/_index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/sources/query/_index.md b/docs/sources/query/_index.md index 845fb32906ef2..2811c63cc14e5 100644 --- a/docs/sources/query/_index.md +++ b/docs/sources/query/_index.md @@ -290,7 +290,7 @@ Loki supports functions to operate on data. ### label_replace() -For each timeseries in `v`, +For each time series in `v`, ``` label_replace(v instant-vector, @@ -300,12 +300,12 @@ label_replace(v instant-vector, regex string) ``` matches the regular expression `regex` against the label `src_label`. -If it matches, then the timeseries is returned with the label `dst_label` replaced by the expansion of `replacement`. +If it matches, then the time series is returned with the label `dst_label` replaced by the expansion of `replacement`. `$1` is replaced with the first matching subgroup, `$2` with the second etc. If the regular expression doesn't match, -then the timeseries is returned unchanged. +then the time series is returned unchanged. This example will return a vector with each time series having a `foo` label with the value `a` added to it: From e93f5bfd93d03411af791405663156726f475693 Mon Sep 17 00:00:00 2001 From: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> Date: Fri, 3 Nov 2023 10:21:30 +0000 Subject: [PATCH 07/14] Add dashboard changes for migrating from cortex_ metrics to loki_ metrics (#11082) **What this PR does / why we need it**: - add missing namespace param for ruler manager metrics - update the loki mixin to use the renamed metrics so both are supported - created the compiled mixin - make the prefix of the cortex_distributor_ingester_clients metrics configurable **Checklist** - [X] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. 
[Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)

---------

Signed-off-by: Michel Hollands
---
 docs/sources/configure/_index.md              |   2 +-
 pkg/bloomgateway/client.go                    |   4 +-
 pkg/bloomgateway/client_test.go               |   4 +-
 .../clientpool/ingester_client_pool.go        |  15 +-
 pkg/distributor/distributor.go                |   3 +-
 pkg/loki/modules.go                           |   4 +-
 pkg/querier/ingester_querier.go               |   8 +-
 pkg/querier/ingester_querier_test.go          |   6 +
 pkg/querier/querier_test.go                   |   3 +-
 pkg/ruler/base/manager.go                     |   2 +-
 pkg/ruler/base/manager_metrics.go             |  24 +--
 pkg/ruler/base/manager_metrics_test.go        | 169 +++++++++---------
 .../chunk/client/aws/metrics_autoscaling.go   |   2 +-
 pkg/storage/factory.go                        |   3 +-
 pkg/storage/store.go                          |   2 +-
 .../gatewayclient/gateway_client.go           |   4 +-
 .../gatewayclient/gateway_client_test.go      |  11 +-
 .../helm/loki/src/dashboards/loki-chunks.json |   2 +-
 .../dashboards/loki-chunks.json               |   2 +-
 .../dashboards/loki-operational.json          |  22 +--
 .../dashboards/loki-chunks.json               |   2 +-
 .../dashboards/loki-operational.json          |  22 +--
 .../dashboards/loki-reads-resources.json      |   2 +-
 .../dashboard-loki-operational.json           |  30 ++--
 .../dashboards/loki-chunks.libsonnet          |   2 +-
 .../dashboards/loki-reads-resources.libsonnet |   2 +-
 26 files changed, 183 insertions(+), 169 deletions(-)

diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md
index 27120c71b9b97..1796e2b57cf8e 100644
--- a/docs/sources/configure/_index.md
+++ b/docs/sources/configure/_index.md
@@ -4410,7 +4410,7 @@ dynamodb:
 
   # query to fetch ingester queue length
   # CLI flag: -metrics.queue-length-query
-  [queue_length_query: <string> | default = "sum(avg_over_time(cortex_ingester_flush_queue_length{job=\"cortex/ingester\"}[2m]))"]
+  [queue_length_query: <string> | default = "sum(avg_over_time(loki_ingester_flush_queue_length{job=\"cortex/ingester\"}[2m])) or sum(avg_over_time(cortex_ingester_flush_queue_length{job=\"cortex/ingester\"}[2m]))"]
 
   # query to fetch throttle rates per table
   # CLI flag: -metrics.write-throttle-query
diff --git a/pkg/bloomgateway/client.go b/pkg/bloomgateway/client.go
index cc1e59d5ea0a7..2216e0b43e3bb 100644
--- a/pkg/bloomgateway/client.go
+++ b/pkg/bloomgateway/client.go
@@ -93,7 +93,7 @@ type GatewayClient struct {
 	ring ring.ReadRing
 }
 
-func NewGatewayClient(cfg ClientConfig, limits Limits, registerer prometheus.Registerer, logger log.Logger) (*GatewayClient, error) {
+func NewGatewayClient(cfg ClientConfig, limits Limits, registerer prometheus.Registerer, logger log.Logger, metricsNamespace string) (*GatewayClient, error) {
 	latency := promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{
 		Namespace: constants.Loki,
 		Subsystem: "bloom_gateway",
@@ -119,7 +119,7 @@ func NewGatewayClient(cfg ClientConfig, limits Limits, registerer prometheus.Reg
 		cfg:    cfg,
 		logger: logger,
 		limits: limits,
-		pool:   clientpool.NewPool("bloom-gateway", cfg.PoolConfig, cfg.Ring, ringclient.PoolAddrFunc(poolFactory), logger),
+		pool:   clientpool.NewPool("bloom-gateway", cfg.PoolConfig, cfg.Ring, ringclient.PoolAddrFunc(poolFactory), logger, metricsNamespace),
 	}
 
 	return c, nil
diff --git a/pkg/bloomgateway/client_test.go b/pkg/bloomgateway/client_test.go
index d1e31643e4b0f..670c050517163 100644
--- a/pkg/bloomgateway/client_test.go
+++ b/pkg/bloomgateway/client_test.go
@@ -24,7 +24,7 @@ func TestBloomGatewayClient(t *testing.T) {
 	flagext.DefaultValues(&cfg)
 
 	t.Run("", func(t *testing.T) {
-		_, err := NewGatewayClient(cfg, l, reg, logger)
+		_, err := NewGatewayClient(cfg, l, reg, logger, "loki")
require.NoError(t, err) }) } @@ -40,7 +40,7 @@ func TestBloomGatewayClient_GroupStreamsByAddresses(t *testing.T) { cfg := ClientConfig{} flagext.DefaultValues(&cfg) - c, err := NewGatewayClient(cfg, l, reg, logger) + c, err := NewGatewayClient(cfg, l, reg, logger, "loki") require.NoError(t, err) testCases := []struct { diff --git a/pkg/distributor/clientpool/ingester_client_pool.go b/pkg/distributor/clientpool/ingester_client_pool.go index 0979d4607b440..55e0e7c1083aa 100644 --- a/pkg/distributor/clientpool/ingester_client_pool.go +++ b/pkg/distributor/clientpool/ingester_client_pool.go @@ -11,11 +11,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" ) -var clients = promauto.NewGauge(prometheus.GaugeOpts{ - Namespace: "cortex", - Name: "distributor_ingester_clients", - Help: "The current number of ingester clients.", -}) +var clients prometheus.Gauge // PoolConfig is config for creating a Pool. type PoolConfig struct { @@ -31,13 +27,20 @@ func (cfg *PoolConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { f.DurationVar(&cfg.RemoteTimeout, prefix+"remote-timeout", 1*time.Second, "Timeout for the health check.") } -func NewPool(name string, cfg PoolConfig, ring ring.ReadRing, factory ring_client.PoolFactory, logger log.Logger) *ring_client.Pool { +func NewPool(name string, cfg PoolConfig, ring ring.ReadRing, factory ring_client.PoolFactory, logger log.Logger, metricsNamespace string) *ring_client.Pool { poolCfg := ring_client.PoolConfig{ CheckInterval: cfg.ClientCleanupPeriod, HealthCheckEnabled: cfg.HealthCheckIngesters, HealthCheckTimeout: cfg.RemoteTimeout, } + if clients == nil { + clients = promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: metricsNamespace, + Name: "distributor_ingester_clients", + Help: "The current number of ingester clients.", + }) + } // TODO(chaudum): Allow configuration of metric name by the caller. 
return ring_client.NewPool(name, poolCfg, ring_client.NewRingServiceDiscovery(ring), factory, clients, logger) } diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index ba31224a47334..963b5cc4302b5 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -177,7 +177,7 @@ func New( tenantsRetention: retention.NewTenantsRetention(overrides), ingestersRing: ingestersRing, validator: validator, - pool: clientpool.NewPool("ingester", clientCfg.PoolConfig, ingestersRing, factory, logger), + pool: clientpool.NewPool("ingester", clientCfg.PoolConfig, ingestersRing, factory, logger, metricsNamespace), labelCache: labelCache, shardTracker: NewShardTracker(), healthyInstancesCount: atomic.NewUint32(0), @@ -236,6 +236,7 @@ func New( ingestersRing, ring_client.PoolAddrFunc(internalFactory), logger, + metricsNamespace, ), overrides, registerer, diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index 0d6d90da91799..ccd1839117cb5 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -782,7 +782,7 @@ func (t *Loki) setupAsyncStore() error { } func (t *Loki) initIngesterQuerier() (_ services.Service, err error) { - t.ingesterQuerier, err = querier.NewIngesterQuerier(t.Cfg.IngesterClient, t.ring, t.Cfg.Querier.ExtraQueryDelay) + t.ingesterQuerier, err = querier.NewIngesterQuerier(t.Cfg.IngesterClient, t.ring, t.Cfg.Querier.ExtraQueryDelay, t.Cfg.MetricsNamespace) if err != nil { return nil, err } @@ -1326,7 +1326,7 @@ func (t *Loki) initIndexGateway() (services.Service, error) { var bloomQuerier indexgateway.BloomQuerier if t.Cfg.BloomGateway.Enabled { - bloomGatewayClient, err := bloomgateway.NewGatewayClient(t.Cfg.BloomGateway.Client, t.Overrides, prometheus.DefaultRegisterer, logger) + bloomGatewayClient, err := bloomgateway.NewGatewayClient(t.Cfg.BloomGateway.Client, t.Overrides, prometheus.DefaultRegisterer, logger, t.Cfg.MetricsNamespace) if err != nil { return nil, err } diff --git a/pkg/querier/ingester_querier.go b/pkg/querier/ingester_querier.go index 1312cf7168ea6..fb57a415ba7f6 100644 --- a/pkg/querier/ingester_querier.go +++ b/pkg/querier/ingester_querier.go @@ -41,20 +41,20 @@ type IngesterQuerier struct { extraQueryDelay time.Duration } -func NewIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration) (*IngesterQuerier, error) { +func NewIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration, metricsNamespace string) (*IngesterQuerier, error) { factory := func(addr string) (ring_client.PoolClient, error) { return client.New(clientCfg, addr) } - return newIngesterQuerier(clientCfg, ring, extraQueryDelay, ring_client.PoolAddrFunc(factory)) + return newIngesterQuerier(clientCfg, ring, extraQueryDelay, ring_client.PoolAddrFunc(factory), metricsNamespace) } // newIngesterQuerier creates a new IngesterQuerier and allows to pass a custom ingester client factory // used for testing purposes -func newIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration, clientFactory ring_client.PoolFactory) (*IngesterQuerier, error) { +func newIngesterQuerier(clientCfg client.Config, ring ring.ReadRing, extraQueryDelay time.Duration, clientFactory ring_client.PoolFactory, metricsNamespace string) (*IngesterQuerier, error) { iq := IngesterQuerier{ ring: ring, - pool: clientpool.NewPool("ingester", clientCfg.PoolConfig, ring, clientFactory, util_log.Logger), + pool: clientpool.NewPool("ingester", clientCfg.PoolConfig, ring, clientFactory, 
util_log.Logger, metricsNamespace), extraQueryDelay: extraQueryDelay, } diff --git a/pkg/querier/ingester_querier_test.go b/pkg/querier/ingester_querier_test.go index a3eb6d3cc3b0b..a5cfd9a54dd8d 100644 --- a/pkg/querier/ingester_querier_test.go +++ b/pkg/querier/ingester_querier_test.go @@ -19,6 +19,7 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql" + "github.com/grafana/loki/pkg/util/constants" ) func TestIngesterQuerier_earlyExitOnQuorum(t *testing.T) { @@ -104,6 +105,7 @@ func TestIngesterQuerier_earlyExitOnQuorum(t *testing.T) { newReadRingMock(ringIngesters, 1), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), + constants.Loki, ) require.NoError(t, err) @@ -203,6 +205,7 @@ func TestIngesterQuerier_earlyExitOnQuorum(t *testing.T) { newReadRingMock(ringIngesters, 1), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), + constants.Loki, ) require.NoError(t, err) @@ -300,6 +303,7 @@ func TestQuerier_tailDisconnectedIngesters(t *testing.T) { newReadRingMock(testData.ringIngesters, 0), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), + constants.Loki, ) require.NoError(t, err) @@ -362,6 +366,7 @@ func TestIngesterQuerier_Volume(t *testing.T) { newReadRingMock([]ring.InstanceDesc{mockInstanceDesc("1.1.1.1", ring.ACTIVE), mockInstanceDesc("3.3.3.3", ring.ACTIVE)}, 0), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), + constants.Loki, ) require.NoError(t, err) @@ -382,6 +387,7 @@ func TestIngesterQuerier_Volume(t *testing.T) { newReadRingMock([]ring.InstanceDesc{mockInstanceDesc("1.1.1.1", ring.ACTIVE), mockInstanceDesc("3.3.3.3", ring.ACTIVE)}, 0), mockQuerierConfig().ExtraQueryDelay, newIngesterClientMockFactory(ingesterClient), + constants.Loki, ) require.NoError(t, err) diff --git a/pkg/querier/querier_test.go b/pkg/querier/querier_test.go index 4bbaa012bc12b..fd21ee47d063b 100644 --- a/pkg/querier/querier_test.go +++ b/pkg/querier/querier_test.go @@ -24,6 +24,7 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/logql" "github.com/grafana/loki/pkg/storage" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -1286,7 +1287,7 @@ func TestQuerier_SelectSamplesWithDeletes(t *testing.T) { } func newQuerier(cfg Config, clientCfg client.Config, clientFactory ring_client.PoolFactory, ring ring.ReadRing, dg *mockDeleteGettter, store storage.Store, limits *validation.Overrides) (*SingleTenantQuerier, error) { - iq, err := newIngesterQuerier(clientCfg, ring, cfg.ExtraQueryDelay, clientFactory) + iq, err := newIngesterQuerier(clientCfg, ring, cfg.ExtraQueryDelay, clientFactory, constants.Loki) if err != nil { return nil, err } diff --git a/pkg/ruler/base/manager.go b/pkg/ruler/base/manager.go index 3787fbe16263d..371eb712508e4 100644 --- a/pkg/ruler/base/manager.go +++ b/pkg/ruler/base/manager.go @@ -60,7 +60,7 @@ func NewDefaultMultiTenantManager(cfg Config, managerFactory ManagerFactory, reg return v - }) + }, metricsNamespace) if reg != nil { reg.MustRegister(userManagerMetrics) } diff --git a/pkg/ruler/base/manager_metrics.go b/pkg/ruler/base/manager_metrics.go index 3064641f63753..d5caab8c2ef87 100644 --- a/pkg/ruler/base/manager_metrics.go +++ b/pkg/ruler/base/manager_metrics.go @@ -30,7 +30,7 @@ type ManagerMetrics struct { const RuleGroupLabel = "rule_group" // NewManagerMetrics returns a ManagerMetrics struct -func NewManagerMetrics(disableRuleGroupLabel 
bool, tf util.MetricLabelTransformFunc) *ManagerMetrics { +func NewManagerMetrics(disableRuleGroupLabel bool, tf util.MetricLabelTransformFunc, metricsNamespace string) *ManagerMetrics { commonLabels := []string{"user"} if !disableRuleGroupLabel { commonLabels = append(commonLabels, RuleGroupLabel) @@ -41,67 +41,67 @@ func NewManagerMetrics(disableRuleGroupLabel bool, tf util.MetricLabelTransformF metricLabelTransformer: tf, EvalDuration: prometheus.NewDesc( - "cortex_prometheus_rule_evaluation_duration_seconds", + metricsNamespace+"_prometheus_rule_evaluation_duration_seconds", "The duration for a rule to execute.", []string{"user"}, nil, ), IterationDuration: prometheus.NewDesc( - "cortex_prometheus_rule_group_duration_seconds", + metricsNamespace+"_prometheus_rule_group_duration_seconds", "The duration of rule group evaluations.", []string{"user"}, nil, ), IterationsMissed: prometheus.NewDesc( - "cortex_prometheus_rule_group_iterations_missed_total", + metricsNamespace+"_prometheus_rule_group_iterations_missed_total", "The total number of rule group evaluations missed due to slow rule group evaluation.", commonLabels, nil, ), IterationsScheduled: prometheus.NewDesc( - "cortex_prometheus_rule_group_iterations_total", + metricsNamespace+"_prometheus_rule_group_iterations_total", "The total number of scheduled rule group evaluations, whether executed or missed.", commonLabels, nil, ), EvalTotal: prometheus.NewDesc( - "cortex_prometheus_rule_evaluations_total", + metricsNamespace+"_prometheus_rule_evaluations_total", "The total number of rule evaluations.", commonLabels, nil, ), EvalFailures: prometheus.NewDesc( - "cortex_prometheus_rule_evaluation_failures_total", + metricsNamespace+"_prometheus_rule_evaluation_failures_total", "The total number of rule evaluation failures.", commonLabels, nil, ), GroupInterval: prometheus.NewDesc( - "cortex_prometheus_rule_group_interval_seconds", + metricsNamespace+"_prometheus_rule_group_interval_seconds", "The interval of a rule group.", commonLabels, nil, ), GroupLastEvalTime: prometheus.NewDesc( - "cortex_prometheus_rule_group_last_evaluation_timestamp_seconds", + metricsNamespace+"_prometheus_rule_group_last_evaluation_timestamp_seconds", "The timestamp of the last rule group evaluation in seconds.", commonLabels, nil, ), GroupLastDuration: prometheus.NewDesc( - "cortex_prometheus_rule_group_last_duration_seconds", + metricsNamespace+"_prometheus_rule_group_last_duration_seconds", "The duration of the last rule group evaluation.", commonLabels, nil, ), GroupRules: prometheus.NewDesc( - "cortex_prometheus_rule_group_rules", + metricsNamespace+"_prometheus_rule_group_rules", "The number of rules.", commonLabels, nil, ), GroupLastEvalSamples: prometheus.NewDesc( - "cortex_prometheus_last_evaluation_samples", + metricsNamespace+"_prometheus_last_evaluation_samples", "The number of samples returned during the last rule group evaluation.", commonLabels, nil, diff --git a/pkg/ruler/base/manager_metrics_test.go b/pkg/ruler/base/manager_metrics_test.go index bc301a54533d6..421133bd4f060 100644 --- a/pkg/ruler/base/manager_metrics_test.go +++ b/pkg/ruler/base/manager_metrics_test.go @@ -14,12 +14,13 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/ruler/rulespb" + "github.com/grafana/loki/pkg/util/constants" ) func TestManagerMetricsWithRuleGroupLabel(t *testing.T) { mainReg := prometheus.NewPedanticRegistry() - managerMetrics := NewManagerMetrics(false, nil) + managerMetrics := NewManagerMetrics(false, nil, constants.Cortex) 
mainReg.MustRegister(managerMetrics) managerMetrics.AddUserRegistry("user1", populateManager(1)) managerMetrics.AddUserRegistry("user2", populateManager(10)) @@ -141,7 +142,7 @@ cortex_prometheus_rule_group_rules{rule_group="group_two",user="user3"} 100000 func TestManagerMetricsWithoutRuleGroupLabel(t *testing.T) { mainReg := prometheus.NewPedanticRegistry() - managerMetrics := NewManagerMetrics(true, nil) + managerMetrics := NewManagerMetrics(true, nil, constants.Loki) mainReg.MustRegister(managerMetrics) managerMetrics.AddUserRegistry("user1", populateManager(1)) managerMetrics.AddUserRegistry("user2", populateManager(10)) @@ -152,86 +153,86 @@ func TestManagerMetricsWithoutRuleGroupLabel(t *testing.T) { //noinspection ALL err := testutil.GatherAndCompare(mainReg, bytes.NewBufferString(` -# HELP cortex_prometheus_last_evaluation_samples The number of samples returned during the last rule group evaluation. -# TYPE cortex_prometheus_last_evaluation_samples gauge -cortex_prometheus_last_evaluation_samples{user="user1"} 2000 -cortex_prometheus_last_evaluation_samples{user="user2"} 20000 -cortex_prometheus_last_evaluation_samples{user="user3"} 200000 -# HELP cortex_prometheus_rule_evaluation_duration_seconds The duration for a rule to execute. -# TYPE cortex_prometheus_rule_evaluation_duration_seconds summary -cortex_prometheus_rule_evaluation_duration_seconds{user="user1",quantile="0.5"} 1 -cortex_prometheus_rule_evaluation_duration_seconds{user="user1",quantile="0.9"} 1 -cortex_prometheus_rule_evaluation_duration_seconds{user="user1",quantile="0.99"} 1 -cortex_prometheus_rule_evaluation_duration_seconds_sum{user="user1"} 1 -cortex_prometheus_rule_evaluation_duration_seconds_count{user="user1"} 1 -cortex_prometheus_rule_evaluation_duration_seconds{user="user2",quantile="0.5"} 10 -cortex_prometheus_rule_evaluation_duration_seconds{user="user2",quantile="0.9"} 10 -cortex_prometheus_rule_evaluation_duration_seconds{user="user2",quantile="0.99"} 10 -cortex_prometheus_rule_evaluation_duration_seconds_sum{user="user2"} 10 -cortex_prometheus_rule_evaluation_duration_seconds_count{user="user2"} 1 -cortex_prometheus_rule_evaluation_duration_seconds{user="user3",quantile="0.5"} 100 -cortex_prometheus_rule_evaluation_duration_seconds{user="user3",quantile="0.9"} 100 -cortex_prometheus_rule_evaluation_duration_seconds{user="user3",quantile="0.99"} 100 -cortex_prometheus_rule_evaluation_duration_seconds_sum{user="user3"} 100 -cortex_prometheus_rule_evaluation_duration_seconds_count{user="user3"} 1 -# HELP cortex_prometheus_rule_evaluation_failures_total The total number of rule evaluation failures. -# TYPE cortex_prometheus_rule_evaluation_failures_total counter -cortex_prometheus_rule_evaluation_failures_total{user="user1"} 2 -cortex_prometheus_rule_evaluation_failures_total{user="user2"} 20 -cortex_prometheus_rule_evaluation_failures_total{user="user3"} 200 -# HELP cortex_prometheus_rule_evaluations_total The total number of rule evaluations. -# TYPE cortex_prometheus_rule_evaluations_total counter -cortex_prometheus_rule_evaluations_total{user="user1"} 2 -cortex_prometheus_rule_evaluations_total{user="user2"} 20 -cortex_prometheus_rule_evaluations_total{user="user3"} 200 -# HELP cortex_prometheus_rule_group_duration_seconds The duration of rule group evaluations. 
-# TYPE cortex_prometheus_rule_group_duration_seconds summary -cortex_prometheus_rule_group_duration_seconds{user="user1",quantile="0.01"} 1 -cortex_prometheus_rule_group_duration_seconds{user="user1",quantile="0.05"} 1 -cortex_prometheus_rule_group_duration_seconds{user="user1",quantile="0.5"} 1 -cortex_prometheus_rule_group_duration_seconds{user="user1",quantile="0.9"} 1 -cortex_prometheus_rule_group_duration_seconds{user="user1",quantile="0.99"} 1 -cortex_prometheus_rule_group_duration_seconds_sum{user="user1"} 1 -cortex_prometheus_rule_group_duration_seconds_count{user="user1"} 1 -cortex_prometheus_rule_group_duration_seconds{user="user2",quantile="0.01"} 10 -cortex_prometheus_rule_group_duration_seconds{user="user2",quantile="0.05"} 10 -cortex_prometheus_rule_group_duration_seconds{user="user2",quantile="0.5"} 10 -cortex_prometheus_rule_group_duration_seconds{user="user2",quantile="0.9"} 10 -cortex_prometheus_rule_group_duration_seconds{user="user2",quantile="0.99"} 10 -cortex_prometheus_rule_group_duration_seconds_sum{user="user2"} 10 -cortex_prometheus_rule_group_duration_seconds_count{user="user2"} 1 -cortex_prometheus_rule_group_duration_seconds{user="user3",quantile="0.01"} 100 -cortex_prometheus_rule_group_duration_seconds{user="user3",quantile="0.05"} 100 -cortex_prometheus_rule_group_duration_seconds{user="user3",quantile="0.5"} 100 -cortex_prometheus_rule_group_duration_seconds{user="user3",quantile="0.9"} 100 -cortex_prometheus_rule_group_duration_seconds{user="user3",quantile="0.99"} 100 -cortex_prometheus_rule_group_duration_seconds_sum{user="user3"} 100 -cortex_prometheus_rule_group_duration_seconds_count{user="user3"} 1 -# HELP cortex_prometheus_rule_group_iterations_missed_total The total number of rule group evaluations missed due to slow rule group evaluation. -# TYPE cortex_prometheus_rule_group_iterations_missed_total counter -cortex_prometheus_rule_group_iterations_missed_total{user="user1"} 2 -cortex_prometheus_rule_group_iterations_missed_total{user="user2"} 20 -cortex_prometheus_rule_group_iterations_missed_total{user="user3"} 200 -# HELP cortex_prometheus_rule_group_iterations_total The total number of scheduled rule group evaluations, whether executed or missed. -# TYPE cortex_prometheus_rule_group_iterations_total counter -cortex_prometheus_rule_group_iterations_total{user="user1"} 2 -cortex_prometheus_rule_group_iterations_total{user="user2"} 20 -cortex_prometheus_rule_group_iterations_total{user="user3"} 200 -# HELP cortex_prometheus_rule_group_last_duration_seconds The duration of the last rule group evaluation. -# TYPE cortex_prometheus_rule_group_last_duration_seconds gauge -cortex_prometheus_rule_group_last_duration_seconds{user="user1"} 2000 -cortex_prometheus_rule_group_last_duration_seconds{user="user2"} 20000 -cortex_prometheus_rule_group_last_duration_seconds{user="user3"} 200000 -# HELP cortex_prometheus_rule_group_last_evaluation_timestamp_seconds The timestamp of the last rule group evaluation in seconds. -# TYPE cortex_prometheus_rule_group_last_evaluation_timestamp_seconds gauge -cortex_prometheus_rule_group_last_evaluation_timestamp_seconds{user="user1"} 2000 -cortex_prometheus_rule_group_last_evaluation_timestamp_seconds{user="user2"} 20000 -cortex_prometheus_rule_group_last_evaluation_timestamp_seconds{user="user3"} 200000 -# HELP cortex_prometheus_rule_group_rules The number of rules. 
-# TYPE cortex_prometheus_rule_group_rules gauge -cortex_prometheus_rule_group_rules{user="user1"} 2000 -cortex_prometheus_rule_group_rules{user="user2"} 20000 -cortex_prometheus_rule_group_rules{user="user3"} 200000 +# HELP loki_prometheus_last_evaluation_samples The number of samples returned during the last rule group evaluation. +# TYPE loki_prometheus_last_evaluation_samples gauge +loki_prometheus_last_evaluation_samples{user="user1"} 2000 +loki_prometheus_last_evaluation_samples{user="user2"} 20000 +loki_prometheus_last_evaluation_samples{user="user3"} 200000 +# HELP loki_prometheus_rule_evaluation_duration_seconds The duration for a rule to execute. +# TYPE loki_prometheus_rule_evaluation_duration_seconds summary +loki_prometheus_rule_evaluation_duration_seconds{user="user1",quantile="0.5"} 1 +loki_prometheus_rule_evaluation_duration_seconds{user="user1",quantile="0.9"} 1 +loki_prometheus_rule_evaluation_duration_seconds{user="user1",quantile="0.99"} 1 +loki_prometheus_rule_evaluation_duration_seconds_sum{user="user1"} 1 +loki_prometheus_rule_evaluation_duration_seconds_count{user="user1"} 1 +loki_prometheus_rule_evaluation_duration_seconds{user="user2",quantile="0.5"} 10 +loki_prometheus_rule_evaluation_duration_seconds{user="user2",quantile="0.9"} 10 +loki_prometheus_rule_evaluation_duration_seconds{user="user2",quantile="0.99"} 10 +loki_prometheus_rule_evaluation_duration_seconds_sum{user="user2"} 10 +loki_prometheus_rule_evaluation_duration_seconds_count{user="user2"} 1 +loki_prometheus_rule_evaluation_duration_seconds{user="user3",quantile="0.5"} 100 +loki_prometheus_rule_evaluation_duration_seconds{user="user3",quantile="0.9"} 100 +loki_prometheus_rule_evaluation_duration_seconds{user="user3",quantile="0.99"} 100 +loki_prometheus_rule_evaluation_duration_seconds_sum{user="user3"} 100 +loki_prometheus_rule_evaluation_duration_seconds_count{user="user3"} 1 +# HELP loki_prometheus_rule_evaluation_failures_total The total number of rule evaluation failures. +# TYPE loki_prometheus_rule_evaluation_failures_total counter +loki_prometheus_rule_evaluation_failures_total{user="user1"} 2 +loki_prometheus_rule_evaluation_failures_total{user="user2"} 20 +loki_prometheus_rule_evaluation_failures_total{user="user3"} 200 +# HELP loki_prometheus_rule_evaluations_total The total number of rule evaluations. +# TYPE loki_prometheus_rule_evaluations_total counter +loki_prometheus_rule_evaluations_total{user="user1"} 2 +loki_prometheus_rule_evaluations_total{user="user2"} 20 +loki_prometheus_rule_evaluations_total{user="user3"} 200 +# HELP loki_prometheus_rule_group_duration_seconds The duration of rule group evaluations. 
+# TYPE loki_prometheus_rule_group_duration_seconds summary +loki_prometheus_rule_group_duration_seconds{user="user1",quantile="0.01"} 1 +loki_prometheus_rule_group_duration_seconds{user="user1",quantile="0.05"} 1 +loki_prometheus_rule_group_duration_seconds{user="user1",quantile="0.5"} 1 +loki_prometheus_rule_group_duration_seconds{user="user1",quantile="0.9"} 1 +loki_prometheus_rule_group_duration_seconds{user="user1",quantile="0.99"} 1 +loki_prometheus_rule_group_duration_seconds_sum{user="user1"} 1 +loki_prometheus_rule_group_duration_seconds_count{user="user1"} 1 +loki_prometheus_rule_group_duration_seconds{user="user2",quantile="0.01"} 10 +loki_prometheus_rule_group_duration_seconds{user="user2",quantile="0.05"} 10 +loki_prometheus_rule_group_duration_seconds{user="user2",quantile="0.5"} 10 +loki_prometheus_rule_group_duration_seconds{user="user2",quantile="0.9"} 10 +loki_prometheus_rule_group_duration_seconds{user="user2",quantile="0.99"} 10 +loki_prometheus_rule_group_duration_seconds_sum{user="user2"} 10 +loki_prometheus_rule_group_duration_seconds_count{user="user2"} 1 +loki_prometheus_rule_group_duration_seconds{user="user3",quantile="0.01"} 100 +loki_prometheus_rule_group_duration_seconds{user="user3",quantile="0.05"} 100 +loki_prometheus_rule_group_duration_seconds{user="user3",quantile="0.5"} 100 +loki_prometheus_rule_group_duration_seconds{user="user3",quantile="0.9"} 100 +loki_prometheus_rule_group_duration_seconds{user="user3",quantile="0.99"} 100 +loki_prometheus_rule_group_duration_seconds_sum{user="user3"} 100 +loki_prometheus_rule_group_duration_seconds_count{user="user3"} 1 +# HELP loki_prometheus_rule_group_iterations_missed_total The total number of rule group evaluations missed due to slow rule group evaluation. +# TYPE loki_prometheus_rule_group_iterations_missed_total counter +loki_prometheus_rule_group_iterations_missed_total{user="user1"} 2 +loki_prometheus_rule_group_iterations_missed_total{user="user2"} 20 +loki_prometheus_rule_group_iterations_missed_total{user="user3"} 200 +# HELP loki_prometheus_rule_group_iterations_total The total number of scheduled rule group evaluations, whether executed or missed. +# TYPE loki_prometheus_rule_group_iterations_total counter +loki_prometheus_rule_group_iterations_total{user="user1"} 2 +loki_prometheus_rule_group_iterations_total{user="user2"} 20 +loki_prometheus_rule_group_iterations_total{user="user3"} 200 +# HELP loki_prometheus_rule_group_last_duration_seconds The duration of the last rule group evaluation. +# TYPE loki_prometheus_rule_group_last_duration_seconds gauge +loki_prometheus_rule_group_last_duration_seconds{user="user1"} 2000 +loki_prometheus_rule_group_last_duration_seconds{user="user2"} 20000 +loki_prometheus_rule_group_last_duration_seconds{user="user3"} 200000 +# HELP loki_prometheus_rule_group_last_evaluation_timestamp_seconds The timestamp of the last rule group evaluation in seconds. +# TYPE loki_prometheus_rule_group_last_evaluation_timestamp_seconds gauge +loki_prometheus_rule_group_last_evaluation_timestamp_seconds{user="user1"} 2000 +loki_prometheus_rule_group_last_evaluation_timestamp_seconds{user="user2"} 20000 +loki_prometheus_rule_group_last_evaluation_timestamp_seconds{user="user3"} 200000 +# HELP loki_prometheus_rule_group_rules The number of rules. 
+# TYPE loki_prometheus_rule_group_rules gauge +loki_prometheus_rule_group_rules{user="user1"} 2000 +loki_prometheus_rule_group_rules{user="user2"} 20000 +loki_prometheus_rule_group_rules{user="user3"} 200000 `)) require.NoError(t, err) } @@ -367,7 +368,7 @@ func newGroupMetrics(r prometheus.Registerer) *groupMetrics { func TestMetricsArePerUser(t *testing.T) { mainReg := prometheus.NewPedanticRegistry() - managerMetrics := NewManagerMetrics(true, nil) + managerMetrics := NewManagerMetrics(true, nil, constants.Loki) mainReg.MustRegister(managerMetrics) managerMetrics.AddUserRegistry("user1", populateManager(1)) managerMetrics.AddUserRegistry("user2", populateManager(10)) @@ -417,7 +418,7 @@ func TestMetricLabelTransformer(t *testing.T) { } return v - }) + }, constants.Loki) mainReg.MustRegister(managerMetrics) reg := prometheus.NewRegistry() diff --git a/pkg/storage/chunk/client/aws/metrics_autoscaling.go b/pkg/storage/chunk/client/aws/metrics_autoscaling.go index f9a6bc14917d3..7aee4df91a47b 100644 --- a/pkg/storage/chunk/client/aws/metrics_autoscaling.go +++ b/pkg/storage/chunk/client/aws/metrics_autoscaling.go @@ -27,7 +27,7 @@ const ( // fetch Ingester queue length // average the queue length over 2 minutes to avoid aliasing with the 1-minute flush period - defaultQueueLenQuery = `sum(avg_over_time(cortex_ingester_flush_queue_length{job="cortex/ingester"}[2m]))` + defaultQueueLenQuery = `sum(avg_over_time(loki_ingester_flush_queue_length{job="cortex/ingester"}[2m])) or sum(avg_over_time(cortex_ingester_flush_queue_length{job="cortex/ingester"}[2m]))` // fetch write throttle rate per DynamoDB table defaultThrottleRateQuery = `sum(rate(cortex_dynamo_throttled_total{operation="DynamoDB.BatchWriteItem"}[1m])) by (table) > 0` // fetch write capacity usage per DynamoDB table diff --git a/pkg/storage/factory.go b/pkg/storage/factory.go index cd61e8054c621..931f33234ea87 100644 --- a/pkg/storage/factory.go +++ b/pkg/storage/factory.go @@ -40,6 +40,7 @@ import ( "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/indexgateway" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/tsdb" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/constants" ) var ( @@ -420,7 +421,7 @@ func NewIndexClient(periodCfg config.PeriodConfig, tableRange config.TableRange, return indexGatewayClient, nil } - gateway, err := gatewayclient.NewGatewayClient(cfg.BoltDBShipperConfig.IndexGatewayClientConfig, registerer, limits, logger) + gateway, err := gatewayclient.NewGatewayClient(cfg.BoltDBShipperConfig.IndexGatewayClientConfig, registerer, limits, logger, constants.Loki) if err != nil { return nil, err } diff --git a/pkg/storage/store.go b/pkg/storage/store.go index 0ebdc59e992c6..0bd679a361984 100644 --- a/pkg/storage/store.go +++ b/pkg/storage/store.go @@ -266,7 +266,7 @@ func (s *LokiStore) storeForPeriod(p config.PeriodConfig, tableRange config.Tabl if p.IndexType == config.TSDBType { if shouldUseIndexGatewayClient(s.cfg.TSDBShipperConfig.Config) { // inject the index-gateway client into the index store - gw, err := gatewayclient.NewGatewayClient(s.cfg.TSDBShipperConfig.IndexGatewayClientConfig, indexClientReg, s.limits, indexClientLogger) + gw, err := gatewayclient.NewGatewayClient(s.cfg.TSDBShipperConfig.IndexGatewayClientConfig, indexClientReg, s.limits, indexClientLogger, s.metricsNamespace) if err != nil { return nil, nil, nil, err } diff --git a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go 
b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go index c6bc474e322e9..949627d29cff8 100644 --- a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go +++ b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client.go @@ -109,7 +109,7 @@ type GatewayClient struct { // // If it is configured to be in ring mode, a pool of GRPC connections to all Index Gateway instances is created using a ring. // Otherwise, it creates a GRPC connection pool to as many addresses as can be resolved from the given address. -func NewGatewayClient(cfg IndexGatewayClientConfig, r prometheus.Registerer, limits indexgateway.Limits, logger log.Logger) (*GatewayClient, error) { +func NewGatewayClient(cfg IndexGatewayClientConfig, r prometheus.Registerer, limits indexgateway.Limits, logger log.Logger, metricsNamespace string) (*GatewayClient, error) { latency := prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: constants.Loki, Name: "index_gateway_request_duration_seconds", @@ -156,7 +156,7 @@ func NewGatewayClient(cfg IndexGatewayClientConfig, r prometheus.Registerer, lim sgClient.cfg.PoolConfig.HealthCheckIngesters = true if sgClient.cfg.Mode == indexgateway.RingMode { - sgClient.pool = clientpool.NewPool("index-gateway", sgClient.cfg.PoolConfig, sgClient.ring, client.PoolAddrFunc(factory), logger) + sgClient.pool = clientpool.NewPool("index-gateway", sgClient.cfg.PoolConfig, sgClient.ring, client.PoolAddrFunc(factory), logger, metricsNamespace) } else { // Note we don't use clientpool.NewPool because we want to provide our own discovery function poolCfg := client.PoolConfig{ diff --git a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go index bb96a68e24bb4..0ec6e81c17754 100644 --- a/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go +++ b/pkg/storage/stores/shipper/indexshipper/gatewayclient/gateway_client_test.go @@ -24,6 +24,7 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/storage/stores/series/index" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/indexgateway" + "github.com/grafana/loki/pkg/util/constants" "github.com/grafana/loki/pkg/validation" ) @@ -191,7 +192,7 @@ func TestGatewayClient_RingMode(t *testing.T) { cfg.Mode = indexgateway.RingMode cfg.Ring = igwRing - c, err := NewGatewayClient(cfg, nil, o, logger) + c, err := NewGatewayClient(cfg, nil, o, logger, constants.Loki) require.NoError(t, err) require.NotNil(t, c) @@ -222,7 +223,7 @@ func TestGatewayClient_RingMode(t *testing.T) { cfg.Mode = indexgateway.RingMode cfg.Ring = igwRing - c, err := NewGatewayClient(cfg, nil, o, logger) + c, err := NewGatewayClient(cfg, nil, o, logger, constants.Loki) require.NoError(t, err) require.NotNil(t, c) @@ -253,7 +254,7 @@ func TestGatewayClient(t *testing.T) { cfg.PoolConfig = clientpool.PoolConfig{ClientCleanupPeriod: 500 * time.Millisecond} overrides, _ := validation.NewOverrides(validation.Limits{}, nil) - gatewayClient, err := NewGatewayClient(cfg, prometheus.DefaultRegisterer, overrides, logger) + gatewayClient, err := NewGatewayClient(cfg, prometheus.DefaultRegisterer, overrides, logger, constants.Loki) require.NoError(t, err) ctx := user.InjectOrgID(context.Background(), "fake") @@ -440,11 +441,11 @@ func TestDoubleRegistration(t *testing.T) { Address: "my-store-address:1234", } - client, err := NewGatewayClient(clientCfg, r, o, logger) + client, err := 
NewGatewayClient(clientCfg, r, o, logger, constants.Loki) require.NoError(t, err) defer client.Stop() - client, err = NewGatewayClient(clientCfg, r, o, logger) + client, err = NewGatewayClient(clientCfg, r, o, logger, constants.Loki) require.NoError(t, err) defer client.Stop() } diff --git a/production/helm/loki/src/dashboards/loki-chunks.json b/production/helm/loki/src/dashboards/loki-chunks.json index 8f30328bf7f9c..bec1997c20d47 100644 --- a/production/helm/loki/src/dashboards/loki-chunks.json +++ b/production/helm/loki/src/dashboards/loki-chunks.json @@ -598,7 +598,7 @@ "steppedLine": false, "targets": [ { - "expr": "cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}", + "expr": "loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", diff --git a/production/loki-mixin-compiled-ssd/dashboards/loki-chunks.json b/production/loki-mixin-compiled-ssd/dashboards/loki-chunks.json index 8f30328bf7f9c..bec1997c20d47 100644 --- a/production/loki-mixin-compiled-ssd/dashboards/loki-chunks.json +++ b/production/loki-mixin-compiled-ssd/dashboards/loki-chunks.json @@ -598,7 +598,7 @@ "steppedLine": false, "targets": [ { - "expr": "cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}", + "expr": "loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", diff --git a/production/loki-mixin-compiled-ssd/dashboards/loki-operational.json b/production/loki-mixin-compiled-ssd/dashboards/loki-operational.json index 19471e6a8a806..c40cdb516a28c 100644 --- a/production/loki-mixin-compiled-ssd/dashboards/loki-operational.json +++ b/production/loki-mixin-compiled-ssd/dashboards/loki-operational.json @@ -4825,7 +4825,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -4911,7 +4911,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -4997,7 +4997,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5083,7 +5083,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5169,17 +5169,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + 
"expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".5", "refId": "C" } @@ -5269,19 +5269,19 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" @@ -5372,7 +5372,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" diff --git a/production/loki-mixin-compiled/dashboards/loki-chunks.json b/production/loki-mixin-compiled/dashboards/loki-chunks.json index 7b3fc88618b8b..f84c6a1f0751c 100644 --- a/production/loki-mixin-compiled/dashboards/loki-chunks.json +++ b/production/loki-mixin-compiled/dashboards/loki-chunks.json @@ -598,7 +598,7 @@ "steppedLine": false, "targets": [ { - "expr": "cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}", + "expr": "loki_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"} or cortex_ingester_flush_queue_length{cluster=\"$cluster\", job=~\"$namespace/ingester.*\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", diff --git a/production/loki-mixin-compiled/dashboards/loki-operational.json b/production/loki-mixin-compiled/dashboards/loki-operational.json index 54682aa76d87d..5f04aadc665e8 100644 --- a/production/loki-mixin-compiled/dashboards/loki-operational.json +++ b/production/loki-mixin-compiled/dashboards/loki-operational.json @@ -5327,7 +5327,7 @@ 
"steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5413,7 +5413,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5499,7 +5499,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5585,7 +5585,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5671,17 +5671,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".5", "refId": "C" } @@ -5771,19 +5771,19 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" @@ -5874,7 +5874,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) 
by (status_code, operation)", + "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" diff --git a/production/loki-mixin-compiled/dashboards/loki-reads-resources.json b/production/loki-mixin-compiled/dashboards/loki-reads-resources.json index 9e4ef679a8725..9f1dc904125fa 100644 --- a/production/loki-mixin-compiled/dashboards/loki-reads-resources.json +++ b/production/loki-mixin-compiled/dashboards/loki-reads-resources.json @@ -1969,7 +1969,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", + "expr": "sum by(pod) (loki_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"}) or sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod}}", diff --git a/production/loki-mixin/dashboards/dashboard-loki-operational.json b/production/loki-mixin/dashboards/dashboard-loki-operational.json index 1342dfc98a29e..e1a9ddbf68b4c 100644 --- a/production/loki-mixin/dashboards/dashboard-loki-operational.json +++ b/production/loki-mixin/dashboards/dashboard-loki-operational.json @@ -5264,7 +5264,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_failures_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5349,7 +5349,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_consumed_capacity_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5434,7 +5434,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_throttled_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5519,7 +5519,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", + "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], @@ -5604,17 +5604,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".5", "refId": "C" } @@ -5703,19 
+5703,19 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" @@ -5805,7 +5805,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" @@ -6324,19 +6324,19 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(cortex_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(cortex_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.9, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(cortex_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "expr": "histogram_quantile(.5, sum(rate(loki_cassandra_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" @@ -6426,7 +6426,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_cassandra_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "expr": "sum(rate(loki_cassandra_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" diff --git a/production/loki-mixin/dashboards/loki-chunks.libsonnet b/production/loki-mixin/dashboards/loki-chunks.libsonnet index 
3aa1e71404808..99a1fa06fe8c7 100644 --- a/production/loki-mixin/dashboards/loki-chunks.libsonnet +++ b/production/loki-mixin/dashboards/loki-chunks.libsonnet @@ -63,7 +63,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('Flush Stats') .addPanel( $.panel('Queue Length') + - $.queryPanel('cortex_ingester_flush_queue_length{%s}' % dashboards['loki-chunks.json'].labelsSelector, '{{pod}}'), + $.queryPanel('loki_ingester_flush_queue_length{%(label)s} or cortex_ingester_flush_queue_length{%(label)s}' % { label: dashboards['loki-chunks.json'].labelsSelector }, '{{pod}}'), ) .addPanel( $.panel('Flush Rate') + diff --git a/production/loki-mixin/dashboards/loki-reads-resources.libsonnet b/production/loki-mixin/dashboards/loki-reads-resources.libsonnet index 4e54760b513ba..3d17903cf83c0 100644 --- a/production/loki-mixin/dashboards/loki-reads-resources.libsonnet +++ b/production/loki-mixin/dashboards/loki-reads-resources.libsonnet @@ -139,7 +139,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addPanel( $.panel('Rules') + $.queryPanel( - 'sum by(%s) (cortex_prometheus_rule_group_rules{%s})' % [$._config.per_instance_label, $.jobMatcher('ruler')], + 'sum by(%(label)s) (loki_prometheus_rule_group_rules{%(matcher)s}) or sum by(%(label)s) (cortex_prometheus_rule_group_rules{%(matcher)s})' % { label: $._config.per_instance_label, matcher: $.jobMatcher('ruler') }, '{{%s}}' % $._config.per_instance_label ), ) From 380f902c30be6341316d5b3181a21d1f606573cd Mon Sep 17 00:00:00 2001 From: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> Date: Fri, 3 Nov 2023 10:21:59 +0000 Subject: [PATCH 08/14] Change default of metrics.namespace (#11110) **What this PR does / why we need it**: Change the default for a subset of metrics. After this all Loki metrics should start with "loki_". 
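For operators who cannot migrate dashboards and alerts right away, the old prefix can be pinned explicitly. A minimal sketch of a Loki configuration fragment (only the `metrics_namespace` key and its two values are defined by this change; the file name is illustrative):

```yaml
# loki.yaml (fragment): keep the pre-change metric prefix during a migration.
# Equivalent to passing -metrics-namespace=cortex on the command line.
metrics_namespace: cortex
```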
This changes the following metrics: cortex_distributor_ingester_clients cortex_dns_failures_total cortex_dns_lookups_total cortex_dns_provider_results cortex_frontend_query_range_duration_seconds_bucket cortex_frontend_query_range_duration_seconds_count cortex_frontend_query_range_duration_seconds_sum cortex_ingester_flush_queue_length cortex_kv_request_duration_seconds_bucket cortex_kv_request_duration_seconds_count cortex_kv_request_duration_seconds_sum cortex_member_consul_heartbeats_total cortex_prometheus_last_evaluation_samples cortex_prometheus_notifications_alertmanagers_discovered cortex_prometheus_notifications_dropped_total cortex_prometheus_notifications_errors_total cortex_prometheus_notifications_latency_seconds cortex_prometheus_notifications_latency_seconds_count cortex_prometheus_notifications_latency_seconds_sum cortex_prometheus_notifications_queue_capacity cortex_prometheus_notifications_queue_length cortex_prometheus_notifications_sent_total cortex_prometheus_rule_evaluation_duration_seconds cortex_prometheus_rule_evaluation_duration_seconds_count cortex_prometheus_rule_evaluation_duration_seconds_sum cortex_prometheus_rule_evaluation_failures_total cortex_prometheus_rule_evaluations_total cortex_prometheus_rule_group_duration_seconds cortex_prometheus_rule_group_duration_seconds_count cortex_prometheus_rule_group_duration_seconds_sum cortex_prometheus_rule_group_interval_seconds cortex_prometheus_rule_group_iterations_missed_total cortex_prometheus_rule_group_iterations_total cortex_prometheus_rule_group_last_duration_seconds cortex_prometheus_rule_group_last_evaluation_timestamp_seconds cortex_prometheus_rule_group_rules cortex_query_frontend_connected_schedulers cortex_query_frontend_queries_in_progress cortex_query_frontend_retries_bucket cortex_query_frontend_retries_count cortex_query_frontend_retries_sum cortex_query_scheduler_connected_frontend_clients cortex_query_scheduler_connected_querier_clients cortex_query_scheduler_inflight_requests cortex_query_scheduler_inflight_requests_count cortex_query_scheduler_inflight_requests_sum cortex_query_scheduler_queue_duration_seconds_bucket cortex_query_scheduler_queue_duration_seconds_count cortex_query_scheduler_queue_duration_seconds_sum cortex_query_scheduler_queue_length cortex_query_scheduler_running cortex_quota_cgroup_cpu_max cortex_quota_cgroup_cpu_period cortex_quota_cpu_count cortex_quota_gomaxprocs cortex_ring_member_heartbeats_total cortex_ring_member_tokens_owned cortex_ring_member_tokens_to_own cortex_ring_members cortex_ring_oldest_member_timestamp cortex_ring_tokens_total cortex_ruler_client_request_duration_seconds_bucket cortex_ruler_client_request_duration_seconds_count cortex_ruler_client_request_duration_seconds_sum cortex_ruler_clients cortex_ruler_config_last_reload_successful cortex_ruler_config_last_reload_successful_seconds cortex_ruler_config_updates_total cortex_ruler_managers_total cortex_ruler_ring_check_errors_total cortex_ruler_sync_rules_total **Checklist** - [X] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated - [X] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update 
`production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15) --------- Signed-off-by: Michel Hollands Co-authored-by: Ashwanth --- CHANGELOG.md | 1 + cmd/migrate/main.go | 3 +- docs/sources/configure/_index.md | 3 +- docs/sources/setup/upgrade/_index.md | 79 +++++++++++++++++++++++++ integration/loki_micro_services_test.go | 4 +- pkg/loki/loki.go | 2 +- 6 files changed, 87 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f406c13dfbb41..f9bcc208412f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ ##### Enhancements +* [11110](https://github.com/grafana/loki/pull/11110) **MichelHollands**: Change the default of the `metrics-namespace` flag to 'loki'. * [11086](https://github.com/grafana/loki/pull/11086) **kandrew5**: Helm: Allow topologySpreadConstraints * [11003](https://github.com/grafana/loki/pull/11003) **MichelHollands**: Add the `metrics-namespace` flag to change the namespace of metrics currently using cortex as namespace. * [10096](https://github.com/grafana/loki/pull/10096) **aschleck**: Storage: Allow setting a constant prefix for all created keys diff --git a/cmd/migrate/main.go b/cmd/migrate/main.go index 477afe4a9c86e..b60a8533c9841 100644 --- a/cmd/migrate/main.go +++ b/cmd/migrate/main.go @@ -24,6 +24,7 @@ import ( "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper" "github.com/grafana/loki/pkg/util/cfg" + "github.com/grafana/loki/pkg/util/constants" util_log "github.com/grafana/loki/pkg/util/log" "github.com/grafana/loki/pkg/validation" ) @@ -48,7 +49,7 @@ func main() { batch := flag.Int("batchLen", 500, "Specify how many chunks to read/write in one batch") shardBy := flag.Duration("shardBy", 6*time.Hour, "Break down the total interval into shards of this size, making this too small can lead to syncing a lot of duplicate chunks") parallel := flag.Int("parallel", 8, "How many parallel threads to process each shard") - metricsNamespace := flag.String("metrics.namespace", "cortex", "Namespace of the generated metrics") + metricsNamespace := flag.String("metrics.namespace", constants.Loki, "Namespace of the generated metrics") flag.Parse() go func() { diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md index 1796e2b57cf8e..349b7016a37a8 100644 --- a/docs/sources/configure/_index.md +++ b/docs/sources/configure/_index.md @@ -226,8 +226,9 @@ Pass the `-config.expand-env` flag at the command line to enable this way of set [shutdown_delay: <duration> | default = 0s] # Namespace of the metrics that in previous releases had cortex as namespace. +# This setting is deprecated and will be removed in the next minor release. 
# CLI flag: -metrics-namespace -[metrics_namespace: <string> | default = "cortex"] +[metrics_namespace: <string> | default = "loki"] ``` ### server diff --git a/docs/sources/setup/upgrade/_index.md b/docs/sources/setup/upgrade/_index.md index 0a007e2927364..c33f0529b9747 100644 --- a/docs/sources/setup/upgrade/_index.md +++ b/docs/sources/setup/upgrade/_index.md @@ -182,6 +182,85 @@ If you are using a [legacy index type]({{< relref "../../storage#index-storage" >}}) - Already deprecated metric `querier_cache_stale_gets_total` is now removed. +#### Metrics namespace + +Some Loki metrics previously started with the prefix `cortex_`. In this release they are renamed to start with `loki_`. To keep the `cortex_` prefix, change the `metrics_namespace` setting from its new default of `loki` back to `cortex`. The following metrics are renamed: + + - `cortex_distributor_ingester_clients` + - `cortex_dns_failures_total` + - `cortex_dns_lookups_total` + - `cortex_dns_provider_results` + - `cortex_frontend_query_range_duration_seconds_bucket` + - `cortex_frontend_query_range_duration_seconds_count` + - `cortex_frontend_query_range_duration_seconds_sum` + - `cortex_ingester_flush_queue_length` + - `cortex_kv_request_duration_seconds_bucket` + - `cortex_kv_request_duration_seconds_count` + - `cortex_kv_request_duration_seconds_sum` + - `cortex_member_consul_heartbeats_total` + - `cortex_prometheus_last_evaluation_samples` + - `cortex_prometheus_notifications_alertmanagers_discovered` + - `cortex_prometheus_notifications_dropped_total` + - `cortex_prometheus_notifications_errors_total` + - `cortex_prometheus_notifications_latency_seconds` + - `cortex_prometheus_notifications_latency_seconds_count` + - `cortex_prometheus_notifications_latency_seconds_sum` + - `cortex_prometheus_notifications_queue_capacity` + - `cortex_prometheus_notifications_queue_length` + - `cortex_prometheus_notifications_sent_total` + - `cortex_prometheus_rule_evaluation_duration_seconds` + - `cortex_prometheus_rule_evaluation_duration_seconds_count` + - `cortex_prometheus_rule_evaluation_duration_seconds_sum` + - `cortex_prometheus_rule_evaluation_failures_total` + - `cortex_prometheus_rule_evaluations_total` + - `cortex_prometheus_rule_group_duration_seconds` + - `cortex_prometheus_rule_group_duration_seconds_count` + - `cortex_prometheus_rule_group_duration_seconds_sum` + - `cortex_prometheus_rule_group_interval_seconds` + - `cortex_prometheus_rule_group_iterations_missed_total` + - `cortex_prometheus_rule_group_iterations_total` + - `cortex_prometheus_rule_group_last_duration_seconds` + - `cortex_prometheus_rule_group_last_evaluation_timestamp_seconds` + - `cortex_prometheus_rule_group_rules` + - `cortex_query_frontend_connected_schedulers` + - `cortex_query_frontend_queries_in_progress` + - `cortex_query_frontend_retries_bucket` + - `cortex_query_frontend_retries_count` + - `cortex_query_frontend_retries_sum` + - `cortex_query_scheduler_connected_frontend_clients` + - `cortex_query_scheduler_connected_querier_clients` + - `cortex_query_scheduler_inflight_requests` + - `cortex_query_scheduler_inflight_requests_count` + - `cortex_query_scheduler_inflight_requests_sum` + - `cortex_query_scheduler_queue_duration_seconds_bucket` + - `cortex_query_scheduler_queue_duration_seconds_count` + - `cortex_query_scheduler_queue_duration_seconds_sum` + - `cortex_query_scheduler_queue_length` + - `cortex_query_scheduler_running` + - `cortex_quota_cgroup_cpu_max` + - `cortex_quota_cgroup_cpu_period` + - `cortex_quota_cpu_count` + - `cortex_quota_gomaxprocs` + - `cortex_ring_member_heartbeats_total` + - `cortex_ring_member_tokens_owned` + - `cortex_ring_member_tokens_to_own` + - `cortex_ring_members` + - `cortex_ring_oldest_member_timestamp` + - `cortex_ring_tokens_total` + - `cortex_ruler_client_request_duration_seconds_bucket` + - `cortex_ruler_client_request_duration_seconds_count` + - `cortex_ruler_client_request_duration_seconds_sum` + - `cortex_ruler_clients` + - `cortex_ruler_config_last_reload_successful` + - `cortex_ruler_config_last_reload_successful_seconds` + - `cortex_ruler_config_updates_total` + - `cortex_ruler_managers_total` + - `cortex_ruler_ring_check_errors_total` + - `cortex_ruler_sync_rules_total` + + +The `metrics_namespace` setting is already deprecated and will be removed in the next minor release, at which point the prefix will always be `loki`. + ### LogCLI #### Store for retrieving remote schema diff --git a/integration/loki_micro_services_test.go b/integration/loki_micro_services_test.go index 45942076569c3..1e1cb36044176 100644 --- a/integration/loki_micro_services_test.go +++ b/integration/loki_micro_services_test.go @@ -544,14 +544,14 @@ func TestSchedulerRing(t *testing.T) { // Check metrics to see if query scheduler is connected with query-frontend metrics, err := cliQueryScheduler.Metrics() require.NoError(t, err) - return getMetricValue(t, "cortex_query_scheduler_connected_frontend_clients", metrics) == 5 + return getMetricValue(t, "loki_query_scheduler_connected_frontend_clients", metrics) == 5 }, 5*time.Second, 500*time.Millisecond) require.Eventually(t, func() bool { // Check metrics to see if query scheduler is connected with query-frontend metrics, err := cliQueryScheduler.Metrics() require.NoError(t, err) - return getMetricValue(t, "cortex_query_scheduler_connected_querier_clients", metrics) == 4 + return getMetricValue(t, "loki_query_scheduler_connected_querier_clients", metrics) == 4 }, 5*time.Second, 500*time.Millisecond) }) diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index e84bdcbe68613..cbfbda3b7cfa8 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -149,7 +149,7 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&c.ShutdownDelay, "shutdown-delay", 0, "How long to wait between SIGTERM and shutdown. After receiving SIGTERM, Loki will report 503 Service Unavailable status via /ready endpoint.") - f.StringVar(&c.MetricsNamespace, "metrics-namespace", "cortex", "Namespace of the metrics that in previous releases had cortex as namespace.") + f.StringVar(&c.MetricsNamespace, "metrics-namespace", constants.Loki, "Namespace of the metrics that in previous releases had cortex as namespace. This setting is deprecated and will be removed in the next minor release.") c.registerServerFlagsWithChangedDefaultValues(f) c.Common.RegisterFlags(f) From 59a2afaed9f089f7f20412147e144f1160c62899 Mon Sep 17 00:00:00 2001 From: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> Date: Fri, 3 Nov 2023 10:22:16 +0000 Subject: [PATCH 09/14] Update changed metrics in docs (#11109) **What this PR does / why we need it**: After the default metrics namespace is changed to "loki", the metrics mentioned in the docs have to change as well. 
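For alerting rules maintained outside these dashboards, the same `or` fallback used in the dashboard queries keeps rules working on either side of the upgrade. A hedged sketch of a Prometheus rule file; the group name, alert name, window, and threshold are illustrative, only the metric names come from this change:

```yaml
groups:
  - name: loki-metrics-migration
    rules:
      # Fire on missed ruler iterations under whichever prefix this Loki version exposes.
      - alert: LokiRulerMissedIterations
        expr: |
          (
            sum(rate(loki_prometheus_rule_group_iterations_missed_total[5m]))
              or
            sum(rate(cortex_prometheus_rule_group_iterations_missed_total[5m]))
          ) > 0
        for: 15m
        labels:
          severity: warning
```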
Should be merged at the same time as https://github.com/grafana/loki/pull/11110 **Checklist** - [X] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15) Signed-off-by: Michel Hollands --- docs/sources/operations/autoscaling_queriers.md | 10 +++++----- docs/sources/operations/observability.md | 2 +- docs/sources/operations/scalability.md | 2 +- docs/sources/operations/shuffle-sharding/_index.md | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/sources/operations/autoscaling_queriers.md b/docs/sources/operations/autoscaling_queriers.md index 06ae330f971b4..52bcc60e00a0f 100644 --- a/docs/sources/operations/autoscaling_queriers.md +++ b/docs/sources/operations/autoscaling_queriers.md @@ -27,14 +27,14 @@ Because queriers pull queries from the query-scheduler queue and process them on - The scheduler queue size. - The queries running in the queriers. -The query-scheduler exposes the `cortex_query_scheduler_inflight_requests` metric. +The query-scheduler exposes the `loki_query_scheduler_inflight_requests` metric. It tracks the sum of queued queries plus the number of queries currently running in the querier workers. The following query is useful to scale queriers based on the inflight requests. 
```promql sum( max_over_time( - cortex_query_scheduler_inflight_requests{namespace="loki-cluster", quantile=""}[] + loki_query_scheduler_inflight_requests{namespace="loki-cluster", quantile=""}[] ) ) ``` @@ -66,7 +66,7 @@ So if we use 6 workers per querier, we will use the following query: ```promql clamp_min(ceil( avg( - avg_over_time(cortex_query_scheduler_inflight_requests{namespace="loki-cluster", quantile="0.75"}[7d]) + avg_over_time(loki_query_scheduler_inflight_requests{namespace="loki-cluster", quantile="0.75"}[7d]) ) / scalar(floor(vector(6 * 0.75))) ), 1) ``` @@ -79,7 +79,7 @@ The resulting query becomes: ```promql ceil( max( - max_over_time(cortex_query_scheduler_inflight_requests{namespace="loki-cluster", quantile="0.5"}[7d]) + max_over_time(loki_query_scheduler_inflight_requests{namespace="loki-cluster", quantile="0.5"}[7d]) ) / 6 ) ``` @@ -111,7 +111,7 @@ spec: triggers: - metadata: metricName: querier_autoscaling_metric - query: sum(max_over_time(cortex_query_scheduler_inflight_requests{namespace="loki-cluster", quantile="0.75"}[2m])) + query: sum(max_over_time(loki_query_scheduler_inflight_requests{namespace="loki-cluster", quantile="0.75"}[2m])) serverAddress: http://prometheus.default:9090/prometheus threshold: "4" type: prometheus diff --git a/docs/sources/operations/observability.md b/docs/sources/operations/observability.md index 58336dd4f7647..8f617bcf869dc 100644 --- a/docs/sources/operations/observability.md +++ b/docs/sources/operations/observability.md @@ -33,7 +33,7 @@ The Loki Ingesters expose the following metrics: | Metric Name | Metric Type | Description | | -------------------------------------------- | ----------- | --------------------------------------------------------------------------------------------------------- | -| `cortex_ingester_flush_queue_length` | Gauge | The total number of series pending in the flush queue. | +| `loki_ingester_flush_queue_length` | Gauge | The total number of series pending in the flush queue. | | `loki_chunk_store_index_entries_per_chunk` | Histogram | Number of index entries written to storage per chunk. | | `loki_ingester_memory_chunks` | Gauge | The total number of chunks in memory. | | `loki_ingester_memory_streams` | Gauge | The total number of streams in memory. | diff --git a/docs/sources/operations/scalability.md b/docs/sources/operations/scalability.md index 2de1278238546..ff8f1d06a0385 100644 --- a/docs/sources/operations/scalability.md +++ b/docs/sources/operations/scalability.md @@ -36,7 +36,7 @@ which informed the implementation._ By default, the `ruler` component embeds a query engine to evaluate rules. This generally works fine, except when rules are complex or have to process a large amount of data regularly. Poor performance of the `ruler` manifests as recording rules metrics -with gaps or missed alerts. This situation can be detected by alerting on the `cortex_prometheus_rule_group_iterations_missed_total` metric +with gaps or missed alerts. This situation can be detected by alerting on the `loki_prometheus_rule_group_iterations_missed_total` metric when it has a non-zero value. A solution to this problem is to externalize rule evaluation from the `ruler` process. 
The `ruler` embedded query engine diff --git a/docs/sources/operations/shuffle-sharding/_index.md b/docs/sources/operations/shuffle-sharding/_index.md index 4c80e48485ba5..3002b774ee542 100644 --- a/docs/sources/operations/shuffle-sharding/_index.md +++ b/docs/sources/operations/shuffle-sharding/_index.md @@ -86,9 +86,9 @@ The maximum number of queriers can be overridden on a per-tenant basis in the li These metrics reveal information relevant to shuffle sharding: -- the overall query-scheduler queue duration, `cortex_query_scheduler_queue_duration_seconds_*` +- the overall query-scheduler queue duration, `loki_query_scheduler_queue_duration_seconds_*` -- the query-scheduler queue length per tenant, `cortex_query_scheduler_queue_length` +- the query-scheduler queue length per tenant, `loki_query_scheduler_queue_length` - the query-scheduler queue duration per tenant can be found with this query: ``` From 6d822928b48be8a47f01275e41a7657e03ab8c0b Mon Sep 17 00:00:00 2001 From: Dylan Guedes Date: Fri, 3 Nov 2023 08:51:43 -0300 Subject: [PATCH 10/14] Jsonnet: Fix unit used by per-pod latency panel (#11104) **What this PR does / why we need it**: The per-pod latency panel should use "ms" as the unit, instead of the default (which is "short"). I'm also wrapping everything around "defaults", because that's how it is used everywhere. **Which issue(s) this PR fixes**: N/A --- .../dashboards/loki-reads.json | 34 +++--- .../dashboards/loki-reads.json | 102 ++++++++++-------- .../dashboards/loki-reads.libsonnet | 15 +-- 3 files changed, 89 insertions(+), 62 deletions(-) diff --git a/production/loki-mixin-compiled-ssd/dashboards/loki-reads.json b/production/loki-mixin-compiled-ssd/dashboards/loki-reads.json index 0499d3746bbae..8c0e5e8f63b77 100644 --- a/production/loki-mixin-compiled-ssd/dashboards/loki-reads.json +++ b/production/loki-mixin-compiled-ssd/dashboards/loki-reads.json @@ -209,14 +209,17 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } } - } + }, + "unit": "ms" }, "fill": 1, "id": 3, @@ -482,14 +485,17 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } } - } + }, + "unit": "ms" }, "fill": 1, "id": 6, diff --git a/production/loki-mixin-compiled/dashboards/loki-reads.json b/production/loki-mixin-compiled/dashboards/loki-reads.json index 49e8a5eb8fd67..54bdb2870da1c 100644 --- a/production/loki-mixin-compiled/dashboards/loki-reads.json +++ b/production/loki-mixin-compiled/dashboards/loki-reads.json @@ -209,14 +209,17 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } } - } + }, + "unit": "ms" }, "fill": 1, "id": 3, @@ -482,14 +485,17 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" + "defaults": { + "custom": 
{ + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } } - } + }, + "unit": "ms" }, "fill": 1, "id": 6, @@ -755,14 +761,17 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } } - } + }, + "unit": "ms" }, "fill": 1, "id": 9, @@ -1028,14 +1037,17 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } } - } + }, + "unit": "ms" }, "fill": 1, "id": 12, @@ -1301,14 +1313,17 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } } - } + }, + "unit": "ms" }, "fill": 1, "id": 15, @@ -1574,14 +1589,17 @@ "dashes": false, "datasource": "$datasource", "fieldConfig": { - "custom": { - "fillOpacity": 50, - "showPoints": "never", - "stacking": { - "group": "A", - "mode": "normal" + "defaults": { + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } } - } + }, + "unit": "ms" }, "fill": 1, "id": 18, diff --git a/production/loki-mixin/dashboards/loki-reads.libsonnet b/production/loki-mixin/dashboards/loki-reads.libsonnet index 136a041e41fda..a1d68a15637c1 100644 --- a/production/loki-mixin/dashboards/loki-reads.libsonnet +++ b/production/loki-mixin/dashboards/loki-reads.libsonnet @@ -28,14 +28,17 @@ local utils = import 'mixin-utils/utils.libsonnet'; }, ], fieldConfig+: { - custom+: { - fillOpacity: 50, - showPoints: 'never', - stacking: { - group: 'A', - mode: 'normal', + defaults+: { + custom+: { + fillOpacity: 50, + showPoints: 'never', + stacking: { + group: 'A', + mode: 'normal', + }, }, }, + unit: 'ms', }, }, From c6ddd7044a7910addb2fb947cda43e9b3d5a070b Mon Sep 17 00:00:00 2001 From: Robert Jacob Date: Fri, 3 Nov 2023 12:53:06 +0100 Subject: [PATCH 11/14] operator: Disable update of repo-internal dependencies (#11023) --- .github/renovate.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/renovate.json b/.github/renovate.json index e775e0697c5b5..e5a99e5c55f4f 100644 --- a/.github/renovate.json +++ b/.github/renovate.json @@ -11,6 +11,14 @@ "matchBaseBranches": ["release-2.9.x","release-2.8.x"], "packagePatterns": ["*"], "enabled": false + }, + { + "matchFileNames": [ "operator/go.mod" ], + "matchPackageNames": [ + "github.com/grafana/loki", + "github.com/grafana/loki/operator/apis/loki" + ], + "enabled": false } ], "vulnerabilityAlerts": { From c31668ebe663ad5b48f909be5e9a7f7f4f4778c6 Mon Sep 17 00:00:00 2001 From: Yannick te Kulve <738464+YannickTeKulve@users.noreply.github.com> Date: Fri, 3 Nov 2023 16:09:36 +0100 Subject: [PATCH 12/14] Helm: Add support for dnsmasq (#11013) **What this PR does / why we need it**: When a dns in k8s is rebooted, nginx will not notice this. Resulting in failing dnslookups and failing writes to the write pods. 
A more in depth explanation is written here: https://gist.github.com/joemiller/68ab3f7a7a08e4a9d5ad5d023cb14fc2 It's already fixed in the loki-distributed helm charts https://github.com/grafana/helm-charts/pull/1272 This pr simply ports the necessary fixes to the loki helm chart. **Which issue(s) this PR fixes**: Fixes none I could find **Special notes for your reviewer**: **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [x] Documentation added - [ ] Tests updated - [x] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [x] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. --------- Signed-off-by: Yannick te Kulve <738464+YannickTeKulve@users.noreply.github.com> Co-authored-by: J Stickler Co-authored-by: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> --- docs/sources/setup/install/helm/reference.md | 18 ++++++++++++++++++ production/helm/loki/CHANGELOG.md | 6 ++++-- production/helm/loki/Chart.yaml | 2 +- production/helm/loki/README.md | 2 +- production/helm/loki/templates/_helpers.tpl | 4 ++++ .../templates/gateway/deployment-gateway.yaml | 3 +++ production/helm/loki/values.yaml | 4 ++++ 7 files changed, 35 insertions(+), 4 deletions(-) diff --git a/docs/sources/setup/install/helm/reference.md b/docs/sources/setup/install/helm/reference.md index a3516706f5949..2155ae9afc66f 100644 --- a/docs/sources/setup/install/helm/reference.md +++ b/docs/sources/setup/install/helm/reference.md @@ -1046,6 +1046,15 @@ true
 []
 </pre>
 </td>
+		<tr>
+			<td>gateway.extraContainers</td>
+			<td>list</td>
+			<td>Containers to add to the gateway pods</td>
+			<td><pre lang="json">
+[]
+</pre>
+</td>
+		</tr>
@@ -1260,6 +1269,15 @@ See values.yaml
 "main '$remote_addr - $remote_user [$time_local]  $status '\n        '\"$request\" $body_bytes_sent \"$http_referer\" '\n        '\"$http_user_agent\" \"$http_x_forwarded_for\"';"
 </pre>
 </td>
+		<tr>
+			<td>gateway.nginxConfig.resolver</td>
+			<td>string</td>
+			<td>Allows overriding the DNS resolver address nginx will use.</td>
+			<td><pre lang="string">
+""
+</pre>
+</td>
+		</tr>
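Together, the two new values documented above let the gateway run a local DNS cache such as dnsmasq and point nginx at it, which is the workaround described in the gist linked from the commit message. A hedged sketch of a values override follows; the sidecar image, its arguments, and the port are assumptions, not part of this chart:

```yaml
gateway:
  # Run a small DNS cache next to nginx so lookups survive cluster DNS restarts.
  extraContainers:
    - name: dnsmasq
      image: janeczku/go-dnsmasq:release-1.0.7  # hypothetical sidecar image
      args:
        - --listen
        - 127.0.0.1:5353
        - --default-resolver
        - --append-search-domains
  nginxConfig:
    # Point nginx at the sidecar instead of the cluster DNS service.
    resolver: 127.0.0.1:5353
```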
diff --git a/production/helm/loki/CHANGELOG.md b/production/helm/loki/CHANGELOG.md index da96d49237a8e..7f45b3155661c 100644 --- a/production/helm/loki/CHANGELOG.md +++ b/production/helm/loki/CHANGELOG.md @@ -13,16 +13,18 @@ Entries should include a reference to the pull request that introduced the chang [//]: # ( : do not remove this line. This locator is used by the CI pipeline to automatically create a changelog entry for each new Loki release. Add other chart versions and respective changelog entries bellow this line.) +## 5.36.2 + +- [BUGFIX] Add support to run dnsmasq + ## 5.36.1 - [FEATURE] Allow topology spread constraints for Loki - ## 5.36.0 - [CHANGE] Changed version of Loki to 2.9.2 - ## 5.35.0 - [FEATURE] Add support for configuring distributor. diff --git a/production/helm/loki/Chart.yaml b/production/helm/loki/Chart.yaml index 992cd3cd02289..bebda1a445d3a 100644 --- a/production/helm/loki/Chart.yaml +++ b/production/helm/loki/Chart.yaml @@ -3,7 +3,7 @@ name: loki description: Helm chart for Grafana Loki in simple, scalable mode type: application appVersion: 2.9.2 -version: 5.36.1 +version: 5.36.2 home: https://grafana.github.io/helm-charts sources: - https://github.com/grafana/loki diff --git a/production/helm/loki/README.md b/production/helm/loki/README.md index f917146e3dd26..a83e55f71159d 100644 --- a/production/helm/loki/README.md +++ b/production/helm/loki/README.md @@ -1,6 +1,6 @@ # loki -![Version: 5.36.1](https://img.shields.io/badge/Version-5.36.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.2](https://img.shields.io/badge/AppVersion-2.9.2-informational?style=flat-square) +![Version: 5.36.2](https://img.shields.io/badge/Version-5.36.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.2](https://img.shields.io/badge/AppVersion-2.9.2-informational?style=flat-square) Helm chart for Grafana Loki in simple, scalable mode diff --git a/production/helm/loki/templates/_helpers.tpl b/production/helm/loki/templates/_helpers.tpl index 9d3eedf5a2df8..964a5a6dcd97b 100644 --- a/production/helm/loki/templates/_helpers.tpl +++ b/production/helm/loki/templates/_helpers.tpl @@ -604,7 +604,11 @@ http { sendfile on; tcp_nopush on; + {{- if .Values.gateway.nginxConfig.resolver }} + resolver {{ .Values.gateway.nginxConfig.resolver }}; + {{- else }} resolver {{ .Values.global.dnsService }}.{{ .Values.global.dnsNamespace }}.svc.{{ .Values.global.clusterDomain }}.; + {{- end }} {{- with .Values.gateway.nginxConfig.httpSnippet }} {{- tpl . $ | nindent 2 }} diff --git a/production/helm/loki/templates/gateway/deployment-gateway.yaml b/production/helm/loki/templates/gateway/deployment-gateway.yaml index 5605f9287d20e..4ffa0c935b0a4 100644 --- a/production/helm/loki/templates/gateway/deployment-gateway.yaml +++ b/production/helm/loki/templates/gateway/deployment-gateway.yaml @@ -96,6 +96,9 @@ spec: {{- end }} resources: {{- toYaml .Values.gateway.resources | nindent 12 }} + {{- if .Values.gateway.extraContainers }} + {{- toYaml .Values.gateway.extraContainers | nindent 8}} + {{- end }} {{- with .Values.gateway.affinity }} affinity: {{- tpl . 
$ | nindent 8 }} diff --git a/production/helm/loki/values.yaml b/production/helm/loki/values.yaml index c09791a84ac7e..92b7069af39f2 100644 --- a/production/helm/loki/values.yaml +++ b/production/helm/loki/values.yaml @@ -1324,6 +1324,8 @@ gateway: allowPrivilegeEscalation: false # -- Resource requests and limits for the gateway resources: {} + # -- Containers to add to the gateway pods + extraContainers: [] # -- Grace period to allow the gateway to shutdown before it is killed terminationGracePeriodSeconds: 30 # -- Affinity for gateway pods. Passed through `tpl` and, thus, to be configured as string @@ -1427,6 +1429,8 @@ gateway: customWriteUrl: null # -- Override Backend URL customBackendUrl: null + # -- Allows overriding the DNS resolver address nginx will use. + resolver: "" # -- Config file contents for Nginx. Passed through the `tpl` function to allow templating # @default -- See values.yaml file: | From 91ec2fc2bb9f29744e8496bd22a3aa208a954240 Mon Sep 17 00:00:00 2001 From: Trevor Whitney Date: Fri, 3 Nov 2023 10:57:43 -0600 Subject: [PATCH 13/14] add tests for log volume middleware (#11126) **What this PR does / why we need it**: This adds some tests around the volume middleware, which were very useful in understanding a recent issue --- pkg/querier/queryrange/volume_test.go | 79 +++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/pkg/querier/queryrange/volume_test.go b/pkg/querier/queryrange/volume_test.go index 6e6d9fc6ca1ad..5cbce28ac9e95 100644 --- a/pkg/querier/queryrange/volume_test.go +++ b/pkg/querier/queryrange/volume_test.go @@ -1,15 +1,19 @@ package queryrange import ( + "context" "testing" "time" "github.com/stretchr/testify/require" + "github.com/grafana/dskit/user" + "github.com/grafana/loki/pkg/loghttp" "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/push" "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase" + "github.com/grafana/loki/pkg/storage/stores/index/seriesvolume" ) const forRangeQuery = false @@ -250,3 +254,78 @@ func Test_toPrometheusResponse(t *testing.T) { }, promResp.Response.Data) }) } + +func Test_VolumeMiddleware(t *testing.T) { + makeVolumeRequest := func(req *logproto.VolumeRequest) *queryrangebase.PrometheusResponse { + nextHandler := queryrangebase.HandlerFunc(func(ctx context.Context, r queryrangebase.Request) (queryrangebase.Response, error) { + return &VolumeResponse{ + Response: &logproto.VolumeResponse{ + Volumes: []logproto.Volume{ + { + Name: `{foo="bar"}`, + Volume: 42, + }, + }, + }, + }, nil + }) + + m := NewVolumeMiddleware() + wrapped := m.Wrap(nextHandler) + + ctx := user.InjectOrgID(context.Background(), "fake") + resp, err := wrapped.Do(ctx, req) + require.NoError(t, err) + require.NotNil(t, resp) + + return resp.(*LokiPromResponse).Response + } + + t.Run("it breaks query up into subqueries according to step", func(t *testing.T) { + volumeReq := &logproto.VolumeRequest{ + From: 10, + Through: 20, + Matchers: `{foo="bar"}`, + Limit: seriesvolume.DefaultLimit, + Step: 1, + AggregateBy: seriesvolume.Series, + } + promResp := makeVolumeRequest(volumeReq) + + require.Equal(t, promResp.Data.ResultType, loghttp.ResultTypeMatrix) + require.Equal(t, len(promResp.Data.Result), 1) + require.Equal(t, len(promResp.Data.Result[0].Samples), 10) + }) + + t.Run("only returns one datapoint when step is > than time range", func(t *testing.T) { + volumeReq := &logproto.VolumeRequest{ + From: 10, + Through: 20, + Matchers: `{foo="bar"}`, + Limit: seriesvolume.DefaultLimit, + Step: 20, + AggregateBy: 
seriesvolume.Series, + } + promResp := makeVolumeRequest(volumeReq) + + require.Equal(t, promResp.Data.ResultType, loghttp.ResultTypeVector) + require.Equal(t, len(promResp.Data.Result), 1) + require.Equal(t, len(promResp.Data.Result[0].Samples), 1) + }) + + t.Run("when requested time range is not evenly divisible by step, an extra datapoint is added", func(t *testing.T) { + volumeReq := &logproto.VolumeRequest{ + From: 1698830441000, // 2023-11-01T09:20:41Z + Through: 1698830498000, // 2023-11-01T09:21:38Z, difference is 57s + Matchers: `{foo="bar"}`, + Limit: seriesvolume.DefaultLimit, + Step: 60000, // 60s + AggregateBy: seriesvolume.Series, + } + promResp := makeVolumeRequest(volumeReq) + + require.Equal(t, promResp.Data.ResultType, loghttp.ResultTypeMatrix) + require.Equal(t, 1, len(promResp.Data.Result)) + require.Equal(t, 2, len(promResp.Data.Result[0].Samples)) + }) +} From 060600c104905be8bd6c503c4600bbef226f9c75 Mon Sep 17 00:00:00 2001 From: Trevor Whitney Date: Fri, 3 Nov 2023 12:03:40 -0600 Subject: [PATCH 14/14] add vulnerability scanning github action (#11096) Adds a GitHub action that will run `snyk` and `trivy` scans against PRs and comment on the PR if a high or critical vulnerability is found --- .github/workflows/vulnerability-scan.yml | 90 ++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 .github/workflows/vulnerability-scan.yml diff --git a/.github/workflows/vulnerability-scan.yml b/.github/workflows/vulnerability-scan.yml new file mode 100644 index 0000000000000..083704778fc1a --- /dev/null +++ b/.github/workflows/vulnerability-scan.yml @@ -0,0 +1,90 @@ +name: PR Vulnerability Scan +on: pull_request + +permissions: + pull-requests: write + contents: write + +jobs: + snyk: + name: Snyk Scan + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@master + - name: Run Snyk to check for vulnerabilities + uses: snyk/actions/golang@master + continue-on-error: true # To make sure that PR comment is made + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + with: + command: test + args: --severity-threshold=high --json-file-output=snyk.json + + - name: Prepare Snyk message + run: | + echo "Snyk scan found the following vulnerabilities:" > snyk.txt + + - name: Format Snyk Message + uses: sergeysova/jq-action@v2 + continue-on-error: true + with: + cmd: jq -r '.vulnerabilities[] | "* **\(.severity)** - [\(.identifiers.CVE[0])] \(.title) in `\(.moduleName)` v\(.version). 
Fixed in \(.fixedIn)"' snyk.json >> snyk.txt + + - name: Determine whether to comment + continue-on-error: true + id: should-comment + run: | + if [[ $(wc -l < snyk.txt) -gt 1 ]]; then exit 0; fi + exit 1 + + - name: Comment on PR with Snyk scan results + uses: mshick/add-pr-comment@v2 + if: ${{ steps.should-comment.outcome == 'success' }} + with: + message-id: snyk-${{ github.event.number }} + message-path: snyk.txt + trivy: + name: Trivy Scan + runs-on: ubuntu-20.04 + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Build Loki Image + run: | + IMAGE_TAG="$(./tools/image-tag)" + make loki-image + echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_ENV + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + image-ref: "docker.io/grafana/loki:${{ env.IMAGE_TAG }}" + format: "json" + output: "trivy.json" + severity: "CRITICAL,HIGH" + + - name: Prepare Trivy Message + run: | + echo "Trivy scan found the following vulnerabilities:" > trivy.txt + + - name: Format Trivy Message + uses: sergeysova/jq-action@v2 + continue-on-error: true + with: + cmd: jq -r '.Results[] | select(.Vulnerabilities != null) | .Vulnerabilities[] | "* **\(.Severity)** [\(.Title)](\(.PrimaryURL)) in `\(.PkgName)` v\(.InstalledVersion). Fixed in v\(.FixedVersion)"' trivy.json >> trivy.txt + + - name: Determine whether to comment + continue-on-error: true + id: should-comment + run: | + if [[ $(wc -l < trivy.txt) -gt 1 ]]; then exit 0; fi + exit 1 + + - name: Comment on PR with Trivy scan results + uses: mshick/add-pr-comment@v2 + if: ${{ steps.should-comment.outcome == 'success' }} + with: + message-id: trivy-${{ github.event.number }} + message-path: trivy.txt