From 14d303ac6ce84e73bf4bfae8bba81b8dde7fb685 Mon Sep 17 00:00:00 2001 From: Karsten Jeschkies Date: Wed, 1 Nov 2023 11:51:57 +0100 Subject: [PATCH] Bring back RED metrics for querier when processing scheduler requests. (#11097) **What this PR does / why we need it**: A previous change removed the RED metrics for the querier. This adds them back as part of a middleware. **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15) --- pkg/loki/loki.go | 3 +- pkg/loki/modules.go | 11 ++++-- pkg/querier/queryrange/codec.go | 23 ++++++++++++ pkg/querier/queryrange/instrument.go | 54 ++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 pkg/querier/queryrange/instrument.go diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index 7b28d4b5ef41c..9d47120a6cf87 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -327,7 +327,8 @@ type Loki struct { HTTPAuthMiddleware middleware.Interface - Codec Codec + Codec Codec + Metrics *server.Metrics } // New makes a new Loki. diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index db0a8b5a4d7d6..0059b4665842a 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -141,7 +141,9 @@ func (t *Loki) initServer() (services.Service, error) { // Loki handles signals on its own. DisableSignalHandling(&t.Cfg.Server) - serv, err := server.New(t.Cfg.Server) + + t.Metrics = server.NewServerMetrics(t.Cfg.Server) + serv, err := server.NewWithMetrics(t.Cfg.Server, t.Metrics) if err != nil { return nil, err } @@ -511,10 +513,15 @@ func (t *Loki) initQuerier() (services.Service, error) { t.Server.HTTP.Path("/loki/api/v1/tail").Methods("GET", "POST").Handler(httpMiddleware.Wrap(http.HandlerFunc(t.querierAPI.TailHandler))) t.Server.HTTP.Path("/api/prom/tail").Methods("GET", "POST").Handler(httpMiddleware.Wrap(http.HandlerFunc(t.querierAPI.TailHandler))) + internalHandler := queryrangebase.MergeMiddlewares( + serverutil.RecoveryMiddleware, + queryrange.Instrument{Metrics: t.Metrics}, + ).Wrap(handler) + svc, err := querier.InitWorkerService( querierWorkerServiceConfig, prometheus.DefaultRegisterer, - serverutil.RecoveryMiddleware.Wrap(handler), + internalHandler, t.Codec, ) if err != nil { diff --git a/pkg/querier/queryrange/codec.go b/pkg/querier/queryrange/codec.go index 05f1f4379922f..7a604780219cc 100644 --- a/pkg/querier/queryrange/codec.go +++ b/pkg/querier/queryrange/codec.go @@ -497,6 +497,10 @@ func (Codec) DecodeHTTPGrpcRequest(ctx context.Context, r *httpgrpc.HTTPRequest) // DecodeHTTPGrpcResponse decodes an httpgrp.HTTPResponse to queryrangebase.Response. func (Codec) DecodeHTTPGrpcResponse(r *httpgrpc.HTTPResponse, req queryrangebase.Request) (queryrangebase.Response, error) { + if r.Code/100 != 2 { + return nil, httpgrpc.Errorf(int(r.Code), string(r.Body)) + } + headers := make(http.Header) for _, header := range r.Headers { headers[header.Key] = header.Values @@ -702,6 +706,25 @@ func (c Codec) EncodeRequest(ctx context.Context, r queryrangebase.Request) (*ht } } +func (c Codec) Path(r queryrangebase.Request) string { + switch request := r.(type) { + case *LokiRequest: + return "loki/api/v1/query_range" + case *LokiSeriesRequest: + return "loki/api/v1/series" + case *LabelRequest: + return request.Path() // NOTE: this could be either /label or /label/{name}/values endpoint. So forward the original path as it is. + case *LokiInstantRequest: + return "/loki/api/v1/query" + case *logproto.IndexStatsRequest: + return "/loki/api/v1/index/stats" + case *logproto.VolumeRequest: + return "/loki/api/v1/index/volume_range" + } + + return "other" +} + func (p RequestProtobufCodec) EncodeRequest(ctx context.Context, r queryrangebase.Request) (*http.Request, error) { req, err := p.Codec.EncodeRequest(ctx, r) if err != nil { diff --git a/pkg/querier/queryrange/instrument.go b/pkg/querier/queryrange/instrument.go new file mode 100644 index 0000000000000..8c32fad4ca304 --- /dev/null +++ b/pkg/querier/queryrange/instrument.go @@ -0,0 +1,54 @@ +package queryrange + +import ( + "context" + "strconv" + "time" + + "github.com/grafana/dskit/httpgrpc" + "github.com/grafana/dskit/instrument" + "github.com/grafana/dskit/middleware" + + "github.com/grafana/dskit/server" + + "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase" +) + +const ( + method = "GET" +) + +type Instrument struct { + *server.Metrics +} + +var _ queryrangebase.Middleware = Instrument{} + +// Wrap implements the queryrangebase.Middleware +func (i Instrument) Wrap(next queryrangebase.Handler) queryrangebase.Handler { + return queryrangebase.HandlerFunc(func(ctx context.Context, r queryrangebase.Request) (queryrangebase.Response, error) { + route := DefaultCodec.Path(r) + route = middleware.MakeLabelValue(route) + inflight := i.InflightRequests.WithLabelValues(method, route) + inflight.Inc() + defer inflight.Dec() + + begin := time.Now() + result, err := next.Do(ctx, r) + i.observe(ctx, route, err, time.Since(begin)) + + return result, err + }) +} + +func (i Instrument) observe(ctx context.Context, route string, err error, duration time.Duration) { + respStatus := "200" + if err != nil { + if errResp, ok := httpgrpc.HTTPResponseFromError(err); ok { + respStatus = strconv.Itoa(int(errResp.Code)) + } else { + respStatus = "500" + } + } + instrument.ObserveWithExemplar(ctx, i.RequestDuration.WithLabelValues(method, route, respStatus, "false"), duration.Seconds()) +}