From bc5b69531834fa67bdaf9d1f81187d2af4f80e48 Mon Sep 17 00:00:00 2001 From: Ashwanth Date: Mon, 1 Jul 2024 13:10:45 +0530 Subject: [PATCH] fix(retry): fix retries when using protobuf encoding (#13316) (cherry picked from commit a457c5d171d5ffa0a7060c98a8bc48abd735911a) --- .../queryrange/queryrangebase/retry.go | 8 ++-- .../queryrange/queryrangebase/retry_test.go | 46 +++++++++++++++---- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/pkg/querier/queryrange/queryrangebase/retry.go b/pkg/querier/queryrange/queryrangebase/retry.go index f02b5d73cd6c4..a3f2693b1ba75 100644 --- a/pkg/querier/queryrange/queryrangebase/retry.go +++ b/pkg/querier/queryrange/queryrangebase/retry.go @@ -7,9 +7,10 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/backoff" - "github.com/grafana/dskit/httpgrpc" + "github.com/grafana/dskit/grpcutil" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "google.golang.org/grpc/codes" "github.com/grafana/loki/v3/pkg/util" util_log "github.com/grafana/loki/v3/pkg/util/log" @@ -88,9 +89,8 @@ func (r retry) Do(ctx context.Context, req Request) (Response, error) { return resp, nil } - // Retry if we get a HTTP 500 or a non-HTTP error. - httpResp, ok := httpgrpc.HTTPResponseFromError(err) - if !ok || httpResp.Code/100 == 5 { + // Retry if we get a HTTP 500 or an unknown error. + if code := grpcutil.ErrorToStatusCode(err); code == codes.Unknown || code/100 == 5 { lastErr = err level.Error(util_log.WithContext(ctx, r.log)).Log( "msg", "error processing request", diff --git a/pkg/querier/queryrange/queryrangebase/retry_test.go b/pkg/querier/queryrange/queryrangebase/retry_test.go index 7476fa21a06b2..f3a33b45c9d1e 100644 --- a/pkg/querier/queryrange/queryrangebase/retry_test.go +++ b/pkg/querier/queryrange/queryrangebase/retry_test.go @@ -8,9 +8,11 @@ import ( "testing" "github.com/go-kit/log" + "github.com/gogo/status" "github.com/grafana/dskit/httpgrpc" "github.com/stretchr/testify/require" "go.uber.org/atomic" + "google.golang.org/grpc/codes" "github.com/grafana/loki/v3/pkg/util/constants" ) @@ -19,10 +21,11 @@ func TestRetry(t *testing.T) { var try atomic.Int32 for _, tc := range []struct { - name string - handler Handler - resp Response - err error + name string + handler Handler + resp Response + err error + expectedCalls int }{ { name: "retry failures", @@ -32,21 +35,26 @@ func TestRetry(t *testing.T) { } return nil, fmt.Errorf("fail") }), - resp: &PrometheusResponse{Status: "Hello World"}, + resp: &PrometheusResponse{Status: "Hello World"}, + expectedCalls: 5, }, { name: "don't retry 400s", handler: HandlerFunc(func(_ context.Context, req Request) (Response, error) { + try.Inc() return nil, httpgrpc.Errorf(http.StatusBadRequest, "Bad Request") }), - err: httpgrpc.Errorf(http.StatusBadRequest, "Bad Request"), + err: httpgrpc.Errorf(http.StatusBadRequest, "Bad Request"), + expectedCalls: 1, }, { name: "retry 500s", handler: HandlerFunc(func(_ context.Context, req Request) (Response, error) { + try.Inc() return nil, httpgrpc.Errorf(http.StatusInternalServerError, "Internal Server Error") }), - err: httpgrpc.Errorf(http.StatusInternalServerError, "Internal Server Error"), + err: httpgrpc.Errorf(http.StatusInternalServerError, "Internal Server Error"), + expectedCalls: 5, }, { name: "last error", @@ -56,7 +64,28 @@ func TestRetry(t *testing.T) { } return nil, httpgrpc.Errorf(http.StatusInternalServerError, "Internal Server Error") }), - err: httpgrpc.Errorf(http.StatusBadRequest, "Bad Request"), + err: httpgrpc.Errorf(http.StatusBadRequest, "Bad Request"), + expectedCalls: 5, + }, + // previous entires in this table use httpgrpc.Errorf for generating the error which also populates the Details field with the marshalled http response. + // Next set of tests validate the retry behavior when using protobuf encoding where the status does not include the details. + { + name: "protobuf enc don't retry 400s", + handler: HandlerFunc(func(_ context.Context, req Request) (Response, error) { + try.Inc() + return nil, status.New(codes.Code(http.StatusBadRequest), "Bad Request").Err() + }), + err: status.New(codes.Code(http.StatusBadRequest), "Bad Request").Err(), + expectedCalls: 1, + }, + { + name: "protobuf enc retry 500s", + handler: HandlerFunc(func(_ context.Context, req Request) (Response, error) { + try.Inc() + return nil, status.New(codes.Code(http.StatusInternalServerError), "Internal Server Error").Err() + }), + err: status.New(codes.Code(http.StatusInternalServerError), "Internal Server Error").Err(), + expectedCalls: 5, }, } { t.Run(tc.name, func(t *testing.T) { @@ -68,6 +97,7 @@ func TestRetry(t *testing.T) { resp, err := h.Do(context.Background(), req) require.Equal(t, tc.err, err) require.Equal(t, tc.resp, resp) + require.EqualValues(t, tc.expectedCalls, try.Load()) }) } }