From 5991c3d2ef20310a9f27bd4e9e0b28bd089c14eb Mon Sep 17 00:00:00 2001 From: "Grot (@grafanabot)" <43478413+grafanabot@users.noreply.github.com> Date: Tue, 16 Jan 2024 16:27:40 -0600 Subject: [PATCH] [k184] fix: align semantics of metric and log query label extraction (#11668) Backport 9759c130fe5e5b52de8afb5d86195e5188a4f37e from #11587 --- **What this PR does / why we need it**: Align the label parsing logic of metric and log queries so that both extract only the first instance of a label when the same label is requested multiple times. **Which issue(s) this PR fixes**: Fixes #11647 --------- Co-authored-by: Trevor Whitney --- CHANGELOG.md | 1 + pkg/logql/log/labels.go | 4 +- pkg/logql/log/parser.go | 5 +- pkg/logql/log/parser_hints.go | 35 +++++----- pkg/logql/log/parser_hints_test.go | 20 ++++-- pkg/logql/log/parser_test.go | 108 ++++++++++++++--------------- 6 files changed, 94 insertions(+), 79 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f1d251dd97ca..5f48290cc5612 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ * [11601](https://github.com/grafana/loki/pull/11601) **dannykopping** Ruler: Fixed a panic that can be caused by concurrent read-write access of tenant configs when there are a large amount of rules. * [11606](https://github.com/grafana/loki/pull/11606) **dannykopping** Fixed regression adding newlines to HTTP error response bodies which may break client integrations. * [11657](https://github.com/grafana/loki/pull/11657) **ashwanthgoli** Log results cache: compose empty response based on the request being served to avoid returning incorrect limit or direction. +* [11587](https://github.com/grafana/loki/pull/11587) **trevorwhitney** Fix semantics of label parsing logic of metric and log queries. Both now extract only the first instance of a label if multiple extractions into the same label are requested.
##### Changes diff --git a/pkg/logql/log/labels.go b/pkg/logql/log/labels.go index 2144abdf5d7e3..ea4570e652df6 100644 --- a/pkg/logql/log/labels.go +++ b/pkg/logql/log/labels.go @@ -157,7 +157,7 @@ type LabelsBuilder struct { // NewBaseLabelsBuilderWithGrouping creates a new base labels builder with grouping to compute results. func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints ParserHint, without, noLabels bool) *BaseLabelsBuilder { if parserKeyHints == nil { - parserKeyHints = noParserHints + parserKeyHints = NoParserHints() } const labelsCapacity = 16 @@ -179,7 +179,7 @@ func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints ParserHint // NewBaseLabelsBuilder creates a new base labels builder. func NewBaseLabelsBuilder() *BaseLabelsBuilder { - return NewBaseLabelsBuilderWithGrouping(nil, noParserHints, false, false) + return NewBaseLabelsBuilderWithGrouping(nil, NoParserHints(), false, false) } // ForLabels creates a labels builder for a given labels set as base. 
diff --git a/pkg/logql/log/parser.go b/pkg/logql/log/parser.go index be059a2831560..c03e7c91cb960 100644 --- a/pkg/logql/log/parser.go +++ b/pkg/logql/log/parser.go @@ -493,11 +493,13 @@ func (l *LogfmtExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilde return "", false } - if !lbs.ParserLabelHints().ShouldExtract(sanitized) { + _, alwaysExtract := keys[sanitized] + if !alwaysExtract && !lbs.ParserLabelHints().ShouldExtract(sanitized) { return "", false } return sanitized, true }) + if !ok { continue } @@ -530,6 +532,7 @@ func (l *LogfmtExpressionParser) Process(_ int64, line []byte, lbs *LabelsBuilde } } } + if l.strict && l.dec.Err() != nil { addErrLabel(errLogfmt, l.dec.Err(), lbs) return line, true diff --git a/pkg/logql/log/parser_hints.go b/pkg/logql/log/parser_hints.go index cdb61015dd4dd..3fd4cff2b3323 100644 --- a/pkg/logql/log/parser_hints.go +++ b/pkg/logql/log/parser_hints.go @@ -6,7 +6,9 @@ import ( "github.com/grafana/loki/pkg/logqlmodel" ) -var noParserHints = &Hints{} +func NoParserHints() ParserHint { + return &Hints{} +} // ParserHint are hints given to LogQL parsers. // This is specially useful for parser that extract implicitly all possible label keys. 
@@ -58,10 +60,6 @@ type Hints struct { } func (p *Hints) ShouldExtract(key string) bool { - if len(p.requiredLabels) == 0 { - return true - } - for _, l := range p.extracted { if l == key { return false @@ -74,7 +72,7 @@ func (p *Hints) ShouldExtract(key string) bool { } } - return false + return len(p.requiredLabels) == 0 } func (p *Hints) ShouldExtractPrefix(prefix string) bool { @@ -95,19 +93,25 @@ func (p *Hints) NoLabels() bool { } func (p *Hints) RecordExtracted(key string) { - for _, l := range p.requiredLabels { - if l == key { - p.extracted = append(p.extracted, key) - return - } - } + p.extracted = append(p.extracted, key) } func (p *Hints) AllRequiredExtracted() bool { - if len(p.requiredLabels) == 0 { + if len(p.requiredLabels) == 0 || len(p.extracted) < len(p.requiredLabels) { return false } - return len(p.extracted) == len(p.requiredLabels) + + found := 0 + for _, l := range p.requiredLabels { + for _, e := range p.extracted { + if l == e { + found++ + break + } + } + } + + return len(p.requiredLabels) == found } func (p *Hints) Reset() { @@ -172,9 +176,6 @@ func NewParserHint(requiredLabelNames, groups []string, without, noLabels bool, return ph } - ph.requiredLabels = hints - ph.shouldPreserveError = containsError(hints) - return &Hints{requiredLabels: hints, extracted: extracted, shouldPreserveError: containsError(hints)} } diff --git a/pkg/logql/log/parser_hints_test.go b/pkg/logql/log/parser_hints_test.go index ac232bfd871b4..42d0134bc1d8f 100644 --- a/pkg/logql/log/parser_hints_test.go +++ b/pkg/logql/log/parser_hints_test.go @@ -28,7 +28,10 @@ var ( "response": { "status": 204, "latency_seconds": "30.001" - } + }, + "message": { + "message": "foo", + } }`) packedLine = []byte(`{ @@ -58,14 +61,14 @@ func Test_ParserHints(t *testing.T) { jsonLine, true, 1.0, - `{app="nginx", cluster="us-central-west", cluster_extracted="us-east-west", protocol="HTTP/2.0", remote_user="foo", request_host="foo.grafana.net", request_method="POST", 
request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_latency_seconds="30.001", response_status="204", upstream_addr="10.0.0.1:80"}`, + `{app="nginx", cluster="us-central-west", cluster_extracted="us-east-west", message_message="foo", protocol="HTTP/2.0", remote_user="foo", request_host="foo.grafana.net", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_latency_seconds="30.001", response_status="204", upstream_addr="10.0.0.1:80"}`, }, { `sum without (request_host,app,cluster) (rate({app="nginx"} | json | __error__="" | response_status = 204 [1m]))`, jsonLine, true, 1.0, - `{cluster_extracted="us-east-west", protocol="HTTP/2.0", remote_user="foo", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_latency_seconds="30.001", response_status="204", upstream_addr="10.0.0.1:80"}`, + `{cluster_extracted="us-east-west", message_message="foo", protocol="HTTP/2.0", remote_user="foo", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_latency_seconds="30.001", response_status="204", upstream_addr="10.0.0.1:80"}`, }, { `sum by (request_host,app) (rate({app="nginx"} | json | __error__="" | response_status = 204 [1m]))`, @@ -114,14 +117,14 @@ func Test_ParserHints(t *testing.T) { jsonLine, true, 30.001, - `{app="nginx", cluster="us-central-west", cluster_extracted="us-east-west", protocol="HTTP/2.0", remote_user="foo", request_host="foo.grafana.net", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_status="204", upstream_addr="10.0.0.1:80"}`, + `{app="nginx", cluster="us-central-west", cluster_extracted="us-east-west", message_message="foo", protocol="HTTP/2.0", remote_user="foo", request_host="foo.grafana.net", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_status="204", 
upstream_addr="10.0.0.1:80"}`, }, { `sum without (request_host,app,cluster)(rate({app="nginx"} | json | response_status = 204 | unwrap response_latency_seconds [1m]))`, jsonLine, true, 30.001, - `{cluster_extracted="us-east-west", protocol="HTTP/2.0", remote_user="foo", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_status="204", upstream_addr="10.0.0.1:80"}`, + `{cluster_extracted="us-east-west", message_message="foo", protocol="HTTP/2.0", remote_user="foo", request_method="POST", request_size="101", request_time="30.001", request_uri="/rpc/v2/stage", response_status="204", upstream_addr="10.0.0.1:80"}`, }, { `sum(rate({app="nginx"} | logfmt | org_id=3677 | unwrap Ingester_TotalReached[1m]))`, @@ -214,6 +217,13 @@ func Test_ParserHints(t *testing.T) { 0, ``, }, + { + `sum by (message_message,app)(count_over_time({app="nginx"} | json | response_status = 204 and remote_user = "foo"[1m]))`, + jsonLine, + true, + 1, + `{app="nginx", message_message="foo"}`, + }, } { tt := tt t.Run(tt.expr, func(t *testing.T) { diff --git a/pkg/logql/log/parser_test.go b/pkg/logql/log/parser_test.go index 246dbed499c9e..bd57603ab8084 100644 --- a/pkg/logql/log/parser_test.go +++ b/pkg/logql/log/parser_test.go @@ -28,7 +28,7 @@ func Test_jsonParser_Parse(t *testing.T) { "pod_uuid", "foo", "pod_deployment_ref", "foobar", ), - noParserHints, + NoParserHints(), }, { "numeric", @@ -37,7 +37,7 @@ func Test_jsonParser_Parse(t *testing.T) { labels.FromStrings("counter", "1", "price__net_", "5.56909", ), - noParserHints, + NoParserHints(), }, { "escaped", @@ -47,7 +47,7 @@ func Test_jsonParser_Parse(t *testing.T) { "price__net_", "5.56909", "foo", `foo\"bar`, ), - noParserHints, + NoParserHints(), }, { "utf8 error rune", @@ -57,21 +57,21 @@ func Test_jsonParser_Parse(t *testing.T) { "price__net_", "5.56909", "foo", "", ), - noParserHints, + NoParserHints(), }, { "skip arrays", []byte(`{"counter":1, "price": {"net_":["10","20"]}}`), 
labels.EmptyLabels(), labels.FromStrings("counter", "1"), - noParserHints, + NoParserHints(), }, { "bad key replaced", []byte(`{"cou-nter":1}`), labels.EmptyLabels(), labels.FromStrings("cou_nter", "1"), - noParserHints, + NoParserHints(), }, { "errors", @@ -80,7 +80,7 @@ func Test_jsonParser_Parse(t *testing.T) { labels.FromStrings("__error__", "JSONParserErr", "__error_details__", "Value looks like object, but can't find closing '}' symbol", ), - noParserHints, + NoParserHints(), }, { "errors hints", @@ -103,7 +103,7 @@ func Test_jsonParser_Parse(t *testing.T) { "next_err", "false", "pod_deployment_ref", "foobar", ), - noParserHints, + NoParserHints(), }, } for _, tt := range tests { @@ -255,7 +255,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("app", "foo"), - noParserHints, + NoParserHints(), }, { "alternate syntax", @@ -265,7 +265,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("test", "value"), - noParserHints, + NoParserHints(), }, { "multiple fields", @@ -278,7 +278,7 @@ func TestJSONExpressionParser(t *testing.T) { labels.FromStrings("app", "foo", "namespace", "prod", ), - noParserHints, + NoParserHints(), }, { "utf8", @@ -288,7 +288,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("utf8", "value"), - noParserHints, + NoParserHints(), }, { "nested field", @@ -298,7 +298,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("uuid", "foo"), - noParserHints, + NoParserHints(), }, { "nested field alternate syntax", @@ -308,7 +308,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("uuid", "foo"), - noParserHints, + NoParserHints(), }, { "nested field alternate syntax 2", @@ -318,7 +318,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("uuid", "foo"), - noParserHints, + NoParserHints(), }, { 
"nested field alternate syntax 3", @@ -328,7 +328,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("uuid", "foo"), - noParserHints, + NoParserHints(), }, { "array element", @@ -338,7 +338,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("param", "1"), - noParserHints, + NoParserHints(), }, { "full array", @@ -348,7 +348,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("params", "[1,2,3]"), - noParserHints, + NoParserHints(), }, { "full object", @@ -358,7 +358,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("deployment", `{"ref":"foobar", "params": [1,2,3]}`), - noParserHints, + NoParserHints(), }, { "expression matching nothing", @@ -368,7 +368,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("nope", ""), - noParserHints, + NoParserHints(), }, { "null field", @@ -379,7 +379,7 @@ func TestJSONExpressionParser(t *testing.T) { labels.EmptyLabels(), labels.FromStrings("nf", ""), // null is coerced to an empty string - noParserHints, + NoParserHints(), }, { "boolean field", @@ -389,7 +389,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("bool", `false`), - noParserHints, + NoParserHints(), }, { "label override", @@ -401,7 +401,7 @@ func TestJSONExpressionParser(t *testing.T) { labels.FromStrings("uuid", "bar", "uuid_extracted", "foo", ), - noParserHints, + NoParserHints(), }, { "non-matching expression", @@ -413,7 +413,7 @@ func TestJSONExpressionParser(t *testing.T) { labels.FromStrings("uuid", "bar", "request_size", "", ), - noParserHints, + NoParserHints(), }, { "empty line", @@ -423,7 +423,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.EmptyLabels(), labels.FromStrings("uuid", ""), - noParserHints, + NoParserHints(), }, { "existing labels are not affected", @@ -435,7 +435,7 @@ 
func TestJSONExpressionParser(t *testing.T) { labels.FromStrings("foo", "bar", "uuid", "", ), - noParserHints, + NoParserHints(), }, { "invalid JSON line", @@ -447,7 +447,7 @@ func TestJSONExpressionParser(t *testing.T) { labels.FromStrings("foo", "bar", logqlmodel.ErrorLabel, errJSON, ), - noParserHints, + NoParserHints(), }, { "invalid JSON line with hints", @@ -470,7 +470,7 @@ func TestJSONExpressionParser(t *testing.T) { }, labels.FromStrings("foo", "bar"), labels.FromStrings("foo", "bar"), - noParserHints, + NoParserHints(), }, { "nested escaped object", @@ -482,7 +482,7 @@ func TestJSONExpressionParser(t *testing.T) { labels.FromStrings("foo", "bar", "app", `{ "key": "value", "key2":"value2"}`, ), - noParserHints, + NoParserHints(), }, } for _, tt := range tests { @@ -746,7 +746,7 @@ func TestLogfmtParser_parse(t *testing.T) { "__error__", "LogfmtParserErr", "__error_details__", "logfmt syntax error at pos 8 : unexpected '='", ), - noParserHints, + NoParserHints(), }, { "not logfmt with hints", @@ -766,7 +766,7 @@ func TestLogfmtParser_parse(t *testing.T) { labels.EmptyLabels(), labels.FromStrings("buzz", "foo"), nil, - noParserHints, + NoParserHints(), }, { "key alone logfmt", @@ -775,7 +775,7 @@ func TestLogfmtParser_parse(t *testing.T) { labels.FromStrings("foo", "bar", "bar", "foo"), nil, - noParserHints, + NoParserHints(), }, { "quoted logfmt", @@ -785,7 +785,7 @@ func TestLogfmtParser_parse(t *testing.T) { "foobar", "foo bar", ), nil, - noParserHints, + NoParserHints(), }, { "escaped control chars in logfmt", @@ -795,7 +795,7 @@ func TestLogfmtParser_parse(t *testing.T) { "foobar", "foo\nbar\tbaz", ), nil, - noParserHints, + NoParserHints(), }, { "literal control chars in logfmt", @@ -805,7 +805,7 @@ func TestLogfmtParser_parse(t *testing.T) { "foobar", "foo\nbar\tbaz", ), nil, - noParserHints, + NoParserHints(), }, { "escaped slash logfmt", @@ -815,7 +815,7 @@ func TestLogfmtParser_parse(t *testing.T) { "foobar", `foo ba\r baz`, ), nil, - 
noParserHints, + NoParserHints(), }, { "literal newline and escaped slash logfmt", @@ -825,7 +825,7 @@ func TestLogfmtParser_parse(t *testing.T) { "foobar", "foo bar\nb\\az", ), nil, - noParserHints, + NoParserHints(), }, { "double property logfmt", @@ -836,7 +836,7 @@ func TestLogfmtParser_parse(t *testing.T) { "latency", "10ms", ), nil, - noParserHints, + NoParserHints(), }, { "duplicate from line property", @@ -846,7 +846,7 @@ func TestLogfmtParser_parse(t *testing.T) { "foobar", "10ms", ), nil, - noParserHints, + NoParserHints(), }, { "duplicate property", @@ -857,7 +857,7 @@ func TestLogfmtParser_parse(t *testing.T) { "foobar", "10ms", ), nil, - noParserHints, + NoParserHints(), }, { "invalid key names", @@ -869,7 +869,7 @@ func TestLogfmtParser_parse(t *testing.T) { "test_dash", "foo", ), nil, - noParserHints, + NoParserHints(), }, { "nil", @@ -877,7 +877,7 @@ func TestLogfmtParser_parse(t *testing.T) { labels.FromStrings("foo", "bar"), labels.FromStrings("foo", "bar"), nil, - noParserHints, + NoParserHints(), }, { "empty key", @@ -892,7 +892,7 @@ func TestLogfmtParser_parse(t *testing.T) { "__error__", "LogfmtParserErr", "__error_details__", "logfmt syntax error at pos 15 : unexpected '='", ), - noParserHints, + NoParserHints(), }, { "error rune in key", @@ -906,7 +906,7 @@ func TestLogfmtParser_parse(t *testing.T) { "__error__", "LogfmtParserErr", "__error_details__", "logfmt syntax error at pos 20 : invalid key", ), - noParserHints, + NoParserHints(), }, { "double quote in key", @@ -920,7 +920,7 @@ func TestLogfmtParser_parse(t *testing.T) { "__error__", "LogfmtParserErr", "__error_details__", `logfmt syntax error at pos 17 : unexpected '"'`, ), - noParserHints, + NoParserHints(), }, { "= in value", @@ -933,7 +933,7 @@ func TestLogfmtParser_parse(t *testing.T) { "__error__", "LogfmtParserErr", "__error_details__", `logfmt syntax error at pos 7 : unexpected '='`, ), - noParserHints, + NoParserHints(), }, } @@ -1200,7 +1200,7 @@ func 
Test_unpackParser_Parse(t *testing.T) { "cluster", "us-central1", ), []byte(`some message`), - noParserHints, + NoParserHints(), }, { "wrong json", @@ -1210,7 +1210,7 @@ func Test_unpackParser_Parse(t *testing.T) { "__error_details__", "expecting json object(6), but it is not", ), []byte(`"app":"foo","namespace":"prod","_entry":"some message","pod":{"uid":"1"}`), - noParserHints, + NoParserHints(), }, { "empty line", @@ -1218,7 +1218,7 @@ func Test_unpackParser_Parse(t *testing.T) { labels.FromStrings("cluster", "us-central1"), labels.FromStrings("cluster", "us-central1"), []byte(``), - noParserHints, + NoParserHints(), }, { "wrong json with hints", @@ -1240,7 +1240,7 @@ func Test_unpackParser_Parse(t *testing.T) { "cluster", "us-central1", ), []byte(`["foo","bar"]`), - noParserHints, + NoParserHints(), }, { "should rename", @@ -1254,7 +1254,7 @@ func Test_unpackParser_Parse(t *testing.T) { "cluster", "us-central1", ), []byte(`some message`), - noParserHints, + NoParserHints(), }, { "should not change log and labels if no packed entry", @@ -1266,7 +1266,7 @@ func Test_unpackParser_Parse(t *testing.T) { "cluster", "us-central1", ), []byte(`{"bar":1,"app":"foo","namespace":"prod","pod":{"uid":"1"}}`), - noParserHints, + NoParserHints(), }, { "non json with escaped quotes", @@ -1278,7 +1278,7 @@ func Test_unpackParser_Parse(t *testing.T) { "cluster", "us-central1", ), []byte(`I0303 17:49:45.976518 1526 kubelet_getters.go:178] "Pod status updated" pod="openshift-etcd/etcd-ip-10-0-150-50.us-east-2.compute.internal" status=Running`), - noParserHints, + NoParserHints(), }, { "invalid key names", @@ -1289,7 +1289,7 @@ func Test_unpackParser_Parse(t *testing.T) { "test_dash", "foo", ), []byte(`some message`), - noParserHints, + NoParserHints(), }, } for _, tt := range tests {