diff --git a/pkg/logql/log/pipeline.go b/pkg/logql/log/pipeline.go index fe4828f682a37..b1e8250c6dc39 100644 --- a/pkg/logql/log/pipeline.go +++ b/pkg/logql/log/pipeline.go @@ -2,7 +2,9 @@ package log import ( "context" + "strings" "sync" + "unicode/utf8" "unsafe" "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" @@ -227,6 +229,9 @@ func (p *streamPipeline) Process(ts int64, line []byte, structuredMetadata ...la for i, lb := range structuredMetadata { structuredMetadata[i].Name = prometheus.NormalizeLabel(lb.Name) + if strings.ContainsRune(structuredMetadata[i].Value, utf8.RuneError) { + structuredMetadata[i].Value = strings.Map(removeInvalidUtf, structuredMetadata[i].Value) + } } p.builder.Add(StructuredMetadataLabel, structuredMetadata...) diff --git a/pkg/logql/log/pipeline_test.go b/pkg/logql/log/pipeline_test.go index 8c11d0c198a10..52c92777f24bc 100644 --- a/pkg/logql/log/pipeline_test.go +++ b/pkg/logql/log/pipeline_test.go @@ -194,6 +194,17 @@ func TestPipelineWithStructuredMetadata(t *testing.T) { require.Equal(t, expectedLabelsResults.String(), lbr.String()) require.Equal(t, true, matches) + // test structured metadata with disallowed label values + withBadLabelValue := append(structuredMetadata, labels.Label{Name: "z_badValue", Value: "test�"}) + expectedStructuredMetadata = append(structuredMetadata, labels.Label{Name: "z_badValue", Value: "test "}) + expectedLabelsResults = append(lbs, expectedStructuredMetadata...) + + _, lbr, matches = p.ForStream(lbs).Process(0, []byte(""), withBadLabelValue...) + require.Equal(t, NewLabelsResult(expectedLabelsResults.String(), expectedLabelsResults.Hash(), lbs, expectedStructuredMetadata, labels.EmptyLabels()), lbr) + require.Equal(t, expectedLabelsResults.Hash(), lbr.Hash()) + require.Equal(t, expectedLabelsResults.String(), lbr.String()) + require.Equal(t, true, matches) + // Reset caches p.baseBuilder.del = []string{"foo", "bar"} p.baseBuilder.add = [numValidCategories]labels.Labels{ @@ -631,12 +642,18 @@ func Benchmark_Pipeline(b *testing.B) { resLine, resLbs, resMatches = sp.Process(0, line, labels.Label{Name: "valid_name", Value: "foo"}) } }) - b.Run("pipeline string with invalid structured metadata", func(b *testing.B) { + b.Run("pipeline string with invalid structured metadata name", func(b *testing.B) { b.ResetTimer() for n := 0; n < b.N; n++ { resLine, resLbs, resMatches = sp.Process(0, line, labels.Label{Name: "invalid-name", Value: "foo"}, labels.Label{Name: "other-invalid-name", Value: "foo"}) } }) + b.Run("pipeline string with invalid structured metadata value", func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + resLine, resLbs, resMatches = sp.Process(0, line, labels.Label{Name: "valid_name", Value: "foo"}, labels.Label{Name: "valid_name_2", Value: "foo�"}) + } + }) extractor, err := NewLineSampleExtractor(CountExtractor, stages, []string{"cluster", "level"}, false, false) require.NoError(b, err) diff --git a/pkg/logql/syntax/parser_test.go b/pkg/logql/syntax/parser_test.go index 7f257cd3c25d1..a18337e6a503f 100644 --- a/pkg/logql/syntax/parser_test.go +++ b/pkg/logql/syntax/parser_test.go @@ -3587,3 +3587,67 @@ func TestParseSampleExpr_String(t *testing.T) { require.Equal(t, "{cluster=\"beep\", namespace=\"boop\"} | msg=~`\\w.*`", expr.String()) }) } + +func newMustLineFormatter(tmpl string) *log.LineFormatter { + l, err := log.NewFormatter(tmpl) + if err != nil { + panic(err) + } + return l +} + +// ensure we can properly sanitize structured metadata from a pipeline that has added the SM as labels +func TestParseLabels_StructuredMetadata(t *testing.T) { + lbs := labels.FromStrings("foo", "bar") + structuredMetadata := labels.FromStrings("user", "asdf bob�") + expectedLabelsResults := append(lbs, labels.FromStrings("user", "asdf bob ")...) + + // regex check for the test near the invalid character + p := log.NewPipeline([]log.Stage{ + log.NewStringLabelFilter(labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")), + log.NewStringLabelFilter(labels.MustNewMatcher(labels.MatchRegexp, "user", "bob.*")), + newMustLineFormatter("lbs {{.foo}} {{.user}}"), + }) + + l, lbr, matches := p.ForStream(lbs).Process(0, []byte("line"), structuredMetadata...) + require.Equal(t, []byte("lbs bar asdf bob "), l) + require.Equal(t, log.NewLabelsResult(expectedLabelsResults.String(), expectedLabelsResults.Hash(), lbs, structuredMetadata, labels.EmptyLabels()), lbr) + require.Equal(t, expectedLabelsResults.Hash(), lbr.Hash()) + require.Equal(t, expectedLabelsResults.String(), lbr.String()) + require.Equal(t, true, matches) + _, err := ParseLabels(lbr.String()) + require.NoError(t, err) + + // equal check for the whole contents + p = log.NewPipeline([]log.Stage{ + log.NewStringLabelFilter(labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")), + log.NewStringLabelFilter(labels.MustNewMatcher(labels.MatchEqual, "user", "asdf bob ")), + newMustLineFormatter("lbs {{.foo}} {{.user}}"), + }) + l, lbr, matches = p.ForStream(lbs).Process(0, []byte("line"), structuredMetadata...) + require.Equal(t, []byte("lbs bar asdf bob "), l) + require.Equal(t, log.NewLabelsResult(expectedLabelsResults.String(), expectedLabelsResults.Hash(), lbs, structuredMetadata, labels.EmptyLabels()), lbr) + require.Equal(t, expectedLabelsResults.Hash(), lbr.Hash()) + require.Equal(t, expectedLabelsResults.String(), lbr.String()) + require.Equal(t, true, matches) + _, err = ParseLabels(lbr.String()) + require.NoError(t, err) + + // check that it works for line filter + f, err := log.NewFilter("asdf bob ", log.LineMatchEqual) + require.NoError(t, err) + equalLineFilterStage := f.ToStage() + p = log.NewPipeline([]log.Stage{ + log.NewStringLabelFilter(labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")), + newMustLineFormatter("lbs {{.foo}} {{.user}}"), + equalLineFilterStage, + }) + l, lbr, matches = p.ForStream(lbs).Process(0, []byte("line"), structuredMetadata...) + require.Equal(t, []byte("lbs bar asdf bob "), l) + require.Equal(t, log.NewLabelsResult(expectedLabelsResults.String(), expectedLabelsResults.Hash(), lbs, structuredMetadata, labels.EmptyLabels()), lbr) + require.Equal(t, expectedLabelsResults.Hash(), lbr.Hash()) + require.Equal(t, expectedLabelsResults.String(), lbr.String()) + require.Equal(t, true, matches) + _, err = ParseLabels(lbr.String()) + require.NoError(t, err) +}