Skip to content

Commit

Permalink
perf: Prune unused pattern branches (#13329)
Browse files Browse the repository at this point in the history
  • Loading branch information
benclive authored Jun 27, 2024
1 parent fcb9b28 commit 5ef18cf
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 0 deletions.
21 changes: 21 additions & 0 deletions pkg/pattern/drain/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,27 @@ func (d *Drain) PatternString(c *LogCluster) string {
return s
}

// Prune removes empty branches from the prefix tree, starting at the root
// node. The count of surviving entries reported by pruneTree for the root is
// intentionally discarded: the root itself is never removed.
func (d *Drain) Prune() {
	_ = d.pruneTree(d.rootNode)
}

// pruneTree recursively removes child subtrees of node that no longer hold
// anything of value and reports how much is still attached to node.
//
// The returned figure is the number of children left under node after pruning
// plus the number of IDs in node.clusterIDs that still resolve to a cluster in
// d.idToCluster. A result of 0 tells the caller that node can be dropped.
// Entries in node.clusterIDs that no longer resolve are only skipped in the
// count; the slice itself is not modified here.
func (d *Drain) pruneTree(node *Node) int {
	// Depth-first: prune each subtree before deciding whether to keep it.
	// Deleting entries from a map while ranging over it is permitted in Go.
	for childKey, subtree := range node.keyToChildNode {
		if remaining := d.pruneTree(subtree); remaining == 0 {
			delete(node.keyToChildNode, childKey)
		}
	}

	// Start from the surviving children and add every cluster ID that is
	// still known to the cluster cache.
	live := len(node.keyToChildNode)
	for _, id := range node.clusterIDs {
		if d.idToCluster.Get(id) == nil {
			continue
		}
		live++
	}
	return live
}

// Delete removes the given cluster's ID from the idToCluster cache. It does
// not modify the prefix tree; nodes that reference the ID are left in place
// until Prune is called.
func (d *Drain) Delete(cluster *LogCluster) {
	id := cluster.id
	d.idToCluster.cache.Remove(id)
}
Expand Down
66 changes: 66 additions & 0 deletions pkg/pattern/drain/drain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"os"
"strings"
"testing"
"time"

"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"
Expand Down Expand Up @@ -625,3 +626,68 @@ func TestDeduplicatePlaceholders(b *testing.T) {
})
}
}

// TestDrain_PruneTreeClearsOldBranches trains a Drain on two groups of lines,
// ages out the first group, and checks that the tree only shrinks once
// Drain.Prune is called (deleting clusters alone leaves the nodes in place).
func TestDrain_PruneTreeClearsOldBranches(t *testing.T) {
	t.Parallel()

	// staleLines is the number of leading input lines that are back-dated by
	// several minutes so that they fall outside the 10s pruning window below.
	const staleLines = 7

	cases := []struct {
		name       string
		drain      *Drain
		inputLines []string
	}{
		{
			name:  "should prune old branches",
			drain: New(DefaultConfig(), nil),
			inputLines: []string{
				"test test test A",
				"test test test B",
				"test test test C",
				"test test test D",
				"test test test E",
				"test test test F",
				"test test test G",
				"my name is W",
				"my name is X",
				"my name is Y",
				"my name is Z",
			},
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			start := time.Now()
			for idx, line := range tc.inputLines {
				// Every line is 1ms after the previous one; the stale lines
				// are additionally shifted (staleLines-idx) minutes into the past.
				offset := time.Duration(idx) * time.Millisecond
				if idx < staleLines {
					offset -= time.Duration(staleLines-idx) * time.Minute
				}
				tc.drain.Train(line, start.Add(offset).UnixNano())
			}

			// Both patterns are present and the tree is fully populated.
			require.Len(t, tc.drain.Clusters(), 2)
			require.Equal(t, 8, countNodes(tc.drain.rootNode))

			// Age out old samples and delete any cluster left empty.
			for _, c := range tc.drain.Clusters() {
				c.Prune(10 * time.Second)
				if c.Size == 0 {
					tc.drain.Delete(c)
				}
			}
			require.Len(t, tc.drain.Clusters(), 1)
			require.Equal(t, 8, countNodes(tc.drain.rootNode), "expected same number of nodes before pruning")

			// Pruning the tree drops the branches of the deleted cluster.
			tc.drain.Prune()
			require.Len(t, tc.drain.Clusters(), 1)
			require.Equal(t, 5, countNodes(tc.drain.rootNode), "expected fewer nodes after pruning")
		})
	}
}

// countNodes returns the number of nodes in the tree rooted at node,
// including node itself. It walks the tree with an explicit stack instead of
// recursing.
func countNodes(node *Node) int {
	pending := []*Node{node}
	visited := 0
	for len(pending) > 0 {
		last := len(pending) - 1
		current := pending[last]
		pending = pending[:last]

		visited++
		for _, child := range current.keyToChildNode {
			pending = append(pending, child)
		}
	}
	return visited
}
2 changes: 2 additions & 0 deletions pkg/pattern/stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ func (s *stream) prune(olderThan time.Duration) bool {
s.patterns.Delete(cluster)
}
}
// Clear empty branches after deleting chunks & clusters
s.patterns.Prune()

chunksPruned := true
if s.chunks != nil {
Expand Down

0 comments on commit 5ef18cf

Please sign in to comment.