From eb07d827d0771dd292d9cffedaa4ae4545fdb93b Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Fri, 20 Oct 2023 11:35:33 +0200 Subject: [PATCH 01/44] Extract existing worker code into separate service struct Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 137 ++++++++++++++++++++----------- 1 file changed, 87 insertions(+), 50 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index 425d6713e92f9..3f5c5d9d1e93f 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -23,6 +23,10 @@ of line filter expressions. | bloomgateway.Gateway | + queue.RequestQueue + | + bloomgateway.Worker + | bloomshipper.Store | bloomshipper.Shipper @@ -215,9 +219,14 @@ func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, o return nil, err } + // We need to keep a reference to be able to call Stop() on shutdown of the gateway. g.bloomStore = bloomStore svcs := []services.Service{g.queue, g.activeUsers} + for i := 0; i < numWorkers; i++ { + w := newWorker(i, g.queue, g.bloomStore, g.pendingTasks, logger) + svcs = append(svcs, w) + } g.serviceMngr, err = services.NewManager(svcs...) if err != nil { return nil, err @@ -245,10 +254,6 @@ func (g *Gateway) starting(ctx context.Context) error { return errors.Wrap(err, "unable to start bloom gateway subservices") } - for i := 0; i < numWorkers; i++ { - go g.startWorker(ctx, fmt.Sprintf("worker-%d", i)) - } - return nil } @@ -278,52 +283,6 @@ func (g *Gateway) stopping(_ error) error { return services.StopManagerAndAwaitStopped(context.Background(), g.serviceMngr) } -// This is just a dummy implementation of the worker! -// TODO(chaudum): Implement worker that dequeues multiple pending tasks and -// multiplexes them prior to execution. -func (g *Gateway) startWorker(_ context.Context, id string) error { - level.Info(g.logger).Log("msg", "starting worker", "worker", id) - - g.queue.RegisterConsumerConnection(id) - defer g.queue.UnregisterConsumerConnection(id) - - idx := queue.StartIndexWithLocalQueue - - for { - ctx := context.Background() - item, newIdx, err := g.queue.Dequeue(ctx, idx, id) - if err != nil { - if err != queue.ErrStopped { - level.Error(g.logger).Log("msg", "failed to dequeue task", "worker", id, "err", err) - continue - } - level.Info(g.logger).Log("msg", "stopping worker", "worker", id) - return err - } - task, ok := item.(Task) - if !ok { - level.Error(g.logger).Log("msg", "failed to cast to Task", "item", item) - continue - } - - idx = newIdx - level.Info(g.logger).Log("msg", "dequeued task", "worker", id, "task", task.ID) - g.pendingTasks.Delete(task.ID) - - r := task.Request - if len(r.Filters) > 0 { - r.Refs, err = g.bloomStore.FilterChunkRefs(ctx, task.Tenant, r.From.Time(), r.Through.Time(), r.Refs, r.Filters...) - } - if err != nil { - task.ErrCh <- err - } else { - for _, ref := range r.Refs { - task.ResCh <- ref - } - } - } -} - // FilterChunkRefs implements BloomGatewayServer func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunkRefRequest) (*logproto.FilterChunkRefResponse, error) { tenantID, err := tenant.TenantID(ctx) @@ -371,3 +330,81 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk } } } + +// Worker is a datastructure that consumes tasks from the request queue, +// processes them and returns the result/error back to the response channels of +// the tasks. 
+// It is responsible for multiplexing tasks so they can be processes in a more +// efficient way. +type worker struct { + services.Service + + ID string + queue *queue.RequestQueue + store bloomshipper.Store + tasks *pendingTasks + logger log.Logger +} + +func newWorker(i int, queue *queue.RequestQueue, store bloomshipper.Store, tasks *pendingTasks, logger log.Logger) *worker { + id := fmt.Sprintf("bloom-query-worker-%d", i) + w := &worker{ + ID: id, + queue: queue, + store: store, + tasks: tasks, + logger: log.With(logger, "worker", id), + } + w.Service = services.NewBasicService(w.starting, w.running, w.stopping) + return w +} + +func (w *worker) starting(_ context.Context) error { + level.Debug(w.logger).Log("msg", "starting worker") + w.queue.RegisterConsumerConnection(w.ID) + return nil +} + +func (w *worker) running(_ context.Context) error { + idx := queue.StartIndexWithLocalQueue + + for { + ctx := context.Background() + item, newIdx, err := w.queue.Dequeue(ctx, idx, w.ID) + if err != nil { + if err != queue.ErrStopped { + level.Error(w.logger).Log("msg", "failed to dequeue task", "err", err) + continue + } + return err + } + task, ok := item.(Task) + if !ok { + level.Error(w.logger).Log("msg", "failed to cast dequeued item to Task", "item", item) + continue + } + level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID) + + w.tasks.Delete(task.ID) + + idx = newIdx + + r := task.Request + if len(r.Filters) > 0 { + r.Refs, err = w.store.FilterChunkRefs(ctx, task.Tenant, r.From.Time(), r.Through.Time(), r.Refs, r.Filters...) + } + if err != nil { + task.ErrCh <- err + } else { + for _, ref := range r.Refs { + task.ResCh <- ref + } + } + } +} + +func (w *worker) stopping(err error) error { + level.Debug(w.logger).Log("msg", "stopping worker", "err", err) + w.queue.UnregisterConsumerConnection(w.ID) + return nil +} From 4a10400358460d88af1a25292321f168365707a6 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Fri, 20 Oct 2023 11:56:15 +0200 Subject: [PATCH 02/44] Remove duplicate import Signed-off-by: Christian Haudum --- pkg/querier/worker/worker.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/querier/worker/worker.go b/pkg/querier/worker/worker.go index 055b7b5c92717..a7bebfbfccf14 100644 --- a/pkg/querier/worker/worker.go +++ b/pkg/querier/worker/worker.go @@ -20,7 +20,6 @@ import ( "github.com/grafana/loki/pkg/querier/queryrange" "github.com/grafana/loki/pkg/querier/queryrange/queryrangebase" "github.com/grafana/loki/pkg/util" - lokiutil "github.com/grafana/loki/pkg/util" ) type Config struct { @@ -151,7 +150,7 @@ func newQuerierWorkerWithProcessor(cfg Config, metrics *Metrics, logger log.Logg } if ring != nil { - w, err := lokiutil.NewRingWatcher(log.With(logger, "component", "querier-scheduler-worker"), ring, cfg.DNSLookupPeriod, f) + w, err := util.NewRingWatcher(log.With(logger, "component", "querier-scheduler-worker"), ring, cfg.DNSLookupPeriod, f) if err != nil { return nil, err } From 917782235d39c10fd06df7953983faf906b08d79 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Wed, 25 Oct 2023 10:24:21 +0200 Subject: [PATCH 03/44] Multiplex bloom query tasks Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 173 +++++++++++------- pkg/bloomgateway/bloomgateway_test.go | 6 + pkg/bloomgateway/multiplexing.go | 123 +++++++++++++ pkg/bloomgateway/util.go | 129 +++++++++++++ pkg/bloomgateway/util_test.go | 42 +++++ pkg/queue/queue.go | 25 +++ pkg/storage/bloom/v1/fuse.go | 79 ++++---- pkg/storage/bloom/v1/fuse_test.go 
| 68 +++---- pkg/storage/bloom/v1/merge.go | 15 +- .../stores/shipper/bloomshipper/shipper.go | 17 +- .../stores/shipper/bloomshipper/store.go | 101 ++-------- 11 files changed, 550 insertions(+), 228 deletions(-) create mode 100644 pkg/bloomgateway/multiplexing.go create mode 100644 pkg/bloomgateway/util.go create mode 100644 pkg/bloomgateway/util_test.go diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index 3f5c5d9d1e93f..72fb35eeb222e 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -56,10 +56,12 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/common/model" "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/queue" "github.com/grafana/loki/pkg/storage" + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" "github.com/grafana/loki/pkg/util" @@ -102,40 +104,6 @@ func newMetrics(subsystem string, registerer prometheus.Registerer) *metrics { } } -// Task is the data structure that is enqueued to the internal queue and queued by query workers -type Task struct { - // ID is a lexcographically sortable unique identifier of the task - ID ulid.ULID - // Tenant is the tenant ID - Tenant string - // Request is the original request - Request *logproto.FilterChunkRefRequest - // ErrCh is a send-only channel to write an error to - ErrCh chan<- error - // ResCh is a send-only channel to write partial responses to - ResCh chan<- *logproto.GroupedChunkRefs -} - -// newTask returns a new Task that can be enqueued to the task queue. -// As additional arguments, it returns a result and an error channel, as well -// as an error if the instantiation fails. 
-func newTask(tenantID string, req *logproto.FilterChunkRefRequest) (Task, chan *logproto.GroupedChunkRefs, chan error, error) { - key, err := ulid.New(ulid.Now(), nil) - if err != nil { - return Task{}, nil, nil, err - } - errCh := make(chan error, 1) - resCh := make(chan *logproto.GroupedChunkRefs, 1) - task := Task{ - ID: key, - Tenant: tenantID, - Request: req, - ErrCh: errCh, - ResCh: resCh, - } - return task, resCh, errCh, nil -} - // SyncMap is a map structure which can be synchronized using the RWMutex type SyncMap[k comparable, v any] struct { sync.RWMutex @@ -290,10 +258,21 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk return nil, err } + // Shortcut if request does not contain filters + if len(req.Filters) == 0 { + return &logproto.FilterChunkRefResponse{ + ChunkRefs: req.Refs, + }, nil + } + for _, ref := range req.Refs { if ref.Tenant != tenantID { return nil, errors.Wrapf(errInvalidTenant, "expected chunk refs from tenant %s, got tenant %s", tenantID, ref.Tenant) } + // Sort ShortRefs by From time in ascending order + sort.Slice(ref.Refs, func(i, j int) bool { + return ref.Refs[i].From.Before(ref.Refs[j].From) + }) } // Sort ChunkRefs by fingerprint in ascending order @@ -301,7 +280,7 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk return req.Refs[i].Fingerprint < req.Refs[j].Fingerprint }) - task, resCh, errCh, err := newTask(tenantID, req) + task, resCh, errCh, err := NewTask(tenantID, req) if err != nil { return nil, err } @@ -317,13 +296,17 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk for { select { case <-ctx.Done(): - return nil, ctx.Err() + return nil, errors.Wrap(ctx.Err(), "waiting for results") case err := <-errCh: - return nil, err + return nil, errors.Wrap(err, "waiting for results") case res := <-resCh: - level.Info(g.logger).Log("msg", "got result", "task", task.ID, "tenant", tenantID, "res", res) + level.Debug(g.logger).Log("msg", "got partial result", "task", task.ID, "tenant", tenantID, "fp", res.Fp, "refs", res.Chks.Len()) // wait for all parts of the full response - response = append(response, res) + response = append(response, &logproto.GroupedChunkRefs{ + Tenant: tenantID, + Fingerprint: uint64(res.Fp), + Refs: convertToShortRefs(res.Chks), + }) if len(response) == len(req.Refs) { return &logproto.FilterChunkRefResponse{ChunkRefs: response}, nil } @@ -365,42 +348,108 @@ func (w *worker) starting(_ context.Context) error { return nil } -func (w *worker) running(_ context.Context) error { +func (w *worker) running(ctx context.Context) error { idx := queue.StartIndexWithLocalQueue - for { - ctx := context.Background() - item, newIdx, err := w.queue.Dequeue(ctx, idx, w.ID) + maxItems := 10 + maxWaitTime := 500 * time.Millisecond + + for ctx.Err() == nil { + taskCtx := context.Background() + items, newIdx, err := w.queue.DequeueMany(taskCtx, idx, w.ID, maxItems, maxWaitTime) if err != nil { - if err != queue.ErrStopped { - level.Error(w.logger).Log("msg", "failed to dequeue task", "err", err) - continue + // We only return an error if the queue is stopped and dequeuing did not yield any items + if err == queue.ErrStopped && len(items) == 0 { + return err } - return err + level.Error(w.logger).Log("msg", "failed to dequeue tasks", "err", err, "items", len(items)) } - task, ok := item.(Task) - if !ok { - level.Error(w.logger).Log("msg", "failed to cast dequeued item to Task", "item", item) + idx = newIdx + + if len(items) == 0 { continue } - 
level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID) - w.tasks.Delete(task.ID) - - idx = newIdx + tasksPerDay := make(map[time.Time][]Task) - r := task.Request - if len(r.Filters) > 0 { - r.Refs, err = w.store.FilterChunkRefs(ctx, task.Tenant, r.From.Time(), r.Through.Time(), r.Refs, r.Filters...) + for _, item := range items { + task, ok := item.(Task) + if !ok { + level.Error(w.logger).Log("msg", "failed to cast dequeued item to Task", "item", item) + continue + } + level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID) + w.tasks.Delete(task.ID) + + fromDay := getDayTime(task.Request.From) + throughDay := getDayTime(task.Request.Through) + + if fromDay.Equal(throughDay) { + tasksPerDay[fromDay] = append(tasksPerDay[fromDay], task) + } else { + // split task into separate tasks per day + for i := fromDay; i.Before(throughDay); i = i.Add(24 * time.Hour) { + r := filterRequestForDay(task.Request, i) + t := task.WithRequest(r) + tasksPerDay[i] = append(tasksPerDay[i], t) + } + } } - if err != nil { - task.ErrCh <- err - } else { - for _, ref := range r.Refs { - task.ResCh <- ref + + // TODO(chaudum): Process days in parallel? + for day, tasks := range tasksPerDay { + level.Debug(w.logger).Log("msg", "process tasks for day", "day", day, "tasks", len(tasks)) + + it := newTaskMergeIterator(tasks...) + // TODO(chaudum): Use pool + fingerprints := make([]uint64, 0, 1024) + for it.Next() { + fingerprints = append(fingerprints, uint64(it.At().Fp)) + } + + it.Reset() + + bqs, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour), fingerprints) + if err != nil { + for _, t := range tasks { + t.ErrCh <- err + } + continue + } + + // no blocks found + if len(bqs) == 0 { + level.Warn(w.logger).Log("msg", "no blocks found for day", "day", day) + for _, t := range tasks { + for _, ref := range t.Request.Refs { + t.ResCh <- v1.Output{ + Fp: model.Fingerprint(ref.Fingerprint), + Chks: convertToChunkRefs(ref.Refs), + } + } + } + continue + } + + for _, bq := range bqs { + // TODO(chaudum): Use pool + requests := make([]v1.Request, 0) + for it.Next(); it.At().Fp <= bq.MaxFp; { + requests = append(requests, it.At().Request) + } + fq := bq.Fuse([]v1.PeekingIterator[v1.Request]{NewIterWithIndex(0, requests)}) + err := fq.Run() + if err != nil { + for _, t := range tasks { + t.ErrCh <- err + } + continue + } } + } } + return ctx.Err() } func (w *worker) stopping(err error) error { diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index 0b6a207362ac6..3b55b016043b7 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -202,6 +202,9 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { From: now.Add(-24 * time.Hour), Through: now, Refs: groupRefs(t, chunkRefs), + Filters: []*logproto.LineFilterExpression{ + {Operator: 1, Match: "foo"}, + }, } ctx := user.InjectOrgID(context.Background(), tenantID) @@ -240,6 +243,9 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { From: now.Add(-24 * time.Hour), Through: now, Refs: groupRefs(t, chunkRefs), + Filters: []*logproto.LineFilterExpression{ + {Operator: 1, Match: "foo"}, + }, } ctx := user.InjectOrgID(context.Background(), tenantID) _, err = gw.FilterChunkRefs(ctx, req) diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go new file mode 100644 index 0000000000000..55c9c3403e6a4 --- /dev/null +++ b/pkg/bloomgateway/multiplexing.go @@ -0,0 +1,123 @@ +package bloomgateway + +import ( + 
"github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/oklog/ulid" + "github.com/prometheus/common/model" +) + +// Task is the data structure that is enqueued to the internal queue and queued by query workers +type Task struct { + // ID is a lexcographically sortable unique identifier of the task + ID ulid.ULID + // Tenant is the tenant ID + Tenant string + // Request is the original request + Request *logproto.FilterChunkRefRequest + // ErrCh is a send-only channel to write an error to + ErrCh chan<- error + // ResCh is a send-only channel to write partial responses to + ResCh chan<- v1.Output +} + +// NewTask returns a new Task that can be enqueued to the task queue. +// As additional arguments, it returns a result and an error channel, as well +// as an error if the instantiation fails. +func NewTask(tenantID string, req *logproto.FilterChunkRefRequest) (Task, chan v1.Output, chan error, error) { + key, err := ulid.New(ulid.Now(), nil) + if err != nil { + return Task{}, nil, nil, err + } + errCh := make(chan error, 1) + resCh := make(chan v1.Output, 1) + task := Task{ + ID: key, + Tenant: tenantID, + Request: req, + ErrCh: errCh, + ResCh: resCh, + } + return task, resCh, errCh, nil +} + +// WithRequest returns a copy of the task, which holds the provided Request req. +func (t Task) WithRequest(req *logproto.FilterChunkRefRequest) Task { + return Task{ + ID: t.ID, + Tenant: t.Tenant, + Request: req, + ErrCh: t.ErrCh, + ResCh: t.ResCh, + } +} + +// FilterRequest extends v1.Request with an error channel +type FilterRequest struct { + v1.Request + Error chan<- error +} + +// taskMergeIterator implements v1.Iterator +type taskMergeIterator struct { + curr FilterRequest + heap *v1.HeapIterator[*logproto.GroupedChunkRefs] + tasks []Task +} + +func newTaskMergeIterator(tasks ...Task) *taskMergeIterator { + it := &taskMergeIterator{ + tasks: tasks, + curr: FilterRequest{}, + } + it.init() + return it +} + +func (it *taskMergeIterator) init() { + sequences := make([]v1.PeekingIterator[*logproto.GroupedChunkRefs], 0, len(it.tasks)) + for i := range it.tasks { + sequences = append(sequences, NewIterWithIndex(i, it.tasks[i].Request.Refs)) + } + it.heap = v1.NewHeapIterator( + func(i, j *logproto.GroupedChunkRefs) bool { + return i.Fingerprint < j.Fingerprint + }, + sequences..., + ) +} + +func (it *taskMergeIterator) Reset() { + it.init() +} + +func (it *taskMergeIterator) Next() bool { + ok := it.heap.Next() + if !ok { + return false + } + + currIter, ok := it.heap.CurrIter().(*SliceIterWithIndex[*logproto.GroupedChunkRefs]) + if !ok { + return false + } + iterIndex := currIter.Index() + + task := it.tasks[iterIndex] + group := it.heap.At() + + it.curr.Fp = model.Fingerprint(group.Fingerprint) + it.curr.Chks = convertToChunkRefs(group.Refs) + it.curr.Searches = convertToSearches(task.Request.Filters) + it.curr.Response = task.ResCh + it.curr.Error = task.ErrCh + return true +} + +func (it *taskMergeIterator) At() FilterRequest { + return it.curr +} + +func (it *taskMergeIterator) Err() error { + return nil +} diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go new file mode 100644 index 0000000000000..1a8e8155a6a73 --- /dev/null +++ b/pkg/bloomgateway/util.go @@ -0,0 +1,129 @@ +package bloomgateway + +import ( + "time" + + "github.com/grafana/loki/pkg/logproto" + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/prometheus/common/model" +) + +// SliceIterWithIndex implements v1.PeekingIterator +type SliceIterWithIndex[T any] 
struct { + xs []T // source slice + pos int // position within the slice + idx int // the index that identifies the iterator + zero T // zero value of T +} + +func (it *SliceIterWithIndex[T]) Next() bool { + it.pos++ + return it.pos < len(it.xs) +} + +func (it *SliceIterWithIndex[T]) Err() error { + return nil +} + +func (it *SliceIterWithIndex[T]) At() T { + return it.xs[it.pos] +} + +func (it *SliceIterWithIndex[T]) Peek() (T, bool) { + if it.pos+1 >= len(it.xs) { + return it.zero, false + } + return it.xs[it.pos+1], true +} + +func (it *SliceIterWithIndex[T]) Index() int { + return it.idx +} + +func NewIterWithIndex[T any](i int, xs []T) *SliceIterWithIndex[T] { + return &SliceIterWithIndex[T]{ + xs: xs, + pos: -1, + idx: i, + } +} + +func getDay(ts model.Time) int64 { + return ts.Unix() / int64(24*time.Hour/time.Second) +} + +func getDayTime(ts model.Time) time.Time { + return time.Date(ts.Time().Year(), ts.Time().Month(), ts.Time().Day(), 0, 0, 0, 0, time.UTC) +} + +func filterRequestForDay(r *logproto.FilterChunkRefRequest, day time.Time) *logproto.FilterChunkRefRequest { + var from, through time.Time + refs := make([]*logproto.GroupedChunkRefs, 0, len(r.Refs)) + for i := range r.Refs { + groupedChunkRefs := &logproto.GroupedChunkRefs{ + Fingerprint: r.Refs[i].Fingerprint, + Tenant: r.Refs[i].Tenant, + Refs: make([]*logproto.ShortRef, 0, len(r.Refs[i].Refs)), + } + for j := range r.Refs[i].Refs { + shortRef := r.Refs[i].Refs[j] + fromDay := getDayTime(shortRef.From) + if fromDay.After(day) { + break + } + throughDay := getDayTime(shortRef.Through) + if fromDay.Equal(day) || throughDay.Equal(day) { + groupedChunkRefs.Refs = append(groupedChunkRefs.Refs, shortRef) + } + } + groupFrom, groupThrough := getFromThrough(groupedChunkRefs.Refs) + if from.Unix() > 0 && groupFrom.Before(from) { + from = groupFrom + } + if groupThrough.After(through) { + through = groupThrough + } + refs = append(refs, groupedChunkRefs) + } + return &logproto.FilterChunkRefRequest{ + From: model.TimeFromUnix(from.Unix()), + Through: model.TimeFromUnix(through.Unix()), + Refs: refs, + Filters: r.Filters, + } +} + +func getFromThrough(refs []*logproto.ShortRef) (time.Time, time.Time) { + if len(refs) == 0 { + return time.Time{}, time.Time{} + } + return refs[0].From.Time(), refs[len(refs)-1].Through.Time() +} + +func convertToSearches(filters []*logproto.LineFilterExpression) [][]byte { + searches := make([][]byte, len(filters)) + for _, f := range filters { + searches = append(searches, []byte(f.Match)) + } + return searches +} + +// convertToShortRefs converts a v1.ChunkRefs into []*logproto.ShortRef +// TODO(chaudum): Avoid conversion by transferring v1.ChunkRefs in gRPC request. +func convertToShortRefs(refs v1.ChunkRefs) []*logproto.ShortRef { + result := make([]*logproto.ShortRef, len(refs)) + for _, ref := range refs { + result = append(result, &logproto.ShortRef{From: ref.Start, Through: ref.End, Checksum: ref.Checksum}) + } + return result +} + +// convertToChunkRefs converts a []*logproto.ShortRef into v1.ChunkRefs +// TODO(chaudum): Avoid conversion by transferring v1.ChunkRefs in gRPC request. 
+func convertToChunkRefs(refs []*logproto.ShortRef) v1.ChunkRefs { + result := make(v1.ChunkRefs, len(refs)) + for _, ref := range refs { + result = append(result, v1.ChunkRef{Start: ref.From, End: ref.Through, Checksum: ref.Checksum}) + } + return result +} diff --git a/pkg/bloomgateway/util_test.go b/pkg/bloomgateway/util_test.go new file mode 100644 index 0000000000000..df1f3e5aa378c --- /dev/null +++ b/pkg/bloomgateway/util_test.go @@ -0,0 +1,42 @@ +package bloomgateway + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestSliceIterWithIndex(t *testing.T) { + t.Run("SliceIterWithIndex implements v1.Iterator interface", func(t *testing.T) { + xs := []string{"a", "b", "c"} + it := NewIterWithIndex(0, xs) + require.True(t, it.Next()) + require.Equal(t, "a", it.At()) + require.True(t, it.Next()) + require.Equal(t, "b", it.At()) + require.True(t, it.Next()) + require.Equal(t, "c", it.At()) + require.False(t, it.Next()) + require.NoError(t, it.Err()) + }) + t.Run("SliceIterWithIndex implements v1.PeekingIterator interface", func(t *testing.T) { + xs := []string{"a", "b", "c"} + it := NewIterWithIndex(0, xs) + p, ok := it.Peek() + require.True(t, ok) + require.Equal(t, "a", p) + require.True(t, it.Next()) + require.Equal(t, "a", it.At()) + require.True(t, it.Next()) + require.True(t, it.Next()) + p, ok = it.Peek() + require.False(t, ok) + require.Equal(t, "", p) // "" is zero value for type string + }) +} + +func TestGetDay(t *testing.T) {} + +func TestGetDayTime(t *testing.T) {} + +func TestFilterRequestForDay(t *testing.T) {} diff --git a/pkg/queue/queue.go b/pkg/queue/queue.go index fa1860e4e88d3..44868fbefc584 100644 --- a/pkg/queue/queue.go +++ b/pkg/queue/queue.go @@ -125,6 +125,31 @@ func (q *RequestQueue) Enqueue(tenant string, path []string, req Request, maxQue } } +func (q *RequestQueue) DequeueMany(ctx context.Context, last QueueIndex, consumerID string, maxItems int, maxWait time.Duration) ([]Request, QueueIndex, error) { + // create a context for dequeuing with a max time we want to wait to fulfill the desired maxItems + + dequeueCtx, cancel := context.WithTimeout(ctx, maxWait) + defer cancel() + + var idx QueueIndex + items := make([]Request, 0, maxItems) + + for { + item, newIdx, err := q.Dequeue(dequeueCtx, last, consumerID) + if err != nil { + if err == context.DeadlineExceeded { + err = nil + } + return items, idx, err + } + items = append(items, item) + idx = newIdx + if len(items) == maxItems { + return items, idx, nil + } + } +} + // Dequeue find next tenant queue and takes the next request off of it. Will block if there are no requests. // By passing tenant index from previous call of this method, querier guarantees that it iterates over all tenants fairly. // If consumer finds that request from the tenant is already expired, it can get a request for the same tenant by using UserIndex.ReuseLastUser. 
diff --git a/pkg/storage/bloom/v1/fuse.go b/pkg/storage/bloom/v1/fuse.go index 021fba1e81856..9e506cbda900b 100644 --- a/pkg/storage/bloom/v1/fuse.go +++ b/pkg/storage/bloom/v1/fuse.go @@ -7,50 +7,49 @@ import ( "github.com/prometheus/common/model" ) -type request struct { - fp model.Fingerprint - chks ChunkRefs - searches [][]byte - response chan output +type Request struct { + Fp model.Fingerprint + Chks ChunkRefs + Searches [][]byte + Response chan<- Output } -// output represents a chunk that was present in the bloom -// but failed to pass the search filters and can be removed from -// the list of chunks to download -type output struct { - fp model.Fingerprint - removals ChunkRefs +// Output represents a chunk that failed to pass all searches +// and must be downloaded +type Output struct { + Fp model.Fingerprint + Removals ChunkRefs } // Fuse combines multiple requests into a single loop iteration // over the data set and returns the corresponding outputs // TODO(owen-d): better async control -func (bq *BlockQuerier) Fuse(inputs []PeekingIterator[request]) *FusedQuerier { +func (bq *BlockQuerier) Fuse(inputs []PeekingIterator[Request]) *FusedQuerier { return NewFusedQuerier(bq, inputs) } type FusedQuerier struct { bq *BlockQuerier - inputs Iterator[[]request] + inputs Iterator[[]Request] } -func NewFusedQuerier(bq *BlockQuerier, inputs []PeekingIterator[request]) *FusedQuerier { - heap := NewHeapIterator[request]( - func(a, b request) bool { - return a.fp < b.fp +func NewFusedQuerier(bq *BlockQuerier, inputs []PeekingIterator[Request]) *FusedQuerier { + heap := NewHeapIterator[Request]( + func(a, b Request) bool { + return a.Fp < b.Fp }, inputs..., ) - merging := NewDedupingIter[request, []request]( - func(a request, b []request) bool { - return a.fp == b[0].fp + merging := NewDedupingIter[Request, []Request]( + func(a Request, b []Request) bool { + return a.Fp == b[0].Fp }, - func(a request) []request { return []request{a} }, - func(a request, b []request) []request { + func(a Request) []Request { return []Request{a} }, + func(a Request, b []Request) []Request { return append(b, a) }, - NewPeekingIter[request](heap), + NewPeekingIter[Request](heap), ) return &FusedQuerier{ bq: bq, @@ -63,7 +62,7 @@ func (fq *FusedQuerier) Run() error { // find all queries for the next relevant fingerprint nextBatch := fq.inputs.At() - fp := nextBatch[0].fp + fp := nextBatch[0].Fp // advance the series iterator to the next fingerprint if err := fq.bq.Seek(fp); err != nil { @@ -79,9 +78,9 @@ func (fq *FusedQuerier) Run() error { if series.Fingerprint != fp { // fingerprint not found, can't remove chunks for _, input := range nextBatch { - input.response <- output{ - fp: fp, - removals: nil, + input.Response <- Output{ + Fp: fp, + Removals: nil, } } } @@ -91,9 +90,9 @@ func (fq *FusedQuerier) Run() error { if !fq.bq.blooms.Next() { // fingerprint not found, can't remove chunks for _, input := range nextBatch { - input.response <- output{ - fp: fp, - removals: nil, + input.Response <- Output{ + Fp: fp, + Removals: nil, } } continue @@ -103,19 +102,17 @@ func (fq *FusedQuerier) Run() error { // test every input against this chunk inputLoop: for _, input := range nextBatch { - _, inBlooms := input.chks.Compare(series.Chunks, true) + _, inBlooms := input.Chks.Compare(series.Chunks, true) // First, see if the search passes the series level bloom before checking for chunks individually - for _, search := range input.searches { + for _, search := range input.Searches { if !bloom.Test(search) { - // the entire 
series bloom didn't pass one of the searches, - // so we can skip checking chunks individually. // We return all the chunks that were the intersection of the query // because they for sure do not match the search and don't // need to be downloaded - input.response <- output{ - fp: fp, - removals: inBlooms, + input.Response <- Output{ + Fp: fp, + Removals: inBlooms, } continue inputLoop } @@ -126,7 +123,7 @@ func (fq *FusedQuerier) Run() error { chunkLoop: for _, chk := range inBlooms { - for _, search := range input.searches { + for _, search := range input.Searches { // TODO(owen-d): meld chunk + search into a single byte slice from the block schema var combined = search @@ -138,9 +135,9 @@ func (fq *FusedQuerier) Run() error { // Otherwise, the chunk passed all the searches } - input.response <- output{ - fp: fp, - removals: removals, + input.Response <- Output{ + Fp: fp, + Removals: removals, } } diff --git a/pkg/storage/bloom/v1/fuse_test.go b/pkg/storage/bloom/v1/fuse_test.go index b990d69f4b7bd..50147c7cd3c16 100644 --- a/pkg/storage/bloom/v1/fuse_test.go +++ b/pkg/storage/bloom/v1/fuse_test.go @@ -74,37 +74,39 @@ func TestFusedQuerier(t *testing.T) { querier := NewBlockQuerier(block) nReqs := 10 - var inputs [][]request + var inputs [][]Request + var resChans []chan Output for i := 0; i < nReqs; i++ { - ch := make(chan output) - var reqs []request + ch := make(chan Output) + var reqs []Request // find 2 series for each for j := 0; j < 2; j++ { idx := numSeries/nReqs*i + j - reqs = append(reqs, request{ - fp: data[idx].Series.Fingerprint, - chks: data[idx].Series.Chunks, - response: ch, + reqs = append(reqs, Request{ + Fp: data[idx].Series.Fingerprint, + Chks: data[idx].Series.Chunks, + Response: ch, }) } inputs = append(inputs, reqs) + resChans = append(resChans, ch) } - var itrs []PeekingIterator[request] + var itrs []PeekingIterator[Request] for _, reqs := range inputs { - itrs = append(itrs, NewPeekingIter[request](NewSliceIter[request](reqs))) + itrs = append(itrs, NewPeekingIter[Request](NewSliceIter[Request](reqs))) } - resps := make([][]output, nReqs) + resps := make([][]Output, nReqs) var g sync.WaitGroup g.Add(1) go func() { require.Nil(t, concurrency.ForEachJob( context.Background(), - len(resps), - len(resps), + len(resChans), + len(resChans), func(_ context.Context, i int) error { - for v := range inputs[i][0].response { + for v := range resChans[i] { resps[i] = append(resps[i], v) } return nil @@ -117,7 +119,7 @@ func TestFusedQuerier(t *testing.T) { require.Nil(t, fused.Run()) for _, input := range inputs { - close(input[0].response) + close(input[0].Response) } g.Wait() @@ -126,9 +128,9 @@ func TestFusedQuerier(t *testing.T) { resp := resps[i][j] require.Equal( t, - output{ - fp: req.fp, - removals: nil, + Output{ + Fp: req.Fp, + Removals: nil, }, resp, ) @@ -136,7 +138,7 @@ func TestFusedQuerier(t *testing.T) { } } -func setupBlockForBenchmark(b *testing.B) (*BlockQuerier, [][]request) { +func setupBlockForBenchmark(b *testing.B) (*BlockQuerier, [][]Request, []chan Output) { indexBuf := bytes.NewBuffer(nil) bloomsBuf := bytes.NewBuffer(nil) writer := NewMemoryBlockWriter(indexBuf, bloomsBuf) @@ -165,11 +167,12 @@ func setupBlockForBenchmark(b *testing.B) (*BlockQuerier, [][]request) { numRequestChains := 100 seriesPerRequest := 100 - var requestChains [][]request + var requestChains [][]Request + var responseChans []chan Output for i := 0; i < numRequestChains; i++ { - var reqs []request + var reqs []Request // ensure they use the same channel - ch := make(chan 
output) + ch := make(chan Output) // evenly spread out the series queried within a single request chain // to mimic series distribution across keyspace for j := 0; j < seriesPerRequest; j++ { @@ -178,21 +181,22 @@ func setupBlockForBenchmark(b *testing.B) (*BlockQuerier, [][]request) { if idx >= numSeries { idx = numSeries - 1 } - reqs = append(reqs, request{ - fp: data[idx].Series.Fingerprint, - chks: data[idx].Series.Chunks, - response: ch, + reqs = append(reqs, Request{ + Fp: data[idx].Series.Fingerprint, + Chks: data[idx].Series.Chunks, + Response: ch, }) } requestChains = append(requestChains, reqs) + responseChans = append(responseChans, ch) } - return querier, requestChains + return querier, requestChains, responseChans } func BenchmarkBlockQuerying(b *testing.B) { b.StopTimer() - querier, requestChains := setupBlockForBenchmark(b) + querier, requestChains, responseChans := setupBlockForBenchmark(b) // benchmark b.StartTimer() @@ -200,7 +204,7 @@ func BenchmarkBlockQuerying(b *testing.B) { for i := 0; i < b.N; i++ { for _, chain := range requestChains { for _, req := range chain { - _, _ = querier.CheckChunksForSeries(req.fp, req.chks, nil) + _, _ = querier.CheckChunksForSeries(req.Fp, req.Chks, nil) } } } @@ -211,22 +215,22 @@ func BenchmarkBlockQuerying(b *testing.B) { go func() { require.Nil(b, concurrency.ForEachJob( context.Background(), - len(requestChains), len(requestChains), + len(responseChans), len(responseChans), func(_ context.Context, idx int) error { // nolint:revive - for range requestChains[idx][0].response { + for range responseChans[idx] { } return nil }, )) }() - var itrs []PeekingIterator[request] + var itrs []PeekingIterator[Request] for i := 0; i < b.N; i++ { itrs = itrs[:0] for _, reqs := range requestChains { - itrs = append(itrs, NewPeekingIter[request](NewSliceIter[request](reqs))) + itrs = append(itrs, NewPeekingIter[Request](NewSliceIter[Request](reqs))) } fused := querier.Fuse(itrs) _ = fused.Run() diff --git a/pkg/storage/bloom/v1/merge.go b/pkg/storage/bloom/v1/merge.go index 981582f1234cd..a519a44795117 100644 --- a/pkg/storage/bloom/v1/merge.go +++ b/pkg/storage/bloom/v1/merge.go @@ -8,6 +8,8 @@ type HeapIterator[T any] struct { itrs []PeekingIterator[T] less func(T, T) bool + currIter PeekingIterator[T] + zero T // zero value of T cache T ok bool @@ -56,6 +58,11 @@ func (mbq *HeapIterator[T]) Next() (ok bool) { return } +// TODO(chaudum): find a better place +func (mbq *HeapIterator[T]) CurrIter() PeekingIterator[T] { + return mbq.currIter +} + // TODO(owen-d): don't swallow this error func (mbq *HeapIterator[T]) Err() error { return nil @@ -76,15 +83,15 @@ func (mbq *HeapIterator[T]) pop() (T, bool) { return mbq.zero, false } - cur := mbq.itrs[0] - if ok := cur.Next(); !ok { + mbq.currIter = mbq.itrs[0] + if ok := mbq.currIter.Next(); !ok { mbq.remove(0) continue } - result := cur.At() + result := mbq.currIter.At() - _, ok := cur.Peek() + _, ok := mbq.currIter.Peek() if !ok { // that was the end of the iterator. 
remove it from the heap mbq.remove(0) diff --git a/pkg/storage/stores/shipper/bloomshipper/shipper.go b/pkg/storage/stores/shipper/bloomshipper/shipper.go index 98dbbb20a476a..4fdb3e4a69f5d 100644 --- a/pkg/storage/stores/shipper/bloomshipper/shipper.go +++ b/pkg/storage/stores/shipper/bloomshipper/shipper.go @@ -62,7 +62,7 @@ func (s *Shipper) ForEachBlock( if !ok { return nil } - err = callback(result.BlockQuerier) + err = callback(result.BlockQuerier, result.MinFingerprint, result.MaxFingerprint) if err != nil { return fmt.Errorf("error running callback function for block %s err: %w", result.BlockPath, err) } @@ -79,13 +79,14 @@ func (s *Shipper) Stop() { s.blockDownloader.stop() } -// getFromThrough returns the first and list item of a fingerprint slice +// getFirstLast returns the first and last item of a fingerprint slice // It assumes an ascending sorted list of fingerprints. -func getFromThrough(fingerprints []uint64) (uint64, uint64) { - if len(fingerprints) == 0 { - return 0, 0 +func getFirstLast[T any](s []T) (T, T) { + var zero T + if len(s) == 0 { + return zero, zero } - return fingerprints[0], fingerprints[len(fingerprints)-1] + return s[0], s[len(s)-1] } func (s *Shipper) getActiveBlockRefs( @@ -94,7 +95,7 @@ func (s *Shipper) getActiveBlockRefs( from, through int64, fingerprints []uint64) ([]BlockRef, error) { - minFingerprint, maxFingerprint := getFromThrough(fingerprints) + minFingerprint, maxFingerprint := getFirstLast(fingerprints) metas, err := s.client.GetMetas(ctx, MetaSearchParams{ TenantID: tenantID, MinFingerprint: minFingerprint, @@ -164,7 +165,7 @@ func isOutsideRange(b *BlockRef, startTimestamp, endTimestamp int64, fingerprint } // Then, check if outside of min/max of fingerprint slice - minFp, maxFp := getFromThrough(fingerprints) + minFp, maxFp := getFirstLast(fingerprints) if b.MaxFingerprint < minFp || b.MinFingerprint > maxFp { return true } diff --git a/pkg/storage/stores/shipper/bloomshipper/store.go b/pkg/storage/stores/shipper/bloomshipper/store.go index 80f2c352d5326..7d3d227994637 100644 --- a/pkg/storage/stores/shipper/bloomshipper/store.go +++ b/pkg/storage/stores/shipper/bloomshipper/store.go @@ -2,15 +2,15 @@ package bloomshipper import ( "context" + "sort" "time" "github.com/prometheus/common/model" - "github.com/grafana/loki/pkg/logproto" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" ) -type ForEachBlockCallback func(bq *v1.BlockQuerier) error +type ForEachBlockCallback func(bq *v1.BlockQuerier, minFp, maxFp uint64) error type ReadShipper interface { ForEachBlock(ctx context.Context, tenant string, from, through time.Time, fingerprints []uint64, callback ForEachBlockCallback) error @@ -21,8 +21,13 @@ type Interface interface { Stop() } +type BlockQuerierWithFingerprintRange struct { + *v1.BlockQuerier + MinFp, MaxFp model.Fingerprint +} + type Store interface { - FilterChunkRefs(ctx context.Context, tenant string, from, through time.Time, chunkRefs []*logproto.GroupedChunkRefs, filters ...*logproto.LineFilterExpression) ([]*logproto.GroupedChunkRefs, error) + GetBlockQueriers(ctx context.Context, tenant string, from, through time.Time, fingerprints []uint64) ([]BlockQuerierWithFingerprintRange, error) Stop() } @@ -40,84 +45,18 @@ func (bs *BloomStore) Stop() { bs.shipper.Stop() } -func (bs *BloomStore) FilterChunkRefs(ctx context.Context, tenant string, from, through time.Time, chunkRefs []*logproto.GroupedChunkRefs, filters ...*logproto.LineFilterExpression) ([]*logproto.GroupedChunkRefs, error) { - fingerprints := make([]uint64, 0, 
len(chunkRefs)) - for _, ref := range chunkRefs { - fingerprints = append(fingerprints, ref.Fingerprint) - } - - blooms, err := bs.queriers(ctx, tenant, from, through, fingerprints) - if err != nil { - return nil, err - } - - searches := convertLineFilterExpressions(filters) - - for _, ref := range chunkRefs { - refs, err := blooms.Filter(ctx, model.Fingerprint(ref.Fingerprint), convertToChunkRefs(ref.Refs), searches) - if err != nil { - return nil, err - } - ref.Refs = convertToShortRefs(refs) - } - return chunkRefs, nil -} - -func (bs *BloomStore) queriers(ctx context.Context, tenant string, from, through time.Time, fingerprints []uint64) (*bloomQueriers, error) { - bf := newBloomFilters(1024) - err := bs.shipper.ForEachBlock(ctx, tenant, from, through, fingerprints, func(bq *v1.BlockQuerier) error { - bf.queriers = append(bf.queriers, bq) +func (bs *BloomStore) GetBlockQueriers(ctx context.Context, tenant string, from, through time.Time, fingerprints []uint64) ([]BlockQuerierWithFingerprintRange, error) { + bqs := make([]BlockQuerierWithFingerprintRange, 0, 32) + err := bs.shipper.ForEachBlock(ctx, tenant, from, through, fingerprints, func(bq *v1.BlockQuerier, minFp uint64, maxFp uint64) error { + bqs = append(bqs, BlockQuerierWithFingerprintRange{ + BlockQuerier: bq, + MinFp: model.Fingerprint(minFp), + MaxFp: model.Fingerprint(maxFp), + }) return nil }) - return bf, err -} - -func convertLineFilterExpressions(filters []*logproto.LineFilterExpression) [][]byte { - searches := make([][]byte, len(filters)) - for _, f := range filters { - searches = append(searches, []byte(f.Match)) - } - return searches -} - -// convertToShortRefs converts a v1.ChunkRefs into []*logproto.ShortRef -// TODO(chaudum): Avoid conversion by transferring v1.ChunkRefs in gRPC request. -func convertToShortRefs(refs v1.ChunkRefs) []*logproto.ShortRef { - result := make([]*logproto.ShortRef, len(refs)) - for _, ref := range refs { - result = append(result, &logproto.ShortRef{From: ref.Start, Through: ref.End, Checksum: ref.Checksum}) - } - return result -} - -// convertToChunkRefs converts a []*logproto.ShortRef into v1.ChunkRefs -// TODO(chaudum): Avoid conversion by transferring v1.ChunkRefs in gRPC request. -func convertToChunkRefs(refs []*logproto.ShortRef) v1.ChunkRefs { - result := make(v1.ChunkRefs, len(refs)) - for _, ref := range refs { - result = append(result, v1.ChunkRef{Start: ref.From, End: ref.Through, Checksum: ref.Checksum}) - } - return result -} - -type bloomQueriers struct { - queriers []*v1.BlockQuerier -} - -func newBloomFilters(size int) *bloomQueriers { - return &bloomQueriers{ - queriers: make([]*v1.BlockQuerier, size), - } -} - -func (bf *bloomQueriers) Filter(_ context.Context, fp model.Fingerprint, chunkRefs v1.ChunkRefs, filters [][]byte) (v1.ChunkRefs, error) { - result := make(v1.ChunkRefs, len(chunkRefs)) - for _, bq := range bf.queriers { - refs, err := bq.CheckChunksForSeries(fp, chunkRefs, filters) - if err != nil { - return nil, err - } - result = append(result, refs...) 
- } - return result, nil + sort.Slice(bqs, func(i, j int) bool { + return bqs[i].MinFp < bqs[j].MinFp + }) + return bqs, err } From 8af5f39e4a8b2def45ef24a68ea76ec37737900f Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Mon, 6 Nov 2023 14:21:48 +0100 Subject: [PATCH 04/44] Add sync pool Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 36 +++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index 72fb35eeb222e..522481743479b 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -314,6 +314,19 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk } } +type RequestPool struct { + sync.Pool +} + +func (p *RequestPool) Get() []v1.Request { + r := p.Pool.Get().([]v1.Request) + return r[:0] +} + +func (p *RequestPool) Put(r []v1.Request) { + p.Pool.Put(r) +} + // Worker is a datastructure that consumes tasks from the request queue, // processes them and returns the result/error back to the response channels of // the tasks. @@ -327,6 +340,8 @@ type worker struct { store bloomshipper.Store tasks *pendingTasks logger log.Logger + + rp RequestPool } func newWorker(i int, queue *queue.RequestQueue, store bloomshipper.Store, tasks *pendingTasks, logger log.Logger) *worker { @@ -339,6 +354,13 @@ func newWorker(i int, queue *queue.RequestQueue, store bloomshipper.Store, tasks logger: log.With(logger, "worker", id), } w.Service = services.NewBasicService(w.starting, w.running, w.stopping) + w.rp = RequestPool{ + Pool: sync.Pool{ + New: func() any { + return make([]v1.Request, 0, 1024) + }, + }, + } return w } @@ -404,7 +426,13 @@ func (w *worker) running(ctx context.Context) error { // TODO(chaudum): Use pool fingerprints := make([]uint64, 0, 1024) for it.Next() { - fingerprints = append(fingerprints, uint64(it.At().Fp)) + // fingerprints are already sorted. 
we can skip duplicates by checking + // if the next is greater than the previous + fp := uint64(it.At().Fp) + if len(fingerprints) > 0 && fp <= fingerprints[len(fingerprints)-1] { + continue + } + fingerprints = append(fingerprints, fp) } it.Reset() @@ -431,9 +459,9 @@ func (w *worker) running(ctx context.Context) error { continue } + requests := w.rp.Get() for _, bq := range bqs { - // TODO(chaudum): Use pool - requests := make([]v1.Request, 0) + requests = requests[:0] for it.Next(); it.At().Fp <= bq.MaxFp; { requests = append(requests, it.At().Request) } @@ -443,9 +471,11 @@ func (w *worker) running(ctx context.Context) error { for _, t := range tasks { t.ErrCh <- err } + w.rp.Put(requests) continue } } + w.rp.Put(requests) } } From e66f1d3d7b1bc2ca8aec81aeeacf5448f92fb564 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Wed, 8 Nov 2023 15:31:03 +0100 Subject: [PATCH 05/44] Functional test for FilterChunkRefs() gRPC method Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 48 ++++++--- pkg/bloomgateway/bloomgateway_test.go | 146 +++++++++++++++++++++++--- pkg/bloomgateway/util.go | 6 +- pkg/storage/bloom/v1/builder_test.go | 38 ------- pkg/storage/bloom/v1/test_util.go | 83 +++++++++++++++ 5 files changed, 249 insertions(+), 72 deletions(-) create mode 100644 pkg/storage/bloom/v1/test_util.go diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index 522481743479b..b6a66cba4b292 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -190,21 +190,28 @@ func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, o // We need to keep a reference to be able to call Stop() on shutdown of the gateway. g.bloomStore = bloomStore + if err := g.initServices(); err != nil { + return nil, err + } + g.Service = services.NewBasicService(g.starting, g.running, g.stopping).WithName("bloom-gateway") + + return g, nil +} + +func (g *Gateway) initServices() error { + var err error svcs := []services.Service{g.queue, g.activeUsers} for i := 0; i < numWorkers; i++ { - w := newWorker(i, g.queue, g.bloomStore, g.pendingTasks, logger) + w := newWorker(i, g.queue, g.bloomStore, g.pendingTasks, g.logger) svcs = append(svcs, w) } g.serviceMngr, err = services.NewManager(svcs...) if err != nil { - return nil, err + return err } g.serviceWatcher = services.NewFailureWatcher() g.serviceWatcher.WatchManager(g.serviceMngr) - - g.Service = services.NewBasicService(g.starting, g.running, g.stopping).WithName("bloom-gateway") - - return g, nil + return nil } func (g *Gateway) starting(ctx context.Context) error { @@ -319,12 +326,11 @@ type RequestPool struct { } func (p *RequestPool) Get() []v1.Request { - r := p.Pool.Get().([]v1.Request) - return r[:0] + return p.Pool.Get().([]v1.Request) } func (p *RequestPool) Put(r []v1.Request) { - p.Pool.Put(r) + p.Pool.Put(r[:0]) } // Worker is a datastructure that consumes tasks from the request queue, @@ -353,7 +359,7 @@ func newWorker(i int, queue *queue.RequestQueue, store bloomshipper.Store, tasks tasks: tasks, logger: log.With(logger, "worker", id), } - w.Service = services.NewBasicService(w.starting, w.running, w.stopping) + w.Service = services.NewBasicService(w.starting, w.running, w.stopping).WithName(id) w.rp = RequestPool{ Pool: sync.Pool{ New: func() any { @@ -418,9 +424,9 @@ func (w *worker) running(ctx context.Context) error { } } - // TODO(chaudum): Process days in parallel? 
for day, tasks := range tasksPerDay { - level.Debug(w.logger).Log("msg", "process tasks for day", "day", day, "tasks", len(tasks)) + logger := log.With(w.logger, "day", day) + level.Debug(logger).Log("msg", "process tasks for day", "tasks", len(tasks)) it := newTaskMergeIterator(tasks...) // TODO(chaudum): Use pool @@ -447,7 +453,7 @@ func (w *worker) running(ctx context.Context) error { // no blocks found if len(bqs) == 0 { - level.Warn(w.logger).Log("msg", "no blocks found for day", "day", day) + level.Warn(logger).Log("msg", "no blocks found") for _, t := range tasks { for _, ref := range t.Request.Refs { t.ResCh <- v1.Output{ @@ -459,24 +465,34 @@ func (w *worker) running(ctx context.Context) error { continue } + level.Debug(logger).Log("msg", "got block queriers", "count", len(bqs)) + + hasNext := it.Next() requests := w.rp.Get() for _, bq := range bqs { requests = requests[:0] - for it.Next(); it.At().Fp <= bq.MaxFp; { + for hasNext && it.At().Fp <= bq.MaxFp { requests = append(requests, it.At().Request) + hasNext = it.Next() + } + // no fingerprints in the fingerprint range of the current block + if len(requests) == 0 { + continue } fq := bq.Fuse([]v1.PeekingIterator[v1.Request]{NewIterWithIndex(0, requests)}) + level.Debug(logger).Log("msg", "initialized fuse querier", "minFp", bq.MinFp, "maxFp", bq.MaxFp, "requests", len(requests)) err := fq.Run() + level.Debug(logger).Log("msg", "run chunk checks", "err", err) if err != nil { for _, t := range tasks { - t.ErrCh <- err + t.ErrCh <- errors.Wrap(err, "failed to run chunk check") } w.rp.Put(requests) continue } } w.rp.Put(requests) - + level.Debug(logger).Log("msg", "finished chunk checks") } } return ctx.Err() diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index 3b55b016043b7..0160c31635437 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/go-kit/log" + "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" "github.com/grafana/dskit/kv/consul" "github.com/grafana/dskit/ring" @@ -18,9 +19,12 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/storage" + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/chunk/client/local" "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" lokiring "github.com/grafana/loki/pkg/util/ring" + "github.com/grafana/loki/pkg/validation" ) func parseDayTime(s string) config.DayTime { @@ -39,11 +43,21 @@ func groupRefs(t *testing.T, chunkRefs []*logproto.ChunkRef) []*logproto.Grouped return groupChunkRefs(chunkRefs, grouped) } +func newLimits() *validation.Overrides { + limits := validation.Limits{} + flagext.DefaultValues(&limits) + limits.BloomGatewayEnabled = true + + overrides, _ := validation.NewOverrides(limits, nil) + return overrides +} + func TestBloomGateway_StartStopService(t *testing.T) { ss := NewNoopStrategy() logger := log.NewNopLogger() reg := prometheus.NewRegistry() + limits := newLimits() cm := storage.NewClientMetrics() t.Cleanup(cm.Unregister) @@ -82,7 +96,7 @@ func TestBloomGateway_StartStopService(t *testing.T) { }, } - gw, err := New(cfg, schemaCfg, storageCfg, fakeLimits{}, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -103,6 +117,7 @@ func TestBloomGateway_FilterChunkRefs(t 
*testing.T) { ss := NewNoopStrategy() logger := log.NewLogfmtLogger(os.Stderr) reg := prometheus.NewRegistry() + limits := newLimits() cm := storage.NewClientMetrics() t.Cleanup(cm.Unregister) @@ -142,7 +157,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("returns unfiltered chunk refs if no filters provided", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, fakeLimits{}, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -188,7 +203,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("returns error if chunk refs do not belong to tenant", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, fakeLimits{}, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) require.NoError(t, err) ts, _ := time.Parse("2006-01-02 15:04", "2023-10-03 10:00") @@ -215,7 +230,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("gateway tracks active users", func(t *testing.T) { reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, fakeLimits{}, ss, cm, logger, reg) + gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) @@ -253,22 +268,123 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { } require.ElementsMatch(t, tenants, gw.activeUsers.ActiveUsers()) }) -} -type fakeLimits struct { + t.Run("use fuse queriers to filter chunks", func(t *testing.T) { + reg := prometheus.NewRegistry() + gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) + require.NoError(t, err) + + // replace store implementation and re-initialize workers and sub-services + gw.bloomStore = newMockBloomStore(t) + gw.initServices() + + err = services.StartAndAwaitRunning(context.Background(), gw) + require.NoError(t, err) + t.Cleanup(func() { + err = services.StopAndAwaitTerminated(context.Background(), gw) + require.NoError(t, err) + }) + + ts, _ := time.Parse("2006-01-02 15:04", "2023-10-03 10:00") + now := model.TimeFromUnix(ts.Unix()) + + chunkRefs := []*logproto.ChunkRef{ + { + Fingerprint: 100, + UserID: tenantID, + From: now.Add(-24 * time.Hour), + Through: now.Add(-23 * time.Hour), + Checksum: 1, + }, + { + Fingerprint: 100, + UserID: tenantID, + From: now.Add(-23 * time.Hour), + Through: now.Add(-22 * time.Hour), + Checksum: 2, + }, + { + Fingerprint: 500, + UserID: tenantID, + From: now.Add(-22 * time.Hour), + Through: now.Add(-21 * time.Hour), + Checksum: 3, + }, + { + Fingerprint: 1000, + UserID: tenantID, + From: now.Add(-20 * time.Hour), + Through: now.Add(-19 * time.Hour), + Checksum: 4, + }, + { + Fingerprint: 1001, + UserID: tenantID, + From: now.Add(-19 * time.Hour), + Through: now.Add(-18 * time.Hour), + Checksum: 5, + }, + } + inputChunkRefs := groupRefs(t, chunkRefs) + + t.Run("no match - return filtered", func(t *testing.T) { + req := &logproto.FilterChunkRefRequest{ + From: now.Add(-24 * time.Hour), + Through: now, + Refs: inputChunkRefs, + Filters: []*logproto.LineFilterExpression{ + {Operator: 1, Match: "does not match"}, + }, + } + ctx := user.InjectOrgID(context.Background(), tenantID) + res, err := gw.FilterChunkRefs(ctx, req) + require.NoError(t, err) + + expectedResponse := &logproto.FilterChunkRefResponse{ + ChunkRefs: inputChunkRefs, // why does it return 
all chunks? + } + require.Equal(t, expectedResponse, res) + }) + + t.Run("match - return unfiltered", func(t *testing.T) { + req := &logproto.FilterChunkRefRequest{ + From: now.Add(-24 * time.Hour), + Through: now, + Refs: groupRefs(t, chunkRefs), + Filters: []*logproto.LineFilterExpression{ + // series with fingerprint 100 has 1000 keys + // range is from 100_000 to 100_999 + {Operator: 1, Match: "100001"}, + }, + } + ctx := user.InjectOrgID(context.Background(), tenantID) + res, err := gw.FilterChunkRefs(ctx, req) + require.NoError(t, err) + + expectedResponse := &logproto.FilterChunkRefResponse{ + ChunkRefs: inputChunkRefs, // why does it return all chunks? + } + require.Equal(t, expectedResponse, res) + }) + + }) } -func (f fakeLimits) BloomGatewayShardSize(_ string) int { - //TODO implement me - panic("implement me") +func newMockBloomStore(t *testing.T) *mockBloomStore { + return &mockBloomStore{t: t} } -func (f fakeLimits) BloomGatewayEnabled(_ string) bool { - //TODO implement me - panic("implement me") +type mockBloomStore struct { + t *testing.T } -func (f fakeLimits) BloomGatewayBlocksDownloadingParallelism(_ string) int { - //TODO implement me - panic("implement me") +func (s *mockBloomStore) GetBlockQueriers(ctx context.Context, tenant string, from, through time.Time, fingerprints []uint64) ([]bloomshipper.BlockQuerierWithFingerprintRange, error) { + return []bloomshipper.BlockQuerierWithFingerprintRange{ + {BlockQuerier: v1.MakeBlockQuerier(s.t, 0, 255, from.Unix(), through.Unix()), MinFp: 0, MaxFp: 255}, + {BlockQuerier: v1.MakeBlockQuerier(s.t, 256, 511, from.Unix(), through.Unix()), MinFp: 256, MaxFp: 511}, + {BlockQuerier: v1.MakeBlockQuerier(s.t, 512, 767, from.Unix(), through.Unix()), MinFp: 512, MaxFp: 767}, + {BlockQuerier: v1.MakeBlockQuerier(s.t, 768, 1023, from.Unix(), through.Unix()), MinFp: 768, MaxFp: 1023}, + }, nil } + +func (s *mockBloomStore) Stop() {} diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index 1a8e8155a6a73..a2a4b82e2186a 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -101,7 +101,7 @@ func getFromThrough(refs []*logproto.ShortRef) (time.Time, time.Time) { } func convertToSearches(filters []*logproto.LineFilterExpression) [][]byte { - searches := make([][]byte, len(filters)) + searches := make([][]byte, 0, len(filters)) for _, f := range filters { searches = append(searches, []byte(f.Match)) } @@ -111,7 +111,7 @@ func convertToSearches(filters []*logproto.LineFilterExpression) [][]byte { // convertToShortRefs converts a v1.ChunkRefs into []*logproto.ShortRef // TODO(chaudum): Avoid conversion by transferring v1.ChunkRefs in gRPC request. func convertToShortRefs(refs v1.ChunkRefs) []*logproto.ShortRef { - result := make([]*logproto.ShortRef, len(refs)) + result := make([]*logproto.ShortRef, 0, len(refs)) for _, ref := range refs { result = append(result, &logproto.ShortRef{From: ref.Start, Through: ref.End, Checksum: ref.Checksum}) } @@ -121,7 +121,7 @@ func convertToShortRefs(refs v1.ChunkRefs) []*logproto.ShortRef { // convertToChunkRefs converts a []*logproto.ShortRef into v1.ChunkRefs // TODO(chaudum): Avoid conversion by transferring v1.ChunkRefs in gRPC request. 
func convertToChunkRefs(refs []*logproto.ShortRef) v1.ChunkRefs { - result := make(v1.ChunkRefs, len(refs)) + result := make(v1.ChunkRefs, 0, len(refs)) for _, ref := range refs { result = append(result, v1.ChunkRef{Start: ref.From, End: ref.Through, Checksum: ref.Checksum}) } diff --git a/pkg/storage/bloom/v1/builder_test.go b/pkg/storage/bloom/v1/builder_test.go index 0ea6f6451ebac..e7278b971f6c8 100644 --- a/pkg/storage/bloom/v1/builder_test.go +++ b/pkg/storage/bloom/v1/builder_test.go @@ -3,51 +3,13 @@ package v1 import ( "bytes" "errors" - "fmt" "testing" - "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/grafana/loki/pkg/chunkenc" - "github.com/grafana/loki/pkg/storage/bloom/v1/filter" ) -func mkBasicSeriesWithBlooms(nSeries, keysPerSeries int, fromFp, throughFp model.Fingerprint, fromTs, throughTs model.Time) (seriesList []SeriesWithBloom, keysList [][][]byte) { - seriesList = make([]SeriesWithBloom, 0, nSeries) - keysList = make([][][]byte, 0, nSeries) - for i := 0; i < nSeries; i++ { - var series Series - step := (throughFp - fromFp) / (model.Fingerprint(nSeries)) - series.Fingerprint = fromFp + model.Fingerprint(i)*step - timeDelta := fromTs + (throughTs-fromTs)/model.Time(nSeries)*model.Time(i) - series.Chunks = []ChunkRef{ - { - Start: fromTs + timeDelta*model.Time(i), - End: fromTs + timeDelta*model.Time(i), - Checksum: uint32(i), - }, - } - - var bloom Bloom - bloom.ScalableBloomFilter = *filter.NewScalableBloomFilter(1024, 0.01, 0.8) - - keys := make([][]byte, 0, keysPerSeries) - for j := 0; j < keysPerSeries; j++ { - key := []byte(fmt.Sprint(j)) - bloom.Add(key) - keys = append(keys, key) - } - - seriesList = append(seriesList, SeriesWithBloom{ - Series: &series, - Bloom: &bloom, - }) - keysList = append(keysList, keys) - } - return -} - func EqualIterators[T any](t *testing.T, test func(a, b T), expected, actual Iterator[T]) { for expected.Next() { require.True(t, actual.Next()) diff --git a/pkg/storage/bloom/v1/test_util.go b/pkg/storage/bloom/v1/test_util.go new file mode 100644 index 0000000000000..a8095d020a4f2 --- /dev/null +++ b/pkg/storage/bloom/v1/test_util.go @@ -0,0 +1,83 @@ +package v1 + +import ( + "bytes" + "fmt" + "testing" + + "github.com/grafana/loki/pkg/chunkenc" + "github.com/grafana/loki/pkg/storage/bloom/v1/filter" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" +) + +func MakeBlockQuerier(t testing.TB, fromFp, throughFp uint64, fromTs, throughTs int64) *BlockQuerier { + // references for linking in memory reader+writer + indexBuf := bytes.NewBuffer(nil) + bloomsBuf := bytes.NewBuffer(nil) + writer := NewMemoryBlockWriter(indexBuf, bloomsBuf) + reader := NewByteReader(indexBuf, bloomsBuf) + numSeries := int(throughFp - fromFp) + numKeysPerSeries := 1000 + data, _ := mkBasicSeriesWithBlooms( + numSeries, + numKeysPerSeries, + model.Fingerprint(fromFp), + model.Fingerprint(throughFp), + model.TimeFromUnix(fromTs), + model.TimeFromUnix(throughTs), + ) + + builder, err := NewBlockBuilder( + BlockOptions{ + schema: Schema{ + version: DefaultSchemaVersion, + encoding: chunkenc.EncSnappy, + }, + SeriesPageSize: 100, + BloomPageSize: 10 << 10, + }, + writer, + ) + require.Nil(t, err) + itr := NewSliceIter[SeriesWithBloom](data) + _, err = builder.BuildFrom(itr) + require.Nil(t, err) + block := NewBlock(reader) + return NewBlockQuerier(block) +} + +func mkBasicSeriesWithBlooms(nSeries, keysPerSeries int, fromFp, throughFp model.Fingerprint, fromTs, throughTs model.Time) (seriesList 
[]SeriesWithBloom, keysList [][][]byte) { + seriesList = make([]SeriesWithBloom, 0, nSeries) + keysList = make([][][]byte, 0, nSeries) + for i := 0; i < nSeries; i++ { + var series Series + step := (throughFp - fromFp) / (model.Fingerprint(nSeries)) + series.Fingerprint = fromFp + model.Fingerprint(i)*step + timeDelta := fromTs + (throughTs-fromTs)/model.Time(nSeries)*model.Time(i) + series.Chunks = []ChunkRef{ + { + Start: fromTs + timeDelta*model.Time(i), + End: fromTs + timeDelta*model.Time(i), + Checksum: uint32(i), + }, + } + + var bloom Bloom + bloom.ScalableBloomFilter = *filter.NewScalableBloomFilter(1024, 0.01, 0.8) + + keys := make([][]byte, 0, keysPerSeries) + for j := 0; j < keysPerSeries; j++ { + key := []byte(fmt.Sprint(i*keysPerSeries + j)) + bloom.Add(key) + keys = append(keys, key) + } + + seriesList = append(seriesList, SeriesWithBloom{ + Series: &series, + Bloom: &bloom, + }) + keysList = append(keysList, keys) + } + return +} From 8ade59c8c033cf00675ff9a77ba0fcf1c6e5b69e Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 9 Nov 2023 11:00:35 +0100 Subject: [PATCH 06/44] Add basic worker metrics Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 150 ++++++++++++++++++++----------- pkg/bloomgateway/multiplexing.go | 9 +- pkg/bloomgateway/util.go | 13 +++ 3 files changed, 120 insertions(+), 52 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index b6a66cba4b292..132cf06627fbe 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -141,14 +141,16 @@ func makePendingTasks(n int) *pendingTasks { type Gateway struct { services.Service - cfg Config - logger log.Logger - metrics *metrics + cfg Config + logger log.Logger + + metrics *metrics + workerMetrics *workerMetrics + queueMetrics *queue.Metrics - queue *queue.RequestQueue - queueMetrics *queue.Metrics - activeUsers *util.ActiveUsersCleanupService - bloomStore bloomshipper.Store + queue *queue.RequestQueue + activeUsers *util.ActiveUsersCleanupService + bloomStore bloomshipper.Store sharding ShardingStrategy @@ -156,19 +158,28 @@ type Gateway struct { serviceMngr *services.Manager serviceWatcher *services.FailureWatcher + + workerConfig workerConfig } // New returns a new instance of the Bloom Gateway. 
func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, overrides Limits, shardingStrategy ShardingStrategy, cm storage.ClientMetrics, logger log.Logger, reg prometheus.Registerer) (*Gateway, error) { + metricsSubsystem := "bloom_gateway" + g := &Gateway{ cfg: cfg, logger: logger, - metrics: newMetrics("bloom_gateway", reg), + metrics: newMetrics(metricsSubsystem, reg), sharding: shardingStrategy, pendingTasks: makePendingTasks(pendingTasksInitialCap), + workerConfig: workerConfig{ + maxWaitTime: 200 * time.Millisecond, + maxItems: 100, + }, + workerMetrics: newWorkerMetrics(metricsSubsystem, reg), + queueMetrics: queue.NewMetrics(reg, constants.Loki, metricsSubsystem), } - g.queueMetrics = queue.NewMetrics(reg, constants.Loki, "bloom_gateway") g.queue = queue.NewRequestQueue(maxTasksPerTenant, time.Minute, g.queueMetrics) g.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(g.queueMetrics.Cleanup) @@ -202,7 +213,8 @@ func (g *Gateway) initServices() error { var err error svcs := []services.Service{g.queue, g.activeUsers} for i := 0; i < numWorkers; i++ { - w := newWorker(i, g.queue, g.bloomStore, g.pendingTasks, g.logger) + id := fmt.Sprintf("bloom-query-worker-%d", i) + w := newWorker(id, g.workerConfig, g.queue, g.bloomStore, g.pendingTasks, g.logger, g.workerMetrics) svcs = append(svcs, w) } g.serviceMngr, err = services.NewManager(svcs...) @@ -307,13 +319,14 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk case err := <-errCh: return nil, errors.Wrap(err, "waiting for results") case res := <-resCh: - level.Debug(g.logger).Log("msg", "got partial result", "task", task.ID, "tenant", tenantID, "fp", res.Fp, "refs", res.Chks.Len()) - // wait for all parts of the full response + // log line is helpful for debugging tests + // level.Debug(g.logger).Log("msg", "got partial result", "task", task.ID, "tenant", tenantID, "fp", res.Fp, "refs", res.Chks.Len()) response = append(response, &logproto.GroupedChunkRefs{ Tenant: tenantID, Fingerprint: uint64(res.Fp), Refs: convertToShortRefs(res.Chks), }) + // wait for all parts of the full response if len(response) == len(req.Refs) { return &logproto.FilterChunkRefResponse{ChunkRefs: response}, nil } @@ -321,19 +334,49 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk } } -type RequestPool struct { - sync.Pool +type workerConfig struct { + maxWaitTime time.Duration + maxItems int } -func (p *RequestPool) Get() []v1.Request { - return p.Pool.Get().([]v1.Request) +type workerMetrics struct { + dequeuedTasks *prometheus.CounterVec + dequeueErrors *prometheus.CounterVec + dequeueWaitTime *prometheus.SummaryVec + storeAccessLatency *prometheus.HistogramVec } -func (p *RequestPool) Put(r []v1.Request) { - p.Pool.Put(r[:0]) +func newWorkerMetrics(subsystem string, registerer prometheus.Registerer) *workerMetrics { + labels := []string{"worker"} + return &workerMetrics{ + dequeuedTasks: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + Namespace: "loki", + Subsystem: subsystem, + Name: "dequeued_tasks_total", + Help: "Total amount of tasks that the worker dequeued from the bloom query queue", + }, labels), + dequeueErrors: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + Namespace: "loki", + Subsystem: subsystem, + Name: "dequeue_errors_total", + Help: "Total amount of failed dequeue operations", + }, labels), + dequeueWaitTime: promauto.With(registerer).NewSummaryVec(prometheus.SummaryOpts{ + Namespace: "loki", + Subsystem: 
subsystem, + Name: "dequeue_wait_time", + Help: "Time spent waiting for dequeuing tasks from queue", + }, labels), + storeAccessLatency: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "loki", + Subsystem: subsystem, + Name: "store_latency", + Help: "Latency in seconds of accessing the bloom store component", + }, append(labels, "operation")), + } } -// Worker is a datastructure that consumes tasks from the request queue, +// worker is a datastructure that consumes tasks from the request queue, // processes them and returns the result/error back to the response channels of // the tasks. // It is responsible for multiplexing tasks so they can be processes in a more @@ -341,23 +384,25 @@ func (p *RequestPool) Put(r []v1.Request) { type worker struct { services.Service - ID string - queue *queue.RequestQueue - store bloomshipper.Store - tasks *pendingTasks - logger log.Logger - - rp RequestPool + id string + cfg workerConfig + queue *queue.RequestQueue + store bloomshipper.Store + tasks *pendingTasks + logger log.Logger + metrics *workerMetrics + rp RequestPool } -func newWorker(i int, queue *queue.RequestQueue, store bloomshipper.Store, tasks *pendingTasks, logger log.Logger) *worker { - id := fmt.Sprintf("bloom-query-worker-%d", i) +func newWorker(id string, cfg workerConfig, queue *queue.RequestQueue, store bloomshipper.Store, tasks *pendingTasks, logger log.Logger, metrics *workerMetrics) *worker { w := &worker{ - ID: id, - queue: queue, - store: store, - tasks: tasks, - logger: log.With(logger, "worker", id), + id: id, + cfg: cfg, + queue: queue, + store: store, + tasks: tasks, + logger: log.With(logger, "worker", id), + metrics: metrics, } w.Service = services.NewBasicService(w.starting, w.running, w.stopping).WithName(id) w.rp = RequestPool{ @@ -372,24 +417,24 @@ func newWorker(i int, queue *queue.RequestQueue, store bloomshipper.Store, tasks func (w *worker) starting(_ context.Context) error { level.Debug(w.logger).Log("msg", "starting worker") - w.queue.RegisterConsumerConnection(w.ID) + w.queue.RegisterConsumerConnection(w.id) return nil } func (w *worker) running(ctx context.Context) error { idx := queue.StartIndexWithLocalQueue - maxItems := 10 - maxWaitTime := 500 * time.Millisecond - for ctx.Err() == nil { taskCtx := context.Background() - items, newIdx, err := w.queue.DequeueMany(taskCtx, idx, w.ID, maxItems, maxWaitTime) + dequeueStart := time.Now() + items, newIdx, err := w.queue.DequeueMany(taskCtx, idx, w.id, w.cfg.maxItems, w.cfg.maxWaitTime) + w.metrics.dequeueWaitTime.WithLabelValues(w.id).Observe(time.Since(dequeueStart).Seconds()) if err != nil { // We only return an error if the queue is stopped and dequeuing did not yield any items if err == queue.ErrStopped && len(items) == 0 { return err } + w.metrics.dequeueErrors.WithLabelValues(w.id).Inc() level.Error(w.logger).Log("msg", "failed to dequeue tasks", "err", err, "items", len(items)) } idx = newIdx @@ -397,20 +442,20 @@ func (w *worker) running(ctx context.Context) error { if len(items) == 0 { continue } + w.metrics.dequeuedTasks.WithLabelValues(w.id).Add(float64(len(items))) tasksPerDay := make(map[time.Time][]Task) for _, item := range items { task, ok := item.(Task) if !ok { - level.Error(w.logger).Log("msg", "failed to cast dequeued item to Task", "item", item) - continue + // This really should never happen, because only the bloom gateway itself can enqueue tasks. 
+ return errors.Errorf("failed to cast dequeued item to Task: %v", item) } level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID) w.tasks.Delete(task.ID) - fromDay := getDayTime(task.Request.From) - throughDay := getDayTime(task.Request.Through) + fromDay, throughDay := task.Bounds() if fromDay.Equal(throughDay) { tasksPerDay[fromDay] = append(tasksPerDay[fromDay], task) @@ -426,10 +471,10 @@ func (w *worker) running(ctx context.Context) error { for day, tasks := range tasksPerDay { logger := log.With(w.logger, "day", day) - level.Debug(logger).Log("msg", "process tasks for day", "tasks", len(tasks)) + level.Debug(logger).Log("msg", "process tasks", "tasks", len(tasks)) it := newTaskMergeIterator(tasks...) - // TODO(chaudum): Use pool + fingerprints := make([]uint64, 0, 1024) for it.Next() { // fingerprints are already sorted. we can skip duplicates by checking @@ -443,15 +488,20 @@ func (w *worker) running(ctx context.Context) error { it.Reset() + storeFetchStart := time.Now() bqs, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour), fingerprints) + w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockQueriers").Observe(time.Since(storeFetchStart).Seconds()) if err != nil { for _, t := range tasks { t.ErrCh <- err } + // continue with tasks of next day continue } - // no blocks found + // No blocks found. + // Since there are no blocks for the given tasks, we need to return the + // unfiltered list of chunk refs. if len(bqs) == 0 { level.Warn(logger).Log("msg", "no blocks found") for _, t := range tasks { @@ -462,11 +512,10 @@ func (w *worker) running(ctx context.Context) error { } } } + // continue with tasks of next day continue } - level.Debug(logger).Log("msg", "got block queriers", "count", len(bqs)) - hasNext := it.Next() requests := w.rp.Get() for _, bq := range bqs { @@ -480,19 +529,18 @@ func (w *worker) running(ctx context.Context) error { continue } fq := bq.Fuse([]v1.PeekingIterator[v1.Request]{NewIterWithIndex(0, requests)}) - level.Debug(logger).Log("msg", "initialized fuse querier", "minFp", bq.MinFp, "maxFp", bq.MaxFp, "requests", len(requests)) err := fq.Run() - level.Debug(logger).Log("msg", "run chunk checks", "err", err) if err != nil { for _, t := range tasks { t.ErrCh <- errors.Wrap(err, "failed to run chunk check") } + // return slice back to pool w.rp.Put(requests) continue } } + // return slice back to pool w.rp.Put(requests) - level.Debug(logger).Log("msg", "finished chunk checks") } } return ctx.Err() @@ -500,6 +548,6 @@ func (w *worker) running(ctx context.Context) error { func (w *worker) stopping(err error) error { level.Debug(w.logger).Log("msg", "stopping worker", "err", err) - w.queue.UnregisterConsumerConnection(w.ID) + w.queue.UnregisterConsumerConnection(w.id) return nil } diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index 55c9c3403e6a4..e116a6a44d472 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -1,8 +1,10 @@ package bloomgateway import ( + "time" + "github.com/grafana/loki/pkg/logproto" - "github.com/grafana/loki/pkg/storage/bloom/v1" + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/oklog/ulid" "github.com/prometheus/common/model" ) @@ -41,6 +43,11 @@ func NewTask(tenantID string, req *logproto.FilterChunkRefRequest) (Task, chan v return task, resCh, errCh, nil } +// Bounds returns the day boundaries of the task +func (t Task) Bounds() (time.Time, time.Time) { + return getDayTime(t.Request.From), 
getDayTime(t.Request.Through) +} + // WithRequest returns a copy of the task, which holds the provided Request req. func (t Task) WithRequest(req *logproto.FilterChunkRefRequest) Task { return Task{ diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index a2a4b82e2186a..0cc18fe0becad 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -1,6 +1,7 @@ package bloomgateway import ( + "sync" "time" "github.com/grafana/loki/pkg/logproto" @@ -48,6 +49,18 @@ func NewIterWithIndex[T any](i int, xs []T) *SliceIterWithIndex[T] { } } +type RequestPool struct { + sync.Pool +} + +func (p *RequestPool) Get() []v1.Request { + return p.Pool.Get().([]v1.Request) +} + +func (p *RequestPool) Put(r []v1.Request) { + p.Pool.Put(r[:0]) +} + func getDay(ts model.Time) int64 { return ts.Unix() / int64(24*time.Hour/time.Second) } From 4cd9961a7f798b631c25e8ddb9a88fac7bdadab9 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 9 Nov 2023 13:25:01 +0100 Subject: [PATCH 07/44] Fix filterRequestForDay function Signed-off-by: Christian Haudum --- pkg/bloomgateway/util.go | 30 +++++++--- pkg/bloomgateway/util_test.go | 106 +++++++++++++++++++++++++++++++++- 2 files changed, 128 insertions(+), 8 deletions(-) diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index 0cc18fe0becad..b2deba0747bc3 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -70,7 +70,9 @@ func getDayTime(ts model.Time) time.Time { } func filterRequestForDay(r *logproto.FilterChunkRefRequest, day time.Time) *logproto.FilterChunkRefRequest { - var from, through time.Time + through := model.TimeFromUnix(day.Unix()) + from := model.TimeFromUnix(day.Add(24 * time.Hour).Unix()) + refs := make([]*logproto.GroupedChunkRefs, 0, len(r.Refs)) for i := range r.Refs { groupedChunkRefs := &logproto.GroupedChunkRefs{ @@ -89,8 +91,14 @@ func filterRequestForDay(r *logproto.FilterChunkRefRequest, day time.Time) *logp groupedChunkRefs.Refs = append(groupedChunkRefs.Refs, shortRef) } } + + // do not add empty groups to request + if len(groupedChunkRefs.Refs) == 0 { + continue + } + groupFrom, groupThrough := getFromThrough(groupedChunkRefs.Refs) - if from.Unix() > 0 && groupFrom.Before(from) { + if groupFrom.Before(from) { from = groupFrom } if groupThrough.After(through) { @@ -98,19 +106,27 @@ func filterRequestForDay(r *logproto.FilterChunkRefRequest, day time.Time) *logp } refs = append(refs, groupedChunkRefs) } + + // The initial value of `from` is the through time and vice versa. + // This is, in order to determine min From and max Through. + // In case no chunk refs match, we need to swap the initial value again. 
+ if len(refs) == 0 { + from, through = through, from + } + return &logproto.FilterChunkRefRequest{ - From: model.TimeFromUnix(from.Unix()), - Through: model.TimeFromUnix(through.Unix()), + From: from, + Through: through, Refs: refs, Filters: r.Filters, } } -func getFromThrough(refs []*logproto.ShortRef) (time.Time, time.Time) { +func getFromThrough(refs []*logproto.ShortRef) (model.Time, model.Time) { if len(refs) == 0 { - return time.Time{}, time.Time{} + return model.Earliest, model.Latest } - return refs[0].From.Time(), refs[len(refs)-1].Through.Time() + return refs[0].From, refs[len(refs)-1].Through } func convertToSearches(filters []*logproto.LineFilterExpression) [][]byte { diff --git a/pkg/bloomgateway/util_test.go b/pkg/bloomgateway/util_test.go index df1f3e5aa378c..a237aa7e053a6 100644 --- a/pkg/bloomgateway/util_test.go +++ b/pkg/bloomgateway/util_test.go @@ -2,7 +2,10 @@ package bloomgateway import ( "testing" + "time" + "github.com/grafana/loki/pkg/logproto" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" ) @@ -39,4 +42,105 @@ func TestGetDay(t *testing.T) {} func TestGetDayTime(t *testing.T) {} -func TestFilterRequestForDay(t *testing.T) {} +func TestFilterRequestForDay(t *testing.T) { + tenant := "fake" + + // Thu Nov 09 2023 10:56:50 UTC + ts := model.TimeFromUnix(1699523810) + + t.Run("filter chunk refs that fall into the day range", func(t *testing.T) { + input := &logproto.FilterChunkRefRequest{ + From: ts.Add(-168 * time.Hour), // 1w ago + Through: ts, + Refs: []*logproto.GroupedChunkRefs{ + {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-168 * time.Hour), Through: ts.Add(-167 * time.Hour), Checksum: 100}, + {From: ts.Add(-143 * time.Hour), Through: ts.Add(-142 * time.Hour), Checksum: 101}, + }}, + {Fingerprint: 200, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-144 * time.Hour), Through: ts.Add(-143 * time.Hour), Checksum: 200}, + {From: ts.Add(-119 * time.Hour), Through: ts.Add(-118 * time.Hour), Checksum: 201}, + }}, + {Fingerprint: 300, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-120 * time.Hour), Through: ts.Add(-119 * time.Hour), Checksum: 300}, + {From: ts.Add(-95 * time.Hour), Through: ts.Add(-94 * time.Hour), Checksum: 301}, + }}, + {Fingerprint: 400, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-96 * time.Hour), Through: ts.Add(-95 * time.Hour), Checksum: 400}, + {From: ts.Add(-71 * time.Hour), Through: ts.Add(-70 * time.Hour), Checksum: 401}, + }}, + {Fingerprint: 500, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-72 * time.Hour), Through: ts.Add(-71 * time.Hour), Checksum: 500}, + {From: ts.Add(-47 * time.Hour), Through: ts.Add(-46 * time.Hour), Checksum: 501}, + }}, + {Fingerprint: 600, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-48 * time.Hour), Through: ts.Add(-47 * time.Hour), Checksum: 600}, + {From: ts.Add(-23 * time.Hour), Through: ts.Add(-22 * time.Hour), Checksum: 601}, + }}, + {Fingerprint: 700, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-24 * time.Hour), Through: ts.Add(-23 * time.Hour), Checksum: 700}, + {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 701}, + }}, + }, + Filters: []*logproto.LineFilterExpression{ + {Operator: 1, Match: "foo"}, + {Operator: 1, Match: "bar"}, + }, + } + + // day ranges from ts-48h to ts-24h + day := getDayTime(ts.Add(-36 * time.Hour)) + + expected := &logproto.FilterChunkRefRequest{ + From: ts.Add(-48 * time.Hour), + Through: ts.Add(-46 * time.Hour), + 
Refs: []*logproto.GroupedChunkRefs{ + {Fingerprint: 500, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-47 * time.Hour), Through: ts.Add(-46 * time.Hour), Checksum: 501}, + }}, + {Fingerprint: 600, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-48 * time.Hour), Through: ts.Add(-47 * time.Hour), Checksum: 600}, + }}, + }, + Filters: []*logproto.LineFilterExpression{ + {Operator: 1, Match: "foo"}, + {Operator: 1, Match: "bar"}, + }, + } + + output := filterRequestForDay(input, day) + require.Equal(t, expected, output) + }) + + t.Run("empty response returns time range from input day", func(t *testing.T) { + input := &logproto.FilterChunkRefRequest{ + From: ts.Add(-168 * time.Hour), // 1w ago + Through: ts, + Refs: []*logproto.GroupedChunkRefs{ + {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-168 * time.Hour), Through: ts.Add(-167 * time.Hour), Checksum: 100}, + }}, + }, + Filters: []*logproto.LineFilterExpression{ + {Operator: 1, Match: "foo"}, + {Operator: 1, Match: "bar"}, + }, + } + + // day ranges from ts-48h to ts-24h + day := getDayTime(ts.Add(-36 * time.Hour)) + + expected := &logproto.FilterChunkRefRequest{ + From: model.TimeFromUnix(day.Unix()), + Through: model.TimeFromUnix(day.Add(24 * time.Hour).Unix()), + Refs: []*logproto.GroupedChunkRefs{}, + Filters: []*logproto.LineFilterExpression{ + {Operator: 1, Match: "foo"}, + {Operator: 1, Match: "bar"}, + }, + } + + output := filterRequestForDay(input, day) + require.Equal(t, expected, output) + }) +} From 16899e610aec8f5e1d1fecc12d8ac0101859d7fb Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 9 Nov 2023 14:32:55 +0100 Subject: [PATCH 08/44] Add tests for taskMergeIterator Signed-off-by: Christian Haudum --- pkg/bloomgateway/multiplexing_test.go | 188 ++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 pkg/bloomgateway/multiplexing_test.go diff --git a/pkg/bloomgateway/multiplexing_test.go b/pkg/bloomgateway/multiplexing_test.go new file mode 100644 index 0000000000000..febf7270cba06 --- /dev/null +++ b/pkg/bloomgateway/multiplexing_test.go @@ -0,0 +1,188 @@ +package bloomgateway + +import ( + "testing" + "time" + + "github.com/grafana/loki/pkg/logproto" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" +) + +func TestTask(t *testing.T) { + t.Run("bounds returns request boundaries", func(t *testing.T) { + ts := model.Now() + req := &logproto.FilterChunkRefRequest{ + From: ts.Add(-1 * time.Hour), + Through: ts, + } + task, _, _, err := NewTask("tenant", req) + require.NoError(t, err) + from, through := task.Bounds() + require.Equal(t, req.From, from) + require.Equal(t, req.Through, through) + }) +} + +func TestTaskMergeIterator(t *testing.T) { + // Thu Nov 09 2023 10:56:50 UTC + ts := model.TimeFromUnix(1699523810) + tenant := "fake" + + t.Run("empty requests result in empty iterator", func(t *testing.T) { + r1 := &logproto.FilterChunkRefRequest{ + From: ts.Add(-3 * time.Hour), + Through: ts.Add(-2 * time.Hour), + Refs: []*logproto.GroupedChunkRefs{}, + } + t1, _, _, err := NewTask(tenant, r1) + require.NoError(t, err) + + r2 := &logproto.FilterChunkRefRequest{ + From: ts.Add(-1 * time.Hour), + Through: ts, + Refs: []*logproto.GroupedChunkRefs{}, + } + t2, _, _, err := NewTask(tenant, r2) + require.NoError(t, err) + + r3 := &logproto.FilterChunkRefRequest{ + From: ts.Add(-1 * time.Hour), + Through: ts, + Refs: []*logproto.GroupedChunkRefs{}, + } + t3, _, _, err := NewTask(tenant, r3) + require.NoError(t, err) 
+ + it := newTaskMergeIterator(t1, t2, t3) + require.NotNil(t, it.heap) + // nothing to iterate over + require.False(t, it.Next()) + }) + + t.Run("merge multiple tasks in ascending fingerprint order", func(t *testing.T) { + r1 := &logproto.FilterChunkRefRequest{ + From: ts.Add(-3 * time.Hour), + Through: ts.Add(-2 * time.Hour), + Refs: []*logproto.GroupedChunkRefs{ + {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-3 * time.Hour), Through: ts.Add(-2 * time.Hour), Checksum: 100}, + }}, + }, + } + t1, _, _, err := NewTask(tenant, r1) + require.NoError(t, err) + + r2 := &logproto.FilterChunkRefRequest{ + From: ts.Add(-1 * time.Hour), + Through: ts, + Refs: []*logproto.GroupedChunkRefs{ + {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 200}, + }}, + {Fingerprint: 200, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 300}, + }}, + }, + } + t2, _, _, err := NewTask(tenant, r2) + require.NoError(t, err) + + r3 := &logproto.FilterChunkRefRequest{ + From: ts.Add(-1 * time.Hour), + Through: ts, + Refs: []*logproto.GroupedChunkRefs{ + {Fingerprint: 200, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 400}, + }}, + }, + } + t3, _, _, err := NewTask(tenant, r3) + require.NoError(t, err) + + it := newTaskMergeIterator(t1, t2, t3) + require.NotNil(t, it.heap) + + // first item + require.True(t, it.Next()) + r := it.At() + require.Equal(t, model.Fingerprint(100), r.Fp) + require.Equal(t, uint32(100), r.Chks[0].Checksum) + + // second item + require.True(t, it.Next()) + r = it.At() + require.Equal(t, model.Fingerprint(100), r.Fp) + require.Equal(t, uint32(200), r.Chks[0].Checksum) + + // third item + require.True(t, it.Next()) + r = it.At() + require.Equal(t, model.Fingerprint(200), r.Fp) + require.Equal(t, uint32(300), r.Chks[0].Checksum) + + // fourth item + require.True(t, it.Next()) + r = it.At() + require.Equal(t, model.Fingerprint(200), r.Fp) + require.Equal(t, uint32(400), r.Chks[0].Checksum) + + // no more items + require.False(t, it.Next()) + }) + + t.Run("reset of iterator allows for multiple iterations", func(t *testing.T) { + r1 := &logproto.FilterChunkRefRequest{ + From: ts.Add(-1 * time.Hour), + Through: ts, + Refs: []*logproto.GroupedChunkRefs{ + {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 100}, + }}, + }, + } + t1, _, _, err := NewTask(tenant, r1) + require.NoError(t, err) + + r2 := &logproto.FilterChunkRefRequest{ + From: ts.Add(-1 * time.Hour), + Through: ts, + Refs: []*logproto.GroupedChunkRefs{ + {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 200}, + }}, + }, + } + t2, _, _, err := NewTask(tenant, r2) + require.NoError(t, err) + + r3 := &logproto.FilterChunkRefRequest{ + From: ts.Add(-1 * time.Hour), + Through: ts, + Refs: []*logproto.GroupedChunkRefs{ + {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 300}, + }}, + }, + } + t3, _, _, err := NewTask(tenant, r3) + require.NoError(t, err) + + it := newTaskMergeIterator(t1, t2, t3) + require.NotNil(t, it.heap) + + count := 0 + for it.Next() { + count++ + } + require.Equal(t, 3, count) + + it.Reset() + + count = 0 + for it.Next() { + count++ + } + require.Equal(t, 3, count) + }) +} From 8c5a945859e11ad5288e5aff986bc4ca130aef0a 
Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 9 Nov 2023 14:35:47 +0100 Subject: [PATCH 09/44] Fix linter warnings Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway_test.go | 5 +++-- pkg/bloomgateway/util.go | 2 +- pkg/bloomgateway/util_test.go | 4 ---- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index 0160c31635437..2577af016cb87 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -276,7 +276,8 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { // replace store implementation and re-initialize workers and sub-services gw.bloomStore = newMockBloomStore(t) - gw.initServices() + err = gw.initServices() + require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), gw) require.NoError(t, err) @@ -378,7 +379,7 @@ type mockBloomStore struct { t *testing.T } -func (s *mockBloomStore) GetBlockQueriers(ctx context.Context, tenant string, from, through time.Time, fingerprints []uint64) ([]bloomshipper.BlockQuerierWithFingerprintRange, error) { +func (s *mockBloomStore) GetBlockQueriers(_ context.Context, tenant string, from, through time.Time, fingerprints []uint64) ([]bloomshipper.BlockQuerierWithFingerprintRange, error) { return []bloomshipper.BlockQuerierWithFingerprintRange{ {BlockQuerier: v1.MakeBlockQuerier(s.t, 0, 255, from.Unix(), through.Unix()), MinFp: 0, MaxFp: 255}, {BlockQuerier: v1.MakeBlockQuerier(s.t, 256, 511, from.Unix(), through.Unix()), MinFp: 256, MaxFp: 511}, diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index b2deba0747bc3..b5506f5d633e8 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -58,7 +58,7 @@ func (p *RequestPool) Get() []v1.Request { } func (p *RequestPool) Put(r []v1.Request) { - p.Pool.Put(r[:0]) + p.Pool.Put(r[:0]) // nolint:staticcheck } func getDay(ts model.Time) int64 { diff --git a/pkg/bloomgateway/util_test.go b/pkg/bloomgateway/util_test.go index a237aa7e053a6..75f4ace2e10b8 100644 --- a/pkg/bloomgateway/util_test.go +++ b/pkg/bloomgateway/util_test.go @@ -38,10 +38,6 @@ func TestSliceIterWithIndex(t *testing.T) { }) } -func TestGetDay(t *testing.T) {} - -func TestGetDayTime(t *testing.T) {} - func TestFilterRequestForDay(t *testing.T) { tenant := "fake" From c3f236836c3317a178b224a46b44e83e003674e6 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 9 Nov 2023 14:37:08 +0100 Subject: [PATCH 10/44] Fix import order Signed-off-by: Christian Haudum --- pkg/bloomgateway/multiplexing.go | 5 +++-- pkg/bloomgateway/multiplexing_test.go | 3 ++- pkg/bloomgateway/util.go | 3 ++- pkg/bloomgateway/util_test.go | 3 ++- pkg/storage/bloom/v1/test_util.go | 5 +++-- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index e116a6a44d472..fbbf15c171f24 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -3,10 +3,11 @@ package bloomgateway import ( "time" - "github.com/grafana/loki/pkg/logproto" - v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/oklog/ulid" "github.com/prometheus/common/model" + + "github.com/grafana/loki/pkg/logproto" + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" ) // Task is the data structure that is enqueued to the internal queue and queued by query workers diff --git a/pkg/bloomgateway/multiplexing_test.go b/pkg/bloomgateway/multiplexing_test.go index febf7270cba06..1af68e887ce74 
100644 --- a/pkg/bloomgateway/multiplexing_test.go +++ b/pkg/bloomgateway/multiplexing_test.go @@ -4,9 +4,10 @@ import ( "testing" "time" - "github.com/grafana/loki/pkg/logproto" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/logproto" ) func TestTask(t *testing.T) { diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index b5506f5d633e8..aa2712b99af6c 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -4,9 +4,10 @@ import ( "sync" "time" + "github.com/prometheus/common/model" + "github.com/grafana/loki/pkg/logproto" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" - "github.com/prometheus/common/model" ) // SliceIterWithIndex implements v1.PeekingIterator diff --git a/pkg/bloomgateway/util_test.go b/pkg/bloomgateway/util_test.go index 75f4ace2e10b8..897850a0438c2 100644 --- a/pkg/bloomgateway/util_test.go +++ b/pkg/bloomgateway/util_test.go @@ -4,9 +4,10 @@ import ( "testing" "time" - "github.com/grafana/loki/pkg/logproto" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/logproto" ) func TestSliceIterWithIndex(t *testing.T) { diff --git a/pkg/storage/bloom/v1/test_util.go b/pkg/storage/bloom/v1/test_util.go index a8095d020a4f2..19af66299f8de 100644 --- a/pkg/storage/bloom/v1/test_util.go +++ b/pkg/storage/bloom/v1/test_util.go @@ -5,10 +5,11 @@ import ( "fmt" "testing" - "github.com/grafana/loki/pkg/chunkenc" - "github.com/grafana/loki/pkg/storage/bloom/v1/filter" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/chunkenc" + "github.com/grafana/loki/pkg/storage/bloom/v1/filter" ) func MakeBlockQuerier(t testing.TB, fromFp, throughFp uint64, fromTs, throughTs int64) *BlockQuerier { From 266479b5714bd61c544b091a45623b853cfe7b84 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 9 Nov 2023 14:52:24 +0100 Subject: [PATCH 11/44] Implement Err() for taskMergeIterator Signed-off-by: Christian Haudum --- pkg/bloomgateway/multiplexing.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index fbbf15c171f24..959b634f3fccc 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -1,6 +1,7 @@ package bloomgateway import ( + "errors" "time" "github.com/oklog/ulid" @@ -71,6 +72,7 @@ type taskMergeIterator struct { curr FilterRequest heap *v1.HeapIterator[*logproto.GroupedChunkRefs] tasks []Task + err error } func newTaskMergeIterator(tasks ...Task) *taskMergeIterator { @@ -93,6 +95,7 @@ func (it *taskMergeIterator) init() { }, sequences..., ) + it.err = nil } func (it *taskMergeIterator) Reset() { @@ -107,6 +110,7 @@ func (it *taskMergeIterator) Next() bool { currIter, ok := it.heap.CurrIter().(*SliceIterWithIndex[*logproto.GroupedChunkRefs]) if !ok { + it.err = errors.New("failed to cast iterator") return false } iterIndex := currIter.Index() @@ -127,5 +131,5 @@ func (it *taskMergeIterator) At() FilterRequest { } func (it *taskMergeIterator) Err() error { - return nil + return it.err } From f443fe0a8286567d2def566914ab1ffafeb2b2a6 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 9 Nov 2023 14:59:30 +0100 Subject: [PATCH 12/44] Fix test and linter Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway_test.go | 2 +- pkg/bloomgateway/multiplexing_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index 2577af016cb87..5ddbc084ec86e 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -379,7 +379,7 @@ type mockBloomStore struct { t *testing.T } -func (s *mockBloomStore) GetBlockQueriers(_ context.Context, tenant string, from, through time.Time, fingerprints []uint64) ([]bloomshipper.BlockQuerierWithFingerprintRange, error) { +func (s *mockBloomStore) GetBlockQueriers(_ context.Context, _ string, from, through time.Time, _ []uint64) ([]bloomshipper.BlockQuerierWithFingerprintRange, error) { return []bloomshipper.BlockQuerierWithFingerprintRange{ {BlockQuerier: v1.MakeBlockQuerier(s.t, 0, 255, from.Unix(), through.Unix()), MinFp: 0, MaxFp: 255}, {BlockQuerier: v1.MakeBlockQuerier(s.t, 256, 511, from.Unix(), through.Unix()), MinFp: 256, MaxFp: 511}, diff --git a/pkg/bloomgateway/multiplexing_test.go b/pkg/bloomgateway/multiplexing_test.go index 1af68e887ce74..915d140756560 100644 --- a/pkg/bloomgateway/multiplexing_test.go +++ b/pkg/bloomgateway/multiplexing_test.go @@ -20,8 +20,8 @@ func TestTask(t *testing.T) { task, _, _, err := NewTask("tenant", req) require.NoError(t, err) from, through := task.Bounds() - require.Equal(t, req.From, from) - require.Equal(t, req.Through, through) + require.Equal(t, getDayTime(req.From), from) + require.Equal(t, getDayTime(req.Through), through) }) } From afec782fb7cb4ef0fc0d7ee9cd730282a37a283c Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 9 Nov 2023 15:43:16 +0100 Subject: [PATCH 13/44] Add comment about optimized processing block Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index 132cf06627fbe..a73a93a51aef5 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -489,6 +489,10 @@ func (w *worker) running(ctx context.Context) error { it.Reset() storeFetchStart := time.Now() + // GetBlockQueriers() waits until all blocks are downloaded and available for querying. + // TODO(chaudum): Add API that allows to process blocks as soon as they become available. + // This will require to change the taskMergeIterator to a slice of requests so we can seek + // to the appropriate fingerprint range within the slice that matches the block's fingerprint range. 
bqs, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour), fingerprints) w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockQueriers").Observe(time.Since(storeFetchStart).Seconds()) if err != nil { From 6205140c5606d54633b387d758d41b8916d3c82a Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Fri, 10 Nov 2023 12:17:18 +0100 Subject: [PATCH 14/44] Fix filter test Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 48 ++++----- pkg/bloomgateway/bloomgateway_test.go | 135 ++++++++++++++------------ pkg/bloomgateway/util.go | 23 ++--- pkg/storage/bloom/v1/test_util.go | 25 +++-- 4 files changed, 121 insertions(+), 110 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index a73a93a51aef5..ada5642dbe6f5 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -312,6 +312,7 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk }) response := make([]*logproto.GroupedChunkRefs, 0, len(req.Refs)) + responseCount := 0 for { select { case <-ctx.Done(): @@ -319,15 +320,18 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk case err := <-errCh: return nil, errors.Wrap(err, "waiting for results") case res := <-resCh: + responseCount++ // log line is helpful for debugging tests - // level.Debug(g.logger).Log("msg", "got partial result", "task", task.ID, "tenant", tenantID, "fp", res.Fp, "refs", res.Chks.Len()) - response = append(response, &logproto.GroupedChunkRefs{ - Tenant: tenantID, - Fingerprint: uint64(res.Fp), - Refs: convertToShortRefs(res.Chks), - }) + // level.Debug(g.logger).Log("msg", "got partial result", "task", task.ID, "tenant", tenantID, "fp", res.Fp, "chunks", res.Chks.Len(), "progress", fmt.Sprintf("%d/%d", responseCount, len(req.Refs))) + if res.Chks.Len() > 0 { + response = append(response, &logproto.GroupedChunkRefs{ + Tenant: tenantID, + Fingerprint: uint64(res.Fp), + Refs: convertToShortRefs(res.Chks), + }) + } // wait for all parts of the full response - if len(response) == len(req.Refs) { + if responseCount == len(req.Refs) { return &logproto.FilterChunkRefResponse{ChunkRefs: response}, nil } } @@ -391,7 +395,6 @@ type worker struct { tasks *pendingTasks logger log.Logger metrics *workerMetrics - rp RequestPool } func newWorker(id string, cfg workerConfig, queue *queue.RequestQueue, store bloomshipper.Store, tasks *pendingTasks, logger log.Logger, metrics *workerMetrics) *worker { @@ -405,13 +408,6 @@ func newWorker(id string, cfg workerConfig, queue *queue.RequestQueue, store blo metrics: metrics, } w.Service = services.NewBasicService(w.starting, w.running, w.stopping).WithName(id) - w.rp = RequestPool{ - Pool: sync.Pool{ - New: func() any { - return make([]v1.Request, 0, 1024) - }, - }, - } return w } @@ -424,6 +420,9 @@ func (w *worker) starting(_ context.Context) error { func (w *worker) running(ctx context.Context) error { idx := queue.StartIndexWithLocalQueue + requests := make([]v1.Request, 0, 128) + fingerprints := make([]uint64, 0, 1024) + for ctx.Err() == nil { taskCtx := context.Background() dequeueStart := time.Now() @@ -475,7 +474,7 @@ func (w *worker) running(ctx context.Context) error { it := newTaskMergeIterator(tasks...) - fingerprints := make([]uint64, 0, 1024) + fingerprints = fingerprints[:0] for it.Next() { // fingerprints are already sorted. 
we can skip duplicates by checking // if the next is greater than the previous @@ -493,7 +492,7 @@ func (w *worker) running(ctx context.Context) error { // TODO(chaudum): Add API that allows to process blocks as soon as they become available. // This will require to change the taskMergeIterator to a slice of requests so we can seek // to the appropriate fingerprint range within the slice that matches the block's fingerprint range. - bqs, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour), fingerprints) + bqs, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour).Add(-1*time.Nanosecond), fingerprints) w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockQueriers").Observe(time.Since(storeFetchStart).Seconds()) if err != nil { for _, t := range tasks { @@ -521,13 +520,13 @@ func (w *worker) running(ctx context.Context) error { } hasNext := it.Next() - requests := w.rp.Get() for _, bq := range bqs { requests = requests[:0] for hasNext && it.At().Fp <= bq.MaxFp { requests = append(requests, it.At().Request) hasNext = it.Next() } + // level.Debug(logger).Log("msg", "processing block", "block", i+1, "of", len(bqs), "requests", len(requests)) // no fingerprints in the fingerprint range of the current block if len(requests) == 0 { continue @@ -538,13 +537,18 @@ func (w *worker) running(ctx context.Context) error { for _, t := range tasks { t.ErrCh <- errors.Wrap(err, "failed to run chunk check") } - // return slice back to pool - w.rp.Put(requests) continue } } - // return slice back to pool - w.rp.Put(requests) + + for hasNext { + level.Warn(logger).Log("msg", "processing remaining fingerprint", "fp", it.At().Fp) + it.At().Response <- v1.Output{ + Fp: it.At().Fp, + Chks: it.At().Chks, + } + hasNext = it.Next() + } } } return ctx.Err() diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index 5ddbc084ec86e..64d8675b5ef45 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -2,6 +2,7 @@ package bloomgateway import ( "context" + "fmt" "os" "testing" "time" @@ -274,8 +275,12 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) require.NoError(t, err) + ts, _ := time.Parse("2006-01-02 15:04", "2023-10-03 10:00") + now := model.TimeFromUnix(ts.Unix()) + // replace store implementation and re-initialize workers and sub-services - gw.bloomStore = newMockBloomStore(t) + bqs, data := createBlockQueriers(t, 5, now.Add(-8*time.Hour), now, 0, 1024) + gw.bloomStore = newMockBloomStore(bqs) err = gw.initServices() require.NoError(t, err) @@ -286,51 +291,12 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { require.NoError(t, err) }) - ts, _ := time.Parse("2006-01-02 15:04", "2023-10-03 10:00") - now := model.TimeFromUnix(ts.Unix()) - - chunkRefs := []*logproto.ChunkRef{ - { - Fingerprint: 100, - UserID: tenantID, - From: now.Add(-24 * time.Hour), - Through: now.Add(-23 * time.Hour), - Checksum: 1, - }, - { - Fingerprint: 100, - UserID: tenantID, - From: now.Add(-23 * time.Hour), - Through: now.Add(-22 * time.Hour), - Checksum: 2, - }, - { - Fingerprint: 500, - UserID: tenantID, - From: now.Add(-22 * time.Hour), - Through: now.Add(-21 * time.Hour), - Checksum: 3, - }, - { - Fingerprint: 1000, - UserID: tenantID, - From: now.Add(-20 * time.Hour), - Through: now.Add(-19 * time.Hour), - Checksum: 4, - }, - { - Fingerprint: 1001, - UserID: tenantID, - From: now.Add(-19 * time.Hour), 
- Through: now.Add(-18 * time.Hour), - Checksum: 5, - }, - } + chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 100) inputChunkRefs := groupRefs(t, chunkRefs) - t.Run("no match - return filtered", func(t *testing.T) { + t.Run("no match - return empty response", func(t *testing.T) { req := &logproto.FilterChunkRefRequest{ - From: now.Add(-24 * time.Hour), + From: now.Add(-8 * time.Hour), Through: now, Refs: inputChunkRefs, Filters: []*logproto.LineFilterExpression{ @@ -342,20 +308,26 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { require.NoError(t, err) expectedResponse := &logproto.FilterChunkRefResponse{ - ChunkRefs: inputChunkRefs, // why does it return all chunks? + ChunkRefs: []*logproto.GroupedChunkRefs{}, } require.Equal(t, expectedResponse, res) }) - t.Run("match - return unfiltered", func(t *testing.T) { + t.Run("match - return filtered", func(t *testing.T) { + // hack to get indexed key for a specific series + // the indexed key range for a series is defined as + // i * keysPerSeries ... i * keysPerSeries + keysPerSeries - 1 + // where i is the nth series in a block + // fortunately, i is also used as Checksum for the single chunk of a series + // see mkBasicSeriesWithBlooms() in pkg/storage/bloom/v1/test_util.go + key := inputChunkRefs[0].Refs[0].Checksum*1000 + 500 + req := &logproto.FilterChunkRefRequest{ - From: now.Add(-24 * time.Hour), + From: now.Add(-8 * time.Hour), Through: now, - Refs: groupRefs(t, chunkRefs), + Refs: inputChunkRefs, Filters: []*logproto.LineFilterExpression{ - // series with fingerprint 100 has 1000 keys - // range is from 100_000 to 100_999 - {Operator: 1, Match: "100001"}, + {Operator: 1, Match: fmt.Sprint(key)}, }, } ctx := user.InjectOrgID(context.Background(), tenantID) @@ -363,7 +335,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { require.NoError(t, err) expectedResponse := &logproto.FilterChunkRefResponse{ - ChunkRefs: inputChunkRefs, // why does it return all chunks? 
+ ChunkRefs: inputChunkRefs[:1], } require.Equal(t, expectedResponse, res) }) @@ -371,21 +343,62 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { }) } -func newMockBloomStore(t *testing.T) *mockBloomStore { - return &mockBloomStore{t: t} +func createBlockQueriers(t *testing.T, numBlocks int, from, through model.Time, minFp, maxFp model.Fingerprint) ([]bloomshipper.BlockQuerierWithFingerprintRange, [][]v1.SeriesWithBloom) { + t.Helper() + step := (maxFp - minFp) / model.Fingerprint(numBlocks) + bqs := make([]bloomshipper.BlockQuerierWithFingerprintRange, 0, numBlocks) + series := make([][]v1.SeriesWithBloom, 0, numBlocks) + for i := 0; i < numBlocks; i++ { + fromFp := minFp + (step * model.Fingerprint(i)) + throughFp := fromFp + step - 1 + // last block needs to include maxFp + if i == numBlocks-1 { + throughFp = maxFp + } + blockQuerier, data := v1.MakeBlockQuerier(t, fromFp, throughFp, from, through) + bq := bloomshipper.BlockQuerierWithFingerprintRange{ + BlockQuerier: blockQuerier, + MinFp: fromFp, + MaxFp: throughFp, + } + bqs = append(bqs, bq) + series = append(series, data) + } + return bqs, series +} + +func newMockBloomStore(bqs []bloomshipper.BlockQuerierWithFingerprintRange) *mockBloomStore { + return &mockBloomStore{bqs: bqs} } type mockBloomStore struct { - t *testing.T + bqs []bloomshipper.BlockQuerierWithFingerprintRange } -func (s *mockBloomStore) GetBlockQueriers(_ context.Context, _ string, from, through time.Time, _ []uint64) ([]bloomshipper.BlockQuerierWithFingerprintRange, error) { - return []bloomshipper.BlockQuerierWithFingerprintRange{ - {BlockQuerier: v1.MakeBlockQuerier(s.t, 0, 255, from.Unix(), through.Unix()), MinFp: 0, MaxFp: 255}, - {BlockQuerier: v1.MakeBlockQuerier(s.t, 256, 511, from.Unix(), through.Unix()), MinFp: 256, MaxFp: 511}, - {BlockQuerier: v1.MakeBlockQuerier(s.t, 512, 767, from.Unix(), through.Unix()), MinFp: 512, MaxFp: 767}, - {BlockQuerier: v1.MakeBlockQuerier(s.t, 768, 1023, from.Unix(), through.Unix()), MinFp: 768, MaxFp: 1023}, - }, nil +func (s *mockBloomStore) GetBlockQueriers(_ context.Context, _ string, _, _ time.Time, _ []uint64) ([]bloomshipper.BlockQuerierWithFingerprintRange, error) { + return s.bqs, nil } func (s *mockBloomStore) Stop() {} + +func createQueryInputFromBlockData(t *testing.T, tenant string, data [][]v1.SeriesWithBloom, nthSeries int) []*logproto.ChunkRef { + t.Helper() + n := 0 + res := make([]*logproto.ChunkRef, 0) + for i := range data { + for j := range data[i] { + if n%nthSeries == 0 { + chk := data[i][j].Series.Chunks[0] + res = append(res, &logproto.ChunkRef{ + Fingerprint: uint64(data[i][j].Series.Fingerprint), + UserID: tenant, + From: chk.Start, + Through: chk.End, + Checksum: chk.Checksum, + }) + } + n++ + } + } + return res +} diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index aa2712b99af6c..4796cbe0e2530 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -1,7 +1,6 @@ package bloomgateway import ( - "sync" "time" "github.com/prometheus/common/model" @@ -50,18 +49,6 @@ func NewIterWithIndex[T any](i int, xs []T) *SliceIterWithIndex[T] { } } -type RequestPool struct { - sync.Pool -} - -func (p *RequestPool) Get() []v1.Request { - return p.Pool.Get().([]v1.Request) -} - -func (p *RequestPool) Put(r []v1.Request) { - p.Pool.Put(r[:0]) // nolint:staticcheck -} - func getDay(ts model.Time) int64 { return ts.Unix() / int64(24*time.Hour/time.Second) } @@ -157,3 +144,13 @@ func convertToChunkRefs(refs []*logproto.ShortRef) v1.ChunkRefs { } return result } + +// 
getFirstLast returns the first and last item of a fingerprint slice +// It assumes an ascending sorted list of fingerprints. +func getFirstLast[T any](s []T) (T, T) { + var zero T + if len(s) == 0 { + return zero, zero + } + return s[0], s[len(s)-1] +} diff --git a/pkg/storage/bloom/v1/test_util.go b/pkg/storage/bloom/v1/test_util.go index 19af66299f8de..215ecaffe177e 100644 --- a/pkg/storage/bloom/v1/test_util.go +++ b/pkg/storage/bloom/v1/test_util.go @@ -4,6 +4,7 @@ import ( "bytes" "fmt" "testing" + "time" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -12,7 +13,7 @@ import ( "github.com/grafana/loki/pkg/storage/bloom/v1/filter" ) -func MakeBlockQuerier(t testing.TB, fromFp, throughFp uint64, fromTs, throughTs int64) *BlockQuerier { +func MakeBlockQuerier(t testing.TB, fromFp, throughFp model.Fingerprint, fromTs, throughTs model.Time) (*BlockQuerier, []SeriesWithBloom) { // references for linking in memory reader+writer indexBuf := bytes.NewBuffer(nil) bloomsBuf := bytes.NewBuffer(nil) @@ -20,14 +21,7 @@ func MakeBlockQuerier(t testing.TB, fromFp, throughFp uint64, fromTs, throughTs reader := NewByteReader(indexBuf, bloomsBuf) numSeries := int(throughFp - fromFp) numKeysPerSeries := 1000 - data, _ := mkBasicSeriesWithBlooms( - numSeries, - numKeysPerSeries, - model.Fingerprint(fromFp), - model.Fingerprint(throughFp), - model.TimeFromUnix(fromTs), - model.TimeFromUnix(throughTs), - ) + data, _ := mkBasicSeriesWithBlooms(numSeries, numKeysPerSeries, fromFp, throughFp, fromTs, throughTs) builder, err := NewBlockBuilder( BlockOptions{ @@ -45,21 +39,24 @@ func MakeBlockQuerier(t testing.TB, fromFp, throughFp uint64, fromTs, throughTs _, err = builder.BuildFrom(itr) require.Nil(t, err) block := NewBlock(reader) - return NewBlockQuerier(block) + return NewBlockQuerier(block), data } func mkBasicSeriesWithBlooms(nSeries, keysPerSeries int, fromFp, throughFp model.Fingerprint, fromTs, throughTs model.Time) (seriesList []SeriesWithBloom, keysList [][][]byte) { seriesList = make([]SeriesWithBloom, 0, nSeries) keysList = make([][][]byte, 0, nSeries) + + step := (throughFp - fromFp) / model.Fingerprint(nSeries) + timeDelta := time.Duration(throughTs.Sub(fromTs).Nanoseconds() / int64(nSeries)) + for i := 0; i < nSeries; i++ { var series Series - step := (throughFp - fromFp) / (model.Fingerprint(nSeries)) series.Fingerprint = fromFp + model.Fingerprint(i)*step - timeDelta := fromTs + (throughTs-fromTs)/model.Time(nSeries)*model.Time(i) + from := fromTs.Add(timeDelta * time.Duration(i)) series.Chunks = []ChunkRef{ { - Start: fromTs + timeDelta*model.Time(i), - End: fromTs + timeDelta*model.Time(i), + Start: from, + End: from.Add(timeDelta), Checksum: uint32(i), }, } From e3309776bb6a9dd67c30f9be586828798feda34f Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Fri, 10 Nov 2023 12:50:23 +0100 Subject: [PATCH 15/44] Add helper function for parsing time in tests Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway_test.go | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index 64d8675b5ef45..21a01adb454a2 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -38,6 +38,14 @@ func parseDayTime(s string) config.DayTime { } } +func mktime(s string) model.Time { + ts, err := time.Parse("2006-01-02 15:04", s) + if err != nil { + panic(err) + } + return model.TimeFromUnix(ts.Unix()) +} + func groupRefs(t 
*testing.T, chunkRefs []*logproto.ChunkRef) []*logproto.GroupedChunkRefs { t.Helper() grouped := make([]*logproto.GroupedChunkRefs, 0, len(chunkRefs)) @@ -168,8 +176,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { require.NoError(t, err) }) - ts, _ := time.Parse("2006-01-02 15:04", "2023-10-03 10:00") - now := model.TimeFromUnix(ts.Unix()) + now := mktime("2023-10-03 10:00") chunkRefs := []*logproto.ChunkRef{ {Fingerprint: 3000, UserID: tenantID, From: now.Add(-24 * time.Hour), Through: now.Add(-23 * time.Hour), Checksum: 1}, @@ -207,8 +214,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) require.NoError(t, err) - ts, _ := time.Parse("2006-01-02 15:04", "2023-10-03 10:00") - now := model.TimeFromUnix(ts.Unix()) + now := mktime("2023-10-03 10:00") chunkRefs := []*logproto.ChunkRef{ {Fingerprint: 1000, UserID: tenantID, From: now.Add(-22 * time.Hour), Through: now.Add(-21 * time.Hour), Checksum: 1}, @@ -241,8 +247,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { require.NoError(t, err) }) - ts, _ := time.Parse("2006-01-02 15:04", "2023-10-03 10:00") - now := model.TimeFromUnix(ts.Unix()) + now := mktime("2023-10-03 10:00") tenants := []string{"tenant-a", "tenant-b", "tenant-c"} for idx, tenantID := range tenants { @@ -275,8 +280,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) require.NoError(t, err) - ts, _ := time.Parse("2006-01-02 15:04", "2023-10-03 10:00") - now := model.TimeFromUnix(ts.Unix()) + now := mktime("2023-10-03 10:00") // replace store implementation and re-initialize workers and sub-services bqs, data := createBlockQueriers(t, 5, now.Add(-8*time.Hour), now, 0, 1024) From 218ec2540a86b92d2ad2505771f63cccc615b43c Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 14 Nov 2023 08:09:39 +0100 Subject: [PATCH 16/44] Remove unused utility function Signed-off-by: Christian Haudum --- pkg/bloomgateway/util.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index 4796cbe0e2530..27da72cd356e5 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -49,10 +49,6 @@ func NewIterWithIndex[T any](i int, xs []T) *SliceIterWithIndex[T] { } } -func getDay(ts model.Time) int64 { - return ts.Unix() / int64(24*time.Hour/time.Second) -} - func getDayTime(ts model.Time) time.Time { return time.Date(ts.Time().Year(), ts.Time().Month(), ts.Time().Day(), 0, 0, 0, 0, time.UTC) } From 7cb3a6a8b375378b047ee9781eff29f44b16f080 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 14 Nov 2023 08:10:26 +0100 Subject: [PATCH 17/44] Improve comments Signed-off-by: Christian Haudum --- pkg/bloomgateway/multiplexing.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index 959b634f3fccc..28e691f38d98a 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -11,7 +11,7 @@ import ( v1 "github.com/grafana/loki/pkg/storage/bloom/v1" ) -// Task is the data structure that is enqueued to the internal queue and queued by query workers +// Task is the data structure that is enqueued to the internal queue and dequeued by query workers type Task struct { // ID is a lexcographically sortable unique identifier of the task ID ulid.ULID @@ -26,7 +26,7 @@ type Task struct { } // NewTask returns a new Task that can be enqueued to the task 
queue. -// As additional arguments, it returns a result and an error channel, as well +// In addition, it returns a result and an error channel, as well // as an error if the instantiation fails. func NewTask(tenantID string, req *logproto.FilterChunkRefRequest) (Task, chan v1.Output, chan error, error) { key, err := ulid.New(ulid.Now(), nil) From bae443d73f15fb3f95ec23f1e3e5243b209785b0 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 14 Nov 2023 08:14:22 +0100 Subject: [PATCH 18/44] Make function name more explicit Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 2 +- pkg/bloomgateway/multiplexing.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index ada5642dbe6f5..4cfe7e0a8ed97 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -462,7 +462,7 @@ func (w *worker) running(ctx context.Context) error { // split task into separate tasks per day for i := fromDay; i.Before(throughDay); i = i.Add(24 * time.Hour) { r := filterRequestForDay(task.Request, i) - t := task.WithRequest(r) + t := task.CopyWithRequest(r) tasksPerDay[i] = append(tasksPerDay[i], t) } } diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index 28e691f38d98a..d2ba195057dce 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -50,8 +50,8 @@ func (t Task) Bounds() (time.Time, time.Time) { return getDayTime(t.Request.From), getDayTime(t.Request.Through) } -// WithRequest returns a copy of the task, which holds the provided Request req. -func (t Task) WithRequest(req *logproto.FilterChunkRefRequest) Task { +// CopyWithRequest returns a copy of the original task, but with newly provided request +func (t Task) CopyWithRequest(req *logproto.FilterChunkRefRequest) Task { return Task{ ID: t.ID, Tenant: t.Tenant, From a68af7d4a91dced95e5e078b4985d529ff2f9c7e Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 14 Nov 2023 08:38:58 +0100 Subject: [PATCH 19/44] Unify bloomgateway metrics creation Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index 4cfe7e0a8ed97..9cd90d599b27b 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -78,22 +78,26 @@ const ( pendingTasksInitialCap = 1024 ) +const ( + metricsSubsystem = "bloom_gateway" +) + type metrics struct { queueDuration prometheus.Histogram inflightRequests prometheus.Summary } -func newMetrics(subsystem string, registerer prometheus.Registerer) *metrics { +func newMetrics(registerer prometheus.Registerer, namespace, subsystem string) *metrics { return &metrics{ queueDuration: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ - Namespace: constants.Loki, + Namespace: namespace, Subsystem: subsystem, Name: "queue_duration_seconds", Help: "Time spent by tasks in queue before getting picked up by a worker.", Buckets: prometheus.DefBuckets, }), inflightRequests: promauto.With(registerer).NewSummary(prometheus.SummaryOpts{ - Namespace: constants.Loki, + Namespace: namespace, Subsystem: subsystem, Name: "inflight_tasks", Help: "Number of inflight tasks (either queued or processing) sampled at a regular interval. 
Quantile buckets keep track of inflight tasks over the last 60s.", @@ -164,19 +168,17 @@ type Gateway struct { // New returns a new instance of the Bloom Gateway. func New(cfg Config, schemaCfg config.SchemaConfig, storageCfg storage.Config, overrides Limits, shardingStrategy ShardingStrategy, cm storage.ClientMetrics, logger log.Logger, reg prometheus.Registerer) (*Gateway, error) { - metricsSubsystem := "bloom_gateway" - g := &Gateway{ cfg: cfg, logger: logger, - metrics: newMetrics(metricsSubsystem, reg), + metrics: newMetrics(reg, constants.Loki, metricsSubsystem), sharding: shardingStrategy, pendingTasks: makePendingTasks(pendingTasksInitialCap), workerConfig: workerConfig{ maxWaitTime: 200 * time.Millisecond, maxItems: 100, }, - workerMetrics: newWorkerMetrics(metricsSubsystem, reg), + workerMetrics: newWorkerMetrics(reg, constants.Loki, metricsSubsystem), queueMetrics: queue.NewMetrics(reg, constants.Loki, metricsSubsystem), } @@ -350,29 +352,29 @@ type workerMetrics struct { storeAccessLatency *prometheus.HistogramVec } -func newWorkerMetrics(subsystem string, registerer prometheus.Registerer) *workerMetrics { +func newWorkerMetrics(registerer prometheus.Registerer, namespace, subsystem string) *workerMetrics { labels := []string{"worker"} return &workerMetrics{ dequeuedTasks: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: namespace, Subsystem: subsystem, Name: "dequeued_tasks_total", Help: "Total amount of tasks that the worker dequeued from the bloom query queue", }, labels), dequeueErrors: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", + Namespace: namespace, Subsystem: subsystem, Name: "dequeue_errors_total", Help: "Total amount of failed dequeue operations", }, labels), dequeueWaitTime: promauto.With(registerer).NewSummaryVec(prometheus.SummaryOpts{ - Namespace: "loki", + Namespace: namespace, Subsystem: subsystem, Name: "dequeue_wait_time", Help: "Time spent waiting for dequeuing tasks from queue", }, labels), storeAccessLatency: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "loki", + Namespace: namespace, Subsystem: subsystem, Name: "store_latency", Help: "Latency in seconds of accessing the bloom store component", From b4e623f9575ac41b37b5287b3f93d11202750c22 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 14 Nov 2023 08:49:31 +0100 Subject: [PATCH 20/44] Better variable naming Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index 9cd90d599b27b..ad2bbadf0e437 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -494,7 +494,7 @@ func (w *worker) running(ctx context.Context) error { // TODO(chaudum): Add API that allows to process blocks as soon as they become available. // This will require to change the taskMergeIterator to a slice of requests so we can seek // to the appropriate fingerprint range within the slice that matches the block's fingerprint range. 
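// Editor's note: the call below queries blocks for exactly one day. The range
// runs from 00:00:00 of `day` to `day` plus 24h minus one nanosecond
// (23:59:59.999999999), so the end bound stays inside the same day instead of
// touching the first instant of the next one.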
- bqs, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour).Add(-1*time.Nanosecond), fingerprints) + blockQueriers, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour).Add(-1*time.Nanosecond), fingerprints) w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockQueriers").Observe(time.Since(storeFetchStart).Seconds()) if err != nil { for _, t := range tasks { @@ -507,7 +507,7 @@ func (w *worker) running(ctx context.Context) error { // No blocks found. // Since there are no blocks for the given tasks, we need to return the // unfiltered list of chunk refs. - if len(bqs) == 0 { + if len(blockQueriers) == 0 { level.Warn(logger).Log("msg", "no blocks found") for _, t := range tasks { for _, ref := range t.Request.Refs { @@ -522,9 +522,9 @@ func (w *worker) running(ctx context.Context) error { } hasNext := it.Next() - for _, bq := range bqs { + for _, blockQuerier := range blockQueriers { requests = requests[:0] - for hasNext && it.At().Fp <= bq.MaxFp { + for hasNext && it.At().Fp <= blockQuerier.MaxFp { requests = append(requests, it.At().Request) hasNext = it.Next() } @@ -533,7 +533,7 @@ func (w *worker) running(ctx context.Context) error { if len(requests) == 0 { continue } - fq := bq.Fuse([]v1.PeekingIterator[v1.Request]{NewIterWithIndex(0, requests)}) + fq := blockQuerier.Fuse([]v1.PeekingIterator[v1.Request]{NewIterWithIndex(0, requests)}) err := fq.Run() if err != nil { for _, t := range tasks { From cd4011a0a275695ef2284de1831f73445a3a0bb4 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 14 Nov 2023 08:55:27 +0100 Subject: [PATCH 21/44] Split worker code into separate go file Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 224 ----------------------------- pkg/bloomgateway/worker.go | 239 +++++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+), 224 deletions(-) create mode 100644 pkg/bloomgateway/worker.go diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index ad2bbadf0e437..ed9a4fa5b6430 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -56,12 +56,10 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/prometheus/common/model" "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/queue" "github.com/grafana/loki/pkg/storage" - v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" "github.com/grafana/loki/pkg/util" @@ -339,225 +337,3 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk } } } - -type workerConfig struct { - maxWaitTime time.Duration - maxItems int -} - -type workerMetrics struct { - dequeuedTasks *prometheus.CounterVec - dequeueErrors *prometheus.CounterVec - dequeueWaitTime *prometheus.SummaryVec - storeAccessLatency *prometheus.HistogramVec -} - -func newWorkerMetrics(registerer prometheus.Registerer, namespace, subsystem string) *workerMetrics { - labels := []string{"worker"} - return &workerMetrics{ - dequeuedTasks: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "dequeued_tasks_total", - Help: "Total amount of tasks that the worker dequeued from the bloom query queue", - }, labels), - dequeueErrors: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - 
Namespace: namespace, - Subsystem: subsystem, - Name: "dequeue_errors_total", - Help: "Total amount of failed dequeue operations", - }, labels), - dequeueWaitTime: promauto.With(registerer).NewSummaryVec(prometheus.SummaryOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "dequeue_wait_time", - Help: "Time spent waiting for dequeuing tasks from queue", - }, labels), - storeAccessLatency: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "store_latency", - Help: "Latency in seconds of accessing the bloom store component", - }, append(labels, "operation")), - } -} - -// worker is a datastructure that consumes tasks from the request queue, -// processes them and returns the result/error back to the response channels of -// the tasks. -// It is responsible for multiplexing tasks so they can be processes in a more -// efficient way. -type worker struct { - services.Service - - id string - cfg workerConfig - queue *queue.RequestQueue - store bloomshipper.Store - tasks *pendingTasks - logger log.Logger - metrics *workerMetrics -} - -func newWorker(id string, cfg workerConfig, queue *queue.RequestQueue, store bloomshipper.Store, tasks *pendingTasks, logger log.Logger, metrics *workerMetrics) *worker { - w := &worker{ - id: id, - cfg: cfg, - queue: queue, - store: store, - tasks: tasks, - logger: log.With(logger, "worker", id), - metrics: metrics, - } - w.Service = services.NewBasicService(w.starting, w.running, w.stopping).WithName(id) - return w -} - -func (w *worker) starting(_ context.Context) error { - level.Debug(w.logger).Log("msg", "starting worker") - w.queue.RegisterConsumerConnection(w.id) - return nil -} - -func (w *worker) running(ctx context.Context) error { - idx := queue.StartIndexWithLocalQueue - - requests := make([]v1.Request, 0, 128) - fingerprints := make([]uint64, 0, 1024) - - for ctx.Err() == nil { - taskCtx := context.Background() - dequeueStart := time.Now() - items, newIdx, err := w.queue.DequeueMany(taskCtx, idx, w.id, w.cfg.maxItems, w.cfg.maxWaitTime) - w.metrics.dequeueWaitTime.WithLabelValues(w.id).Observe(time.Since(dequeueStart).Seconds()) - if err != nil { - // We only return an error if the queue is stopped and dequeuing did not yield any items - if err == queue.ErrStopped && len(items) == 0 { - return err - } - w.metrics.dequeueErrors.WithLabelValues(w.id).Inc() - level.Error(w.logger).Log("msg", "failed to dequeue tasks", "err", err, "items", len(items)) - } - idx = newIdx - - if len(items) == 0 { - continue - } - w.metrics.dequeuedTasks.WithLabelValues(w.id).Add(float64(len(items))) - - tasksPerDay := make(map[time.Time][]Task) - - for _, item := range items { - task, ok := item.(Task) - if !ok { - // This really should never happen, because only the bloom gateway itself can enqueue tasks. 
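// Editor's note: the checked assertion below can only fail if something other
// than the gateway's own FilterChunkRefs handler enqueued an item, since Task
// is the only type the gateway ever puts on this queue. Returning an error
// from running() stops the worker service, so such a programming mistake
// surfaces loudly rather than being skipped silently.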
- return errors.Errorf("failed to cast dequeued item to Task: %v", item) - } - level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID) - w.tasks.Delete(task.ID) - - fromDay, throughDay := task.Bounds() - - if fromDay.Equal(throughDay) { - tasksPerDay[fromDay] = append(tasksPerDay[fromDay], task) - } else { - // split task into separate tasks per day - for i := fromDay; i.Before(throughDay); i = i.Add(24 * time.Hour) { - r := filterRequestForDay(task.Request, i) - t := task.CopyWithRequest(r) - tasksPerDay[i] = append(tasksPerDay[i], t) - } - } - } - - for day, tasks := range tasksPerDay { - logger := log.With(w.logger, "day", day) - level.Debug(logger).Log("msg", "process tasks", "tasks", len(tasks)) - - it := newTaskMergeIterator(tasks...) - - fingerprints = fingerprints[:0] - for it.Next() { - // fingerprints are already sorted. we can skip duplicates by checking - // if the next is greater than the previous - fp := uint64(it.At().Fp) - if len(fingerprints) > 0 && fp <= fingerprints[len(fingerprints)-1] { - continue - } - fingerprints = append(fingerprints, fp) - } - - it.Reset() - - storeFetchStart := time.Now() - // GetBlockQueriers() waits until all blocks are downloaded and available for querying. - // TODO(chaudum): Add API that allows to process blocks as soon as they become available. - // This will require to change the taskMergeIterator to a slice of requests so we can seek - // to the appropriate fingerprint range within the slice that matches the block's fingerprint range. - blockQueriers, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour).Add(-1*time.Nanosecond), fingerprints) - w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockQueriers").Observe(time.Since(storeFetchStart).Seconds()) - if err != nil { - for _, t := range tasks { - t.ErrCh <- err - } - // continue with tasks of next day - continue - } - - // No blocks found. - // Since there are no blocks for the given tasks, we need to return the - // unfiltered list of chunk refs. 
- if len(blockQueriers) == 0 { - level.Warn(logger).Log("msg", "no blocks found") - for _, t := range tasks { - for _, ref := range t.Request.Refs { - t.ResCh <- v1.Output{ - Fp: model.Fingerprint(ref.Fingerprint), - Chks: convertToChunkRefs(ref.Refs), - } - } - } - // continue with tasks of next day - continue - } - - hasNext := it.Next() - for _, blockQuerier := range blockQueriers { - requests = requests[:0] - for hasNext && it.At().Fp <= blockQuerier.MaxFp { - requests = append(requests, it.At().Request) - hasNext = it.Next() - } - // level.Debug(logger).Log("msg", "processing block", "block", i+1, "of", len(bqs), "requests", len(requests)) - // no fingerprints in the fingerprint range of the current block - if len(requests) == 0 { - continue - } - fq := blockQuerier.Fuse([]v1.PeekingIterator[v1.Request]{NewIterWithIndex(0, requests)}) - err := fq.Run() - if err != nil { - for _, t := range tasks { - t.ErrCh <- errors.Wrap(err, "failed to run chunk check") - } - continue - } - } - - for hasNext { - level.Warn(logger).Log("msg", "processing remaining fingerprint", "fp", it.At().Fp) - it.At().Response <- v1.Output{ - Fp: it.At().Fp, - Chks: it.At().Chks, - } - hasNext = it.Next() - } - } - } - return ctx.Err() -} - -func (w *worker) stopping(err error) error { - level.Debug(w.logger).Log("msg", "stopping worker", "err", err) - w.queue.UnregisterConsumerConnection(w.id) - return nil -} diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go new file mode 100644 index 0000000000000..cb4a800ec2a80 --- /dev/null +++ b/pkg/bloomgateway/worker.go @@ -0,0 +1,239 @@ +package bloomgateway + +import ( + "context" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/grafana/dskit/services" + "github.com/grafana/loki/pkg/queue" + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/common/model" +) + +type workerConfig struct { + maxWaitTime time.Duration + maxItems int +} + +type workerMetrics struct { + dequeuedTasks *prometheus.CounterVec + dequeueErrors *prometheus.CounterVec + dequeueWaitTime *prometheus.SummaryVec + storeAccessLatency *prometheus.HistogramVec +} + +func newWorkerMetrics(registerer prometheus.Registerer, namespace, subsystem string) *workerMetrics { + labels := []string{"worker"} + return &workerMetrics{ + dequeuedTasks: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "dequeued_tasks_total", + Help: "Total amount of tasks that the worker dequeued from the bloom query queue", + }, labels), + dequeueErrors: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "dequeue_errors_total", + Help: "Total amount of failed dequeue operations", + }, labels), + dequeueWaitTime: promauto.With(registerer).NewSummaryVec(prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "dequeue_wait_time", + Help: "Time spent waiting for dequeuing tasks from queue", + }, labels), + storeAccessLatency: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "store_latency", + Help: "Latency in seconds of accessing the bloom store component", + }, append(labels, "operation")), + } +} + +// worker is a datastructure 
that consumes tasks from the request queue, +// processes them and returns the result/error back to the response channels of +// the tasks. +// It is responsible for multiplexing tasks so they can be processes in a more +// efficient way. +type worker struct { + services.Service + + id string + cfg workerConfig + queue *queue.RequestQueue + store bloomshipper.Store + tasks *pendingTasks + logger log.Logger + metrics *workerMetrics +} + +func newWorker(id string, cfg workerConfig, queue *queue.RequestQueue, store bloomshipper.Store, tasks *pendingTasks, logger log.Logger, metrics *workerMetrics) *worker { + w := &worker{ + id: id, + cfg: cfg, + queue: queue, + store: store, + tasks: tasks, + logger: log.With(logger, "worker", id), + metrics: metrics, + } + w.Service = services.NewBasicService(w.starting, w.running, w.stopping).WithName(id) + return w +} + +func (w *worker) starting(_ context.Context) error { + level.Debug(w.logger).Log("msg", "starting worker") + w.queue.RegisterConsumerConnection(w.id) + return nil +} + +func (w *worker) running(ctx context.Context) error { + idx := queue.StartIndexWithLocalQueue + + requests := make([]v1.Request, 0, 128) + fingerprints := make([]uint64, 0, 1024) + + for ctx.Err() == nil { + taskCtx := context.Background() + dequeueStart := time.Now() + items, newIdx, err := w.queue.DequeueMany(taskCtx, idx, w.id, w.cfg.maxItems, w.cfg.maxWaitTime) + w.metrics.dequeueWaitTime.WithLabelValues(w.id).Observe(time.Since(dequeueStart).Seconds()) + if err != nil { + // We only return an error if the queue is stopped and dequeuing did not yield any items + if err == queue.ErrStopped && len(items) == 0 { + return err + } + w.metrics.dequeueErrors.WithLabelValues(w.id).Inc() + level.Error(w.logger).Log("msg", "failed to dequeue tasks", "err", err, "items", len(items)) + } + idx = newIdx + + if len(items) == 0 { + continue + } + w.metrics.dequeuedTasks.WithLabelValues(w.id).Add(float64(len(items))) + + tasksPerDay := make(map[time.Time][]Task) + + for _, item := range items { + task, ok := item.(Task) + if !ok { + // This really should never happen, because only the bloom gateway itself can enqueue tasks. + return errors.Errorf("failed to cast dequeued item to Task: %v", item) + } + level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID) + w.tasks.Delete(task.ID) + + fromDay, throughDay := task.Bounds() + + if fromDay.Equal(throughDay) { + tasksPerDay[fromDay] = append(tasksPerDay[fromDay], task) + } else { + // split task into separate tasks per day + for i := fromDay; i.Before(throughDay); i = i.Add(24 * time.Hour) { + r := filterRequestForDay(task.Request, i) + t := task.CopyWithRequest(r) + tasksPerDay[i] = append(tasksPerDay[i], t) + } + } + } + + for day, tasks := range tasksPerDay { + logger := log.With(w.logger, "day", day) + level.Debug(logger).Log("msg", "process tasks", "tasks", len(tasks)) + + it := newTaskMergeIterator(tasks...) + + fingerprints = fingerprints[:0] + for it.Next() { + // fingerprints are already sorted. we can skip duplicates by checking + // if the next is greater than the previous + fp := uint64(it.At().Fp) + if len(fingerprints) > 0 && fp <= fingerprints[len(fingerprints)-1] { + continue + } + fingerprints = append(fingerprints, fp) + } + + it.Reset() + + storeFetchStart := time.Now() + // GetBlockQueriers() waits until all blocks are downloaded and available for querying. + // TODO(chaudum): Add API that allows to process blocks as soon as they become available. 
+ // This will require to change the taskMergeIterator to a slice of requests so we can seek + // to the appropriate fingerprint range within the slice that matches the block's fingerprint range. + blockQueriers, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour).Add(-1*time.Nanosecond), fingerprints) + w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockQueriers").Observe(time.Since(storeFetchStart).Seconds()) + if err != nil { + for _, t := range tasks { + t.ErrCh <- err + } + // continue with tasks of next day + continue + } + + // No blocks found. + // Since there are no blocks for the given tasks, we need to return the + // unfiltered list of chunk refs. + if len(blockQueriers) == 0 { + level.Warn(logger).Log("msg", "no blocks found") + for _, t := range tasks { + for _, ref := range t.Request.Refs { + t.ResCh <- v1.Output{ + Fp: model.Fingerprint(ref.Fingerprint), + Chks: convertToChunkRefs(ref.Refs), + } + } + } + // continue with tasks of next day + continue + } + + hasNext := it.Next() + for _, blockQuerier := range blockQueriers { + requests = requests[:0] + for hasNext && it.At().Fp <= blockQuerier.MaxFp { + requests = append(requests, it.At().Request) + hasNext = it.Next() + } + // level.Debug(logger).Log("msg", "processing block", "block", i+1, "of", len(bqs), "requests", len(requests)) + // no fingerprints in the fingerprint range of the current block + if len(requests) == 0 { + continue + } + fq := blockQuerier.Fuse([]v1.PeekingIterator[v1.Request]{NewIterWithIndex(0, requests)}) + err := fq.Run() + if err != nil { + for _, t := range tasks { + t.ErrCh <- errors.Wrap(err, "failed to run chunk check") + } + continue + } + } + + for hasNext { + level.Warn(logger).Log("msg", "processing remaining fingerprint", "fp", it.At().Fp) + it.At().Response <- v1.Output{ + Fp: it.At().Fp, + Chks: it.At().Chks, + } + hasNext = it.Next() + } + } + } + return ctx.Err() +} + +func (w *worker) stopping(err error) error { + level.Debug(w.logger).Log("msg", "stopping worker", "err", err) + w.queue.UnregisterConsumerConnection(w.id) + return nil +} From e15094c80930b164c3cba687afc6bc4fc93b2b0a Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 14 Nov 2023 09:06:24 +0100 Subject: [PATCH 22/44] Make separate interface for NesterIterator Signed-off-by: Christian Haudum --- pkg/bloomgateway/multiplexing.go | 2 +- pkg/storage/bloom/v1/merge.go | 4 ++-- pkg/storage/bloom/v1/util.go | 5 +++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index d2ba195057dce..0f42f1d5ff913 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -108,7 +108,7 @@ func (it *taskMergeIterator) Next() bool { return false } - currIter, ok := it.heap.CurrIter().(*SliceIterWithIndex[*logproto.GroupedChunkRefs]) + currIter, ok := it.heap.Iter().(*SliceIterWithIndex[*logproto.GroupedChunkRefs]) if !ok { it.err = errors.New("failed to cast iterator") return false diff --git a/pkg/storage/bloom/v1/merge.go b/pkg/storage/bloom/v1/merge.go index a519a44795117..46c9012c5f882 100644 --- a/pkg/storage/bloom/v1/merge.go +++ b/pkg/storage/bloom/v1/merge.go @@ -58,8 +58,8 @@ func (mbq *HeapIterator[T]) Next() (ok bool) { return } -// TODO(chaudum): find a better place -func (mbq *HeapIterator[T]) CurrIter() PeekingIterator[T] { +// Curr implements NestedIterator +func (mbq *HeapIterator[T]) Iter() Iterator[T] { return mbq.currIter } diff --git a/pkg/storage/bloom/v1/util.go 
b/pkg/storage/bloom/v1/util.go index 15de62e9f9590..c5b0ae688596b 100644 --- a/pkg/storage/bloom/v1/util.go +++ b/pkg/storage/bloom/v1/util.go @@ -83,6 +83,11 @@ type PeekingIterator[T any] interface { Iterator[T] } +type NestedIterator[T any] interface { + Iter() Iterator[T] + Iterator[T] +} + type PeekIter[T any] struct { itr Iterator[T] From d9fa50b95f27afadd2cd00f29439cfb861e8bfda Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 14 Nov 2023 09:15:01 +0100 Subject: [PATCH 23/44] Test iterator Reset() more thoroughly Signed-off-by: Christian Haudum --- pkg/bloomgateway/multiplexing_test.go | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/pkg/bloomgateway/multiplexing_test.go b/pkg/bloomgateway/multiplexing_test.go index 915d140756560..4e572c1280fa8 100644 --- a/pkg/bloomgateway/multiplexing_test.go +++ b/pkg/bloomgateway/multiplexing_test.go @@ -161,7 +161,7 @@ func TestTaskMergeIterator(t *testing.T) { From: ts.Add(-1 * time.Hour), Through: ts, Refs: []*logproto.GroupedChunkRefs{ - {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ + {Fingerprint: 200, Tenant: tenant, Refs: []*logproto.ShortRef{ {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 300}, }}, }, @@ -172,18 +172,14 @@ func TestTaskMergeIterator(t *testing.T) { it := newTaskMergeIterator(t1, t2, t3) require.NotNil(t, it.heap) - count := 0 - for it.Next() { - count++ - } - require.Equal(t, 3, count) - - it.Reset() - - count = 0 - for it.Next() { - count++ + checksums := []uint32{100, 200, 300} + for i := 0; i < 3; i++ { + count := 0 + for it.Next() { + require.Equal(t, checksums[count], it.At().Chks[0].Checksum) + count++ + } + it.Reset() } - require.Equal(t, 3, count) }) } From 388d818512bd3a084349cf56a9e2308a65026b1b Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 14 Nov 2023 10:21:22 +0100 Subject: [PATCH 24/44] Fix import order Signed-off-by: Christian Haudum --- pkg/bloomgateway/worker.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index cb4a800ec2a80..ba02c261e752b 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -7,13 +7,14 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/services" - "github.com/grafana/loki/pkg/queue" - v1 "github.com/grafana/loki/pkg/storage/bloom/v1" - "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" + + "github.com/grafana/loki/pkg/queue" + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" ) type workerConfig struct { From 01a6166fd849631914ad89a1a51ec209855f3ee0 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Wed, 15 Nov 2023 08:37:58 +0100 Subject: [PATCH 25/44] Remove unnecessary tenant ID assertion and sorting of the inputs Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 11 ----------- pkg/bloomgateway/bloomgateway_test.go | 26 -------------------------- 2 files changed, 37 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index ed9a4fa5b6430..161271dd72e88 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -67,7 +67,6 @@ import ( ) var errGatewayUnhealthy = errors.New("bloom-gateway is unhealthy in the ring") 
-var errInvalidTenant = errors.New("invalid tenant in chunk refs") // TODO(chaudum): Make these configurable const ( @@ -284,16 +283,6 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk }, nil } - for _, ref := range req.Refs { - if ref.Tenant != tenantID { - return nil, errors.Wrapf(errInvalidTenant, "expected chunk refs from tenant %s, got tenant %s", tenantID, ref.Tenant) - } - // Sort ShortRefs by From time in ascending order - sort.Slice(ref.Refs, func(i, j int) bool { - return ref.Refs[i].From.Before(ref.Refs[j].From) - }) - } - // Sort ChunkRefs by fingerprint in ascending order sort.Slice(req.Refs, func(i, j int) bool { return req.Refs[i].Fingerprint < req.Refs[j].Fingerprint diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index 21a01adb454a2..6d9ddbd663fb0 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -209,32 +209,6 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { }, res) }) - t.Run("returns error if chunk refs do not belong to tenant", func(t *testing.T) { - reg := prometheus.NewRegistry() - gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) - require.NoError(t, err) - - now := mktime("2023-10-03 10:00") - - chunkRefs := []*logproto.ChunkRef{ - {Fingerprint: 1000, UserID: tenantID, From: now.Add(-22 * time.Hour), Through: now.Add(-21 * time.Hour), Checksum: 1}, - {Fingerprint: 2000, UserID: "other", From: now.Add(-20 * time.Hour), Through: now.Add(-19 * time.Hour), Checksum: 2}, - } - req := &logproto.FilterChunkRefRequest{ - From: now.Add(-24 * time.Hour), - Through: now, - Refs: groupRefs(t, chunkRefs), - Filters: []*logproto.LineFilterExpression{ - {Operator: 1, Match: "foo"}, - }, - } - - ctx := user.InjectOrgID(context.Background(), tenantID) - _, err = gw.FilterChunkRefs(ctx, req) - require.Error(t, err) - require.Equal(t, "expected chunk refs from tenant test, got tenant other: invalid tenant in chunk refs", err.Error()) - }) - t.Run("gateway tracks active users", func(t *testing.T) { reg := prometheus.NewRegistry() gw, err := New(cfg, schemaCfg, storageCfg, limits, ss, cm, logger, reg) From f4eeb5ab7ccbd2f97dc9a24924a246befa5b338f Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Wed, 15 Nov 2023 11:05:19 +0100 Subject: [PATCH 26/44] Remove unnecessary cast Signed-off-by: Christian Haudum --- pkg/bloomgateway/multiplexing.go | 24 +++++++------------- pkg/bloomgateway/util.go | 38 ++++++++++++++++++-------------- pkg/bloomgateway/util_test.go | 31 +++++++++++++------------- pkg/bloomgateway/worker.go | 4 +++- pkg/storage/bloom/v1/merge.go | 15 ++++--------- pkg/storage/bloom/v1/util.go | 5 ----- 6 files changed, 51 insertions(+), 66 deletions(-) diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index 0f42f1d5ff913..614e3e048506c 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -1,7 +1,6 @@ package bloomgateway import ( - "errors" "time" "github.com/oklog/ulid" @@ -70,7 +69,7 @@ type FilterRequest struct { // taskMergeIterator implements v1.Iterator type taskMergeIterator struct { curr FilterRequest - heap *v1.HeapIterator[*logproto.GroupedChunkRefs] + heap *v1.HeapIterator[IndexedValue[*logproto.GroupedChunkRefs]] tasks []Task err error } @@ -85,13 +84,13 @@ func newTaskMergeIterator(tasks ...Task) *taskMergeIterator { } func (it *taskMergeIterator) init() { - sequences := make([]v1.PeekingIterator[*logproto.GroupedChunkRefs], 0, len(it.tasks)) + 
sequences := make([]v1.PeekingIterator[IndexedValue[*logproto.GroupedChunkRefs]], 0, len(it.tasks)) for i := range it.tasks { - sequences = append(sequences, NewIterWithIndex(i, it.tasks[i].Request.Refs)) + sequences = append(sequences, NewIterWithIndex(it.tasks[i].Request.Refs, i)) } it.heap = v1.NewHeapIterator( - func(i, j *logproto.GroupedChunkRefs) bool { - return i.Fingerprint < j.Fingerprint + func(i, j IndexedValue[*logproto.GroupedChunkRefs]) bool { + return i.val.Fingerprint < j.val.Fingerprint }, sequences..., ) @@ -108,18 +107,11 @@ func (it *taskMergeIterator) Next() bool { return false } - currIter, ok := it.heap.Iter().(*SliceIterWithIndex[*logproto.GroupedChunkRefs]) - if !ok { - it.err = errors.New("failed to cast iterator") - return false - } - iterIndex := currIter.Index() - - task := it.tasks[iterIndex] group := it.heap.At() + task := it.tasks[group.idx] - it.curr.Fp = model.Fingerprint(group.Fingerprint) - it.curr.Chks = convertToChunkRefs(group.Refs) + it.curr.Fp = model.Fingerprint(group.val.Fingerprint) + it.curr.Chks = convertToChunkRefs(group.val.Refs) it.curr.Searches = convertToSearches(task.Request.Filters) it.curr.Response = task.ResCh it.curr.Error = task.ErrCh diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index 27da72cd356e5..d55abf5b753fa 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -9,12 +9,17 @@ import ( v1 "github.com/grafana/loki/pkg/storage/bloom/v1" ) +type IndexedValue[T any] struct { + idx int + val T +} + // SliceIterWithIndex implements v1.PeekingIterator type SliceIterWithIndex[T any] struct { - xs []T // source slice - pos int // position within the slice - idx int // the index that identifies the iterator - zero T // zero value of T + xs []T // source slice + pos int // position within the slice + zero T // zero value of T + cache IndexedValue[T] } func (it *SliceIterWithIndex[T]) Next() bool { @@ -26,26 +31,25 @@ func (it *SliceIterWithIndex[T]) Err() error { return nil } -func (it *SliceIterWithIndex[T]) At() T { - return it.xs[it.pos] +func (it *SliceIterWithIndex[T]) At() IndexedValue[T] { + it.cache.val = it.xs[it.pos] + return it.cache } -func (it *SliceIterWithIndex[T]) Peek() (T, bool) { +func (it *SliceIterWithIndex[T]) Peek() (IndexedValue[T], bool) { if it.pos+1 >= len(it.xs) { - return it.zero, false + it.cache.val = it.zero + return it.cache, false } - return it.xs[it.pos+1], true -} - -func (it *SliceIterWithIndex[T]) Index() int { - return it.idx + it.cache.val = it.xs[it.pos+1] + return it.cache, true } -func NewIterWithIndex[T any](i int, xs []T) *SliceIterWithIndex[T] { +func NewIterWithIndex[T any](xs []T, idx int) v1.PeekingIterator[IndexedValue[T]] { return &SliceIterWithIndex[T]{ - xs: xs, - pos: -1, - idx: i, + xs: xs, + pos: -1, + cache: IndexedValue[T]{idx: idx}, } } diff --git a/pkg/bloomgateway/util_test.go b/pkg/bloomgateway/util_test.go index 897850a0438c2..30b31b461d637 100644 --- a/pkg/bloomgateway/util_test.go +++ b/pkg/bloomgateway/util_test.go @@ -11,31 +11,30 @@ import ( ) func TestSliceIterWithIndex(t *testing.T) { - t.Run("SliceIterWithIndex implements v1.Iterator interface", func(t *testing.T) { - xs := []string{"a", "b", "c"} - it := NewIterWithIndex(0, xs) - require.True(t, it.Next()) - require.Equal(t, "a", it.At()) - require.True(t, it.Next()) - require.Equal(t, "b", it.At()) - require.True(t, it.Next()) - require.Equal(t, "c", it.At()) - require.False(t, it.Next()) - require.NoError(t, it.Err()) - }) t.Run("SliceIterWithIndex implements v1.PeekingIterator 
interface", func(t *testing.T) { xs := []string{"a", "b", "c"} - it := NewIterWithIndex(0, xs) + it := NewIterWithIndex(xs, 123) + + // peek at first item p, ok := it.Peek() require.True(t, ok) - require.Equal(t, "a", p) + require.Equal(t, "a", p.val) + require.Equal(t, 123, p.idx) + + // proceed to first item require.True(t, it.Next()) - require.Equal(t, "a", it.At()) + require.Equal(t, "a", it.At().val) + require.Equal(t, 123, it.At().idx) + + // proceed to second and third item require.True(t, it.Next()) require.True(t, it.Next()) + + // peek at non-existing fourth item p, ok = it.Peek() require.False(t, ok) - require.Equal(t, "", p) // "" is zero value for type string + require.Equal(t, "", p.val) // "" is zero value for type string + require.Equal(t, 123, p.idx) }) } diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index ba02c261e752b..e9923bf29ed1f 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -210,7 +210,9 @@ func (w *worker) running(ctx context.Context) error { if len(requests) == 0 { continue } - fq := blockQuerier.Fuse([]v1.PeekingIterator[v1.Request]{NewIterWithIndex(0, requests)}) + fq := blockQuerier.Fuse([]v1.PeekingIterator[v1.Request]{ + v1.NewPeekingIter[v1.Request](v1.NewSliceIter[v1.Request](requests)), + }) err := fq.Run() if err != nil { for _, t := range tasks { diff --git a/pkg/storage/bloom/v1/merge.go b/pkg/storage/bloom/v1/merge.go index 46c9012c5f882..aaa4e568bc31e 100644 --- a/pkg/storage/bloom/v1/merge.go +++ b/pkg/storage/bloom/v1/merge.go @@ -8,8 +8,6 @@ type HeapIterator[T any] struct { itrs []PeekingIterator[T] less func(T, T) bool - currIter PeekingIterator[T] - zero T // zero value of T cache T ok bool @@ -58,11 +56,6 @@ func (mbq *HeapIterator[T]) Next() (ok bool) { return } -// Curr implements NestedIterator -func (mbq *HeapIterator[T]) Iter() Iterator[T] { - return mbq.currIter -} - // TODO(owen-d): don't swallow this error func (mbq *HeapIterator[T]) Err() error { return nil @@ -83,15 +76,15 @@ func (mbq *HeapIterator[T]) pop() (T, bool) { return mbq.zero, false } - mbq.currIter = mbq.itrs[0] - if ok := mbq.currIter.Next(); !ok { + curr := mbq.itrs[0] + if ok := curr.Next(); !ok { mbq.remove(0) continue } - result := mbq.currIter.At() + result := curr.At() - _, ok := mbq.currIter.Peek() + _, ok := curr.Peek() if !ok { // that was the end of the iterator. 
remove it from the heap mbq.remove(0) diff --git a/pkg/storage/bloom/v1/util.go b/pkg/storage/bloom/v1/util.go index c5b0ae688596b..15de62e9f9590 100644 --- a/pkg/storage/bloom/v1/util.go +++ b/pkg/storage/bloom/v1/util.go @@ -83,11 +83,6 @@ type PeekingIterator[T any] interface { Iterator[T] } -type NestedIterator[T any] interface { - Iter() Iterator[T] - Iterator[T] -} - type PeekIter[T any] struct { itr Iterator[T] From 6aee5e795cfa48245f83abcbbd8a51d18dab5555 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Wed, 15 Nov 2023 11:50:25 +0100 Subject: [PATCH 27/44] Use `for select` instead of `for ctx.Err()==nil` Signed-off-by: Christian Haudum --- pkg/bloomgateway/worker.go | 217 +++++++++++++++++++------------------ 1 file changed, 111 insertions(+), 106 deletions(-) diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index e9923bf29ed1f..c08b82ddf759b 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -102,137 +102,142 @@ func (w *worker) running(ctx context.Context) error { requests := make([]v1.Request, 0, 128) fingerprints := make([]uint64, 0, 1024) - for ctx.Err() == nil { - taskCtx := context.Background() - dequeueStart := time.Now() - items, newIdx, err := w.queue.DequeueMany(taskCtx, idx, w.id, w.cfg.maxItems, w.cfg.maxWaitTime) - w.metrics.dequeueWaitTime.WithLabelValues(w.id).Observe(time.Since(dequeueStart).Seconds()) - if err != nil { - // We only return an error if the queue is stopped and dequeuing did not yield any items - if err == queue.ErrStopped && len(items) == 0 { - return err + for { + select { + case <-ctx.Done(): + return ctx.Err() + + default: + taskCtx := context.Background() + dequeueStart := time.Now() + items, newIdx, err := w.queue.DequeueMany(taskCtx, idx, w.id, w.cfg.maxItems, w.cfg.maxWaitTime) + w.metrics.dequeueWaitTime.WithLabelValues(w.id).Observe(time.Since(dequeueStart).Seconds()) + if err != nil { + // We only return an error if the queue is stopped and dequeuing did not yield any items + if err == queue.ErrStopped && len(items) == 0 { + return err + } + w.metrics.dequeueErrors.WithLabelValues(w.id).Inc() + level.Error(w.logger).Log("msg", "failed to dequeue tasks", "err", err, "items", len(items)) } - w.metrics.dequeueErrors.WithLabelValues(w.id).Inc() - level.Error(w.logger).Log("msg", "failed to dequeue tasks", "err", err, "items", len(items)) - } - idx = newIdx + idx = newIdx - if len(items) == 0 { - continue - } - w.metrics.dequeuedTasks.WithLabelValues(w.id).Add(float64(len(items))) + if len(items) == 0 { + continue + } + w.metrics.dequeuedTasks.WithLabelValues(w.id).Add(float64(len(items))) - tasksPerDay := make(map[time.Time][]Task) + tasksPerDay := make(map[time.Time][]Task) - for _, item := range items { - task, ok := item.(Task) - if !ok { - // This really should never happen, because only the bloom gateway itself can enqueue tasks. 
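// Editor's note: a minimal, hedged sketch of the loop shape introduced just
// above in this hunk (not part of the patch; names are illustrative and the
// surrounding package is assumed to import "context"). Selecting on
// ctx.Done() makes the cancellation path explicit, while the default branch
// keeps the worker dequeuing; functionally it stays close to the previous
// `for ctx.Err() == nil` form, since cancellation is still only observed
// between dequeue rounds.
func exampleRunLoop(ctx context.Context, processOneBatch func(context.Context) error) error {
	for {
		select {
		case <-ctx.Done():
			// stop the consumer when the service context is cancelled
			return ctx.Err()
		default:
			if err := processOneBatch(ctx); err != nil {
				return err
			}
		}
	}
}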
- return errors.Errorf("failed to cast dequeued item to Task: %v", item) - } - level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID) - w.tasks.Delete(task.ID) - - fromDay, throughDay := task.Bounds() - - if fromDay.Equal(throughDay) { - tasksPerDay[fromDay] = append(tasksPerDay[fromDay], task) - } else { - // split task into separate tasks per day - for i := fromDay; i.Before(throughDay); i = i.Add(24 * time.Hour) { - r := filterRequestForDay(task.Request, i) - t := task.CopyWithRequest(r) - tasksPerDay[i] = append(tasksPerDay[i], t) + for _, item := range items { + task, ok := item.(Task) + if !ok { + // This really should never happen, because only the bloom gateway itself can enqueue tasks. + return errors.Errorf("failed to cast dequeued item to Task: %v", item) + } + level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID) + w.tasks.Delete(task.ID) + + fromDay, throughDay := task.Bounds() + + if fromDay.Equal(throughDay) { + tasksPerDay[fromDay] = append(tasksPerDay[fromDay], task) + } else { + // split task into separate tasks per day + for i := fromDay; i.Before(throughDay); i = i.Add(24 * time.Hour) { + r := filterRequestForDay(task.Request, i) + t := task.CopyWithRequest(r) + tasksPerDay[i] = append(tasksPerDay[i], t) + } } } - } - for day, tasks := range tasksPerDay { - logger := log.With(w.logger, "day", day) - level.Debug(logger).Log("msg", "process tasks", "tasks", len(tasks)) + for day, tasks := range tasksPerDay { + logger := log.With(w.logger, "day", day) + level.Debug(logger).Log("msg", "process tasks", "tasks", len(tasks)) - it := newTaskMergeIterator(tasks...) + it := newTaskMergeIterator(tasks...) - fingerprints = fingerprints[:0] - for it.Next() { - // fingerprints are already sorted. we can skip duplicates by checking - // if the next is greater than the previous - fp := uint64(it.At().Fp) - if len(fingerprints) > 0 && fp <= fingerprints[len(fingerprints)-1] { - continue + fingerprints = fingerprints[:0] + for it.Next() { + // fingerprints are already sorted. we can skip duplicates by checking + // if the next is greater than the previous + fp := uint64(it.At().Fp) + if len(fingerprints) > 0 && fp <= fingerprints[len(fingerprints)-1] { + continue + } + fingerprints = append(fingerprints, fp) } - fingerprints = append(fingerprints, fp) - } - it.Reset() + it.Reset() - storeFetchStart := time.Now() - // GetBlockQueriers() waits until all blocks are downloaded and available for querying. - // TODO(chaudum): Add API that allows to process blocks as soon as they become available. - // This will require to change the taskMergeIterator to a slice of requests so we can seek - // to the appropriate fingerprint range within the slice that matches the block's fingerprint range. - blockQueriers, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour).Add(-1*time.Nanosecond), fingerprints) - w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockQueriers").Observe(time.Since(storeFetchStart).Seconds()) - if err != nil { - for _, t := range tasks { - t.ErrCh <- err + storeFetchStart := time.Now() + // GetBlockQueriers() waits until all blocks are downloaded and available for querying. + // TODO(chaudum): Add API that allows to process blocks as soon as they become available. + // This will require to change the taskMergeIterator to a slice of requests so we can seek + // to the appropriate fingerprint range within the slice that matches the block's fingerprint range. 
+ blockQueriers, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour).Add(-1*time.Nanosecond), fingerprints) + w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockQueriers").Observe(time.Since(storeFetchStart).Seconds()) + if err != nil { + for _, t := range tasks { + t.ErrCh <- err + } + // continue with tasks of next day + continue } - // continue with tasks of next day - continue - } - // No blocks found. - // Since there are no blocks for the given tasks, we need to return the - // unfiltered list of chunk refs. - if len(blockQueriers) == 0 { - level.Warn(logger).Log("msg", "no blocks found") - for _, t := range tasks { - for _, ref := range t.Request.Refs { - t.ResCh <- v1.Output{ - Fp: model.Fingerprint(ref.Fingerprint), - Chks: convertToChunkRefs(ref.Refs), + // No blocks found. + // Since there are no blocks for the given tasks, we need to return the + // unfiltered list of chunk refs. + if len(blockQueriers) == 0 { + level.Warn(logger).Log("msg", "no blocks found") + for _, t := range tasks { + for _, ref := range t.Request.Refs { + t.ResCh <- v1.Output{ + Fp: model.Fingerprint(ref.Fingerprint), + Chks: convertToChunkRefs(ref.Refs), + } } } - } - // continue with tasks of next day - continue - } - - hasNext := it.Next() - for _, blockQuerier := range blockQueriers { - requests = requests[:0] - for hasNext && it.At().Fp <= blockQuerier.MaxFp { - requests = append(requests, it.At().Request) - hasNext = it.Next() - } - // level.Debug(logger).Log("msg", "processing block", "block", i+1, "of", len(bqs), "requests", len(requests)) - // no fingerprints in the fingerprint range of the current block - if len(requests) == 0 { + // continue with tasks of next day continue } - fq := blockQuerier.Fuse([]v1.PeekingIterator[v1.Request]{ - v1.NewPeekingIter[v1.Request](v1.NewSliceIter[v1.Request](requests)), - }) - err := fq.Run() - if err != nil { - for _, t := range tasks { - t.ErrCh <- errors.Wrap(err, "failed to run chunk check") + + hasNext := it.Next() + for _, blockQuerier := range blockQueriers { + requests = requests[:0] + for hasNext && it.At().Fp <= blockQuerier.MaxFp { + requests = append(requests, it.At().Request) + hasNext = it.Next() + } + // level.Debug(logger).Log("msg", "processing block", "block", i+1, "of", len(bqs), "requests", len(requests)) + // no fingerprints in the fingerprint range of the current block + if len(requests) == 0 { + continue + } + fq := blockQuerier.Fuse([]v1.PeekingIterator[v1.Request]{ + v1.NewPeekingIter[v1.Request](v1.NewSliceIter[v1.Request](requests)), + }) + err := fq.Run() + if err != nil { + for _, t := range tasks { + t.ErrCh <- errors.Wrap(err, "failed to run chunk check") + } + continue } - continue } - } - for hasNext { - level.Warn(logger).Log("msg", "processing remaining fingerprint", "fp", it.At().Fp) - it.At().Response <- v1.Output{ - Fp: it.At().Fp, - Chks: it.At().Chks, + for hasNext { + level.Warn(logger).Log("msg", "processing remaining fingerprint", "fp", it.At().Fp) + it.At().Response <- v1.Output{ + Fp: it.At().Fp, + Chks: it.At().Chks, + } + hasNext = it.Next() } - hasNext = it.Next() } } } - return ctx.Err() } func (w *worker) stopping(err error) error { From 29c2d14a8f6044b4ea829c1b6d843e0b8f4021ff Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 16 Nov 2023 09:20:25 +0100 Subject: [PATCH 28/44] Change task merge iterator to also filter chunkrefs by day Signed-off-by: Christian Haudum --- pkg/bloomgateway/multiplexing.go | 86 ++++++++++++++++++++++++++- 
pkg/bloomgateway/multiplexing_test.go | 7 ++- pkg/bloomgateway/util.go | 35 ++++++++++- pkg/bloomgateway/util_test.go | 2 +- pkg/bloomgateway/worker.go | 11 ++-- 5 files changed, 126 insertions(+), 15 deletions(-) diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index 614e3e048506c..5d5575cd03a60 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -1,6 +1,7 @@ package bloomgateway import ( + "sort" "time" "github.com/oklog/ulid" @@ -10,6 +11,10 @@ import ( v1 "github.com/grafana/loki/pkg/storage/bloom/v1" ) +const ( + Day = 24 * time.Hour +) + // Task is the data structure that is enqueued to the internal queue and dequeued by query workers type Task struct { // ID is a lexcographically sortable unique identifier of the task @@ -60,6 +65,80 @@ func (t Task) CopyWithRequest(req *logproto.FilterChunkRefRequest) Task { } } +type filterGroupedChunkRefsByDay struct { + day time.Time +} + +func (cf filterGroupedChunkRefsByDay) contains(a *logproto.GroupedChunkRefs) bool { + from, through := getFromThrough(a.Refs) + if from.Time().After(cf.day.Add(Day)) || through.Time().Before(cf.day) { + return false + } + return true +} + +func (cf filterGroupedChunkRefsByDay) filter(a *logproto.GroupedChunkRefs) *logproto.GroupedChunkRefs { + min := sort.Search(len(a.Refs), func(i int) bool { + start := a.Refs[i].From.Time() + return start.Compare(cf.day) >= 0 && start.Compare(cf.day.Add(Day)) < 0 + }) + max := sort.Search(len(a.Refs), func(i int) bool { + start := a.Refs[i].From.Time() + return start.Compare(cf.day.Add(Day)) >= 0 + }) + return &logproto.GroupedChunkRefs{ + Tenant: a.Tenant, + Fingerprint: a.Fingerprint, + Refs: a.Refs[min:max], + } +} + +func (t Task) ChunkIterForDay(day time.Time) v1.PeekingIterator[*logproto.GroupedChunkRefs] { + cf := filterGroupedChunkRefsByDay{day: day} + iter := &FilterIter[*logproto.GroupedChunkRefs]{ + iter: v1.NewSliceIter(t.Request.Refs), + predicate: cf.contains, + transform: cf.filter, + } + return v1.NewPeekingIter[*logproto.GroupedChunkRefs](iter) +} + +type Predicate[T any] func(a T) bool +type Transform[T any] func(a T) T + +type FilterIter[T any] struct { + iter v1.Iterator[T] + predicate Predicate[T] + transform Transform[T] + cache T + zero T // zero value of the return type of Next() +} + +func (it *FilterIter[T]) Next() bool { + next := it.iter.Next() + if !next { + it.cache = it.zero + return false + } + for next && !it.predicate(it.iter.At()) { + next = it.iter.Next() + if !next { + it.cache = it.zero + return false + } + } + it.cache = it.transform(it.iter.At()) + return true +} + +func (it *FilterIter[T]) At() T { + return it.cache +} + +func (it *FilterIter[T]) Err() error { + return nil +} + // FilterRequest extends v1.Request with an error channel type FilterRequest struct { v1.Request @@ -71,13 +150,15 @@ type taskMergeIterator struct { curr FilterRequest heap *v1.HeapIterator[IndexedValue[*logproto.GroupedChunkRefs]] tasks []Task + day time.Time err error } -func newTaskMergeIterator(tasks ...Task) *taskMergeIterator { +func newTaskMergeIterator(day time.Time, tasks ...Task) *taskMergeIterator { it := &taskMergeIterator{ tasks: tasks, curr: FilterRequest{}, + day: day, } it.init() return it @@ -86,7 +167,8 @@ func newTaskMergeIterator(tasks ...Task) *taskMergeIterator { func (it *taskMergeIterator) init() { sequences := make([]v1.PeekingIterator[IndexedValue[*logproto.GroupedChunkRefs]], 0, len(it.tasks)) for i := range it.tasks { - sequences = append(sequences, 
NewIterWithIndex(it.tasks[i].Request.Refs, i)) + iter := NewIterWithIndex(it.tasks[i].ChunkIterForDay(it.day), i) + sequences = append(sequences, iter) } it.heap = v1.NewHeapIterator( func(i, j IndexedValue[*logproto.GroupedChunkRefs]) bool { diff --git a/pkg/bloomgateway/multiplexing_test.go b/pkg/bloomgateway/multiplexing_test.go index 4e572c1280fa8..dc04522e13d2f 100644 --- a/pkg/bloomgateway/multiplexing_test.go +++ b/pkg/bloomgateway/multiplexing_test.go @@ -28,6 +28,7 @@ func TestTask(t *testing.T) { func TestTaskMergeIterator(t *testing.T) { // Thu Nov 09 2023 10:56:50 UTC ts := model.TimeFromUnix(1699523810) + day := getDayTime(ts) tenant := "fake" t.Run("empty requests result in empty iterator", func(t *testing.T) { @@ -55,7 +56,7 @@ func TestTaskMergeIterator(t *testing.T) { t3, _, _, err := NewTask(tenant, r3) require.NoError(t, err) - it := newTaskMergeIterator(t1, t2, t3) + it := newTaskMergeIterator(day, t1, t2, t3) require.NotNil(t, it.heap) // nothing to iterate over require.False(t, it.Next()) @@ -101,7 +102,7 @@ func TestTaskMergeIterator(t *testing.T) { t3, _, _, err := NewTask(tenant, r3) require.NoError(t, err) - it := newTaskMergeIterator(t1, t2, t3) + it := newTaskMergeIterator(day, t1, t2, t3) require.NotNil(t, it.heap) // first item @@ -169,7 +170,7 @@ func TestTaskMergeIterator(t *testing.T) { t3, _, _, err := NewTask(tenant, r3) require.NoError(t, err) - it := newTaskMergeIterator(t1, t2, t3) + it := newTaskMergeIterator(day, t1, t2, t3) require.NotNil(t, it.heap) checksums := []uint32{100, 200, 300} diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index d55abf5b753fa..39d4e370da7c7 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -14,7 +14,34 @@ type IndexedValue[T any] struct { val T } -// SliceIterWithIndex implements v1.PeekingIterator +type IterWithIndex[T any] struct { + v1.PeekingIterator[T] + zero T // zero value of T + cache IndexedValue[T] +} + +func (it *IterWithIndex[T]) At() IndexedValue[T] { + it.cache.val = it.PeekingIterator.At() + return it.cache +} + +func (it *IterWithIndex[T]) Peek() (IndexedValue[T], bool) { + peek, ok := it.PeekingIterator.Peek() + if !ok { + it.cache.val = it.zero + return it.cache, false + } + it.cache.val = peek + return it.cache, true +} + +func NewIterWithIndex[T any](iter v1.PeekingIterator[T], idx int) v1.PeekingIterator[IndexedValue[T]] { + return &IterWithIndex[T]{ + PeekingIterator: iter, + cache: IndexedValue[T]{idx: idx}, + } +} + type SliceIterWithIndex[T any] struct { xs []T // source slice pos int // position within the slice @@ -45,7 +72,7 @@ func (it *SliceIterWithIndex[T]) Peek() (IndexedValue[T], bool) { return it.cache, true } -func NewIterWithIndex[T any](xs []T, idx int) v1.PeekingIterator[IndexedValue[T]] { +func NewSliceIterWithIndex[T any](xs []T, idx int) v1.PeekingIterator[IndexedValue[T]] { return &SliceIterWithIndex[T]{ xs: xs, pos: -1, @@ -110,6 +137,10 @@ func filterRequestForDay(r *logproto.FilterChunkRefRequest, day time.Time) *logp } } +// TODO(chaudum): Fix Through time calculation +// getFromThrough assumes a list of ShortRefs sorted by From time +// However, it does also assume that the last item has the highest +// Through time, which might not be the case! 
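// Editor's note: a hedged sketch of how the TODO above could be resolved, not
// part of this patch and only illustrative (the helper name is made up). With
// refs sorted by From, the first element still yields the earliest From, but
// the latest Through has to be searched for explicitly rather than taken from
// the last element:
//
//	func getFromThroughScan(refs []*logproto.ShortRef) (model.Time, model.Time) {
//		if len(refs) == 0 {
//			return model.Earliest, model.Latest
//		}
//		from, through := refs[0].From, refs[0].Through
//		for _, ref := range refs[1:] {
//			if ref.Through > through {
//				through = ref.Through
//			}
//		}
//		return from, through
//	}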
func getFromThrough(refs []*logproto.ShortRef) (model.Time, model.Time) { if len(refs) == 0 { return model.Earliest, model.Latest diff --git a/pkg/bloomgateway/util_test.go b/pkg/bloomgateway/util_test.go index 30b31b461d637..8f200ebfbb296 100644 --- a/pkg/bloomgateway/util_test.go +++ b/pkg/bloomgateway/util_test.go @@ -13,7 +13,7 @@ import ( func TestSliceIterWithIndex(t *testing.T) { t.Run("SliceIterWithIndex implements v1.PeekingIterator interface", func(t *testing.T) { xs := []string{"a", "b", "c"} - it := NewIterWithIndex(xs, 123) + it := NewSliceIterWithIndex(xs, 123) // peek at first item p, ok := it.Peek() diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index c08b82ddf759b..4b7e6542e1dfa 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -143,11 +143,8 @@ func (w *worker) running(ctx context.Context) error { if fromDay.Equal(throughDay) { tasksPerDay[fromDay] = append(tasksPerDay[fromDay], task) } else { - // split task into separate tasks per day for i := fromDay; i.Before(throughDay); i = i.Add(24 * time.Hour) { - r := filterRequestForDay(task.Request, i) - t := task.CopyWithRequest(r) - tasksPerDay[i] = append(tasksPerDay[i], t) + tasksPerDay[i] = append(tasksPerDay[i], task) } } } @@ -156,7 +153,7 @@ func (w *worker) running(ctx context.Context) error { logger := log.With(w.logger, "day", day) level.Debug(logger).Log("msg", "process tasks", "tasks", len(tasks)) - it := newTaskMergeIterator(tasks...) + it := newTaskMergeIterator(day, tasks...) fingerprints = fingerprints[:0] for it.Next() { @@ -204,13 +201,13 @@ func (w *worker) running(ctx context.Context) error { } hasNext := it.Next() - for _, blockQuerier := range blockQueriers { + for i, blockQuerier := range blockQueriers { requests = requests[:0] for hasNext && it.At().Fp <= blockQuerier.MaxFp { requests = append(requests, it.At().Request) hasNext = it.Next() } - // level.Debug(logger).Log("msg", "processing block", "block", i+1, "of", len(bqs), "requests", len(requests)) + level.Debug(logger).Log("msg", "processing block", "block", i+1, "of", len(blockQueriers), "requests", len(requests)) // no fingerprints in the fingerprint range of the current block if len(requests) == 0 { continue From f01220cb2cc95011be186537390fa2ec6595ff5d Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 16 Nov 2023 11:52:24 +0100 Subject: [PATCH 29/44] Add TODO for using a pool when dequeueing many Signed-off-by: Christian Haudum --- pkg/queue/queue.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/queue/queue.go b/pkg/queue/queue.go index 44868fbefc584..336afa1ced490 100644 --- a/pkg/queue/queue.go +++ b/pkg/queue/queue.go @@ -132,6 +132,7 @@ func (q *RequestQueue) DequeueMany(ctx context.Context, last QueueIndex, consume defer cancel() var idx QueueIndex + // TODO(chaudum): Use a pool items := make([]Request, 0, maxItems) for { From 81c49829cdc305b832d074dd2e0accbf15149315 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 16 Nov 2023 13:45:23 +0100 Subject: [PATCH 30/44] Use a buffer pool when dequeuing many Signed-off-by: Christian Haudum --- pkg/queue/queue.go | 9 +++++++-- pkg/queue/util.go | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 pkg/queue/util.go diff --git a/pkg/queue/queue.go b/pkg/queue/queue.go index 336afa1ced490..b91741d1faef1 100644 --- a/pkg/queue/queue.go +++ b/pkg/queue/queue.go @@ -59,6 +59,7 @@ type RequestQueue struct { stopped bool metrics *Metrics + pool *BufferPool[Request] } func 
NewRequestQueue(maxOutstandingPerTenant int, forgetDelay time.Duration, metrics *Metrics) *RequestQueue { @@ -66,6 +67,7 @@ func NewRequestQueue(maxOutstandingPerTenant int, forgetDelay time.Duration, met queues: newTenantQueues(maxOutstandingPerTenant, forgetDelay), connectedConsumers: atomic.NewInt32(0), metrics: metrics, + pool: NewBufferPool[Request](1<<6, 1<<10, 2), // Buckets are [64, 128, 256, 512, 1024]. } q.cond = contextCond{Cond: sync.NewCond(&q.mtx)} @@ -132,8 +134,11 @@ func (q *RequestQueue) DequeueMany(ctx context.Context, last QueueIndex, consume defer cancel() var idx QueueIndex - // TODO(chaudum): Use a pool - items := make([]Request, 0, maxItems) + + items := q.pool.Get(maxItems) + defer func() { + q.pool.Put(items) + }() for { item, newIdx, err := q.Dequeue(dequeueCtx, last, consumerID) diff --git a/pkg/queue/util.go b/pkg/queue/util.go new file mode 100644 index 0000000000000..9d8312a73f072 --- /dev/null +++ b/pkg/queue/util.go @@ -0,0 +1,25 @@ +package queue + +import "github.com/prometheus/prometheus/util/pool" + +// BufferPool uses a bucket pool and wraps the Get() and Put() functions for +// simpler access. +type BufferPool[T any] struct { + p *pool.Pool +} + +func NewBufferPool[T any](minSize, maxSize int, factor float64) *BufferPool[T] { + return &BufferPool[T]{ + p: pool.New(minSize, maxSize, factor, func(i int) interface{} { + return make([]T, 0, i) + }), + } +} + +func (p *BufferPool[T]) Get(n int) []T { + return p.p.Get(n).([]T) +} + +func (p *BufferPool[T]) Put(buf []T) { + p.p.Put(buf[0:0]) +} From ed0949210e49ddafa3c177526ff733cc44afb40d Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Fri, 17 Nov 2023 15:52:22 +0100 Subject: [PATCH 31/44] Improve best case for chunk filtering by day Signed-off-by: Christian Haudum --- pkg/bloomgateway/multiplexing.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index 5d5575cd03a60..0e8d8e052958a 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -78,6 +78,15 @@ func (cf filterGroupedChunkRefsByDay) contains(a *logproto.GroupedChunkRefs) boo } func (cf filterGroupedChunkRefsByDay) filter(a *logproto.GroupedChunkRefs) *logproto.GroupedChunkRefs { + minTs, maxTs := getFromThrough(a.Refs) + + // in most cases, all chunks are within day range + if minTs.Time().Compare(cf.day) >= 0 && maxTs.Time().Before(cf.day.Add(24*time.Hour)) { + return a + } + + // case where certain chunks are outside of day range + // using binary search to get min and max index of chunks that fall into the day range min := sort.Search(len(a.Refs), func(i int) bool { start := a.Refs[i].From.Time() return start.Compare(cf.day) >= 0 && start.Compare(cf.day.Add(Day)) < 0 @@ -86,6 +95,7 @@ func (cf filterGroupedChunkRefsByDay) filter(a *logproto.GroupedChunkRefs) *logp start := a.Refs[i].From.Time() return start.Compare(cf.day.Add(Day)) >= 0 }) + return &logproto.GroupedChunkRefs{ Tenant: a.Tenant, Fingerprint: a.Fingerprint, From c2ac04a2c5d9ce8415ccc3f809a4075bb99781b6 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 21 Nov 2023 09:42:28 +0100 Subject: [PATCH 32/44] Implement inverted logic for checking chunks against blooms Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 57 ++++++++++---- pkg/bloomgateway/bloomgateway_test.go | 3 +- pkg/bloomgateway/multiplexing.go | 38 ++++----- pkg/bloomgateway/multiplexing_test.go | 70 +++++++++++++++++ pkg/bloomgateway/util.go | 73 ++---------------- 
pkg/bloomgateway/util_test.go | 107 -------------------------- pkg/bloomgateway/worker.go | 8 +- 7 files changed, 139 insertions(+), 217 deletions(-) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index 161271dd72e88..20b15cf7b0f59 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -60,6 +60,7 @@ import ( "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/queue" "github.com/grafana/loki/pkg/storage" + v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/config" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" "github.com/grafana/loki/pkg/util" @@ -300,8 +301,10 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk g.pendingTasks.Add(task.ID, task) }) - response := make([]*logproto.GroupedChunkRefs, 0, len(req.Refs)) - responseCount := 0 + requestCount := len(req.Refs) + // TODO(chaudum): Use pool + responses := make([]v1.Output, 0, requestCount) + for { select { case <-ctx.Done(): @@ -309,19 +312,47 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk case err := <-errCh: return nil, errors.Wrap(err, "waiting for results") case res := <-resCh: - responseCount++ + responses = append(responses, res) // log line is helpful for debugging tests - // level.Debug(g.logger).Log("msg", "got partial result", "task", task.ID, "tenant", tenantID, "fp", res.Fp, "chunks", res.Chks.Len(), "progress", fmt.Sprintf("%d/%d", responseCount, len(req.Refs))) - if res.Chks.Len() > 0 { - response = append(response, &logproto.GroupedChunkRefs{ - Tenant: tenantID, - Fingerprint: uint64(res.Fp), - Refs: convertToShortRefs(res.Chks), - }) - } + // level.Debug(g.logger).Log("msg", "got partial result", "task", task.ID, "tenant", tenantID, "fp", uint64(res.Fp), "chunks", res.Removals.Len(), "progress", fmt.Sprintf("%d/%d", len(responses), requestCount)) // wait for all parts of the full response - if responseCount == len(req.Refs) { - return &logproto.FilterChunkRefResponse{ChunkRefs: response}, nil + if len(responses) == requestCount { + for _, o := range responses { + // we must not remove items from req.Refs as long as the worker may iterater over them + g.removeNotMatchingChunks(req, o) + } + return &logproto.FilterChunkRefResponse{ChunkRefs: req.Refs}, nil + } + } + } +} + +func (g *Gateway) removeNotMatchingChunks(req *logproto.FilterChunkRefRequest, res v1.Output) { + // binary search index of fingerprint + idx := sort.Search(len(req.Refs), func(i int) bool { + return req.Refs[i].Fingerprint >= uint64(res.Fp) + }) + + // fingerprint not found + if idx >= len(req.Refs) { + level.Error(g.logger).Log("msg", "index out of range", "idx", idx, "len", len(req.Refs), "fp", uint64(res.Fp)) + return + } + + // if all chunks of a fingerprint are are removed + // then remove the whole group from the response + if len(req.Refs[idx].Refs) == res.Removals.Len() { + req.Refs[idx] = nil // avoid leaking pointer + req.Refs = append(req.Refs[:idx], req.Refs[idx+1:]...) + return + } + + for i := range res.Removals { + toRemove := res.Removals[i] + for j := range req.Refs[idx].Refs { + if toRemove.Checksum == req.Refs[idx].Refs[j].Checksum { + req.Refs[idx].Refs[j] = nil // avoid leaking pointer + req.Refs[idx].Refs = append(req.Refs[idx].Refs[:j], req.Refs[idx].Refs[j+1:]...) 
} } } diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index 6d9ddbd663fb0..088666a59abce 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -270,9 +270,9 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { }) chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 100) - inputChunkRefs := groupRefs(t, chunkRefs) t.Run("no match - return empty response", func(t *testing.T) { + inputChunkRefs := groupRefs(t, chunkRefs) req := &logproto.FilterChunkRefRequest{ From: now.Add(-8 * time.Hour), Through: now, @@ -292,6 +292,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { }) t.Run("match - return filtered", func(t *testing.T) { + inputChunkRefs := groupRefs(t, chunkRefs) // hack to get indexed key for a specific series // the indexed key range for a series is defined as // i * keysPerSeries ... i * keysPerSeries + keysPerSeries - 1 diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index 0e8d8e052958a..327f481404737 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -54,14 +54,12 @@ func (t Task) Bounds() (time.Time, time.Time) { return getDayTime(t.Request.From), getDayTime(t.Request.Through) } -// CopyWithRequest returns a copy of the original task, but with newly provided request -func (t Task) CopyWithRequest(req *logproto.FilterChunkRefRequest) Task { - return Task{ - ID: t.ID, - Tenant: t.Tenant, - Request: req, - ErrCh: t.ErrCh, - ResCh: t.ResCh, +func (t Task) ChunkIterForDay(day time.Time) v1.Iterator[*logproto.GroupedChunkRefs] { + cf := filterGroupedChunkRefsByDay{day: day} + return &FilterIter[*logproto.GroupedChunkRefs]{ + iter: v1.NewSliceIter(t.Request.Refs), + matches: cf.contains, + transform: cf.filter, } } @@ -81,7 +79,7 @@ func (cf filterGroupedChunkRefsByDay) filter(a *logproto.GroupedChunkRefs) *logp minTs, maxTs := getFromThrough(a.Refs) // in most cases, all chunks are within day range - if minTs.Time().Compare(cf.day) >= 0 && maxTs.Time().Before(cf.day.Add(24*time.Hour)) { + if minTs.Time().Compare(cf.day) >= 0 && maxTs.Time().Before(cf.day.Add(Day)) { return a } @@ -89,11 +87,13 @@ func (cf filterGroupedChunkRefsByDay) filter(a *logproto.GroupedChunkRefs) *logp // using binary search to get min and max index of chunks that fall into the day range min := sort.Search(len(a.Refs), func(i int) bool { start := a.Refs[i].From.Time() - return start.Compare(cf.day) >= 0 && start.Compare(cf.day.Add(Day)) < 0 + end := a.Refs[i].Through.Time() + return start.Compare(cf.day) >= 0 || end.Compare(cf.day) >= 0 }) + max := sort.Search(len(a.Refs), func(i int) bool { start := a.Refs[i].From.Time() - return start.Compare(cf.day.Add(Day)) >= 0 + return start.Compare(cf.day.Add(Day)) > 0 }) return &logproto.GroupedChunkRefs{ @@ -103,22 +103,12 @@ func (cf filterGroupedChunkRefsByDay) filter(a *logproto.GroupedChunkRefs) *logp } } -func (t Task) ChunkIterForDay(day time.Time) v1.PeekingIterator[*logproto.GroupedChunkRefs] { - cf := filterGroupedChunkRefsByDay{day: day} - iter := &FilterIter[*logproto.GroupedChunkRefs]{ - iter: v1.NewSliceIter(t.Request.Refs), - predicate: cf.contains, - transform: cf.filter, - } - return v1.NewPeekingIter[*logproto.GroupedChunkRefs](iter) -} - type Predicate[T any] func(a T) bool type Transform[T any] func(a T) T type FilterIter[T any] struct { iter v1.Iterator[T] - predicate Predicate[T] + matches Predicate[T] transform Transform[T] cache T zero T // zero value of the return type of 
Next() @@ -130,7 +120,7 @@ func (it *FilterIter[T]) Next() bool { it.cache = it.zero return false } - for next && !it.predicate(it.iter.At()) { + for next && !it.matches(it.iter.At()) { next = it.iter.Next() if !next { it.cache = it.zero @@ -178,7 +168,7 @@ func (it *taskMergeIterator) init() { sequences := make([]v1.PeekingIterator[IndexedValue[*logproto.GroupedChunkRefs]], 0, len(it.tasks)) for i := range it.tasks { iter := NewIterWithIndex(it.tasks[i].ChunkIterForDay(it.day), i) - sequences = append(sequences, iter) + sequences = append(sequences, v1.NewPeekingIter(iter)) } it.heap = v1.NewHeapIterator( func(i, j IndexedValue[*logproto.GroupedChunkRefs]) bool { diff --git a/pkg/bloomgateway/multiplexing_test.go b/pkg/bloomgateway/multiplexing_test.go index dc04522e13d2f..af740715c22fb 100644 --- a/pkg/bloomgateway/multiplexing_test.go +++ b/pkg/bloomgateway/multiplexing_test.go @@ -184,3 +184,73 @@ func TestTaskMergeIterator(t *testing.T) { } }) } + +func TestChunkIterForDay(t *testing.T) { + tenant := "fake" + + // Thu Nov 09 2023 10:56:50 UTC + ts := model.TimeFromUnix(1699523810) + + t.Run("filter chunk refs that fall into the day range", func(t *testing.T) { + input := &logproto.FilterChunkRefRequest{ + From: ts.Add(-168 * time.Hour), // 1w ago + Through: ts, + Refs: []*logproto.GroupedChunkRefs{ + {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-168 * time.Hour), Through: ts.Add(-167 * time.Hour), Checksum: 100}, + {From: ts.Add(-143 * time.Hour), Through: ts.Add(-142 * time.Hour), Checksum: 101}, + }}, + {Fingerprint: 200, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-144 * time.Hour), Through: ts.Add(-143 * time.Hour), Checksum: 200}, + {From: ts.Add(-119 * time.Hour), Through: ts.Add(-118 * time.Hour), Checksum: 201}, + }}, + {Fingerprint: 300, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-120 * time.Hour), Through: ts.Add(-119 * time.Hour), Checksum: 300}, + {From: ts.Add(-95 * time.Hour), Through: ts.Add(-94 * time.Hour), Checksum: 301}, + }}, + {Fingerprint: 400, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-96 * time.Hour), Through: ts.Add(-95 * time.Hour), Checksum: 400}, + {From: ts.Add(-71 * time.Hour), Through: ts.Add(-70 * time.Hour), Checksum: 401}, + }}, + {Fingerprint: 500, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-72 * time.Hour), Through: ts.Add(-71 * time.Hour), Checksum: 500}, + {From: ts.Add(-47 * time.Hour), Through: ts.Add(-46 * time.Hour), Checksum: 501}, + }}, + {Fingerprint: 600, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-48 * time.Hour), Through: ts.Add(-47 * time.Hour), Checksum: 600}, + {From: ts.Add(-23 * time.Hour), Through: ts.Add(-22 * time.Hour), Checksum: 601}, + }}, + {Fingerprint: 700, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-24 * time.Hour), Through: ts.Add(-23 * time.Hour), Checksum: 700}, + {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 701}, + }}, + }, + Filters: []*logproto.LineFilterExpression{ + {Operator: 1, Match: "foo"}, + {Operator: 1, Match: "bar"}, + }, + } + + // day ranges from ts-48h to ts-24h + day := getDayTime(ts.Add(-36 * time.Hour)) + + expected := []*logproto.GroupedChunkRefs{ + {Fingerprint: 500, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-47 * time.Hour), Through: ts.Add(-46 * time.Hour), Checksum: 501}, + }}, + {Fingerprint: 600, Tenant: tenant, Refs: []*logproto.ShortRef{ + {From: ts.Add(-48 * time.Hour), Through: ts.Add(-47 * time.Hour), Checksum: 600}, + }}, + } + 
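		// Editor's note (not part of the patch): the day under test is the UTC
		// calendar day containing ts-36h, i.e. Nov 07 00:00:00 to Nov 08 00:00:00 UTC.
		// In this data set only chunk 501 (From = ts-47h, Nov 07 11:56:50) and
		// chunk 600 (From = ts-48h, Nov 07 10:56:50) start within that window,
		// which is why the expected output above contains exactly those two refs.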
+ task, _, _, _ := NewTask(tenant, input) + it := task.ChunkIterForDay(day) + + output := make([]*logproto.GroupedChunkRefs, 0, len(input.Refs)) + for it.Next() { + output = append(output, it.At()) + } + + require.Equal(t, expected, output) + }) +} diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index 39d4e370da7c7..9254067e1b52f 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -15,30 +15,20 @@ type IndexedValue[T any] struct { } type IterWithIndex[T any] struct { - v1.PeekingIterator[T] + v1.Iterator[T] zero T // zero value of T cache IndexedValue[T] } func (it *IterWithIndex[T]) At() IndexedValue[T] { - it.cache.val = it.PeekingIterator.At() + it.cache.val = it.Iterator.At() return it.cache } -func (it *IterWithIndex[T]) Peek() (IndexedValue[T], bool) { - peek, ok := it.PeekingIterator.Peek() - if !ok { - it.cache.val = it.zero - return it.cache, false - } - it.cache.val = peek - return it.cache, true -} - -func NewIterWithIndex[T any](iter v1.PeekingIterator[T], idx int) v1.PeekingIterator[IndexedValue[T]] { +func NewIterWithIndex[T any](iter v1.Iterator[T], idx int) v1.Iterator[IndexedValue[T]] { return &IterWithIndex[T]{ - PeekingIterator: iter, - cache: IndexedValue[T]{idx: idx}, + Iterator: iter, + cache: IndexedValue[T]{idx: idx}, } } @@ -84,59 +74,6 @@ func getDayTime(ts model.Time) time.Time { return time.Date(ts.Time().Year(), ts.Time().Month(), ts.Time().Day(), 0, 0, 0, 0, time.UTC) } -func filterRequestForDay(r *logproto.FilterChunkRefRequest, day time.Time) *logproto.FilterChunkRefRequest { - through := model.TimeFromUnix(day.Unix()) - from := model.TimeFromUnix(day.Add(24 * time.Hour).Unix()) - - refs := make([]*logproto.GroupedChunkRefs, 0, len(r.Refs)) - for i := range r.Refs { - groupedChunkRefs := &logproto.GroupedChunkRefs{ - Fingerprint: r.Refs[i].Fingerprint, - Tenant: r.Refs[i].Tenant, - Refs: make([]*logproto.ShortRef, 0, len(r.Refs[i].Refs)), - } - for j := range r.Refs[i].Refs { - shortRef := r.Refs[i].Refs[j] - fromDay := getDayTime(shortRef.From) - if fromDay.After(day) { - break - } - throughDay := getDayTime(shortRef.Through) - if fromDay.Equal(day) || throughDay.Equal(day) { - groupedChunkRefs.Refs = append(groupedChunkRefs.Refs, shortRef) - } - } - - // do not add empty groups to request - if len(groupedChunkRefs.Refs) == 0 { - continue - } - - groupFrom, groupThrough := getFromThrough(groupedChunkRefs.Refs) - if groupFrom.Before(from) { - from = groupFrom - } - if groupThrough.After(through) { - through = groupThrough - } - refs = append(refs, groupedChunkRefs) - } - - // The initial value of `from` is the through time and vice versa. - // This is, in order to determine min From and max Through. - // In case no chunk refs match, we need to swap the initial value again. 
- if len(refs) == 0 { - from, through = through, from - } - - return &logproto.FilterChunkRefRequest{ - From: from, - Through: through, - Refs: refs, - Filters: r.Filters, - } -} - // TODO(chaudum): Fix Through time calculation // getFromThrough assumes a list of ShortRefs sorted by From time // However, it does also assume that the last item has the highest diff --git a/pkg/bloomgateway/util_test.go b/pkg/bloomgateway/util_test.go index 8f200ebfbb296..e82a3c4ea460a 100644 --- a/pkg/bloomgateway/util_test.go +++ b/pkg/bloomgateway/util_test.go @@ -2,12 +2,8 @@ package bloomgateway import ( "testing" - "time" - "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - - "github.com/grafana/loki/pkg/logproto" ) func TestSliceIterWithIndex(t *testing.T) { @@ -37,106 +33,3 @@ func TestSliceIterWithIndex(t *testing.T) { require.Equal(t, 123, p.idx) }) } - -func TestFilterRequestForDay(t *testing.T) { - tenant := "fake" - - // Thu Nov 09 2023 10:56:50 UTC - ts := model.TimeFromUnix(1699523810) - - t.Run("filter chunk refs that fall into the day range", func(t *testing.T) { - input := &logproto.FilterChunkRefRequest{ - From: ts.Add(-168 * time.Hour), // 1w ago - Through: ts, - Refs: []*logproto.GroupedChunkRefs{ - {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-168 * time.Hour), Through: ts.Add(-167 * time.Hour), Checksum: 100}, - {From: ts.Add(-143 * time.Hour), Through: ts.Add(-142 * time.Hour), Checksum: 101}, - }}, - {Fingerprint: 200, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-144 * time.Hour), Through: ts.Add(-143 * time.Hour), Checksum: 200}, - {From: ts.Add(-119 * time.Hour), Through: ts.Add(-118 * time.Hour), Checksum: 201}, - }}, - {Fingerprint: 300, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-120 * time.Hour), Through: ts.Add(-119 * time.Hour), Checksum: 300}, - {From: ts.Add(-95 * time.Hour), Through: ts.Add(-94 * time.Hour), Checksum: 301}, - }}, - {Fingerprint: 400, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-96 * time.Hour), Through: ts.Add(-95 * time.Hour), Checksum: 400}, - {From: ts.Add(-71 * time.Hour), Through: ts.Add(-70 * time.Hour), Checksum: 401}, - }}, - {Fingerprint: 500, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-72 * time.Hour), Through: ts.Add(-71 * time.Hour), Checksum: 500}, - {From: ts.Add(-47 * time.Hour), Through: ts.Add(-46 * time.Hour), Checksum: 501}, - }}, - {Fingerprint: 600, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-48 * time.Hour), Through: ts.Add(-47 * time.Hour), Checksum: 600}, - {From: ts.Add(-23 * time.Hour), Through: ts.Add(-22 * time.Hour), Checksum: 601}, - }}, - {Fingerprint: 700, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-24 * time.Hour), Through: ts.Add(-23 * time.Hour), Checksum: 700}, - {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 701}, - }}, - }, - Filters: []*logproto.LineFilterExpression{ - {Operator: 1, Match: "foo"}, - {Operator: 1, Match: "bar"}, - }, - } - - // day ranges from ts-48h to ts-24h - day := getDayTime(ts.Add(-36 * time.Hour)) - - expected := &logproto.FilterChunkRefRequest{ - From: ts.Add(-48 * time.Hour), - Through: ts.Add(-46 * time.Hour), - Refs: []*logproto.GroupedChunkRefs{ - {Fingerprint: 500, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-47 * time.Hour), Through: ts.Add(-46 * time.Hour), Checksum: 501}, - }}, - {Fingerprint: 600, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-48 * time.Hour), Through: ts.Add(-47 * time.Hour), 
Checksum: 600}, - }}, - }, - Filters: []*logproto.LineFilterExpression{ - {Operator: 1, Match: "foo"}, - {Operator: 1, Match: "bar"}, - }, - } - - output := filterRequestForDay(input, day) - require.Equal(t, expected, output) - }) - - t.Run("empty response returns time range from input day", func(t *testing.T) { - input := &logproto.FilterChunkRefRequest{ - From: ts.Add(-168 * time.Hour), // 1w ago - Through: ts, - Refs: []*logproto.GroupedChunkRefs{ - {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-168 * time.Hour), Through: ts.Add(-167 * time.Hour), Checksum: 100}, - }}, - }, - Filters: []*logproto.LineFilterExpression{ - {Operator: 1, Match: "foo"}, - {Operator: 1, Match: "bar"}, - }, - } - - // day ranges from ts-48h to ts-24h - day := getDayTime(ts.Add(-36 * time.Hour)) - - expected := &logproto.FilterChunkRefRequest{ - From: model.TimeFromUnix(day.Unix()), - Through: model.TimeFromUnix(day.Add(24 * time.Hour).Unix()), - Refs: []*logproto.GroupedChunkRefs{}, - Filters: []*logproto.LineFilterExpression{ - {Operator: 1, Match: "foo"}, - {Operator: 1, Match: "bar"}, - }, - } - - output := filterRequestForDay(input, day) - require.Equal(t, expected, output) - }) -} diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index 4b7e6542e1dfa..82e1a195f1fe9 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -191,8 +191,8 @@ func (w *worker) running(ctx context.Context) error { for _, t := range tasks { for _, ref := range t.Request.Refs { t.ResCh <- v1.Output{ - Fp: model.Fingerprint(ref.Fingerprint), - Chks: convertToChunkRefs(ref.Refs), + Fp: model.Fingerprint(ref.Fingerprint), + Removals: nil, } } } @@ -227,8 +227,8 @@ func (w *worker) running(ctx context.Context) error { for hasNext { level.Warn(logger).Log("msg", "processing remaining fingerprint", "fp", it.At().Fp) it.At().Response <- v1.Output{ - Fp: it.At().Fp, - Chks: it.At().Chks, + Fp: it.At().Fp, + Removals: nil, } hasNext = it.Next() } From 66a4ac5595df0cd1e53e39e55d0731cafa9ed6ad Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Tue, 21 Nov 2023 15:28:22 +0100 Subject: [PATCH 33/44] Towards single-pass iteration for querying of block Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway_test.go | 21 ++++++++ pkg/bloomgateway/worker.go | 39 ++++++++------ .../stores/shipper/bloomshipper/client.go | 21 +++++++- .../stores/shipper/bloomshipper/shipper.go | 42 ++++++++------- .../stores/shipper/bloomshipper/store.go | 27 ++++++++++ .../stores/shipper/bloomshipper/util.go | 46 ++++++++++++++++ .../stores/shipper/bloomshipper/util_test.go | 53 +++++++++++++++++++ 7 files changed, 214 insertions(+), 35 deletions(-) create mode 100644 pkg/storage/stores/shipper/bloomshipper/util.go create mode 100644 pkg/storage/stores/shipper/bloomshipper/util_test.go diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index 088666a59abce..a294500ce27bd 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -354,6 +354,27 @@ type mockBloomStore struct { bqs []bloomshipper.BlockQuerierWithFingerprintRange } +// GetBlockQueriersForBlockRefs implements bloomshipper.Store. +func (s *mockBloomStore) GetBlockQueriersForBlockRefs(_ context.Context, _ string, _ []bloomshipper.BlockRef) ([]bloomshipper.BlockQuerierWithFingerprintRange, error) { + return s.bqs, nil +} + +// GetBlockRefs implements bloomshipper.Store. 
+func (s *mockBloomStore) GetBlockRefs(_ context.Context, tenant string, _, _ time.Time) ([]bloomshipper.BlockRef, error) { + blocks := make([]bloomshipper.BlockRef, 0, len(s.bqs)) + for i := range s.bqs { + blocks = append(blocks, bloomshipper.BlockRef{ + Ref: bloomshipper.Ref{ + MinFingerprint: uint64(s.bqs[i].MinFp), + MaxFingerprint: uint64(s.bqs[i].MaxFp), + TenantID: tenant, + }, + }) + } + return blocks, nil +} + +// GetBlockQueriers implements bloomshipper.Store. func (s *mockBloomStore) GetBlockQueriers(_ context.Context, _ string, _, _ time.Time, _ []uint64) ([]bloomshipper.BlockQuerierWithFingerprintRange, error) { return s.bqs, nil } diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index 82e1a195f1fe9..ae755433487cc 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -12,6 +12,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" + "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/queue" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" @@ -100,10 +101,10 @@ func (w *worker) running(ctx context.Context) error { idx := queue.StartIndexWithLocalQueue requests := make([]v1.Request, 0, 128) - fingerprints := make([]uint64, 0, 1024) for { select { + case <-ctx.Done(): return ctx.Err() @@ -153,28 +154,35 @@ func (w *worker) running(ctx context.Context) error { logger := log.With(w.logger, "day", day) level.Debug(logger).Log("msg", "process tasks", "tasks", len(tasks)) - it := newTaskMergeIterator(day, tasks...) + var fp [][]*logproto.GroupedChunkRefs + for i := range tasks { + fp = append(fp, tasks[i].Request.Refs) + } - fingerprints = fingerprints[:0] - for it.Next() { - // fingerprints are already sorted. we can skip duplicates by checking - // if the next is greater than the previous - fp := uint64(it.At().Fp) - if len(fingerprints) > 0 && fp <= fingerprints[len(fingerprints)-1] { - continue + storeFetchStart := time.Now() + blockRefs, err := w.store.GetBlockRefs(taskCtx, tasks[0].Tenant, day, day.Add(Day).Add(-1*time.Nanosecond)) + w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockRefs").Observe(time.Since(storeFetchStart).Seconds()) + if err != nil { + for _, t := range tasks { + t.ErrCh <- err } - fingerprints = append(fingerprints, fp) + // continue with tasks of next day + continue } - it.Reset() + boundedRefs := bloomshipper.PartitionFingerprintRange(fp, blockRefs) + blockRefs = blockRefs[0:] + for _, b := range boundedRefs { + blockRefs = append(blockRefs, b.BlockRef) + } - storeFetchStart := time.Now() - // GetBlockQueriers() waits until all blocks are downloaded and available for querying. + // GetBlockQueriersForBlockRefs() waits until all blocks are downloaded and available for querying. // TODO(chaudum): Add API that allows to process blocks as soon as they become available. // This will require to change the taskMergeIterator to a slice of requests so we can seek // to the appropriate fingerprint range within the slice that matches the block's fingerprint range. 
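// Editor's note (not part of the patch): a self-contained sketch of the
// partitioning idea behind the PartitionFingerprintRange call above, using
// simplified, hypothetical names and only the standard library sort package.
// Assuming the fingerprints are sorted in ascending order, two sort.Search
// calls find the first fingerprint >= the block's minimum and the first
// fingerprint > the block's maximum, so fps[lo:hi] is exactly the sub-slice
// that overlaps the block's fingerprint range.
func overlappingFingerprints(fps []uint64, minFp, maxFp uint64) []uint64 {
	lo := sort.Search(len(fps), func(i int) bool { return fps[i] >= minFp })
	hi := sort.Search(len(fps), func(i int) bool { return fps[i] > maxFp })
	if lo == len(fps) || hi == 0 {
		// all fingerprints fall outside the block's range
		return nil
	}
	return fps[lo:hi]
}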
- blockQueriers, err := w.store.GetBlockQueriers(taskCtx, tasks[0].Tenant, day, day.Add(24*time.Hour).Add(-1*time.Nanosecond), fingerprints) - w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockQueriers").Observe(time.Since(storeFetchStart).Seconds()) + storeFetchStart = time.Now() + blockQueriers, err := w.store.GetBlockQueriersForBlockRefs(taskCtx, tasks[0].Tenant, blockRefs) + w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockQueriersForBlockRefs").Observe(time.Since(storeFetchStart).Seconds()) if err != nil { for _, t := range tasks { t.ErrCh <- err @@ -200,6 +208,7 @@ func (w *worker) running(ctx context.Context) error { continue } + it := newTaskMergeIterator(day, tasks...) hasNext := it.Next() for i, blockQuerier := range blockQueriers { requests = requests[:0] diff --git a/pkg/storage/stores/shipper/bloomshipper/client.go b/pkg/storage/stores/shipper/bloomshipper/client.go index 5709bf8866f21..369c6ae939554 100644 --- a/pkg/storage/stores/shipper/bloomshipper/client.go +++ b/pkg/storage/stores/shipper/bloomshipper/client.go @@ -11,9 +11,8 @@ import ( "strings" "time" - "github.com/prometheus/common/model" - "github.com/grafana/dskit/concurrency" + "github.com/prometheus/common/model" "github.com/grafana/loki/pkg/storage" "github.com/grafana/loki/pkg/storage/chunk/client" @@ -29,6 +28,14 @@ const ( fileNamePartDelimiter = "-" ) +type BoundsCheck uint8 + +const ( + Before BoundsCheck = iota + Overlap + After +) + type Ref struct { TenantID string TableName string @@ -37,6 +44,16 @@ type Ref struct { Checksum uint32 } +// Cmp returns the fingerprint's position relative to the bounds +func (b Ref) Cmp(fp uint64) BoundsCheck { + if fp < b.MinFingerprint { + return Before + } else if fp > b.MaxFingerprint { + return After + } + return Overlap +} + type BlockRef struct { Ref IndexPath string diff --git a/pkg/storage/stores/shipper/bloomshipper/shipper.go b/pkg/storage/stores/shipper/bloomshipper/shipper.go index 4fdb3e4a69f5d..c04cad433308a 100644 --- a/pkg/storage/stores/shipper/bloomshipper/shipper.go +++ b/pkg/storage/stores/shipper/bloomshipper/shipper.go @@ -39,30 +39,30 @@ func NewShipper(client Client, config config.Config, limits Limits, logger log.L }, nil } -func (s *Shipper) ForEachBlock( - ctx context.Context, - tenantID string, - from, through time.Time, - fingerprints []uint64, - callback ForEachBlockCallback) error { +func (s *Shipper) GetBlockRefs(ctx context.Context, tenantID string, from, through time.Time) ([]BlockRef, error) { + level.Debug(s.logger).Log("msg", "GetBlockRefs", "tenant", tenantID, "from", from, "through", through) - level.Debug(s.logger).Log("msg", "ForEachBlock", "tenant", tenantID, "from", from, "through", through, "fingerprints", len(fingerprints)) - - blockRefs, err := s.getActiveBlockRefs(ctx, tenantID, from.UnixNano(), through.UnixNano(), fingerprints) + blockRefs, err := s.getActiveBlockRefs(ctx, tenantID, from.UnixNano(), through.UnixNano(), nil) if err != nil { - return fmt.Errorf("error fetching active block references : %w", err) + return nil, fmt.Errorf("error fetching active block references : %w", err) } + return blockRefs, nil +} +func (s *Shipper) Fetch(ctx context.Context, tenantID string, blocks []BlockRef, callback ForEachBlockCallback) error { cancelContext, cancelFunc := context.WithCancel(ctx) defer cancelFunc() - blocksChannel, errorsChannel := s.blockDownloader.downloadBlocks(cancelContext, tenantID, blockRefs) + blocksChannel, errorsChannel := s.blockDownloader.downloadBlocks(cancelContext, tenantID, blocks) + 
for { select { + case <-ctx.Done(): + return fmt.Errorf("failed to fetch blocks: %w", ctx.Err()) case result, ok := <-blocksChannel: if !ok { return nil } - err = callback(result.BlockQuerier, result.MinFingerprint, result.MaxFingerprint) + err := callback(result.BlockQuerier, result.MinFingerprint, result.MaxFingerprint) if err != nil { return fmt.Errorf("error running callback function for block %s err: %w", result.BlockPath, err) } @@ -74,6 +74,17 @@ func (s *Shipper) ForEachBlock( } } +func (s *Shipper) ForEachBlock(ctx context.Context, tenantID string, from, through time.Time, fingerprints []uint64, callback ForEachBlockCallback) error { + level.Debug(s.logger).Log("msg", "ForEachBlock", "tenant", tenantID, "from", from, "through", through, "fingerprints", len(fingerprints)) + + blockRefs, err := s.getActiveBlockRefs(ctx, tenantID, from.UnixNano(), through.UnixNano(), fingerprints) + if err != nil { + return fmt.Errorf("error fetching active block references : %w", err) + } + + return s.Fetch(ctx, tenantID, blockRefs, callback) +} + func (s *Shipper) Stop() { s.client.Stop() s.blockDownloader.stop() @@ -89,12 +100,7 @@ func getFirstLast[T any](s []T) (T, T) { return s[0], s[len(s)-1] } -func (s *Shipper) getActiveBlockRefs( - ctx context.Context, - tenantID string, - from, through int64, - fingerprints []uint64) ([]BlockRef, error) { - +func (s *Shipper) getActiveBlockRefs(ctx context.Context, tenantID string, from, through int64, fingerprints []uint64) ([]BlockRef, error) { minFingerprint, maxFingerprint := getFirstLast(fingerprints) metas, err := s.client.GetMetas(ctx, MetaSearchParams{ TenantID: tenantID, diff --git a/pkg/storage/stores/shipper/bloomshipper/store.go b/pkg/storage/stores/shipper/bloomshipper/store.go index 7d3d227994637..70c61ba0add8e 100644 --- a/pkg/storage/stores/shipper/bloomshipper/store.go +++ b/pkg/storage/stores/shipper/bloomshipper/store.go @@ -13,7 +13,9 @@ import ( type ForEachBlockCallback func(bq *v1.BlockQuerier, minFp, maxFp uint64) error type ReadShipper interface { + GetBlockRefs(ctx context.Context, tenant string, from, through time.Time) ([]BlockRef, error) ForEachBlock(ctx context.Context, tenant string, from, through time.Time, fingerprints []uint64, callback ForEachBlockCallback) error + Fetch(ctx context.Context, tenant string, blocks []BlockRef, callback ForEachBlockCallback) error } type Interface interface { @@ -27,7 +29,9 @@ type BlockQuerierWithFingerprintRange struct { } type Store interface { + GetBlockRefs(ctx context.Context, tenant string, from, through time.Time) ([]BlockRef, error) GetBlockQueriers(ctx context.Context, tenant string, from, through time.Time, fingerprints []uint64) ([]BlockQuerierWithFingerprintRange, error) + GetBlockQueriersForBlockRefs(ctx context.Context, tenant string, blocks []BlockRef) ([]BlockQuerierWithFingerprintRange, error) Stop() } @@ -45,6 +49,29 @@ func (bs *BloomStore) Stop() { bs.shipper.Stop() } +// GetBlockRefs implements Store +func (bs *BloomStore) GetBlockRefs(ctx context.Context, tenant string, from, through time.Time) ([]BlockRef, error) { + return bs.shipper.GetBlockRefs(ctx, tenant, from, through) +} + +// GetQueriersForBlocks implements Store +func (bs *BloomStore) GetBlockQueriersForBlockRefs(ctx context.Context, tenant string, blocks []BlockRef) ([]BlockQuerierWithFingerprintRange, error) { + bqs := make([]BlockQuerierWithFingerprintRange, 0, 32) + err := bs.shipper.Fetch(ctx, tenant, blocks, func(bq *v1.BlockQuerier, minFp uint64, maxFp uint64) error { + bqs = append(bqs, 
BlockQuerierWithFingerprintRange{ + BlockQuerier: bq, + MinFp: model.Fingerprint(minFp), + MaxFp: model.Fingerprint(maxFp), + }) + return nil + }) + sort.Slice(bqs, func(i, j int) bool { + return bqs[i].MinFp < bqs[j].MinFp + }) + return bqs, err +} + +// BlockQueriers implements Store func (bs *BloomStore) GetBlockQueriers(ctx context.Context, tenant string, from, through time.Time, fingerprints []uint64) ([]BlockQuerierWithFingerprintRange, error) { bqs := make([]BlockQuerierWithFingerprintRange, 0, 32) err := bs.shipper.ForEachBlock(ctx, tenant, from, through, fingerprints, func(bq *v1.BlockQuerier, minFp uint64, maxFp uint64) error { diff --git a/pkg/storage/stores/shipper/bloomshipper/util.go b/pkg/storage/stores/shipper/bloomshipper/util.go new file mode 100644 index 0000000000000..8768964b06fef --- /dev/null +++ b/pkg/storage/stores/shipper/bloomshipper/util.go @@ -0,0 +1,46 @@ +package bloomshipper + +import ( + "sort" + + "github.com/grafana/loki/pkg/logproto" +) + +// BoundedRefs is a set of requests that are clamped to a specific range +type BoundedRefs struct { + BlockRef BlockRef + Refs [][]*logproto.GroupedChunkRefs +} + +// reqs models a set of requests covering many fingerprints. +// consumers models a set of blocks covering different fingerprint ranges +func PartitionFingerprintRange(reqs [][]*logproto.GroupedChunkRefs, blocks []BlockRef) (res []BoundedRefs) { + for _, block := range blocks { + bounded := BoundedRefs{ + BlockRef: block, + } + + for _, req := range reqs { + min := sort.Search(len(req), func(i int) bool { + return block.Cmp(req[i].Fingerprint) > Before + }) + + max := sort.Search(len(req), func(i int) bool { + return block.Cmp(req[i].Fingerprint) == After + }) + + // All fingerprints fall outside of the consumer's range + if min == len(req) || max == 0 { + continue + } + + bounded.Refs = append(bounded.Refs, req[min:max]) + } + + if len(bounded.Refs) > 0 { + res = append(res, bounded) + } + + } + return res +} diff --git a/pkg/storage/stores/shipper/bloomshipper/util_test.go b/pkg/storage/stores/shipper/bloomshipper/util_test.go new file mode 100644 index 0000000000000..dde3dc85ff54c --- /dev/null +++ b/pkg/storage/stores/shipper/bloomshipper/util_test.go @@ -0,0 +1,53 @@ +package bloomshipper + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/logproto" +) + +func mkBlockRef(minFp, maxFp uint64) BlockRef { + return BlockRef{ + Ref: Ref{ + MinFingerprint: minFp, + MaxFingerprint: maxFp, + }, + } +} + +func TestPartitionFingerprintRange(t *testing.T) { + seriesPerBound := 100 + bounds := []BlockRef{ + mkBlockRef(0, 99), + mkBlockRef(100, 199), + mkBlockRef(200, 299), + mkBlockRef(300, 399), // one out of bounds block + } + + nReqs := 4 + nSeries := 300 + reqs := make([][]*logproto.GroupedChunkRefs, nReqs) + for i := 0; i < nSeries; i++ { + reqs[i%4] = append(reqs[i%nReqs], &logproto.GroupedChunkRefs{Fingerprint: uint64(i)}) + } + + results := PartitionFingerprintRange(reqs, bounds) + require.Equal(t, 3, len(results)) // ensure we only return bounds in range + for _, res := range results { + // ensure we have the right number of requests per bound + for i := 0; i < nReqs; i++ { + require.Equal(t, seriesPerBound/nReqs, len(res.Refs[i])) + } + } + + // ensure bound membership + for i := 0; i < nSeries; i++ { + require.Equal(t, + &logproto.GroupedChunkRefs{Fingerprint: uint64(i)}, + results[i/seriesPerBound].Refs[i%nReqs][i%seriesPerBound/nReqs], + ) + } + +} From 6112b7f81792d3053c3b0f2cdbaff898a0bc72b3 Mon Sep 17 
00:00:00 2001 From: Christian Haudum Date: Wed, 22 Nov 2023 10:57:45 +0100 Subject: [PATCH 34/44] Implement single-pass iterator when querying bloom filters Signed-off-by: Christian Haudum --- pkg/bloomgateway/multiplexing.go | 16 +++++ pkg/bloomgateway/util.go | 39 ++++++++++++ pkg/bloomgateway/util_test.go | 49 +++++++++++++++ pkg/bloomgateway/worker.go | 59 +++++++------------ pkg/storage/bloom/v1/fuse.go | 42 ------------- pkg/storage/bloom/v1/fuse_test.go | 33 ----------- .../stores/shipper/bloomshipper/util.go | 46 --------------- .../stores/shipper/bloomshipper/util_test.go | 53 ----------------- 8 files changed, 125 insertions(+), 212 deletions(-) delete mode 100644 pkg/storage/stores/shipper/bloomshipper/util.go delete mode 100644 pkg/storage/stores/shipper/bloomshipper/util_test.go diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index 327f481404737..7b057ec8551b9 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -49,6 +49,22 @@ func NewTask(tenantID string, req *logproto.FilterChunkRefRequest) (Task, chan v return task, resCh, errCh, nil } +// Copy returns a copy of the existing task but with a new slice of chunks +func (t Task) Copy(refs []*logproto.GroupedChunkRefs) Task { + return Task{ + ID: t.ID, + Tenant: t.Tenant, + Request: &logproto.FilterChunkRefRequest{ + From: t.Request.From, + Through: t.Request.Through, + Filters: t.Request.Filters, + Refs: refs, + }, + ErrCh: t.ErrCh, + ResCh: t.ResCh, + } +} + // Bounds returns the day boundaries of the task func (t Task) Bounds() (time.Time, time.Time) { return getDayTime(t.Request.From), getDayTime(t.Request.Through) diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index 9254067e1b52f..43d20876ce4e9 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -1,12 +1,14 @@ package bloomgateway import ( + "sort" "time" "github.com/prometheus/common/model" "github.com/grafana/loki/pkg/logproto" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" ) type IndexedValue[T any] struct { @@ -122,3 +124,40 @@ func getFirstLast[T any](s []T) (T, T) { } return s[0], s[len(s)-1] } + +type boundedTasks struct { + blockRef bloomshipper.BlockRef + tasks []Task +} + +func partitionFingerprintRange(tasks []Task, blocks []bloomshipper.BlockRef) (result []boundedTasks) { + for _, block := range blocks { + bounded := boundedTasks{ + blockRef: block, + } + + for _, task := range tasks { + refs := task.Request.Refs + min := sort.Search(len(refs), func(i int) bool { + return block.Cmp(refs[i].Fingerprint) > bloomshipper.Before + }) + + max := sort.Search(len(refs), func(i int) bool { + return block.Cmp(refs[i].Fingerprint) == bloomshipper.After + }) + + // All fingerprints fall outside of the consumer's range + if min == len(refs) || max == 0 { + continue + } + + bounded.tasks = append(bounded.tasks, task.Copy(refs[min:max])) + } + + if len(bounded.tasks) > 0 { + result = append(result, bounded) + } + + } + return result +} diff --git a/pkg/bloomgateway/util_test.go b/pkg/bloomgateway/util_test.go index e82a3c4ea460a..1424c56a19153 100644 --- a/pkg/bloomgateway/util_test.go +++ b/pkg/bloomgateway/util_test.go @@ -4,6 +4,9 @@ import ( "testing" "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" ) func TestSliceIterWithIndex(t *testing.T) { @@ -33,3 +36,49 @@ func TestSliceIterWithIndex(t 
*testing.T) { require.Equal(t, 123, p.idx) }) } + +func mkBlockRef(minFp, maxFp uint64) bloomshipper.BlockRef { + return bloomshipper.BlockRef{ + Ref: bloomshipper.Ref{ + MinFingerprint: minFp, + MaxFingerprint: maxFp, + }, + } +} + +func TestPartitionFingerprintRange(t *testing.T) { + seriesPerBound := 100 + bounds := []bloomshipper.BlockRef{ + mkBlockRef(0, 99), + mkBlockRef(100, 199), + mkBlockRef(200, 299), + mkBlockRef(300, 399), // one out of bounds block + } + + nTasks := 4 + nSeries := 300 + tasks := make([]Task, nTasks) + for i := 0; i < nSeries; i++ { + if tasks[i%4].Request == nil { + tasks[i%4].Request = &logproto.FilterChunkRefRequest{} + } + tasks[i%4].Request.Refs = append(tasks[i%nTasks].Request.Refs, &logproto.GroupedChunkRefs{Fingerprint: uint64(i)}) + } + + results := partitionFingerprintRange(tasks, bounds) + require.Equal(t, 3, len(results)) // ensure we only return bounds in range + for _, res := range results { + // ensure we have the right number of tasks per bound + for i := 0; i < nTasks; i++ { + require.Equal(t, seriesPerBound/nTasks, len(res.tasks[i].Request.Refs)) + } + } + + // ensure bound membership + for i := 0; i < nSeries; i++ { + require.Equal(t, + &logproto.GroupedChunkRefs{Fingerprint: uint64(i)}, + results[i/seriesPerBound].tasks[i%nTasks].Request.Refs[i%seriesPerBound/nTasks], + ) + } +} diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index ae755433487cc..0d8be30072583 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -12,7 +12,6 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" - "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/queue" v1 "github.com/grafana/loki/pkg/storage/bloom/v1" "github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" @@ -154,11 +153,6 @@ func (w *worker) running(ctx context.Context) error { logger := log.With(w.logger, "day", day) level.Debug(logger).Log("msg", "process tasks", "tasks", len(tasks)) - var fp [][]*logproto.GroupedChunkRefs - for i := range tasks { - fp = append(fp, tasks[i].Request.Refs) - } - storeFetchStart := time.Now() blockRefs, err := w.store.GetBlockRefs(taskCtx, tasks[0].Tenant, day, day.Add(Day).Add(-1*time.Nanosecond)) w.metrics.storeAccessLatency.WithLabelValues(w.id, "GetBlockRefs").Observe(time.Since(storeFetchStart).Seconds()) @@ -169,11 +163,27 @@ func (w *worker) running(ctx context.Context) error { // continue with tasks of next day continue } + // No blocks found. + // Since there are no blocks for the given tasks, we need to return the + // unfiltered list of chunk refs. + if len(blockRefs) == 0 { + level.Warn(logger).Log("msg", "no blocks found") + for _, t := range tasks { + for _, ref := range t.Request.Refs { + t.ResCh <- v1.Output{ + Fp: model.Fingerprint(ref.Fingerprint), + Removals: nil, + } + } + } + // continue with tasks of next day + continue + } - boundedRefs := bloomshipper.PartitionFingerprintRange(fp, blockRefs) + boundedRefs := partitionFingerprintRange(tasks, blockRefs) blockRefs = blockRefs[0:] for _, b := range boundedRefs { - blockRefs = append(blockRefs, b.BlockRef) + blockRefs = append(blockRefs, b.blockRef) } // GetBlockQueriersForBlockRefs() waits until all blocks are downloaded and available for querying. @@ -191,30 +201,11 @@ func (w *worker) running(ctx context.Context) error { continue } - // No blocks found. - // Since there are no blocks for the given tasks, we need to return the - // unfiltered list of chunk refs. 
- if len(blockQueriers) == 0 { - level.Warn(logger).Log("msg", "no blocks found") - for _, t := range tasks { - for _, ref := range t.Request.Refs { - t.ResCh <- v1.Output{ - Fp: model.Fingerprint(ref.Fingerprint), - Removals: nil, - } - } - } - // continue with tasks of next day - continue - } - - it := newTaskMergeIterator(day, tasks...) - hasNext := it.Next() for i, blockQuerier := range blockQueriers { + it := newTaskMergeIterator(day, boundedRefs[i].tasks...) requests = requests[:0] - for hasNext && it.At().Fp <= blockQuerier.MaxFp { + for it.Next() { requests = append(requests, it.At().Request) - hasNext = it.Next() } level.Debug(logger).Log("msg", "processing block", "block", i+1, "of", len(blockQueriers), "requests", len(requests)) // no fingerprints in the fingerprint range of the current block @@ -232,16 +223,8 @@ func (w *worker) running(ctx context.Context) error { continue } } - - for hasNext { - level.Warn(logger).Log("msg", "processing remaining fingerprint", "fp", it.At().Fp) - it.At().Response <- v1.Output{ - Fp: it.At().Fp, - Removals: nil, - } - hasNext = it.Next() - } } + } } } diff --git a/pkg/storage/bloom/v1/fuse.go b/pkg/storage/bloom/v1/fuse.go index 9e506cbda900b..c397a7a55fd57 100644 --- a/pkg/storage/bloom/v1/fuse.go +++ b/pkg/storage/bloom/v1/fuse.go @@ -1,8 +1,6 @@ package v1 import ( - "sort" - "github.com/efficientgo/core/errors" "github.com/prometheus/common/model" ) @@ -145,43 +143,3 @@ func (fq *FusedQuerier) Run() error { return nil } - -// boundedRequests is a set of requests that are clamped to a specific range -type boundedRequests struct { - bounds FingerprintBounds - reqs [][]model.Fingerprint -} - -// reqs models a set of requests covering many fingerprints. -// consumers models a set of blocks covering different fingerprint ranges -func partitionFingerprintRange(reqs [][]model.Fingerprint, blocks []FingerprintBounds) (res []boundedRequests) { - for _, block := range blocks { - bounded := boundedRequests{ - bounds: block, - } - - for _, req := range reqs { - min := sort.Search(len(req), func(i int) bool { - return block.Cmp(req[i]) > Before - }) - - max := sort.Search(len(req), func(i int) bool { - return block.Cmp(req[i]) == After - }) - - // All fingerprints fall outside of the consumer's range - if min == len(req) || max == 0 { - continue - } - - bounded.reqs = append(bounded.reqs, req[min:max]) - } - - if len(bounded.reqs) > 0 { - res = append(res, bounded) - } - - } - - return res -} diff --git a/pkg/storage/bloom/v1/fuse_test.go b/pkg/storage/bloom/v1/fuse_test.go index 50147c7cd3c16..e784ac0168201 100644 --- a/pkg/storage/bloom/v1/fuse_test.go +++ b/pkg/storage/bloom/v1/fuse_test.go @@ -7,44 +7,11 @@ import ( "testing" "github.com/grafana/dskit/concurrency" - "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/grafana/loki/pkg/chunkenc" ) -func TestPartitionFingerprintRange(t *testing.T) { - seriesPerBound := 100 - bounds := []FingerprintBounds{ - {0, 99}, - {100, 199}, - {200, 299}, - {300, 399}, // one out of bounds block - } - - nReqs := 4 - nSeries := 300 - reqs := make([][]model.Fingerprint, nReqs) - for i := 0; i < nSeries; i++ { - reqs[i%4] = append(reqs[i%nReqs], model.Fingerprint(i)) - } - - results := partitionFingerprintRange(reqs, bounds) - require.Equal(t, 3, len(results)) // ensure we only return bounds in range - for _, res := range results { - // ensure we have the right number of requests per bound - for i := 0; i < nReqs; i++ { - require.Equal(t, seriesPerBound/nReqs, len(res.reqs[i])) - } 
- } - - // ensure bound membership - for i := 0; i < nSeries; i++ { - require.Equal(t, model.Fingerprint(i), results[i/seriesPerBound].reqs[i%nReqs][i%seriesPerBound/nReqs]) - } - -} - func TestFusedQuerier(t *testing.T) { // references for linking in memory reader+writer indexBuf := bytes.NewBuffer(nil) diff --git a/pkg/storage/stores/shipper/bloomshipper/util.go b/pkg/storage/stores/shipper/bloomshipper/util.go deleted file mode 100644 index 8768964b06fef..0000000000000 --- a/pkg/storage/stores/shipper/bloomshipper/util.go +++ /dev/null @@ -1,46 +0,0 @@ -package bloomshipper - -import ( - "sort" - - "github.com/grafana/loki/pkg/logproto" -) - -// BoundedRefs is a set of requests that are clamped to a specific range -type BoundedRefs struct { - BlockRef BlockRef - Refs [][]*logproto.GroupedChunkRefs -} - -// reqs models a set of requests covering many fingerprints. -// consumers models a set of blocks covering different fingerprint ranges -func PartitionFingerprintRange(reqs [][]*logproto.GroupedChunkRefs, blocks []BlockRef) (res []BoundedRefs) { - for _, block := range blocks { - bounded := BoundedRefs{ - BlockRef: block, - } - - for _, req := range reqs { - min := sort.Search(len(req), func(i int) bool { - return block.Cmp(req[i].Fingerprint) > Before - }) - - max := sort.Search(len(req), func(i int) bool { - return block.Cmp(req[i].Fingerprint) == After - }) - - // All fingerprints fall outside of the consumer's range - if min == len(req) || max == 0 { - continue - } - - bounded.Refs = append(bounded.Refs, req[min:max]) - } - - if len(bounded.Refs) > 0 { - res = append(res, bounded) - } - - } - return res -} diff --git a/pkg/storage/stores/shipper/bloomshipper/util_test.go b/pkg/storage/stores/shipper/bloomshipper/util_test.go deleted file mode 100644 index dde3dc85ff54c..0000000000000 --- a/pkg/storage/stores/shipper/bloomshipper/util_test.go +++ /dev/null @@ -1,53 +0,0 @@ -package bloomshipper - -import ( - "testing" - - "github.com/stretchr/testify/require" - - "github.com/grafana/loki/pkg/logproto" -) - -func mkBlockRef(minFp, maxFp uint64) BlockRef { - return BlockRef{ - Ref: Ref{ - MinFingerprint: minFp, - MaxFingerprint: maxFp, - }, - } -} - -func TestPartitionFingerprintRange(t *testing.T) { - seriesPerBound := 100 - bounds := []BlockRef{ - mkBlockRef(0, 99), - mkBlockRef(100, 199), - mkBlockRef(200, 299), - mkBlockRef(300, 399), // one out of bounds block - } - - nReqs := 4 - nSeries := 300 - reqs := make([][]*logproto.GroupedChunkRefs, nReqs) - for i := 0; i < nSeries; i++ { - reqs[i%4] = append(reqs[i%nReqs], &logproto.GroupedChunkRefs{Fingerprint: uint64(i)}) - } - - results := PartitionFingerprintRange(reqs, bounds) - require.Equal(t, 3, len(results)) // ensure we only return bounds in range - for _, res := range results { - // ensure we have the right number of requests per bound - for i := 0; i < nReqs; i++ { - require.Equal(t, seriesPerBound/nReqs, len(res.Refs[i])) - } - } - - // ensure bound membership - for i := 0; i < nSeries; i++ { - require.Equal(t, - &logproto.GroupedChunkRefs{Fingerprint: uint64(i)}, - results[i/seriesPerBound].Refs[i%nReqs][i%seriesPerBound/nReqs], - ) - } - -} From 7340e5d9421b3b30645a46cbf3747eb33676c9be Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 23 Nov 2023 08:06:18 +0100 Subject: [PATCH 35/44] Avoid unnecessary chunk removal function call Signed-off-by: Christian Haudum --- pkg/bloomgateway/bloomgateway.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go 
index 20b15cf7b0f59..6448e45324f32 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -318,6 +318,9 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk // wait for all parts of the full response if len(responses) == requestCount { for _, o := range responses { + if res.Removals.Len() == 0 { + continue + } // we must not remove items from req.Refs as long as the worker may iterater over them g.removeNotMatchingChunks(req, o) } From 7dd6f563e8a5b69df2427923976f780d311a029b Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 23 Nov 2023 08:51:27 +0100 Subject: [PATCH 36/44] Add TODO for search string conversion Signed-off-by: Christian Haudum --- pkg/bloomgateway/util.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go index 43d20876ce4e9..f3f4454858998 100644 --- a/pkg/bloomgateway/util.go +++ b/pkg/bloomgateway/util.go @@ -87,6 +87,10 @@ func getFromThrough(refs []*logproto.ShortRef) (model.Time, model.Time) { return refs[0].From, refs[len(refs)-1].Through } +// convertToSearches converts a list of line filter expressions to a list of +// byte slices that can be used with the bloom filters. +// TODO(chaudum): Currently this function only supports equality matchers, +// but we eventually also want to support regex matchers. func convertToSearches(filters []*logproto.LineFilterExpression) [][]byte { searches := make([][]byte, 0, len(filters)) for _, f := range filters { From 14079c9c01bdbaf5e1f33231af0fa38b43fa0a85 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 23 Nov 2023 08:52:38 +0100 Subject: [PATCH 37/44] Fix clearing of slice buffer in the worker Signed-off-by: Christian Haudum --- pkg/bloomgateway/worker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index 0d8be30072583..f5a54dd188c72 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -181,7 +181,7 @@ func (w *worker) running(ctx context.Context) error { } boundedRefs := partitionFingerprintRange(tasks, blockRefs) - blockRefs = blockRefs[0:] + blockRefs = blockRefs[:0] for _, b := range boundedRefs { blockRefs = append(blockRefs, b.blockRef) } From d010e9937001e2e851974f841e996e3b4d493c84 Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Thu, 23 Nov 2023 09:23:16 +0100 Subject: [PATCH 38/44] Avoid collecting items from interator into slice Signed-off-by: Christian Haudum --- pkg/bloomgateway/multiplexing.go | 12 ++---- pkg/bloomgateway/multiplexing_test.go | 53 --------------------------- pkg/bloomgateway/worker.go | 15 +------- 3 files changed, 5 insertions(+), 75 deletions(-) diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index 7b057ec8551b9..17063a4903d23 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -170,14 +170,14 @@ type taskMergeIterator struct { err error } -func newTaskMergeIterator(day time.Time, tasks ...Task) *taskMergeIterator { +func newTaskMergeIterator(day time.Time, tasks ...Task) v1.PeekingIterator[v1.Request] { it := &taskMergeIterator{ tasks: tasks, curr: FilterRequest{}, day: day, } it.init() - return it + return v1.NewPeekingIter[v1.Request](it) } func (it *taskMergeIterator) init() { @@ -195,10 +195,6 @@ func (it *taskMergeIterator) init() { it.err = nil } -func (it *taskMergeIterator) Reset() { - it.init() -} - func (it *taskMergeIterator) Next() bool { ok := it.heap.Next() if !ok { @@ 
-216,8 +212,8 @@ func (it *taskMergeIterator) Next() bool { return true } -func (it *taskMergeIterator) At() FilterRequest { - return it.curr +func (it *taskMergeIterator) At() v1.Request { + return it.curr.Request } func (it *taskMergeIterator) Err() error { diff --git a/pkg/bloomgateway/multiplexing_test.go b/pkg/bloomgateway/multiplexing_test.go index af740715c22fb..93e5e5686fdaf 100644 --- a/pkg/bloomgateway/multiplexing_test.go +++ b/pkg/bloomgateway/multiplexing_test.go @@ -57,7 +57,6 @@ func TestTaskMergeIterator(t *testing.T) { require.NoError(t, err) it := newTaskMergeIterator(day, t1, t2, t3) - require.NotNil(t, it.heap) // nothing to iterate over require.False(t, it.Next()) }) @@ -103,7 +102,6 @@ func TestTaskMergeIterator(t *testing.T) { require.NoError(t, err) it := newTaskMergeIterator(day, t1, t2, t3) - require.NotNil(t, it.heap) // first item require.True(t, it.Next()) @@ -132,57 +130,6 @@ func TestTaskMergeIterator(t *testing.T) { // no more items require.False(t, it.Next()) }) - - t.Run("reset of iterator allows for multiple iterations", func(t *testing.T) { - r1 := &logproto.FilterChunkRefRequest{ - From: ts.Add(-1 * time.Hour), - Through: ts, - Refs: []*logproto.GroupedChunkRefs{ - {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 100}, - }}, - }, - } - t1, _, _, err := NewTask(tenant, r1) - require.NoError(t, err) - - r2 := &logproto.FilterChunkRefRequest{ - From: ts.Add(-1 * time.Hour), - Through: ts, - Refs: []*logproto.GroupedChunkRefs{ - {Fingerprint: 100, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 200}, - }}, - }, - } - t2, _, _, err := NewTask(tenant, r2) - require.NoError(t, err) - - r3 := &logproto.FilterChunkRefRequest{ - From: ts.Add(-1 * time.Hour), - Through: ts, - Refs: []*logproto.GroupedChunkRefs{ - {Fingerprint: 200, Tenant: tenant, Refs: []*logproto.ShortRef{ - {From: ts.Add(-1 * time.Hour), Through: ts, Checksum: 300}, - }}, - }, - } - t3, _, _, err := NewTask(tenant, r3) - require.NoError(t, err) - - it := newTaskMergeIterator(day, t1, t2, t3) - require.NotNil(t, it.heap) - - checksums := []uint32{100, 200, 300} - for i := 0; i < 3; i++ { - count := 0 - for it.Next() { - require.Equal(t, checksums[count], it.At().Chks[0].Checksum) - count++ - } - it.Reset() - } - }) } func TestChunkIterForDay(t *testing.T) { diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go index f5a54dd188c72..75895202cb048 100644 --- a/pkg/bloomgateway/worker.go +++ b/pkg/bloomgateway/worker.go @@ -99,8 +99,6 @@ func (w *worker) starting(_ context.Context) error { func (w *worker) running(ctx context.Context) error { idx := queue.StartIndexWithLocalQueue - requests := make([]v1.Request, 0, 128) - for { select { @@ -203,18 +201,7 @@ func (w *worker) running(ctx context.Context) error { for i, blockQuerier := range blockQueriers { it := newTaskMergeIterator(day, boundedRefs[i].tasks...) 
From 4900e249a3597d9e745fd18992a30421b95d81a0 Mon Sep 17 00:00:00 2001
From: Christian Haudum
Date: Thu, 23 Nov 2023 09:24:39 +0100
Subject: [PATCH 39/44] Revert accidental renaming of variable

Signed-off-by: Christian Haudum
---
 pkg/storage/bloom/v1/merge.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pkg/storage/bloom/v1/merge.go b/pkg/storage/bloom/v1/merge.go
index aaa4e568bc31e..981582f1234cd 100644
--- a/pkg/storage/bloom/v1/merge.go
+++ b/pkg/storage/bloom/v1/merge.go
@@ -76,15 +76,15 @@ func (mbq *HeapIterator[T]) pop() (T, bool) {
 			return mbq.zero, false
 		}
 
-		curr := mbq.itrs[0]
-		if ok := curr.Next(); !ok {
+		cur := mbq.itrs[0]
+		if ok := cur.Next(); !ok {
 			mbq.remove(0)
 			continue
 		}
 
-		result := curr.At()
+		result := cur.At()
 
-		_, ok := curr.Peek()
+		_, ok := cur.Peek()
 		if !ok {
 			// that was the end of the iterator. remove it from the heap
 			mbq.remove(0)

From 65bfcc988db58493ff48f418c9e638031866a648 Mon Sep 17 00:00:00 2001
From: Christian Haudum
Date: Thu, 23 Nov 2023 09:27:39 +0100
Subject: [PATCH 40/44] Remove duplicate BoundsCheck declaration

Signed-off-by: Christian Haudum
---
 pkg/bloomgateway/util.go                      |  4 ++--
 .../stores/shipper/bloomshipper/client.go     | 16 ++++------------
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/pkg/bloomgateway/util.go b/pkg/bloomgateway/util.go
index f3f4454858998..87187e071b82d 100644
--- a/pkg/bloomgateway/util.go
+++ b/pkg/bloomgateway/util.go
@@ -143,11 +143,11 @@ func partitionFingerprintRange(tasks []Task, blocks []bloomshipper.BlockRef) (re
 		for _, task := range tasks {
 			refs := task.Request.Refs
 			min := sort.Search(len(refs), func(i int) bool {
-				return block.Cmp(refs[i].Fingerprint) > bloomshipper.Before
+				return block.Cmp(refs[i].Fingerprint) > v1.Before
 			})
 
 			max := sort.Search(len(refs), func(i int) bool {
-				return block.Cmp(refs[i].Fingerprint) == bloomshipper.After
+				return block.Cmp(refs[i].Fingerprint) == v1.After
 			})
 
 			// All fingerprints fall outside of the consumer's range
diff --git a/pkg/storage/stores/shipper/bloomshipper/client.go b/pkg/storage/stores/shipper/bloomshipper/client.go
index 369c6ae939554..2dbe1300aa6b9 100644
--- a/pkg/storage/stores/shipper/bloomshipper/client.go
+++ b/pkg/storage/stores/shipper/bloomshipper/client.go
@@ -28,14 +28,6 @@ const (
 	fileNamePartDelimiter = "-"
 )
 
-type BoundsCheck uint8
-
-const (
-	Before BoundsCheck = iota
-	Overlap
-	After
-)
-
 type Ref struct {
 	TenantID  string
 	TableName string
@@ -45,13 +37,13 @@ type Ref struct {
 }
 
 // Cmp returns the fingerprint's position relative to the bounds
-func (b Ref) Cmp(fp uint64) BoundsCheck {
+func (b Ref) Cmp(fp uint64) v1.BoundsCheck {
 	if fp < b.MinFingerprint {
-		return Before
+		return v1.Before
 	} else if fp > b.MaxFingerprint {
-		return After
+		return v1.After
 	}
-	return Overlap
+	return v1.Overlap
 }
 
 type BlockRef struct {
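
The partitionFingerprintRange change in PATCH 40 assumes the refs of a request are sorted by fingerprint: two sort.Search calls with the three-way bounds check locate the half-open range of refs that a block can answer. Below is a standalone sketch of that idea, where boundsCheck and bounds are local stand-ins for v1.BoundsCheck and a block's fingerprint range, and the numbers are made up:

package main

import (
	"fmt"
	"sort"
)

// boundsCheck mimics the ordered Before < Overlap < After enum used above.
type boundsCheck uint8

const (
	before boundsCheck = iota
	overlap
	after
)

// bounds is a stand-in for a block's [min, max] fingerprint range.
type bounds struct{ min, max uint64 }

func (b bounds) cmp(fp uint64) boundsCheck {
	if fp < b.min {
		return before
	} else if fp > b.max {
		return after
	}
	return overlap
}

func main() {
	// Fingerprints of a request, sorted ascending, and one block's range.
	fps := []uint64{10, 20, 30, 40, 50, 60}
	blk := bounds{min: 25, max: 45}

	// First index that is no longer before the block's range ...
	lo := sort.Search(len(fps), func(i int) bool { return blk.cmp(fps[i]) > before })
	// ... and first index that is already past it.
	hi := sort.Search(len(fps), func(i int) bool { return blk.cmp(fps[i]) == after })

	// fps[lo:hi] is the subset the block can answer; everything else is skipped.
	fmt.Println(lo, hi, fps[lo:hi]) // 2 4 [30 40]
}
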
From 579250cc50d8e6147a73444263276ff84f67722f Mon Sep 17 00:00:00 2001
From: Christian Haudum
Date: Thu, 23 Nov 2023 09:29:01 +0100
Subject: [PATCH 41/44] Rename BufferPool to SlicePool

Signed-off-by: Christian Haudum
---
 pkg/queue/queue.go |  4 ++--
 pkg/queue/util.go  | 16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/pkg/queue/queue.go b/pkg/queue/queue.go
index b91741d1faef1..e2c131ae30852 100644
--- a/pkg/queue/queue.go
+++ b/pkg/queue/queue.go
@@ -59,7 +59,7 @@ type RequestQueue struct {
 	stopped bool
 	metrics *Metrics
 
-	pool *BufferPool[Request]
+	pool *SlicePool[Request]
 }
 
 func NewRequestQueue(maxOutstandingPerTenant int, forgetDelay time.Duration, metrics *Metrics) *RequestQueue {
@@ -67,7 +67,7 @@ func NewRequestQueue(maxOutstandingPerTenant int, forgetDelay time.Duration, met
 		queues:             newTenantQueues(maxOutstandingPerTenant, forgetDelay),
 		connectedConsumers: atomic.NewInt32(0),
 		metrics:            metrics,
-		pool:               NewBufferPool[Request](1<<6, 1<<10, 2), // Buckets are [64, 128, 256, 512, 1024].
+		pool:               NewSlicePool[Request](1<<6, 1<<10, 2), // Buckets are [64, 128, 256, 512, 1024].
 	}
 
 	q.cond = contextCond{Cond: sync.NewCond(&q.mtx)}
diff --git a/pkg/queue/util.go b/pkg/queue/util.go
index 9d8312a73f072..9b7fced6dfbf7 100644
--- a/pkg/queue/util.go
+++ b/pkg/queue/util.go
@@ -2,24 +2,24 @@ package queue
 
 import "github.com/prometheus/prometheus/util/pool"
 
-// BufferPool uses a bucket pool and wraps the Get() and Put() functions for
+// SlicePool uses a bucket pool and wraps the Get() and Put() functions for
 // simpler access.
-type BufferPool[T any] struct {
+type SlicePool[T any] struct {
 	p *pool.Pool
 }
 
-func NewBufferPool[T any](minSize, maxSize int, factor float64) *BufferPool[T] {
-	return &BufferPool[T]{
+func NewSlicePool[T any](minSize, maxSize int, factor float64) *SlicePool[T] {
+	return &SlicePool[T]{
 		p: pool.New(minSize, maxSize, factor, func(i int) interface{} {
 			return make([]T, 0, i)
 		}),
 	}
 }
 
-func (p *BufferPool[T]) Get(n int) []T {
-	return p.p.Get(n).([]T)
+func (sp *SlicePool[T]) Get(n int) []T {
+	return sp.p.Get(n).([]T)
 }
 
-func (p *BufferPool[T]) Put(buf []T) {
-	p.p.Put(buf[0:0])
+func (sp *SlicePool[T]) Put(buf []T) {
+	sp.p.Put(buf[0:0])
 }

From 733c42c80e8e37a2254cbe4c7f796fd500447602 Mon Sep 17 00:00:00 2001
From: Christian Haudum
Date: Thu, 23 Nov 2023 09:43:54 +0100
Subject: [PATCH 42/44] Avoid mutability bug

Putting the returned slice of requests back to the pool but also returning
them to the caller could lead to a mutability bug. Now the caller of
DequeueMany() is responsible for returning the request slice back to the pool
of the queue by calling ReleaseRequests().

Signed-off-by: Christian Haudum
---
 pkg/bloomgateway/worker.go |  5 +++++
 pkg/queue/queue.go         | 14 ++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go
index 75895202cb048..5bd56db65fec7 100644
--- a/pkg/bloomgateway/worker.go
+++ b/pkg/bloomgateway/worker.go
@@ -121,6 +121,7 @@ func (w *worker) running(ctx context.Context) error {
 			idx = newIdx
 
 			if len(items) == 0 {
+				w.queue.ReleaseRequests(items)
 				continue
 			}
 			w.metrics.dequeuedTasks.WithLabelValues(w.id).Add(float64(len(items)))
@@ -131,6 +132,7 @@ func (w *worker) running(ctx context.Context) error {
 				task, ok := item.(Task)
 				if !ok {
 					// This really should never happen, because only the bloom gateway itself can enqueue tasks.
+					w.queue.ReleaseRequests(items)
 					return errors.Errorf("failed to cast dequeued item to Task: %v", item)
 				}
 				level.Debug(w.logger).Log("msg", "dequeued task", "task", task.ID)
@@ -212,6 +214,9 @@ func (w *worker) running(ctx context.Context) error {
 				}
 			}
 
+			// return dequeued items back to the pool
+			w.queue.ReleaseRequests(items)
+
 		}
 	}
 }
diff --git a/pkg/queue/queue.go b/pkg/queue/queue.go
index e2c131ae30852..f0475164bd4d1 100644
--- a/pkg/queue/queue.go
+++ b/pkg/queue/queue.go
@@ -127,6 +127,16 @@ func (q *RequestQueue) Enqueue(tenant string, path []string, req Request, maxQue
 	}
 }
 
+// ReleaseRequests returns items back to the slice pool.
+// Must only be called in combination with DequeueMany().
+func (q *RequestQueue) ReleaseRequests(items []Request) {
+	q.pool.Put(items)
+}
+
+// DequeueMany consumes multiple items for a single tenant from the queue.
+// It returns maxItems and waits maxWait if no requests for this tenant are enqueued.
+// The caller is responsible for returning the dequeued requests back to the
+// pool by calling ReleaseRequests(items).
 func (q *RequestQueue) DequeueMany(ctx context.Context, last QueueIndex, consumerID string, maxItems int, maxWait time.Duration) ([]Request, QueueIndex, error) {
 	// create a context for dequeuing with a max time we want to wait to fullfill the desired maxItems
@@ -136,10 +146,6 @@ func (q *RequestQueue) DequeueMany(ctx context.Context, last QueueIndex, consume
 	var idx QueueIndex
 
 	items := q.pool.Get(maxItems)
-	defer func() {
-		q.pool.Put(items)
-	}()
-
 	for {
 		item, newIdx, err := q.Dequeue(dequeueCtx, last, consumerID)
 		if err != nil {
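
The mutability bug described in PATCH 42 is a general hazard with pooled slices: if the producer both returns a slice to the caller and puts it back into the pool, the next Get() may hand the same backing array to someone else while the first caller is still reading it. Below is a self-contained sketch of the failure mode and of the ownership transfer that the DequeueMany()/ReleaseRequests() contract establishes; it uses a plain sync.Pool as a stand-in for the queue's SlicePool, and all names are illustrative:

package main

import (
	"fmt"
	"sync"
)

// pool stands in for the queue's slice pool; it recycles *[]int buffers.
var pool = sync.Pool{New: func() any { s := make([]int, 0, 4); return &s }}

// buggyDequeue returns the pooled slice to the caller *and* puts it back into
// the pool before returning. The next Get() may hand out the same backing
// array, which can then be overwritten while the caller still reads it.
func buggyDequeue() []int {
	sp := pool.Get().(*[]int)
	items := append((*sp)[:0], 1, 2, 3)
	pool.Put(sp) // BUG: caller and future Get() now alias the same array
	return items
}

// fixedDequeue transfers ownership instead: the caller must invoke release()
// when it is done, mirroring the DequeueMany()/ReleaseRequests() contract.
func fixedDequeue() (items []int, release func()) {
	sp := pool.Get().(*[]int)
	items = append((*sp)[:0], 1, 2, 3)
	return items, func() {
		*sp = items[:0]
		pool.Put(sp)
	}
}

func main() {
	got := buggyDequeue()
	next := pool.Get().(*[]int)
	*next = append((*next)[:0], 9, 9, 9) // may clobber got's elements
	fmt.Println(got)                     // can print [9 9 9] instead of [1 2 3]

	items, release := fixedDequeue()
	fmt.Println(items) // [1 2 3], safe until release() is called
	release()          // only now may the pool reuse the backing array
}

This is also why the defer q.pool.Put(items) inside DequeueMany() had to go: a function cannot both return the slice to its caller and schedule it for reuse.
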
From 4f3d7df3a1b73ee675c1d1a663bf8967e40d1252 Mon Sep 17 00:00:00 2001
From: Christian Haudum
Date: Thu, 23 Nov 2023 09:55:38 +0100
Subject: [PATCH 43/44] Only send error to the affected tasks if fuse operation fails

Signed-off-by: Christian Haudum
---
 pkg/bloomgateway/worker.go | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pkg/bloomgateway/worker.go b/pkg/bloomgateway/worker.go
index 5bd56db65fec7..f39632b1219ff 100644
--- a/pkg/bloomgateway/worker.go
+++ b/pkg/bloomgateway/worker.go
@@ -206,10 +206,9 @@ func (w *worker) running(ctx context.Context) error {
 				fq := blockQuerier.Fuse([]v1.PeekingIterator[v1.Request]{it})
 				err := fq.Run()
 				if err != nil {
-					for _, t := range tasks {
+					for _, t := range boundedRefs[i].tasks {
 						t.ErrCh <- errors.Wrap(err, "failed to run chunk check")
 					}
-					continue
 				}
 			}
 		}

From 8893eefc500a48118754a4d6b8371f1fd2d5949e Mon Sep 17 00:00:00 2001
From: Christian Haudum
Date: Fri, 24 Nov 2023 10:30:41 +0100
Subject: [PATCH 44/44] Fix broken import from rebase

Signed-off-by: Christian Haudum
---
 pkg/storage/stores/shipper/bloomshipper/client.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/storage/stores/shipper/bloomshipper/client.go b/pkg/storage/stores/shipper/bloomshipper/client.go
index 2dbe1300aa6b9..d1e9f24ef866b 100644
--- a/pkg/storage/stores/shipper/bloomshipper/client.go
+++ b/pkg/storage/stores/shipper/bloomshipper/client.go
@@ -15,6 +15,7 @@ import (
 	"github.com/prometheus/common/model"
 
 	"github.com/grafana/loki/pkg/storage"
+	v1 "github.com/grafana/loki/pkg/storage/bloom/v1"
 	"github.com/grafana/loki/pkg/storage/chunk/client"
 	"github.com/grafana/loki/pkg/storage/config"
 	"github.com/grafana/loki/pkg/util/math"
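
Taken together, PATCH 41 and PATCH 42 leave the queue with a generic SlicePool whose buffers are handed out by DequeueMany() and must be given back via ReleaseRequests(). For completeness, a small usage sketch of that pool type, assuming pkg/queue is importable as shown in the diffs; the element type and sizes are illustrative, and the exact capacities depend on the underlying Prometheus bucket pool:

package main

import (
	"fmt"

	"github.com/grafana/loki/pkg/queue"
)

func main() {
	// Same construction as in NewRequestQueue: buckets of size 64, 128, 256, 512, 1024.
	p := queue.NewSlicePool[int](1<<6, 1<<10, 2)

	// Get returns a slice with length 0 whose capacity comes from the smallest
	// bucket that can hold n elements (on a fresh pool, the 128-element bucket).
	buf := p.Get(100)
	buf = append(buf, 1, 2, 3)
	fmt.Println(len(buf), cap(buf)) // 3 128 on a fresh pool

	// Put truncates the slice to length 0 before pooling it again, so stale
	// elements are not visible to the next caller of Get.
	p.Put(buf)
}
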