-
Notifications
You must be signed in to change notification settings - Fork 3.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bloom compactor: Load blocks lazily in batches #11919
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
package bloomcompactor | ||
|
||
import ( | ||
"context" | ||
|
||
"github.com/grafana/dskit/multierror" | ||
|
||
"github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" | ||
) | ||
|
||
// interface modeled from `pkg/storage/stores/shipper/bloomshipper.Fetcher` | ||
type blocksFetcher interface { | ||
FetchBlocks(context.Context, []bloomshipper.BlockRef) ([]*bloomshipper.CloseableBlockQuerier, error) | ||
} | ||
|
||
func newBatchedBlockLoader(ctx context.Context, fetcher blocksFetcher, blocks []bloomshipper.BlockRef) (*batchedBlockLoader, error) { | ||
return &batchedBlockLoader{ | ||
ctx: ctx, | ||
batchSize: 10, // make configurable? | ||
source: blocks, | ||
fetcher: fetcher, | ||
}, nil | ||
} | ||
|
||
type batchedBlockLoader struct { | ||
ctx context.Context | ||
batchSize int | ||
|
||
source []bloomshipper.BlockRef | ||
fetcher blocksFetcher | ||
|
||
batch []*bloomshipper.CloseableBlockQuerier | ||
cur *bloomshipper.CloseableBlockQuerier | ||
err error | ||
} | ||
|
||
// At implements v1.CloseableIterator. | ||
func (b *batchedBlockLoader) At() *bloomshipper.CloseableBlockQuerier { | ||
return b.cur | ||
} | ||
|
||
// Close implements v1.CloseableIterator. | ||
func (b *batchedBlockLoader) Close() error { | ||
if b.cur != nil { | ||
return b.cur.Close() | ||
} | ||
return nil | ||
} | ||
|
||
// CloseBatch closes the remaining items from the current batch | ||
func (b *batchedBlockLoader) CloseBatch() error { | ||
var err multierror.MultiError | ||
for _, cur := range b.batch { | ||
err.Add(cur.Close()) | ||
} | ||
if len(b.batch) > 0 { | ||
b.batch = b.batch[:0] | ||
} | ||
return err.Err() | ||
} | ||
|
||
// Err implements v1.CloseableIterator. | ||
func (b *batchedBlockLoader) Err() error { | ||
return b.err | ||
} | ||
|
||
// Next implements v1.CloseableIterator. | ||
func (b *batchedBlockLoader) Next() bool { | ||
if len(b.batch) > 0 { | ||
return b.setNext() | ||
} | ||
|
||
if len(b.source) == 0 { | ||
return false | ||
} | ||
|
||
// setup next batch | ||
batchSize := min(b.batchSize, len(b.source)) | ||
toFetch := b.source[:batchSize] | ||
|
||
// update source | ||
b.source = b.source[batchSize:] | ||
|
||
b.batch, b.err = b.fetcher.FetchBlocks(b.ctx, toFetch) | ||
if b.err != nil { | ||
return false | ||
} | ||
return b.setNext() | ||
} | ||
|
||
func (b *batchedBlockLoader) setNext() bool { | ||
b.cur, b.err = b.batch[0], nil | ||
b.batch = b.batch[1:] | ||
return true | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package bloomcompactor | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
"go.uber.org/atomic" | ||
|
||
"github.com/grafana/loki/pkg/storage/stores/shipper/bloomshipper" | ||
) | ||
|
||
type dummyBlocksFetcher struct { | ||
count *atomic.Int32 | ||
} | ||
|
||
func (f *dummyBlocksFetcher) FetchBlocks(_ context.Context, blocks []bloomshipper.BlockRef) ([]*bloomshipper.CloseableBlockQuerier, error) { | ||
f.count.Inc() | ||
return make([]*bloomshipper.CloseableBlockQuerier, len(blocks)), nil | ||
} | ||
|
||
func TestBatchedBlockLoader(t *testing.T) { | ||
ctx := context.Background() | ||
f := &dummyBlocksFetcher{count: atomic.NewInt32(0)} | ||
|
||
blocks := make([]bloomshipper.BlockRef, 25) | ||
blocksIter, err := newBatchedBlockLoader(ctx, f, blocks) | ||
require.NoError(t, err) | ||
|
||
var count int | ||
for blocksIter.Next() && blocksIter.Err() == nil { | ||
count++ | ||
} | ||
|
||
require.Equal(t, len(blocks), count) | ||
require.Equal(t, int32(len(blocks)/blocksIter.batchSize+1), f.count.Load()) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ import ( | |
"bytes" | ||
"context" | ||
"fmt" | ||
"io" | ||
"sort" | ||
|
||
"github.com/go-kit/log" | ||
|
@@ -138,45 +139,36 @@ func (s *SimpleBloomController) buildBlocks( | |
for _, gap := range plan.gaps { | ||
// Fetch blocks that aren't up to date but are in the desired fingerprint range | ||
// to try and accelerate bloom creation | ||
seriesItr, preExistingBlocks, err := s.loadWorkForGap(ctx, table, tenant, plan.tsdb, gap) | ||
seriesItr, blocksIter, err := s.loadWorkForGap(ctx, table, tenant, plan.tsdb, gap) | ||
if err != nil { | ||
level.Error(logger).Log("msg", "failed to get series and blocks", "err", err) | ||
return errors.Wrap(err, "failed to get series and blocks") | ||
} | ||
// Close all remaining blocks on exit | ||
closePreExistingBlocks := func() { | ||
var closeErrors multierror.MultiError | ||
for _, block := range preExistingBlocks { | ||
closeErrors.Add(block.Close()) | ||
} | ||
if err := closeErrors.Err(); err != nil { | ||
level.Error(s.logger).Log("msg", "failed to close blocks", "err", err) | ||
} | ||
} | ||
|
||
gen := NewSimpleBloomGenerator( | ||
tenant, | ||
blockOpts, | ||
seriesItr, | ||
s.chunkLoader, | ||
preExistingBlocks, | ||
blocksIter, | ||
s.rwFn, | ||
s.metrics, | ||
log.With(logger, "tsdb", plan.tsdb.Name(), "ownership", gap, "blocks", len(preExistingBlocks)), | ||
log.With(logger, "tsdb", plan.tsdb.Name(), "ownership", gap), | ||
) | ||
|
||
_, newBlocks, err := gen.Generate(ctx) | ||
_, loaded, newBlocks, err := gen.Generate(ctx) | ||
|
||
if err != nil { | ||
// TODO(owen-d): metrics | ||
level.Error(logger).Log("msg", "failed to generate bloom", "err", err) | ||
closePreExistingBlocks() | ||
s.closeLoadedBlocks(loaded, blocksIter) | ||
return errors.Wrap(err, "failed to generate bloom") | ||
} | ||
|
||
client, err := s.bloomStore.Client(table.ModelTime()) | ||
if err != nil { | ||
level.Error(logger).Log("msg", "failed to get client", "err", err) | ||
closePreExistingBlocks() | ||
s.closeLoadedBlocks(loaded, blocksIter) | ||
return errors.Wrap(err, "failed to get client") | ||
} | ||
|
||
|
@@ -195,20 +187,20 @@ func (s *SimpleBloomController) buildBlocks( | |
built, | ||
); err != nil { | ||
level.Error(logger).Log("msg", "failed to write block", "err", err) | ||
closePreExistingBlocks() | ||
s.closeLoadedBlocks(loaded, blocksIter) | ||
return errors.Wrap(err, "failed to write block") | ||
} | ||
} | ||
|
||
if err := newBlocks.Err(); err != nil { | ||
// TODO(owen-d): metrics | ||
level.Error(logger).Log("msg", "failed to generate bloom", "err", err) | ||
closePreExistingBlocks() | ||
s.closeLoadedBlocks(loaded, blocksIter) | ||
return errors.Wrap(err, "failed to generate bloom") | ||
} | ||
|
||
// Close pre-existing blocks | ||
closePreExistingBlocks() | ||
s.closeLoadedBlocks(loaded, blocksIter) | ||
} | ||
} | ||
|
||
|
@@ -226,19 +218,49 @@ func (s *SimpleBloomController) loadWorkForGap( | |
tenant string, | ||
id tsdb.Identifier, | ||
gap gapWithBlocks, | ||
) (v1.CloseableIterator[*v1.Series], []*bloomshipper.CloseableBlockQuerier, error) { | ||
) (v1.CloseableIterator[*v1.Series], v1.CloseableIterator[*bloomshipper.CloseableBlockQuerier], error) { | ||
// load a series iterator for the gap | ||
seriesItr, err := s.tsdbStore.LoadTSDB(ctx, table, tenant, id, gap.bounds) | ||
if err != nil { | ||
return nil, nil, errors.Wrap(err, "failed to load tsdb") | ||
} | ||
|
||
blocks, err := s.bloomStore.FetchBlocks(ctx, gap.blocks) | ||
// load a blocks iterator for the gap | ||
fetcher, err := s.bloomStore.Fetcher(table.ModelTime()) | ||
if err != nil { | ||
return nil, nil, errors.Wrap(err, "failed to get blocks") | ||
return nil, nil, errors.Wrap(err, "failed to get fetcher") | ||
} | ||
|
||
return seriesItr, blocks, nil | ||
blocksIter, err := newBatchedBlockLoader(ctx, fetcher, gap.blocks) | ||
if err != nil { | ||
return nil, nil, errors.Wrap(err, "failed to load blocks") | ||
} | ||
|
||
return seriesItr, blocksIter, nil | ||
} | ||
|
||
func (s *SimpleBloomController) closeLoadedBlocks(toClose []io.Closer, it v1.CloseableIterator[*bloomshipper.CloseableBlockQuerier]) { | ||
// close loaded blocks | ||
var err multierror.MultiError | ||
for _, closer := range toClose { | ||
err.Add(closer.Close()) | ||
} | ||
|
||
switch itr := it.(type) { | ||
case *batchedBlockLoader: | ||
// close remaining loaded blocks from batch | ||
err.Add(itr.CloseBatch()) | ||
default: | ||
// close remaining loaded blocks | ||
for itr.Next() && itr.Err() == nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would we ever reach here? As far as I can see There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since the function accepts an interface |
||
err.Add(itr.At().Close()) | ||
} | ||
} | ||
|
||
// log error | ||
if err.Err() != nil { | ||
level.Error(s.logger).Log("msg", "failed to close blocks", "err", err) | ||
} | ||
} | ||
|
||
type gapWithBlocks struct { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree this should be configurable