Bloom-Gateway cache #11380
Merged

Commits (11):
957ce11  Bloom-Gateway cache (salvacorts)
b3780b3  CR feedback (1) (salvacorts)
4aab4b1  CR Feedback (2): Forward GRPC call opts (salvacorts)
a3d8bf6  CR Feedback (3): WithStartEndForCache remove chunks outside range (salvacorts)
859de06  Update docs (salvacorts)
cdad32d  Validate config (salvacorts)
58d1bfa  Fix bug with compact (salvacorts)
6e05bf6  Do not format json (salvacorts)
80ea4d8  Reuse buffer for str fmt (salvacorts)
50fb9b0  Fix register with flags (salvacorts)
ded6905  CR (salvacorts)
@@ -0,0 +1,217 @@
package bloomgateway

import (
	"context"
	"flag"
	"sort"
	"time"

	"github.com/go-kit/log"
	"github.com/prometheus/common/model"
	"golang.org/x/exp/slices"
	"google.golang.org/grpc"

	"github.com/grafana/loki/pkg/logproto"
	"github.com/grafana/loki/pkg/storage/chunk/cache"
	"github.com/grafana/loki/pkg/storage/chunk/cache/resultscache"
)

const (
	cacheParalellism = 1
)

type CacheConfig struct {
	resultscache.Config `yaml:",inline"`
}

// RegisterFlags registers flags.
func (cfg *CacheConfig) RegisterFlags(f *flag.FlagSet) {
	cfg.RegisterFlagsWithPrefix("bloom-gateway-client.cache.", f)
}

func (cfg *CacheConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
	cfg.Config.RegisterFlagsWithPrefix(f, prefix)
}
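
// Illustration (assumption, not part of the original diff): with the prefix
// above, the embedded resultscache.Config options surface as CLI flags under
// -bloom-gateway-client.cache.*, e.g. -bloom-gateway-client.cache.compression,
// assuming the standard results-cache flag set.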

type CacheLimits interface {
	resultscache.Limits
	BloomGatewayCacheKeyInterval(tenantID string) time.Duration
}

type keyGen struct {
	CacheLimits
}

func newCacheKeyGen(limits CacheLimits) keyGen {
	return keyGen{limits}
}

// GenerateCacheKey generates a cache key for the tenant and request, bucketing
// the request by the tenant's configured cache key interval.
func (k keyGen) GenerateCacheKey(ctx context.Context, tenant string, r resultscache.Request) string {
	return resultscache.ConstSplitter(k.BloomGatewayCacheKeyInterval(tenant)).GenerateCacheKey(ctx, tenant, r)
}
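
// Illustration (not part of the original diff): ConstSplitter buckets the
// request by its start time, so with a 1h BloomGatewayCacheKeyInterval, two
// requests from the same tenant whose starts fall within the same hour share
// a cache key and can reuse a cached response.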

type extractor struct{}

func newExtractor() extractor {
	return extractor{}
}

// Extract extracts a subset of a response for the `start` and `end` timestamps in milliseconds.
// We remove chunks that are not within the given time range.
func (e extractor) Extract(start, end int64, r resultscache.Response, _, _ int64) resultscache.Response {
	res := r.(*logproto.FilterChunkRefResponse)

	chunkRefs := make([]*logproto.GroupedChunkRefs, 0, len(res.ChunkRefs))
	for _, chunkRef := range res.ChunkRefs {
		refs := make([]*logproto.ShortRef, 0, len(chunkRef.Refs))
		for _, ref := range chunkRef.Refs {
			// Keep only refs that overlap the requested range.
			if model.Time(end) < ref.From || ref.Through <= model.Time(start) {
				continue
			}
			refs = append(refs, ref)
		}
		if len(refs) > 0 {
			chunkRefs = append(chunkRefs, &logproto.GroupedChunkRefs{
				Fingerprint: chunkRef.Fingerprint,
				Tenant:      chunkRef.Tenant,
				Refs:        refs,
			})
		}
	}

	return &logproto.FilterChunkRefResponse{
		ChunkRefs: chunkRefs,
	}
}

type merger struct{}

func newMerger() merger {
	return merger{}
}

// MergeResponse merges responses from multiple requests into a single Response.
// We merge all chunks grouped by their fingerprint.
func (m merger) MergeResponse(responses ...resultscache.Response) (resultscache.Response, error) {
	var size int
	for _, r := range responses {
		res := r.(*logproto.FilterChunkRefResponse)
		size += len(res.ChunkRefs)
	}

	chunkRefs := make([]*logproto.GroupedChunkRefs, 0, size)
	for _, r := range responses {
		res := r.(*logproto.FilterChunkRefResponse)
		chunkRefs = append(chunkRefs, res.ChunkRefs...)
	}

	return &logproto.FilterChunkRefResponse{
		ChunkRefs: mergeGroupedChunkRefs(chunkRefs),
	}, nil
}

// mergeGroupedChunkRefs merges duplicated fingerprints by:
// 1. Sorting the chunkRefs by their stream fingerprint.
// 2. Removing duplicated FPs, appending all chunks into the first fingerprint's chunk list.
func mergeGroupedChunkRefs(chunkRefs []*logproto.GroupedChunkRefs) []*logproto.GroupedChunkRefs {
	if len(chunkRefs) <= 1 {
		return chunkRefs
	}

	sort.Slice(chunkRefs, func(i, j int) bool {
		return chunkRefs[i].Fingerprint < chunkRefs[j].Fingerprint
	})

	// lastDiffFP tracks the position of the last unique fingerprint written.
	var lastDiffFP int
	for i := 1; i < len(chunkRefs); i++ {
		if chunkRefs[lastDiffFP].Fingerprint == chunkRefs[i].Fingerprint {
			chunkRefs[lastDiffFP].Refs = mergeShortRefs(append(chunkRefs[lastDiffFP].Refs, chunkRefs[i].Refs...))
		} else {
			lastDiffFP++
			chunkRefs[lastDiffFP] = chunkRefs[i]
		}
	}
	return chunkRefs[:lastDiffFP+1]
}

// mergeShortRefs merges short-refs by removing duplicated checksums.
func mergeShortRefs(refs []*logproto.ShortRef) []*logproto.ShortRef {
	if len(refs) <= 1 {
		return refs
	}

	sort.Slice(refs, func(i, j int) bool {
		return refs[i].Checksum < refs[j].Checksum
	})
	return slices.CompactFunc(refs, func(a, b *logproto.ShortRef) bool {
		return a.Checksum == b.Checksum
	})
}
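
// Illustration (not part of the original diff): given groups sharing a
// fingerprint, e.g.
//
//	{FP: 1, Refs: [a, b]}, {FP: 2, Refs: [c]}, {FP: 1, Refs: [b, d]}
//
// mergeGroupedChunkRefs returns {FP: 1, Refs: [a, b, d]}, {FP: 2, Refs: [c]},
// with mergeShortRefs deduplicating refs by checksum.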

type ClientCache struct {
	cache  *resultscache.ResultsCache
	limits CacheLimits
	logger log.Logger
}

// NewBloomGatewayClientCacheMiddleware wraps a BloomGatewayClient with a
// results cache for FilterChunkRefs calls.
func NewBloomGatewayClientCacheMiddleware(
	logger log.Logger,
	next logproto.BloomGatewayClient,
	c cache.Cache,
	limits CacheLimits,
	cacheGen resultscache.CacheGenNumberLoader,
	retentionEnabled bool,
) *ClientCache {
	nextAsHandler := resultscache.HandlerFunc(func(ctx context.Context, cacheReq resultscache.Request) (resultscache.Response, error) {
		req := cacheReq.(requestWithGrpcCallOptions)
		return next.FilterChunkRefs(ctx, req.FilterChunkRefRequest, req.grpcCallOptions...)
	})

	resultsCache := resultscache.NewResultsCache(
		logger,
		c,
		nextAsHandler,
		newCacheKeyGen(limits),
		limits,
		newMerger(),
		newExtractor(),
		nil,
		nil,
		func(_ context.Context, _ []string, _ resultscache.Request) int {
			return cacheParalellism
		},
		cacheGen,
		retentionEnabled,
	)

	return &ClientCache{
		cache:  resultsCache,
		limits: limits,
		logger: logger,
	}
}

func (c *ClientCache) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunkRefRequest, opts ...grpc.CallOption) (*logproto.FilterChunkRefResponse, error) {
	cacheReq := requestWithGrpcCallOptions{
		FilterChunkRefRequest: req,
		grpcCallOptions:       opts,
	}
	res, err := c.cache.Do(ctx, cacheReq)
	if err != nil {
		return nil, err
	}

	return res.(*logproto.FilterChunkRefResponse), nil
}

type requestWithGrpcCallOptions struct {
	*logproto.FilterChunkRefRequest
	grpcCallOptions []grpc.CallOption
}

func (r requestWithGrpcCallOptions) WithStartEndForCache(start time.Time, end time.Time) resultscache.Request {
	return requestWithGrpcCallOptions{
		FilterChunkRefRequest: r.FilterChunkRefRequest.WithStartEndForCache(start, end).(*logproto.FilterChunkRefRequest),
		grpcCallOptions:       r.grpcCallOptions,
	}
}
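
For orientation, here is a minimal sketch of how the middleware might be wired in front of a gateway client; the helper name newCachedGatewayClient is hypothetical and not part of this PR:

// Hypothetical wiring sketch, not code from this PR: wraps an existing
// BloomGatewayClient with the results cache middleware. Cache generation
// numbers and retention are left disabled for simplicity.
func newCachedGatewayClient(
	logger log.Logger,
	c cache.Cache,
	limits CacheLimits,
	client logproto.BloomGatewayClient,
) *ClientCache {
	return NewBloomGatewayClientCacheMiddleware(
		logger,
		client,
		c,
		limits,
		nil,   // cacheGen: no cache generation number loader
		false, // retentionEnabled
	)
}

The returned *ClientCache exposes the same FilterChunkRefs signature as the wrapped client, so callers can swap it in transparently.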
Review discussion on the Extract loop:

"I wonder if we can reduce the amount of iterations by sorting chunkRef.Refs by From date and binary-searching the first item where From > end, then iterating only over chunkRef.Refs[idx:]."

"But sorting also requires the same count of iterations to ensure each item is in order, and after that we would still do a binary search. So I believe it's better to leave it as is. wdyt?"

"I don't think that will save us any iterations: sorting plus binary search is O(n log n + log n), whereas here we are only iterating once, so O(n)."

"Right. Ideally chunkRef.Refs would already be sorted, but I cannot guarantee that in the bloom gateway."
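
For illustration, a sketch of the suggested variant, assuming refs were already sorted ascending by From (which, per the discussion above, the bloom gateway cannot guarantee, so the PR keeps the linear scan); refsOverlapping is a hypothetical helper, not code from this PR:

// Hypothetical sketch of the binary-search variant discussed above.
// Assumes refs is sorted ascending by From. sort.Search finds the first
// ref that starts after `end`; only refs[:idx] can overlap the range.
func refsOverlapping(refs []*logproto.ShortRef, start, end model.Time) []*logproto.ShortRef {
	idx := sort.Search(len(refs), func(i int) bool {
		return refs[i].From > end
	})
	out := make([]*logproto.ShortRef, 0, idx)
	for _, ref := range refs[:idx] {
		if ref.Through > start { // same overlap rule as Extract
			out = append(out, ref)
		}
	}
	return out
}

Without a pre-sorted slice, the sort itself costs O(n log n), which is why the single O(n) pass in Extract stays.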