Skip to content

Commit

Permalink
Count KV retries not tries in Graveler (#7541)
Browse files Browse the repository at this point in the history
* Count KV retries not tries in Graveler

Metric `graveler_kv_retries` currently counts "tries", i.e. at least one for
each try.  So this includes the actual operations rate: if lakeFS performs
100 ops/sec, then it will increment graveler_kv_retries by _at least_ 100 /
sec.  That defeats using this metric to detect when KV retries impact
performance.

Instead count from 0, i.e. count "_retries_".

This is changelog-worthy because people may be tracking this.  But given
that it's an internal metric, and that we rarely examine it, this is not a
breaking change.

* [CR] monitorRetries should take a "retries" parameter

* [CR] Fix silly bugs in previous commit

That's not how you subtract 1.

* [CR] Correctly name "retries" field
  • Loading branch information
arielshaqed authored Mar 11, 2024
1 parent c057290 commit 85db4b0
Showing 1 changed file with 10 additions and 11 deletions.
21 changes: 10 additions & 11 deletions pkg/graveler/graveler.go
Original file line number Diff line number Diff line change
Expand Up @@ -1289,16 +1289,17 @@ func (g *Graveler) UpdateBranch(ctx context.Context, repository *RepositoryRecor
}

// monitorRetries reports the number of retries of operation while updating
// branchID of repository.
func (g *Graveler) monitorRetries(ctx context.Context, tries int, repositoryID RepositoryID, branchID BranchID, operation string) {
kvRetriesCounter.WithLabelValues(operation).Add(float64(tries))
// branchID of repository. Parameter retries should be 0 if the operation
// succeeded on the very first attempt.
func (g *Graveler) monitorRetries(ctx context.Context, retries int, repositoryID RepositoryID, branchID BranchID, operation string) {
kvRetriesCounter.WithLabelValues(operation).Add(float64(retries))
l := g.log(ctx).WithFields(logging.Fields{
"tries": tries,
"retries": retries,
"repository": repositoryID,
"branch": branchID,
"operation": operation,
})
if tries > 1 {
if retries > 1 {
l.Info("Operation retried")
} else {
l.Trace("No retries needed")
Expand Down Expand Up @@ -1744,11 +1745,7 @@ func (g *Graveler) safeBranchWrite(ctx context.Context, log logging.Logger, repo
branchPreOp *Branch
)
defer func() {
retries := try
if retries > options.MaxTries {
retries = options.MaxTries
}
g.monitorRetries(ctx, retries, repository.RepositoryID, branchID, operation)
g.monitorRetries(ctx, try-1, repository.RepositoryID, branchID, operation)
}()

for try = 1; try <= options.MaxTries; try++ {
Expand Down Expand Up @@ -2128,7 +2125,9 @@ func (g *Graveler) CreateCommitRecord(ctx context.Context, repository *Repositor
// between 1 and BranchUpdateMaxTries.
func (g *Graveler) retryBranchUpdate(ctx context.Context, repository *RepositoryRecord, branchID BranchID, f BranchUpdateFunc, operation string) error {
tries := 0
defer g.monitorRetries(ctx, tries, repository.RepositoryID, branchID, operation)
defer func() {
g.monitorRetries(ctx, tries-1, repository.RepositoryID, branchID, operation)
}()
err := backoff.Retry(func() error {
// TODO(eden) issue 3586 - if the branch commit id hasn't changed, update the fields instead of fail
tries += 1
Expand Down

0 comments on commit 85db4b0

Please sign in to comment.