Skip to content

Commit

Permalink
Improve backoff on host uuid file creation contention (#48903) (#48974)
Browse files Browse the repository at this point in the history
Aims to reduce flakiness in tests by using a linear backoff during
contention instead of sleeping for a constant amount of time. The
goal is to eliminate failures in CI while not causing any more
backoff than needed in real-life scenarios.
  • Loading branch information
rosstimothy authored Nov 14, 2024
1 parent b6cd1dd commit f90bacd
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions lib/utils/hostid/hostid_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/google/uuid"
"github.com/gravitational/trace"

"github.com/gravitational/teleport/api/utils/retryutils"
"github.com/gravitational/teleport/lib/utils"
)

Expand All @@ -49,6 +50,16 @@ func ReadOrCreateFile(dataDir string) (string, error) {
hostUUIDFileLock := GetPath(dataDir) + ".lock"
const iterationLimit = 3

backoff, err := retryutils.NewRetryV2(retryutils.RetryV2Config{
First: 100 * time.Millisecond,
Driver: retryutils.NewLinearDriver(100 * time.Millisecond),
Max: time.Second,
Jitter: retryutils.NewFullJitter(),
})
if err != nil {
return "", trace.Wrap(err)
}

for i := 0; i < iterationLimit; i++ {
if read, err := ReadFile(dataDir); err == nil {
return read, nil
Expand All @@ -57,7 +68,7 @@ func ReadOrCreateFile(dataDir string) (string, error) {
}

// Checking error instead of the usual uuid.New() in case uuid generation
// fails due to not enough randomness. It's been known to happen happen when
// fails due to not enough randomness. It's been known to happen when
// Teleport starts very early in the node initialization cycle and /dev/urandom
// isn't ready yet.
rawID, err := uuid.NewRandom()
Expand Down Expand Up @@ -91,12 +102,14 @@ func ReadOrCreateFile(dataDir string) (string, error) {
id, err := writeFile(rawID.String())
if err != nil {
if errors.Is(err, utils.ErrUnsuccessfulLockTry) {
time.Sleep(100 * time.Millisecond)
backoff.Inc()
<-backoff.After()
continue
}

return "", trace.Wrap(err)
}
backoff.Reset()

return id, nil
}
Expand Down

0 comments on commit f90bacd

Please sign in to comment.