Skip to content

Commit

Permalink
Improve backoff on host uuid file creation contention
Browse files Browse the repository at this point in the history
Aims to reduce flakiness in tests by using a linear backoff during
contention instead of sleeping for a constant amount of time. The
goal is to eliminate failures in CI without causing any more
backoff than needed in real-life scenarios.
  • Loading branch information
rosstimothy authored and github-actions committed Nov 13, 2024
1 parent a558efb commit 7969ae7
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions lib/utils/hostid/hostid_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/google/uuid"
"github.com/gravitational/trace"

"github.com/gravitational/teleport/api/utils/retryutils"
"github.com/gravitational/teleport/lib/utils"
)

Expand All @@ -49,6 +50,16 @@ func ReadOrCreateFile(dataDir string) (string, error) {
hostUUIDFileLock := GetPath(dataDir) + ".lock"
const iterationLimit = 3

backoff, err := retryutils.NewRetryV2(retryutils.RetryV2Config{
First: 100 * time.Millisecond,
Driver: retryutils.NewLinearDriver(100 * time.Millisecond),
Max: time.Second,
Jitter: retryutils.FullJitter,
})
if err != nil {
return "", trace.Wrap(err)
}

for i := 0; i < iterationLimit; i++ {
if read, err := ReadFile(dataDir); err == nil {
return read, nil
Expand All @@ -57,7 +68,7 @@ func ReadOrCreateFile(dataDir string) (string, error) {
}

// Checking error instead of the usual uuid.New() in case uuid generation
// fails due to not enough randomness. It's been known to happen happen when
// fails due to not enough randomness. It's been known to happen when
// Teleport starts very early in the node initialization cycle and /dev/urandom
// isn't ready yet.
rawID, err := uuid.NewRandom()
Expand Down Expand Up @@ -91,12 +102,14 @@ func ReadOrCreateFile(dataDir string) (string, error) {
id, err := writeFile(rawID.String())
if err != nil {
if errors.Is(err, utils.ErrUnsuccessfulLockTry) {
time.Sleep(10 * time.Millisecond)
backoff.Inc()
<-backoff.After()
continue
}

return "", trace.Wrap(err)
}
backoff.Reset()

return id, nil
}
Expand Down

0 comments on commit 7969ae7

Please sign in to comment.