Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v16] migrate teleport wait no-resolve to slog and add more details in logs #49569

Merged
merged 3 commits into from
Nov 29, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 32 additions & 17 deletions tool/teleport/common/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
"time"

"github.com/gravitational/trace"
log "github.com/sirupsen/logrus"

"github.com/gravitational/teleport/api/utils/retryutils"
"github.com/gravitational/teleport/lib/utils"
Expand Down Expand Up @@ -92,6 +91,8 @@ func waitNoResolve(ctx context.Context, domain string, period, timeout time.Dura
if timeout == 0 {
return trace.BadParameter("no timeout provided")
}
log := slog.With("domain", domain)
log.InfoContext(ctx, "waiting until the domain stops resolving to ensure that every auth server running the previous major version has been updated/terminated")

var err error
ctx, cancel := context.WithTimeout(ctx, timeout)
Expand Down Expand Up @@ -124,44 +125,58 @@ func waitNoResolve(ctx context.Context, domain string, period, timeout time.Dura
return trace.Wrap(err)

case <-periodic.Next():
exit, err = checkDomainNoResolve(domain)
exit, err = checkDomainNoResolve(ctx, domain, log)
if err != nil {
return trace.Wrap(err)
}
}
}

log.Info("no endpoints found, exiting with success code")
log.InfoContext(ctx, "no endpoints found, exiting with success code")
return nil
}

func checkDomainNoResolve(domainName string) (exit bool, err error) {
endpoints, err := countEndpoints(domainName)
func checkDomainNoResolve(ctx context.Context, domainName string, log *slog.Logger) (exit bool, err error) {
endpoints, err := resolveEndpoints(domainName)
if err != nil {
var dnsErr *net.DNSError
if !errors.As(trace.Unwrap(err), &dnsErr) {
log.Errorf("unexpected error when resolving domain %s : %s", domainName, err)
log.ErrorContext(ctx, "unexpected error when resolving domain", "error", err)
return false, trace.Wrap(err)
}
if dnsErr.Temporary() {
log.Warnf("temporary error when resolving domain %s : %s", domainName, err)
return false, nil
}

if dnsErr.IsNotFound {
log.Infof("domain %s not found", domainName)
log.InfoContext(ctx, "domain not found")
return true, nil
}
log.Errorf("error when resolving domain %s : %s", domainName, err)

// Creating a new logger because the linter doesn't want both key/value and slog.Attr in the same log write.
log := log.With(slog.Group("dns_error",
"name", dnsErr.Name,
"server", dnsErr.Server,
"is_timeout", dnsErr.IsTimeout,
"is_temporary", dnsErr.IsTemporary,
"is_not_found", dnsErr.IsNotFound,
))
if dnsErr.Temporary() {
log.WarnContext(ctx, "temporary error when resolving domain", "error", err)
return false, nil
}
log.ErrorContext(ctx, "error when resolving domain", "error", err)
return false, nil
}
log.Infof("%d endpoints found when resolving domain %s", endpoints, domainName)
return endpoints == 0, nil
if len(endpoints) == 0 {
log.InfoContext(ctx, "domain found and resolution returned no endpoints")
return true, nil
}
log.InfoContext(ctx, "endpoints found when resolving domain", "endpoints", endpoints)
return false, nil
}

func countEndpoints(serviceName string) (int, error) {
func resolveEndpoints(serviceName string) ([]net.IP, error) {
ips, err := net.LookupIP(serviceName)
if err != nil {
return 0, trace.Wrap(err)
return nil, trace.Wrap(err)
}
return len(ips), nil
return ips, nil
}
Loading