From 5fee4053c85a35d67fce3d4c9a09aab911cd0981 Mon Sep 17 00:00:00 2001 From: rjeczalik Date: Mon, 9 Dec 2024 09:24:24 +0100 Subject: [PATCH] transport/http: fix metrics race condition This PR fixes a race condition reported by #548, by introducing a simple wrapper for time.Time. While by default aws-sdk-go-v2 uses NopMeterProvider, the transport/http internals are still instrumenting the http requests (the withMetrics func). Maybe there is a room for improvement, to not call httptrace.WithClientTrace when the meter is nop, however this is out of scope for this PR. --- .../c898d340-9c15-4b9a-a5da-93a340a9d5f8.json | 9 ++++ transport/http/metrics.go | 44 ++++++++++++------- 2 files changed, 38 insertions(+), 15 deletions(-) create mode 100644 .changelog/c898d340-9c15-4b9a-a5da-93a340a9d5f8.json diff --git a/.changelog/c898d340-9c15-4b9a-a5da-93a340a9d5f8.json b/.changelog/c898d340-9c15-4b9a-a5da-93a340a9d5f8.json new file mode 100644 index 00000000..57e34f5d --- /dev/null +++ b/.changelog/c898d340-9c15-4b9a-a5da-93a340a9d5f8.json @@ -0,0 +1,9 @@ +{ + "id": "c898d340-9c15-4b9a-a5da-93a340a9d5f8", + "type": "bugfix", + "collapse": true, + "description": "transport/http: fix metrics race condition", + "modules": [ + "transport/http" + ] +} diff --git a/transport/http/metrics.go b/transport/http/metrics.go index ab110139..d1beaa59 100644 --- a/transport/http/metrics.go +++ b/transport/http/metrics.go @@ -5,6 +5,7 @@ import ( "crypto/tls" "net/http" "net/http/httptrace" + "sync/atomic" "time" "github.com/aws/smithy-go/metrics" @@ -42,10 +43,10 @@ type timedClientDo struct { } func (c *timedClientDo) Do(r *http.Request) (*http.Response, error) { - c.hm.doStart = now() + c.hm.doStart.Store(now()) resp, err := c.ClientDo.Do(r) - c.hm.DoRequestDuration.Record(r.Context(), elapsed(c.hm.doStart)) + c.hm.DoRequestDuration.Record(r.Context(), c.hm.doStart.Elapsed()) return resp, err } @@ -58,10 +59,10 @@ type httpMetrics struct { DoRequestDuration metrics.Float64Histogram // client.http.do_request_duration TimeToFirstByte metrics.Float64Histogram // client.http.time_to_first_byte - doStart time.Time - dnsStart time.Time - connectStart time.Time - tlsStart time.Time + doStart safeTime + dnsStart safeTime + connectStart safeTime + tlsStart safeTime } func newHTTPMetrics(meter metrics.Meter) (*httpMetrics, error) { @@ -115,15 +116,15 @@ func newHTTPMetrics(meter metrics.Meter) (*httpMetrics, error) { } func (m *httpMetrics) DNSStart(httptrace.DNSStartInfo) { - m.dnsStart = now() + m.dnsStart.Store(now()) } func (m *httpMetrics) ConnectStart(string, string) { - m.connectStart = now() + m.connectStart.Store(now()) } func (m *httpMetrics) TLSHandshakeStart() { - m.tlsStart = now() + m.tlsStart.Store(now()) } func (m *httpMetrics) GotConn(ctx context.Context) func(httptrace.GotConnInfo) { @@ -140,25 +141,25 @@ func (m *httpMetrics) PutIdleConn(ctx context.Context) func(error) { func (m *httpMetrics) DNSDone(ctx context.Context) func(httptrace.DNSDoneInfo) { return func(httptrace.DNSDoneInfo) { - m.DNSLookupDuration.Record(ctx, elapsed(m.dnsStart)) + m.DNSLookupDuration.Record(ctx, m.dnsStart.Elapsed()) } } func (m *httpMetrics) ConnectDone(ctx context.Context) func(string, string, error) { return func(string, string, error) { - m.ConnectDuration.Record(ctx, elapsed(m.connectStart)) + m.ConnectDuration.Record(ctx, m.connectStart.Elapsed()) } } func (m *httpMetrics) TLSHandshakeDone(ctx context.Context) func(tls.ConnectionState, error) { return func(tls.ConnectionState, error) { - m.TLSHandshakeDuration.Record(ctx, elapsed(m.tlsStart)) + m.TLSHandshakeDuration.Record(ctx, m.tlsStart.Elapsed()) } } func (m *httpMetrics) GotFirstResponseByte(ctx context.Context) func() { return func() { - m.TimeToFirstByte.Record(ctx, elapsed(m.doStart)) + m.TimeToFirstByte.Record(ctx, m.doStart.Elapsed()) } } @@ -177,8 +178,21 @@ func (m *httpMetrics) addConnIdle(ctx context.Context, incr int64) { }) } -func elapsed(start time.Time) float64 { +type safeTime struct { + atomic.Value // time.Time +} + +func (st *safeTime) Store(v time.Time) { + st.Value.Store(v) +} + +func (st *safeTime) Load() time.Time { + t, _ := st.Value.Load().(time.Time) + return t +} + +func (st *safeTime) Elapsed() float64 { end := now() - elapsed := end.Sub(start) + elapsed := end.Sub(st.Load()) return float64(elapsed) / 1e9 }