From 71c7ab677499a6b933d872589f6a787663405965 Mon Sep 17 00:00:00 2001 From: Luc Talatinian Date: Wed, 2 Oct 2024 15:39:53 -0400 Subject: [PATCH] add http client metrics --- .../240018a27d404933a6038db3cf0aa991.json | 8 + .../smithy/go/codegen/ServiceGenerator.java | 7 +- transport/http/client.go | 27 ++- transport/http/metrics.go | 184 ++++++++++++++++++ transport/http/metrics_test.go | 165 ++++++++++++++++ 5 files changed, 387 insertions(+), 4 deletions(-) create mode 100644 .changelog/240018a27d404933a6038db3cf0aa991.json create mode 100644 transport/http/metrics.go create mode 100644 transport/http/metrics_test.go diff --git a/.changelog/240018a27d404933a6038db3cf0aa991.json b/.changelog/240018a27d404933a6038db3cf0aa991.json new file mode 100644 index 00000000..8ceeff5b --- /dev/null +++ b/.changelog/240018a27d404933a6038db3cf0aa991.json @@ -0,0 +1,8 @@ +{ + "id": "240018a2-7d40-4933-a603-8db3cf0aa991", + "type": "feature", + "description": "Add HTTP client metrics.", + "modules": [ + "." + ] +} \ No newline at end of file diff --git a/codegen/smithy-go-codegen/src/main/java/software/amazon/smithy/go/codegen/ServiceGenerator.java b/codegen/smithy-go-codegen/src/main/java/software/amazon/smithy/go/codegen/ServiceGenerator.java index 38d084f3..f10b7537 100644 --- a/codegen/smithy-go-codegen/src/main/java/software/amazon/smithy/go/codegen/ServiceGenerator.java +++ b/codegen/smithy-go-codegen/src/main/java/software/amazon/smithy/go/codegen/ServiceGenerator.java @@ -394,7 +394,9 @@ private GoWriter.Writable generateInvokeOperation() { defer endTimer() defer span.End() - handler := $newClientHandler:T(options.HTTPClient) + handler := $newClientHandler:T(options.HTTPClient, func(o *smithyhttp.ClientHandler) { + o.Meter = options.MeterProvider.Meter($scope:S) + }) decorated := middleware.DecorateHandler(handler, stack) result, metadata, err = decorated.Handle(ctx, params) if err != nil { @@ -442,7 +444,8 @@ defer endTimer() ConfigFieldResolver.Target.FINALIZATION ).map(this::generateConfigFieldResolver).toList() ).compose(), - "newClientHandler", SmithyGoDependency.SMITHY_HTTP_TRANSPORT.func("NewClientHandler") + "newClientHandler", SmithyGoDependency.SMITHY_HTTP_TRANSPORT.func("NewClientHandlerWithOptions"), + "scope", settings.getModuleName() )); } diff --git a/transport/http/client.go b/transport/http/client.go index c43c346b..0fceae81 100644 --- a/transport/http/client.go +++ b/transport/http/client.go @@ -6,6 +6,7 @@ import ( "net/http" smithy "github.com/aws/smithy-go" + "github.com/aws/smithy-go/metrics" "github.com/aws/smithy-go/middleware" "github.com/aws/smithy-go/tracing" ) @@ -28,13 +29,30 @@ func (fn ClientDoFunc) Do(r *http.Request) (*http.Response, error) { // implementation is http.Client. type ClientHandler struct { client ClientDo + + Meter metrics.Meter // For HTTP client metrics. } // NewClientHandler returns an initialized middleware handler for the client. +// +// Deprecated: Use [NewClientHandlerWithOptions]. func NewClientHandler(client ClientDo) ClientHandler { - return ClientHandler{ + return NewClientHandlerWithOptions(client) +} + +// NewClientHandlerWithOptions returns an initialized middleware handler for the client +// with applied options. +func NewClientHandlerWithOptions(client ClientDo, opts ...func(*ClientHandler)) ClientHandler { + h := ClientHandler{ client: client, } + for _, opt := range opts { + opt(&h) + } + if h.Meter == nil { + h.Meter = metrics.NopMeterProvider{}.Meter("") + } + return h } // Handle implements the middleware Handler interface, that will invoke the @@ -46,6 +64,11 @@ func (c ClientHandler) Handle(ctx context.Context, input interface{}) ( ctx, span := tracing.StartSpan(ctx, "DoHTTPRequest") defer span.End() + ctx, client, err := withMetrics(ctx, c.client, c.Meter) + if err != nil { + return nil, metadata, fmt.Errorf("instrument with HTTP metrics: %w", err) + } + req, ok := input.(*Request) if !ok { return nil, metadata, fmt.Errorf("expect Smithy http.Request value as input, got unsupported type %T", input) @@ -66,7 +89,7 @@ func (c ClientHandler) Handle(ctx context.Context, input interface{}) ( span.SetProperty("http.request_content_length", length) } - resp, err := c.client.Do(builtRequest) + resp, err := client.Do(builtRequest) if resp == nil { // Ensure a http response value is always present to prevent unexpected // panics. diff --git a/transport/http/metrics.go b/transport/http/metrics.go new file mode 100644 index 00000000..ab110139 --- /dev/null +++ b/transport/http/metrics.go @@ -0,0 +1,184 @@ +package http + +import ( + "context" + "crypto/tls" + "net/http" + "net/http/httptrace" + "time" + + "github.com/aws/smithy-go/metrics" +) + +var now = time.Now + +// withMetrics instruments an HTTP client and context to collect HTTP metrics. +func withMetrics(parent context.Context, client ClientDo, meter metrics.Meter) ( + context.Context, ClientDo, error, +) { + hm, err := newHTTPMetrics(meter) + if err != nil { + return nil, nil, err + } + + ctx := httptrace.WithClientTrace(parent, &httptrace.ClientTrace{ + DNSStart: hm.DNSStart, + ConnectStart: hm.ConnectStart, + TLSHandshakeStart: hm.TLSHandshakeStart, + + GotConn: hm.GotConn(parent), + PutIdleConn: hm.PutIdleConn(parent), + ConnectDone: hm.ConnectDone(parent), + DNSDone: hm.DNSDone(parent), + TLSHandshakeDone: hm.TLSHandshakeDone(parent), + GotFirstResponseByte: hm.GotFirstResponseByte(parent), + }) + return ctx, &timedClientDo{client, hm}, nil +} + +type timedClientDo struct { + ClientDo + hm *httpMetrics +} + +func (c *timedClientDo) Do(r *http.Request) (*http.Response, error) { + c.hm.doStart = now() + resp, err := c.ClientDo.Do(r) + + c.hm.DoRequestDuration.Record(r.Context(), elapsed(c.hm.doStart)) + return resp, err +} + +type httpMetrics struct { + DNSLookupDuration metrics.Float64Histogram // client.http.connections.dns_lookup_duration + ConnectDuration metrics.Float64Histogram // client.http.connections.acquire_duration + TLSHandshakeDuration metrics.Float64Histogram // client.http.connections.tls_handshake_duration + ConnectionUsage metrics.Int64UpDownCounter // client.http.connections.usage + + DoRequestDuration metrics.Float64Histogram // client.http.do_request_duration + TimeToFirstByte metrics.Float64Histogram // client.http.time_to_first_byte + + doStart time.Time + dnsStart time.Time + connectStart time.Time + tlsStart time.Time +} + +func newHTTPMetrics(meter metrics.Meter) (*httpMetrics, error) { + hm := &httpMetrics{} + + var err error + hm.DNSLookupDuration, err = meter.Float64Histogram("client.http.connections.dns_lookup_duration", func(o *metrics.InstrumentOptions) { + o.UnitLabel = "s" + o.Description = "The time it takes a request to perform DNS lookup." + }) + if err != nil { + return nil, err + } + hm.ConnectDuration, err = meter.Float64Histogram("client.http.connections.acquire_duration", func(o *metrics.InstrumentOptions) { + o.UnitLabel = "s" + o.Description = "The time it takes a request to acquire a connection." + }) + if err != nil { + return nil, err + } + hm.TLSHandshakeDuration, err = meter.Float64Histogram("client.http.connections.tls_handshake_duration", func(o *metrics.InstrumentOptions) { + o.UnitLabel = "s" + o.Description = "The time it takes an HTTP request to perform the TLS handshake." + }) + if err != nil { + return nil, err + } + hm.ConnectionUsage, err = meter.Int64UpDownCounter("client.http.connections.usage", func(o *metrics.InstrumentOptions) { + o.UnitLabel = "{connection}" + o.Description = "Current state of connections pool." + }) + if err != nil { + return nil, err + } + hm.DoRequestDuration, err = meter.Float64Histogram("client.http.do_request_duration", func(o *metrics.InstrumentOptions) { + o.UnitLabel = "s" + o.Description = "Time spent performing an entire HTTP transaction." + }) + if err != nil { + return nil, err + } + hm.TimeToFirstByte, err = meter.Float64Histogram("client.http.time_to_first_byte", func(o *metrics.InstrumentOptions) { + o.UnitLabel = "s" + o.Description = "Time from start of transaction to when the first response byte is available." + }) + if err != nil { + return nil, err + } + + return hm, nil +} + +func (m *httpMetrics) DNSStart(httptrace.DNSStartInfo) { + m.dnsStart = now() +} + +func (m *httpMetrics) ConnectStart(string, string) { + m.connectStart = now() +} + +func (m *httpMetrics) TLSHandshakeStart() { + m.tlsStart = now() +} + +func (m *httpMetrics) GotConn(ctx context.Context) func(httptrace.GotConnInfo) { + return func(httptrace.GotConnInfo) { + m.addConnAcquired(ctx, 1) + } +} + +func (m *httpMetrics) PutIdleConn(ctx context.Context) func(error) { + return func(error) { + m.addConnAcquired(ctx, -1) + } +} + +func (m *httpMetrics) DNSDone(ctx context.Context) func(httptrace.DNSDoneInfo) { + return func(httptrace.DNSDoneInfo) { + m.DNSLookupDuration.Record(ctx, elapsed(m.dnsStart)) + } +} + +func (m *httpMetrics) ConnectDone(ctx context.Context) func(string, string, error) { + return func(string, string, error) { + m.ConnectDuration.Record(ctx, elapsed(m.connectStart)) + } +} + +func (m *httpMetrics) TLSHandshakeDone(ctx context.Context) func(tls.ConnectionState, error) { + return func(tls.ConnectionState, error) { + m.TLSHandshakeDuration.Record(ctx, elapsed(m.tlsStart)) + } +} + +func (m *httpMetrics) GotFirstResponseByte(ctx context.Context) func() { + return func() { + m.TimeToFirstByte.Record(ctx, elapsed(m.doStart)) + } +} + +func (m *httpMetrics) addConnAcquired(ctx context.Context, incr int64) { + m.ConnectionUsage.Add(ctx, incr, func(o *metrics.RecordMetricOptions) { + o.Properties.Set("state", "acquired") + }) +} + +// Not used: it is recommended to track acquired vs idle conn, but we can't +// determine when something is truly idle with the current HTTP client hooks +// available to us. +func (m *httpMetrics) addConnIdle(ctx context.Context, incr int64) { + m.ConnectionUsage.Add(ctx, incr, func(o *metrics.RecordMetricOptions) { + o.Properties.Set("state", "idle") + }) +} + +func elapsed(start time.Time) float64 { + end := now() + elapsed := end.Sub(start) + return float64(elapsed) / 1e9 +} diff --git a/transport/http/metrics_test.go b/transport/http/metrics_test.go new file mode 100644 index 00000000..dfac6467 --- /dev/null +++ b/transport/http/metrics_test.go @@ -0,0 +1,165 @@ +package http + +import ( + "context" + "crypto/tls" + "net/http" + "net/http/httptrace" + "testing" + "time" + + "github.com/aws/smithy-go/metrics" +) + +type mockClock struct { + now time.Time +} + +func (m *mockClock) Now() time.Time { + return m.now +} + +func (m *mockClock) Advance(sec, ms time.Duration) { + m.now = m.now.Add(sec*time.Second + ms*time.Millisecond) +} + +func mockNow(start time.Time) (*mockClock, func()) { + clock := &mockClock{start} + now = clock.Now + return clock, func() { now = time.Now } +} + +type mockMeter struct { + metrics.Meter + counters map[string]*mockUpDownCounter + histograms map[string]*mockHistogram +} + +func (m *mockMeter) Int64UpDownCounter(name string, opts ...metrics.InstrumentOption) (metrics.Int64UpDownCounter, error) { + if m.counters == nil { + m.counters = map[string]*mockUpDownCounter{} + } + c := &mockUpDownCounter{} + m.counters[name] = c + return c, nil +} + +func (m *mockMeter) Float64Histogram(name string, opts ...metrics.InstrumentOption) (metrics.Float64Histogram, error) { + if m.histograms == nil { + m.histograms = map[string]*mockHistogram{} + } + h := &mockHistogram{} + m.histograms[name] = h + return h, nil +} + +type mockUpDownCounter struct { + value int64 +} + +func (m *mockUpDownCounter) Add(ctx context.Context, incr int64, opts ...metrics.RecordMetricOption) { + m.value += incr +} + +type mockHistogram struct { + value float64 +} + +func (m *mockHistogram) Record(ctx context.Context, value float64, opts ...metrics.RecordMetricOption) { + m.value = value +} + +type mockClient struct { + clock *mockClock + trace *httptrace.ClientTrace +} + +func (m *mockClient) Do(*http.Request) (*http.Response, error) { + m.trace.DNSStart(httptrace.DNSStartInfo{}) + m.clock.Advance(1, 500) + m.trace.DNSDone(httptrace.DNSDoneInfo{}) + + m.trace.ConnectStart("", "") + m.clock.Advance(0, 250) + m.trace.ConnectDone("", "", nil) + + m.trace.TLSHandshakeStart() + m.clock.Advance(0, 377) + m.trace.TLSHandshakeDone(tls.ConnectionState{}, nil) + + m.trace.GotConn(httptrace.GotConnInfo{}) + + m.clock.Advance(99, 99) + m.trace.GotFirstResponseByte() + + m.clock.Advance(999, 999) + + // explicitly do NOT do this so we can verify the nonzero count + // m.trace.PutIdleConn(nil) + + return &http.Response{}, nil +} + +// There's not a whole lot we can actually test without doing an actual HTTP +// request, but we can at least check that we're calculating things correctly. +func TestHTTPMetrics(t *testing.T) { + start := time.Unix(1, 15) + clock, restore := mockNow(start) + defer restore() + + client := &mockClient{} + meter := &mockMeter{} + + ctx, instrumented, err := withMetrics(context.Background(), client, meter) + if err != nil { + t.Fatalf("withMetrics: %v", err) + } + + trace := httptrace.ContextClientTrace(ctx) + if trace == nil { + t.Fatalf("there should be a trace but there isn't") + } + + client.clock = clock + client.trace = trace + instrumented.Do(&http.Request{}) + + expectHistogram(t, meter, "client.http.connections.dns_lookup_duration", 1.5) + expectHistogram(t, meter, "client.http.connections.acquire_duration", 0.25) + expectHistogram(t, meter, "client.http.connections.tls_handshake_duration", 0.377) + + elapsedTTFB := (1+99)*time.Second + (500+250+377+99)*time.Millisecond + expectHistogram(t, meter, "client.http.time_to_first_byte", float64(elapsedTTFB)/1e9) + + elapsedDo := clock.now.Sub(start) + expectHistogram(t, meter, "client.http.do_request_duration", float64(elapsedDo)/1e9) + + expectCounter(t, meter, "client.http.connections.usage", 1) + +} + +func expectHistogram(t *testing.T, meter *mockMeter, name string, expect float64) { + t.Helper() + + histogram, ok := meter.histograms[name] + if !ok { + t.Errorf("missing histogram: %s", name) + return + } + if expect != histogram.value { + t.Errorf("%s: %v != %v", name, expect, histogram.value) + } +} + +func expectCounter(t *testing.T, meter *mockMeter, name string, expect int64) { + t.Helper() + + counter, ok := meter.counters[name] + if !ok { + t.Errorf("missing counter: %s", name) + return + } + if expect != counter.value { + t.Errorf("%s: %v != %v", name, expect, counter.value) + } +}