Skip to content

Commit

Permalink
Adds synchronous and asynchronous OpenTelemetry metrics (#43)
Browse files Browse the repository at this point in the history
* feat: adds synchronous and asynchronous OpenTelemetry metrics

* fix: specify nanosecond unit for acquire duration metric

* fix: wrap asynchronous metric creation errors

* style: move meter instantiations out of var block

* revert: keep recordSpanError as function instead of method

* fix: update RecordStats definition comment

* feat: mark WithAttributes option as deprecated

* fix: correct typo in StatsOption definition comment

* chore(deps): bump OTel SDK to v1.34.0
  • Loading branch information
jahough authored Jan 20, 2025
1 parent f2ee37f commit 588e676
Show file tree
Hide file tree
Showing 6 changed files with 507 additions and 62 deletions.
18 changes: 14 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,22 @@

# otelpgx

Provides [OpenTelemetry](https://github.com/open-telemetry/opentelemetry-go)
Provides [OpenTelemetry](https://github.com/open-telemetry/opentelemetry-go)
instrumentation for the [jackc/pgx](https://github.com/jackc/pgx) library.

## Requirements

- go 1.18 (or higher)
- go 1.22 (or higher)
- pgx v5 (or higher)

## Usage

Make sure you have a suitable pgx version:

```bash
go get github.com/jackc/pgx/v5
```

Install the library:

```go
Expand All @@ -28,10 +34,14 @@ if err != nil {

cfg.ConnConfig.Tracer = otelpgx.NewTracer()

conn, err := pgxpool.NewWithConfig(ctx, cfg)
conn, err := pgxpool.NewWithConfig(ctx, cfg)
if err != nil {
return nil, fmt.Errorf("connect to database: %w", err)
}

if err := otelpgx.RecordStats(conn); err != nil {
return nil, fmt.Errorf("unable to record database stats: %w", err)
}
```

See [options.go](options.go) for the full list of options.
See [options.go](options.go) for the full list of options.
13 changes: 8 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
module github.com/exaring/otelpgx

go 1.20
go 1.22.0

toolchain go1.23.4

require (
github.com/jackc/pgx/v5 v5.6.0
go.opentelemetry.io/otel v1.23.1
go.opentelemetry.io/otel/trace v1.23.1
go.opentelemetry.io/otel v1.34.0
go.opentelemetry.io/otel/metric v1.34.0
go.opentelemetry.io/otel/trace v1.34.0
)

require (
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
github.com/jackc/puddle/v2 v2.2.1 // indirect
go.opentelemetry.io/otel/metric v1.23.1 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/text v0.14.0 // indirect
Expand Down
24 changes: 15 additions & 9 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
Expand All @@ -19,13 +21,16 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
go.opentelemetry.io/otel v1.23.1 h1:Za4UzOqJYS+MUczKI320AtqZHZb7EqxO00jAHE0jmQY=
go.opentelemetry.io/otel v1.23.1/go.mod h1:Td0134eafDLcTS4y+zQ26GE8u3dEuRBiBCTUIRHaikA=
go.opentelemetry.io/otel/metric v1.23.1 h1:PQJmqJ9u2QaJLBOELl1cxIdPcpbwzbkjfEyelTl2rlo=
go.opentelemetry.io/otel/metric v1.23.1/go.mod h1:mpG2QPlAfnK8yNhNJAxDZruU9Y1/HubbC+KyH8FaCWI=
go.opentelemetry.io/otel/trace v1.23.1 h1:4LrmmEd8AU2rFvU1zegmvqW7+kWarxtNOPyeL6HmYY8=
go.opentelemetry.io/otel/trace v1.23.1/go.mod h1:4IpnpJFwr1mo/6HL8XIPJaE9y0+u1KcVmuW7dwFSVrI=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY=
go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI=
go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ=
go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE=
go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k=
go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE=
golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k=
golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
Expand All @@ -35,3 +40,4 @@ golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
233 changes: 233 additions & 0 deletions meter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
package otelpgx

import (
"context"
"fmt"
"sync"
"time"

"github.com/jackc/pgx/v5/pgxpool"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
semconv "go.opentelemetry.io/otel/semconv/v1.27.0"
)

// defaultMinimumReadDBStatsInterval is the lower bound, used when no option
// overrides it, on the time between two consecutive reads of the pool
// statistics via db.Stat().
const defaultMinimumReadDBStatsInterval = time.Second

// Names of the asynchronous instruments that report pgxpool statistics.
//
// They are constants rather than variables: the names identify the instruments
// and are never reassigned, so they should not be mutable package-level state.
const (
	pgxPoolAcquireCount            = "pgxpool.acquires"
	pgxPoolAcquireDuration         = "pgxpool.acquire_duration"
	pgxPoolAcquiredConnections     = "pgxpool.acquired_connections"
	pgxPoolCancelledAcquires       = "pgxpool.canceled_acquires"
	pgxPoolConstructingConnections = "pgxpool.constructing_connections"
	pgxPoolEmptyAcquire            = "pgxpool.empty_acquire"
	pgxPoolIdleConnections         = "pgxpool.idle_connections"
	pgxPoolMaxConnections          = "pgxpool.max_connections"
	pgxPoolMaxIdleDestroyCount     = "pgxpool.max_idle_destroys"
	pgxPoolMaxLifetimeDestroyCount = "pgxpool.max_lifetime_destroys"
	pgxPoolNewConnectionsCount     = "pgxpool.new_connections"
	pgxPoolTotalConnections        = "pgxpool.total_connections"
)

// RecordStats registers asynchronous OpenTelemetry instruments that report the
// statistics of the provided pgxpool.Pool.
//
// The pool statistics are re-read at most once per minimum read interval. The
// interval defaults to 1 second and can be changed with the
// WithMinimumReadDBStatsInterval StatsOption. Unless an option changes it, the
// meter is obtained from the global meter provider (otel.GetMeterProvider).
//
// It returns an error if db is nil, or if creating an instrument or
// registering the callback fails.
func RecordStats(db *pgxpool.Pool, opts ...StatsOption) error {
	// recordStats reads db.Config() unconditionally; reject a nil pool here
	// with an error instead of letting that dereference panic.
	if db == nil {
		return fmt.Errorf("otelpgx: cannot record stats of a nil pgxpool.Pool")
	}

	o := statsOptions{
		meterProvider:              otel.GetMeterProvider(),
		minimumReadDBStatsInterval: defaultMinimumReadDBStatsInterval,
		defaultAttributes: []attribute.KeyValue{
			semconv.DBSystemPostgreSQL,
		},
	}

	// Options are applied in order on top of the defaults above.
	for _, opt := range opts {
		opt.applyStatsOptions(&o)
	}

	meter := o.meterProvider.Meter(meterName, metric.WithInstrumentationVersion(findOwnImportedVersion()))

	return recordStats(meter, db, o.minimumReadDBStatsInterval, o.defaultAttributes...)
}

// recordStats creates the asynchronous pgxpool instruments on meter and
// registers one callback that observes all of them from db.Stat().
//
// Parameters:
//   - meter: the meter used to create the instruments and register the callback.
//   - db: the pool whose statistics are reported; it must not be nil.
//   - minimumReadDBStatsInterval: the callback calls db.Stat() again only when at
//     least this much time has passed since the previous read; otherwise the
//     previously read statistics are observed again.
//   - attrs: attributes attached to every observation, in addition to the
//     PostgreSQL system attribute and the connection pool name
//     ("host:port/database") derived from the pool configuration.
//
// It returns a wrapped error if any instrument cannot be created or if the
// callback cannot be registered.
func recordStats(
	meter metric.Meter,
	db *pgxpool.Pool,
	minimumReadDBStatsInterval time.Duration,
	attrs ...attribute.KeyValue,
) error {
	var (
		err error

		// Asynchronous Observable Metrics
		acquireCount            metric.Int64ObservableCounter
		acquireDuration         metric.Int64ObservableCounter
		acquiredConns           metric.Int64ObservableUpDownCounter
		cancelledAcquires       metric.Int64ObservableCounter
		constructingConns       metric.Int64ObservableUpDownCounter
		emptyAcquires           metric.Int64ObservableCounter
		idleConns               metric.Int64ObservableUpDownCounter
		maxConns                metric.Int64ObservableGauge
		maxIdleDestroyCount     metric.Int64ObservableCounter
		maxLifetimeDestroyCount metric.Int64ObservableCounter
		newConnsCount           metric.Int64ObservableCounter
		totalConns              metric.Int64ObservableUpDownCounter

		observeOptions []metric.ObserveOption

		// dbStats caches the most recent db.Stat() result; lastDBStats is the
		// time it was read. Both are only touched by the callback, under lock.
		dbStats     *pgxpool.Stat
		lastDBStats time.Time

		// lock prevents a race between batch observer and instrument registration.
		lock sync.Mutex
	)

	// Read the connection configuration once instead of calling db.Config()
	// separately for every field.
	connConfig := db.Config().ConnConfig
	poolName := fmt.Sprintf("%s:%d/%s", connConfig.Host, connConfig.Port, connConfig.Database)
	dbClientConnectionPoolName := semconv.DBClientConnectionPoolName(poolName)

	lock.Lock()
	defer lock.Unlock()

	if acquireCount, err = meter.Int64ObservableCounter(
		pgxPoolAcquireCount,
		metric.WithDescription("Cumulative count of successful acquires from the pool."),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolAcquireCount, err)
	}

	if acquireDuration, err = meter.Int64ObservableCounter(
		pgxPoolAcquireDuration,
		metric.WithDescription("Total duration of all successful acquires from the pool in nanoseconds."),
		metric.WithUnit("ns"),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolAcquireDuration, err)
	}

	if acquiredConns, err = meter.Int64ObservableUpDownCounter(
		pgxPoolAcquiredConnections,
		metric.WithDescription("Number of currently acquired connections in the pool."),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolAcquiredConnections, err)
	}

	if cancelledAcquires, err = meter.Int64ObservableCounter(
		pgxPoolCancelledAcquires,
		metric.WithDescription("Cumulative count of acquires from the pool that were canceled by a context."),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolCancelledAcquires, err)
	}

	// This instrument counts connections, so it carries no time unit: the
	// previous "ms" unit mislabelled the value as a duration. It is now
	// unit-less like the other connection-count instruments in this function.
	if constructingConns, err = meter.Int64ObservableUpDownCounter(
		pgxPoolConstructingConnections,
		metric.WithDescription("Number of connections with construction in progress in the pool."),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolConstructingConnections, err)
	}

	if emptyAcquires, err = meter.Int64ObservableCounter(
		pgxPoolEmptyAcquire,
		metric.WithDescription("Cumulative count of successful acquires from the pool that waited for a resource to be released or constructed because the pool was empty."),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolEmptyAcquire, err)
	}

	if idleConns, err = meter.Int64ObservableUpDownCounter(
		pgxPoolIdleConnections,
		metric.WithDescription("Number of currently idle connections in the pool."),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolIdleConnections, err)
	}

	if maxConns, err = meter.Int64ObservableGauge(
		pgxPoolMaxConnections,
		metric.WithDescription("Maximum size of the pool."),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolMaxConnections, err)
	}

	if maxIdleDestroyCount, err = meter.Int64ObservableCounter(
		pgxPoolMaxIdleDestroyCount,
		metric.WithDescription("Cumulative count of connections destroyed because they exceeded MaxConnectionsIdleTime."),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolMaxIdleDestroyCount, err)
	}

	if maxLifetimeDestroyCount, err = meter.Int64ObservableCounter(
		pgxPoolMaxLifetimeDestroyCount,
		metric.WithDescription("Cumulative count of connections destroyed because they exceeded MaxConnectionsLifetime."),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolMaxLifetimeDestroyCount, err)
	}

	if newConnsCount, err = meter.Int64ObservableCounter(
		pgxPoolNewConnectionsCount,
		metric.WithDescription("Cumulative count of new connections opened."),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolNewConnectionsCount, err)
	}

	if totalConns, err = meter.Int64ObservableUpDownCounter(
		pgxPoolTotalConnections,
		metric.WithDescription("Total number of resources currently in the pool. The value is the sum of ConstructingConnections, AcquiredConnections, and IdleConnections."),
	); err != nil {
		return fmt.Errorf("failed to create asynchronous metric: %s with error: %w", pgxPoolTotalConnections, err)
	}

	// Build the attribute list in a fresh slice: appending to the variadic
	// attrs directly could write into spare capacity of the caller's slice.
	observedAttrs := make([]attribute.KeyValue, 0, len(attrs)+2)
	observedAttrs = append(observedAttrs, attrs...)
	observedAttrs = append(observedAttrs,
		semconv.DBSystemPostgreSQL,
		dbClientConnectionPoolName,
	)

	observeOptions = []metric.ObserveOption{
		metric.WithAttributes(observedAttrs...),
	}

	if _, err = meter.RegisterCallback(
		func(ctx context.Context, o metric.Observer) error {
			lock.Lock()
			defer lock.Unlock()

			// lastDBStats starts as the zero time, so the first invocation
			// always reads the statistics and dbStats is never nil below.
			now := time.Now()
			if now.Sub(lastDBStats) >= minimumReadDBStatsInterval {
				dbStats = db.Stat()
				lastDBStats = now
			}

			o.ObserveInt64(acquireCount, dbStats.AcquireCount(), observeOptions...)
			o.ObserveInt64(acquireDuration, dbStats.AcquireDuration().Nanoseconds(), observeOptions...)
			o.ObserveInt64(acquiredConns, int64(dbStats.AcquiredConns()), observeOptions...)
			o.ObserveInt64(cancelledAcquires, dbStats.CanceledAcquireCount(), observeOptions...)
			o.ObserveInt64(constructingConns, int64(dbStats.ConstructingConns()), observeOptions...)
			o.ObserveInt64(emptyAcquires, dbStats.EmptyAcquireCount(), observeOptions...)
			o.ObserveInt64(idleConns, int64(dbStats.IdleConns()), observeOptions...)
			o.ObserveInt64(maxConns, int64(dbStats.MaxConns()), observeOptions...)
			o.ObserveInt64(maxIdleDestroyCount, dbStats.MaxIdleDestroyCount(), observeOptions...)
			o.ObserveInt64(maxLifetimeDestroyCount, dbStats.MaxLifetimeDestroyCount(), observeOptions...)
			o.ObserveInt64(newConnsCount, dbStats.NewConnsCount(), observeOptions...)
			o.ObserveInt64(totalConns, int64(dbStats.TotalConns()), observeOptions...)

			return nil
		},
		acquireCount,
		acquireDuration,
		acquiredConns,
		cancelledAcquires,
		constructingConns,
		emptyAcquires,
		idleConns,
		maxConns,
		maxIdleDestroyCount,
		maxLifetimeDestroyCount,
		newConnsCount,
		totalConns,
	); err != nil {
		return fmt.Errorf("failed to register asynchronous metrics callback: %w", err)
	}

	return nil
}
Loading

0 comments on commit 588e676

Please sign in to comment.