generated from TBD54566975/tbd-project-template
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: deployment metrics instrumentation (#2224)
Adds controller and runner deployment metrics. * `ftl.deployments.controller.reconciliation.failures` * `ftl.deployments.controller.reconciliations.active` * `ftl.deployments.controller.replicas.added` * `ftl.deployments.runner.failures` * `ftl.deployments.runner.active` Sample output captured here --- ``` ScopeMetrics #4 ScopeMetrics SchemaURL: InstrumentationScope ftl.deployments.controller Metric #0 Descriptor: -> Name: ftl.deployments.controller.reconciliations.active -> Description: the number of active deployment reconciliation tasks -> Unit: -> DataType: Sum -> IsMonotonic: false -> AggregationTemporality: Cumulative NumberDataPoints #0 Data point attributes: -> ftl.deployment.key: Str(dpl-echo-4xty4b1iks6plwgj) -> ftl.module.name: Str(echo) StartTimestamp: 2024-08-01 18:41:42.628275 +0000 UTC Timestamp: 2024-08-01 18:42:07.631714 +0000 UTC Value: 0 NumberDataPoints #1 Data point attributes: -> ftl.deployment.key: Str(dpl-time-4d23618ccc6mwce8) -> ftl.module.name: Str(time) StartTimestamp: 2024-08-01 18:41:42.628275 +0000 UTC Timestamp: 2024-08-01 18:42:07.631714 +0000 UTC Value: 0 Metric #1 Descriptor: -> Name: ftl.deployments.controller.replicas.added -> Description: the number of runner replicas added (or removed) by the deployment reconciliation tasks -> Unit: -> DataType: Sum -> IsMonotonic: true -> AggregationTemporality: Cumulative NumberDataPoints #0 Data point attributes: -> ftl.deployment.key: Str(dpl-echo-4xty4b1iks6plwgj) -> ftl.module.name: Str(echo) StartTimestamp: 2024-08-01 18:41:42.628278 +0000 UTC Timestamp: 2024-08-01 18:42:07.631714 +0000 UTC Value: 1 NumberDataPoints #1 Data point attributes: -> ftl.deployment.key: Str(dpl-time-4d23618ccc6mwce8) -> ftl.module.name: Str(time) StartTimestamp: 2024-08-01 18:41:42.628278 +0000 UTC Timestamp: 2024-08-01 18:42:07.631714 +0000 UTC Value: 1 ``` --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
- Loading branch information
1 parent
63446b8
commit a5bb4b9
Showing
8 changed files
with
227 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
package observability | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
|
||
"go.opentelemetry.io/otel" | ||
"go.opentelemetry.io/otel/attribute" | ||
"go.opentelemetry.io/otel/metric" | ||
|
||
"github.com/TBD54566975/ftl/internal/observability" | ||
) | ||
|
||
// deploymentMeterName is the name handed to otel.Meter for the controller's
// deployment instruments; each instrument name is built by appending a suffix
// to it.
const deploymentMeterName = "ftl.deployments.controller"
|
||
type DeploymentMetrics struct { | ||
reconciliationFailures metric.Int64Counter | ||
reconciliationsActive metric.Int64UpDownCounter | ||
replicasAdded metric.Int64Counter | ||
replicasRemoved metric.Int64Counter | ||
} | ||
|
||
func initDeploymentMetrics() (*DeploymentMetrics, error) { | ||
result := &DeploymentMetrics{} | ||
|
||
var errs error | ||
var err error | ||
|
||
meter := otel.Meter(deploymentMeterName) | ||
|
||
counter := fmt.Sprintf("%s.reconciliation.failures", deploymentMeterName) | ||
if result.reconciliationFailures, err = meter.Int64Counter( | ||
counter, | ||
metric.WithDescription("the number of failed runner deployment reconciliation tasks")); err != nil { | ||
result.reconciliationFailures, errs = handleInt64CounterError(counter, err, errs) | ||
} | ||
|
||
counter = fmt.Sprintf("%s.reconciliations.active", deploymentMeterName) | ||
if result.reconciliationsActive, err = meter.Int64UpDownCounter( | ||
counter, | ||
metric.WithDescription("the number of active deployment reconciliation tasks")); err != nil { | ||
result.reconciliationsActive, errs = handleInt64UpDownCounterError(counter, err, errs) | ||
} | ||
|
||
counter = fmt.Sprintf("%s.replicas.added", deploymentMeterName) | ||
if result.replicasAdded, err = meter.Int64Counter( | ||
counter, | ||
metric.WithDescription("the number of runner replicas added by the deployment reconciliation tasks")); err != nil { | ||
result.replicasAdded, errs = handleInt64CounterError(counter, err, errs) | ||
} | ||
|
||
counter = fmt.Sprintf("%s.replicas.removed", deploymentMeterName) | ||
if result.replicasRemoved, err = meter.Int64Counter( | ||
counter, | ||
metric.WithDescription("the number of runner replicas removed by the deployment reconciliation tasks")); err != nil { | ||
result.replicasRemoved, errs = handleInt64CounterError(counter, err, errs) | ||
} | ||
|
||
return result, errs | ||
} | ||
|
||
func (m *DeploymentMetrics) ReconciliationFailure(ctx context.Context, module string, key string) { | ||
m.reconciliationFailures.Add(ctx, 1, metric.WithAttributes( | ||
attribute.String(observability.ModuleNameAttribute, module), | ||
attribute.String(observability.RunnerDeploymentKeyAttribute, key), | ||
)) | ||
} | ||
|
||
func (m *DeploymentMetrics) ReconciliationStart(ctx context.Context, module string, key string) { | ||
m.reconciliationsActive.Add(ctx, 1, metric.WithAttributes( | ||
attribute.String(observability.ModuleNameAttribute, module), | ||
attribute.String(observability.RunnerDeploymentKeyAttribute, key), | ||
)) | ||
} | ||
|
||
func (m *DeploymentMetrics) ReconciliationComplete(ctx context.Context, module string, key string) { | ||
m.reconciliationsActive.Add(ctx, -1, metric.WithAttributes( | ||
attribute.String(observability.ModuleNameAttribute, module), | ||
attribute.String(observability.RunnerDeploymentKeyAttribute, key), | ||
)) | ||
} | ||
|
||
func (m *DeploymentMetrics) ReplicasUpdated(ctx context.Context, module string, key string, delta int) { | ||
if delta < 0 { | ||
m.replicasRemoved.Add(ctx, int64(-delta), metric.WithAttributes( | ||
attribute.String(observability.ModuleNameAttribute, module), | ||
attribute.String(observability.RunnerDeploymentKeyAttribute, key), | ||
)) | ||
} else if delta > 0 { | ||
m.replicasAdded.Add(ctx, int64(delta), metric.WithAttributes( | ||
attribute.String(observability.ModuleNameAttribute, module), | ||
attribute.String(observability.RunnerDeploymentKeyAttribute, key), | ||
)) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package observability | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
|
||
"github.com/alecthomas/types/optional" | ||
"go.opentelemetry.io/otel" | ||
"go.opentelemetry.io/otel/attribute" | ||
"go.opentelemetry.io/otel/metric" | ||
|
||
"github.com/TBD54566975/ftl/internal/observability" | ||
) | ||
|
||
// deploymentMeterName is the name handed to otel.Meter for the runner's
// deployment instruments; each instrument name is built by appending a suffix
// to it.
const deploymentMeterName = "ftl.deployments.runner"
|
||
type DeploymentMetrics struct { | ||
failure metric.Int64Counter | ||
active metric.Int64UpDownCounter | ||
} | ||
|
||
func initDeploymentMetrics() (*DeploymentMetrics, error) { | ||
result := &DeploymentMetrics{} | ||
|
||
var errs error | ||
var err error | ||
|
||
meter := otel.Meter(deploymentMeterName) | ||
|
||
counter := fmt.Sprintf("%s.failures", deploymentMeterName) | ||
if result.failure, err = meter.Int64Counter( | ||
counter, | ||
metric.WithDescription("the number of deployment failures")); err != nil { | ||
result.failure, errs = handleInt64CounterError(counter, err, errs) | ||
} | ||
|
||
counter = fmt.Sprintf("%s.active", deploymentMeterName) | ||
if result.active, err = meter.Int64UpDownCounter( | ||
counter, | ||
metric.WithDescription("the number of active deployments")); err != nil { | ||
result.active, errs = handleInt64UpDownCounterError(counter, err, errs) | ||
} | ||
|
||
return result, errs | ||
} | ||
|
||
func (m *DeploymentMetrics) Failure(ctx context.Context, key optional.Option[string]) { | ||
m.failure.Add(ctx, 1, metric.WithAttributes( | ||
attribute.String(observability.RunnerDeploymentKeyAttribute, key.Default("unknown")), | ||
)) | ||
} | ||
|
||
func (m *DeploymentMetrics) Started(ctx context.Context, key string) { | ||
m.active.Add(ctx, 1, metric.WithAttributes( | ||
attribute.String(observability.RunnerDeploymentKeyAttribute, key), | ||
)) | ||
} | ||
|
||
func (m *DeploymentMetrics) Completed(ctx context.Context, key string) { | ||
m.active.Add(ctx, -1, metric.WithAttributes( | ||
attribute.String(observability.RunnerDeploymentKeyAttribute, key), | ||
)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,38 @@ | ||
package observability | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
|
||
"go.opentelemetry.io/otel/metric" | ||
"go.opentelemetry.io/otel/metric/noop" | ||
) | ||
|
||
var ( | ||
Runner *RunnerMetrics | ||
Runner *RunnerMetrics | ||
Deployment *DeploymentMetrics | ||
) | ||
|
||
func init() { | ||
var errs error | ||
var err error | ||
|
||
Runner, err = initRunnerMetrics() | ||
errs = errors.Join(errs, err) | ||
Deployment, err = initDeploymentMetrics() | ||
errs = errors.Join(errs, err) | ||
|
||
if err != nil { | ||
if errs != nil { | ||
panic(fmt.Errorf("could not initialize runner metrics: %w", err)) | ||
} | ||
} | ||
|
||
//nolint:unparam | ||
func handleInt64CounterError(counter string, err error, errs error) (metric.Int64Counter, error) { | ||
return noop.Int64Counter{}, errors.Join(errs, fmt.Errorf("%q counter init failed; falling back to noop: %w", counter, err)) | ||
} | ||
|
||
//nolint:unparam | ||
func handleInt64UpDownCounterError(counter string, err error, errs error) (metric.Int64UpDownCounter, error) { | ||
return noop.Int64UpDownCounter{}, errors.Join(errs, fmt.Errorf("%q counter init failed; falling back to noop: %w", counter, err)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.