From e918219cec76bc7bfeac7cc3a52a6ebde6ab817f Mon Sep 17 00:00:00 2001 From: Aditya Thebe Date: Wed, 6 Nov 2024 13:11:30 +0545 Subject: [PATCH] fix: db errors should be reported as internal error * feat: handle db errors during check runs * feat: add a new check error metric --- cmd/root.go | 4 ++++ go.mod | 2 +- pkg/api.go | 2 ++ pkg/metrics/metrics.go | 19 +++++++++++++++++-- pkg/results.go | 4 ++++ 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 9770f0189..3a22b9025 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -17,6 +17,7 @@ import ( "github.com/flanksource/duty" "github.com/flanksource/duty/connection" "github.com/flanksource/duty/context" + "github.com/flanksource/duty/db" "github.com/flanksource/duty/query" "github.com/flanksource/duty/shutdown" "github.com/spf13/cobra" @@ -38,6 +39,9 @@ func InitContext() (context.Context, error) { } ctx.WithTracer(otel.GetTracerProvider().Tracer(app)) + if err := ctx.DB().Use(db.NewOopsPlugin()); err != nil { + return ctx, fmt.Errorf("failed to use oops gorm plugin: %w", err) + } return ctx, nil } diff --git a/go.mod b/go.mod index c399bc75b..adb5438a4 100644 --- a/go.mod +++ b/go.mod @@ -54,6 +54,7 @@ require ( github.com/robertkrimen/otto v0.3.0 github.com/robfig/cron/v3 v3.0.1 github.com/samber/lo v1.47.0 + github.com/samber/oops v1.13.1 github.com/sethvargo/go-retry v0.3.0 github.com/sevennt/echo-pprof v0.1.1-0.20220616082843-66a461746b5f github.com/spf13/cobra v1.8.0 @@ -253,7 +254,6 @@ require ( github.com/prometheus/procfs v0.15.1 // indirect github.com/rodaine/table v1.3.0 // indirect github.com/rogpeppe/go-internal v1.12.0 // indirect - github.com/samber/oops v1.13.1 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect github.com/shirou/gopsutil/v3 v3.24.5 // indirect github.com/shoenig/go-m1cpu v0.1.6 // indirect diff --git a/pkg/api.go b/pkg/api.go index 6e24274dd..49799c6ea 100644 --- a/pkg/api.go +++ b/pkg/api.go @@ -347,6 +347,8 @@ type CheckResult struct { // ParentCheck is the parent check of a transformed check ParentCheck external.Check `json:"-"` ErrorObject error `json:"-"` + + InternalError bool `json:"-"` } func (result CheckResult) LoggerName() string { diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 12267ca67..f183c1641 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -94,7 +94,15 @@ func setupMetrics() { checkLabels, ) - prometheus.MustRegister(Gauge, CanaryCheckInfo, OpsCount, OpsSuccessCount, OpsInvalidCount, OpsFailedCount, RequestLatency) + OpsErrorCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "canary_check_error_count", + Help: "The total number of checks that resulted in error", + }, + checkLabels, + ) + + prometheus.MustRegister(Gauge, CanaryCheckInfo, OpsCount, OpsSuccessCount, OpsInvalidCount, OpsErrorCount, OpsFailedCount, RequestLatency) } var ( @@ -115,6 +123,7 @@ var ( OpsCount *prometheus.CounterVec OpsFailedCount *prometheus.CounterVec OpsSuccessCount *prometheus.CounterVec + OpsErrorCount *prometheus.CounterVec RequestLatency *prometheus.HistogramVec ) @@ -278,7 +287,13 @@ func Record( Gauge.WithLabelValues(gaugeLabels...).Set(1) CanaryCheckInfo.WithLabelValues(checkMetricLabels...).Set(1) - OpsFailedCount.WithLabelValues(checkMetricLabels...).Inc() + + if result.InternalError { + OpsErrorCount.WithLabelValues(checkMetricLabels...).Inc() + } else { + fail.Append(1) + OpsFailedCount.WithLabelValues(checkMetricLabels...).Inc() + } } _uptime = types.Uptime{Passed: int(pass.Reduce(rolling.Sum)), Failed: int(fail.Reduce(rolling.Sum))} diff --git a/pkg/results.go b/pkg/results.go index 7e81607e7..f420c23aa 100644 --- a/pkg/results.go +++ b/pkg/results.go @@ -6,6 +6,7 @@ import ( "github.com/flanksource/canary-checker/api/external" v1 "github.com/flanksource/canary-checker/api/v1" + "github.com/flanksource/duty/db" ) type Results []*CheckResult @@ -91,6 +92,9 @@ func (result *CheckResult) Failf(message string, args ...interface{}) *CheckResu if result.Error != "" { result.Error += ", " } + + result.InternalError = db.IsDBError(fmt.Errorf(message, args...)) + result.Pass = false result.Error += fmt.Sprintf(message, args...) return result