From c04e2af4b38e363554b4a4b28485d484b837dbe3 Mon Sep 17 00:00:00 2001 From: Crypto Minion <154598612+jrwbabylonchain@users.noreply.github.com> Date: Mon, 17 Jun 2024 21:56:21 +1000 Subject: [PATCH] feat: restart the service if fail to push msg (#19) --- internal/observability/metrics/metrics.go | 14 ++++++++++++++ internal/queue/queue.go | 4 +++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/internal/observability/metrics/metrics.go b/internal/observability/metrics/metrics.go index 127ef79..3a77ae5 100644 --- a/internal/observability/metrics/metrics.go +++ b/internal/observability/metrics/metrics.go @@ -30,6 +30,7 @@ var ( metricsRouter *chi.Mux pollDurationHistogram *prometheus.HistogramVec btcClientDurationHistogram *prometheus.HistogramVec + queueSendErrorCounter prometheus.Counter ) // Init initializes the metrics package. @@ -77,9 +78,18 @@ func registerMetrics() { []string{"function", "status"}, ) + // add a counter for the number of errors from the fail to push message into queue + queueSendErrorCounter = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "queue_send_error_count", + Help: "The total number of errors when sending messages to the queue", + }, + ) + prometheus.MustRegister( pollDurationHistogram, btcClientDurationHistogram, + queueSendErrorCounter, ) } @@ -105,3 +115,7 @@ func RecordBtcClientMetrics[T any](clientRequest func() (T, error)) (T, error) { return result, err } + +func RecordQueueSendError() { + queueSendErrorCounter.Inc() +} diff --git a/internal/queue/queue.go b/internal/queue/queue.go index 62ede3e..f792c52 100644 --- a/internal/queue/queue.go +++ b/internal/queue/queue.go @@ -7,6 +7,7 @@ import ( "github.com/rs/zerolog/log" + "github.com/babylonchain/staking-expiry-checker/internal/observability/metrics" "github.com/babylonchain/staking-queue-client/client" queueConfig "github.com/babylonchain/staking-queue-client/config" ) @@ -36,7 +37,8 @@ func (qm *QueueManager) SendExpiredStakingEvent(ctx context.Context, ev client.E log.Debug().Str("tx_hash", ev.StakingTxHashHex).Msg("publishing expired staking event") err = qm.stakingExpiredEventQueue.SendMessage(ctx, messageBody) if err != nil { - return fmt.Errorf("failed to publish staking event: %w", err) + metrics.RecordQueueSendError() + log.Fatal().Err(err).Str("tx_hash", ev.StakingTxHashHex).Msg("failed to publish staking event") } log.Debug().Str("tx_hash", ev.StakingTxHashHex).Msg("successfully published expired staking event")