diff --git a/prometheus.go b/prometheus.go index d7a1c271..76ef540e 100644 --- a/prometheus.go +++ b/prometheus.go @@ -41,4 +41,16 @@ var ( Name: "regions_total", Help: "Total number of regions in the cache", }) + + retryBackoffDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Namespace: "gohbase", + Subsystem: "retry", + Name: "backoff_duration_seconds", + Help: "Time spend sleeping in retry backoff", + // Buckets match the exact backoff time generated by the sleepAndIncreaseBackoff function + Buckets: []float64{ + 0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 1.024, 2.048, + 4.096, 8.192, 13.192, 18.192, 23.192, 28.192, 33.192, + }, + }) ) diff --git a/rpc.go b/rpc.go index b96c8225..ed256622 100644 --- a/rpc.go +++ b/rpc.go @@ -1004,6 +1004,11 @@ func sleepAndIncreaseBackoff(ctx context.Context, backoff time.Duration) (time.D return 0, ctx.Err() } + // Keep track of the amount of time spent sleeping in retry backoff. Ignore if context was + // canceled. + retryBackoffDuration.Observe(backoff.Seconds()) + + // When changing this formula, update the buckets of the retryBackoffDuration metric too. if backoff < 5*time.Second { return backoff * 2, nil } else if backoff < 30*time.Second {