Skip to content

Commit

Permalink
[raft] add monitoring for acquiring lease error (#8004)
Browse files Browse the repository at this point in the history
  • Loading branch information
luluz66 authored Dec 3, 2024
1 parent 341257c commit d82d404
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 2 deletions.
3 changes: 3 additions & 0 deletions enterprise/server/raft/leasekeeper/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@ go_library(
"//enterprise/server/raft/rangelease",
"//enterprise/server/raft/replica",
"//proto:raft_go_proto",
"//server/metrics",
"//server/util/alert",
"//server/util/boundedstack",
"//server/util/log",
"//server/util/status",
"@com_github_lni_dragonboat_v4//:dragonboat",
"@com_github_lni_dragonboat_v4//raftio",
"@com_github_prometheus_client_golang//prometheus",
"@org_golang_x_sync//errgroup",
],
)
Expand Down
20 changes: 18 additions & 2 deletions enterprise/server/raft/leasekeeper/leasekeeper.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package leasekeeper
import (
"context"
"fmt"
"strconv"
"sync"
"time"

Expand All @@ -12,11 +13,14 @@ import (
"github.com/buildbuddy-io/buildbuddy/enterprise/server/raft/nodeliveness"
"github.com/buildbuddy-io/buildbuddy/enterprise/server/raft/rangelease"
"github.com/buildbuddy-io/buildbuddy/enterprise/server/raft/replica"
"github.com/buildbuddy-io/buildbuddy/server/metrics"
"github.com/buildbuddy-io/buildbuddy/server/util/alert"
"github.com/buildbuddy-io/buildbuddy/server/util/boundedstack"
"github.com/buildbuddy-io/buildbuddy/server/util/log"
"github.com/buildbuddy-io/buildbuddy/server/util/status"
"github.com/lni/dragonboat/v4"
"github.com/lni/dragonboat/v4/raftio"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/sync/errgroup"

rfpb "github.com/buildbuddy-io/buildbuddy/proto/raft"
Expand Down Expand Up @@ -155,7 +159,13 @@ func (la *leaseAgent) doSingleInstruction(ctx context.Context, instruction *leas

switch instruction.action {
case Acquire:
if err := la.l.Lease(ctx); err != nil {
err := la.l.Lease(ctx)
metrics.RaftLeaseActionCount.With(prometheus.Labels{
metrics.RaftRangeIDLabel: strconv.Itoa(int(la.l.GetRangeDescriptor().GetRangeId())),
metrics.RaftLeaseActionLabel: "Acquire",
metrics.StatusHumanReadableLabel: status.MetricsLabel(err),
}).Inc()
if err != nil {
la.log.Errorf("Error acquiring rangelease (%s): %s %s", la.l.Desc(ctx), err, instruction)
return
}
Expand All @@ -165,7 +175,13 @@ func (la *leaseAgent) doSingleInstruction(ctx context.Context, instruction *leas
}
case Drop:
// This is a no-op if we don't have the lease.
if err := la.l.Release(ctx); err != nil {
err := la.l.Release(ctx)
metrics.RaftLeaseActionCount.With(prometheus.Labels{
metrics.RaftRangeIDLabel: strconv.Itoa(int(la.l.GetRangeDescriptor().GetRangeId())),
metrics.RaftLeaseActionLabel: "Drop",
metrics.StatusHumanReadableLabel: status.MetricsLabel(err),
}).Inc()
if err != nil {
la.log.Errorf("Error dropping rangelease (%s): %s (%s)", la.l.Desc(ctx), err, instruction)
return
}
Expand Down
14 changes: 14 additions & 0 deletions server/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ const (
// The type of raft move: `add` or `remove`.
RaftMoveLabel = "move_type"

// The type of lease action: `Acquire` or `Drop`.
RaftLeaseActionLabel = "lease_action"

// Raft RangeCache event type: `hit`, `miss`, or `update`.
RaftRangeCacheEventTypeLabel = "rangecache_event_type"

Expand Down Expand Up @@ -2354,6 +2357,17 @@ var (
RaftListenerEventType,
})

// RaftLeaseActionCount counts range lease actions. Each sample is labeled
// with the range ID, the lease action (`Acquire` or `Drop`), and the status
// label derived from the error returned by that action.
//
// NOTE(review): the range ID label yields one time series per range per
// action per status; confirm this label cardinality is acceptable.
RaftLeaseActionCount = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: bbNamespace,
Subsystem: "raft",
Name: "lease_action_count",
Help: "The total number of lease actions",
}, []string{
RaftRangeIDLabel,
RaftLeaseActionLabel,
StatusHumanReadableLabel,
})

APIKeyLookupCount = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: bbNamespace,
Subsystem: "auth",
Expand Down

0 comments on commit d82d404

Please sign in to comment.