From 30a2725499865d2c0d58927dd40de1ca2dec527e Mon Sep 17 00:00:00 2001 From: Jaz Volpert Date: Sun, 17 Nov 2024 22:22:34 +0000 Subject: [PATCH 1/4] Add more metrics to the relay --- bgs/bgs.go | 5 +++++ bgs/metrics.go | 12 ++++++++++++ did/metrics.go | 6 ++++++ did/multi.go | 6 ++++++ repomgr/metrics.go | 18 ++++++++++++++++++ repomgr/repomgr.go | 8 +++++++- 6 files changed, 54 insertions(+), 1 deletion(-) diff --git a/bgs/bgs.go b/bgs/bgs.go index bf1eae6f5..b64df715e 100644 --- a/bgs/bgs.go +++ b/bgs/bgs.go @@ -876,14 +876,19 @@ func (bgs *BGS) handleFedEvent(ctx context.Context, host *models.PDS, env *event repoCommitsReceivedCounter.WithLabelValues(host.Host).Add(1) evt := env.RepoCommit log.Debugw("bgs got repo append event", "seq", evt.Seq, "pdsHost", host.Host, "repo", evt.Repo) + + s := time.Now() u, err := bgs.lookupUserByDid(ctx, evt.Repo) + userLookupDuration.Observe(time.Since(s).Seconds()) if err != nil { if !errors.Is(err, gorm.ErrRecordNotFound) { return fmt.Errorf("looking up event user: %w", err) } newUsersDiscovered.Inc() + start := time.Now() subj, err := bgs.createExternalUser(ctx, evt.Repo) + newUserDiscoveryDuration.Observe(time.Since(start).Seconds()) if err != nil { return fmt.Errorf("fed event create external user: %w", err) } diff --git a/bgs/metrics.go b/bgs/metrics.go index b33677e6e..edd687596 100644 --- a/bgs/metrics.go +++ b/bgs/metrics.go @@ -81,6 +81,18 @@ var resSz = promauto.NewHistogramVec(prometheus.HistogramOpts{ Buckets: prometheus.ExponentialBuckets(100, 10, 8), }, []string{"code", "method", "path"}) +var userLookupDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "relay_user_lookup_duration", + Help: "A histogram of user lookup latencies", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) + +var newUserDiscoveryDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "relay_new_user_discovery_duration", + Help: "A histogram of new user discovery latencies", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) + // MetricsMiddleware defines handler function for metrics middleware func MetricsMiddleware(next echo.HandlerFunc) echo.HandlerFunc { return func(c echo.Context) error { diff --git a/did/metrics.go b/did/metrics.go index e7bea3b56..67f14e434 100644 --- a/did/metrics.go +++ b/did/metrics.go @@ -9,3 +9,9 @@ var mrResolvedDidsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "multiresolver_resolved_dids_total", Help: "Total number of DIDs resolved", }, []string{"resolver"}) + +var mrResolveDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Name: "indigo_multiresolver_resolve_duration", + Help: "A histogram of resolve latencies", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}, []string{"resolver"}) diff --git a/did/multi.go b/did/multi.go index e871ff454..112394bee 100644 --- a/did/multi.go +++ b/did/multi.go @@ -3,6 +3,7 @@ package did import ( "context" "fmt" + "time" "github.com/whyrusleeping/go-did" ) @@ -43,6 +44,11 @@ func (mr *MultiResolver) FlushCacheFor(didstr string) { } func (mr *MultiResolver) GetDocument(ctx context.Context, didstr string) (*did.Document, error) { + s := time.Now() + defer func() { + mrResolveDuration.WithLabelValues(didstr).Observe(time.Since(s).Seconds()) + }() + pdid, err := did.ParseDID(didstr) if err != nil { return nil, err diff --git a/repomgr/metrics.go b/repomgr/metrics.go index a92e3091d..df3e4bea1 100644 --- a/repomgr/metrics.go +++ b/repomgr/metrics.go @@ -9,3 +9,21 @@ var repoOpsImported = promauto.NewCounter(prometheus.CounterOpts{ Name: "repomgr_repo_ops_imported", Help: "Number of repo ops imported", }) + +var openAndSigCheckDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "repomgr_open_and_sig_check_duration", + Help: "Duration of opening and signature check", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) + +var calcDiffDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "repomgr_calc_diff_duration", + Help: "Duration of calculating diff", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) + +var writeCarSliceDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "repomgr_write_car_slice_duration", + Help: "Duration of writing car slice", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) diff --git a/repomgr/repomgr.go b/repomgr/repomgr.go index ad90a43b2..bdf20d784 100644 --- a/repomgr/repomgr.go +++ b/repomgr/repomgr.go @@ -8,6 +8,7 @@ import ( "io" "strings" "sync" + "time" atproto "github.com/bluesky-social/indigo/api/atproto" bsky "github.com/bluesky-social/indigo/api/bsky" @@ -538,6 +539,7 @@ func (rm *RepoManager) HandleExternalUserEvent(ctx context.Context, pdsid uint, unlock := rm.lockUser(ctx, uid) defer unlock() + start := time.Now() root, ds, err := rm.cs.ImportSlice(ctx, uid, since, carslice) if err != nil { return fmt.Errorf("importing external carslice: %w", err) @@ -551,6 +553,7 @@ func (rm *RepoManager) HandleExternalUserEvent(ctx context.Context, pdsid uint, if err := rm.CheckRepoSig(ctx, r, did); err != nil { return err } + openAndSigCheckDuration.Observe(time.Since(start).Seconds()) var skipcids map[cid.Cid]bool if ds.BaseCid().Defined() { @@ -571,10 +574,11 @@ func (rm *RepoManager) HandleExternalUserEvent(ctx context.Context, pdsid uint, } } + start = time.Now() if err := ds.CalcDiff(ctx, skipcids); err != nil { return fmt.Errorf("failed while calculating mst diff (since=%v): %w", since, err) - } + calcDiffDuration.Observe(time.Since(start).Seconds()) evtops := make([]RepoOp, 0, len(ops)) @@ -631,10 +635,12 @@ func (rm *RepoManager) HandleExternalUserEvent(ctx context.Context, pdsid uint, } } + start = time.Now() rslice, err := ds.CloseWithRoot(ctx, root, nrev) if err != nil { return fmt.Errorf("close with root: %w", err) } + writeCarSliceDuration.Observe(time.Since(start).Seconds()) if rm.events != nil { rm.events(ctx, &RepoEvent{ From 15004ab6dffb9acf206d2704db367fc128366c83 Mon Sep 17 00:00:00 2001 From: Jaz Volpert Date: Sun, 17 Nov 2024 22:32:31 +0000 Subject: [PATCH 2/4] whoops --- did/multi.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/did/multi.go b/did/multi.go index 112394bee..2c67781fe 100644 --- a/did/multi.go +++ b/did/multi.go @@ -45,9 +45,6 @@ func (mr *MultiResolver) FlushCacheFor(didstr string) { func (mr *MultiResolver) GetDocument(ctx context.Context, didstr string) (*did.Document, error) { s := time.Now() - defer func() { - mrResolveDuration.WithLabelValues(didstr).Observe(time.Since(s).Seconds()) - }() pdid, err := did.ParseDID(didstr) if err != nil { @@ -55,6 +52,9 @@ func (mr *MultiResolver) GetDocument(ctx context.Context, didstr string) (*did.D } method := pdid.Protocol() + defer func() { + mrResolveDuration.WithLabelValues(method).Observe(time.Since(s).Seconds()) + }() res, ok := mr.handlers[method] if !ok { From 3a4136fbc994f7e7311cd4ca367c37bbbe382ed9 Mon Sep 17 00:00:00 2001 From: Jaz Volpert Date: Sun, 17 Nov 2024 22:35:06 +0000 Subject: [PATCH 3/4] Rename series --- did/metrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/did/metrics.go b/did/metrics.go index 67f14e434..d3c8f0ecd 100644 --- a/did/metrics.go +++ b/did/metrics.go @@ -11,7 +11,7 @@ var mrResolvedDidsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ }, []string{"resolver"}) var mrResolveDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Name: "indigo_multiresolver_resolve_duration", + Name: "indigo_multiresolver_resolve_duration_seconds", Help: "A histogram of resolve latencies", Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), }, []string{"resolver"}) From 1eeb49688e356fc29b42da6174593129ab39c677 Mon Sep 17 00:00:00 2001 From: Jaz Volpert Date: Sun, 17 Nov 2024 22:55:19 +0000 Subject: [PATCH 4/4] Track disk vs meta write durations --- carstore/bs.go | 4 ++++ carstore/metrics.go | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 carstore/metrics.go diff --git a/carstore/bs.go b/carstore/bs.go index dac62e5d8..2cf0a5093 100644 --- a/carstore/bs.go +++ b/carstore/bs.go @@ -645,10 +645,12 @@ func (cs *FileCarStore) writeNewShard(ctx context.Context, root cid.Cid, rev str offset += nw } + start := time.Now() path, err := cs.writeNewShardFile(ctx, user, seq, buf.Bytes()) if err != nil { return nil, fmt.Errorf("failed to write shard file: %w", err) } + writeShardFileDuration.Observe(time.Since(start).Seconds()) shard := CarShard{ Root: models.DbCID{CID: root}, @@ -659,9 +661,11 @@ func (cs *FileCarStore) writeNewShard(ctx context.Context, root cid.Cid, rev str Rev: rev, } + start = time.Now() if err := cs.putShard(ctx, &shard, brefs, rmcids, false); err != nil { return nil, err } + writeShardMetadataDuration.Observe(time.Since(start).Seconds()) return buf.Bytes(), nil } diff --git a/carstore/metrics.go b/carstore/metrics.go new file mode 100644 index 000000000..0d2a0794a --- /dev/null +++ b/carstore/metrics.go @@ -0,0 +1,18 @@ +package carstore + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var writeShardFileDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "carstore_write_shard_file_duration", + Help: "Duration of writing shard file to disk", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) + +var writeShardMetadataDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "carstore_write_shard_metadata_duration", + Help: "Duration of writing shard metadata to DB", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +})