From 5e8e9a2c60a3989e48b600cebe524c475d34bed4 Mon Sep 17 00:00:00 2001 From: Yiming Zang <50607998+yzang2019@users.noreply.github.com> Date: Wed, 3 Apr 2024 09:37:45 +0800 Subject: [PATCH] Add metrics for block processing latency (#222) * Add metrics for block processing latency * Update execution.go --- internal/consensus/metrics.gen.go | 9 +++++ internal/consensus/metrics.go | 3 ++ internal/consensus/state.go | 3 +- internal/state/execution.go | 13 +++++-- internal/state/metrics.gen.go | 57 +++++++++++++++++++++++++++---- internal/state/metrics.go | 15 ++++++++ 6 files changed, 90 insertions(+), 10 deletions(-) diff --git a/internal/consensus/metrics.gen.go b/internal/consensus/metrics.gen.go index 40965475a..6ea1680fd 100644 --- a/internal/consensus/metrics.gen.go +++ b/internal/consensus/metrics.gen.go @@ -270,6 +270,14 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "complete_proposal_time", Help: "CompleteProposalTime measures how long it takes between receiving a proposal and finishing processing all of its parts. Note that this means it also includes network latency from block parts gossip", }, labels).With(labelsAndValues...), + ApplyBlockLatency: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "apply_block_latency", + Help: "ApplyBlockLatency measures how long it takes to execute ApplyBlock in finalize commit step", + + Buckets: stdprometheus.ExponentialBucketsRange(0.01, 10, 10), + }, append(labels, "apply_block_ms")).With(labelsAndValues...), } } @@ -315,5 +323,6 @@ func NopMetrics() *Metrics { PrevoteLatency: discard.NewHistogram(), ConsensusTime: discard.NewHistogram(), CompleteProposalTime: discard.NewHistogram(), + ApplyBlockLatency: discard.NewHistogram(), } } diff --git a/internal/consensus/metrics.go b/internal/consensus/metrics.go index b0f3e038d..17c573864 100644 --- a/internal/consensus/metrics.go +++ b/internal/consensus/metrics.go @@ -170,6 +170,9 @@ type Metrics struct { // processing all of its parts. Note that this means it also includes network latency from // block parts gossip CompleteProposalTime metrics.Histogram + + // ApplyBlockLatency measures how long it takes to execute ApplyBlock in finalize commit step + ApplyBlockLatency metrics.Histogram `metrics_labels:"apply_block_ms" metrics_buckettype:"exprange" metrics_bucketsizes:"0.01, 10, 10"` } // RecordConsMetrics uses for recording the block related metrics during fast-sync. diff --git a/internal/consensus/state.go b/internal/consensus/state.go index 636152de6..2a191ece8 100644 --- a/internal/consensus/state.go +++ b/internal/consensus/state.go @@ -2184,7 +2184,7 @@ func (cs *State) finalizeCommit(ctx context.Context, height int64) { // Execute and commit the block, update and save the state, and update the mempool. // NOTE The block.AppHash won't reflect these txs until the next block. - + startTime := time.Now() stateCopy, err := cs.blockExec.ApplyBlock(spanCtx, stateCopy, types.BlockID{ @@ -2194,6 +2194,7 @@ func (cs *State) finalizeCommit(ctx context.Context, height int64) { block, cs.tracer, ) + cs.metrics.ApplyBlockLatency.Observe(float64(time.Since(startTime).Milliseconds())) if err != nil { logger.Error("failed to apply block", "err", err) return diff --git a/internal/state/execution.go b/internal/state/execution.go index 83ccec304..bebac2640 100644 --- a/internal/state/execution.go +++ b/internal/state/execution.go @@ -246,6 +246,7 @@ func (blockExec *BlockExecutor) ApplyBlock( defer finalizeBlockSpan.End() } txs := block.Data.Txs.ToSliceOfBytes() + finalizeBlockStartTime := time.Now() fBlockRes, err := blockExec.appClient.FinalizeBlock( ctx, &abci.RequestFinalizeBlock{ @@ -269,6 +270,7 @@ func (blockExec *BlockExecutor) ApplyBlock( LastResultsHash: block.LastResultsHash, }, ) + blockExec.metrics.FinalizeBlockLatency.Observe(float64(time.Since(finalizeBlockStartTime).Milliseconds())) if finalizeBlockSpan != nil { finalizeBlockSpan.End() } @@ -286,7 +288,9 @@ func (blockExec *BlockExecutor) ApplyBlock( ) // Save the results before we commit. + saveBlockResponseTime := time.Now() err = blockExec.store.SaveFinalizeBlockResponses(block.Height, fBlockRes) + blockExec.metrics.SaveBlockResponseLatency.Observe(float64(time.Since(saveBlockResponseTime).Milliseconds())) if err != nil && !errors.Is(err, ErrNoFinalizeBlockResponsesForHeight{block.Height}) { // It is correct to have an empty ResponseFinalizeBlock for ApplyBlock, // but not for saving it to the state store @@ -340,12 +344,14 @@ func (blockExec *BlockExecutor) ApplyBlock( blockExec.evpool.Update(ctx, state, block.Evidence) // Update the app hash and save the state. + saveBlockTime := time.Now() state.AppHash = fBlockRes.AppHash if err := blockExec.store.Save(state); err != nil { return state, err } - + blockExec.metrics.SaveBlockLatency.Observe(float64(time.Since(saveBlockTime).Milliseconds())) // Prune old heights, if requested by ABCI app. + pruneBlockTime := time.Now() if retainHeight > 0 { pruned, err := blockExec.pruneBlocks(retainHeight) if err != nil { @@ -354,14 +360,15 @@ func (blockExec *BlockExecutor) ApplyBlock( blockExec.logger.Debug("pruned blocks", "pruned", pruned, "retain_height", retainHeight) } } - + blockExec.metrics.PruneBlockLatency.Observe(float64(time.Since(pruneBlockTime).Milliseconds())) // reset the verification cache blockExec.cache = make(map[string]struct{}) // Events are fired after everything else. // NOTE: if we crash between Commit and Save, events wont be fired during replay + fireEventsStartTime := time.Now() FireEvents(blockExec.logger, blockExec.eventBus, block, blockID, fBlockRes, validatorUpdates) - + blockExec.metrics.FireEventsLatency.Observe(float64(time.Since(fireEventsStartTime).Milliseconds())) return state, nil } diff --git a/internal/state/metrics.gen.go b/internal/state/metrics.gen.go index ce550f296..237484ed5 100644 --- a/internal/state/metrics.gen.go +++ b/internal/state/metrics.gen.go @@ -52,16 +52,61 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "update_mempool_time", Help: "UpdateMempoolTime meaures how long it takes to update mempool after commiting, including reCheckTx", }, labels).With(labelsAndValues...), + FinalizeBlockLatency: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "finalize_block_latency", + Help: "FinalizeBlockLatency measures how long it takes to run abci FinalizeBlock", + + Buckets: stdprometheus.ExponentialBucketsRange(0.01, 10, 10), + }, append(labels, "finalize_blocks_ms")).With(labelsAndValues...), + SaveBlockResponseLatency: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "save_block_response_latency", + Help: "SaveBlockResponseLatency measures how long it takes to run save the FinalizeBlockRes", + + Buckets: stdprometheus.ExponentialBucketsRange(0.01, 10, 10), + }, append(labels, "save_block_response_ms")).With(labelsAndValues...), + SaveBlockLatency: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "save_block_latency", + Help: "SaveBlockLatency measure how long it takes to save the block", + + Buckets: stdprometheus.ExponentialBucketsRange(0.01, 10, 10), + }, append(labels, "save_block_ms")).With(labelsAndValues...), + PruneBlockLatency: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "prune_block_latency", + Help: "PruneBlockLatency measures how long it takes to prune block from blockstore", + + Buckets: stdprometheus.ExponentialBucketsRange(0.01, 10, 10), + }, append(labels, "prune_block_ms")).With(labelsAndValues...), + FireEventsLatency: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "fire_events_latency", + Help: "FireEventsLatency measures how long it takes to fire events for indexing", + + Buckets: stdprometheus.ExponentialBucketsRange(0.01, 10, 10), + }, append(labels, "prune_block_ms")).With(labelsAndValues...), } } func NopMetrics() *Metrics { return &Metrics{ - BlockProcessingTime: discard.NewHistogram(), - ConsensusParamUpdates: discard.NewCounter(), - ValidatorSetUpdates: discard.NewCounter(), - FlushAppConnectionTime: discard.NewHistogram(), - ApplicationCommitTime: discard.NewHistogram(), - UpdateMempoolTime: discard.NewHistogram(), + BlockProcessingTime: discard.NewHistogram(), + ConsensusParamUpdates: discard.NewCounter(), + ValidatorSetUpdates: discard.NewCounter(), + FlushAppConnectionTime: discard.NewHistogram(), + ApplicationCommitTime: discard.NewHistogram(), + UpdateMempoolTime: discard.NewHistogram(), + FinalizeBlockLatency: discard.NewHistogram(), + SaveBlockResponseLatency: discard.NewHistogram(), + SaveBlockLatency: discard.NewHistogram(), + PruneBlockLatency: discard.NewHistogram(), + FireEventsLatency: discard.NewHistogram(), } } diff --git a/internal/state/metrics.go b/internal/state/metrics.go index 301889343..e6b47c726 100644 --- a/internal/state/metrics.go +++ b/internal/state/metrics.go @@ -37,4 +37,19 @@ type Metrics struct { // UpdateMempoolTime meaures how long it takes to update mempool after commiting, including // reCheckTx UpdateMempoolTime metrics.Histogram + + // FinalizeBlockLatency measures how long it takes to run abci FinalizeBlock + FinalizeBlockLatency metrics.Histogram `metrics_labels:"finalize_blocks_ms" metrics_buckettype:"exprange" metrics_bucketsizes:"0.01, 10, 10"` + + // SaveBlockResponseLatency measures how long it takes to run save the FinalizeBlockRes + SaveBlockResponseLatency metrics.Histogram `metrics_labels:"save_block_response_ms" metrics_buckettype:"exprange" metrics_bucketsizes:"0.01, 10, 10"` + + // SaveBlockLatency measure how long it takes to save the block + SaveBlockLatency metrics.Histogram `metrics_labels:"save_block_ms" metrics_buckettype:"exprange" metrics_bucketsizes:"0.01, 10, 10"` + + // PruneBlockLatency measures how long it takes to prune block from blockstore + PruneBlockLatency metrics.Histogram `metrics_labels:"prune_block_ms" metrics_buckettype:"exprange" metrics_bucketsizes:"0.01, 10, 10"` + + // FireEventsLatency measures how long it takes to fire events for indexing + FireEventsLatency metrics.Histogram `metrics_labels:"prune_block_ms" metrics_buckettype:"exprange" metrics_bucketsizes:"0.01, 10, 10"` }