diff --git a/Cargo.lock b/Cargo.lock index 72c506e721..8713e4b0ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4170,6 +4170,8 @@ dependencies = [ "model", "num", "observe", + "prometheus", + "prometheus-metric-storage", "reqwest", "serde", "serde_json", diff --git a/crates/driver/src/infra/api/routes/metrics.rs b/crates/driver/src/infra/api/routes/metrics.rs index 7c8af034a2..ae81919f80 100644 --- a/crates/driver/src/infra/api/routes/metrics.rs +++ b/crates/driver/src/infra/api/routes/metrics.rs @@ -1,13 +1,8 @@ -use prometheus::Encoder; - pub(in crate::infra::api) fn metrics(app: axum::Router<()>) -> axum::Router<()> { app.route("/metrics", axum::routing::get(route)) } async fn route() -> String { let registry = observe::metrics::get_registry(); - let encoder = prometheus::TextEncoder::new(); - let mut buffer = Vec::new(); - encoder.encode(®istry.gather(), &mut buffer).unwrap(); - String::from_utf8(buffer).unwrap() + observe::metrics::encode(registry) } diff --git a/crates/observe/src/metrics.rs b/crates/observe/src/metrics.rs index f645815750..4ab8429afc 100644 --- a/crates/observe/src/metrics.rs +++ b/crates/observe/src/metrics.rs @@ -1,4 +1,4 @@ -use {once_cell::sync::OnceCell, std::collections::HashMap}; +use {once_cell::sync::OnceCell, prometheus::Encoder, std::collections::HashMap}; /// Global metrics registry used by all components. static REGISTRY: OnceCell = OnceCell::new(); @@ -51,3 +51,10 @@ pub fn get_registry() -> &'static prometheus::Registry { pub fn get_storage_registry() -> &'static prometheus_metric_storage::StorageRegistry { REGISTRY.get_or_init(prometheus_metric_storage::StorageRegistry::default) } + +pub fn encode(registry: &prometheus::Registry) -> String { + let encoder = prometheus::TextEncoder::new(); + let mut buffer = Vec::new(); + encoder.encode(®istry.gather(), &mut buffer).unwrap(); + String::from_utf8(buffer).unwrap() +} diff --git a/crates/shared/src/metrics.rs b/crates/shared/src/metrics.rs index a36bc8fe55..1071a5bead 100644 --- a/crates/shared/src/metrics.rs +++ b/crates/shared/src/metrics.rs @@ -1,5 +1,4 @@ use { - prometheus::Encoder, std::{convert::Infallible, net::SocketAddr, sync::Arc}, tokio::task::{self, JoinHandle}, warp::{Filter, Rejection, Reply}, @@ -21,20 +20,7 @@ pub fn serve_metrics(liveness: Arc, address: SocketAddr) - // `/metrics` route exposing encoded prometheus data to monitoring system pub fn handle_metrics() -> impl Filter + Clone { let registry = observe::metrics::get_registry(); - warp::path("metrics").map(move || { - let encoder = prometheus::TextEncoder::new(); - let mut buffer = Vec::new(); - if let Err(e) = encoder.encode(®istry.gather(), &mut buffer) { - tracing::error!("could not encode metrics: {}", e); - }; - match String::from_utf8(buffer) { - Ok(v) => v, - Err(e) => { - tracing::error!("metrics could not be from_utf8'd: {}", e); - String::default() - } - } - }) + warp::path("metrics").map(move || observe::metrics::encode(registry)) } fn handle_liveness( diff --git a/crates/solvers/Cargo.toml b/crates/solvers/Cargo.toml index c34b389d50..70701fb9b6 100644 --- a/crates/solvers/Cargo.toml +++ b/crates/solvers/Cargo.toml @@ -24,6 +24,8 @@ hex = "0.4" hyper = "0.14" itertools = "0.11" num = "0.4" +prometheus = { workspace = true } +prometheus-metric-storage = { workspace = true } reqwest = "0.11" serde = "1" serde_json = "1" diff --git a/crates/solvers/src/api/mod.rs b/crates/solvers/src/api/mod.rs index 12e8619592..062e79d93d 100644 --- a/crates/solvers/src/api/mod.rs +++ b/crates/solvers/src/api/mod.rs @@ -20,6 +20,7 @@ impl Api { shutdown: impl Future + Send + 'static, ) -> Result<(), hyper::Error> { let app = axum::Router::new() + .route("/metrics", axum::routing::get(routes::metrics)) .route("/healthz", axum::routing::get(routes::healthz)) .route("/solve", axum::routing::post(routes::solve)) .route("/notify", axum::routing::post(routes::notify)) diff --git a/crates/solvers/src/api/routes/metrics.rs b/crates/solvers/src/api/routes/metrics.rs new file mode 100644 index 0000000000..3cca5114c0 --- /dev/null +++ b/crates/solvers/src/api/routes/metrics.rs @@ -0,0 +1,4 @@ +pub async fn metrics() -> String { + let registry = observe::metrics::get_registry(); + observe::metrics::encode(registry) +} diff --git a/crates/solvers/src/api/routes/mod.rs b/crates/solvers/src/api/routes/mod.rs index 0628449625..39ab3d4f4f 100644 --- a/crates/solvers/src/api/routes/mod.rs +++ b/crates/solvers/src/api/routes/mod.rs @@ -1,10 +1,11 @@ use serde::Serialize; mod healthz; +mod metrics; mod notify; mod solve; -pub(super) use {healthz::healthz, notify::notify, solve::solve}; +pub(super) use {healthz::healthz, metrics::metrics, notify::notify, solve::solve}; #[derive(Debug, Serialize)] #[serde(untagged)] diff --git a/crates/solvers/src/domain/solver/dex/mod.rs b/crates/solvers/src/domain/solver/dex/mod.rs index b27f961337..014c7e1b2d 100644 --- a/crates/solvers/src/domain/solver/dex/mod.rs +++ b/crates/solvers/src/domain/solver/dex/mod.rs @@ -106,6 +106,7 @@ impl Dex { gas_price: auction::GasPrice, ) -> Option { let dex_err_handler = |err: infra::dex::Error| { + infra::metrics::solve_error(err.format_variant()); match &err { err @ infra::dex::Error::NotFound => { if order.partially_fillable { diff --git a/crates/solvers/src/domain/solver/mod.rs b/crates/solvers/src/domain/solver/mod.rs index 59af3d97a7..c6140adca0 100644 --- a/crates/solvers/src/domain/solver/mod.rs +++ b/crates/solvers/src/domain/solver/mod.rs @@ -1,4 +1,7 @@ -use crate::domain::{auction, notification, solution}; +use crate::{ + domain::{auction, notification, solution}, + infra::metrics, +}; pub mod baseline; pub mod dex; @@ -19,12 +22,16 @@ impl Solver { /// returning multiple solutions to later merge multiple non-overlapping /// solutions to get one big more gas efficient solution. pub async fn solve(&self, auction: auction::Auction) -> Vec { - match self { + metrics::solve(&auction); + let deadline = auction.deadline.clone(); + let solutions = match self { Solver::Baseline(solver) => solver.solve(auction).await, Solver::Naive(solver) => solver.solve(auction).await, Solver::Legacy(solver) => solver.solve(auction).await, Solver::Dex(solver) => solver.solve(auction).await, - } + }; + metrics::solved(&deadline, &solutions); + solutions } /// Notifies the solver about important events. Some of those events are diff --git a/crates/solvers/src/infra/dex/mod.rs b/crates/solvers/src/infra/dex/mod.rs index 4f2a35bb9e..b74d397894 100644 --- a/crates/solvers/src/infra/dex/mod.rs +++ b/crates/solvers/src/infra/dex/mod.rs @@ -52,6 +52,18 @@ pub enum Error { Other(Box), } +impl Error { + /// for instrumentization purposes + pub fn format_variant(&self) -> &'static str { + match self { + Self::OrderNotSupported => "OrderNotSupported", + Self::NotFound => "NotFound", + Self::RateLimited => "RateLimited", + Self::Other(_) => "Other", + } + } +} + impl From for Error { fn from(err: balancer::Error) -> Self { match err { diff --git a/crates/solvers/src/infra/metrics.rs b/crates/solvers/src/infra/metrics.rs new file mode 100644 index 0000000000..34cf194c1f --- /dev/null +++ b/crates/solvers/src/infra/metrics.rs @@ -0,0 +1,53 @@ +use crate::domain::{auction, solution}; + +/// Metrics for the solver engine. +#[derive(Debug, Clone, prometheus_metric_storage::MetricStorage)] +#[metric(subsystem = "solver_engine")] +struct Metrics { + /// The amount of time this solver engine has for solving. + #[metric(buckets(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))] + time_limit: prometheus::Histogram, + + /// The amount of time this solver engine has left when it finished solving. + #[metric(buckets(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))] + remaining_time: prometheus::Histogram, + + /// Errors that occurred during solving. + #[metric(labels("reason"))] + solve_errors: prometheus::IntCounterVec, + + /// The number of solutions that were found. + solutions: prometheus::IntCounter, +} + +/// Setup the metrics registry. +pub fn init() { + observe::metrics::setup_registry_reentrant(Some("solver-engine".to_owned()), None); +} + +pub fn solve(auction: &auction::Auction) { + get().time_limit.observe( + auction + .deadline + .remaining() + .unwrap_or_default() + .as_secs_f64(), + ); +} + +pub fn solved(deadline: &auction::Deadline, solutions: &[solution::Solution]) { + get() + .remaining_time + .observe(deadline.remaining().unwrap_or_default().as_secs_f64()); + get().solutions.inc_by(solutions.len() as u64); +} + +pub fn solve_error(reason: &str) { + get().solve_errors.with_label_values(&[reason]).inc(); +} + +/// Get the metrics instance. +fn get() -> &'static Metrics { + Metrics::instance(observe::metrics::get_storage_registry()) + .expect("unexpected error getting metrics instance") +} diff --git a/crates/solvers/src/infra/mod.rs b/crates/solvers/src/infra/mod.rs index e938aba499..e3585bfc7e 100644 --- a/crates/solvers/src/infra/mod.rs +++ b/crates/solvers/src/infra/mod.rs @@ -3,3 +3,4 @@ pub mod cli; pub mod config; pub mod contracts; pub mod dex; +pub mod metrics;