Skip to content

Commit

Permalink
feat(telemetry): add metrics for query plan warmup and schema loading (
Browse files Browse the repository at this point in the history
…#3807)

It adds histogram metrics for
`apollo_router_query_planning_warmup_duration` and
`apollo_router_schema_load_duration`.

Example in Prometheus:

```
# HELP apollo_router_query_planning_warmup_duration apollo_router_query_planning_warmup_duration
# TYPE apollo_router_query_planning_warmup_duration histogram
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.05"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.1"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.25"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.5"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="1"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="2.5"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="5"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="10"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="20"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="100"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="1000"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="+Inf"} 1
apollo_router_query_planning_warmup_duration_sum{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version=""} 0.022390619
apollo_router_query_planning_warmup_duration_count{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version=""} 1
# HELP apollo_router_schema_load_duration apollo_router_schema_load_duration
# TYPE apollo_router_schema_load_duration histogram
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.05"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.1"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.25"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.5"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="1"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="2.5"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="5"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="10"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="20"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="100"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="1000"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="+Inf"} 8
apollo_router_schema_load_duration_sum{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version=""} 0.023486205999999996
apollo_router_schema_load_duration_count{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version=""} 8
```

Fixes #3767


---------

Signed-off-by: Benjamin Coenen <[email protected]>
  • Loading branch information
bnjjj authored Sep 15, 2023
1 parent 0bf4440 commit f831f68
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 3 deletions.
42 changes: 42 additions & 0 deletions .changesets/feat_bnjjj_fix_3767.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
### feat(telemetry): add metrics for query plan warmup and schema load ([Issue #3767](https://github.com/apollographql/router/issues/3767))

It adds histogram metrics for `apollo_router_query_planning_warmup_duration` and `apollo_router_schema_load_duration`.

Example in Prometheus:

```
# HELP apollo_router_query_planning_warmup_duration apollo_router_query_planning_warmup_duration
# TYPE apollo_router_query_planning_warmup_duration histogram
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.05"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.1"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.25"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.5"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="1"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="2.5"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="5"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="10"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="20"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="100"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="1000"} 1
apollo_router_query_planning_warmup_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="+Inf"} 1
apollo_router_query_planning_warmup_duration_sum{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version=""} 0.022390619
apollo_router_query_planning_warmup_duration_count{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version=""} 1
# HELP apollo_router_schema_load_duration apollo_router_schema_load_duration
# TYPE apollo_router_schema_load_duration histogram
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.05"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.1"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.25"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="0.5"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="1"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="2.5"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="5"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="10"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="20"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="100"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="1000"} 8
apollo_router_schema_load_duration_bucket{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version="",le="+Inf"} 8
apollo_router_schema_load_duration_sum{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version=""} 0.023486205999999996
apollo_router_schema_load_duration_count{service_name="apollo-router",otel_scope_name="apollo/router",otel_scope_version=""} 8
```

By [@bnjjj](https://github.com/bnjjj) in https://github.com/apollographql/router/pull/3807
6 changes: 4 additions & 2 deletions apollo-router/src/plugins/telemetry/metrics/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ impl<T: MeterProvider> FilterMeterProvider<T> {
FilterMeterProvider::builder()
.delegate(delegate)
.allow(
Regex::new(r"apollo\.(graphos\.cloud|router\.(operations?|config))(\..*|$)")
.expect("regex should have been valid"),
Regex::new(
r"apollo\.(graphos\.cloud|router\.(operations?|config|schema|query))(\..*|$)",
)
.expect("regex should have been valid"),
)
.build()
}
Expand Down
38 changes: 38 additions & 0 deletions apollo-router/src/plugins/telemetry/utils.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use std::time::Duration;
use std::time::Instant;

use tracing_core::field::Value;

pub(crate) trait TracingUtils {
Expand All @@ -13,3 +16,38 @@ impl TracingUtils for bool {
}
}
}

/// Timer implementing Drop to automatically compute the duration between the moment it has been created until it's dropped
///```ignore
/// Timer::new(|duration| {
/// tracing::info!(histogram.apollo_router_test = duration.as_secs_f64());
/// })
/// ```
pub(crate) struct Timer<F>
where
F: FnOnce(Duration),
{
start: Instant,
f: Option<F>,
}

impl<F> Timer<F>
where
F: FnOnce(Duration),
{
pub(crate) fn new(f: F) -> Self {
Self {
start: Instant::now(),
f: f.into(),
}
}
}

impl<F> Drop for Timer<F>
where
F: FnOnce(Duration),
{
fn drop(&mut self) {
self.f.take().expect("f must exist")(self.start.elapsed())
}
}
2 changes: 1 addition & 1 deletion apollo-router/src/query_planner/bridge_query_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ impl Service<QueryPlannerRequest> for BridgeQueryPlanner {
)
.await;
let duration = start.elapsed().as_secs_f64();
tracing::info!(histogram.apollo_router_query_planning_time = duration,);
tracing::info!(histogram.apollo_router_query_planning_time = duration);

match res {
Ok(query_planner_content) => Ok(QueryPlannerResponse::builder()
Expand Down
6 changes: 6 additions & 0 deletions apollo-router/src/query_planner/caching_query_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use crate::error::CacheResolverError;
use crate::error::QueryPlannerError;
use crate::plugins::authorization::AuthorizationPlugin;
use crate::plugins::authorization::CacheKeyMetadata;
use crate::plugins::telemetry::utils::Timer;
use crate::query_planner::labeler::add_defer_labels;
use crate::query_planner::BridgeQueryPlanner;
use crate::query_planner::QueryPlanResult;
Expand Down Expand Up @@ -107,6 +108,11 @@ where
query_analysis: &QueryAnalysisLayer,
cache_keys: Vec<WarmUpCachingQueryKey>,
) {
let _timer = Timer::new(|duration| {
::tracing::info!(
histogram.apollo.router.query.planning.warmup.duration = duration.as_secs_f64()
);
});
let schema_id = self.schema.schema_id.clone();

let mut service = ServiceBuilder::new().service(
Expand Down
5 changes: 5 additions & 0 deletions apollo-router/src/spec/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Instant;

use apollo_compiler::diagnostics::ApolloDiagnostic;
use apollo_compiler::ApolloCompiler;
Expand Down Expand Up @@ -61,6 +62,7 @@ impl Schema {
}

pub(crate) fn parse(sdl: &str, configuration: &Configuration) -> Result<Self, SchemaError> {
let start = Instant::now();
let mut compiler = ApolloCompiler::new();
let id = compiler.add_type_system(sdl, "schema.graphql");

Expand Down Expand Up @@ -129,6 +131,9 @@ impl Schema {
let mut hasher = Sha256::new();
hasher.update(sdl.as_bytes());
let schema_id = Some(format!("{:x}", hasher.finalize()));
tracing::info!(
histogram.apollo.router.schema.load.duration = start.elapsed().as_secs_f64()
);

Ok(Schema {
raw_sdl: Arc::new(sdl.to_string()),
Expand Down
3 changes: 3 additions & 0 deletions docs/source/configuration/metrics.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ The coprocessor operations metric has the following attributes:

- `apollo_router_processing_time` - Time spent processing a request (outside of waiting for external or subgraph requests) in seconds.
- `apollo_router_query_planning_time` - Time spent planning queries in seconds.
- `apollo_router_query_planning_warmup_duration` - Time spent planning queries in seconds.
- `apollo_router_schema_load_duration` - Time spent loading the schema in seconds.

#### Uplink

Expand All @@ -121,6 +123,7 @@ The coprocessor operations metric has the following attributes:
Note that the initial call to uplink during router startup will not be reflected in metrics.

#### Subscription

- `apollo_router_opened_subscriptions` - Number of different opened subscriptions (not the number of clients with an opened subscriptions in case it's deduplicated)
- `apollo_router_deduplicated_subscriptions_total` - Number of subscriptions that has been deduplicated
- `apollo_router_skipped_event_count` - Number of subscription events that has been skipped because too many events have been received from the subgraph but not yet sent to the client.
Expand Down

0 comments on commit f831f68

Please sign in to comment.