From 81f2957f7d8f19e2afead1e35a9d2eff969f7428 Mon Sep 17 00:00:00 2001 From: Eric Ghildyal Date: Wed, 20 Nov 2024 13:16:31 -0500 Subject: [PATCH] Address most of the PR comments --- src/adapters/monitors/cloudwatch.rs | 101 +++++++++++++++++----------- 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/src/adapters/monitors/cloudwatch.rs b/src/adapters/monitors/cloudwatch.rs index cf1a79b..a8815a6 100644 --- a/src/adapters/monitors/cloudwatch.rs +++ b/src/adapters/monitors/cloudwatch.rs @@ -36,6 +36,7 @@ pub enum ApiMetric { impl ApiMetric { pub fn as_str(&self) -> &'static str { match self { + // Note: These MUST match the AWS metric group names ApiMetric::Count => "Count", ApiMetric::Error4XX => "4XXError", ApiMetric::Error5XX => "5XXError", @@ -43,8 +44,20 @@ impl ApiMetric { } } impl CloudWatch { + // The default name AWS currently uses for canary stages in APIGs + const CANARY_STAGE_SUFFIX: &'static str = "/Canary"; + + fn get_stage_name(stage_name: &str, group: Group) -> String { + // Since AWS standardizes the naming of the canary stage, we can just hard-code it + if group == Group::Experimental { + stage_name.to_owned() + Self::CANARY_STAGE_SUFFIX + } else { + stage_name.to_string() + } + } + async fn query_cloudwatch( - &mut self, + &self, metric_name: ApiMetric, api_gateway_name: &str, stage_name: &str, @@ -52,26 +65,24 @@ impl CloudWatch { start: DateTime, end: DateTime, ) -> u32 { - // Since AWS standardizes the naming of the canary stage, we can just hard-code it - let stage = if group == Group::Experimental { - stage_name.to_owned() + "/Canary" - } else { - stage_name.to_string() - }; - // Builds a query that: // 1. Queries a specific API Gateway by name and stage, // 2. for Count, 4xxErrors, or 5xxErrors values, // 3. As a sum // 4. Over a 60s period // 5. Over the given window (5 mins, by default) + + // We use the debug format here since AWS doesn't allow metrics IDs to start with numbers + let aws_metric_id = format!("{:?}", metric_name).to_lowercase(); + let query = MetricDataQuery::builder() - .id(format!("{:?}", metric_name).to_lowercase()) + .id(aws_metric_id) .metric_stat( MetricStat::builder() .metric( Metric::builder() .namespace("AWS/ApiGateway") + // Use the metrics name as_str to grab the AWS group names .metric_name(metric_name.as_str()) .dimensions( Dimension::builder() @@ -79,7 +90,12 @@ impl CloudWatch { .value(api_gateway_name) .build(), ) - .dimensions(Dimension::builder().name("Stage").value(stage).build()) + .dimensions( + Dimension::builder() + .name("Stage") + .value(Self::get_stage_name(stage_name, group)) + .build(), + ) .build(), ) .period(60) @@ -122,46 +138,49 @@ impl Monitor for CloudWatch { type Item = CategoricalObservation<5, ResponseStatusCode>; async fn query(&mut self, group: Group) -> Vec { + // This function queries the metrics that we care most about (2xx, 4xx, and 5xx errors), + // compiles them into a list, then generates the correct number of + // CategoricalObservations for each response code let now: DateTime = Utc::now(); let five_mins_ago: DateTime = now - Duration::minutes(5); - let count = self - .query_cloudwatch( - ApiMetric::Count, - "Releases", - "prod", - group, - now, - five_mins_ago, - ) - .await; + let count_future = self.query_cloudwatch( + ApiMetric::Count, + "Releases", + "prod", + group, + now, + five_mins_ago, + ); - let error4xx = self - .query_cloudwatch( - ApiMetric::Error4XX, - "Releases", - "prod", - group, - now, - five_mins_ago, - ) - .await; + let error4xx_future = self.query_cloudwatch( + ApiMetric::Error4XX, + "Releases", + "prod", + group, + now, + five_mins_ago, + ); - let error5xx = self - .query_cloudwatch( - ApiMetric::Error5XX, - "Releases", - "prod", - group, - now, - five_mins_ago, - ) - .await; + let error5xx_future = self.query_cloudwatch( + ApiMetric::Error5XX, + "Releases", + "prod", + group, + now, + five_mins_ago, + ); + + let (count, error4xx, error5xx) = + tokio::join!(count_future, error4xx_future, error5xx_future); let mut observations = vec![]; - let resp2xx = (error4xx + error4xx) - count; + let resp2xx = (error4xx + error5xx) - count; + // Since we need a CategoricalObservation for each instance of a response code + // but AWS only returns us a total count, we need to make our own + // list of observations, 1 per counted item observations.extend( std::iter::repeat(CategoricalObservation { group: group.clone(),