Merge pull request #27 from wack/robbie/multi-297

Model and Implement "Stage" Concept
wack · Nov 16, 2024 · b536f4f · b536f4f
2 parents ad00c7a + 7456f57
commit b536f4f
Show file tree

Hide file tree

Showing 7 changed files with 252 additions and 0 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -26,6 +26,7 @@ futures-util = "0.3.31"
 # directories = "5.0"
 # indexmap = { version = "2.1.0", features = ["serde"] }
 miette = { version = "7", features = ["fancy"] }
+nutype = "0.5.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 statrs = "0.17.1"

diff --git a/src/pipeline/mod.rs b/src/pipeline/mod.rs
@@ -65,3 +65,6 @@ impl Pipeline {
 
 #[cfg(test)]
 use crate::adapters::{AlwaysPromote, MockIngress};
+
+mod percent;
+mod stages;
diff --git a/src/pipeline/percent.rs b/src/pipeline/percent.rs
@@ -0,0 +1,55 @@
+use nutype::nutype;
+
+/// A percentage that may carry a fractional part, in the range `0.0..=100.0`.
+///
+/// Values can only be created through the validating constructors
+/// (`try_new` / `TryFrom<f64>`), so every `DecimalPercent` that exists
+/// is known to be a finite number within that range.
+// Using a newtype allows us to ensure they're correct by construction.
+// `finite` is listed explicitly because NaN is not ordered against the
+// bounds, so range checks alone are not a reliable guard against it.
+// The lower bound keeps `inverse()` from computing a value above 100.
+#[nutype(
+    validate(finite, greater_or_equal = 0.0, less_or_equal = 100.0),
+    derive(Debug, Display, Copy, Clone, PartialEq, TryFrom, Into)
+)]
+pub(super) struct DecimalPercent(f64);
+
+impl DecimalPercent {
+    /// Returns the complement of this percentage, i.e. `100.0 - self`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `100.0 - self` does not pass this type's validation.
+    pub(super) fn inverse(self) -> Self {
+        let remainder = 100.0 - f64::from(self);
+        Self::try_new(remainder).unwrap()
+    }
+}
+
+/// A whole-number percentage in the range `0..=100`.
+// Wrapping the `u8` in a validated newtype means any `WholePercent`
+// that exists has already been checked to be at most 100; the unsigned
+// inner type rules out negative values.
+#[nutype(
+    validate(less_or_equal = 100),
+    derive(Debug, Display, Clone, Copy, PartialEq, Eq, TryFrom, Into)
+)]
+pub(super) struct WholePercent(u8);
+
+impl WholePercent {
+    /// Returns the complement of this percentage, i.e. `100 - self`.
+    ///
+    /// The inner value is validated to be at most 100, so the subtraction
+    /// cannot underflow and the result is itself at most 100.
+    pub(super) fn inverse(self) -> Self {
+        let remainder = 100 - u8::from(self);
+        Self::try_new(remainder).unwrap()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{DecimalPercent, WholePercent};
+
+    /// `inverse` maps each value to `100 - value`, and applying it
+    /// twice returns the original value.
+    #[test]
+    fn inverse_percent() {
+        let test_cases = [
+            (0, 100),
+            (50, 50),
+            (20, 80),
+            (35, 65),
+            (1, 99),
+            (99, 1),
+            (100, 0),
+        ]
+        .into_iter()
+        .map(|(left, right)| {
+            (
+                WholePercent::try_new(left).unwrap(),
+                WholePercent::try_new(right).unwrap(),
+            )
+        });
+        for (input, expected_out) in test_cases {
+            let observed_out = input.inverse();
+            assert_eq!(observed_out, expected_out);
+            assert_eq!(observed_out.inverse(), input);
+        }
+    }
+
+    /// The upper bound is inclusive: 100 is accepted, anything larger is not.
+    #[test]
+    fn whole_percent_rejects_values_above_100() {
+        assert!(WholePercent::try_new(100).is_ok());
+        assert!(WholePercent::try_new(101).is_err());
+        assert!(WholePercent::try_new(u8::MAX).is_err());
+    }
+
+    /// Same contract as `inverse_percent`, for the fractional type.
+    #[test]
+    fn inverse_decimal_percent() {
+        // Every value here is exactly representable as an `f64`, so
+        // comparing with `==` is safe.
+        let test_cases = [
+            (0.0, 100.0),
+            (50.0, 50.0),
+            (12.5, 87.5),
+            (99.0, 1.0),
+            (100.0, 0.0),
+        ];
+        for (left, right) in test_cases {
+            let input = DecimalPercent::try_new(left).unwrap();
+            let expected_out = DecimalPercent::try_new(right).unwrap();
+            let observed_out = input.inverse();
+            assert_eq!(observed_out, expected_out);
+            assert_eq!(observed_out.inverse(), input);
+        }
+    }
+
+    /// The upper bound is inclusive: 100.0 is accepted, anything larger is not.
+    #[test]
+    fn decimal_percent_rejects_values_above_100() {
+        assert!(DecimalPercent::try_new(100.0).is_ok());
+        assert!(DecimalPercent::try_new(100.5).is_err());
+    }
+}
diff --git a/src/pipeline/stages/config.rs b/src/pipeline/stages/config.rs
@@ -0,0 +1,62 @@
+use tokio::time::Duration;
+
+use crate::pipeline::percent::{DecimalPercent, WholePercent};
+
+use super::details::StageDetails;
+
+/// Timeout given to each stage of the default pipeline: five minutes.
+const DEFAULT_STAGE_TIMEOUT: Duration = Duration::from_secs(5 * 60);
+
+/// The ordered stages of a deployment pipeline, together with a cursor
+/// marking which stage is currently in effect.
+pub struct StageConfig {
+    /// The stages, in the order the pipeline moves through them.
+    stages: Vec<StageDetails>,
+    /// Index into `stages` of the current stage. An index at or past
+    /// the end of the vector means there is no current stage.
+    current_stage: usize,
+}
+
+impl StageConfig {
+    /// Moves the cursor to the next stage and returns that stage.
+    ///
+    /// Returns `None` once the cursor has moved past the final stage.
+    /// Calling `advance` again after that is harmless: the cursor stays
+    /// exactly one position past the end.
+    pub fn advance(&mut self) -> Option<&StageDetails> {
+        // Edge case: we advance more than once beyond the end
+        // of the vector. Clamp with `min` so the cursor never goes
+        // further than one past the last stage, in case we have to
+        // roll back by one stage. (`max` here would jump straight to
+        // the end on the very first call and skip every stage.)
+        self.current_stage = std::cmp::min(self.current_stage + 1, self.stages.len());
+        self.current()
+    }
+
+    /// Returns the stage the cursor points at, or `None` if the cursor
+    /// is past the end of the stage list.
+    fn current(&self) -> Option<&StageDetails> {
+        self.stages.get(self.current_stage)
+    }
+}
+
+/// The out-of-the-box pipeline, used when the user has not made
+/// any edits to the stages.
+impl Default for StageConfig {
+    fn default() -> Self {
+        // The amount of traffic the canary should get at each stage.
+        let traffic_schedule: [u8; 4] = [5, 20, 40, 60];
+        // The amount of confidence we need to have in the badness of
+        // the deploy before we roll back, at each stage.
+        let confidence_schedule: [f64; 4] = [99.0, 95.0, 90.0, 90.0];
+        // Pair the two schedules positionally and build one stage per pair.
+        let stages = traffic_schedule
+            .into_iter()
+            .zip(confidence_schedule)
+            .map(|(traffic, confidence)| {
+                let canary_traffic = WholePercent::try_new(traffic).unwrap();
+                let confidence = DecimalPercent::try_new(confidence).unwrap();
+                StageDetails::builder()
+                    .confidence(confidence)
+                    .canary_traffic(canary_traffic)
+                    .timeout(DEFAULT_STAGE_TIMEOUT)
+                    .build()
+            })
+            .collect();
+        // Start the cursor at the first stage.
+        Self {
+            current_stage: 0,
+            stages,
+        }
+    }
+}
diff --git a/src/pipeline/stages/details.rs b/src/pipeline/stages/details.rs
@@ -0,0 +1,54 @@
+use bon::bon;
+
+use crate::pipeline::percent::{DecimalPercent, WholePercent};
+
+use super::TimeoutBehavior;
+
+/// Everything that describes one stage of the pipeline.
+pub struct StageDetails {
+    /// Share of traffic sent to the canary during this stage.
+    /// Whatever is left over goes to the baseline.
+    canary_traffic: WholePercent,
+    /// The confidence threshold at which the deployment is declared
+    /// bad and rolled back. For example, `99` means we roll back as
+    /// soon as we are 99% confident the deployment is bad.
+    badness_confidence_limit: DecimalPercent,
+    /// How long this stage may run before it times out.
+    timeout: tokio::time::Duration,
+    /// What to do when `timeout` elapses.
+    timeout_behavior: TimeoutBehavior,
+}
+
+impl StageDetails {
+    /// The share of traffic sent to the canary, as a raw `u8`.
+    pub(crate) fn canary_traffic(&self) -> u8 {
+        u8::from(self.canary_traffic)
+    }
+
+    /// The rollback confidence threshold, as a raw `f64`.
+    pub(crate) fn badness_confidence_limit(&self) -> f64 {
+        f64::from(self.badness_confidence_limit)
+    }
+
+    /// How long this stage may run before it times out.
+    pub(crate) fn timeout(&self) -> tokio::time::Duration {
+        self.timeout
+    }
+
+    /// What happens when this stage times out.
+    pub(crate) fn timeout_behavior(&self) -> TimeoutBehavior {
+        self.timeout_behavior
+    }
+}
+
+#[bon]
+impl StageDetails {
+    /// Builds a stage from its canary traffic share, its rollback
+    /// confidence threshold (`confidence`), and its timeout.
+    ///
+    /// The timeout behavior is not configurable here; it is always
+    /// `TimeoutBehavior::Advance`.
+    #[builder]
+    pub fn new(
+        canary_traffic: WholePercent,
+        confidence: DecimalPercent,
+        timeout: tokio::time::Duration,
+    ) -> Self {
+        let badness_confidence_limit = confidence;
+        let timeout_behavior = TimeoutBehavior::Advance;
+        Self {
+            canary_traffic,
+            badness_confidence_limit,
+            timeout,
+            timeout_behavior,
+        }
+    }
+}
diff --git a/src/pipeline/stages/mod.rs b/src/pipeline/stages/mod.rs
@@ -0,0 +1,17 @@
+mod config;
+mod details;
+
+/// What the pipeline does when a stage times out.
+///
+/// Today the only option is to advance to the next stage, which is
+/// an aggressive philosophy. A more conservative risk profile could
+/// be added later: roll back unless we have a certain amount of
+/// confidence that the deployment is safe. That would be the opposite
+/// of our current measure of confidence, which only tells us how
+/// confident we are that the deployment *isn't* safe.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum TimeoutBehavior {
+    /// Move on to the next stage, provided we are not confident
+    /// that the deployment is bad.
+    Advance,
+}