Merge pull request #19 from wack/robbie/re-enable-tests

Test new ContingencyTable
wack · Nov 8, 2024 · 1787b98 · 1787b98
2 parents 9b99446 + a0abc1a
commit 1787b98
Show file tree

Hide file tree

Showing 10 changed files with 256 additions and 245 deletions.
diff --git a/src/adapters/engines/chi.rs b/src/adapters/engines/chi.rs
@@ -1,6 +1,6 @@
 use crate::{
     metrics::ResponseStatusCode,
-    stats::{CategoricalObservation, EmpiricalTable, ExpectationTable, Group},
+    stats::{CategoricalObservation, ContingencyTable, Group},
 };
 
 use super::DecisionEngine;
@@ -9,8 +9,7 @@ use super::DecisionEngine;
 /// significance test to determine whether the canary should be promoted or not.
 #[derive(Default)]
 pub struct ChiSquareEngine {
-    control_data: ExpectationTable<5, ResponseStatusCode>,
-    experimental_data: EmpiricalTable<5, ResponseStatusCode>,
+    table: ContingencyTable<5, ResponseStatusCode>,
 }
 
 impl ChiSquareEngine {
@@ -27,11 +26,11 @@ impl DecisionEngine<CategoricalObservation<5, ResponseStatusCode>> for ChiSquare
         match observation.group {
             Group::Control => {
                 // • Increment the number of observations for this category.
-                self.control_data.increment(&observation.outcome);
+                self.table.increment_expected(&observation.outcome, 1);
             }
             Group::Experimental => {
                 // • Increment the number of observations in the canary contingency table.
-                self.experimental_data.increment(observation.outcome);
+                self.table.increment_observed(&observation.outcome, 1);
             }
         }
     }

diff --git a/src/stats/chi.rs b/src/stats/chi.rs
@@ -1,29 +1,13 @@
-use std::num::NonZeroU64;
+use std::num::NonZeroUsize;
 
 use statrs::distribution::{ChiSquared, ContinuousCDF};
 
-use super::Categorical;
-
-trait ContingencyTable<const N: usize, C: Categorical<N>> {
-    fn expected(&self, index: usize) -> u32;
-    fn observed(&self, index: usize) -> u32;
-
-    /// returns the number of degrees of freedom for this table.
-    /// This is typically the number of categories minus one.
-    /// # Panics
-    /// This method panics if `N` is less than 2.
-    fn degrees_of_freedom(&self) -> NonZeroU64 {
-        if N < 2 {
-            panic!("The experiment must have at least two groups. Only {N} groups provided");
-        }
-        NonZeroU64::new(N as u64 - 1).unwrap()
-    }
-}
+use super::{Categorical, ContingencyTable};
 
 /// Alpha represents the alpha cutoff, expressed as a floating point from [0, 1] inclusive.
 /// For example, 0.95 is the standard 5% confidence interval.
 fn chi_square_test<const N: usize, C: Categorical<N>>(
-    table: &impl ContingencyTable<N, C>,
+    table: &ContingencyTable<N, C>,
     alpha: f64,
 ) -> bool {
     assert!(alpha < 1.0);
@@ -33,12 +17,12 @@ fn chi_square_test<const N: usize, C: Categorical<N>>(
 }
 
 // calculate the chi square test statistic using the provided contingency table.
-fn test_statistic<const N: usize, C: Categorical<N>>(table: &impl ContingencyTable<N, C>) -> f64 {
+fn test_statistic<const N: usize, C: Categorical<N>>(table: &ContingencyTable<N, C>) -> f64 {
     let mut sum = 0.0;
     // For each category, we calculate the square error between the expected and observed groups.
     for i in 0..N {
-        let expected_count = table.expected(i) as i64;
-        let observed_count = table.observed(i) as i64;
+        let expected_count = table.expected_by_index(i) as i64;
+        let observed_count = table.observed_by_index(i) as i64;
         let diff = observed_count - expected_count;
         let error = diff.pow(2) as f64;
         let incremental_error = error / (expected_count as f64);
@@ -47,70 +31,43 @@ fn test_statistic<const N: usize, C: Categorical<N>>(table: &impl ContingencyTab
     sum
 }
 
-fn p_value(test_statistic: f64, degrees_of_freedom: NonZeroU64) -> f64 {
-    let freedom = u64::from(degrees_of_freedom) as f64;
+/// calculates the p-value given the test statistic and the degrees of freedom.
+/// This is the upper-tail area of the Chi Square distribution (which is a special
+/// case of the gamma distribution) beyond the test statistic, i.e. `1 - CDF`.
+fn p_value(test_statistic: f64, degrees_of_freedom: NonZeroUsize) -> f64 {
+    let freedom = usize::from(degrees_of_freedom) as f64;
     let distribution = ChiSquared::new(freedom).expect("Degrees of freedom must be >= 0");
     1.0 - distribution.cdf(test_statistic)
 }
 
 #[cfg(test)]
 mod tests {
 
-    use super::ContingencyTable;
-    use static_assertions::assert_obj_safe;
+    use std::num::NonZeroUsize;
 
-    // Require the contingency table is object-safe for certain commonly used categories.
-    assert_obj_safe!(ContingencyTable<5, String>);
-
-    // TODO: Revisit these tests once everything compiles.
-    /*
-    /// This simple smoke test shows that the FixedFrequencyTable
-    /// can have its frequencies set and accessed.
-    #[test]
-    fn enumerable_table() {
-        let mut table = FixedTable::new();
-        let groups = [(true, 30u64), (false, 70u64)];
-        // Put the values into the table.
-        for (group, freq) in groups {
-            table.set_group_count(group, freq);
-        }
-        // Retreive the values from the table.
-        for (group, freq) in groups {
-            let expected = freq;
-            let observed = table.group_count(&group);
-            assert_eq!(expected, observed);
-        }
-        // Demonstrate the number of degrees of freedom matches expectations.
-        assert_eq!(degrees_of_freedom(&table), NonZeroU64::new(1).unwrap());
-    }
+    use super::{p_value, test_statistic};
+    use crate::stats::{contingency::Coin, ContingencyTable};
 
     /// Scenario: You flip a coin 50 times, and get 21 Heads and 29 Tails.
     /// You want to determine if the coin is fair. Output the test statistic.
     /// Let True represent Heads and False represent Tails.
     #[test]
     fn calc_test_statistic() {
-        let mut control_group = FixedTable::new();
-        control_group.set_group_count(true, 25);
-        control_group.set_group_count(false, 25);
-        let mut experimental_group = FixedTable::new();
-        experimental_group.set_group_count(true, 21);
-        experimental_group.set_group_count(false, 29);
-        assert_eq!(
-            degrees_of_freedom(&control_group),
-            NonZeroU64::new(1).unwrap()
-        );
-        assert_eq!(
-            degrees_of_freedom(&experimental_group),
-            NonZeroU64::new(1).unwrap()
-        );
-        let stat = test_statistic(&control_group, &experimental_group);
+        let mut table = ContingencyTable::new();
+        table.set_expected(&Coin::Heads, 25);
+        table.set_expected(&Coin::Tails, 25);
+        table.set_observed(&Coin::Heads, 21);
+        table.set_observed(&Coin::Tails, 29);
+        let degrees = table.degrees_of_freedom();
+        // We expect one degree of freedom since there are only two categories.
+        assert_eq!(degrees, NonZeroUsize::new(1).unwrap());
+        let stat = test_statistic(&table);
         // Round the statistic to two decimal places.
         let observed = (stat * 100.0).round() / 100.0;
         let expected = 1.28;
         assert_eq!(observed, expected);
         // Now, calculate the p-value using the test statistic.
-        let pval = p_value(stat, degrees_of_freedom(&control_group));
+        let pval = p_value(stat, degrees);
         assert!(0.25 < pval && pval < 0.30);
     }
-    */
 }
diff --git a/src/stats/contingency.rs b/src/stats/contingency.rs
@@ -0,0 +1,198 @@
+use std::num::NonZeroUsize;
+
+use crate::stats::{histogram::Histogram, Categorical};
+
+/// A `ContingencyTable` is conceptually a two-dimensional table,
+/// where each column represents a category, each row is a group (expected and observed),
+/// and each cell is the count of observations.
+/// Note, the number of elements in the expected row is used as a ratio to calculate
+/// the actual expectation. For example, if you're flipping a fair coin, your expected
+/// cells should be any matching number: i.e. 50/50, or 100/100 (a ratio equal to 1:1).
+/// When a caller queries for the number of expected elements, the ratio from the expected
+/// row is multiplied against the actual, total number of observed counts to determine the
+/// expected number for the given category. For example, for a fair coin, if you flip it 30 times,
+/// you'd multiply `30*50/(50+50)` to get `15`.
+pub struct ContingencyTable<const N: usize, C: Categorical<N>> {
+    expected: Histogram<N, C>,
+    observed: Histogram<N, C>,
+}
+
+impl<const N: usize, C: Categorical<N>> ContingencyTable<N, C> {
+    /// Create a new table with zeroes in each cell.
+    pub fn new() -> Self {
+        Self {
+            expected: Default::default(),
+            observed: Default::default(),
+        }
+    }
+
+    /// Calculate the expected count for category `cat`, scaled by the expected row's ratio.
+    pub fn expected(&self, cat: &C) -> f64 {
+        let index = cat.category();
+        self.expected_by_index(index)
+    }
+
+    /// calculate the expected count for the category with index `i`.
+    pub fn expected_by_index(&self, i: usize) -> f64 {
+        // • Calculate the expected number of elements as a ratio
+        //   of the total number of elements observed.
+        let expected_in_category = self.expected.get_count_by_index(i) as f64;
+        let expected_total = self.expected.total() as f64;
+        // • Grab the total number of elements observed, and calculate
+        //   using the ratio.
+        let total_observed = self.observed.total() as f64;
+        // If nothing was observed, or this category has no expected count, we expect zero.
+        if total_observed == 0.0 || expected_in_category == 0.0 {
+            return 0.0;
+        }
+        // Cast everything to a float since probabilities aren't always discrete.
+        expected_in_category * total_observed / expected_total
+    }
+
+    /// Return the observed count for the category with index `i`.
+    pub fn observed_by_index(&self, i: usize) -> u32 {
+        self.observed.get_count_by_index(i)
+    }
+
+    /// returns the number of degrees of freedom for this table.
+    /// This is typically the number of categories minus one.
+    /// # Panics
+    /// This method panics if `N` is less than 2.
+    pub fn degrees_of_freedom(&self) -> NonZeroUsize {
+        if N < 2 {
+            panic!("The experiment must have at least two groups. Only {N} groups provided");
+        }
+        NonZeroUsize::new(N - 1).unwrap()
+    }
+
+    pub fn observed(&self, cat: &C) -> u32 {
+        self.observed.get_count(cat)
+    }
+
+    pub fn set_expected(&mut self, cat: &C, count: u32) {
+        self.expected.set_count(cat, count);
+    }
+
+    pub fn set_observed(&mut self, cat: &C, count: u32) {
+        self.observed.set_count(cat, count);
+    }
+
+    pub fn increment_expected(&mut self, cat: &C, count: u32) {
+        self.expected.increment_by(cat, count);
+    }
+
+    pub fn increment_observed(&mut self, cat: &C, count: u32) {
+        self.observed.increment_by(cat, count);
+    }
+}
+
+impl<const N: usize, C: Categorical<N>> Default for ContingencyTable<N, C> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+pub(crate) use tests::Coin;
+
+#[cfg(test)]
+mod tests {
+    use std::num::NonZeroUsize;
+
+    use pretty_assertions::assert_eq;
+
+    use super::ContingencyTable;
+
+    /// This test exercises the ContingencyTable API when used for empirical
+    /// observations, like those coming from a real-life webserver.
+    /// This API updates values incrementally instead of setting them to a fixed value.
+    #[test]
+    fn empirical_expectations() {
+        // Scenario:
+        // • In the control group, we observe fifty 200 OK status codes and twenty 500 status codes.
+        // • In the canary group, we observe ten 200 OK status codes and thirty 500 status codes.
+        let mut table = ContingencyTable::new();
+        // Done in two batches to exercise bin addition.
+        table.increment_expected(&ResponseStatusCode::_2XX, 25);
+        table.increment_expected(&ResponseStatusCode::_2XX, 25);
+        table.increment_expected(&ResponseStatusCode::_5XX, 15);
+        table.increment_expected(&ResponseStatusCode::_5XX, 5);
+
+        table.increment_observed(&ResponseStatusCode::_2XX, 10);
+        table.increment_observed(&ResponseStatusCode::_5XX, 30);
+        // Assert the observations match.
+        assert_eq!(table.observed(&ResponseStatusCode::_2XX), 10);
+        assert_eq!(table.observed(&ResponseStatusCode::_5XX), 30);
+        // Given that we have 70 expected observations, and 40 canary observations, we expect to
+        // see 40*(50/70) 2XX status codes and 40*(20/70) 5XX status codes.
+        let test_case_expected = 40.0 * 50.0 / 70.0;
+        let test_case_observed = table.expected(&ResponseStatusCode::_2XX);
+        assert_eq!(test_case_expected, test_case_observed);
+        let test_case_expected = 40.0 * 20.0 / 70.0;
+        let test_case_observed = table.expected(&ResponseStatusCode::_5XX);
+        assert_eq!(test_case_expected, test_case_observed);
+    }
+
+    /// Test whether the ContingencyTable is able to correctly
+    /// calculate the expected probabilities in a simple coin flip
+    /// scenario.
+    #[test]
+    fn calculate_expected() {
+        // Scenario: We want to test if a coin is fair.
+        // Expected probability for each category is 50%.
+        let mut table = ContingencyTable::new();
+        // We expect an equal number of heads and tails.
+        // We don't have to use 50 here, as long as the numbers
+        // are the same.
+        table.set_expected(&Coin::Heads, 50);
+        table.set_expected(&Coin::Tails, 50);
+
+        table.set_observed(&Coin::Heads, 20);
+        table.set_observed(&Coin::Tails, 80);
+        // The coin should have a 50% chance of landing on either heads or tails.
+        // Because there were 100 trials
+        // in the observed group, we expect 50 = 100*50% Heads and Tails.
+        assert_eq!(table.expected(&Coin::Heads), 50.0);
+        assert_eq!(table.expected(&Coin::Tails), 50.0);
+        // However, if we increase the number of observations to 1000, then
+        // we'd expect 500 heads and 500 tails.
+        table.set_observed(&Coin::Heads, 750);
+        table.set_observed(&Coin::Tails, 250);
+        assert_eq!(
+            table.expected(&Coin::Heads),
+            500.0,
+            "expected 500 because the total is 1000"
+        );
+        assert_eq!(
+            table.expected(&Coin::Tails),
+            500.0,
+            "expected 500 because the total is 1000"
+        );
+    }
+
+    /// Demonstrate that `degrees_of_freedom` returns one less than
+    /// the number of categories.
+    #[test]
+    fn calc_degrees_of_freedom() {
+        let table: ContingencyTable<2, Coin> = ContingencyTable::new();
+        let expected = NonZeroUsize::new(1).unwrap();
+        let observed = table.degrees_of_freedom();
+        assert_eq!(observed, expected);
+    }
+
+    use crate::{metrics::ResponseStatusCode, stats::Categorical};
+    #[derive(PartialEq, Eq, Debug, Hash)]
+    pub(crate) enum Coin {
+        Heads,
+        Tails,
+    }
+
+    impl Categorical<2> for Coin {
+        fn category(&self) -> usize {
+            match self {
+                Self::Heads => 0,
+                Self::Tails => 1,
+            }
+        }
+    }
+}