Skip to content

Commit

Permalink
Merge pull request #19 from wack/robbie/re-enable-tests
Browse files Browse the repository at this point in the history
Test new ContingencyTable
  • Loading branch information
RobbieMcKinstry authored Nov 8, 2024
2 parents 9b99446 + a0abc1a commit 1787b98
Show file tree
Hide file tree
Showing 10 changed files with 256 additions and 245 deletions.
9 changes: 4 additions & 5 deletions src/adapters/engines/chi.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
metrics::ResponseStatusCode,
stats::{CategoricalObservation, EmpiricalTable, ExpectationTable, Group},
stats::{CategoricalObservation, ContingencyTable, Group},
};

use super::DecisionEngine;
Expand All @@ -9,8 +9,7 @@ use super::DecisionEngine;
/// significance test to determine whether the canary should be promoted or not.
#[derive(Default)]
pub struct ChiSquareEngine {
control_data: ExpectationTable<5, ResponseStatusCode>,
experimental_data: EmpiricalTable<5, ResponseStatusCode>,
table: ContingencyTable<5, ResponseStatusCode>,
}

impl ChiSquareEngine {
Expand All @@ -27,11 +26,11 @@ impl DecisionEngine<CategoricalObservation<5, ResponseStatusCode>> for ChiSquare
match observation.group {
Group::Control => {
// • Increment the number of observations for this category.
self.control_data.increment(&observation.outcome);
self.table.increment_expected(&observation.outcome, 1);
}
Group::Experimental => {
// • Increment the number of observations in the canary contingency table.
self.experimental_data.increment(observation.outcome);
self.table.increment_observed(&observation.outcome, 1);
}
}
}
Expand Down
91 changes: 24 additions & 67 deletions src/stats/chi.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,13 @@
use std::num::NonZeroU64;
use std::num::NonZeroUsize;

use statrs::distribution::{ChiSquared, ContinuousCDF};

use super::Categorical;

trait ContingencyTable<const N: usize, C: Categorical<N>> {
fn expected(&self, index: usize) -> u32;
fn observed(&self, index: usize) -> u32;

/// returns the number of degrees of freedom for this table.
/// This is typically the number of categories minus one.
/// # Panics
/// This method panics if `N` is less than 2.
fn degrees_of_freedom(&self) -> NonZeroU64 {
if N < 2 {
panic!("The experiment must have at least two groups. Only {N} groups provided");
}
NonZeroU64::new(N as u64 - 1).unwrap()
}
}
use super::{Categorical, ContingencyTable};

/// Alpha represents the alpha cutoff, expressed as a floating point from [0, 1] inclusive.
/// For example, 0.95 is the standard 5% confidence interval.
fn chi_square_test<const N: usize, C: Categorical<N>>(
table: &impl ContingencyTable<N, C>,
table: &ContingencyTable<N, C>,
alpha: f64,
) -> bool {
assert!(alpha < 1.0);
Expand All @@ -33,12 +17,12 @@ fn chi_square_test<const N: usize, C: Categorical<N>>(
}

// calculate the chi square test statistic using the provided contingency tables.
fn test_statistic<const N: usize, C: Categorical<N>>(table: &impl ContingencyTable<N, C>) -> f64 {
fn test_statistic<const N: usize, C: Categorical<N>>(table: &ContingencyTable<N, C>) -> f64 {
let mut sum = 0.0;
// For each category, we calculate the square error between the expected and observed groups.
for i in 0..N {
let expected_count = table.expected(i) as i64;
let observed_count = table.observed(i) as i64;
let expected_count = table.expected_by_index(i) as i64;
let observed_count = table.observed_by_index(i) as i64;
let diff = observed_count - expected_count;
let error = diff.pow(2) as f64;
let incremental_error = error / (expected_count as f64);
Expand All @@ -47,70 +31,43 @@ fn test_statistic<const N: usize, C: Categorical<N>>(table: &impl ContingencyTab
sum
}

fn p_value(test_statistic: f64, degrees_of_freedom: NonZeroU64) -> f64 {
let freedom = u64::from(degrees_of_freedom) as f64;
/// calculates the p-value given the test statistic and the degrees of freedom.
/// This is determined by the area of the Chi Square distribution (which is a special
/// case of the gamma distribution).
fn p_value(test_statistic: f64, degrees_of_freedom: NonZeroUsize) -> f64 {
let freedom = usize::from(degrees_of_freedom) as f64;
let distribution = ChiSquared::new(freedom).expect("Degrees of freedom must be >= 0");
1.0 - distribution.cdf(test_statistic)
}

#[cfg(test)]
mod tests {

use super::ContingencyTable;
use static_assertions::assert_obj_safe;
use std::num::NonZeroUsize;

// Require the contingency table is object-safe for certain commonly used categories.
assert_obj_safe!(ContingencyTable<5, String>);

// TODO: Revisit these tests once everything compiles.
/*
/// This simple smoke test shows that the FixedFrequencyTable
/// can have its frequencies set and accessed.
#[test]
fn enumerable_table() {
let mut table = FixedTable::new();
let groups = [(true, 30u64), (false, 70u64)];
// Put the values into the table.
for (group, freq) in groups {
table.set_group_count(group, freq);
}
// Retreive the values from the table.
for (group, freq) in groups {
let expected = freq;
let observed = table.group_count(&group);
assert_eq!(expected, observed);
}
// Demonstrate the number of degrees of freedom matches expectations.
assert_eq!(degrees_of_freedom(&table), NonZeroU64::new(1).unwrap());
}
use super::{p_value, test_statistic};
use crate::stats::{contingency::Coin, ContingencyTable};

/// Scenario: You flip a coin 50 times, and get 21 Heads and 29 Tails.
/// You want to determine if the coin is fair. Output the test statistic.
/// Let True represent Heads and False represent Tails.
#[test]
fn calc_test_statistic() {
let mut control_group = FixedTable::new();
control_group.set_group_count(true, 25);
control_group.set_group_count(false, 25);
let mut experimental_group = FixedTable::new();
experimental_group.set_group_count(true, 21);
experimental_group.set_group_count(false, 29);
assert_eq!(
degrees_of_freedom(&control_group),
NonZeroU64::new(1).unwrap()
);
assert_eq!(
degrees_of_freedom(&experimental_group),
NonZeroU64::new(1).unwrap()
);
let stat = test_statistic(&control_group, &experimental_group);
let mut table = ContingencyTable::new();
table.set_expected(&Coin::Heads, 25);
table.set_expected(&Coin::Tails, 25);
table.set_observed(&Coin::Heads, 21);
table.set_observed(&Coin::Tails, 29);
let degrees = table.degrees_of_freedom();
// We expect one degree of freedom since there are only two categories.
assert_eq!(degrees, NonZeroUsize::new(1).unwrap());
let stat = test_statistic(&table);
// Round the statistic to two decimal places.
let observed = (stat * 100.0).round() / 100.0;
let expected = 1.28;
assert_eq!(observed, expected);
// Now, calculate the p-value using the test statistic.
let pval = p_value(stat, degrees_of_freedom(&control_group));
let pval = p_value(stat, degrees);
assert!(0.25 < pval && pval < 0.30);
}
*/
}
198 changes: 198 additions & 0 deletions src/stats/contingency.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
use std::num::NonZeroUsize;

use crate::stats::{histogram::Histogram, Categorical};

/// A `ContingencyTable` is conceptually a two-dimensional table,
/// where each column represents a category, each row is a group (expected and observed),
/// and each cell is the count of observations.
///
/// Note that the counts stored in the expected row are not used verbatim: they act as a
/// ratio from which the actual expectation is derived. For example, if you're flipping a
/// fair coin, the expected cells may hold any matching numbers, e.g. 50/50 or 100/100
/// (a ratio equal to 1:1).
///
/// When a caller queries for the number of expected elements, the ratio from the expected
/// row is multiplied against the actual, total number of observed counts to determine the
/// expected number for the given category. For example, for a fair coin flipped 30 times,
/// the expected count for either side is `30*50/(50+50)`, i.e. `15`.
pub struct ContingencyTable<const N: usize, C: Categorical<N>> {
// Counts describing the expected distribution; only their ratio to the row total matters.
expected: Histogram<N, C>,
// Counts actually observed, one per category.
observed: Histogram<N, C>,
}

impl<const N: usize, C: Categorical<N>> ContingencyTable<N, C> {
    /// Create a new table whose expected and observed rows are both
    /// default-constructed histograms.
    pub fn new() -> Self {
        let expected = Default::default();
        let observed = Default::default();
        Self { expected, observed }
    }

    /// Expected count for the category `cat`, scaled to the number of
    /// observations recorded so far. Looks the category up by its index and
    /// defers to [`Self::expected_by_index`].
    pub fn expected(&self, cat: &C) -> f64 {
        self.expected_by_index(cat.category())
    }

    /// Expected count for the category with index `i`.
    ///
    /// The expected row is treated as a ratio: the category's share of the
    /// expected row is multiplied by the total number of observations.
    /// Returns `0.0` when nothing has been observed yet or when the category
    /// has no expected weight.
    pub fn expected_by_index(&self, i: usize) -> f64 {
        // Everything is handled as a float because the scaled expectation is
        // generally not a whole number.
        let category_weight = self.expected.get_count_by_index(i) as f64;
        let weight_total = self.expected.total() as f64;
        let observation_total = self.observed.total() as f64;

        if observation_total == 0.0 || category_weight == 0.0 {
            // No observations (or no weight for this category) means we
            // expect to see nothing in this cell.
            0.0
        } else {
            category_weight * observation_total / weight_total
        }
    }

    /// Observed count for the category `cat`.
    pub fn observed(&self, cat: &C) -> u32 {
        let row = &self.observed;
        row.get_count(cat)
    }

    /// Observed count for the category with index `i`.
    pub fn observed_by_index(&self, i: usize) -> u32 {
        let row = &self.observed;
        row.get_count_by_index(i)
    }

    /// Returns the number of degrees of freedom for this table.
    /// This is typically the number of categories minus one.
    /// # Panics
    /// This method panics if `N` is less than 2.
    pub fn degrees_of_freedom(&self) -> NonZeroUsize {
        assert!(
            N >= 2,
            "The experiment must have at least two groups. Only {N} groups provided"
        );
        NonZeroUsize::new(N - 1).unwrap()
    }

    /// Overwrite the expected-row cell for `cat` with `count`
    /// (forwards to the histogram's `set_count`).
    pub fn set_expected(&mut self, cat: &C, count: u32) {
        let row = &mut self.expected;
        row.set_count(cat, count);
    }

    /// Overwrite the observed-row cell for `cat` with `count`
    /// (forwards to the histogram's `set_count`).
    pub fn set_observed(&mut self, cat: &C, count: u32) {
        let row = &mut self.observed;
        row.set_count(cat, count);
    }

    /// Increment the expected-row cell for `cat` by `count`
    /// (forwards to the histogram's `increment_by`).
    pub fn increment_expected(&mut self, cat: &C, count: u32) {
        let row = &mut self.expected;
        row.increment_by(cat, count);
    }

    /// Increment the observed-row cell for `cat` by `count`
    /// (forwards to the histogram's `increment_by`).
    pub fn increment_observed(&mut self, cat: &C, count: u32) {
        let row = &mut self.observed;
        row.increment_by(cat, count);
    }
}

impl<const N: usize, C: Categorical<N>> Default for ContingencyTable<N, C> {
fn default() -> Self {
Self::new()
}
}

#[cfg(test)]
pub(crate) use tests::Coin;

#[cfg(test)]
mod tests {
    use std::num::NonZeroUsize;

    use pretty_assertions::assert_eq;

    use super::ContingencyTable;
    use crate::{metrics::ResponseStatusCode, stats::Categorical};

    /// A two-category fixture used by the coin-flip scenarios.
    #[derive(PartialEq, Eq, Debug, Hash)]
    pub(crate) enum Coin {
        Heads,
        Tails,
    }

    impl Categorical<2> for Coin {
        fn category(&self) -> usize {
            match *self {
                Coin::Heads => 0,
                Coin::Tails => 1,
            }
        }
    }

    /// This test exercises the ContingencyTable API when used for empirical
    /// observations, like those coming from a real-life webserver.
    /// This API updates values incrementally instead of setting them to a fixed value.
    #[test]
    fn empirical_expectations() {
        // Scenario:
        // • Control group: fifty 200 OK status codes and twenty 500 status codes.
        // • Canary group: ten 200 OK status codes and thirty 500 status codes.
        let mut table: ContingencyTable<5, ResponseStatusCode> = ContingencyTable::new();

        // The control counts arrive in two batches per category to exercise bin addition.
        let control_batches = [
            (ResponseStatusCode::_2XX, 25),
            (ResponseStatusCode::_2XX, 25),
            (ResponseStatusCode::_5XX, 15),
            (ResponseStatusCode::_5XX, 5),
        ];
        for (status, batch) in control_batches {
            table.increment_expected(&status, batch);
        }

        let canary_batches = [
            (ResponseStatusCode::_2XX, 10),
            (ResponseStatusCode::_5XX, 30),
        ];
        for (status, batch) in canary_batches {
            table.increment_observed(&status, batch);
        }

        // The observed row reports exactly what was recorded.
        assert_eq!(table.observed(&ResponseStatusCode::_2XX), 10);
        assert_eq!(table.observed(&ResponseStatusCode::_5XX), 30);

        // With 70 expected observations and 40 canary observations, we expect to
        // see 40*(50/70) 2XX status codes and 40*(20/70) 5XX status codes.
        let want_2xx = 40.0 * 50.0 / 70.0;
        let got_2xx = table.expected(&ResponseStatusCode::_2XX);
        assert_eq!(want_2xx, got_2xx);

        let want_5xx = 40.0 * 20.0 / 70.0;
        let got_5xx = table.expected(&ResponseStatusCode::_5XX);
        assert_eq!(want_5xx, got_5xx);
    }

    /// Test whether the ContingencyTable is able to correctly
    /// calculate the expected probabilities in a simple coin flip
    /// scenario.
    #[test]
    fn calculate_expected() {
        // Scenario: We want to test if a coin is fair, so the expected
        // probability for each category is 50%.
        let mut table: ContingencyTable<2, Coin> = ContingencyTable::new();

        // Heads and tails are expected equally often. The value 50 is
        // arbitrary: only the 1:1 ratio between the two cells matters.
        for side in [Coin::Heads, Coin::Tails] {
            table.set_expected(&side, 50);
        }

        table.set_observed(&Coin::Heads, 20);
        table.set_observed(&Coin::Tails, 80);
        // There were 100 trials in the observed group, so each side is
        // expected 100 * 50% = 50 times.
        for side in [Coin::Heads, Coin::Tails] {
            assert_eq!(table.expected(&side), 50.0);
        }

        // Raising the number of observations to 1000 raises the expectation
        // to 500 heads and 500 tails.
        table.set_observed(&Coin::Heads, 750);
        table.set_observed(&Coin::Tails, 250);
        for side in [Coin::Heads, Coin::Tails] {
            assert_eq!(
                table.expected(&side),
                500.0,
                "expected 500 because the total is 1000"
            );
        }
    }

    /// Demonstrate the default implementation to calculate
    /// degrees of freedom is correct.
    #[test]
    fn calc_degrees_of_freedom() {
        let table = ContingencyTable::<2, Coin>::new();
        // Two categories leave exactly one degree of freedom.
        assert_eq!(table.degrees_of_freedom(), NonZeroUsize::new(1).unwrap());
    }
}
Loading

0 comments on commit 1787b98

Please sign in to comment.