Skip to content

Commit

Permalink
Merge pull request #6 from wack/robbie/contingency-table
Browse files Browse the repository at this point in the history
Implement the Chi Square significance test.
  • Loading branch information
RobbieMcKinstry authored Oct 18, 2024
2 parents 4db4038 + 5cbd99b commit 064eb02
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 37 deletions.
41 changes: 4 additions & 37 deletions src/adapter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ use aws_sdk_cloudwatchlogs as cloudwatchlogs;
use futures_core::stream::Stream;
use tokio::sync::mpsc::Sender;

use crate::stats::Observation;

pub struct CloudwatchLogsAdapter {
/// The AWS client for querying Cloudwatch Logs.
client: Box<dyn ObservationEmitter>,
Expand Down Expand Up @@ -41,45 +43,10 @@ impl CloudwatchLogsAdapter {
}
}

/// An [Observation] is a single measured outcome that belongs to
/// either the control group or the experimental group (i.e. the
/// canary).
pub struct Observation {
/// Which group produced this observation: control or experimental.
group: Group,
/// The outcome of the observation, bucketed by HTTP status category.
outcome: StatusCategory,
}

/// The [Group] indicates which deployment generated a given
/// observation: either the control group deployment or the
/// canary deployment.
pub enum Group {
/// The control group is the current running deployment.
Control,
/// The experimental group represents the canary deployment.
Experimental,
}

/// [StatusCategory] groups HTTP response status codes according
/// to five general categories. This type is used as the dependent
/// variable in statistical observations.
pub enum StatusCategory {
/// Informational responses (100-199).
_1XX,
/// Successful responses (200-299).
_2XX,
/// Redirection messages (300-399).
_3XX,
/// Client error responses (400-499).
_4XX,
/// Server error responses (500-599).
_5XX,
}

#[cfg(test)]
mod tests {
use crate::adapter::{Group, Observation, StatusCategory};
use crate::adapter::Observation;
use crate::stats::{Group, StatusCategory};

use super::{CloudwatchLogsAdapter, ObservationEmitter};

Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ mod config;
/// This is the data pipeline responsible for the control flow
/// of data from observers into number crunchers.
mod pipeline;
/// Our statistics library.
mod stats;
122 changes: 122 additions & 0 deletions src/stats/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
use std::collections::HashMap;

/// The alpha cutoff is the significance level of the test: the probability
/// we are willing to accept that an observed difference is due to chance
/// rather than due to the independent variable. The value is expressed as
/// a fraction: 0.05 means we want to be 95% confident that the observed
/// difference is not due to chance, but is actually because the
/// experimental group differs from the control group.
const DEFAULT_ALPHA_CUTOFF: f64 = 0.05;

/// The [ChiSquareEngine] calculates the Chi Square test statistic
/// based on the data stored in its contingency tables.
///
/// One table is kept per group, and a running total of observations is
/// kept alongside each table.
pub struct ChiSquareEngine {
/// Per-category observation counts for the control group.
control: ContingencyTable,
/// Per-category observation counts for the experimental group.
experimental: ContingencyTable,
/// Number of control observations recorded so far, across all categories.
total_control_count: usize,
/// Number of experimental observations recorded so far, across all
/// categories.
total_experimental_count: usize,
/// Significance threshold; initialised to [DEFAULT_ALPHA_CUTOFF] by `new`.
/// NOTE(review): nothing in this module reads it yet — presumably it is
/// meant for comparing the statistic's p-value; confirm.
alpha_cutoff: f64,
}

impl ChiSquareEngine {
pub fn new() -> Self {
Self {
control: HashMap::default(),
experimental: HashMap::default(),
total_control_count: 0,
total_experimental_count: 0,
alpha_cutoff: DEFAULT_ALPHA_CUTOFF,
}
}

pub fn add_observation(&mut self, obs: Observation) {
// Fetch the count of observations for the given group.
let entry = match obs.group {
Group::Control => {
self.total_control_count += 1;
self.control.entry(obs.outcome)
}
Group::Experimental => {
self.total_experimental_count += 1;
self.experimental.entry(obs.outcome)
}
};
// Increment the count.
entry.and_modify(|count| *count += 1).or_insert(1);
}

/// calculate the test statistic from the contingency tables.
pub fn calc_test_statistic(&self) -> f64 {
let mut error = 0.0;
let categories = [
StatusCategory::_1XX,
StatusCategory::_2XX,
StatusCategory::_3XX,
StatusCategory::_4XX,
StatusCategory::_5XX,
];
// For each category, we calculate the squared error between the
// expected and the observed probabilies.
for category in categories {
let expected = self.expected_frequency(category);
let observed = self.observed_frequency(category);
error += (observed - expected).powi(2) / expected;
}
error
}

/// calculate the expected frequency for this category.
fn expected_frequency(&self, category: StatusCategory) -> f64 {
let observation_count = self.control[&category] as f64;
let total_count = self.control[&category] as f64;
observation_count / total_count
}

/// calculate the observed frequency for this category.
fn observed_frequency(&self, category: StatusCategory) -> f64 {
let observation_count = self.experimental[&category] as f64;
let total_count = self.experimental[&category] as f64;
observation_count / total_count
}
}

/// This type maps the dependent variable (the [StatusCategory] of a
/// response) to the number of observations counted for it.
pub type ContingencyTable = HashMap<StatusCategory, usize>;

/// An [Observation] represents a single measured outcome that
/// belongs to either a control group or an experimental
/// group (i.e. canary).
///
/// Both fields are small `Copy` enums, so the observation itself is
/// cheap to copy, compare, hash and print.
#[derive(Hash, Debug, PartialEq, Eq, Clone, Copy)]
pub struct Observation {
    /// The experimental group or the control group.
    pub group: Group,
    /// The outcome of the observation, by status code category.
    pub outcome: StatusCategory,
}

/// The [Group] indicates which deployment generated a given
/// observation: either the control group deployment or the
/// canary deployment.
#[derive(Hash, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub enum Group {
/// The control group is the current running deployment.
Control,
/// The experimental group represents the canary deployment.
Experimental,
}

/// [StatusCategory] groups HTTP response status codes according
/// to five general categories. This type is used as the dependent
/// variable in statistical observations.
#[derive(Hash, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub enum StatusCategory {
/// Informational responses (100-199).
_1XX,
/// Successful responses (200-299).
_2XX,
/// Redirection messages (300-399).
_3XX,
/// Client error responses (400-499).
_4XX,
/// Server error responses (500-599).
_5XX,
}

0 comments on commit 064eb02

Please sign in to comment.