diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 74f85c6..ea97102 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -1,4 +1,4 @@ -use crate::stats::EnumerableCategory; +use crate::stats::{Categorical, EnumerableCategory}; use std::fmt; /// [ResponseStatusCode] groups HTTP response status codes according @@ -18,6 +18,18 @@ pub enum ResponseStatusCode { _5XX, } +impl Categorical<5> for ResponseStatusCode { + fn category(&self) -> usize { + match self { + Self::_1XX => 0, + Self::_2XX => 1, + Self::_3XX => 2, + Self::_4XX => 3, + Self::_5XX => 4, + } + } +} + impl fmt::Display for ResponseStatusCode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { diff --git a/src/stats/categorical.rs b/src/stats/categorical.rs new file mode 100644 index 0000000..f0f2223 --- /dev/null +++ b/src/stats/categorical.rs @@ -0,0 +1,43 @@ +/// Data is [Categorical] if each element has a surjective mapping to a number +/// from `[0, N)`. A [Categorical] trait expresses data that fits into exactly one +/// of `N` categories (or bins). The value of `N` represents the total (i.e. the max) +/// number of categories. +/// For example, if modeling bools, the groups are `True` and `False`, so N=2. +/// If modeling a six-sided die, the groups would be 0 through 5, so N=6. +/// Each instance must be able to report which category it belongs to (using the `Self::category` method). +/// Categories are zero-indexed (the first category is represented by `0usize`). +/// You can think of an [EnumerableCategory] as a hashmap with fixed integer keys. When the map is +/// created, its keys must already be known and completely cover the range `[0, N)`.
+/// ```rust +/// use std::collections::HashSet; +/// use canary::stats::Categorical; +/// +/// #[derive(PartialEq, Eq, Debug, Hash)] +/// enum Coin { +/// Heads, +/// Tails, +/// } +/// +/// impl Categorical<2> for Coin { +/// fn category(&self) -> usize { +/// match self { +/// Self::Heads => 0, +/// Self::Tails => 1, +/// } +/// } +/// } +/// ``` +pub trait Categorical { + fn category(&self) -> usize; +} + +#[cfg(test)] +mod tests { + use static_assertions::assert_obj_safe; + + use super::Categorical; + + // The categorical trait must be object-safe. + assert_obj_safe!(Categorical<5>); +} diff --git a/src/stats/chi.rs b/src/stats/chi.rs index 9bfa540..dee5e7a 100644 --- a/src/stats/chi.rs +++ b/src/stats/chi.rs @@ -25,13 +25,15 @@ fn degrees_of_freedom(table: &impl ContingencyTable Box> { /// Box::new([Coin::Heads, Coin::Tails].into_iter()) diff --git a/src/stats/mod.rs b/src/stats/mod.rs index 8e7fdde..bd2a6b6 100644 --- a/src/stats/mod.rs +++ b/src/stats/mod.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; +pub use categorical::Categorical; pub use chi::EnumerableCategory; pub use group::Group; pub use observation::{CategoricalObservation, Observation}; @@ -99,6 +100,8 @@ impl ChiSquareEngine { /// This type maps the dependent variable to its count. type Table = HashMap; +/// For modeling categorical data. +mod categorical; /// contains the engine to calculate the chi square test statistic. mod chi; /// `group` defines the two groups.