diff --git a/src/stats/histogram.rs b/src/stats/histogram.rs
new file mode 100644
index 0000000..91162e9
--- /dev/null
+++ b/src/stats/histogram.rs
@@ -0,0 +1,114 @@
+use std::marker::PhantomData;
+
+use super::Categorical;
+
+/// [Histogram] is a data structure for tracking categorical observations.
+/// In essence, it's a map from category to count.
+/// A [Histogram] has a fixed number of bins, each representing one of `N`
+/// categories. Each time a member of a category is observed, it is
+/// "added to the bin", incrementing the counter.
+pub struct Histogram<C, const N: usize>
+where
+    C: Categorical<N>,
+{
+    bins: Box<[usize; N]>,
+    phantom: PhantomData<C>,
+}
+
+impl<C, const N: usize> Histogram<C, N>
+where
+    C: Categorical<N>,
+{
+    /// Create a new histogram with zeroes across the board.
+    pub fn new() -> Self {
+        let bins = Box::new([0; N]);
+        Self {
+            bins,
+            phantom: PhantomData,
+        }
+    }
+
+    /// Increment the observed count for the given category by 1.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `categorical.category()` is not less than `N`.
+    pub fn increment(&mut self, categorical: &C) {
+        self.increment_by(categorical, 1);
+    }
+
+    /// Increment the observed count for the given category by `count`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `categorical.category()` is not less than `N`.
+    pub fn increment_by(&mut self, categorical: &C, count: usize) {
+        let index = categorical.category();
+        self.bins[index] += count;
+    }
+
+    /// Return the count for the given category.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `categorical.category()` is not less than `N`.
+    pub fn get_count(&self, categorical: &C) -> usize {
+        let index = categorical.category();
+        self.bins[index]
+    }
+
+    /// Reset all bins to zero.
+    pub fn clear(&mut self) {
+        // Zero the existing allocation in place instead of boxing a new array.
+        self.bins.fill(0);
+    }
+}
+
+impl<C, const N: usize> Default for Histogram<C, N>
+where
+    C: Categorical<N>,
+{
+    /// Equivalent to [`Histogram::new`].
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::stats::Categorical;
+
+    use super::Histogram;
+
+    impl Categorical<2> for bool {
+        fn category(&self) -> usize {
+            match self {
+                true => 1,
+                false => 0,
+            }
+        }
+    }
+
+    #[test]
+    fn test_increment() {
+        let mut hist: Histogram<bool, 2> = Histogram::new();
+        // Both bins start at 0.
+        assert_eq!(hist.get_count(&true), 0);
+        assert_eq!(hist.get_count(&false), 0);
+        hist.increment(&true);
+        hist.increment(&true);
+        hist.increment(&false);
+        assert_eq!(hist.get_count(&true), 2);
+        assert_eq!(hist.get_count(&false), 1);
+    }
+
+    #[test]
+    fn test_clear() {
+        let mut hist: Histogram<bool, 2> = Histogram::new();
+        hist.increment_by(&true, 3);
+        hist.increment(&false);
+        hist.clear();
+        assert_eq!(hist.get_count(&true), 0);
+        assert_eq!(hist.get_count(&false), 0);
+    }
+}
diff --git a/src/stats/mod.rs b/src/stats/mod.rs
index bd2a6b6..182b598 100644
--- a/src/stats/mod.rs
+++ b/src/stats/mod.rs
@@ -106,6 +106,8 @@ mod categorical;
 mod chi;
 /// `group` defines the two groups.
 mod group;
+/// A data structure for tracking categorical data.
+mod histogram;
 /// An observation represents a group and the observed category.
 mod observation;
 /// Different kinds of contingency tables.