Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

histogram: fix percentiles for empty histograms #116

Merged
merged 1 commit into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion histogram/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "histogram"
version = "0.10.2"
version = "0.11.0"
edition = "2021"
authors = ["Brian Martin <[email protected]>"]
license = "MIT OR Apache-2.0"
Expand Down
52 changes: 36 additions & 16 deletions histogram/src/atomic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,50 +97,69 @@ mod test {
let percentiles = histogram.drain();
assert_eq!(
percentiles.percentile(50.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 50..=50,
})
}))
);
histogram.increment(1000).unwrap();
// after another load the map is empty
let percentiles = histogram.drain();
assert_eq!(
percentiles.percentile(50.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 1000..=1003,
})
}))
);
}

#[test]
// Tests percentiles
fn percentiles() {
let histogram = AtomicHistogram::new(7, 64).unwrap();
let percentiles = [25.0, 50.0, 75.0, 90.0, 99.0];

// check empty
assert_eq!(histogram.load().percentiles(&percentiles), Ok(None));

for percentile in percentiles {
assert_eq!(histogram.load().percentile(percentile), Ok(None));
}

// populate and check percentiles
for i in 0..=100 {
let _ = histogram.increment(i);
assert_eq!(
histogram.load().percentile(0.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 0..=0,
})
}))
);
assert_eq!(
histogram.load().percentile(100.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: i..=i,
})
}))
);
}

for percentile in percentiles {
assert_eq!(
histogram
.load()
.percentile(percentile)
.map(|b| b.unwrap().end()),
Ok(percentile as u64)
);
}
assert_eq!(histogram.load().percentile(25.0).map(|b| b.end()), Ok(25));
assert_eq!(histogram.load().percentile(50.0).map(|b| b.end()), Ok(50));
assert_eq!(histogram.load().percentile(75.0).map(|b| b.end()), Ok(75));
assert_eq!(histogram.load().percentile(90.0).map(|b| b.end()), Ok(90));
assert_eq!(histogram.load().percentile(99.0).map(|b| b.end()), Ok(99));
assert_eq!(histogram.load().percentile(99.9).map(|b| b.end()), Ok(100));

assert_eq!(
histogram.load().percentile(99.9).map(|b| b.unwrap().end()),
Ok(100)
);

assert_eq!(
histogram.load().percentile(-1.0),
Expand All @@ -155,6 +174,7 @@ mod test {
.load()
.percentiles(&[50.0, 90.0, 99.0, 99.9])
.unwrap()
.unwrap()
.iter()
.map(|(p, b)| (*p, b.end()))
.collect();
Expand All @@ -167,10 +187,10 @@ mod test {
let _ = histogram.increment(1024);
assert_eq!(
histogram.load().percentile(99.9),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 1024..=1031,
})
}))
);
}
}
82 changes: 62 additions & 20 deletions histogram/src/sparse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,35 +142,67 @@ impl SparseHistogram {
Ok(histogram)
}

/// Return a single percentile from this histogram.
/// Return a collection of percentiles from this histogram.
///
/// The percentile should be in the inclusive range `0.0..=100.0`. For
/// Each percentile should be in the inclusive range `0.0..=100.0`. For
/// example, the 50th percentile (median) can be found using `50.0`.
pub fn percentile(&self, percentile: f64) -> Result<Bucket, Error> {
///
/// The results will be sorted by the percentile.
pub fn percentiles(&self, percentiles: &[f64]) -> Result<Option<Vec<(f64, Bucket)>>, Error> {
// validate all the percentiles
if percentiles.is_empty() {
return Err(Error::InvalidPercentile);
}

for percentile in percentiles {
if !(0.0..=100.0).contains(percentile) {
return Err(Error::InvalidPercentile);
}
}

let total: u128 = self.count.iter().map(|v| *v as u128).sum();

if !(0.0..=100.0).contains(&percentile) {
return Err(Error::InvalidPercentile);
// empty histogram, no percentiles available
if total == 0 {
return Ok(None);
}

let search = ((total as f64) * percentile / 100.0).ceil() as usize;
// sort the requested percentiles so we can find them in a single pass
let mut percentiles = percentiles.to_vec();
percentiles.sort_by(|a, b| a.partial_cmp(b).unwrap());

let searches: Vec<usize> = percentiles
.iter()
.map(|p| ((total as f64) * *p / 100.0).ceil() as usize)
.collect();
let mut search_idx = 0;
let mut result: Vec<(f64, Bucket)> = Vec::with_capacity(percentiles.len());

let mut seen: usize = 0;
for (idx, count) in self.index.iter().zip(self.count.iter()) {
seen += *count as usize;
if seen >= search {
return Ok(Bucket {
count: *count,
range: self.config.index_to_range(*idx),
});
while search_idx < searches.len() && seen >= searches[search_idx] {
result.push((
percentiles[search_idx],
Bucket {
count: *count,
range: self.config.index_to_range(*idx),
},
));
search_idx += 1;
}
}

// should never be reached; return highest bucket if not found
let last_idx = self.index.len() - 1;
Ok(Bucket {
count: self.count[last_idx],
range: self.config.index_to_range(self.index[last_idx]),
})
Ok(Some(result))
}

/// Return a single percentile from this histogram.
///
/// The percentile should be in the inclusive range `0.0..=100.0`. For
/// example, the 50th percentile (median) can be found using `50.0`.
pub fn percentile(&self, percentile: f64) -> Result<Option<Bucket>, Error> {
self.percentiles(&[percentile])
.map(|v| v.map(|x| x.first().unwrap().1.clone()))
}

/// Returns a new histogram with a reduced grouping power. The reduced
Expand Down Expand Up @@ -384,20 +416,30 @@ mod tests {
}

#[test]
fn percentile() {
fn percentiles() {
let mut hstandard = Histogram::new(4, 10).unwrap();
let hempty = SparseHistogram::from(&hstandard);

for v in 1..1024 {
let _ = hstandard.increment(v);
}

let hsparse = SparseHistogram::from(&hstandard);

for percentile in [1.0, 10.0, 25.0, 50.0, 75.0, 90.0, 99.0, 99.9] {
let percentiles = [1.0, 10.0, 25.0, 50.0, 75.0, 90.0, 99.0, 99.9];
for percentile in percentiles {
let bempty = hempty.percentile(percentile).unwrap();
let bstandard = hstandard.percentile(percentile).unwrap();
let bsparse = hsparse.percentile(percentile).unwrap();

assert_eq!(bempty, None);
assert_eq!(bsparse, bstandard);
}

assert_eq!(hempty.percentiles(&percentiles), Ok(None));
assert_eq!(
hstandard.percentiles(&percentiles).unwrap(),
hsparse.percentiles(&percentiles).unwrap()
);
}

fn compare_histograms(hstandard: &Histogram, hsparse: &SparseHistogram) {
Expand Down
50 changes: 33 additions & 17 deletions histogram/src/standard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ impl Histogram {
/// example, the 50th percentile (median) can be found using `50.0`.
///
/// The results will be sorted by the percentile.
pub fn percentiles(&self, percentiles: &[f64]) -> Result<Vec<(f64, Bucket)>, Error> {
pub fn percentiles(&self, percentiles: &[f64]) -> Result<Option<Vec<(f64, Bucket)>>, Error> {
// get the total count
let total_count: u128 = self.buckets.iter().map(|v| *v as u128).sum();

Expand All @@ -91,6 +91,11 @@ impl Histogram {
}
}

// empty histogram, no percentiles available
if total_count == 0 {
return Ok(None);
}

let mut bucket_idx = 0;
let mut partial_sum = self.buckets[bucket_idx] as u128;

Expand Down Expand Up @@ -125,16 +130,16 @@ impl Histogram {
})
.collect();

Ok(result)
Ok(Some(result))
}

/// Return a single percentile from this histogram.
///
/// The percentile should be in the inclusive range `0.0..=100.0`. For
/// example, the 50th percentile (median) can be found using `50.0`.
pub fn percentile(&self, percentile: f64) -> Result<Bucket, Error> {
pub fn percentile(&self, percentile: f64) -> Result<Option<Bucket>, Error> {
self.percentiles(&[percentile])
.map(|v| v.first().unwrap().1.clone())
.map(|v| v.map(|x| x.first().unwrap().1.clone()))
}

/// Returns a new histogram with a reduced grouping power. The reduced
Expand Down Expand Up @@ -310,36 +315,47 @@ mod tests {
// Tests percentiles
fn percentiles() {
let mut histogram = Histogram::new(7, 64).unwrap();

assert_eq!(histogram.percentile(50.0).unwrap(), None);
assert_eq!(
histogram.percentiles(&[50.0, 90.0, 99.0, 99.9]).unwrap(),
None
);

for i in 0..=100 {
let _ = histogram.increment(i);
assert_eq!(
histogram.percentile(0.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 0..=0,
})
}))
);
assert_eq!(
histogram.percentile(100.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: i..=i,
})
}))
);
}
assert_eq!(histogram.percentile(25.0).map(|b| b.end()), Ok(25));
assert_eq!(histogram.percentile(50.0).map(|b| b.end()), Ok(50));
assert_eq!(histogram.percentile(75.0).map(|b| b.end()), Ok(75));
assert_eq!(histogram.percentile(90.0).map(|b| b.end()), Ok(90));
assert_eq!(histogram.percentile(99.0).map(|b| b.end()), Ok(99));
assert_eq!(histogram.percentile(99.9).map(|b| b.end()), Ok(100));
assert_eq!(histogram.percentile(25.0).map(|b| b.unwrap().end()), Ok(25));
assert_eq!(histogram.percentile(50.0).map(|b| b.unwrap().end()), Ok(50));
assert_eq!(histogram.percentile(75.0).map(|b| b.unwrap().end()), Ok(75));
assert_eq!(histogram.percentile(90.0).map(|b| b.unwrap().end()), Ok(90));
assert_eq!(histogram.percentile(99.0).map(|b| b.unwrap().end()), Ok(99));
assert_eq!(
histogram.percentile(99.9).map(|b| b.unwrap().end()),
Ok(100)
);

assert_eq!(histogram.percentile(-1.0), Err(Error::InvalidPercentile));
assert_eq!(histogram.percentile(101.0), Err(Error::InvalidPercentile));

let percentiles: Vec<(f64, u64)> = histogram
.percentiles(&[50.0, 90.0, 99.0, 99.9])
.unwrap()
.unwrap()
.iter()
.map(|(p, b)| (*p, b.end()))
.collect();
Expand All @@ -352,10 +368,10 @@ mod tests {
let _ = histogram.increment(1024);
assert_eq!(
histogram.percentile(99.9),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 1024..=1031,
})
}))
);
}

Expand Down Expand Up @@ -395,7 +411,7 @@ mod tests {
let v = vals[((*p / 100.0 * (vals.len() as f64)) as usize) - 1];

// Value and relative error from full histogram
let vhist = histogram.percentile(*p).unwrap().end();
let vhist = histogram.percentile(*p).unwrap().unwrap().end();
let e = (v.abs_diff(vhist) as f64) * 100.0 / (v as f64);
assert!(e < error);
}
Expand Down
Loading