pelikan-io · mihirn · Jun 12, 2024 · Jun 8, 2024
diff --git a/histogram/Cargo.toml b/histogram/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "histogram"
-version = "0.10.2"
+version = "0.11.0"
 edition = "2021"
 authors = ["Brian Martin <[email protected]>"]
 license = "MIT OR Apache-2.0"

diff --git a/histogram/src/atomic.rs b/histogram/src/atomic.rs
@@ -97,50 +97,69 @@ mod test {
         let percentiles = histogram.drain();
         assert_eq!(
             percentiles.percentile(50.0),
-            Ok(Bucket {
+            Ok(Some(Bucket {
                 count: 1,
                 range: 50..=50,
-            })
+            }))
         );
         histogram.increment(1000).unwrap();
         // after another load the map is empty
         let percentiles = histogram.drain();
         assert_eq!(
             percentiles.percentile(50.0),
-            Ok(Bucket {
+            Ok(Some(Bucket {
                 count: 1,
                 range: 1000..=1003,
-            })
+            }))
         );
     }
 
     #[test]
     // Tests percentiles
     fn percentiles() {
         let histogram = AtomicHistogram::new(7, 64).unwrap();
+        let percentiles = [25.0, 50.0, 75.0, 90.0, 99.0];
+
+        // check empty
+        assert_eq!(histogram.load().percentiles(&percentiles), Ok(None));
+
+        for percentile in percentiles {
+            assert_eq!(histogram.load().percentile(percentile), Ok(None));
+        }
+
+        // populate and check percentiles
         for i in 0..=100 {
             let _ = histogram.increment(i);
             assert_eq!(
                 histogram.load().percentile(0.0),
-                Ok(Bucket {
+                Ok(Some(Bucket {
                     count: 1,
                     range: 0..=0,
-                })
+                }))
             );
             assert_eq!(
                 histogram.load().percentile(100.0),
-                Ok(Bucket {
+                Ok(Some(Bucket {
                     count: 1,
                     range: i..=i,
-                })
+                }))
+            );
+        }
+
+        for percentile in percentiles {
+            assert_eq!(
+                histogram
+                    .load()
+                    .percentile(percentile)
+                    .map(|b| b.unwrap().end()),
+                Ok(percentile as u64)
             );
         }
-        assert_eq!(histogram.load().percentile(25.0).map(|b| b.end()), Ok(25));
-        assert_eq!(histogram.load().percentile(50.0).map(|b| b.end()), Ok(50));
-        assert_eq!(histogram.load().percentile(75.0).map(|b| b.end()), Ok(75));
-        assert_eq!(histogram.load().percentile(90.0).map(|b| b.end()), Ok(90));
-        assert_eq!(histogram.load().percentile(99.0).map(|b| b.end()), Ok(99));
-        assert_eq!(histogram.load().percentile(99.9).map(|b| b.end()), Ok(100));
+
+        assert_eq!(
+            histogram.load().percentile(99.9).map(|b| b.unwrap().end()),
+            Ok(100)
+        );
 
         assert_eq!(
             histogram.load().percentile(-1.0),
@@ -155,6 +174,7 @@ mod test {
             .load()
             .percentiles(&[50.0, 90.0, 99.0, 99.9])
             .unwrap()
+            .unwrap()
             .iter()
             .map(|(p, b)| (*p, b.end()))
             .collect();
@@ -167,10 +187,10 @@ mod test {
         let _ = histogram.increment(1024);
         assert_eq!(
             histogram.load().percentile(99.9),
-            Ok(Bucket {
+            Ok(Some(Bucket {
                 count: 1,
                 range: 1024..=1031,
-            })
+            }))
         );
     }
 }
diff --git a/histogram/src/sparse.rs b/histogram/src/sparse.rs
@@ -142,35 +142,67 @@ impl SparseHistogram {
         Ok(histogram)
     }
 
-    /// Return a single percentile from this histogram.
+    /// Return a collection of percentiles from this histogram.
     ///
-    /// The percentile should be in the inclusive range `0.0..=100.0`. For
+    /// Each percentile should be in the inclusive range `0.0..=100.0`. For
     /// example, the 50th percentile (median) can be found using `50.0`.
-    pub fn percentile(&self, percentile: f64) -> Result<Bucket, Error> {
+    ///
+    /// The results will be sorted by the percentile.
+    pub fn percentiles(&self, percentiles: &[f64]) -> Result<Option<Vec<(f64, Bucket)>>, Error> {
+        // validate all the percentiles
+        if percentiles.is_empty() {
+            return Err(Error::InvalidPercentile);
+        }
+
+        for percentile in percentiles {
+            if !(0.0..=100.0).contains(percentile) {
+                return Err(Error::InvalidPercentile);
+            }
+        }
+
         let total: u128 = self.count.iter().map(|v| *v as u128).sum();
 
-        if !(0.0..=100.0).contains(&percentile) {
-            return Err(Error::InvalidPercentile);
+        // empty histogram, no percentiles available
+        if total == 0 {
+            return Ok(None);
         }
 
-        let search = ((total as f64) * percentile / 100.0).ceil() as usize;
+        // sort the requested percentiles so we can find them in a single pass
+        let mut percentiles = percentiles.to_vec();
+        percentiles.sort_by(|a, b| a.partial_cmp(b).unwrap());
+
+        let searches: Vec<usize> = percentiles
+            .iter()
+            .map(|p| ((total as f64) * *p / 100.0).ceil() as usize)
+            .collect();
+        let mut search_idx = 0;
+        let mut result: Vec<(f64, Bucket)> = Vec::with_capacity(percentiles.len());
+
         let mut seen: usize = 0;
         for (idx, count) in self.index.iter().zip(self.count.iter()) {
             seen += *count as usize;
-            if seen >= search {
-                return Ok(Bucket {
-                    count: *count,
-                    range: self.config.index_to_range(*idx),
-                });
+            while search_idx < searches.len() && seen >= searches[search_idx] {
+                result.push((
+                    percentiles[search_idx],
+                    Bucket {
+                        count: *count,
+                        range: self.config.index_to_range(*idx),
+                    },
+                ));
+                search_idx += 1;
             }
         }
 
-        // should never be reached; return highest bucket if not found
-        let last_idx = self.index.len() - 1;
-        Ok(Bucket {
-            count: self.count[last_idx],
-            range: self.config.index_to_range(self.index[last_idx]),
-        })
+        Ok(Some(result))
+    }
+
+    /// Return a single percentile from this histogram.
+    ///
+    /// The percentile should be in the inclusive range `0.0..=100.0`. For
+    /// example, the 50th percentile (median) can be found using `50.0`.
+    pub fn percentile(&self, percentile: f64) -> Result<Option<Bucket>, Error> {
+        self.percentiles(&[percentile])
+            .map(|v| v.map(|x| x.first().unwrap().1.clone()))
     }
 
     /// Returns a new histogram with a reduced grouping power. The reduced
@@ -384,20 +416,30 @@ mod tests {
     }
 
     #[test]
-    fn percentile() {
+    fn percentiles() {
         let mut hstandard = Histogram::new(4, 10).unwrap();
+        let hempty = SparseHistogram::from(&hstandard);
+
         for v in 1..1024 {
             let _ = hstandard.increment(v);
         }
 
         let hsparse = SparseHistogram::from(&hstandard);
-
-        for percentile in [1.0, 10.0, 25.0, 50.0, 75.0, 90.0, 99.0, 99.9] {
+        let percentiles = [1.0, 10.0, 25.0, 50.0, 75.0, 90.0, 99.0, 99.9];
+        for percentile in percentiles {
+            let bempty = hempty.percentile(percentile).unwrap();
             let bstandard = hstandard.percentile(percentile).unwrap();
             let bsparse = hsparse.percentile(percentile).unwrap();
 
+            assert_eq!(bempty, None);
             assert_eq!(bsparse, bstandard);
         }
+
+        assert_eq!(hempty.percentiles(&percentiles), Ok(None));
+        assert_eq!(
+            hstandard.percentiles(&percentiles).unwrap(),
+            hsparse.percentiles(&percentiles).unwrap()
+        );
     }
 
     fn compare_histograms(hstandard: &Histogram, hsparse: &SparseHistogram) {

diff --git a/histogram/src/standard.rs b/histogram/src/standard.rs
@@ -76,7 +76,7 @@ impl Histogram {
     /// example, the 50th percentile (median) can be found using `50.0`.
     ///
     /// The results will be sorted by the percentile.
-    pub fn percentiles(&self, percentiles: &[f64]) -> Result<Vec<(f64, Bucket)>, Error> {
+    pub fn percentiles(&self, percentiles: &[f64]) -> Result<Option<Vec<(f64, Bucket)>>, Error> {
         // get the total count
         let total_count: u128 = self.buckets.iter().map(|v| *v as u128).sum();
 
@@ -91,6 +91,11 @@ impl Histogram {
             }
         }
 
+        // empty histogram, no percentiles available
+        if total_count == 0 {
+            return Ok(None);
+        }
+
         let mut bucket_idx = 0;
         let mut partial_sum = self.buckets[bucket_idx] as u128;
 
@@ -125,16 +130,16 @@ impl Histogram {
             })
             .collect();
 
-        Ok(result)
+        Ok(Some(result))
     }
 
     /// Return a single percentile from this histogram.
     ///
     /// The percentile should be in the inclusive range `0.0..=100.0`. For
     /// example, the 50th percentile (median) can be found using `50.0`.
-    pub fn percentile(&self, percentile: f64) -> Result<Bucket, Error> {
+    pub fn percentile(&self, percentile: f64) -> Result<Option<Bucket>, Error> {
         self.percentiles(&[percentile])
-            .map(|v| v.first().unwrap().1.clone())
+            .map(|v| v.map(|x| x.first().unwrap().1.clone()))
     }
 
     /// Returns a new histogram with a reduced grouping power. The reduced
@@ -310,36 +315,47 @@ mod tests {
     // Tests percentiles
     fn percentiles() {
         let mut histogram = Histogram::new(7, 64).unwrap();
+
+        assert_eq!(histogram.percentile(50.0).unwrap(), None);
+        assert_eq!(
+            histogram.percentiles(&[50.0, 90.0, 99.0, 99.9]).unwrap(),
+            None
+        );
+
         for i in 0..=100 {
             let _ = histogram.increment(i);
             assert_eq!(
                 histogram.percentile(0.0),
-                Ok(Bucket {
+                Ok(Some(Bucket {
                     count: 1,
                     range: 0..=0,
-                })
+                }))
             );
             assert_eq!(
                 histogram.percentile(100.0),
-                Ok(Bucket {
+                Ok(Some(Bucket {
                     count: 1,
                     range: i..=i,
-                })
+                }))
             );
         }
-        assert_eq!(histogram.percentile(25.0).map(|b| b.end()), Ok(25));
-        assert_eq!(histogram.percentile(50.0).map(|b| b.end()), Ok(50));
-        assert_eq!(histogram.percentile(75.0).map(|b| b.end()), Ok(75));
-        assert_eq!(histogram.percentile(90.0).map(|b| b.end()), Ok(90));
-        assert_eq!(histogram.percentile(99.0).map(|b| b.end()), Ok(99));
-        assert_eq!(histogram.percentile(99.9).map(|b| b.end()), Ok(100));
+        assert_eq!(histogram.percentile(25.0).map(|b| b.unwrap().end()), Ok(25));
+        assert_eq!(histogram.percentile(50.0).map(|b| b.unwrap().end()), Ok(50));
+        assert_eq!(histogram.percentile(75.0).map(|b| b.unwrap().end()), Ok(75));
+        assert_eq!(histogram.percentile(90.0).map(|b| b.unwrap().end()), Ok(90));
+        assert_eq!(histogram.percentile(99.0).map(|b| b.unwrap().end()), Ok(99));
+        assert_eq!(
+            histogram.percentile(99.9).map(|b| b.unwrap().end()),
+            Ok(100)
+        );
 
         assert_eq!(histogram.percentile(-1.0), Err(Error::InvalidPercentile));
         assert_eq!(histogram.percentile(101.0), Err(Error::InvalidPercentile));
 
         let percentiles: Vec<(f64, u64)> = histogram
             .percentiles(&[50.0, 90.0, 99.0, 99.9])
             .unwrap()
+            .unwrap()
             .iter()
             .map(|(p, b)| (*p, b.end()))
             .collect();
@@ -352,10 +368,10 @@ mod tests {
         let _ = histogram.increment(1024);
         assert_eq!(
             histogram.percentile(99.9),
-            Ok(Bucket {
+            Ok(Some(Bucket {
                 count: 1,
                 range: 1024..=1031,
-            })
+            }))
         );
     }
 
@@ -395,7 +411,7 @@ mod tests {
                 let v = vals[((*p / 100.0 * (vals.len() as f64)) as usize) - 1];
 
                 // Value and relative error from full histogram
-                let vhist = histogram.percentile(*p).unwrap().end();
+                let vhist = histogram.percentile(*p).unwrap().unwrap().end();
                 let e = (v.abs_diff(vhist) as f64) * 100.0 / (v as f64);
                 assert!(e < error);
             }