Skip to content

Commit

Permalink
Add range-based iteration
Browse files Browse the repository at this point in the history
  • Loading branch information
mherzl committed Sep 25, 2024
1 parent 9e743c4 commit 94940db
Show file tree
Hide file tree
Showing 9 changed files with 476 additions and 11 deletions.
1 change: 1 addition & 0 deletions roaring/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ license = "MIT OR Apache-2.0"
bytemuck = { workspace = true, optional = true }
byteorder = { workspace = true, optional = true }
serde = { workspace = true, optional = true }
num = "0.4"

[features]
default = ["std"]
Expand Down
7 changes: 5 additions & 2 deletions roaring/src/bitmap/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@ pub struct IntoIter {
size_hint: u64,
}

impl Iter<'_> {
fn new(containers: &[Container]) -> Iter {
impl<'a> Iter<'a> {
    /// Creates an iterator that walks the values of every container in
    /// `containers`, in slice order.
    ///
    /// The size hint starts out as the sum of the containers' lengths.
    pub(super) fn new(containers: &[Container]) -> Iter {
        let total_len = containers.iter().map(|container| container.len()).sum();
        let flattened = containers.iter().flatten();
        Iter { inner: flattened, size_hint: total_len }
    }

    /// Creates an iterator over no containers at all, with a size hint of zero.
    pub(super) fn empty() -> Iter<'a> {
        Self { inner: [].iter().flatten(), size_hint: 0 }
    }
}

impl IntoIter {
Expand Down
1 change: 1 addition & 0 deletions roaring/src/bitmap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod iter;
mod ops;
#[cfg(feature = "std")]
mod ops_with_serialized;
mod range;
#[cfg(feature = "serde")]
mod serde;
#[cfg(feature = "std")]
Expand Down
137 changes: 137 additions & 0 deletions roaring/src/bitmap/range.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
use core::ops::RangeBounds;
use core::ops::RangeInclusive;

use super::container::Container;
use super::iter;
use super::store;
use super::util;
use crate::RoaringBitmap;

/// Iterator over a consecutive subsequence of a bitmap.
///
/// The requested range is split into up to three segments that are drained
/// one after the other: a partial view of the container holding the start of
/// the range, every container lying strictly between the two ends, and a
/// partial view of the container holding the end of the range. When the whole
/// range lives in a single container only the first segment is populated.
///
/// Intended to be efficient: O(log n + k), where n is the bitmap's length
/// and k is the subsequence's length.
pub struct RangeIter<'a> {
// Values of the container that holds the start of the range (possibly empty).
first: store::StorePartIter<'a>,
// All values of the containers located between the first and the last one.
between: iter::Iter<'a>,
// Values of the container that holds the end of the range (possibly empty).
last: store::StorePartIter<'a>,
// NOTE(review): disabled field; this iterator does not track a size hint yet.
// size_hint: u64,
}

impl<'a> RangeIter<'a> {
    /// Builds an iterator over the values stored in `containers` that fall
    /// inside `range`.
    ///
    /// The containers matching the high 16 bits of both ends of the range are
    /// looked up by binary search on the container keys. The iterator is then
    /// assembled from a partial view of the first container, all containers in
    /// between, and a partial view of the last container.
    ///
    /// An empty iterator is returned when the range cannot be converted to an
    /// inclusive range, or when no container overlaps it.
    pub fn new<R>(containers: &'a [Container], range: R) -> RangeIter<'a>
    where
        R: RangeBounds<u32>,
    {
        let bounds = match util::convert_range_to_inclusive(range) {
            Some(bounds) => bounds,
            None => return Self::empty(),
        };

        // Split both ends into (container key, value within the container).
        let (lo_key, lo_bits) = util::split(*bounds.start());
        let (hi_key, hi_bits) = util::split(*bounds.end());

        let lo_pos = containers.binary_search_by_key(&lo_key, |c| c.key);
        let hi_pos = containers.binary_search_by_key(&hi_key, |c| c.key);

        match (lo_pos, hi_pos) {
            // Both ends resolve to the very same existing container: a single
            // partial view of it covers the whole range.
            (Ok(i), Ok(j)) if i == j => {
                return RangeIter {
                    first: Self::container_part(&containers[i], lo_bits..=hi_bits, lo_key),
                    between: iter::Iter::empty(),
                    last: store::StorePartIter::empty(),
                };
            }
            // Both ends fall into the same gap between containers: nothing to
            // iterate over.
            (Err(i), Err(j)) if i == j => return Self::empty(),
            _ => {}
        }

        // The range touches several container slots. A missing boundary
        // container contributes an empty part, and its insertion index still
        // delimits the run of fully covered containers.
        let (first, inner_start) = match lo_pos {
            Ok(i) => {
                let part = Self::container_part(&containers[i], lo_bits..=u16::MAX, lo_key);
                (part, i + 1)
            }
            Err(i) => (store::StorePartIter::empty(), i),
        };
        let (last, inner_stop) = match hi_pos {
            Ok(i) => {
                let part = Self::container_part(&containers[i], u16::MIN..=hi_bits, hi_key);
                (part, i)
            }
            Err(i) => (store::StorePartIter::empty(), i),
        };
        let between = iter::Iter::new(&containers[inner_start..inner_stop]);

        RangeIter { first, between, last }
    }

    /// Creates the partial iterator over `container`'s store restricted to
    /// `range`, passing `key` along to the store iterator.
    fn container_part(
        container: &Container,
        range: RangeInclusive<u16>,
        key: u16,
    ) -> store::StorePartIter {
        let store = &container.store;
        store::StorePartIter::new(key, store, range)
    }

    /// Creates an iterator whose three segments are all empty.
    fn empty() -> RangeIter<'a> {
        let first = store::StorePartIter::empty();
        let between = iter::Iter::empty();
        let last = store::StorePartIter::empty();
        RangeIter { first, between, last }
    }
}

impl<'a> Iterator for RangeIter<'a> {
    type Item = u32;

    /// Yields the next value, draining the segments in order: the first
    /// container part, then the containers in between, then the last
    /// container part. A later segment is only polled once every earlier one
    /// has returned `None` for this call.
    fn next(&mut self) -> Option<u32> {
        self.first
            .next()
            .or_else(|| self.between.next())
            .or_else(|| self.last.next())
    }
}

impl RoaringBitmap {
/// Efficiently obtains an iterator over the values of this bitmap that lie
/// within the specified range.
///
/// Any `RangeBounds<u32>` is accepted, so half-open (`a..b`), inclusive
/// (`a..=b`) and unbounded forms can all be passed.
///
/// # Examples
///
/// ```
/// use roaring::RoaringBitmap;
///
/// let rb = RoaringBitmap::from_sorted_iter(10..5000).unwrap();
///
/// let expected = vec![10,11,12];
/// let actual: Vec<u32> = rb.range(0..13).collect();
/// assert_eq!(expected, actual);
/// ```
pub fn range<R>(&self, range: R) -> RangeIter
where
R: RangeBounds<u32>,
{
// Delegate to the range iterator built over this bitmap's containers.
RangeIter::new(&self.containers, range)
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A range that starts below the smallest stored value must only yield
    /// the values that are actually present in the bitmap.
    #[test]
    fn test_range_bitmap() {
        let bitmap = RoaringBitmap::from_sorted_iter(10..5000).unwrap();

        let collected: Vec<u32> = bitmap.range(0..13).collect();

        assert_eq!(vec![10, 11, 12], collected);
    }
}
14 changes: 14 additions & 0 deletions roaring/src/bitmap/store/array_store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod vector;
mod visitor;

use crate::bitmap::store::array_store::visitor::{CardinalityCounter, VecWriter};
// use crate::bitmap::util;
use core::cmp::Ordering;
use core::cmp::Ordering::*;
use core::fmt::{Display, Formatter};
Expand Down Expand Up @@ -235,6 +236,19 @@ impl ArrayStore {
&self.vec
}

/// Returns an iterator over the stored values that lie within `range`
/// (both ends inclusive).
///
/// Both bounds are located with a binary search on the backing vector, which
/// therefore has to be sorted.
///
/// An inverted range (`start > end`) is treated as empty and yields nothing
/// instead of panicking on an out-of-order slice index.
pub fn range_iter(&self, range: RangeInclusive<u16>) -> core::slice::Iter<u16> {
    // Index of the first element >= start, whether or not `start` is present.
    let start_index = self.vec.binary_search(range.start()).unwrap_or_else(|i| i);
    // Index one past the last element <= end.
    let end_index = match self.vec.binary_search(range.end()) {
        Ok(i) => i + 1,
        Err(i) => i,
    };
    // With an inverted range the end index can land before the start index;
    // clamp it so that slicing produces an empty slice rather than a panic.
    let end_index = end_index.max(start_index);
    self.vec[start_index..end_index].iter()
}

/// Retains only the elements specified by the predicate.
pub fn retain(&mut self, mut f: impl FnMut(u16) -> bool) {
// Idea to avoid branching from "Engineering Fast Indexes for Big Data
Expand Down
Loading

0 comments on commit 94940db

Please sign in to comment.