From 294ef6bf29e11b8d65e48f9436520aaffdb0fb01 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Fri, 1 Nov 2024 20:53:45 -0400 Subject: [PATCH] feat: implement advance_to and advance_back_to for Iter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the ability to efficiently skip ahead to a particular value. Co-authored-by: Christian Schjølberg Co-authored-by: Matthew Herzl --- roaring/src/bitmap/container.rs | 10 ++ roaring/src/bitmap/iter.rs | 195 ++++++++++++++++++++- roaring/src/bitmap/store/bitmap_store.rs | 62 +++++++ roaring/src/bitmap/store/mod.rs | 45 +++++ roaring/tests/iter_advance_to.rs | 208 +++++++++++++++++++++++ 5 files changed, 519 insertions(+), 1 deletion(-) create mode 100644 roaring/tests/iter_advance_to.rs diff --git a/roaring/src/bitmap/container.rs b/roaring/src/bitmap/container.rs index 9b238866..eebdfe6a 100644 --- a/roaring/src/bitmap/container.rs +++ b/roaring/src/bitmap/container.rs @@ -325,6 +325,16 @@ impl DoubleEndedIterator for Iter<'_> { impl ExactSizeIterator for Iter<'_> {} +impl Iter<'_> { + pub(crate) fn advance_to(&mut self, index: u16) { + self.inner.advance_to(index); + } + + pub(crate) fn advance_back_to(&mut self, index: u16) { + self.inner.advance_back_to(index); + } +} + impl fmt::Debug for Container { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { format!("Container<{:?} @ {:?}>", self.len(), self.key).fmt(formatter) diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 70ef27fa..de14092a 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -2,7 +2,8 @@ use alloc::vec; use core::iter::FusedIterator; use core::slice; -use super::container::{self, Container}; +use super::container::Container; +use super::{container, util}; use crate::{NonSortedIntegers, RoaringBitmap}; #[cfg(not(feature = "std"))] @@ -31,16 +32,208 @@ fn and_then_or_clear(opt: &mut Option, f: impl FnOnce(&mut T) -> Option x } +fn 
advance_to_impl<'a, It>( + n: u32, + front_iter: &mut Option>, + containers: &mut It, + back_iter: &mut Option>, +) where + It: Iterator, + It: AsRef<[Container]>, + It::Item: IntoIterator>, +{ + let (key, index) = util::split(n); + if let Some(iter) = front_iter { + match key.cmp(&iter.key) { + core::cmp::Ordering::Less => return, + core::cmp::Ordering::Equal => { + iter.advance_to(index); + return; + } + core::cmp::Ordering::Greater => { + *front_iter = None; + } + } + } + let containers_slice = containers.as_ref(); + let containers_len = containers_slice.len(); + let to_skip = match containers_slice.binary_search_by_key(&key, |c| c.key) { + Ok(n) => { + let container = containers.nth(n).expect("binary search returned a valid index"); + let mut container_iter = container.into_iter(); + container_iter.advance_to(index); + *front_iter = Some(container_iter); + return; + } + Err(n) => n, + }; + + if let Some(n) = to_skip.checked_sub(1) { + containers.nth(n); + } + if to_skip != containers_len { + // There are still containers with keys greater than the key we are looking for, + // the key we're looking for _can't_ be in the back iterator. 
+ return; + } + if let Some(iter) = back_iter { + match key.cmp(&iter.key) { + core::cmp::Ordering::Less => {} + core::cmp::Ordering::Equal => { + iter.advance_to(index); + } + core::cmp::Ordering::Greater => { + *back_iter = None; + } + } + } +} + +fn advance_back_to_impl<'a, It>( + n: u32, + front_iter: &mut Option>, + containers: &mut It, + back_iter: &mut Option>, +) where + It: DoubleEndedIterator, + It: AsRef<[Container]>, + It::Item: IntoIterator>, +{ + let (key, index) = util::split(n); + if let Some(iter) = back_iter { + match key.cmp(&iter.key) { + core::cmp::Ordering::Greater => return, + core::cmp::Ordering::Equal => { + iter.advance_back_to(index); + return; + } + core::cmp::Ordering::Less => { + *back_iter = None; + } + } + } + let containers_slice = containers.as_ref(); + let containers_len = containers_slice.len(); + let to_skip = match containers_slice.binary_search_by_key(&key, |c| c.key) { + Ok(n) => { + // n must be less than containers_len, so this can never underflow + let n = containers_len - n - 1; + let container = containers.nth_back(n).expect("binary search returned a valid index"); + let mut container_iter = container.into_iter(); + container_iter.advance_back_to(index); + *back_iter = Some(container_iter); + return; + } + Err(n) => containers_len - n, + }; + + if let Some(n) = to_skip.checked_sub(1) { + containers.nth_back(n); + } + if to_skip != containers_len { + // There are still containers with keys less than the key we are looking for, + // the key we're looking for _can't_ be in the front iterator. 
+ return; + } + if let Some(iter) = front_iter { + match key.cmp(&iter.key) { + core::cmp::Ordering::Greater => {} + core::cmp::Ordering::Equal => { + iter.advance_back_to(index); + } + core::cmp::Ordering::Less => { + *front_iter = None; + } + } + } +} + impl Iter<'_> { fn new(containers: &[Container]) -> Iter { Iter { front: None, containers: containers.iter(), back: None } } + + /// Advance the iterator to the first position where the item has a value >= `n` + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// use core::iter::FromIterator; + /// + /// let bitmap = (1..3).collect::(); + /// let mut iter = bitmap.iter(); + /// iter.advance_to(2); + /// + /// assert_eq!(iter.next(), Some(2)); + /// assert_eq!(iter.next(), None); + /// ``` + pub fn advance_to(&mut self, n: u32) { + advance_to_impl(n, &mut self.front, &mut self.containers, &mut self.back); + } + + /// Advance the back of the iterator to the first position where the item has a value <= `n` + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// use core::iter::FromIterator; + /// + /// let bitmap = (1..3).collect::(); + /// let mut iter = bitmap.iter(); + /// iter.advance_back_to(1); + /// + /// assert_eq!(iter.next_back(), Some(1)); + /// assert_eq!(iter.next_back(), None); + /// ``` + pub fn advance_back_to(&mut self, n: u32) { + advance_back_to_impl(n, &mut self.front, &mut self.containers, &mut self.back); + } } impl IntoIter { fn new(containers: Vec) -> IntoIter { IntoIter { front: None, containers: containers.into_iter(), back: None } } + + /// Advance the iterator to the first position where the item has a value >= `n` + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// use core::iter::FromIterator; + /// + /// let bitmap = (1..3).collect::(); + /// let mut iter = bitmap.into_iter(); + /// iter.advance_to(2); + /// + /// assert_eq!(iter.next(), Some(2)); + /// assert_eq!(iter.next(), None); + /// ``` + pub fn 
advance_to(&mut self, n: u32) { + advance_to_impl(n, &mut self.front, &mut self.containers, &mut self.back); + } + + /// Advance the back of the iterator to the first position where the item has a value <= `n` + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// use core::iter::FromIterator; + /// + /// let bitmap = (1..3).collect::(); + /// let mut iter = bitmap.into_iter(); + /// iter.advance_back_to(1); + /// + /// assert_eq!(iter.next_back(), Some(1)); + /// assert_eq!(iter.next_back(), None); + /// ``` + pub fn advance_back_to(&mut self, n: u32) { + advance_back_to_impl(n, &mut self.front, &mut self.containers, &mut self.back); + } } fn size_hint_impl( diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index 4b89e0e0..91b432bc 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -1,4 +1,5 @@ use core::borrow::Borrow; +use core::cmp::Ordering; use core::fmt::{Display, Formatter}; use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign, RangeInclusive, SubAssign}; @@ -406,6 +407,7 @@ pub struct BitmapIter> { key: u16, value: u64, key_back: u16, + // If key_back <= key, current back value is actually in `value` value_back: u64, bits: B, } @@ -420,6 +422,66 @@ impl> BitmapIter { bits, } } + + /// Advance the iterator to the first value greater than or equal to `index`. 
+ pub(crate) fn advance_to(&mut self, index: u16) { + let new_key = key(index) as u16; + let value = match new_key.cmp(&self.key) { + Ordering::Less => return, + Ordering::Equal => self.value, + Ordering::Greater => { + let bits = self.bits.borrow(); + let cmp = new_key.cmp(&self.key_back); + // Match arms can be reordered, this ordering is perf sensitive + if cmp == Ordering::Less { + // new_key is > self.key, < self.key_back, so it must be in bounds + unsafe { *bits.get_unchecked(new_key as usize) } + } else if cmp == Ordering::Equal { + self.value_back + } else { + self.value_back = 0; + return; + } + } + }; + let bit = bit(index); + let low_bits = (1 << bit) - 1; + + self.key = new_key; + self.value = value & !low_bits; + } + + /// Advance the back of the iterator to the first value less than or equal to `index`. + pub(crate) fn advance_back_to(&mut self, index: u16) { + let new_key = key(index) as u16; + let (value, dst) = match new_key.cmp(&self.key_back) { + Ordering::Greater => return, + Ordering::Equal => { + let dst = + if self.key_back <= self.key { &mut self.value } else { &mut self.value_back }; + (*dst, dst) + } + Ordering::Less => { + let bits = self.bits.borrow(); + let cmp = new_key.cmp(&self.key); + // Match arms can be reordered, this ordering is perf sensitive + if cmp == Ordering::Greater { + // new_key is > self.key, < self.key_back, so it must be in bounds + let value = unsafe { *bits.get_unchecked(new_key as usize) }; + (value, &mut self.value_back) + } else if cmp == Ordering::Equal { + (self.value, &mut self.value) + } else { + (0, &mut self.value) + } + } + }; + let bit = bit(index); + let low_bits = u64::MAX >> (64 - bit - 1); + + self.key_back = new_key; + *dst = value & low_bits; + } } impl> Iterator for BitmapIter { diff --git a/roaring/src/bitmap/store/mod.rs b/roaring/src/bitmap/store/mod.rs index 625b8137..6ec1f88d 100644 --- a/roaring/src/bitmap/store/mod.rs +++ b/roaring/src/bitmap/store/mod.rs @@ -497,6 +497,51 @@ impl PartialEq for 
Store { } } +impl Iter<'_> { + /// Advance the iterator to the first value greater than or equal to `n`. + pub(crate) fn advance_to(&mut self, n: u16) { + match self { + Iter::Array(inner) => { + let skip = inner.as_slice().partition_point(|&i| i < n); + if let Some(nth) = skip.checked_sub(1) { + inner.nth(nth); + } + } + Iter::Vec(inner) => { + let skip = inner.as_slice().partition_point(|&i| i < n); + if let Some(nth) = skip.checked_sub(1) { + inner.nth(nth); + } + } + Iter::BitmapBorrowed(inner) => inner.advance_to(n), + Iter::BitmapOwned(inner) => inner.advance_to(n), + } + } + + pub(crate) fn advance_back_to(&mut self, n: u16) { + match self { + Iter::Array(inner) => { + let slice = inner.as_slice(); + let from_front = slice.partition_point(|&i| i <= n); + let skip = slice.len() - from_front; + if let Some(nth) = skip.checked_sub(1) { + inner.nth_back(nth); + } + } + Iter::Vec(inner) => { + let slice = inner.as_slice(); + let from_front = slice.partition_point(|&i| i <= n); + let skip = slice.len() - from_front; + if let Some(nth) = skip.checked_sub(1) { + inner.nth_back(nth); + } + } + Iter::BitmapBorrowed(inner) => inner.advance_back_to(n), + Iter::BitmapOwned(inner) => inner.advance_back_to(n), + } + } +} + impl Iterator for Iter<'_> { type Item = u16; diff --git a/roaring/tests/iter_advance_to.rs b/roaring/tests/iter_advance_to.rs new file mode 100644 index 00000000..d07ca671 --- /dev/null +++ b/roaring/tests/iter_advance_to.rs @@ -0,0 +1,208 @@ +use roaring::RoaringBitmap; + +#[test] +fn iter_basic() { + let bm = RoaringBitmap::from([1, 2, 3, 4, 11, 12, 13, 14]); + let mut i = bm.iter(); + i.advance_to(10); + for n in 11..=14 { + assert_eq!(i.next(), Some(n)) + } + assert_eq!(i.next(), None); +} + +#[test] +fn to_missing_container() { + let bm = RoaringBitmap::from([1, 0x2_0001, 0x2_0002]); + let mut i = bm.iter(); + i.advance_to(0x1_0000); + assert_eq!(i.next(), Some(0x2_0001)); + assert_eq!(i.next(), Some(0x2_0002)); + assert_eq!(i.next(), None); +} + 
+#[test] +fn iter_back_basic() { + let bm = RoaringBitmap::from([1, 2, 3, 4, 11, 12, 13, 14]); + let mut i = bm.iter(); + i.advance_back_to(10); + assert_eq!(i.next(), Some(1)); + assert_eq!(i.next(), Some(2)); + assert_eq!(i.next_back(), Some(4)); + assert_eq!(i.next_back(), Some(3)); + + assert_eq!(i.next(), None); + assert_eq!(i.next_back(), None); +} + +#[test] +fn iter_advance_past_end() { + let bm = RoaringBitmap::from([1, 2, 3, 4, 11, 12, 13, 14]); + let mut i = bm.iter(); + i.advance_to(15); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None); +} + +#[test] +fn iter_multi_container() { + let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); + let mut i = bm.iter(); + i.advance_to(3); + assert_eq!(i.size_hint(), (3, Some(3))); + assert_eq!(i.next(), Some(3)); + assert_eq!(i.size_hint(), (2, Some(2))); + assert_eq!(i.next(), Some(100000)); + assert_eq!(i.size_hint(), (1, Some(1))); + assert_eq!(i.next(), Some(100001)); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None); + assert_eq!(i.size_hint(), (0, Some(0))); +} + +#[test] +fn iter_empty() { + let bm = RoaringBitmap::new(); + let mut i = bm.iter(); + i.advance_to(31337); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None) +} + +#[test] +fn iter_back_empty() { + let bm = RoaringBitmap::new(); + let mut i = bm.iter(); + i.advance_back_to(31337); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None) +} + +#[test] +fn into_iter_basic() { + let bm = RoaringBitmap::from([1, 2, 3, 4, 11, 12, 13, 14]); + let mut i = bm.into_iter(); + i.advance_to(10); + let mut expected_size_hint = 4; + assert_eq!(i.size_hint(), (expected_size_hint, Some(expected_size_hint))); + for n in 11..=14 { + assert_eq!(i.next(), Some(n)); + expected_size_hint -= 1; + assert_eq!(i.size_hint(), (expected_size_hint, Some(expected_size_hint))); + } + assert_eq!(i.next(), None); +} + +#[test] +fn into_iter_multi_container() { + let bm = RoaringBitmap::from([1, 2, 
3, 100000, 100001]); + let mut i = bm.into_iter(); + i.advance_to(3); + assert_eq!(i.size_hint(), (3, Some(3))); + assert_eq!(i.next(), Some(3)); + assert_eq!(i.next(), Some(100000)); + assert_eq!(i.next(), Some(100001)); + assert_eq!(i.next(), None); +} + +#[test] +fn into_iter_empty() { + let bm = RoaringBitmap::new(); + let mut i = bm.into_iter(); + i.advance_to(31337); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None) +} + +#[test] +fn into_iter_back_empty() { + let bm = RoaringBitmap::new(); + let mut i = bm.into_iter(); + i.advance_back_to(31337); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None) +} + +#[test] +fn advance_to_with_tail_iter() { + let bm = RoaringBitmap::from([1, 2, 3, 100000, 100001]); + let mut i = bm.iter(); + i.next_back(); + i.advance_to(100000); + assert_eq!(i.size_hint(), (1, Some(1))); + assert_eq!(i.next(), Some(100000)); + assert_eq!(i.size_hint(), (0, Some(0))); + assert_eq!(i.next(), None); +} + +#[test] +fn advance_to_end() { + let bitmap = RoaringBitmap::from([u32::MAX]); + let mut iter = bitmap.iter(); + iter.advance_to(u32::MAX); + assert_eq!(Some(u32::MAX), iter.next()); + assert_eq!(None, iter.next()); +} + +#[test] +fn advance_bitset() { + let mut bitmap = RoaringBitmap::new(); + for i in (0..=0x2_0000).step_by(2) { + bitmap.insert(i); + } + let mut iter = bitmap.iter(); + iter.advance_to(0x1_0000 - 4); + // 0x1_0000 + 5 is not in the bitmap, so the next value will be the first value less than that + iter.advance_back_to(0x1_0000 + 5); + assert_eq!(iter.next(), Some(0x1_0000 - 4)); + assert_eq!(iter.next_back(), Some(0x1_0000 + 4)); + + assert_eq!(iter.next(), Some(0x1_0000 - 2)); + assert_eq!(iter.next(), Some(0x1_0000)); + assert_eq!(iter.next(), Some(0x1_0000 + 2)); + assert_eq!(iter.next(), None); + assert_eq!(iter.next_back(), None); +} + +#[test] +fn advance_bitset_current_word() { + let mut bitmap = RoaringBitmap::new(); + for i in (0..=0x2_0000).step_by(2) { + 
bitmap.insert(i); + } + let mut iter = bitmap.iter(); + iter.advance_to(4); + iter.advance_back_to(0x2_0000 - 4); + for i in (4..=(0x2_0000 - 4)).step_by(2) { + assert_eq!(iter.next(), Some(i)); + } + assert_eq!(iter.next(), None); +} + +#[test] +fn advance_bitset_to_end_word() { + let mut bitmap = RoaringBitmap::new(); + for i in (0..=0x2_0000).step_by(2) { + bitmap.insert(i); + } + let mut iter = bitmap.iter(); + iter.advance_to(0x1_0000 - 4); + for i in ((0x1_0000 - 4)..=0x2_0000).step_by(2) { + assert_eq!(iter.next(), Some(i)); + } + assert_eq!(iter.next(), None); +} + +#[test] +fn advance_bitset_back_to_start_word() { + let mut bitmap = RoaringBitmap::new(); + for i in (0..=0x2_0000).step_by(2) { + bitmap.insert(i); + } + let mut iter = bitmap.iter(); + iter.advance_back_to(0x1_0000 - 4); + for i in (0..=(0x1_0000 - 4)).step_by(2) { + assert_eq!(iter.next(), Some(i)); + } + assert_eq!(iter.next(), None); +}