Skip to content

Commit

Permalink
Use containers offsets when available
Browse files Browse the repository at this point in the history
  • Loading branch information
Kerollmops committed Jun 6, 2024
1 parent b92f110 commit 88b848b
Showing 1 changed file with 97 additions and 8 deletions.
105 changes: 97 additions & 8 deletions src/bitmap/ops_with_serialized.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::ops::RangeInclusive;

use crate::bitmap::container::Container;
use crate::bitmap::serialization::{
NO_OFFSET_THRESHOLD, OFFSET_BYTES, SERIAL_COOKIE, SERIAL_COOKIE_NO_RUNCONTAINER,
NO_OFFSET_THRESHOLD, SERIAL_COOKIE, SERIAL_COOKIE_NO_RUNCONTAINER,
};
use crate::RoaringBitmap;

Expand Down Expand Up @@ -93,21 +93,35 @@ impl RoaringBitmap {
}

// Read the container descriptions
let mut description_bytes = vec![[0u16; 2]; size];
reader.read_exact(cast_slice_mut(&mut description_bytes))?;
description_bytes.iter_mut().for_each(|[ref mut key, ref mut len]| {
let mut descriptions = vec![[0; 2]; size];
reader.read_exact(cast_slice_mut(&mut descriptions))?;
descriptions.iter_mut().for_each(|[ref mut key, ref mut len]| {
*key = u16::from_le(*key);
*len = u16::from_le(*len);
});


if has_offsets {
// I could use these offsets but I am a lazy developer (for now)
reader.seek(SeekFrom::Current((size * OFFSET_BYTES) as i64))?;
let mut offsets = vec![0; size];
reader.read_exact(cast_slice_mut(&mut offsets))?;
offsets.iter_mut().for_each(|offset| *offset = u32::from_le(*offset));

// Loop on the materialized containers if there
// are less or as many of them than serialized ones.
if self.containers.len() <= size {
return self.intersection_with_serialized_impl_with_offsets(
reader,
a,
b,
&descriptions,
&offsets,
run_container_bitmap.as_deref(),
);
}
}

// Read each container and skip the useless ones
for (i, &[key, len_minus_one]) in description_bytes.iter().enumerate() {
let mut containers = Vec::new();
for (i, &[key, len_minus_one]) in descriptions.iter().enumerate() {
let container = match self.containers.binary_search_by_key(&key, |c| c.key) {
Ok(index) => self.containers.get(index),
Err(_) => None,
Expand Down Expand Up @@ -191,6 +205,81 @@ impl RoaringBitmap {

Ok(RoaringBitmap { containers })
}

fn intersection_with_serialized_impl_with_offsets<R, A, AErr, B, BErr>(
&self,
mut reader: R,
a: A,
b: B,
descriptions: &[[u16; 2]],
offsets: &[u32],
run_container_bitmap: Option<&[u8]>,
) -> io::Result<RoaringBitmap>
where
R: io::Read + io::Seek,
A: Fn(Vec<u16>) -> Result<ArrayStore, AErr>,
AErr: Error + Send + Sync + 'static,
B: Fn(u64, Box<[u64; 1024]>) -> Result<BitmapStore, BErr>,
BErr: Error + Send + Sync + 'static,
{
let mut containers = Vec::new();
for container in &self.containers {
let i = match descriptions.binary_search_by_key(&container.key, |[k, _]| *k) {
Ok(index) => index,
Err(_) => continue,
};

// Seek to the bytes of the container we want.
reader.seek(SeekFrom::Start(offsets[i] as u64))?;

let [key, len_minus_one] = descriptions[i];
let cardinality = u64::from(len_minus_one) + 1;

// If the run container bitmap is present, check if this container is a run container
let is_run_container =
run_container_bitmap.as_ref().map_or(false, |bm| bm[i / 8] & (1 << (i % 8)) != 0);

let store = if is_run_container {
let runs = reader.read_u16::<LittleEndian>().unwrap();
let mut intervals = vec![[0, 0]; runs as usize];
reader.read_exact(cast_slice_mut(&mut intervals)).unwrap();
intervals.iter_mut().for_each(|[s, len]| {
*s = u16::from_le(*s);
*len = u16::from_le(*len);
});

let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum();
let mut store = Store::with_capacity(cardinality);
intervals.into_iter().try_for_each(|[s, len]| -> Result<(), io::ErrorKind> {
let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?;
store.insert_range(RangeInclusive::new(s, end));
Ok(())
})?;
store
} else if cardinality <= ARRAY_LIMIT {
let mut values = vec![0; cardinality as usize];
reader.read_exact(cast_slice_mut(&mut values)).unwrap();
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
let array = a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
Store::Array(array)
} else {
let mut values = Box::new([0; BITMAP_LENGTH]);
reader.read_exact(cast_slice_mut(&mut values[..])).unwrap();
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
let bitmap = b(cardinality, values)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
Store::Bitmap(bitmap)
};

let mut other_container = Container { key, store };
other_container &= container;
if !other_container.is_empty() {
containers.push(other_container);
}
}

Ok(RoaringBitmap { containers })
}
}

#[cfg(test)]
Expand Down

0 comments on commit 88b848b

Please sign in to comment.