Skip to content

Commit

Permalink
Use containers offsets when available
Browse files Browse the repository at this point in the history
  • Loading branch information
Kerollmops committed Jun 6, 2024
1 parent b92f110 commit 0a11dd0
Showing 1 changed file with 74 additions and 7 deletions.
81 changes: 74 additions & 7 deletions src/bitmap/ops_with_serialized.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::ops::RangeInclusive;

use crate::bitmap::container::Container;
use crate::bitmap::serialization::{
NO_OFFSET_THRESHOLD, OFFSET_BYTES, SERIAL_COOKIE, SERIAL_COOKIE_NO_RUNCONTAINER,
NO_OFFSET_THRESHOLD, SERIAL_COOKIE, SERIAL_COOKIE_NO_RUNCONTAINER,
};
use crate::RoaringBitmap;

Expand Down Expand Up @@ -93,21 +93,88 @@ impl RoaringBitmap {
}

// Read the container descriptions
let mut description_bytes = vec![[0u16; 2]; size];
reader.read_exact(cast_slice_mut(&mut description_bytes))?;
description_bytes.iter_mut().for_each(|[ref mut key, ref mut len]| {
let mut descriptions = vec![[0; 2]; size];
reader.read_exact(cast_slice_mut(&mut descriptions))?;
descriptions.iter_mut().for_each(|[ref mut key, ref mut len]| {
*key = u16::from_le(*key);
*len = u16::from_le(*len);
});

let mut containers = Vec::new();

if has_offsets {
// I could use these offsets but I am a lazy developer (for now)
reader.seek(SeekFrom::Current((size * OFFSET_BYTES) as i64))?;
let mut offsets = vec![0; size];
reader.read_exact(cast_slice_mut(&mut offsets))?;
offsets.iter_mut().for_each(|offset| *offset = u32::from_le(*offset));

// Loop on the materialized containers if there are
// fewer of them than, or as many as, the serialized ones.
if self.containers.len() <= size {
for container in &self.containers {
let i = match descriptions.binary_search_by_key(&container.key, |[k, _]| *k) {
Ok(index) => index,
Err(_) => continue,
};

// Seek to the bytes of the container we want.
reader.seek(SeekFrom::Start(offsets[i] as u64))?;

let [key, len_minus_one] = descriptions[i];
let cardinality = u64::from(len_minus_one) + 1;

// If the run container bitmap is present, check if this container is a run container
let is_run_container = run_container_bitmap
.as_ref()
.map_or(false, |bm| bm[i / 8] & (1 << (i % 8)) != 0);

let store = if is_run_container {
let runs = reader.read_u16::<LittleEndian>().unwrap();
let mut intervals = vec![[0, 0]; runs as usize];
reader.read_exact(cast_slice_mut(&mut intervals)).unwrap();
intervals.iter_mut().for_each(|[s, len]| {
*s = u16::from_le(*s);
*len = u16::from_le(*len);
});

let cardinality = intervals.iter().map(|[_, len]| *len as usize).sum();
let mut store = Store::with_capacity(cardinality);
intervals.into_iter().try_for_each(
|[s, len]| -> Result<(), io::ErrorKind> {
let end = s.checked_add(len).ok_or(io::ErrorKind::InvalidData)?;
store.insert_range(RangeInclusive::new(s, end));
Ok(())
},
)?;
store
} else if cardinality <= ARRAY_LIMIT {
let mut values = vec![0; cardinality as usize];
reader.read_exact(cast_slice_mut(&mut values)).unwrap();
values.iter_mut().for_each(|n| *n = u16::from_le(*n));
let array =
a(values).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
Store::Array(array)
} else {
let mut values = Box::new([0; BITMAP_LENGTH]);
reader.read_exact(cast_slice_mut(&mut values[..])).unwrap();
values.iter_mut().for_each(|n| *n = u64::from_le(*n));
let bitmap = b(cardinality, values)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
Store::Bitmap(bitmap)
};

let mut other_container = Container { key, store };
other_container &= container;
if !other_container.is_empty() {
containers.push(other_container);
}
}

return Ok(RoaringBitmap { containers });
}
}

// Read each container and skip the useless ones
for (i, &[key, len_minus_one]) in description_bytes.iter().enumerate() {
for (i, &[key, len_minus_one]) in descriptions.iter().enumerate() {
let container = match self.containers.binary_search_by_key(&key, |c| c.key) {
Ok(index) => self.containers.get(index),
Err(_) => None,
Expand Down

0 comments on commit 0a11dd0

Please sign in to comment.