diff --git a/src/impls/codec.rs b/src/impls/codec.rs index 5e52afb..15d5fb4 100644 --- a/src/impls/codec.rs +++ b/src/impls/codec.rs @@ -1,6 +1,6 @@ //! A region that encodes its contents. -use crate::{OwnedRegion, Push, Region}; +use crate::{IntoOwned, OwnedRegion, Push, Region}; pub use self::misra_gries::MisraGries; pub use dictionary::DictionaryCodec; @@ -138,19 +138,41 @@ impl Push<&[u8]> for CodecRegion where for<'a> R: Region = &'a [u8]> + Push<&'a [u8]> + 'a, { + #[inline] fn push(&mut self, item: &[u8]) -> as Region>::Index { self.codec.encode(item, &mut self.inner) } } +impl Push> for CodecRegion +where + for<'a> R: Region = &'a [u8]> + Push<&'a [u8]> + 'a, +{ + #[inline] + fn push(&mut self, item: Vec) -> as Region>::Index { + self.push(item.as_slice()) + } +} + +impl Push<&Vec> for CodecRegion +where + for<'a> R: Region = &'a [u8]> + Push<&'a [u8]> + 'a, +{ + #[inline] + fn push(&mut self, item: &Vec) -> as Region>::Index { + self.push(item.as_slice()) + } +} + /// Encode and decode byte strings. pub trait Codec: Default { /// Decodes an input byte slice into a sequence of byte slices. fn decode<'a>(&'a self, bytes: &'a [u8]) -> &'a [u8]; /// Encodes a sequence of byte slices into an output byte slice. - fn encode(&mut self, bytes: &[u8], output: &mut R) -> R::Index + fn encode<'a, R, B>(&mut self, bytes: B, output: &mut R) -> R::Index where - for<'a> R: Region + Push<&'a [u8]>; + R: Region + for<'b> Push<&'b [u8]>, + B: AsRef<[u8]> + IntoOwned<'a, Owned = Vec>; /// Constructs a new instance of `Self` from accumulated statistics. /// These statistics should cover the data the output expects to see. fn new_from<'a, I: Iterator + Clone>(stats: I) -> Self @@ -165,7 +187,7 @@ pub trait Codec: Default { mod dictionary { - use crate::{Push, Region}; + use crate::{IntoOwned, Push, Region}; use std::collections::BTreeMap; pub use super::{BytesMap, Codec, MisraGries}; @@ -193,22 +215,24 @@ mod dictionary { /// Encode a sequence of byte slices. /// /// Encoding also records statistics about the structure of the input. - fn encode(&mut self, bytes: &[u8], output: &mut R) -> R::Index + fn encode<'a, R, B>(&mut self, bytes: B, output: &mut R) -> R::Index where - for<'a> R: Region + Push<&'a [u8]>, + R: Region + for<'b> Push<&'b [u8]>, + B: AsRef<[u8]> + IntoOwned<'a, Owned = Vec>, { - self.total += bytes.len(); + let slice = bytes.as_ref(); + self.total += slice.len(); // If we have an index referencing `bytes`, use the index key. - let index = if let Some(b) = self.encode.get(bytes) { + let index = if let Some(b) = self.encode.get(slice) { self.bytes += 1; output.push([*b].as_slice()) } else { - self.bytes += bytes.len(); - output.push(bytes) + self.bytes += slice.len(); + output.push(slice) }; // Stats stuff. - self.stats.0.insert(bytes.to_owned()); - let tag = bytes[0]; + let tag = slice[0]; + self.stats.0.insert(bytes.into_owned()); let tag_idx: usize = (tag % 4).into(); self.stats.1[tag_idx] |= 1 << (tag >> 2); diff --git a/src/impls/columns.rs b/src/impls/columns.rs index 5ccb772..5dc65d8 100644 --- a/src/impls/columns.rs +++ b/src/impls/columns.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::impls::deduplicate::ConsecutiveIndexPairs; use crate::impls::index::{IndexContainer, IndexOptimized}; -use crate::{IntoOwned, PushIter}; +use crate::{IntoOwned, PushIter, ReserveItems}; use crate::{OwnedRegion, Push, Region}; /// A region that can store a variable number of elements per row. @@ -96,7 +96,7 @@ where { type Owned = Vec; type ReadItem<'a> = ReadColumns<'a, R> where Self: 'a; - type Index = , IndexOptimized> as Region>::Index; + type Index = , O> as Region>::Index; fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where @@ -237,10 +237,10 @@ where /// Get the element at `offset`. #[must_use] - pub fn get(&self, offset: usize) -> R::ReadItem<'a> { + pub fn get(&self, offset: usize) -> Option> { match &self.0 { Ok(inner) => inner.get(offset), - Err(slice) => IntoOwned::borrow_as(&slice[offset]), + Err(slice) => Some(IntoOwned::borrow_as(slice.get(offset)?)), } } @@ -262,14 +262,15 @@ where } } } + impl<'a, R> ReadColumnsInner<'a, R> where R: Region, { - /// Get the element at `offset`. + /// Get the element at `offset`, if the offset is valid, and return `None` otherwise. #[must_use] - pub fn get(&self, offset: usize) -> R::ReadItem<'a> { - self.columns[offset].index(self.index[offset]) + pub fn get(&self, offset: usize) -> Option> { + Some(self.columns.get(offset)?.index(*self.index.get(offset)?)) } /// Returns the length of this row. @@ -392,6 +393,52 @@ where } } +impl<'a, R, O> ReserveItems> for ColumnsRegion +where + for<'b> R: Region + ReserveItems<::ReadItem<'b>>, + O: IndexContainer, +{ + fn reserve_items(&mut self, items: I) + where + I: Iterator> + Clone, + { + let len = items.clone().map(|item| item.len()).max().unwrap_or(0); + if len > 0 { + // Ensure all required regions exist. + while self.inner.len() < len { + self.inner.push(R::default()); + } + + for (index, region) in self.inner.iter_mut().enumerate() { + region.reserve_items(items.clone().filter_map(|item| item.get(index))); + } + } + } +} + +impl<'a, T, R, O> ReserveItems<&'a Vec> for ColumnsRegion +where + for<'b> R: Region + ReserveItems<&'a T>, + O: IndexContainer, +{ + fn reserve_items(&mut self, items: I) + where + I: Iterator> + Clone, + { + let len = items.clone().map(|item| item.len()).max().unwrap_or(0); + if len > 0 { + // Ensure all required regions exist. + while self.inner.len() < len { + self.inner.push(R::default()); + } + + for (index, region) in self.inner.iter_mut().enumerate() { + region.reserve_items(items.clone().filter_map(|item| item.get(index))); + } + } + } +} + impl<'a, R, O, T> Push<&'a [T]> for ColumnsRegion where R: Region + Push<&'a T>, @@ -642,7 +689,7 @@ mod tests { assert!(row.iter().eq(r.index(index).iter())); } - assert_eq!("1", r.index(indices[1]).get(0)); + assert_eq!(Some("1"), r.index(indices[1]).get(0)); assert_eq!(1, r.index(indices[1]).len()); assert!(!r.index(indices[1]).is_empty()); assert!(r.index(indices[0]).is_empty()); diff --git a/tests/useful_types.rs b/tests/useful_types.rs new file mode 100644 index 0000000..e20f01b --- /dev/null +++ b/tests/useful_types.rs @@ -0,0 +1,70 @@ +//! Test that the types defined by this crate are useful, i.e., they +//! offer implementations over what's absolutely necessary. + +use flatcontainer::{ + ColumnsRegion, MirrorRegion, OptionRegion, OwnedRegion, Push, Region, ReserveItems, + ResultRegion, SliceRegion, StringRegion, +}; + +trait UsefulRegion: + Region + + Push<::Owned> + + for<'a> Push<&'a ::Owned> + + for<'a> Push<::ReadItem<'a>> + + for<'a> ReserveItems<&'a ::Owned> + + for<'a> ReserveItems<::ReadItem<'a>> +{ +} + +impl UsefulRegion for R where + R: Region + + Push<::Owned> + + for<'a> Push<&'a ::Owned> + + for<'a> Push<::ReadItem<'a>> + + for<'a> ReserveItems<&'a ::Owned> + + for<'a> ReserveItems<::ReadItem<'a>> +{ +} + +trait DerefRegion: UsefulRegion +where + ::Owned: std::ops::Deref, + Self: for<'a> Push<&'a <::Owned as std::ops::Deref>::Target>, + Self: for<'a> ReserveItems<&'a <::Owned as std::ops::Deref>::Target>, +{ +} + +impl DerefRegion for R +where + R: UsefulRegion, + ::Owned: std::ops::Deref, + Self: for<'a> Push<&'a <::Owned as std::ops::Deref>::Target>, + Self: for<'a> ReserveItems<&'a <::Owned as std::ops::Deref>::Target>, +{ +} + +#[test] +fn test_useful_region() { + fn _useful_region() {} + _useful_region::>(); + _useful_region::>>(); + _useful_region::>(); + _useful_region::, MirrorRegion>>(); + _useful_region::>>(); + _useful_region::(); + _useful_region::>(); + _useful_region::>>(); + // _useful_region::>(); +} + +#[test] +fn test_deref_region() { + fn _deref_region() + where + ::Owned: std::ops::Deref, + { + } + _deref_region::>(); + _deref_region::>>(); + _deref_region::(); +}