Skip to content

Commit

Permalink
Tests to validate that regions implement a common interface
Browse files Browse the repository at this point in the history
We don't require region implementations to provide other trait
implementations, but in many cases it is useful to have push/reserve
implementations. This tests checks that regions provided by this crate
provide said implementations. Specifically, it checks that regions can
absorb their owned type, references to their owned type, the read item,
and allow to reserve space for references to the owned type and the read
item. Additionally, regions for where owned types are Deref, the region
should absorb references to the deref target, and be able to reserve it.

This surfaced some missing APIs which we add in this change. Namely, the
columns region missed reserve APIs. Adding the reserve implementations
required the column's item getter to return an option (elements are not
required to have the same length.) The codec region still doesn't provide
all implementations required, but it's closer and provides sufficient
push implementations. It only misses the reserve items API, which does not
make too much sense in its context.

Signed-off-by: Moritz Hoffmann <[email protected]>
  • Loading branch information
antiguru committed Jul 25, 2024
1 parent 712966c commit 1b355b5
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 20 deletions.
48 changes: 36 additions & 12 deletions src/impls/codec.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! A region that encodes its contents.
use crate::{OwnedRegion, Push, Region};
use crate::{IntoOwned, OwnedRegion, Push, Region};

pub use self::misra_gries::MisraGries;
pub use dictionary::DictionaryCodec;
Expand Down Expand Up @@ -138,19 +138,41 @@ impl<C: Codec, R> Push<&[u8]> for CodecRegion<C, R>
where
for<'a> R: Region<ReadItem<'a> = &'a [u8]> + Push<&'a [u8]> + 'a,
{
#[inline]
fn push(&mut self, item: &[u8]) -> <CodecRegion<C, R> as Region>::Index {
self.codec.encode(item, &mut self.inner)
}
}

impl<C: Codec, R> Push<Vec<u8>> for CodecRegion<C, R>
where
for<'a> R: Region<ReadItem<'a> = &'a [u8]> + Push<&'a [u8]> + 'a,
{
#[inline]
fn push(&mut self, item: Vec<u8>) -> <CodecRegion<C, R> as Region>::Index {
self.push(item.as_slice())
}
}

impl<C: Codec, R> Push<&Vec<u8>> for CodecRegion<C, R>
where
for<'a> R: Region<ReadItem<'a> = &'a [u8]> + Push<&'a [u8]> + 'a,
{
#[inline]
fn push(&mut self, item: &Vec<u8>) -> <CodecRegion<C, R> as Region>::Index {
self.push(item.as_slice())
}
}

/// Encode and decode byte strings.
pub trait Codec: Default {
/// Decodes an input byte slice into a sequence of byte slices.
fn decode<'a>(&'a self, bytes: &'a [u8]) -> &'a [u8];
/// Encodes a sequence of byte slices into an output byte slice.
fn encode<R>(&mut self, bytes: &[u8], output: &mut R) -> R::Index
fn encode<'a, R, B>(&mut self, bytes: B, output: &mut R) -> R::Index
where
for<'a> R: Region + Push<&'a [u8]>;
R: Region + for<'b> Push<&'b [u8]>,
B: AsRef<[u8]> + IntoOwned<'a, Owned = Vec<u8>>;
/// Constructs a new instance of `Self` from accumulated statistics.
/// These statistics should cover the data the output expects to see.
fn new_from<'a, I: Iterator<Item = &'a Self> + Clone>(stats: I) -> Self
Expand All @@ -165,7 +187,7 @@ pub trait Codec: Default {

mod dictionary {

use crate::{Push, Region};
use crate::{IntoOwned, Push, Region};
use std::collections::BTreeMap;

pub use super::{BytesMap, Codec, MisraGries};
Expand Down Expand Up @@ -193,22 +215,24 @@ mod dictionary {
/// Encode a sequence of byte slices.
///
/// Encoding also records statistics about the structure of the input.
fn encode<R>(&mut self, bytes: &[u8], output: &mut R) -> R::Index
fn encode<'a, R, B>(&mut self, bytes: B, output: &mut R) -> R::Index
where
for<'a> R: Region + Push<&'a [u8]>,
R: Region + for<'b> Push<&'b [u8]>,
B: AsRef<[u8]> + IntoOwned<'a, Owned = Vec<u8>>,
{
self.total += bytes.len();
let slice = bytes.as_ref();
self.total += slice.len();
// If we have an index referencing `bytes`, use the index key.
let index = if let Some(b) = self.encode.get(bytes) {
let index = if let Some(b) = self.encode.get(slice) {
self.bytes += 1;
output.push([*b].as_slice())
} else {
self.bytes += bytes.len();
output.push(bytes)
self.bytes += slice.len();
output.push(slice)
};
// Stats stuff.
self.stats.0.insert(bytes.to_owned());
let tag = bytes[0];
let tag = slice[0];
self.stats.0.insert(bytes.into_owned());
let tag_idx: usize = (tag % 4).into();
self.stats.1[tag_idx] |= 1 << (tag >> 2);

Expand Down
63 changes: 55 additions & 8 deletions src/impls/columns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};

use crate::impls::deduplicate::ConsecutiveIndexPairs;
use crate::impls::index::{IndexContainer, IndexOptimized};
use crate::{IntoOwned, PushIter};
use crate::{IntoOwned, PushIter, ReserveItems};
use crate::{OwnedRegion, Push, Region};

/// A region that can store a variable number of elements per row.
Expand Down Expand Up @@ -96,7 +96,7 @@ where
{
type Owned = Vec<R::Owned>;
type ReadItem<'a> = ReadColumns<'a, R> where Self: 'a;
type Index = <ConsecutiveIndexPairs<OwnedRegion<R::Index>, IndexOptimized> as Region>::Index;
type Index = <ConsecutiveIndexPairs<OwnedRegion<R::Index>, O> as Region>::Index;

fn merge_regions<'a>(regions: impl Iterator<Item = &'a Self> + Clone) -> Self
where
Expand Down Expand Up @@ -237,10 +237,10 @@ where

/// Get the element at `offset`.
#[must_use]
pub fn get(&self, offset: usize) -> R::ReadItem<'a> {
pub fn get(&self, offset: usize) -> Option<R::ReadItem<'a>> {
match &self.0 {
Ok(inner) => inner.get(offset),
Err(slice) => IntoOwned::borrow_as(&slice[offset]),
Err(slice) => Some(IntoOwned::borrow_as(slice.get(offset)?)),
}
}

Expand All @@ -262,14 +262,15 @@ where
}
}
}

impl<'a, R> ReadColumnsInner<'a, R>
where
R: Region,
{
/// Get the element at `offset`.
/// Get the element at `offset`, if the offset is valid, and return `None` otherwise.
#[must_use]
pub fn get(&self, offset: usize) -> R::ReadItem<'a> {
self.columns[offset].index(self.index[offset])
pub fn get(&self, offset: usize) -> Option<R::ReadItem<'a>> {
Some(self.columns.get(offset)?.index(*self.index.get(offset)?))
}

/// Returns the length of this row.
Expand Down Expand Up @@ -392,6 +393,52 @@ where
}
}

impl<'a, R, O> ReserveItems<ReadColumns<'a, R>> for ColumnsRegion<R, O>
where
for<'b> R: Region + ReserveItems<<R as Region>::ReadItem<'b>>,
O: IndexContainer<usize>,
{
fn reserve_items<I>(&mut self, items: I)
where
I: Iterator<Item = ReadColumns<'a, R>> + Clone,
{
let len = items.clone().map(|item| item.len()).max().unwrap_or(0);
if len > 0 {
// Ensure all required regions exist.
while self.inner.len() < len {
self.inner.push(R::default());
}

for (index, region) in self.inner.iter_mut().enumerate() {
region.reserve_items(items.clone().filter_map(|item| item.get(index)));
}
}
}
}

impl<'a, T, R, O> ReserveItems<&'a Vec<T>> for ColumnsRegion<R, O>
where
for<'b> R: Region + ReserveItems<&'a T>,
O: IndexContainer<usize>,
{
fn reserve_items<I>(&mut self, items: I)
where
I: Iterator<Item = &'a Vec<T>> + Clone,
{
let len = items.clone().map(|item| item.len()).max().unwrap_or(0);
if len > 0 {
// Ensure all required regions exist.
while self.inner.len() < len {
self.inner.push(R::default());
}

for (index, region) in self.inner.iter_mut().enumerate() {
region.reserve_items(items.clone().filter_map(|item| item.get(index)));
}
}
}
}

impl<'a, R, O, T> Push<&'a [T]> for ColumnsRegion<R, O>
where
R: Region + Push<&'a T>,
Expand Down Expand Up @@ -642,7 +689,7 @@ mod tests {
assert!(row.iter().eq(r.index(index).iter()));
}

assert_eq!("1", r.index(indices[1]).get(0));
assert_eq!(Some("1"), r.index(indices[1]).get(0));
assert_eq!(1, r.index(indices[1]).len());
assert!(!r.index(indices[1]).is_empty());
assert!(r.index(indices[0]).is_empty());
Expand Down
70 changes: 70 additions & 0 deletions tests/useful_types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//! Test that the types defined by this crate are useful, i.e., they
//! offer implementations over what's absolutely necessary.
use flatcontainer::{
ColumnsRegion, MirrorRegion, OptionRegion, OwnedRegion, Push, Region, ReserveItems,
ResultRegion, SliceRegion, StringRegion,
};

trait UsefulRegion:
Region
+ Push<<Self as Region>::Owned>
+ for<'a> Push<&'a <Self as Region>::Owned>
+ for<'a> Push<<Self as Region>::ReadItem<'a>>
+ for<'a> ReserveItems<&'a <Self as Region>::Owned>
+ for<'a> ReserveItems<<Self as Region>::ReadItem<'a>>
{
}

impl<R> UsefulRegion for R where
R: Region
+ Push<<Self as Region>::Owned>
+ for<'a> Push<&'a <Self as Region>::Owned>
+ for<'a> Push<<Self as Region>::ReadItem<'a>>
+ for<'a> ReserveItems<&'a <Self as Region>::Owned>
+ for<'a> ReserveItems<<Self as Region>::ReadItem<'a>>
{
}

trait DerefRegion: UsefulRegion
where
<Self as Region>::Owned: std::ops::Deref,
Self: for<'a> Push<&'a <<Self as Region>::Owned as std::ops::Deref>::Target>,
Self: for<'a> ReserveItems<&'a <<Self as Region>::Owned as std::ops::Deref>::Target>,
{
}

impl<R> DerefRegion for R
where
R: UsefulRegion,
<Self as Region>::Owned: std::ops::Deref,
Self: for<'a> Push<&'a <<Self as Region>::Owned as std::ops::Deref>::Target>,
Self: for<'a> ReserveItems<&'a <<Self as Region>::Owned as std::ops::Deref>::Target>,
{
}

#[test]
fn test_useful_region() {
fn _useful_region<R: UsefulRegion>() {}
_useful_region::<MirrorRegion<usize>>();
_useful_region::<OptionRegion<MirrorRegion<usize>>>();
_useful_region::<OwnedRegion<usize>>();
_useful_region::<ResultRegion<MirrorRegion<usize>, MirrorRegion<usize>>>();
_useful_region::<SliceRegion<MirrorRegion<usize>>>();
_useful_region::<StringRegion>();
_useful_region::<Vec<usize>>();
_useful_region::<ColumnsRegion<MirrorRegion<usize>>>();
// _useful_region::<CodecRegion<DictionaryCodec>>();
}

#[test]
fn test_deref_region() {
fn _deref_region<R: DerefRegion>()
where
<R as Region>::Owned: std::ops::Deref,
{
}
_deref_region::<OwnedRegion<usize>>();
_deref_region::<SliceRegion<MirrorRegion<usize>>>();
_deref_region::<StringRegion>();
}

0 comments on commit 1b355b5

Please sign in to comment.