Skip to content

Commit

Permalink
Added Integer decoder
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Aug 27, 2022
1 parent 8081fad commit 2deb916
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 2 deletions.
2 changes: 1 addition & 1 deletion examples/read_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ fn deserialize(page: &DataPage, dict: Option<&Dict>) -> Result<Vec<Option<i32>>,
let values = NativeValuesDecoder::<i32, Vec<i32>>::try_new(page, dict)?;
let decoder = FullDecoder::try_new(page, values)?;
let decoder = NativeDecoder::try_new(page, decoder)?;
// page is an enum comprising of the different possible encodings:
// decoder is an enum comprising the different cases:
match decoder {
NativeDecoder::Full(values) => match values {
FullDecoder::Optional(_, _) => todo!("optional pages"),
Expand Down
122 changes: 122 additions & 0 deletions src/deserialize/integer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
use std::collections::VecDeque;

use crate::{
encoding::delta_bitpacked,
error::Error,
indexes::Interval,
page::{split_buffer, DataPage},
parquet_bridge::Encoding,
types::NativeType,
};

use super::{
values::{Decoder, ValuesDecoder},
NativeFilteredValuesDecoder, NativeValuesDecoder, SliceFilteredIter,
};

/// Conversion of a value into the parquet native type `T`.
///
/// The implementations in this file cover the identity conversions
/// (`i32` to `i32`, `i64` to `i64`) and the widening conversion `i32` to `i64`.
pub trait AsNative<T>
where
    T: NativeType,
{
    /// Consumes `self` and returns it as a `T`.
    fn as_(self) -> T;
}

/// Identity conversion: an `i32` already is the native type `i32`.
impl AsNative<i32> for i32 {
    /// Returns `self` unchanged.
    #[inline]
    fn as_(self) -> Self {
        self
    }
}

/// Identity conversion: an `i64` already is the native type `i64`.
impl AsNative<i64> for i64 {
    /// Returns `self` unchanged.
    #[inline]
    fn as_(self) -> Self {
        self
    }
}

/// Widening conversion from `i32` to the native type `i64`.
impl AsNative<i64> for i32 {
    /// Returns `self` sign-extended to an `i64`.
    #[inline]
    fn as_(self) -> i64 {
        // `From` is only implemented for lossless conversions, so unlike an `as`
        // cast this cannot silently start truncating if the types ever change.
        i64::from(self)
    }
}

/// Values-decoding state of a [`DataPage`] of an integer parquet type (i32 or i64).
///
/// NOTE(review): within this file the only `AsNative` implementation on `i64`
/// is `AsNative<i64>`, so the `i64: AsNative<T>` bound admits `T = i32` only if
/// a matching implementation exists elsewhere; confirm.
#[derive(Debug)]
// NOTE(review): presumably the two variants differ noticeably in size and the
// lint is silenced to avoid boxing one of them; confirm.
#[allow(clippy::large_enum_variant)]
pub enum IntegerValuesDecoder<'a, T, P>
where
    T: NativeType,
    i64: AsNative<T>,
{
    /// Pages handled by the generic [`NativeValuesDecoder`].
    Common(NativeValuesDecoder<'a, T, P>),
    /// Pages handled by [`delta_bitpacked::Decoder`].
    DeltaBinaryPacked(delta_bitpacked::Decoder<'a>),
}

impl<'a, T, P> IntegerValuesDecoder<'a, T, P>
where
    T: NativeType,
    i64: AsNative<T>,
{
    /// Builds the values decoder for `page`.
    ///
    /// Pages whose encoding is `Encoding::DeltaBinaryPacked` are decoded by a
    /// [`delta_bitpacked::Decoder`] over the values buffer returned by
    /// [`split_buffer`]; `dict` is not consulted for them. Every other encoding
    /// is delegated to [`NativeValuesDecoder::try_new`].
    ///
    /// # Errors
    /// Propagates any error returned by [`split_buffer`], by
    /// `delta_bitpacked::Decoder::try_new` or by [`NativeValuesDecoder::try_new`].
    pub fn try_new(page: &'a DataPage, dict: Option<&'a P>) -> Result<Self, Error> {
        match page.encoding() {
            Encoding::DeltaBinaryPacked => {
                // Only the third element of the split (the values) is needed here.
                let (_, _, values) = split_buffer(page)?;
                let decoder = delta_bitpacked::Decoder::try_new(values)?;
                Ok(Self::DeltaBinaryPacked(decoder))
            }
            _ => NativeValuesDecoder::try_new(page, dict).map(Self::Common),
        }
    }

    /// Returns the length reported by the underlying decoder.
    ///
    /// For the delta-binary-packed variant this is the lower bound of the
    /// decoder's `size_hint`.
    #[must_use]
    pub fn len(&self) -> usize {
        match self {
            Self::Common(values) => values.len(),
            Self::DeltaBinaryPacked(decoder) => {
                let (lower_bound, _) = decoder.size_hint();
                lower_bound
            }
        }
    }

    /// Returns `true` when [`Self::len`] is zero.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        let length = self.len();
        length == 0
    }
}

impl<'a, T, P> ValuesDecoder for IntegerValuesDecoder<'a, T, P>
where
T: NativeType,
i64: AsNative<T>,
{
fn len(&self) -> usize {
self.len()
}
}

/// Filtered counterpart of [`IntegerValuesDecoder`]: the values-decoding state of
/// a [`DataPage`] of an integer parquet type (i32 or i64) combined with a queue
/// of [`Interval`]s.
#[derive(Debug)]
// NOTE(review): presumably the two variants differ noticeably in size and the
// lint is silenced to avoid boxing one of them; confirm.
#[allow(clippy::large_enum_variant)]
pub enum IntegerFilteredValuesDecoder<'a, T, P>
where
    T: NativeType,
    i64: AsNative<T>,
{
    /// Pages handled by the generic [`NativeFilteredValuesDecoder`].
    Common(NativeFilteredValuesDecoder<'a, T, P>),
    /// A [`delta_bitpacked::Decoder`] wrapped in a [`SliceFilteredIter`].
    DeltaBinaryPacked(SliceFilteredIter<delta_bitpacked::Decoder<'a>>),
}

impl<'a, T, P> From<(IntegerValuesDecoder<'a, T, P>, VecDeque<Interval>)>
    for IntegerFilteredValuesDecoder<'a, T, P>
where
    T: NativeType,
    i64: AsNative<T>,
{
    /// Combines an unfiltered decoder with `intervals`, keeping the same variant.
    ///
    /// The `Common` variant is converted through the conversion of its inner
    /// decoder; the `DeltaBinaryPacked` variant is wrapped in a
    /// [`SliceFilteredIter`] built from the decoder and the intervals.
    fn from(source: (IntegerValuesDecoder<'a, T, P>, VecDeque<Interval>)) -> Self {
        let (decoder, intervals) = source;
        match decoder {
            IntegerValuesDecoder::Common(native) => {
                // The target type is fixed by the `Common` variant's payload.
                let filtered = (native, intervals).into();
                Self::Common(filtered)
            }
            IntegerValuesDecoder::DeltaBinaryPacked(delta) => {
                let filtered = SliceFilteredIter::new(delta, intervals);
                Self::DeltaBinaryPacked(filtered)
            }
        }
    }
}

/// A [`Decoder`] specialised to integer pages: [`IntegerValuesDecoder`] for its
/// values and [`IntegerFilteredValuesDecoder`] for its filtered values.
pub type IntegerDecoder<'a, T, P> = Decoder<
    'a,
    IntegerValuesDecoder<'a, T, P>,
    IntegerFilteredValuesDecoder<'a, T, P>,
>;
2 changes: 2 additions & 0 deletions src/deserialize/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod boolean;
mod filtered_rle;
mod fixed_binary;
mod hybrid_rle;
mod integer;
mod native;
mod utils;
mod values;
Expand All @@ -12,6 +13,7 @@ pub use boolean::*;
pub use filtered_rle::*;
pub use fixed_binary::*;
pub use hybrid_rle::*;
pub use integer::*;
pub use native::*;
pub use utils::{
DefLevelsDecoder, FilteredOptionalPageValidity, OptionalPageValidity, OptionalValues,
Expand Down
1 change: 0 additions & 1 deletion src/deserialize/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ impl<'a, V: ValuesDecoder, F: From<(V, VecDeque<Interval>)>> FilteredDecoder<'a,

/// The deserialization state of a [`DataPage`] of a parquet primitive type
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum Decoder<'a, V: ValuesDecoder, F: From<(V, VecDeque<Interval>)>> {
Full(FullDecoder<'a, V>),
Filtered(FilteredDecoder<'a, V, F>),
Expand Down

0 comments on commit 2deb916

Please sign in to comment.