Skip to content

Commit

Permalink
perf: Use BitmapBuilder in yet more places (#20868)
Browse files Browse the repository at this point in the history
  • Loading branch information
orlp authored Jan 23, 2025
1 parent a4cf96f commit ca21bd7
Show file tree
Hide file tree
Showing 11 changed files with 45 additions and 36 deletions.
5 changes: 5 additions & 0 deletions crates/polars-arrow/src/bitmap/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ impl BitmapBuilder {
self.bit_len
}

/// Returns `true` if the builder currently holds no bits, i.e. its
/// bit length (`bit_len`) is zero.
#[inline(always)]
pub fn is_empty(&self) -> bool {
self.bit_len == 0
}

#[inline(always)]
pub fn capacity(&self) -> usize {
self.bit_cap
Expand Down
10 changes: 7 additions & 3 deletions crates/polars-core/src/chunked_array/object/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,17 @@ where
unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, 0) }
}

pub fn new_from_vec_and_validity(name: PlSmallStr, v: Vec<T>, validity: Bitmap) -> Self {
pub fn new_from_vec_and_validity(
name: PlSmallStr,
v: Vec<T>,
validity: Option<Bitmap>,
) -> Self {
let field = Arc::new(Field::new(name, DataType::Object(T::type_name(), None)));
let len = v.len();
let null_count = validity.unset_bits();
let null_count = validity.as_ref().map(|v| v.unset_bits()).unwrap_or(0);
let arr = Box::new(ObjectArray {
values: v.into(),
validity: Some(validity),
validity,
});

unsafe { ObjectChunked::new_with_dims(field, vec![arr], len, null_count) }
Expand Down
12 changes: 6 additions & 6 deletions crates/polars-plan/src/dsl/function_expr/shift_and_fill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,24 @@ where
feature = "dtype-categorical"
))]
fn shift_and_fill_with_mask(s: &Column, n: i64, fill_value: &Column) -> PolarsResult<Column> {
use polars_core::export::arrow::array::BooleanArray;
use polars_core::export::arrow::bitmap::MutableBitmap;
use arrow::array::BooleanArray;
use arrow::bitmap::BitmapBuilder;

let mask: BooleanChunked = if n > 0 {
let len = s.len();
let mut bits = MutableBitmap::with_capacity(s.len());
let mut bits = BitmapBuilder::with_capacity(s.len());
bits.extend_constant(n as usize, false);
bits.extend_constant(len.saturating_sub(n as usize), true);
let mask = BooleanArray::from_data_default(bits.into(), None);
let mask = BooleanArray::from_data_default(bits.freeze(), None);
mask.into()
} else {
let length = s.len() as i64;
// n is negative, so subtraction.
let tipping_point = std::cmp::max(length + n, 0);
let mut bits = MutableBitmap::with_capacity(s.len());
let mut bits = BitmapBuilder::with_capacity(s.len());
bits.extend_constant(tipping_point as usize, true);
bits.extend_constant(-n as usize, false);
let mask = BooleanArray::from_data_default(bits.into(), None);
let mask = BooleanArray::from_data_default(bits.freeze(), None);
mask.into()
};
s.shift(n).zip_with_same_type(&mask, fill_value)
Expand Down
6 changes: 3 additions & 3 deletions crates/polars-python/src/series/construction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::borrow::Cow;
use numpy::{Element, PyArray1, PyArrayMethods};
use polars::export::arrow;
use polars::export::arrow::array::Array;
use polars::export::arrow::bitmap::MutableBitmap;
use polars::export::arrow::bitmap::BitmapBuilder;
use polars::export::arrow::types::NativeType;
use polars_core::prelude::*;
use polars_core::utils::CustomIterTools;
Expand Down Expand Up @@ -294,7 +294,7 @@ impl PySeries {
pub fn new_object(py: Python, name: &str, values: Vec<ObjectValue>, _strict: bool) -> Self {
#[cfg(feature = "object")]
{
let mut validity = MutableBitmap::with_capacity(values.len());
let mut validity = BitmapBuilder::with_capacity(values.len());
values.iter().for_each(|v| {
let is_valid = !v.inner.is_none(py);
// SAFETY: we can ensure that validity has correct capacity.
Expand All @@ -304,7 +304,7 @@ impl PySeries {
let ca = ObjectChunked::<ObjectValue>::new_from_vec_and_validity(
name.into(),
values,
validity.into(),
validity.into_opt_validity(),
);
let s = ca.into_series();
s.into()
Expand Down
12 changes: 6 additions & 6 deletions crates/polars-row/src/decode.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use arrow::bitmap::{Bitmap, MutableBitmap};
use arrow::bitmap::{Bitmap, BitmapBuilder};
use arrow::buffer::Buffer;
use arrow::datatypes::ArrowDataType;
use arrow::offset::OffsetsBuffer;
Expand Down Expand Up @@ -62,16 +62,16 @@ unsafe fn decode_validity(rows: &mut [&[u8]], opt: RowEncodingOptions) -> Option
// No nulls just return None
let first_null = first_null?;

let mut bm = MutableBitmap::new();
let mut bm = BitmapBuilder::new();
bm.reserve(rows.len());
bm.extend_constant(first_null, true);
bm.push(false);
bm.extend_from_trusted_len_iter(rows[first_null + 1..].iter_mut().map(|row| {
bm.extend_trusted_len_iter(rows[first_null + 1..].iter_mut().map(|row| {
let v;
(v, *row) = row.split_at_unchecked(1);
v[0] != null_sentinel
}));
Some(bm.freeze())
bm.into_opt_validity()
}

// We inline this in an attempt to avoid the dispatch cost.
Expand Down Expand Up @@ -240,7 +240,7 @@ unsafe fn decode(
FixedSizeListArray::new(dtype.clone(), rows.len(), values, validity).to_boxed()
},
D::List(list_field) | D::LargeList(list_field) => {
let mut validity = MutableBitmap::new();
let mut validity = BitmapBuilder::new();

// @TODO: we could consider making this into a scratchpad
let num_rows = rows.len();
Expand Down Expand Up @@ -281,7 +281,7 @@ unsafe fn decode(
None
} else {
validity.extend_constant(num_rows - validity.len(), true);
Some(validity.freeze())
validity.into_opt_validity()
};
assert_eq!(offsets.len(), rows.len() + 1);

Expand Down
6 changes: 3 additions & 3 deletions crates/polars-row/src/fixed/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
use std::mem::MaybeUninit;

use arrow::array::{Array, PrimitiveArray};
use arrow::bitmap::MutableBitmap;
use arrow::bitmap::BitmapBuilder;
use arrow::datatypes::ArrowDataType;
use polars_utils::slice::Slice2Uninit;

Expand Down Expand Up @@ -205,7 +205,7 @@ pub unsafe fn decode(
return PrimitiveArray::new(ArrowDataType::Int128, values.into(), None);
}

let mut validity = MutableBitmap::with_capacity(rows.len());
let mut validity = BitmapBuilder::with_capacity(rows.len());
validity.extend_constant(values.len(), true);

let start_len = values.len();
Expand Down Expand Up @@ -238,6 +238,6 @@ pub unsafe fn decode(
PrimitiveArray::new(
ArrowDataType::Int128,
values.into(),
Some(validity.freeze()),
validity.into_opt_validity(),
)
}
6 changes: 3 additions & 3 deletions crates/polars-row/src/fixed/packed_u32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
use std::mem::MaybeUninit;

use arrow::array::{Array, PrimitiveArray};
use arrow::bitmap::MutableBitmap;
use arrow::bitmap::BitmapBuilder;
use arrow::datatypes::ArrowDataType;
use polars_utils::slice::Slice2Uninit;

Expand Down Expand Up @@ -154,7 +154,7 @@ pub unsafe fn decode(
return PrimitiveArray::new(ArrowDataType::UInt32, values.into(), None);
}

let mut validity = MutableBitmap::with_capacity(rows.len());
let mut validity = BitmapBuilder::with_capacity(rows.len());
validity.extend_constant(values.len(), true);

let start_len = values.len();
Expand All @@ -175,6 +175,6 @@ pub unsafe fn decode(
PrimitiveArray::new(
ArrowDataType::UInt32,
values.into(),
Some(validity.freeze()),
validity.into_opt_validity(),
)
}
8 changes: 4 additions & 4 deletions crates/polars-row/src/utils.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use arrow::bitmap::{Bitmap, MutableBitmap};
use arrow::bitmap::{Bitmap, BitmapBuilder};

#[macro_export]
macro_rules! with_match_arrow_primitive_type {(
Expand Down Expand Up @@ -27,15 +27,15 @@ pub(crate) unsafe fn decode_opt_nulls(rows: &[&[u8]], null_sentinel: u8) -> Opti
.iter()
.position(|row| *row.get_unchecked(0) == null_sentinel)?;

let mut bm = MutableBitmap::with_capacity(rows.len());
let mut bm = BitmapBuilder::with_capacity(rows.len());
bm.extend_constant(first_null, true);
bm.push(false);

bm.extend_from_trusted_len_iter_unchecked(
bm.extend_trusted_len_iter(
rows[first_null + 1..]
.iter()
.map(|row| *row.get_unchecked(0) != null_sentinel),
);

Some(bm.freeze())
bm.into_opt_validity()
}
6 changes: 3 additions & 3 deletions crates/polars-row/src/variable/no_order.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
use std::mem::MaybeUninit;

use arrow::array::{BinaryViewArray, MutableBinaryViewArray};
use arrow::bitmap::MutableBitmap;
use arrow::bitmap::BitmapBuilder;
use polars_utils::slice::Slice2Uninit;

use crate::row::RowEncodingOptions;
Expand Down Expand Up @@ -90,7 +90,7 @@ pub unsafe fn decode_variable_no_order(

let num_rows = rows.len();
let mut array = MutableBinaryViewArray::<[u8]>::with_capacity(num_rows);
let mut validity = MutableBitmap::new();
let mut validity = BitmapBuilder::new();

for row in rows.iter_mut() {
let sentinel = *unsafe { row.get_unchecked(0) };
Expand Down Expand Up @@ -142,5 +142,5 @@ pub unsafe fn decode_variable_no_order(
}

let array = array.freeze();
array.with_validity(Some(validity.freeze()))
array.with_validity(validity.into_opt_validity())
}
6 changes: 3 additions & 3 deletions crates/polars-row/src/variable/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
use std::mem::MaybeUninit;

use arrow::array::{MutableBinaryViewArray, Utf8ViewArray};
use arrow::bitmap::MutableBitmap;
use arrow::bitmap::BitmapBuilder;

use crate::row::RowEncodingOptions;

Expand Down Expand Up @@ -98,7 +98,7 @@ pub unsafe fn decode_str(rows: &mut [&[u8]], opt: RowEncodingOptions) -> Utf8Vie
return array.into();
}

let mut validity = MutableBitmap::with_capacity(num_rows);
let mut validity = BitmapBuilder::with_capacity(num_rows);
validity.extend_constant(array.len(), true);
validity.push(false);
array.push_value_ignore_validity("");
Expand All @@ -124,5 +124,5 @@ pub unsafe fn decode_str(rows: &mut [&[u8]], opt: RowEncodingOptions) -> Utf8Vie
}

let out: Utf8ViewArray = array.into();
out.with_validity(Some(validity.freeze()))
out.with_validity(validity.into_opt_validity())
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use polars_core::prelude::{
};
use polars_core::scalar::Scalar;
use polars_core::series::{IsSorted, Series};
use polars_core::utils::arrow::bitmap::{Bitmap, MutableBitmap};
use polars_core::utils::arrow::bitmap::{Bitmap, BitmapBuilder};
use polars_error::{polars_bail, PolarsResult};
use polars_io::hive;
use polars_io::predicates::PhysicalIoExpr;
Expand Down Expand Up @@ -587,7 +587,7 @@ impl RowGroupDecoder {
}

let mask_bitmap = {
let mut mask_bitmap = MutableBitmap::with_capacity(mask.len());
let mut mask_bitmap = BitmapBuilder::with_capacity(mask.len());

for chunk in mask.downcast_iter() {
match chunk.validity() {
Expand Down

0 comments on commit ca21bd7

Please sign in to comment.