From 0890503fc2d8f0436e5955e4f0ba5b11807e25c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Marinier?= Date: Tue, 12 Mar 2024 17:50:23 +0100 Subject: [PATCH] Speed up searches by removing repeated memsets coming from vec.resize() Also, reserve exactly the size needed, which, surprisingly, is required to get the full speedup of ~5% on a good fraction of the queries. --- bitpacker/src/bitpacker.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/bitpacker/src/bitpacker.rs b/bitpacker/src/bitpacker.rs index 903daccf81..9c98ca817a 100644 --- a/bitpacker/src/bitpacker.rs +++ b/bitpacker/src/bitpacker.rs @@ -125,6 +125,8 @@ impl BitUnpacker { // Decodes the range of bitpacked `u32` values with idx // in [start_idx, start_idx + output.len()). + // It is guaranteed to completely fill `output` and not read from it, so passing a vector with + // un-initialized values is safe. // // #Panics // @@ -237,7 +239,19 @@ impl BitUnpacker { data: &[u8], positions: &mut Vec<u32>, ) { - positions.resize(id_range.len(), 0u32); + // We use the code below instead of positions.resize(id_range.len(), 0u32) for performance + // reasons: on some queries, the CPU cost of memsetting the array and of using a bigger + // vector than necessary is noticeable (~5%). + // In particular, searches are a few percent faster when using reserve_exact() as below + // instead of reserve(). + // The un-initialized values are safe as get_batch_u32s() completely fills `positions` + // and does not read from it. + positions.clear(); + positions.reserve_exact(id_range.len()); + #[allow(clippy::uninit_vec)] + unsafe { + positions.set_len(id_range.len()); + } self.get_batch_u32s(id_range.start, data, positions); crate::filter_vec::filter_vec_in_place(value_range, id_range.start, positions) }