From d8a90c79c5d459fdf0a1e18221e70c3c35cd1463 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Tue, 25 Jun 2024 16:57:05 -0400 Subject: [PATCH 1/8] Vector based on lgalloc for flat container Signed-off-by: Moritz Hoffmann --- src/ore/src/flatcontainer.rs | 260 ++++++++++++++++++++++++++++++++- src/ore/src/region.rs | 273 +++++++++++++++++++++++++++++++++++ 2 files changed, 530 insertions(+), 3 deletions(-) diff --git a/src/ore/src/flatcontainer.rs b/src/ore/src/flatcontainer.rs index d74b84ac082e6..fb3850faaa7f0 100644 --- a/src/ore/src/flatcontainer.rs +++ b/src/ore/src/flatcontainer.rs @@ -105,13 +105,12 @@ mod copy { } mod vec { - use flatcontainer::OwnedRegion; - + use crate::flatcontainer::lgalloc::LgAllocOwnedRegion; use crate::flatcontainer::{MzRegionPreference, OwnedRegionOpinion}; impl MzRegionPreference for OwnedRegionOpinion> { type Owned = Vec; - type Region = OwnedRegion; + type Region = LgAllocOwnedRegion; } } @@ -119,3 +118,258 @@ impl MzRegionPreference for Option { type Owned = as Region>::Owned; type Region = flatcontainer::OptionRegion; } + +mod lgalloc { + //! A region that stores slices of clone types in lgalloc + + use crate::region::LgAllocVec; + use flatcontainer::{CopyIter, Push, Region, ReserveItems}; + + /// A container for owned types. + /// + /// The container can absorb any type, and stores an owned version of the type, similarly to what + /// vectors do. We recommend using this container for copy types, but there is no restriction in + /// the implementation, and in fact it can correctly store owned values, although any data owned + /// by `T` is regular heap-allocated data, and not contained in regions. 
+ /// + /// # Examples + /// + /// ``` + /// use flatcontainer::{Push, OwnedRegion, Region}; + /// let mut r = >::default(); + /// + /// let panagram_en = "The quick fox jumps over the lazy dog"; + /// let panagram_de = "Zwölf Boxkämpfer jagen Viktor quer über den großen Sylter Deich"; + /// + /// let en_index = r.push(panagram_en.as_bytes()); + /// let de_index = r.push(panagram_de.as_bytes()); + /// + /// assert_eq!(panagram_de.as_bytes(), r.index(de_index)); + /// assert_eq!(panagram_en.as_bytes(), r.index(en_index)); + /// ``` + #[derive(Debug)] + pub struct LgAllocOwnedRegion { + slices: LgAllocVec, + } + + impl Clone for LgAllocOwnedRegion { + fn clone(&self) -> Self { + Self { + slices: self.slices.clone(), + } + } + + fn clone_from(&mut self, source: &Self) { + self.slices.clone_from(&source.slices); + } + } + + impl Region for LgAllocOwnedRegion + where + [T]: ToOwned, + { + type Owned = <[T] as ToOwned>::Owned; + type ReadItem<'a> = &'a [T] where Self: 'a; + type Index = (usize, usize); + + #[inline] + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self { + slices: LgAllocVec::with_capacity(regions.map(|r| r.slices.len()).sum()), + } + } + + #[inline] + fn index(&self, (start, end): Self::Index) -> Self::ReadItem<'_> { + &self.slices[start..end] + } + + #[inline] + fn reserve_regions<'a, I>(&mut self, regions: I) + where + Self: 'a, + I: Iterator + Clone, + { + self.slices.reserve(regions.map(|r| r.slices.len()).sum()); + } + + #[inline] + fn clear(&mut self) { + self.slices.clear(); + } + + #[inline] + fn heap_size(&self, mut callback: F) { + let size_of_t = std::mem::size_of::(); + callback( + self.slices.len() * size_of_t, + self.slices.capacity() * size_of_t, + ); + } + + #[inline] + fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b> + where + Self: 'a, + { + item + } + } + + impl Default for LgAllocOwnedRegion { + #[inline] + fn default() -> Self { + Self { + slices: LgAllocVec::default(), 
+ } + } + } + + impl Push<&[T; N]> for LgAllocOwnedRegion { + #[inline] + fn push(&mut self, item: &[T; N]) -> as Region>::Index { + let start = self.slices.len(); + self.slices.extend_from_slice(item); + (start, self.slices.len()) + } + } + + impl Push<&&[T; N]> for LgAllocOwnedRegion { + #[inline] + fn push(&mut self, item: &&[T; N]) -> as Region>::Index { + self.push(*item) + } + } + + impl<'b, T: Clone, const N: usize> ReserveItems<&'b [T; N]> for LgAllocOwnedRegion { + #[inline] + fn reserve_items(&mut self, items: I) + where + I: Iterator + Clone, + { + self.slices.reserve(items.map(|i| i.len()).sum()); + } + } + + impl Push<&[T]> for LgAllocOwnedRegion { + #[inline] + fn push(&mut self, item: &[T]) -> as Region>::Index { + let start = self.slices.len(); + self.slices.extend_from_slice(item); + (start, self.slices.len()) + } + } + + impl Push<&&[T]> for LgAllocOwnedRegion + where + for<'a> Self: Push<&'a [T]>, + { + #[inline] + fn push(&mut self, item: &&[T]) -> as Region>::Index { + self.push(*item) + } + } + + impl<'b, T> ReserveItems<&'b [T]> for LgAllocOwnedRegion + where + [T]: ToOwned, + { + #[inline] + fn reserve_items(&mut self, items: I) + where + I: Iterator + Clone, + { + self.slices.reserve(items.map(<[T]>::len).sum()); + } + } + + impl Push> for LgAllocOwnedRegion + where + [T]: ToOwned, + { + #[inline] + fn push(&mut self, mut item: Vec) -> as Region>::Index { + let start = self.slices.len(); + self.slices.append(&mut item); + (start, self.slices.len()) + } + } + + impl Push<&Vec> for LgAllocOwnedRegion { + #[inline] + fn push(&mut self, item: &Vec) -> as Region>::Index { + self.push(item.as_slice()) + } + } + + impl<'a, T> ReserveItems<&'a Vec> for LgAllocOwnedRegion + where + [T]: ToOwned, + { + #[inline] + fn reserve_items(&mut self, items: I) + where + I: Iterator> + Clone, + { + self.reserve_items(items.map(Vec::as_slice)); + } + } + + impl> ReserveItems> for LgAllocOwnedRegion + where + [T]: ToOwned, + { + #[inline] + fn reserve_items(&mut 
self, items: I) + where + I: Iterator> + Clone, + { + self.slices + .reserve(items.flat_map(|i| i.0.into_iter()).count()); + } + } + + #[cfg(test)] + mod tests { + use crate::{CopyIter, Push, Region, ReserveItems}; + + use super::*; + + #[test] + fn test_copy_array() { + let mut r = >::default(); + r.reserve_items(std::iter::once(&[1; 4])); + let index = r.push([1; 4]); + assert_eq!([1, 1, 1, 1], r.index(index)); + } + + #[test] + fn test_copy_ref_ref_array() { + let mut r = >::default(); + ReserveItems::reserve_items(&mut r, std::iter::once(&[1; 4])); + let index = r.push(&&[1; 4]); + assert_eq!([1, 1, 1, 1], r.index(index)); + } + + #[test] + fn test_copy_vec() { + let mut r = >::default(); + ReserveItems::reserve_items(&mut r, std::iter::once(&vec![1; 4])); + let index = r.push(&vec![1; 4]); + assert_eq!([1, 1, 1, 1], r.index(index)); + let index = r.push(vec![2; 4]); + assert_eq!([2, 2, 2, 2], r.index(index)); + } + + #[test] + fn test_copy_iter() { + let mut r = >::default(); + r.reserve_items(std::iter::once(CopyIter(std::iter::repeat(1).take(4)))); + let index = r.push(CopyIter(std::iter::repeat(1).take(4))); + assert_eq!([1, 1, 1, 1], r.index(index)); + } + } +} diff --git a/src/ore/src/region.rs b/src/ore/src/region.rs index 9fcc974dac9e8..af803a9068325 100644 --- a/src/ore/src/region.rs +++ b/src/ore/src/region.rs @@ -19,6 +19,8 @@ use std::fmt::{Debug, Formatter}; use std::mem::ManuallyDrop; use std::ops::{Deref, DerefMut}; +pub use vec::LgAllocVec; + /// A region allocator which holds items at stable memory locations. /// /// Items once inserted will not be moved, and their locations in memory @@ -392,3 +394,274 @@ impl Drop for MMapRegion { lgalloc::deallocate(self.handle.take().unwrap()); } } + +mod vec { + use std::fmt::{Debug, Formatter}; + use std::mem::{ManuallyDrop, MaybeUninit}; + use std::ops::Deref; + use std::ptr; + + /// A fixed-length region in memory, which is either allocated from heap or lgalloc. 
+ pub struct LgAllocVec { + /// A handle to lgalloc. None for heap allocations, Some if the memory comes from lgalloc. + handle: Option, + /// Slice representation of the memory. Elements 0..self.length are valid. + elements: ManuallyDrop]>>, + /// The number of valid elements in `elements` + length: usize, + } + + impl LgAllocVec { + /// Create a new [`LgAllocVec`] with the specified capacity. The actual capacity of the returned + /// array is at least as big as the requested capacity. + pub fn with_capacity(capacity: usize) -> Self { + // Allocate memory, fall-back to regular heap allocations if we cannot acquire memory through + // lgalloc. + let (handle, boxed) = if let Ok((ptr, actual_capacity, handle)) = + lgalloc::allocate::>(capacity) + { + // We allocated successfully through lgalloc. + let handle = Some(handle); + // SAFETY: `ptr` is valid for constructing a slice: + // 1. Valid for reading and writing, and enough capacity. + // 2. Properly initialized (left for writing). + // 3. Not aliased. + // 4. Total size not longer than isize::MAX because lgalloc has a capacity limit. + let slice = + unsafe { std::slice::from_raw_parts_mut(ptr.as_ptr(), actual_capacity) }; + // SAFETY: slice is valid, and we deallocate it using lgalloc. + (handle, unsafe { Box::from_raw(slice) }) + } else { + // We failed to allocate through lgalloc, fall back to heap. + let mut vec = Vec::with_capacity(capacity); + // SAFETY: We treat all elements as uninitialized and track initialized elements + // through `self.length`. + unsafe { + vec.set_len(vec.capacity()); + } + (None, vec.into_boxed_slice()) + }; + + let elements = ManuallyDrop::new(boxed); + Self { + handle, + elements, + length: 0, + } + } + + /// Visit contained allocations to determine their size and capacity. 
+ pub fn heap_size(&self, mut callback: impl FnMut(usize, usize)) { + let size_of_t = std::mem::size_of::(); + callback(self.len() * size_of_t, self.capacity() * size_of_t) + } + + /// Move an element onto the array. Increases the capacity if required. + pub fn push(&mut self, item: T) { + if self.len() == self.capacity() { + self.reserve(1); + } + self.elements[self.length].write(item); + self.length += 1; + } + + /// Extend the array from a slice. Increases the capacity if required. + pub fn extend_from_slice(&mut self, slice: &[T]) + where + T: Clone, + { + self.reserve(slice.len()); + let mut iterator = slice.iter().cloned(); + while let Some(element) = iterator.next() { + let len = self.len(); + if len == self.capacity() { + let (lower, _) = iterator.size_hint(); + self.reserve(lower.saturating_add(1)); + } + unsafe { + ptr::write( + self.elements.as_mut_ptr().add(len), + MaybeUninit::new(element), + ); + self.set_len(len + 1); + } + } + } + + /// Extend the array from a slice of copyable elements. Increases the capacity if required. + pub fn extend_from_copy_slice(&mut self, slice: &[T]) + where + T: Copy, + { + let count = slice.len(); + self.reserve(count); + let len = self.len(); + unsafe { + ptr::copy_nonoverlapping( + slice.as_ptr(), + self.elements.as_mut_ptr().add(len) as *const MaybeUninit as *mut T, + count, + ); + self.set_len(len + count); + } + } + + /// Move elements from a vector to the array. Increases the capacity if required. + pub fn append(&mut self, data: &mut Vec) { + let count = data.len(); + self.reserve(count); + let len = self.len(); + unsafe { + data.set_len(0); + ptr::copy_nonoverlapping( + data.as_ptr(), + self.elements.as_mut_ptr().add(len) as *const MaybeUninit as *mut T, + count, + ); + self.set_len(len + count); + } + } + + /// Update the length. Highly unsafe because it doesn't drop elements when reducing the length, + /// and doesn't initialize elements when increasing the length. 
+ #[inline] + pub unsafe fn set_len(&mut self, length: usize) { + debug_assert!(length <= self.capacity()); + self.length = length; + } + + /// The number of elements this array can absorb. + pub fn capacity(&self) -> usize { + self.elements.len() + } + + /// Remove all elements. Drops the contents, but leaves the allocation untouched. + pub fn clear(&mut self) { + let elems = &mut self.elements[..self.length]; + // We are about to run the type's destructor, which may panic. Therefore we set the length + // of the array to zero so that if we have to unwind the stack we don't end up re-dropping + // invalid memory through the Drop impl of `LgAllocVec` itself. + self.length = 0; + for e in elems { + // SAFETY: We know elements up to `length` are initialized. + unsafe { + e.assume_init_drop(); + } + } + } + + /// Grow the array to at least `new_len` elements. Reallocates the underlying storage. + fn grow(&mut self, new_len: usize) { + let new_capacity = std::cmp::max(self.capacity() * 2, new_len); + let mut new_vec = LgAllocVec::with_capacity(new_capacity); + + let src_ptr = self.elements.as_ptr(); + let dst_ptr = new_vec.elements.as_mut_ptr(); + let len = self.len(); + + unsafe { + self.set_len(0); + std::ptr::copy_nonoverlapping(src_ptr, dst_ptr, len); + new_vec.set_len(len); + } + + std::mem::swap(&mut new_vec, self); + } + + /// Reserve space for at least `additional` elements. The capacity is increased if necessary. + pub fn reserve(&mut self, additional: usize) { + let new_len = self.len() + additional; + if new_len > self.capacity() { + self.grow(new_len); + } + } + } + + impl Clone for LgAllocVec { + fn clone(&self) -> Self { + let mut new_vec = LgAllocVec::with_capacity(self.len()); + new_vec.extend_from_slice(self); + new_vec + } + + fn clone_from(&mut self, source: &Self) { + // TODO: Optimize for reuse of existing elements. 
+ self.clear(); + self.extend_from_slice(source); + } + } + + impl Default for LgAllocVec { + fn default() -> Self { + Self::with_capacity(0) + } + } + + impl Deref for LgAllocVec { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + // TODO: Use `slice_assume_init_ref` once stable. + // Context: https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.slice_assume_init_ref + // The following safety argument is adapted from the source. + // SAFETY: casting `elements` to a `*const [T]` is safe since the caller guarantees that + // `slice` is initialized, and `MaybeUninit` is guaranteed to have the same layout as `T`. + // The pointer obtained is valid since it refers to memory owned by `elements` which is a + // reference and thus guaranteed to be valid for reads. + #[allow(clippy::as_conversions)] + unsafe { + &*(&self.elements[..self.length] as *const [MaybeUninit] as *const [T]) + } + } + } + + impl Drop for LgAllocVec { + fn drop(&mut self) { + self.clear(); + if let Some(handle) = self.handle.take() { + // Memory allocated through lgalloc + lgalloc::deallocate(handle); + } else { + // Regular allocation + // SAFETY: `elements` is a sliced box allocated from the global allocator, drop it. + unsafe { + ManuallyDrop::drop(&mut self.elements); + } + } + } + } + + impl Debug for LgAllocVec { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.deref().fmt(f) + } + } + + #[cfg(test)] + mod test { + use std::sync::atomic::{AtomicUsize, Ordering}; + + use super::*; + + #[mz_ore::test] + fn double_drop() { + static DROP_COUNT: AtomicUsize = AtomicUsize::new(0); + struct DropGuard; + + impl Drop for DropGuard { + fn drop(&mut self) { + let drops = DROP_COUNT.fetch_add(1, Ordering::Relaxed); + // If this is the first time we're being dropped, panic. 
+ if drops == 0 { + panic!(); + } + } + } + + let mut array = LgAllocVec::with_capacity(1); + array.push(DropGuard); + let _ = mz_ore::panic::catch_unwind(move || array.clear()); + assert_eq!(DROP_COUNT.load(Ordering::Relaxed), 1); + } + } +} From d1956af726aed289aff00da44b7f9a5fbf9a7906 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Fri, 28 Jun 2024 14:25:25 -0400 Subject: [PATCH 2/8] Switch KeyValSpine to flatcontainer Signed-off-by: Moritz Hoffmann --- Cargo.lock | 20 +- Cargo.toml | 11 +- misc/cargo-vet/audits.toml | 3 +- src/cluster/src/communication.rs | 3 +- src/compute/Cargo.toml | 2 +- src/compute/src/extensions/arrange.rs | 65 +-- src/compute/src/logging/differential.rs | 2 +- src/compute/src/logging/initialize.rs | 7 +- src/compute/src/logging/reachability.rs | 8 +- src/compute/src/logging/timely.rs | 57 ++- src/compute/src/typedefs.rs | 150 +++--- src/ore/Cargo.toml | 6 +- src/ore/src/flatcontainer.rs | 580 ++++++++++++++++++++++-- src/ore/src/lib.rs | 2 +- src/ore/src/region.rs | 19 +- src/repr/Cargo.toml | 4 +- src/repr/src/timestamp.rs | 5 +- src/timely-util/src/containers.rs | 85 ++++ 18 files changed, 826 insertions(+), 203 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c85204fe57a34..719e25d811b5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1981,7 +1981,7 @@ dependencies = [ [[package]] name = "differential-dataflow" version = "0.12.0" -source = "git+https://github.com/MaterializeInc/differential-dataflow.git#7760a903c9c451f7cf039a3978994251bafd8721" +source = "git+https://github.com/antiguru/differential-dataflow.git?branch=consolidate_layout_merger_chunk#04a2446fc68a0025529cf676f948f9e8af711aa5" dependencies = [ "abomonation", "abomonation_derive", @@ -2037,7 +2037,7 @@ checksum = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97" [[package]] name = "dogsdogsdogs" version = "0.1.0" -source = "git+https://github.com/MaterializeInc/differential-dataflow.git#7760a903c9c451f7cf039a3978994251bafd8721" +source = 
"git+https://github.com/antiguru/differential-dataflow.git?branch=consolidate_layout_merger_chunk#04a2446fc68a0025529cf676f948f9e8af711aa5" dependencies = [ "abomonation", "abomonation_derive", @@ -2417,9 +2417,9 @@ dependencies = [ [[package]] name = "flatcontainer" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcaca60d6093f2c5328fe97b9dfafa16a3968577bc5df75ebd6b23ea79b0a0a4" +checksum = "0ff185ea156496de196dfd189038982f480515ea3338f1ff0a4fbff1e52ea0a6" dependencies = [ "cfg-if", "paste", @@ -5438,6 +5438,7 @@ dependencies = [ "criterion", "ctor", "derivative", + "differential-dataflow", "either", "flatcontainer", "futures", @@ -5469,6 +5470,7 @@ dependencies = [ "serde_json", "smallvec", "stacker", + "timely", "tokio", "tokio-native-tls", "tokio-openssl", @@ -9590,7 +9592,7 @@ dependencies = [ [[package]] name = "timely" version = "0.12.0" -source = "git+https://github.com/MaterializeInc/timely-dataflow.git#0c26e5e4198085d6c90db11930f2dba52e9f32cc" +source = "git+https://github.com/MaterializeInc/timely-dataflow.git#f6a5b3620de2050f123aad57fb40a535ba417869" dependencies = [ "abomonation", "abomonation_derive", @@ -9607,12 +9609,12 @@ dependencies = [ [[package]] name = "timely_bytes" version = "0.12.0" -source = "git+https://github.com/MaterializeInc/timely-dataflow.git#0c26e5e4198085d6c90db11930f2dba52e9f32cc" +source = "git+https://github.com/MaterializeInc/timely-dataflow.git#f6a5b3620de2050f123aad57fb40a535ba417869" [[package]] name = "timely_communication" version = "0.12.0" -source = "git+https://github.com/MaterializeInc/timely-dataflow.git#0c26e5e4198085d6c90db11930f2dba52e9f32cc" +source = "git+https://github.com/MaterializeInc/timely-dataflow.git#f6a5b3620de2050f123aad57fb40a535ba417869" dependencies = [ "abomonation", "abomonation_derive", @@ -9628,7 +9630,7 @@ dependencies = [ [[package]] name = "timely_container" version = "0.12.0" -source = 
"git+https://github.com/MaterializeInc/timely-dataflow.git#0c26e5e4198085d6c90db11930f2dba52e9f32cc" +source = "git+https://github.com/MaterializeInc/timely-dataflow.git#f6a5b3620de2050f123aad57fb40a535ba417869" dependencies = [ "columnation", "flatcontainer", @@ -9638,7 +9640,7 @@ dependencies = [ [[package]] name = "timely_logging" version = "0.12.0" -source = "git+https://github.com/MaterializeInc/timely-dataflow.git#0c26e5e4198085d6c90db11930f2dba52e9f32cc" +source = "git+https://github.com/MaterializeInc/timely-dataflow.git#f6a5b3620de2050f123aad57fb40a535ba417869" [[package]] name = "tiny-keccak" diff --git a/Cargo.toml b/Cargo.toml index d2bdcceeec326..d3d153c0fe5b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -263,6 +263,13 @@ debug = 2 # tend to get rewritten or disappear (e.g., because a PR is force pushed or gets # merged), after which point it becomes impossible to build that historical # version of Materialize. +[patch."https://github.com/TimelyDataflow/timely-dataflow"] +# Projects that do not reliably release to crates.io. +timely = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } +timely_bytes = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } +timely_communication = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } +timely_container = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } +timely_logging = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } [patch.crates-io] # Projects that do not reliably release to crates.io. 
timely = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } @@ -270,8 +277,8 @@ timely_bytes = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } timely_communication = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } timely_container = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } timely_logging = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } -differential-dataflow = { git = "https://github.com/MaterializeInc/differential-dataflow.git" } -dogsdogsdogs = { git = "https://github.com/MaterializeInc/differential-dataflow.git" } +differential-dataflow = { git = "https://github.com/antiguru/differential-dataflow.git", branch = "consolidate_layout_merger_chunk" } +dogsdogsdogs = { git = "https://github.com/antiguru/differential-dataflow.git", branch = "consolidate_layout_merger_chunk" } # Waiting on https://github.com/sfackler/rust-postgres/pull/752. postgres = { git = "https://github.com/MaterializeInc/rust-postgres" } diff --git a/misc/cargo-vet/audits.toml b/misc/cargo-vet/audits.toml index 35bb6ba16ae2a..085ec34c39777 100644 --- a/misc/cargo-vet/audits.toml +++ b/misc/cargo-vet/audits.toml @@ -1,4 +1,3 @@ - # cargo-vet audits file [criteria.maintained-and-necessary] @@ -281,7 +280,7 @@ version = "23.5.26" [[audits.flatcontainer]] who = "Moritz Hoffmann " criteria = "safe-to-deploy" -version = "0.4.1" +version = "0.5.0" [[audits.fluent-uri]] who = "Nikhil Benesch " diff --git a/src/cluster/src/communication.rs b/src/cluster/src/communication.rs index 01298c9df4b49..8598878371794 100644 --- a/src/cluster/src/communication.rs +++ b/src/cluster/src/communication.rs @@ -36,6 +36,7 @@ use std::any::Any; use std::cmp::Ordering; use std::fmt::Display; +use std::sync::Arc; use std::time::Duration; use anyhow::Context; @@ -109,7 +110,7 @@ where } } - match initialize_networking_from_sockets(sockets, process, workers, Box::new(|_| None)) { + match initialize_networking_from_sockets(sockets, 
process, workers, Arc::new(|_| None)) { Ok((stuff, guard)) => { info!(process = process, "successfully initialized network"); Ok(( diff --git a/src/compute/Cargo.toml b/src/compute/Cargo.toml index 2ae7ed265d57b..689d5ee29410a 100644 --- a/src/compute/Cargo.toml +++ b/src/compute/Cargo.toml @@ -30,7 +30,7 @@ mz-compute-types = { path = "../compute-types" } mz-dyncfg = { path = "../dyncfg" } mz-dyncfgs = { path = "../dyncfgs" } mz-expr = { path = "../expr" } -mz-ore = { path = "../ore", features = ["async", "flatcontainer", "process", "tracing_"] } +mz-ore = { path = "../ore", features = ["async", "differential", "flatcontainer", "process", "tracing_"] } mz-persist-client = { path = "../persist-client" } mz-persist-types = { path = "../persist-types" } mz-repr = { path = "../repr" } diff --git a/src/compute/src/extensions/arrange.rs b/src/compute/src/extensions/arrange.rs index cc63887c6cd67..949d8b4aab12f 100644 --- a/src/compute/src/extensions/arrange.rs +++ b/src/compute/src/extensions/arrange.rs @@ -23,7 +23,7 @@ use timely::progress::Timestamp; use timely::Container; use crate::logging::compute::ComputeEvent; -use crate::typedefs::{KeyAgent, KeyValAgent, RowAgent, RowRowAgent, RowValAgent}; +use crate::typedefs::{KeyAgent, RowAgent, RowRowAgent, RowValAgent}; /// Extension trait to arrange data. 
pub trait MzArrange: MzArrangeCore @@ -270,36 +270,6 @@ where } } -impl ArrangementSize for Arranged> -where - G: Scope, - G::Timestamp: Lattice + Ord + Columnation, - K: Data + Columnation, - V: Data + Columnation, - T: Lattice + Timestamp, - R: Semigroup + Ord + Columnation + 'static, -{ - fn log_arrangement_size(self) -> Self { - log_arrangement_size_inner(self, |trace| { - let (mut size, mut capacity, mut allocations) = (0, 0, 0); - let mut callback = |siz, cap| { - size += siz; - capacity += cap; - allocations += usize::from(cap > 0); - }; - trace.map_batches(|batch| { - batch.storage.keys.heap_size(&mut callback); - batch.storage.keys_offs.heap_size(&mut callback); - batch.storage.vals.heap_size(&mut callback); - batch.storage.vals_offs.heap_size(&mut callback); - batch.storage.times.heap_size(&mut callback); - batch.storage.diffs.heap_size(&mut callback); - }); - (size, capacity, allocations) - }) - } -} - impl ArrangementSize for Arranged> where G: Scope, @@ -415,8 +385,8 @@ mod flatcontainer { use differential_dataflow::lattice::Lattice; use differential_dataflow::operators::arrange::Arranged; use differential_dataflow::trace::TraceReader; - use mz_ore::flatcontainer::MzRegionPreference; - use timely::container::flatcontainer::{IntoOwned, Push, Region, ReserveItems}; + use mz_ore::flatcontainer::{MzRegion, MzRegionPreference}; + use timely::container::flatcontainer::{IntoOwned, Region}; use timely::dataflow::Scope; use timely::progress::Timestamp; use timely::PartialOrder; @@ -429,31 +399,10 @@ mod flatcontainer { Self: Clone, G: Scope, G::Timestamp: Lattice + Ord + MzRegionPreference, - K: Region - + Clone - + Push<::Owned> - + for<'a> Push<::ReadItem<'a>> - + for<'a> ReserveItems<::ReadItem<'a>> - + 'static, - V: Region - + Clone - + Push<::Owned> - + for<'a> Push<::ReadItem<'a>> - + for<'a> ReserveItems<::ReadItem<'a>> - + 'static, - T: Region - + Clone - + Push<::Owned> - + for<'a> Push<::ReadItem<'a>> - + for<'a> ReserveItems<::ReadItem<'a>> - + 
'static, - R: Region - + Clone - + Push<::Owned> - + for<'a> Push<&'a ::Owned> - + for<'a> Push<::ReadItem<'a>> - + for<'a> ReserveItems<::ReadItem<'a>> - + 'static, + K: MzRegion, + V: MzRegion, + T: MzRegion, + R: MzRegion, K::Owned: Clone + Ord, V::Owned: Clone + Ord, T::Owned: Lattice + for<'a> PartialOrder<::ReadItem<'a>> + Timestamp, diff --git a/src/compute/src/logging/differential.rs b/src/compute/src/logging/differential.rs index da9c28872fbce..a7074e6681d76 100644 --- a/src/compute/src/logging/differential.rs +++ b/src/compute/src/logging/differential.rs @@ -130,7 +130,7 @@ pub(super) fn construct( let stream_to_collection = |input: Stream<_, ((usize, ()), Timestamp, Diff)>, log, name| { let mut packer = PermutedRowPacker::new(log); input - .mz_arrange_core::<_, KeyValSpine<_, _, _, _>>( + .mz_arrange_core::<_, KeyValSpine>( Pipeline, &format!("PreArrange Differential {name}"), ) diff --git a/src/compute/src/logging/initialize.rs b/src/compute/src/logging/initialize.rs index 8a5a6ab0ccd4d..ff3ad860c462f 100644 --- a/src/compute/src/logging/initialize.rs +++ b/src/compute/src/logging/initialize.rs @@ -18,6 +18,7 @@ use mz_ore::flatcontainer::{MzRegionPreference, OwnedRegionOpinion}; use mz_repr::{Diff, Timestamp}; use mz_storage_operators::persist_source::Subtime; use mz_storage_types::errors::DataflowError; +use mz_timely_util::containers::PreallocatingCapacityContainerBuilder; use mz_timely_util::operator::CollectionExt; use timely::communication::Allocate; use timely::container::flatcontainer::FlatStack; @@ -187,10 +188,8 @@ impl LoggingContext<'_, A> { fn reachability_logger(&self) -> Logger { let event_queue = self.r_event_queue.clone(); - let mut logger = BatchLogger::< - CapacityContainerBuilder>, - _, - >::new(event_queue.link, self.interval_ms); + type CB = PreallocatingCapacityContainerBuilder>; + let mut logger = BatchLogger::::new(event_queue.link, self.interval_ms); Logger::new( self.now, self.start_offset, diff --git 
a/src/compute/src/logging/reachability.rs b/src/compute/src/logging/reachability.rs index 67988eeea7361..1c94908bed87d 100644 --- a/src/compute/src/logging/reachability.rs +++ b/src/compute/src/logging/reachability.rs @@ -19,10 +19,10 @@ use mz_ore::cast::CastFrom; use mz_ore::flatcontainer::{MzRegionPreference, OwnedRegionOpinion}; use mz_ore::iter::IteratorExt; use mz_repr::{Datum, Diff, RowArena, SharedRow, Timestamp}; +use mz_timely_util::containers::PreallocatingCapacityContainerBuilder; use mz_timely_util::replay::MzReplay; use timely::communication::Allocate; use timely::container::flatcontainer::FlatStack; -use timely::container::CapacityContainerBuilder; use timely::dataflow::channels::pact::Pipeline; use crate::extensions::arrange::{MzArrange, MzArrangeCore}; @@ -57,7 +57,7 @@ pub(super) fn construct( ); type UpdatesRegion = <((UpdatesKey, ()), Timestamp, Diff) as MzRegionPreference>::Region; - type CB = CapacityContainerBuilder>; + type CB = PreallocatingCapacityContainerBuilder>; let (updates, token) = Some(event_queue.link).mz_replay::<_, CB, _>( scope, "reachability logs", @@ -102,7 +102,7 @@ pub(super) fn construct( ); let updates = - updates.as_collection(move |(update_type, addr, source, port, ts), _| { + updates.as_collection(move |(&update_type, addr, &source, &port, ts), _| { let row_arena = RowArena::default(); let update_type = if update_type { "source" } else { "target" }; let binding = SharedRow::get(); @@ -118,7 +118,7 @@ pub(super) fn construct( Datum::UInt64(u64::cast_from(port)), Datum::UInt64(u64::cast_from(worker_index)), Datum::String(update_type), - Datum::from(ts.clone()), + Datum::from(ts.copied()), ]; row_builder.packer().extend(key.iter().map(|k| datums[*k])); let key_row = row_builder.clone(); diff --git a/src/compute/src/logging/timely.rs b/src/compute/src/logging/timely.rs index 42c95e01c7c03..714052b8af62b 100644 --- a/src/compute/src/logging/timely.rs +++ b/src/compute/src/logging/timely.rs @@ -9,6 +9,7 @@ //! 
Logging dataflows for events generated by timely dataflow. +use mz_ore::flatcontainer::{MzRegionPreference, OwnedRegionOpinion}; use std::cell::RefCell; use std::collections::BTreeMap; use std::rc::Rc; @@ -17,6 +18,7 @@ use std::time::Duration; use differential_dataflow::consolidation::ConsolidatingContainerBuilder; use mz_compute_client::logging::LoggingConfig; use mz_ore::cast::CastFrom; +use mz_ore::region::LgAllocVec; use mz_repr::{Datum, Diff, Timestamp}; use mz_timely_util::replay::MzReplay; use serde::{Deserialize, Serialize}; @@ -150,7 +152,10 @@ pub(super) fn construct( // updates that reach `Row` encoding. let mut packer = PermutedRowPacker::new(TimelyLog::Operates); let operates = operates - .mz_arrange_core::<_, KeyValSpine<_, _, _, _>>(Pipeline, "PreArrange Timely operates") + .mz_arrange_core::<_, KeyValSpine>( + Pipeline, + "PreArrange Timely operates", + ) .as_collection(move |id, name| { packer.pack_slice(&[ Datum::UInt64(u64::cast_from(*id)), @@ -160,7 +165,10 @@ pub(super) fn construct( }); let mut packer = PermutedRowPacker::new(TimelyLog::Channels); let channels = channels - .mz_arrange_core::<_, KeyValSpine<_, _, _, _>>(Pipeline, "PreArrange Timely operates") + .mz_arrange_core::<_, KeyValSpine>( + Pipeline, + "PreArrange Timely operates", + ) .as_collection(move |datum, ()| { let (source_node, source_port) = datum.source; let (target_node, target_port) = datum.target; @@ -176,7 +184,10 @@ pub(super) fn construct( let mut packer = PermutedRowPacker::new(TimelyLog::Addresses); let addresses = addresses - .mz_arrange_core::<_, KeyValSpine<_, _, _, _>>(Pipeline, "PreArrange Timely addresses") + .mz_arrange_core::<_, KeyValSpine>, Timestamp, Diff, _>>( + Pipeline, + "PreArrange Timely addresses", + ) .as_collection({ move |id, address| { packer.pack_by_index(|packer, index| match index { @@ -190,7 +201,7 @@ pub(super) fn construct( }); let mut packer = PermutedRowPacker::new(TimelyLog::Parks); let parks = parks - .mz_arrange_core::<_, 
KeyValSpine<_, _, _, _>>(Pipeline, "PreArrange Timely parks") + .mz_arrange_core::<_, KeyValSpine>(Pipeline, "PreArrange Timely parks") .as_collection(move |datum, ()| { packer.pack_slice(&[ Datum::UInt64(u64::cast_from(worker_id)), @@ -203,7 +214,7 @@ pub(super) fn construct( }); let mut packer = PermutedRowPacker::new(TimelyLog::BatchesSent); let batches_sent = batches_sent - .mz_arrange_core::<_, KeyValSpine<_, _, _, _>>( + .mz_arrange_core::<_, KeyValSpine>( Pipeline, "PreArrange Timely batches sent", ) @@ -216,7 +227,7 @@ pub(super) fn construct( }); let mut packer = PermutedRowPacker::new(TimelyLog::BatchesReceived); let batches_received = batches_received - .mz_arrange_core::<_, KeyValSpine<_, _, _, _>>( + .mz_arrange_core::<_, KeyValSpine>( Pipeline, "PreArrange Timely batches received", ) @@ -229,7 +240,7 @@ pub(super) fn construct( }); let mut packer = PermutedRowPacker::new(TimelyLog::MessagesSent); let messages_sent = messages_sent - .mz_arrange_core::<_, KeyValSpine<_, _, _, _>>( + .mz_arrange_core::<_, KeyValSpine>( Pipeline, "PreArrange Timely messages sent", ) @@ -242,7 +253,7 @@ pub(super) fn construct( }); let mut packer = PermutedRowPacker::new(TimelyLog::MessagesReceived); let messages_received = messages_received - .mz_arrange_core::<_, KeyValSpine<_, _, _, _>>( + .mz_arrange_core::<_, KeyValSpine>( Pipeline, "PreArrange Timely messages received", ) @@ -255,7 +266,10 @@ pub(super) fn construct( }); let mut packer = PermutedRowPacker::new(TimelyLog::Elapsed); let elapsed = schedules_duration - .mz_arrange_core::<_, KeyValSpine<_, _, _, _>>(Pipeline, "PreArrange Timely duration") + .mz_arrange_core::<_, KeyValSpine>( + Pipeline, + "PreArrange Timely duration", + ) .as_collection(move |operator, _| { packer.pack_slice(&[ Datum::UInt64(u64::cast_from(*operator)), @@ -264,7 +278,10 @@ pub(super) fn construct( }); let mut packer = PermutedRowPacker::new(TimelyLog::Histogram); let histogram = schedules_histogram - .mz_arrange_core::<_, KeyValSpine<_, 
_, _, _>>(Pipeline, "PreArrange Timely histogram") + .mz_arrange_core::<_, KeyValSpine>( + Pipeline, + "PreArrange Timely histogram", + ) .as_collection(move |datum, _| { packer.pack_slice(&[ Datum::UInt64(u64::cast_from(datum.operator)), @@ -378,6 +395,11 @@ impl Columnation for ChannelDatum { type InnerRegion = CopyRegion; } +impl MzRegionPreference for ChannelDatum { + type Owned = Self; + type Region = LgAllocVec; +} + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] struct ParkDatum { duration_pow: u128, @@ -388,6 +410,11 @@ impl Columnation for ParkDatum { type InnerRegion = CopyRegion; } +impl MzRegionPreference for ParkDatum { + type Owned = Self; + type Region = LgAllocVec; +} + #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] struct MessageDatum { channel: usize, @@ -398,6 +425,11 @@ impl Columnation for MessageDatum { type InnerRegion = CopyRegion; } +impl MzRegionPreference for MessageDatum { + type Owned = Self; + type Region = LgAllocVec; +} + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] struct ScheduleHistogramDatum { operator: usize, @@ -408,6 +440,11 @@ impl Columnation for ScheduleHistogramDatum { type InnerRegion = CopyRegion; } +impl MzRegionPreference for ScheduleHistogramDatum { + type Owned = Self; + type Region = LgAllocVec; +} + /// Event handler of the demux operator. struct DemuxHandler<'a, 'b> { /// State kept by the demux operator. 
diff --git a/src/compute/src/typedefs.rs b/src/compute/src/typedefs.rs index 1c4736a948a6c..90213b1ce8ba1 100644 --- a/src/compute/src/typedefs.rs +++ b/src/compute/src/typedefs.rs @@ -19,10 +19,9 @@ use differential_dataflow::trace::implementations::merge_batcher_col::Columnatio use differential_dataflow::trace::implementations::ord_neu::{FlatValSpine, OrdValBatch}; use differential_dataflow::trace::wrappers::enter::TraceEnter; use differential_dataflow::trace::wrappers::frontier::TraceFrontier; -use mz_ore::flatcontainer::MzRegionPreference; +use mz_ore::flatcontainer::{MzRegionPreference, MzTupleABCRegion, MzTupleABRegion}; use mz_repr::Diff; use mz_storage_types::errors::DataflowError; -use timely::container::flatcontainer::impls::tuple::{TupleABCRegion, TupleABRegion}; use timely::dataflow::ScopeParent; pub use crate::row_spine::{RowRowSpine, RowSpine, RowValSpine}; @@ -40,9 +39,10 @@ pub(crate) mod spines { use differential_dataflow::trace::implementations::spine_fueled::Spine; use differential_dataflow::trace::implementations::{Layout, Update}; use differential_dataflow::trace::rc_blanket_impls::RcBuilder; + use mz_ore::flatcontainer::MzRegion; use mz_timely_util::containers::stack::StackWrapper; use timely::container::columnation::{Columnation, TimelyStack}; - use timely::container::flatcontainer::{FlatStack, Push, Region}; + use timely::container::flatcontainer::FlatStack; use timely::progress::Timestamp; use crate::row_spine::OffsetOptimized; @@ -83,63 +83,109 @@ pub(crate) mod spines { } /// A layout based on flat container stacks - pub struct MzFlatLayout { - phantom: std::marker::PhantomData<(K, V, T, R)>, + pub struct MzFlatLayout { + phantom: std::marker::PhantomData<(KR, VR, TR, RR)>, } - impl Update for MzFlatLayout + impl Update for MzFlatLayout where - K: Region, - V: Region, - T: Region, - R: Region, - K::Owned: Ord + Clone + 'static, - V::Owned: Ord + Clone + 'static, - T::Owned: Ord + Clone + Lattice + Timestamp + 'static, - R::Owned: Ord + 
Semigroup + 'static, + KR: MzRegion, + VR: MzRegion, + TR: MzRegion, + RR: MzRegion, + KR::Owned: Ord + Clone + 'static, + VR::Owned: Ord + Clone + 'static, + TR::Owned: Ord + Clone + Lattice + Timestamp + 'static, + RR::Owned: Ord + Semigroup + 'static, + for<'a> KR::ReadItem<'a>: Copy + Ord, + for<'a> VR::ReadItem<'a>: Copy + Ord, + for<'a> TR::ReadItem<'a>: Copy + Ord, + for<'a> RR::ReadItem<'a>: Copy + Ord, { - type Key = K::Owned; - type Val = V::Owned; - type Time = T::Owned; - type Diff = R::Owned; + type Key = KR::Owned; + type Val = VR::Owned; + type Time = TR::Owned; + type Diff = RR::Owned; + type ItemRef<'a> = ((Self::KeyGat<'a>, Self::ValGat<'a>), Self::TimeGat<'a>, Self::DiffGat<'a>) + where + Self: 'a; + type KeyGat<'a> = KR::ReadItem<'a> + where + Self: 'a; + type ValGat<'a> = VR::ReadItem<'a> + where + Self: 'a; + type TimeGat<'a> = TR::ReadItem<'a> + where + Self: 'a; + type DiffGat<'a> = RR::ReadItem<'a> + where + Self: 'a; + + fn into_parts<'a>( + ((key, val), time, diff): Self::ItemRef<'a>, + ) -> ( + Self::KeyGat<'a>, + Self::ValGat<'a>, + Self::TimeGat<'a>, + Self::DiffGat<'a>, + ) { + (key, val, time, diff) + } + + fn reborrow_key<'b, 'a: 'b>(item: Self::KeyGat<'a>) -> Self::KeyGat<'b> + where + Self: 'a, + { + KR::reborrow(item) + } + + fn reborrow_val<'b, 'a: 'b>(item: Self::ValGat<'a>) -> Self::ValGat<'b> + where + Self: 'a, + { + VR::reborrow(item) + } + + fn reborrow_time<'b, 'a: 'b>(item: Self::TimeGat<'a>) -> Self::TimeGat<'b> + where + Self: 'a, + { + TR::reborrow(item) + } + + fn reborrow_diff<'b, 'a: 'b>(item: Self::DiffGat<'a>) -> Self::DiffGat<'b> + where + Self: 'a, + { + RR::reborrow(item) + } } /// Layout implementation for [`MzFlatLayout`]. Mostly equivalent to differential's /// flat layout but with a different opinion for the offset container. Here, we use /// [`OffsetOptimized`] instead of an offset list. If differential should gain access /// to the optimized variant, we might be able to remove this implementation. 
- impl Layout for MzFlatLayout + impl Layout for MzFlatLayout where - K: Region - + Push<::Owned> - + for<'a> Push<::ReadItem<'a>> - + 'static, - V: Region - + Push<::Owned> - + for<'a> Push<::ReadItem<'a>> - + 'static, - T: Region - + Push<::Owned> - + for<'a> Push<::ReadItem<'a>> - + 'static, - R: Region - + Push<::Owned> - + for<'a> Push<::ReadItem<'a>> - + 'static, - K::Owned: Ord + Clone + 'static, - V::Owned: Ord + Clone + 'static, - T::Owned: Ord + Clone + Lattice + Timestamp + 'static, - R::Owned: Ord + Semigroup + 'static, - for<'a> K::ReadItem<'a>: Copy + Ord, - for<'a> V::ReadItem<'a>: Copy + Ord, - for<'a> T::ReadItem<'a>: Copy + Ord, - for<'a> R::ReadItem<'a>: Copy + Ord, + KR: MzRegion, + VR: MzRegion, + TR: MzRegion, + RR: MzRegion, + KR::Owned: Ord + Clone + 'static, + VR::Owned: Ord + Clone + 'static, + TR::Owned: Ord + Clone + Lattice + Timestamp + 'static, + RR::Owned: Ord + Semigroup + 'static, + for<'a> KR::ReadItem<'a>: Copy + Ord, + for<'a> VR::ReadItem<'a>: Copy + Ord, + for<'a> TR::ReadItem<'a>: Copy + Ord, + for<'a> RR::ReadItem<'a>: Copy + Ord, { type Target = Self; - type KeyContainer = FlatStack; - type ValContainer = FlatStack; - type TimeContainer = FlatStack; - type DiffContainer = FlatStack; + type KeyContainer = FlatStack; + type ValContainer = FlatStack; + type TimeContainer = FlatStack; + type DiffContainer = FlatStack; type OffsetContainer = OffsetOptimized; } } @@ -148,10 +194,10 @@ pub(crate) mod spines { // Agents are wrappers around spines that allow shared read access. // Fully generic spines and agents. 
-pub type KeyValSpine = ColValSpine; -pub type KeyValAgent = TraceAgent>; -pub type KeyValEnter = - TraceEnter>, TEnter>; +pub type KeyValSpine = FlatKeyValSpineDefault; +pub type KeyValAgent = TraceAgent>; +pub type KeyValEnter = + TraceEnter>, TEnter>; // Fully generic key-only spines and agents pub type KeySpine = ColKeySpine; @@ -189,7 +235,7 @@ pub type KeyValBatcher = MergeBatcher< pub type FlatKeyValBatch = OrdValBatch>; pub type FlatKeyValSpine = - FlatValSpine, TupleABCRegion, T, R>, C>; + FlatValSpine, MzTupleABCRegion, T, R>, C>; pub type FlatKeyValSpineDefault = FlatKeyValSpine< ::Region, ::Region, diff --git a/src/ore/Cargo.toml b/src/ore/Cargo.toml index f13219d524289..49ec1dcf7aa57 100644 --- a/src/ore/Cargo.toml +++ b/src/ore/Cargo.toml @@ -27,8 +27,9 @@ clap = { version = "3.2.24", features = ["env"], optional = true } compact_bytes = { version = "0.1.2", optional = true } ctor = { version = "0.1.26", optional = true } derivative = { version = "2.2.0", optional = true } +differential-dataflow = { version = "0.12.0", optional = true } either = "1.8.0" -flatcontainer = { version = "0.4.1", optional = true } +flatcontainer = { version = "0.5.0", optional = true } futures = { version = "0.3.25", optional = true } hibitset = { version = "0.6.4", optional = true } lgalloc = { version = "0.3", optional = true } @@ -50,6 +51,7 @@ smallvec = { version = "1.10.0", optional = true } stacker = { version = "0.1.15", optional = true } sentry = { version = "0.29.1", optional = true, features = ["debug-images"] } serde = { version = "1.0.152", features = ["derive"] } +timely = { version = "0.12.0", default-features = false, features = ["bincode"], optional = true } tokio = { version = "1.38.0", features = [ "io-util", "net", @@ -115,6 +117,8 @@ async = [ "tokio", "tracing", ] +differential = ["differential-dataflow", "flatcontainer_", "timely"] +flatcontainer_ = ["flatcontainer", "region"] bytes_ = ["bytes", "compact_bytes", "smallvec", "smallvec/const_generics", 
"region", "tracing_"] network = ["async", "bytes", "hyper", "smallvec", "tonic", "tracing"] process = ["libc"] diff --git a/src/ore/src/flatcontainer.rs b/src/ore/src/flatcontainer.rs index fb3850faaa7f0..f73d59851cc29 100644 --- a/src/ore/src/flatcontainer.rs +++ b/src/ore/src/flatcontainer.rs @@ -15,17 +15,54 @@ //! Flat container utilities -use flatcontainer::{Push, Region, ReserveItems}; +use flatcontainer::impls::deduplicate::ConsecutiveOffsetPairs; +use flatcontainer::{OptionRegion, Push, Region, ReserveItems, StringRegion}; +use serde::{Deserialize, Serialize}; + +pub use item::ItemRegion; +pub use tuple::*; /// Associate a type with a flat container region. pub trait MzRegionPreference: 'static { /// The owned type of the container. type Owned; /// A region that can hold `Self`. - type Region: for<'a> Region - + Push - + for<'a> Push<::ReadItem<'a>> - + for<'a> ReserveItems<::ReadItem<'a>>; + type Region: MzRegion; +} + +/// TODO +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct MzIndex(usize); + +impl std::ops::Deref for MzIndex { + type Target = usize; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +/// TODO +pub trait MzRegion: + Region + + Push<::Owned> + + for<'a> Push<&'a ::Owned> + + for<'a> Push<::ReadItem<'a>> + + for<'a> ReserveItems<::ReadItem<'a>> + + Clone + + 'static +{ +} + +impl MzRegion for R where + R: Region + + Push<::Owned> + + for<'a> Push<&'a ::Owned> + + for<'a> Push<::ReadItem<'a>> + + for<'a> ReserveItems<::ReadItem<'a>> + + Clone + + 'static +{ } /// Opinion indicating that the contents of a collection should be stored in an @@ -36,31 +73,262 @@ pub trait MzRegionPreference: 'static { pub struct OwnedRegionOpinion(std::marker::PhantomData); mod tuple { - use flatcontainer::impls::tuple::*; + use flatcontainer::{Index, Push, Region, ReserveItems}; use paste::paste; - use crate::flatcontainer::MzRegionPreference; + use crate::flatcontainer::{MzIndex, MzRegion, MzRegionPreference}; + /// The macro creates 
the region implementation for tuples macro_rules! tuple_flatcontainer { - ($($name:ident)+) => ( - paste! { - impl<$($name: MzRegionPreference),*> MzRegionPreference for ($($name,)*) { - type Owned = ($($name::Owned,)*); - type Region = []<$($name::Region,)*>; + ($($name:ident)+) => ( + paste! { + impl<$($name: MzRegionPreference),*> MzRegionPreference for ($($name,)*) { + type Owned = ($($name::Owned,)*); + type Region = []<$($name::Region,)*>; + } + + /// A region for a tuple. + #[allow(non_snake_case)] + #[derive(Default, Debug)] + #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] + pub struct []<$($name),*> { + $([]: $name),* + } + + #[allow(non_snake_case)] + impl<$($name: MzRegion),*> Clone for []<$($name),*> + where + $(<$name as Region>::Index: Index),* + { + fn clone(&self) -> Self { + Self { + $([]: self.[].clone(),)* + } + } + + fn clone_from(&mut self, source: &Self) { + $(self.[].clone_from(&source.[]);)* } } - ) - } + + #[allow(non_snake_case)] + impl<$($name: MzRegion),*> Region for []<$($name),*> + where + $(<$name as Region>::Index: Index),* + { + type Owned = ($($name::Owned,)*); + type ReadItem<'a> = ($($name::ReadItem<'a>,)*) where Self: 'a; + + type Index = MzIndex; + + #[inline] + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self { + $([]: $name::merge_regions(regions.clone().map(|r| &r.[]))),* + } + } + + #[inline] fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { + ( + $(self.[].index(index),)* + ) + } + + #[inline(always)] + fn reserve_regions<'a, It>(&mut self, regions: It) + where + Self: 'a, + It: Iterator + Clone, + { + $(self.[].reserve_regions(regions.clone().map(|r| &r.[]));)* + } + + #[inline(always)] + fn clear(&mut self) { + $(self.[].clear();)* + } + + #[inline] + fn heap_size(&self, mut callback: Fn) { + $(self.[].heap_size(&mut callback);)* + } + + #[inline] + fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b> where Self: 'a { + let 
($($name,)*) = item; + ( + $($name::reborrow($name),)* + ) + } + } + + #[allow(non_camel_case_types)] + #[allow(non_snake_case)] + impl<$($name, [<$name _C>]: MzRegion ),*> Push<($($name,)*)> for []<$([<$name _C>]),*> + where + $([<$name _C>]: Push<$name>),* + { + #[inline] + fn push(&mut self, item: ($($name,)*)) + -> <[]<$([<$name _C>]),*> as Region>::Index { + let ($($name,)*) = item; + $(let _index = self.[].push($name);)* + _index + } + } + + #[allow(non_camel_case_types)] + #[allow(non_snake_case)] + impl<'a, $($name, [<$name _C>]),*> Push<&'a ($($name,)*)> for []<$([<$name _C>]),*> + where + $([<$name _C>]: MzRegion + Push<&'a $name>),* + { + #[inline] + fn push(&mut self, item: &'a ($($name,)*)) + -> <[]<$([<$name _C>]),*> as Region>::Index { + let ($($name,)*) = item; + $(let _index = self.[].push($name);)* + _index + } + } + + #[allow(non_camel_case_types)] + #[allow(non_snake_case)] + impl<'a, $($name, [<$name _C>]),*> ReserveItems<&'a ($($name,)*)> for []<$([<$name _C>]),*> + where + $([<$name _C>]: MzRegion + ReserveItems<&'a $name>),* + { + #[inline] + fn reserve_items(&mut self, items: It) + where + It: Iterator + Clone, + { + tuple_flatcontainer!(reserve_items self items $($name)* @ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31); + } + } + + #[allow(non_camel_case_types)] + #[allow(non_snake_case)] + impl<$($name, [<$name _C>]),*> ReserveItems<($($name,)*)> for []<$([<$name _C>]),*> + where + $([<$name _C>]: MzRegion + ReserveItems<$name>),* + { + #[inline] + fn reserve_items(&mut self, items: It) + where + It: Iterator + Clone, + { + tuple_flatcontainer!(reserve_items_owned self items $($name)* @ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31); + } + } + } + ); + (reserve_items $self:ident $items:ident $name0:ident $($name:ident)* @ $num0:tt $($num:tt)*) => { + paste! 
{ + $self.[].reserve_items($items.clone().map(|i| &i.$num0)); + tuple_flatcontainer!(reserve_items $self $items $($name)* @ $($num)*); + } + }; + (reserve_items $self:ident $items:ident @ $($num:tt)*) => {}; + (reserve_items_owned $self:ident $items:ident $name0:ident $($name:ident)* @ $num0:tt $($num:tt)*) => { + paste! { + $self.[].reserve_items($items.clone().map(|i| i.$num0)); + tuple_flatcontainer!(reserve_items_owned $self $items $($name)* @ $($num)*); + } + }; + (reserve_items_owned $self:ident $items:ident @ $($num:tt)*) => {}; +} tuple_flatcontainer!(A); tuple_flatcontainer!(A B); tuple_flatcontainer!(A B C); tuple_flatcontainer!(A B C D); tuple_flatcontainer!(A B C D E); + + #[cfg(feature = "differential")] + mod differential { + use differential_dataflow::difference::Semigroup; + use differential_dataflow::lattice::Lattice; + use differential_dataflow::trace::implementations::Update; + use timely::progress::Timestamp; + + use crate::flatcontainer::{MzRegion, MzTupleABCRegion, MzTupleABRegion}; + + impl Update for MzTupleABCRegion, TR, RR> + where + KR: MzRegion, + KR::Owned: Clone + Ord, + for<'a> KR::ReadItem<'a>: Copy + Ord, + VR: MzRegion, + VR::Owned: Clone + Ord, + for<'a> VR::ReadItem<'a>: Copy + Ord, + TR: MzRegion, + TR::Owned: Clone + Lattice + Ord + Timestamp, + for<'a> TR::ReadItem<'a>: Copy + Ord, + RR: MzRegion, + RR::Owned: Clone + Ord + Semigroup, + for<'a> RR::ReadItem<'a>: Copy + Ord, + { + type KeyGat<'a> = KR::ReadItem<'a> where Self: 'a; + type ValGat<'a> = VR::ReadItem<'a> where Self: 'a; + type TimeGat<'a> = TR::ReadItem<'a> where Self: 'a; + type Time = TR::Owned; + type DiffGat<'a> = RR::ReadItem<'a> where Self: 'a; + type Diff = RR::Owned; + + fn into_parts<'a>( + ((key, val), time, diff): Self::ItemRef<'a>, + ) -> ( + Self::KeyGat<'a>, + Self::ValGat<'a>, + Self::TimeGat<'a>, + Self::DiffGat<'a>, + ) { + (key, val, time, diff) + } + + type Key = KR::Owned; + type Val = VR::Owned; + type ItemRef<'a> = ((Self::KeyGat<'a>, 
Self::ValGat<'a>), Self::TimeGat<'a>, Self::DiffGat<'a>) + where + Self: 'a; + + fn reborrow_key<'b, 'a: 'b>(item: Self::KeyGat<'a>) -> Self::KeyGat<'b> + where + Self: 'a, + { + KR::reborrow(item) + } + + fn reborrow_val<'b, 'a: 'b>(item: Self::ValGat<'a>) -> Self::ValGat<'b> + where + Self: 'a, + { + VR::reborrow(item) + } + + fn reborrow_time<'b, 'a: 'b>(item: Self::TimeGat<'a>) -> Self::TimeGat<'b> + where + Self: 'a, + { + TR::reborrow(item) + } + + fn reborrow_diff<'b, 'a: 'b>(item: Self::DiffGat<'a>) -> Self::DiffGat<'b> + where + Self: 'a, + { + RR::reborrow(item) + } + } + } } mod copy { - use flatcontainer::MirrorRegion; + use crate::region::LgAllocVec; use crate::flatcontainer::MzRegionPreference; @@ -68,7 +336,7 @@ mod copy { ($index_type:ty) => { impl MzRegionPreference for $index_type { type Owned = Self; - type Region = MirrorRegion; + type Region = LgAllocVec; } }; } @@ -104,6 +372,11 @@ mod copy { implement_for!(std::time::Duration); } +impl MzRegionPreference for String { + type Owned = String; + type Region = ItemRegion>; +} + mod vec { use crate::flatcontainer::lgalloc::LgAllocOwnedRegion; use crate::flatcontainer::{MzRegionPreference, OwnedRegionOpinion}; @@ -115,16 +388,19 @@ mod vec { } impl MzRegionPreference for Option { - type Owned = as Region>::Owned; - type Region = flatcontainer::OptionRegion; + type Owned = as Region>::Owned; + type Region = ItemRegion>; } mod lgalloc { //! A region that stores slices of clone types in lgalloc - use crate::region::LgAllocVec; + use flatcontainer::impls::offsets::{OffsetContainer, OffsetOptimized}; use flatcontainer::{CopyIter, Push, Region, ReserveItems}; + use crate::flatcontainer::MzIndex; + use crate::region::LgAllocVec; + /// A container for owned types. 
/// /// The container can absorb any type, and stores an owned version of the type, similarly to what @@ -150,17 +426,20 @@ mod lgalloc { #[derive(Debug)] pub struct LgAllocOwnedRegion { slices: LgAllocVec, + offsets: OffsetOptimized, } impl Clone for LgAllocOwnedRegion { fn clone(&self) -> Self { Self { slices: self.slices.clone(), + offsets: self.offsets.clone(), } } fn clone_from(&mut self, source: &Self) { self.slices.clone_from(&source.slices); + self.offsets.clone_from(&source.offsets); } } @@ -170,20 +449,25 @@ mod lgalloc { { type Owned = <[T] as ToOwned>::Owned; type ReadItem<'a> = &'a [T] where Self: 'a; - type Index = (usize, usize); + type Index = MzIndex; #[inline] fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where Self: 'a, { - Self { + let mut this = Self { slices: LgAllocVec::with_capacity(regions.map(|r| r.slices.len()).sum()), - } + offsets: OffsetOptimized::default(), + }; + this.offsets.push(0); + this } #[inline] - fn index(&self, (start, end): Self::Index) -> Self::ReadItem<'_> { + fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { + let start = self.offsets.index(*index); + let end = self.offsets.index(*index + 1); &self.slices[start..end] } @@ -222,18 +506,21 @@ mod lgalloc { impl Default for LgAllocOwnedRegion { #[inline] fn default() -> Self { - Self { + let mut this = Self { slices: LgAllocVec::default(), - } + offsets: OffsetOptimized::default(), + }; + this.offsets.push(0); + this } } impl Push<&[T; N]> for LgAllocOwnedRegion { #[inline] fn push(&mut self, item: &[T; N]) -> as Region>::Index { - let start = self.slices.len(); self.slices.extend_from_slice(item); - (start, self.slices.len()) + self.offsets.push(self.slices.len()); + MzIndex(self.offsets.len() - 2) } } @@ -257,9 +544,9 @@ mod lgalloc { impl Push<&[T]> for LgAllocOwnedRegion { #[inline] fn push(&mut self, item: &[T]) -> as Region>::Index { - let start = self.slices.len(); self.slices.extend_from_slice(item); - (start, self.slices.len()) + 
self.offsets.push(self.slices.len()); + MzIndex(self.offsets.len() - 2) } } @@ -292,9 +579,9 @@ mod lgalloc { { #[inline] fn push(&mut self, mut item: Vec) -> as Region>::Index { - let start = self.slices.len(); self.slices.append(&mut item); - (start, self.slices.len()) + self.offsets.push(self.slices.len()); + MzIndex(self.offsets.len() - 2) } } @@ -334,19 +621,11 @@ mod lgalloc { #[cfg(test)] mod tests { - use crate::{CopyIter, Push, Region, ReserveItems}; + use flatcontainer::{Push, Region, ReserveItems}; use super::*; - #[test] - fn test_copy_array() { - let mut r = >::default(); - r.reserve_items(std::iter::once(&[1; 4])); - let index = r.push([1; 4]); - assert_eq!([1, 1, 1, 1], r.index(index)); - } - - #[test] + #[crate::test] fn test_copy_ref_ref_array() { let mut r = >::default(); ReserveItems::reserve_items(&mut r, std::iter::once(&[1; 4])); @@ -354,7 +633,7 @@ mod lgalloc { assert_eq!([1, 1, 1, 1], r.index(index)); } - #[test] + #[crate::test] fn test_copy_vec() { let mut r = >::default(); ReserveItems::reserve_items(&mut r, std::iter::once(&vec![1; 4])); @@ -363,13 +642,218 @@ mod lgalloc { let index = r.push(vec![2; 4]); assert_eq!([2, 2, 2, 2], r.index(index)); } + } +} - #[test] - fn test_copy_iter() { - let mut r = >::default(); - r.reserve_items(std::iter::once(CopyIter(std::iter::repeat(1).take(4)))); - let index = r.push(CopyIter(std::iter::repeat(1).take(4))); - assert_eq!([1, 1, 1, 1], r.index(index)); +mod item { + //! A region that stores indexes in lgalloc, converting indexes to [`MzIndex`]. 
+ use flatcontainer::{Push, Region, ReserveItems}; + + use crate::flatcontainer::MzIndex; + use crate::region::LgAllocVec; + + /// TODO + pub struct ItemRegion { + inner: R, + storage: LgAllocVec, + } + + impl std::fmt::Debug for ItemRegion { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ItemRegion").finish_non_exhaustive() + } + } + + impl Clone for ItemRegion { + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + storage: self.storage.clone(), + } + } + + fn clone_from(&mut self, source: &Self) { + self.inner.clone_from(&source.inner); + self.storage.clone_from(&source.storage); + } + } + + impl Default for ItemRegion { + fn default() -> Self { + Self { + inner: R::default(), + storage: LgAllocVec::default(), + } + } + } + + impl Region for ItemRegion { + type Owned = R::Owned; + type ReadItem<'a> = R::ReadItem<'a> + where + Self: 'a; + type Index = MzIndex; + + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self { + inner: R::merge_regions(regions.clone().map(|r| &r.inner)), + storage: LgAllocVec::with_capacity(regions.map(|r| r.storage.len()).sum()), + } + } + + fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { + self.inner.index(self.storage[*index]) + } + + fn reserve_regions<'a, I>(&mut self, regions: I) + where + Self: 'a, + I: Iterator + Clone, + { + self.inner + .reserve_regions(regions.clone().map(|r| &r.inner)); + self.storage.reserve(regions.map(|r| r.storage.len()).sum()); + } + + fn clear(&mut self) { + self.inner.clear(); + self.storage.clear(); + } + + fn heap_size(&self, mut callback: F) { + self.inner.heap_size(&mut callback); + self.storage.heap_size(callback); + } + + fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b> + where + Self: 'a, + { + R::reborrow(item) + } + } + + impl, T> Push for ItemRegion { + fn push(&mut self, item: T) -> Self::Index { + let index = self.inner.push(item); + self.storage.push(index); + 
MzIndex(self.storage.len() - 1) + } + } + + impl, T> ReserveItems for ItemRegion { + fn reserve_items(&mut self, items: I) + where + I: Iterator + Clone, + { + self.inner.reserve_items(items.clone()); + self.storage.reserve(items.count()); + } + } +} + +mod lgallocvec { + //! A vector-like structure that stores its contents in lgalloc. + + use flatcontainer::{Push, Region, ReserveItems}; + + use crate::flatcontainer::MzIndex; + use crate::region::LgAllocVec; + + impl Region for LgAllocVec { + type Owned = T; + type ReadItem<'a> = &'a T where Self: 'a; + type Index = MzIndex; + + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self::with_capacity(regions.map(LgAllocVec::len).sum()) + } + + fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { + &self[*index] + } + + fn reserve_regions<'a, I>(&mut self, regions: I) + where + Self: 'a, + I: Iterator + Clone, + { + self.reserve(regions.map(LgAllocVec::len).sum()); + } + + fn clear(&mut self) { + self.clear(); + } + + fn heap_size(&self, mut callback: F) { + let size_of_t = std::mem::size_of::(); + callback(self.len() * size_of_t, self.capacity() * size_of_t); + } + + fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b> + where + Self: 'a, + { + item + } + } + + impl Push for LgAllocVec { + fn push(&mut self, item: T) -> Self::Index { + self.push(item); + MzIndex(self.len() - 1) + } + } + + impl Push<&T> for LgAllocVec { + fn push(&mut self, item: &T) -> Self::Index { + self.push(item.clone()); + MzIndex(self.len() - 1) + } + } + + impl Push<&&T> for LgAllocVec { + fn push(&mut self, item: &&T) -> Self::Index { + self.push((*item).clone()); + MzIndex(self.len() - 1) + } + } + + impl ReserveItems for LgAllocVec { + fn reserve_items(&mut self, items: I) + where + I: Iterator + Clone, + { + self.reserve(items.count()); + } + } + + #[cfg(test)] + mod tests { + #[crate::test] + fn vec() { + use flatcontainer::{Push, Region, ReserveItems}; + + use 
crate::region::LgAllocVec; + + let mut region = LgAllocVec::::default(); + let index = <_ as Push<_>>::push(&mut region, 42); + assert_eq!(region.index(index), &42); + + let mut region = LgAllocVec::::default(); + region.push(42); + region.push(43); + region.push(44); + region.reserve_items([1, 2, 3].iter()); + assert_eq!(region.index(0), &42); + assert_eq!(region.index(1), &43); + assert_eq!(region.index(2), &44); } } } diff --git a/src/ore/src/lib.rs b/src/ore/src/lib.rs index f771740382898..518cf43b7ef7c 100644 --- a/src/ore/src/lib.rs +++ b/src/ore/src/lib.rs @@ -40,7 +40,7 @@ pub mod codegen; pub mod collections; pub mod env; pub mod error; -#[cfg(feature = "flatcontainer")] +#[cfg(feature = "flatcontainer_")] pub mod flatcontainer; pub mod fmt; #[cfg_attr(nightly_doc_features, doc(cfg(feature = "async")))] diff --git a/src/ore/src/region.rs b/src/ore/src/region.rs index af803a9068325..58e72f66ff12a 100644 --- a/src/ore/src/region.rs +++ b/src/ore/src/region.rs @@ -399,7 +399,6 @@ mod vec { use std::fmt::{Debug, Formatter}; use std::mem::{ManuallyDrop, MaybeUninit}; use std::ops::Deref; - use std::ptr; /// A fixed-length region in memory, which is either allocated from heap or lgalloc. pub struct LgAllocVec { @@ -479,7 +478,7 @@ mod vec { self.reserve(lower.saturating_add(1)); } unsafe { - ptr::write( + std::ptr::write( self.elements.as_mut_ptr().add(len), MaybeUninit::new(element), ); @@ -497,7 +496,7 @@ mod vec { self.reserve(count); let len = self.len(); unsafe { - ptr::copy_nonoverlapping( + std::ptr::copy_nonoverlapping( slice.as_ptr(), self.elements.as_mut_ptr().add(len) as *const MaybeUninit as *mut T, count, @@ -513,7 +512,7 @@ mod vec { let len = self.len(); unsafe { data.set_len(0); - ptr::copy_nonoverlapping( + std::ptr::copy_nonoverlapping( data.as_ptr(), self.elements.as_mut_ptr().add(len) as *const MaybeUninit as *mut T, count, @@ -530,6 +529,11 @@ mod vec { self.length = length; } + /// The number of elements in the array. 
+ pub fn len(&self) -> usize { + self.length + } + /// The number of elements this array can absorb. pub fn capacity(&self) -> usize { self.elements.len() @@ -553,6 +557,11 @@ mod vec { /// Grow the array to at least `new_len` elements. Reallocates the underlying storage. fn grow(&mut self, new_len: usize) { let new_capacity = std::cmp::max(self.capacity() * 2, new_len); + println!( + "Reallocating {} -> {}, requested {new_len}", + self.capacity(), + new_capacity + ); let mut new_vec = LgAllocVec::with_capacity(new_capacity); let src_ptr = self.elements.as_ptr(); @@ -643,7 +652,7 @@ mod vec { use super::*; - #[mz_ore::test] + #[crate::test] fn double_drop() { static DROP_COUNT: AtomicUsize = AtomicUsize::new(0); struct DropGuard; diff --git a/src/repr/Cargo.toml b/src/repr/Cargo.toml index d9b3f8ebe1df2..feff320a7dab7 100644 --- a/src/repr/Cargo.toml +++ b/src/repr/Cargo.toml @@ -40,14 +40,14 @@ differential-dataflow = "0.12.0" enum_dispatch = "0.3.11" enum-kinds = "0.5.1" fast-float = "0.2.0" -flatcontainer = "0.4.1" +flatcontainer = "0.5.0" hex = "0.4.3" itertools = "0.10.5" once_cell = "1.16.0" mz-lowertest = { path = "../lowertest" } mz-ore = { path = "../ore", features = [ "bytes_", - "flatcontainer", + "flatcontainer_", "id_gen", "smallvec", "region", diff --git a/src/repr/src/timestamp.rs b/src/repr/src/timestamp.rs index 1461541f7daef..96549a11f3031 100644 --- a/src/repr/src/timestamp.rs +++ b/src/repr/src/timestamp.rs @@ -467,14 +467,15 @@ impl columnation::Columnation for Timestamp { } mod flatcontainer { - use flatcontainer::{IntoOwned, MirrorRegion}; + use flatcontainer::IntoOwned; use mz_ore::flatcontainer::MzRegionPreference; + use mz_ore::region::LgAllocVec; use crate::Timestamp; impl MzRegionPreference for Timestamp { type Owned = Self; - type Region = MirrorRegion; + type Region = LgAllocVec; } impl<'a> IntoOwned<'a> for Timestamp { diff --git a/src/timely-util/src/containers.rs b/src/timely-util/src/containers.rs index 9431900d8edfd..e851810b0e72c 
100644 --- a/src/timely-util/src/containers.rs +++ b/src/timely-util/src/containers.rs @@ -9,5 +9,90 @@ //! Reusable containers. +use std::collections::VecDeque; + +use timely::container::flatcontainer::{FlatStack, Push, Region}; +use timely::container::{ContainerBuilder, PushInto, SizableContainer}; +use timely::Container; + pub mod array; pub mod stack; + +/// A container builder that uses length and preferred capacity to chunk data. Preallocates the next +/// container based on the capacity of the previous one once a container is full. +/// +/// Ideally, we'd have a `TryPush` trait that would fail if a push would cause a reallocation, but +/// we aren't there yet. +/// +/// Maintains a single empty allocation between [`Self::push_into`] and [`Self::extract`], but not +/// across [`Self::finish`] to maintain a low memory footprint. +/// +/// Maintains FIFO order. +#[derive(Default, Debug)] +pub struct PreallocatingCapacityContainerBuilder { + /// Container that we're writing to. + current: C, + /// Empty allocation. + empty: Option, + /// Completed containers pending to be sent. + pending: VecDeque, +} + +impl PushInto for PreallocatingCapacityContainerBuilder> +where + R: Region + Push + Clone + 'static, +{ + #[inline] + fn push_into(&mut self, item: T) { + if self.current.capacity() == 0 { + self.current = self.empty.take().unwrap_or_default(); + // Protect against non-emptied containers. 
+ self.current.clear(); + } + // Ensure capacity + let preferred_capacity = FlatStack::::preferred_capacity(); + if self.current.capacity() < preferred_capacity { + self.current + .reserve(preferred_capacity - self.current.len()); + } + + // Push item + self.current.push(item); + + // Maybe flush + if self.current.len() == self.current.capacity() { + let pending = std::mem::take(&mut self.current); + self.current = FlatStack::merge_capacity(std::iter::once(&pending)); + self.current + .reserve(preferred_capacity.saturating_sub(self.current.len())); + self.pending.push_back(pending); + } + } +} + +impl ContainerBuilder for PreallocatingCapacityContainerBuilder> +where + R: Region + Clone + 'static, +{ + type Container = FlatStack; + + #[inline] + fn extract(&mut self) -> Option<&mut Self::Container> { + self.empty = Some(self.pending.pop_front()?); + self.empty.as_mut() + } + + #[inline] + fn finish(&mut self) -> Option<&mut Self::Container> { + if !self.current.is_empty() { + let pending = std::mem::take(&mut self.current); + self.current = FlatStack::merge_capacity(std::iter::once(&pending)); + let preferred_capacity = FlatStack::::preferred_capacity(); + self.current + .reserve(preferred_capacity.saturating_sub(self.current.len())); + self.pending.push_back(pending); + } + self.empty = self.pending.pop_front(); + self.empty.as_mut() + } +} From 5acf67548e33cebe47f8fcc16ae47685e9db19f9 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Mon, 1 Jul 2024 10:53:16 -0400 Subject: [PATCH 3/8] Fix allocation behavior Signed-off-by: Moritz Hoffmann --- src/ore/src/flatcontainer.rs | 2 ++ src/ore/src/region.rs | 14 +++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/ore/src/flatcontainer.rs b/src/ore/src/flatcontainer.rs index f73d59851cc29..2a478b5d5fda2 100644 --- a/src/ore/src/flatcontainer.rs +++ b/src/ore/src/flatcontainer.rs @@ -483,6 +483,8 @@ mod lgalloc { #[inline] fn clear(&mut self) { self.slices.clear(); + self.offsets.clear(); + 
self.offsets.push(0); } #[inline] diff --git a/src/ore/src/region.rs b/src/ore/src/region.rs index 58e72f66ff12a..11aa51d526df7 100644 --- a/src/ore/src/region.rs +++ b/src/ore/src/region.rs @@ -554,14 +554,18 @@ mod vec { } } + const MIN_NON_ZERO_CAP: usize = if std::mem::size_of::() == 1 { + 8 + } else if std::mem::size_of::() <= 1024 { + 4 + } else { + 1 + }; + /// Grow the array to at least `new_len` elements. Reallocates the underlying storage. fn grow(&mut self, new_len: usize) { let new_capacity = std::cmp::max(self.capacity() * 2, new_len); - println!( - "Reallocating {} -> {}, requested {new_len}", - self.capacity(), - new_capacity - ); + let new_capacity = std::cmp::max(new_capacity, Self::MIN_NON_ZERO_CAP); let mut new_vec = LgAllocVec::with_capacity(new_capacity); let src_ptr = self.elements.as_ptr(); From ca8f4bbc123124aa958b9f419b0990e43905b823 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Mon, 8 Jul 2024 12:08:39 -0400 Subject: [PATCH 4/8] Use MzOffsetOptimized in FlatStack storage Signed-off-by: Moritz Hoffmann --- Cargo.lock | 17 ++- Cargo.toml | 26 ++-- src/compute/src/logging/timely.rs | 18 +-- src/compute/src/typedefs.rs | 76 ++--------- src/ore/src/flatcontainer.rs | 204 +++++++++++++++++++++++++----- src/ore/src/region.rs | 12 ++ 6 files changed, 220 insertions(+), 133 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 719e25d811b5b..83137bd27eb98 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1981,7 +1981,7 @@ dependencies = [ [[package]] name = "differential-dataflow" version = "0.12.0" -source = "git+https://github.com/antiguru/differential-dataflow.git?branch=consolidate_layout_merger_chunk#04a2446fc68a0025529cf676f948f9e8af711aa5" +source = "git+https://github.com/antiguru/differential-dataflow.git?branch=region_update#0697f7a519247a2f2ca08bad28a59617f3fcce72" dependencies = [ "abomonation", "abomonation_derive", @@ -2037,7 +2037,7 @@ checksum = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97" [[package]] name 
= "dogsdogsdogs" version = "0.1.0" -source = "git+https://github.com/antiguru/differential-dataflow.git?branch=consolidate_layout_merger_chunk#04a2446fc68a0025529cf676f948f9e8af711aa5" +source = "git+https://github.com/antiguru/differential-dataflow.git?branch=region_update#0697f7a519247a2f2ca08bad28a59617f3fcce72" dependencies = [ "abomonation", "abomonation_derive", @@ -2418,8 +2418,7 @@ dependencies = [ [[package]] name = "flatcontainer" version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff185ea156496de196dfd189038982f480515ea3338f1ff0a4fbff1e52ea0a6" +source = "git+https://github.com/antiguru/flatcontainer.git#7dc86fadbbaecd6fa3c549e20e1c46b0e448c778" dependencies = [ "cfg-if", "paste", @@ -9592,7 +9591,7 @@ dependencies = [ [[package]] name = "timely" version = "0.12.0" -source = "git+https://github.com/MaterializeInc/timely-dataflow.git#f6a5b3620de2050f123aad57fb40a535ba417869" +source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#3f9d3874c6c5d75e2a9e67af9e78d6c40234be6a" dependencies = [ "abomonation", "abomonation_derive", @@ -9609,12 +9608,12 @@ dependencies = [ [[package]] name = "timely_bytes" version = "0.12.0" -source = "git+https://github.com/MaterializeInc/timely-dataflow.git#f6a5b3620de2050f123aad57fb40a535ba417869" +source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#3f9d3874c6c5d75e2a9e67af9e78d6c40234be6a" [[package]] name = "timely_communication" version = "0.12.0" -source = "git+https://github.com/MaterializeInc/timely-dataflow.git#f6a5b3620de2050f123aad57fb40a535ba417869" +source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#3f9d3874c6c5d75e2a9e67af9e78d6c40234be6a" dependencies = [ "abomonation", "abomonation_derive", @@ -9630,7 +9629,7 @@ dependencies = [ [[package]] name = "timely_container" version = "0.12.0" -source = 
"git+https://github.com/MaterializeInc/timely-dataflow.git#f6a5b3620de2050f123aad57fb40a535ba417869" +source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#3f9d3874c6c5d75e2a9e67af9e78d6c40234be6a" dependencies = [ "columnation", "flatcontainer", @@ -9640,7 +9639,7 @@ dependencies = [ [[package]] name = "timely_logging" version = "0.12.0" -source = "git+https://github.com/MaterializeInc/timely-dataflow.git#f6a5b3620de2050f123aad57fb40a535ba417869" +source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#3f9d3874c6c5d75e2a9e67af9e78d6c40234be6a" [[package]] name = "tiny-keccak" diff --git a/Cargo.toml b/Cargo.toml index d3d153c0fe5b5..94359e72ecdba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -265,20 +265,22 @@ debug = 2 # version of Materialize. [patch."https://github.com/TimelyDataflow/timely-dataflow"] # Projects that do not reliably release to crates.io. -timely = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } -timely_bytes = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } -timely_communication = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } -timely_container = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } -timely_logging = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } +timely = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "flatcontainer_storage" } +timely_bytes = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "flatcontainer_storage" } +timely_communication = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "flatcontainer_storage" } +timely_container = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "flatcontainer_storage" } +timely_logging = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "flatcontainer_storage" } [patch.crates-io] # Projects that do not reliably release to crates.io. 
-timely = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } -timely_bytes = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } -timely_communication = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } -timely_container = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } -timely_logging = { git = "https://github.com/MaterializeInc/timely-dataflow.git" } -differential-dataflow = { git = "https://github.com/antiguru/differential-dataflow.git", branch = "consolidate_layout_merger_chunk" } -dogsdogsdogs = { git = "https://github.com/antiguru/differential-dataflow.git", branch = "consolidate_layout_merger_chunk" } +timely = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "flatcontainer_storage" } +timely_bytes = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "flatcontainer_storage" } +timely_communication = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "flatcontainer_storage" } +timely_container = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "flatcontainer_storage" } +timely_logging = { git = "https://github.com/antiguru/timely-dataflow.git", branch = "flatcontainer_storage" } +differential-dataflow = { git = "https://github.com/antiguru/differential-dataflow.git", branch = "region_update" } +dogsdogsdogs = { git = "https://github.com/antiguru/differential-dataflow.git", branch = "region_update" } + +flatcontainer = { git = "https://github.com/antiguru/flatcontainer.git" } # Waiting on https://github.com/sfackler/rust-postgres/pull/752. postgres = { git = "https://github.com/MaterializeInc/rust-postgres" } diff --git a/src/compute/src/logging/timely.rs b/src/compute/src/logging/timely.rs index 714052b8af62b..8d9029ade5fa3 100644 --- a/src/compute/src/logging/timely.rs +++ b/src/compute/src/logging/timely.rs @@ -9,7 +9,6 @@ //! Logging dataflows for events generated by timely dataflow. 
-use mz_ore::flatcontainer::{MzRegionPreference, OwnedRegionOpinion}; use std::cell::RefCell; use std::collections::BTreeMap; use std::rc::Rc; @@ -18,6 +17,7 @@ use std::time::Duration; use differential_dataflow::consolidation::ConsolidatingContainerBuilder; use mz_compute_client::logging::LoggingConfig; use mz_ore::cast::CastFrom; +use mz_ore::flatcontainer::{MzRegionPreference, OwnedRegionOpinion}; use mz_ore::region::LgAllocVec; use mz_repr::{Datum, Diff, Timestamp}; use mz_timely_util::replay::MzReplay; @@ -391,10 +391,6 @@ struct ChannelDatum { target: (usize, usize), } -impl Columnation for ChannelDatum { - type InnerRegion = CopyRegion; -} - impl MzRegionPreference for ChannelDatum { type Owned = Self; type Region = LgAllocVec; @@ -406,10 +402,6 @@ struct ParkDatum { requested_pow: Option, } -impl Columnation for ParkDatum { - type InnerRegion = CopyRegion; -} - impl MzRegionPreference for ParkDatum { type Owned = Self; type Region = LgAllocVec; @@ -421,10 +413,6 @@ struct MessageDatum { worker: usize, } -impl Columnation for MessageDatum { - type InnerRegion = CopyRegion; -} - impl MzRegionPreference for MessageDatum { type Owned = Self; type Region = LgAllocVec; @@ -436,10 +424,6 @@ struct ScheduleHistogramDatum { duration_pow: u128, } -impl Columnation for ScheduleHistogramDatum { - type InnerRegion = CopyRegion; -} - impl MzRegionPreference for ScheduleHistogramDatum { type Owned = Self; type Region = LgAllocVec; diff --git a/src/compute/src/typedefs.rs b/src/compute/src/typedefs.rs index 90213b1ce8ba1..9be686d79f4a5 100644 --- a/src/compute/src/typedefs.rs +++ b/src/compute/src/typedefs.rs @@ -19,7 +19,9 @@ use differential_dataflow::trace::implementations::merge_batcher_col::Columnatio use differential_dataflow::trace::implementations::ord_neu::{FlatValSpine, OrdValBatch}; use differential_dataflow::trace::wrappers::enter::TraceEnter; use differential_dataflow::trace::wrappers::frontier::TraceFrontier; -use mz_ore::flatcontainer::{MzRegionPreference, 
MzTupleABCRegion, MzTupleABRegion}; +use mz_ore::flatcontainer::{ + MzOffsetOptimized, MzRegionPreference, MzTupleABCRegion, MzTupleABRegion, +}; use mz_repr::Diff; use mz_storage_types::errors::DataflowError; use timely::dataflow::ScopeParent; @@ -39,7 +41,7 @@ pub(crate) mod spines { use differential_dataflow::trace::implementations::spine_fueled::Spine; use differential_dataflow::trace::implementations::{Layout, Update}; use differential_dataflow::trace::rc_blanket_impls::RcBuilder; - use mz_ore::flatcontainer::MzRegion; + use mz_ore::flatcontainer::{MzOffsetOptimized, MzRegion}; use mz_timely_util::containers::stack::StackWrapper; use timely::container::columnation::{Columnation, TimelyStack}; use timely::container::flatcontainer::FlatStack; @@ -106,60 +108,6 @@ pub(crate) mod spines { type Val = VR::Owned; type Time = TR::Owned; type Diff = RR::Owned; - type ItemRef<'a> = ((Self::KeyGat<'a>, Self::ValGat<'a>), Self::TimeGat<'a>, Self::DiffGat<'a>) - where - Self: 'a; - type KeyGat<'a> = KR::ReadItem<'a> - where - Self: 'a; - type ValGat<'a> = VR::ReadItem<'a> - where - Self: 'a; - type TimeGat<'a> = TR::ReadItem<'a> - where - Self: 'a; - type DiffGat<'a> = RR::ReadItem<'a> - where - Self: 'a; - - fn into_parts<'a>( - ((key, val), time, diff): Self::ItemRef<'a>, - ) -> ( - Self::KeyGat<'a>, - Self::ValGat<'a>, - Self::TimeGat<'a>, - Self::DiffGat<'a>, - ) { - (key, val, time, diff) - } - - fn reborrow_key<'b, 'a: 'b>(item: Self::KeyGat<'a>) -> Self::KeyGat<'b> - where - Self: 'a, - { - KR::reborrow(item) - } - - fn reborrow_val<'b, 'a: 'b>(item: Self::ValGat<'a>) -> Self::ValGat<'b> - where - Self: 'a, - { - VR::reborrow(item) - } - - fn reborrow_time<'b, 'a: 'b>(item: Self::TimeGat<'a>) -> Self::TimeGat<'b> - where - Self: 'a, - { - TR::reborrow(item) - } - - fn reborrow_diff<'b, 'a: 'b>(item: Self::DiffGat<'a>) -> Self::DiffGat<'b> - where - Self: 'a, - { - RR::reborrow(item) - } } /// Layout implementation for [`MzFlatLayout`]. 
Mostly equivalent to differential's @@ -182,10 +130,10 @@ pub(crate) mod spines { for<'a> RR::ReadItem<'a>: Copy + Ord, { type Target = Self; - type KeyContainer = FlatStack; - type ValContainer = FlatStack; - type TimeContainer = FlatStack; - type DiffContainer = FlatStack; + type KeyContainer = FlatStack; + type ValContainer = FlatStack; + type TimeContainer = FlatStack; + type DiffContainer = FlatStack; type OffsetContainer = OffsetOptimized; } } @@ -234,8 +182,12 @@ pub type KeyValBatcher = MergeBatcher< >; pub type FlatKeyValBatch = OrdValBatch>; -pub type FlatKeyValSpine = - FlatValSpine, MzTupleABCRegion, T, R>, C>; +pub type FlatKeyValSpine = FlatValSpine< + MzFlatLayout, + MzTupleABCRegion, T, R>, + C, + MzOffsetOptimized, +>; pub type FlatKeyValSpineDefault = FlatKeyValSpine< ::Region, ::Region, diff --git a/src/ore/src/flatcontainer.rs b/src/ore/src/flatcontainer.rs index 2a478b5d5fda2..e5a457de3a972 100644 --- a/src/ore/src/flatcontainer.rs +++ b/src/ore/src/flatcontainer.rs @@ -15,11 +15,12 @@ //! Flat container utilities -use flatcontainer::impls::deduplicate::ConsecutiveOffsetPairs; +use flatcontainer::impls::deduplicate::ConsecutiveIndexPairs; use flatcontainer::{OptionRegion, Push, Region, ReserveItems, StringRegion}; use serde::{Deserialize, Serialize}; pub use item::ItemRegion; +pub use offset::MzOffsetOptimized; pub use tuple::*; /// Associate a type with a flat container region. 
@@ -252,6 +253,7 @@ mod tuple { mod differential { use differential_dataflow::difference::Semigroup; use differential_dataflow::lattice::Lattice; + use differential_dataflow::trace::implementations::merge_batcher_flat::RegionUpdate; use differential_dataflow::trace::implementations::Update; use timely::progress::Timestamp; @@ -272,52 +274,58 @@ mod tuple { RR::Owned: Clone + Ord + Semigroup, for<'a> RR::ReadItem<'a>: Copy + Ord, { - type KeyGat<'a> = KR::ReadItem<'a> where Self: 'a; - type ValGat<'a> = VR::ReadItem<'a> where Self: 'a; - type TimeGat<'a> = TR::ReadItem<'a> where Self: 'a; + type Key = KR::Owned; + type Val = VR::Owned; type Time = TR::Owned; - type DiffGat<'a> = RR::ReadItem<'a> where Self: 'a; type Diff = RR::Owned; + } + + impl RegionUpdate for MzTupleABCRegion, TR, RR> + where + KR: MzRegion, + for<'a> KR::ReadItem<'a>: Copy + Ord, + VR: MzRegion, + for<'a> VR::ReadItem<'a>: Copy + Ord, + TR: MzRegion, + for<'a> TR::ReadItem<'a>: Copy + Ord, + RR: MzRegion, + for<'a> RR::ReadItem<'a>: Copy + Ord, + { + type Key<'a> = KR::ReadItem<'a> where Self: 'a; + type Val<'a> = VR::ReadItem<'a> where Self: 'a; + type Time<'a> = TR::ReadItem<'a> where Self: 'a; + type TimeOwned = TR::Owned; + type Diff<'a> = RR::ReadItem<'a> where Self: 'a; + type DiffOwned = RR::Owned; fn into_parts<'a>( - ((key, val), time, diff): Self::ItemRef<'a>, - ) -> ( - Self::KeyGat<'a>, - Self::ValGat<'a>, - Self::TimeGat<'a>, - Self::DiffGat<'a>, - ) { + ((key, val), time, diff): Self::ReadItem<'a>, + ) -> (Self::Key<'a>, Self::Val<'a>, Self::Time<'a>, Self::Diff<'a>) { (key, val, time, diff) } - type Key = KR::Owned; - type Val = VR::Owned; - type ItemRef<'a> = ((Self::KeyGat<'a>, Self::ValGat<'a>), Self::TimeGat<'a>, Self::DiffGat<'a>) - where - Self: 'a; - - fn reborrow_key<'b, 'a: 'b>(item: Self::KeyGat<'a>) -> Self::KeyGat<'b> + fn reborrow_key<'b, 'a: 'b>(item: Self::Key<'a>) -> Self::Key<'b> where Self: 'a, { KR::reborrow(item) } - fn reborrow_val<'b, 'a: 'b>(item: 
Self::ValGat<'a>) -> Self::ValGat<'b> + fn reborrow_val<'b, 'a: 'b>(item: Self::Val<'a>) -> Self::Val<'b> where Self: 'a, { VR::reborrow(item) } - fn reborrow_time<'b, 'a: 'b>(item: Self::TimeGat<'a>) -> Self::TimeGat<'b> + fn reborrow_time<'b, 'a: 'b>(item: Self::Time<'a>) -> Self::Time<'b> where Self: 'a, { TR::reborrow(item) } - fn reborrow_diff<'b, 'a: 'b>(item: Self::DiffGat<'a>) -> Self::DiffGat<'b> + fn reborrow_diff<'b, 'a: 'b>(item: Self::Diff<'a>) -> Self::Diff<'b> where Self: 'a, { @@ -374,7 +382,7 @@ mod copy { impl MzRegionPreference for String { type Owned = String; - type Region = ItemRegion>; + type Region = ItemRegion>; } mod vec { @@ -395,11 +403,11 @@ impl MzRegionPreference for Option { mod lgalloc { //! A region that stores slices of clone types in lgalloc - use flatcontainer::impls::offsets::{OffsetContainer, OffsetOptimized}; - use flatcontainer::{CopyIter, Push, Region, ReserveItems}; - use crate::flatcontainer::MzIndex; use crate::region::LgAllocVec; + use flatcontainer::impls::index::{IndexContainer, IndexOptimized}; + use flatcontainer::impls::storage::Storage; + use flatcontainer::{Push, PushIter, Region, ReserveItems}; /// A container for owned types. 
/// @@ -426,7 +434,7 @@ mod lgalloc { #[derive(Debug)] pub struct LgAllocOwnedRegion { slices: LgAllocVec, - offsets: OffsetOptimized, + offsets: IndexOptimized, } impl Clone for LgAllocOwnedRegion { @@ -458,7 +466,7 @@ mod lgalloc { { let mut this = Self { slices: LgAllocVec::with_capacity(regions.map(|r| r.slices.len()).sum()), - offsets: OffsetOptimized::default(), + offsets: IndexOptimized::default(), }; this.offsets.push(0); this @@ -510,7 +518,7 @@ mod lgalloc { fn default() -> Self { let mut this = Self { slices: LgAllocVec::default(), - offsets: OffsetOptimized::default(), + offsets: IndexOptimized::default(), }; this.offsets.push(0); this @@ -607,14 +615,14 @@ mod lgalloc { } } - impl> ReserveItems> for LgAllocOwnedRegion + impl> ReserveItems> for LgAllocOwnedRegion where [T]: ToOwned, { #[inline] fn reserve_items(&mut self, items: I) where - I: Iterator> + Clone, + I: Iterator> + Clone, { self.slices .reserve(items.flat_map(|i| i.0.into_iter()).count()); @@ -760,10 +768,11 @@ mod item { mod lgallocvec { //! A vector-like structure that stores its contents in lgalloc. 
- use flatcontainer::{Push, Region, ReserveItems}; - use crate::flatcontainer::MzIndex; use crate::region::LgAllocVec; + use flatcontainer::impls::index::IndexContainer; + use flatcontainer::impls::storage::Storage; + use flatcontainer::{Push, Region, ReserveItems}; impl Region for LgAllocVec { type Owned = T; @@ -806,6 +815,59 @@ mod lgallocvec { } } + impl Storage for LgAllocVec { + fn with_capacity(capacity: usize) -> Self { + Self::with_capacity(capacity) + } + + fn reserve(&mut self, additional: usize) { + self.reserve(additional); + } + + fn clear(&mut self) { + self.clear(); + } + + fn heap_size(&self, callback: F) { + self.heap_size(callback); + } + + fn len(&self) -> usize { + self.len() + } + + fn is_empty(&self) -> bool { + self.is_empty() + } + } + + impl IndexContainer for LgAllocVec { + type Iter<'a> = std::iter::Copied> + where + Self: 'a; + + fn index(&self, index: usize) -> T { + self[index] + } + + fn push(&mut self, item: T) { + self.push(item); + } + + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { + for item in iter { + self.push(item); + } + } + + fn iter(&self) -> Self::Iter<'_> { + self.iter().copied() + } + } + impl Push for LgAllocVec { fn push(&mut self, item: T) -> Self::Index { self.push(item); @@ -859,3 +921,79 @@ mod lgallocvec { } } } + +mod offset { + use crate::flatcontainer::MzIndex; + use flatcontainer::impls::index::{IndexContainer, IndexOptimized}; + use flatcontainer::impls::storage::Storage; + + /// TODO + #[derive(Default, Clone, Debug)] + pub struct MzOffsetOptimized(IndexOptimized); + + impl Storage for MzOffsetOptimized { + fn with_capacity(capacity: usize) -> Self { + Self(IndexOptimized::with_capacity(capacity)) + } + + fn reserve(&mut self, additional: usize) { + self.0.reserve(additional) + } + + fn clear(&mut self) { + self.0.clear(); + } + + fn heap_size(&self, callback: F) { + self.0.heap_size(callback); + } + + fn len(&self) -> usize { + self.0.len() + } + + fn is_empty(&self) -> bool 
{ + self.0.is_empty() + } + } + + impl IndexContainer for MzOffsetOptimized { + type Iter<'a> = MzOffsetOptimizedIter<>::Iter<'a>> + where + Self: 'a; + + fn index(&self, index: usize) -> MzIndex { + MzIndex(self.0.index(index)) + } + + fn push(&mut self, item: MzIndex) { + self.0.push(item.0); + } + + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { + self.0.extend(iter.into_iter().map(|item| item.0)); + } + + fn iter(&self) -> Self::Iter<'_> { + MzOffsetOptimizedIter(self.0.iter()) + } + } + + /// TODO + #[derive(Clone, Copy, Debug)] + pub struct MzOffsetOptimizedIter(I); + + impl Iterator for MzOffsetOptimizedIter + where + I: Iterator, + { + type Item = MzIndex; + + fn next(&mut self) -> Option { + self.0.next().map(MzIndex) + } + } +} diff --git a/src/ore/src/region.rs b/src/ore/src/region.rs index 11aa51d526df7..3fa8017a70019 100644 --- a/src/ore/src/region.rs +++ b/src/ore/src/region.rs @@ -496,6 +496,7 @@ mod vec { self.reserve(count); let len = self.len(); unsafe { + #[allow(clippy::as_conversions)] std::ptr::copy_nonoverlapping( slice.as_ptr(), self.elements.as_mut_ptr().add(len) as *const MaybeUninit as *mut T, @@ -512,6 +513,7 @@ mod vec { let len = self.len(); unsafe { data.set_len(0); + #[allow(clippy::as_conversions)] std::ptr::copy_nonoverlapping( data.as_ptr(), self.elements.as_mut_ptr().add(len) as *const MaybeUninit as *mut T, @@ -534,6 +536,11 @@ mod vec { self.length } + /// Returns `true` if the array contains no elements. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + /// The number of elements this array can absorb. pub fn capacity(&self) -> usize { self.elements.len() @@ -588,6 +595,11 @@ mod vec { self.grow(new_len); } } + + /// Iterate over the elements. 
+ pub fn iter(&self) -> std::slice::Iter<'_, T> { + self.deref().iter() + } } impl Clone for LgAllocVec { From 48fe75c6aebb85fe2dc367e87298d1d2ac7d82af Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Tue, 9 Jul 2024 10:30:16 -0400 Subject: [PATCH 5/8] Make it work Signed-off-by: Moritz Hoffmann --- Cargo.lock | 14 +++---- src/compute/src/logging/initialize.rs | 8 ++-- src/compute/src/logging/reachability.rs | 7 ++-- src/compute/src/logging/timely.rs | 16 ++++---- src/ore/src/flatcontainer.rs | 12 +++--- src/timely-util/src/containers.rs | 54 ++++++++++++------------- 6 files changed, 58 insertions(+), 53 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 83137bd27eb98..1913b71fb9440 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1981,7 +1981,7 @@ dependencies = [ [[package]] name = "differential-dataflow" version = "0.12.0" -source = "git+https://github.com/antiguru/differential-dataflow.git?branch=region_update#0697f7a519247a2f2ca08bad28a59617f3fcce72" +source = "git+https://github.com/antiguru/differential-dataflow.git?branch=region_update#bb5836a50e5233795cf3efcc042f8945f78b4350" dependencies = [ "abomonation", "abomonation_derive", @@ -2037,7 +2037,7 @@ checksum = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97" [[package]] name = "dogsdogsdogs" version = "0.1.0" -source = "git+https://github.com/antiguru/differential-dataflow.git?branch=region_update#0697f7a519247a2f2ca08bad28a59617f3fcce72" +source = "git+https://github.com/antiguru/differential-dataflow.git?branch=region_update#bb5836a50e5233795cf3efcc042f8945f78b4350" dependencies = [ "abomonation", "abomonation_derive", @@ -9591,7 +9591,7 @@ dependencies = [ [[package]] name = "timely" version = "0.12.0" -source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#3f9d3874c6c5d75e2a9e67af9e78d6c40234be6a" +source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#63103bd5f7584fd77b3256580df229189cbcd314" dependencies 
= [ "abomonation", "abomonation_derive", @@ -9608,12 +9608,12 @@ dependencies = [ [[package]] name = "timely_bytes" version = "0.12.0" -source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#3f9d3874c6c5d75e2a9e67af9e78d6c40234be6a" +source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#63103bd5f7584fd77b3256580df229189cbcd314" [[package]] name = "timely_communication" version = "0.12.0" -source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#3f9d3874c6c5d75e2a9e67af9e78d6c40234be6a" +source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#63103bd5f7584fd77b3256580df229189cbcd314" dependencies = [ "abomonation", "abomonation_derive", @@ -9629,7 +9629,7 @@ dependencies = [ [[package]] name = "timely_container" version = "0.12.0" -source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#3f9d3874c6c5d75e2a9e67af9e78d6c40234be6a" +source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#63103bd5f7584fd77b3256580df229189cbcd314" dependencies = [ "columnation", "flatcontainer", @@ -9639,7 +9639,7 @@ dependencies = [ [[package]] name = "timely_logging" version = "0.12.0" -source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#3f9d3874c6c5d75e2a9e67af9e78d6c40234be6a" +source = "git+https://github.com/antiguru/timely-dataflow.git?branch=flatcontainer_storage#63103bd5f7584fd77b3256580df229189cbcd314" [[package]] name = "tiny-keccak" diff --git a/src/compute/src/logging/initialize.rs b/src/compute/src/logging/initialize.rs index ff3ad860c462f..9bf241304dcaf 100644 --- a/src/compute/src/logging/initialize.rs +++ b/src/compute/src/logging/initialize.rs @@ -14,7 +14,7 @@ use differential_dataflow::dynamic::pointstamp::PointStamp; use differential_dataflow::logging::DifferentialEvent; use differential_dataflow::Collection; use 
mz_compute_client::logging::{LogVariant, LoggingConfig}; -use mz_ore::flatcontainer::{MzRegionPreference, OwnedRegionOpinion}; +use mz_ore::flatcontainer::{MzOffsetOptimized, MzRegionPreference, OwnedRegionOpinion}; use mz_repr::{Diff, Timestamp}; use mz_storage_operators::persist_source::Subtime; use mz_storage_types::errors::DataflowError; @@ -100,7 +100,7 @@ struct LoggingContext<'a, A: Allocate> { now: Instant, start_offset: Duration, t_event_queue: EventQueue>, - r_event_queue: EventQueue>, + r_event_queue: EventQueue>, d_event_queue: EventQueue>, c_event_queue: EventQueue>, shared_state: Rc>, @@ -188,7 +188,9 @@ impl LoggingContext<'_, A> { fn reachability_logger(&self) -> Logger { let event_queue = self.r_event_queue.clone(); - type CB = PreallocatingCapacityContainerBuilder>; + type CB = PreallocatingCapacityContainerBuilder< + FlatStack, + >; let mut logger = BatchLogger::::new(event_queue.link, self.interval_ms); Logger::new( self.now, diff --git a/src/compute/src/logging/reachability.rs b/src/compute/src/logging/reachability.rs index 1c94908bed87d..0edfc612ba27c 100644 --- a/src/compute/src/logging/reachability.rs +++ b/src/compute/src/logging/reachability.rs @@ -16,7 +16,7 @@ use std::rc::Rc; use mz_compute_client::logging::LoggingConfig; use mz_expr::{permutation_for_arrangement, MirScalarExpr}; use mz_ore::cast::CastFrom; -use mz_ore::flatcontainer::{MzRegionPreference, OwnedRegionOpinion}; +use mz_ore::flatcontainer::{MzOffsetOptimized, MzRegionPreference, OwnedRegionOpinion}; use mz_ore::iter::IteratorExt; use mz_repr::{Datum, Diff, RowArena, SharedRow, Timestamp}; use mz_timely_util::containers::PreallocatingCapacityContainerBuilder; @@ -39,7 +39,7 @@ use crate::typedefs::{FlatKeyValSpineDefault, RowRowSpine}; pub(super) fn construct( worker: &mut timely::worker::Worker, config: &LoggingConfig, - event_queue: EventQueue>, + event_queue: EventQueue>, ) -> BTreeMap { let interval_ms = std::cmp::max(1, config.interval.as_millis()); let worker_index = 
worker.index(); @@ -57,7 +57,8 @@ pub(super) fn construct( ); type UpdatesRegion = <((UpdatesKey, ()), Timestamp, Diff) as MzRegionPreference>::Region; - type CB = PreallocatingCapacityContainerBuilder>; + type CB = + PreallocatingCapacityContainerBuilder>; let (updates, token) = Some(event_queue.link).mz_replay::<_, CB, _>( scope, "reachability logs", diff --git a/src/compute/src/logging/timely.rs b/src/compute/src/logging/timely.rs index 8d9029ade5fa3..ce329ca4b0c20 100644 --- a/src/compute/src/logging/timely.rs +++ b/src/compute/src/logging/timely.rs @@ -14,16 +14,16 @@ use std::collections::BTreeMap; use std::rc::Rc; use std::time::Duration; -use differential_dataflow::consolidation::ConsolidatingContainerBuilder; use mz_compute_client::logging::LoggingConfig; use mz_ore::cast::CastFrom; -use mz_ore::flatcontainer::{MzRegionPreference, OwnedRegionOpinion}; +use mz_ore::flatcontainer::{MzOffsetOptimized, MzRegionPreference, OwnedRegionOpinion}; use mz_ore::region::LgAllocVec; use mz_repr::{Datum, Diff, Timestamp}; +use mz_timely_util::containers::PreallocatingCapacityContainerBuilder; use mz_timely_util::replay::MzReplay; use serde::{Deserialize, Serialize}; use timely::communication::Allocate; -use timely::container::columnation::{Columnation, CopyRegion}; +use timely::container::flatcontainer::FlatStack; use timely::container::CapacityContainerBuilder; use timely::dataflow::channels::pact::Pipeline; use timely::dataflow::channels::pushers::buffer::Session; @@ -361,10 +361,12 @@ struct MessageCount { records: i64, } -type Pusher = - Counter, Tee>>; +type FlatStackFor = + FlatStack<<(D, Timestamp, Diff) as MzRegionPreference>::Region, MzOffsetOptimized>; + +type Pusher = Counter, Tee>>; type OutputSession<'a, D> = - Session<'a, Timestamp, ConsolidatingContainerBuilder>, Pusher>; + Session<'a, Timestamp, PreallocatingCapacityContainerBuilder>, Pusher>; /// Bundled output buffers used by the demux operator. 
// @@ -374,7 +376,7 @@ type OutputSession<'a, D> = struct DemuxOutput<'a> { operates: OutputSession<'a, (usize, String)>, channels: OutputSession<'a, (ChannelDatum, ())>, - addresses: OutputSession<'a, (usize, Vec)>, + addresses: OutputSession<'a, (usize, OwnedRegionOpinion>)>, parks: OutputSession<'a, (ParkDatum, ())>, batches_sent: OutputSession<'a, (MessageDatum, ())>, batches_received: OutputSession<'a, (MessageDatum, ())>, diff --git a/src/ore/src/flatcontainer.rs b/src/ore/src/flatcontainer.rs index e5a457de3a972..8350efb94ca2c 100644 --- a/src/ore/src/flatcontainer.rs +++ b/src/ore/src/flatcontainer.rs @@ -911,13 +911,13 @@ mod lgallocvec { assert_eq!(region.index(index), &42); let mut region = LgAllocVec::::default(); - region.push(42); - region.push(43); - region.push(44); + let i0 = <_ as Push<_>>::push(&mut region, 42); + let i1 = <_ as Push<_>>::push(&mut region, 43); + let i2 = <_ as Push<_>>::push(&mut region, 44); region.reserve_items([1, 2, 3].iter()); - assert_eq!(region.index(0), &42); - assert_eq!(region.index(1), &43); - assert_eq!(region.index(2), &44); + assert_eq!(region.index(i0), &42); + assert_eq!(region.index(i1), &43); + assert_eq!(region.index(i2), &44); } } } diff --git a/src/timely-util/src/containers.rs b/src/timely-util/src/containers.rs index e851810b0e72c..521daf1560e32 100644 --- a/src/timely-util/src/containers.rs +++ b/src/timely-util/src/containers.rs @@ -11,8 +11,9 @@ use std::collections::VecDeque; +use timely::container::flatcontainer::impls::index::IndexContainer; use timely::container::flatcontainer::{FlatStack, Push, Region}; -use timely::container::{ContainerBuilder, PushInto, SizableContainer}; +use timely::container::{CapacityContainer, ContainerBuilder, PushInto}; use timely::Container; pub mod array; @@ -31,50 +32,50 @@ pub mod stack; #[derive(Default, Debug)] pub struct PreallocatingCapacityContainerBuilder { /// Container that we're writing to. - current: C, + current: Option, /// Emtpy allocation. 
empty: Option, /// Completed containers pending to be sent. pending: VecDeque, } -impl PushInto for PreallocatingCapacityContainerBuilder> +impl PushInto for PreallocatingCapacityContainerBuilder> where R: Region + Push + Clone + 'static, + S: IndexContainer + Clone + 'static, + FlatStack: CapacityContainer, { #[inline] fn push_into(&mut self, item: T) { - if self.current.capacity() == 0 { - self.current = self.empty.take().unwrap_or_default(); - // Protect against non-emptied containers. - self.current.clear(); - } - // Ensure capacity - let preferred_capacity = FlatStack::::preferred_capacity(); - if self.current.capacity() < preferred_capacity { - self.current - .reserve(preferred_capacity - self.current.len()); + if self.current.is_none() { + let mut empty = self.empty.take().unwrap_or_default(); + empty.clear(); + self.current = Some(empty); } + let current = self.current.as_mut().unwrap(); + + // Ensure capacity + current.ensure_preferred_capacity(); // Push item - self.current.push(item); + current.push(item); // Maybe flush - if self.current.len() == self.current.capacity() { - let pending = std::mem::take(&mut self.current); - self.current = FlatStack::merge_capacity(std::iter::once(&pending)); - self.current - .reserve(preferred_capacity.saturating_sub(self.current.len())); + if current.len() >= FlatStack::::preferred_capacity() { + let pending = std::mem::take(current); + *current = FlatStack::merge_capacity(std::iter::once(&pending)); self.pending.push_back(pending); } } } -impl ContainerBuilder for PreallocatingCapacityContainerBuilder> +impl ContainerBuilder for PreallocatingCapacityContainerBuilder> where R: Region + Clone + 'static, + S: IndexContainer + Clone + 'static, + FlatStack: CapacityContainer, { - type Container = FlatStack; + type Container = FlatStack; #[inline] fn extract(&mut self) -> Option<&mut Self::Container> { @@ -84,12 +85,11 @@ where #[inline] fn finish(&mut self) -> Option<&mut Self::Container> { - if !self.current.is_empty() { 
- let pending = std::mem::take(&mut self.current); - self.current = FlatStack::merge_capacity(std::iter::once(&pending)); - let preferred_capacity = FlatStack::::preferred_capacity(); - self.current - .reserve(preferred_capacity.saturating_sub(self.current.len())); + let current = self.current.as_mut(); + if current.as_ref().map_or(false, |c| !c.is_empty()) { + let current = current.unwrap(); + let pending = std::mem::take(current); + *current = FlatStack::merge_capacity(std::iter::once(&pending)); self.pending.push_back(pending); } self.empty = self.pending.pop_front(); From 03e773e830b3d6baa2cc7865fc6d8f08e5e9c3f1 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Fri, 19 Jul 2024 15:10:27 -0400 Subject: [PATCH 6/8] Inlines Signed-off-by: Moritz Hoffmann --- src/ore/src/flatcontainer.rs | 38 ++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/ore/src/flatcontainer.rs b/src/ore/src/flatcontainer.rs index 8350efb94ca2c..e03413b2a5b39 100644 --- a/src/ore/src/flatcontainer.rs +++ b/src/ore/src/flatcontainer.rs @@ -101,12 +101,14 @@ mod tuple { where $(<$name as Region>::Index: Index),* { + #[inline] fn clone(&self) -> Self { Self { $([]: self.[].clone(),)* } } + #[inline] fn clone_from(&mut self, source: &Self) { $(self.[].clone_from(&source.[]);)* } @@ -298,12 +300,14 @@ mod tuple { type Diff<'a> = RR::ReadItem<'a> where Self: 'a; type DiffOwned = RR::Owned; + #[inline] fn into_parts<'a>( ((key, val), time, diff): Self::ReadItem<'a>, ) -> (Self::Key<'a>, Self::Val<'a>, Self::Time<'a>, Self::Diff<'a>) { (key, val, time, diff) } + #[inline] fn reborrow_key<'b, 'a: 'b>(item: Self::Key<'a>) -> Self::Key<'b> where Self: 'a, @@ -311,6 +315,7 @@ mod tuple { KR::reborrow(item) } + #[inline] fn reborrow_val<'b, 'a: 'b>(item: Self::Val<'a>) -> Self::Val<'b> where Self: 'a, @@ -318,6 +323,7 @@ mod tuple { VR::reborrow(item) } + #[inline] fn reborrow_time<'b, 'a: 'b>(item: Self::Time<'a>) -> Self::Time<'b> where Self: 'a, @@ -325,6 +331,7 
@@ mod tuple { TR::reborrow(item) } + #[inline] fn reborrow_diff<'b, 'a: 'b>(item: Self::Diff<'a>) -> Self::Diff<'b> where Self: 'a, @@ -438,6 +445,7 @@ mod lgalloc { } impl Clone for LgAllocOwnedRegion { + #[inline] fn clone(&self) -> Self { Self { slices: self.slices.clone(), @@ -445,6 +453,7 @@ mod lgalloc { } } + #[inline] fn clone_from(&mut self, source: &Self) { self.slices.clone_from(&source.slices); self.offsets.clone_from(&source.offsets); @@ -675,6 +684,7 @@ mod item { } impl Clone for ItemRegion { + #[inline] fn clone(&self) -> Self { Self { inner: self.inner.clone(), @@ -682,6 +692,7 @@ mod item { } } + #[inline] fn clone_from(&mut self, source: &Self) { self.inner.clone_from(&source.inner); self.storage.clone_from(&source.storage); @@ -689,6 +700,7 @@ mod item { } impl Default for ItemRegion { + #[inline] fn default() -> Self { Self { inner: R::default(), @@ -704,6 +716,7 @@ mod item { Self: 'a; type Index = MzIndex; + #[inline] fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where Self: 'a, @@ -714,10 +727,12 @@ mod item { } } + #[inline] fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { self.inner.index(self.storage[*index]) } + #[inline] fn reserve_regions<'a, I>(&mut self, regions: I) where Self: 'a, @@ -728,16 +743,19 @@ mod item { self.storage.reserve(regions.map(|r| r.storage.len()).sum()); } + #[inline] fn clear(&mut self) { self.inner.clear(); self.storage.clear(); } + #[inline] fn heap_size(&self, mut callback: F) { self.inner.heap_size(&mut callback); self.storage.heap_size(callback); } + #[inline] fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b> where Self: 'a, @@ -779,6 +797,7 @@ mod lgallocvec { type ReadItem<'a> = &'a T where Self: 'a; type Index = MzIndex; + #[inline] fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where Self: 'a, @@ -786,10 +805,12 @@ mod lgallocvec { Self::with_capacity(regions.map(LgAllocVec::len).sum()) } + #[inline] fn index(&self, index: Self::Index) -> 
Self::ReadItem<'_> { &self[*index] } + #[inline] fn reserve_regions<'a, I>(&mut self, regions: I) where Self: 'a, @@ -798,15 +819,18 @@ mod lgallocvec { self.reserve(regions.map(LgAllocVec::len).sum()); } + #[inline] fn clear(&mut self) { self.clear(); } + #[inline] fn heap_size(&self, mut callback: F) { let size_of_t = std::mem::size_of::(); callback(self.len() * size_of_t, self.capacity() * size_of_t); } + #[inline] fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b> where Self: 'a, @@ -816,26 +840,32 @@ mod lgallocvec { } impl Storage for LgAllocVec { + #[inline] fn with_capacity(capacity: usize) -> Self { Self::with_capacity(capacity) } + #[inline] fn reserve(&mut self, additional: usize) { self.reserve(additional); } + #[inline] fn clear(&mut self) { self.clear(); } + #[inline] fn heap_size(&self, callback: F) { self.heap_size(callback); } + #[inline] fn len(&self) -> usize { self.len() } + #[inline] fn is_empty(&self) -> bool { self.is_empty() } @@ -846,14 +876,17 @@ mod lgallocvec { where Self: 'a; + #[inline] fn index(&self, index: usize) -> T { self[index] } + #[inline] fn push(&mut self, item: T) { self.push(item); } + #[inline] fn extend>(&mut self, iter: I) where I::IntoIter: ExactSizeIterator, @@ -863,12 +896,14 @@ mod lgallocvec { } } + #[inline] fn iter(&self) -> Self::Iter<'_> { self.iter().copied() } } impl Push for LgAllocVec { + #[inline] fn push(&mut self, item: T) -> Self::Index { self.push(item); MzIndex(self.len() - 1) @@ -876,6 +911,7 @@ mod lgallocvec { } impl Push<&T> for LgAllocVec { + #[inline] fn push(&mut self, item: &T) -> Self::Index { self.push(item.clone()); MzIndex(self.len() - 1) @@ -883,6 +919,7 @@ mod lgallocvec { } impl Push<&&T> for LgAllocVec { + #[inline] fn push(&mut self, item: &&T) -> Self::Index { self.push((*item).clone()); MzIndex(self.len() - 1) @@ -890,6 +927,7 @@ mod lgallocvec { } impl ReserveItems for LgAllocVec { + #[inline] fn reserve_items(&mut self, items: I) where I: Iterator + Clone, 
From 18f6b9b530675b4116a684c04a372973b5d95152 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Mon, 22 Jul 2024 14:22:44 -0400 Subject: [PATCH 7/8] Back out of MzTupleRegion Signed-off-by: Moritz Hoffmann --- src/compute/src/extensions/arrange.rs | 10 +- src/compute/src/logging/differential.rs | 2 +- src/compute/src/logging/initialize.rs | 27 +- src/compute/src/logging/reachability.rs | 14 +- src/compute/src/logging/timely.rs | 85 +++++- src/compute/src/typedefs.rs | 50 ++-- src/ore/src/flatcontainer.rs | 367 +++++++----------------- src/repr/src/timestamp.rs | 5 +- 8 files changed, 233 insertions(+), 327 deletions(-) diff --git a/src/compute/src/extensions/arrange.rs b/src/compute/src/extensions/arrange.rs index 949d8b4aab12f..22e46ab88ea5f 100644 --- a/src/compute/src/extensions/arrange.rs +++ b/src/compute/src/extensions/arrange.rs @@ -385,7 +385,7 @@ mod flatcontainer { use differential_dataflow::lattice::Lattice; use differential_dataflow::operators::arrange::Arranged; use differential_dataflow::trace::TraceReader; - use mz_ore::flatcontainer::{MzRegion, MzRegionPreference}; + use mz_ore::flatcontainer::{MzIndex, MzRegion, MzRegionPreference}; use timely::container::flatcontainer::{IntoOwned, Region}; use timely::dataflow::Scope; use timely::progress::Timestamp; @@ -399,10 +399,10 @@ mod flatcontainer { Self: Clone, G: Scope, G::Timestamp: Lattice + Ord + MzRegionPreference, - K: MzRegion, - V: MzRegion, - T: MzRegion, - R: MzRegion, + K: MzRegion, + V: MzRegion, + T: MzRegion, + R: MzRegion, K::Owned: Clone + Ord, V::Owned: Clone + Ord, T::Owned: Lattice + for<'a> PartialOrder<::ReadItem<'a>> + Timestamp, diff --git a/src/compute/src/logging/differential.rs b/src/compute/src/logging/differential.rs index a7074e6681d76..a92612d35c9ae 100644 --- a/src/compute/src/logging/differential.rs +++ b/src/compute/src/logging/differential.rs @@ -136,7 +136,7 @@ pub(super) fn construct( ) .as_collection(move |op, ()| { packer.pack_slice(&[ - 
Datum::UInt64(u64::cast_from(*op)), + Datum::UInt64(u64::cast_from(op)), Datum::UInt64(u64::cast_from(worker_id)), ]) }) diff --git a/src/compute/src/logging/initialize.rs b/src/compute/src/logging/initialize.rs index 9bf241304dcaf..715d169c7a524 100644 --- a/src/compute/src/logging/initialize.rs +++ b/src/compute/src/logging/initialize.rs @@ -10,11 +10,15 @@ use std::collections::BTreeMap; use std::rc::Rc; use std::time::{Duration, Instant}; +use crate::arrangement::manager::TraceBundle; +use crate::extensions::arrange::{KeyCollection, MzArrange}; +use crate::logging::compute::ComputeEvent; +use crate::logging::{BatchLogger, EventQueue, SharedLoggingState}; use differential_dataflow::dynamic::pointstamp::PointStamp; use differential_dataflow::logging::DifferentialEvent; use differential_dataflow::Collection; use mz_compute_client::logging::{LogVariant, LoggingConfig}; -use mz_ore::flatcontainer::{MzOffsetOptimized, MzRegionPreference, OwnedRegionOpinion}; +use mz_ore::flatcontainer::{ItemRegion, MzIndexOptimized, MzRegionPreference, OwnedRegionOpinion}; use mz_repr::{Diff, Timestamp}; use mz_storage_operators::persist_source::Subtime; use mz_storage_types::errors::DataflowError; @@ -27,11 +31,6 @@ use timely::logging::{Logger, ProgressEventTimestamp, TimelyEvent, WorkerIdentif use timely::order::Product; use timely::progress::reachability::logging::TrackerEvent; -use crate::arrangement::manager::TraceBundle; -use crate::extensions::arrange::{KeyCollection, MzArrange}; -use crate::logging::compute::ComputeEvent; -use crate::logging::{BatchLogger, EventQueue, SharedLoggingState}; - /// Initialize logging dataflows. 
/// /// Returns a logger for compute events, and for each `LogVariant` a trace bundle usable for @@ -87,11 +86,13 @@ type ReachabilityEventRegionPreference = ( OwnedRegionOpinion>, OwnedRegionOpinion, Diff)>>, ); -pub(super) type ReachabilityEventRegion = <( - Duration, - WorkerIdentifier, - ReachabilityEventRegionPreference, -) as MzRegionPreference>::Region; +pub(super) type ReachabilityEventRegion = ItemRegion< + <( + Duration, + WorkerIdentifier, + ReachabilityEventRegionPreference, + ) as MzRegionPreference>::Region, +>; struct LoggingContext<'a, A: Allocate> { worker: &'a mut timely::worker::Worker, @@ -100,7 +101,7 @@ struct LoggingContext<'a, A: Allocate> { now: Instant, start_offset: Duration, t_event_queue: EventQueue>, - r_event_queue: EventQueue>, + r_event_queue: EventQueue>, d_event_queue: EventQueue>, c_event_queue: EventQueue>, shared_state: Rc>, @@ -189,7 +190,7 @@ impl LoggingContext<'_, A> { fn reachability_logger(&self) -> Logger { let event_queue = self.r_event_queue.clone(); type CB = PreallocatingCapacityContainerBuilder< - FlatStack, + FlatStack, >; let mut logger = BatchLogger::::new(event_queue.link, self.interval_ms); Logger::new( diff --git a/src/compute/src/logging/reachability.rs b/src/compute/src/logging/reachability.rs index 0edfc612ba27c..76b83e7c8eeca 100644 --- a/src/compute/src/logging/reachability.rs +++ b/src/compute/src/logging/reachability.rs @@ -16,7 +16,7 @@ use std::rc::Rc; use mz_compute_client::logging::LoggingConfig; use mz_expr::{permutation_for_arrangement, MirScalarExpr}; use mz_ore::cast::CastFrom; -use mz_ore::flatcontainer::{MzOffsetOptimized, MzRegionPreference, OwnedRegionOpinion}; +use mz_ore::flatcontainer::{ItemRegion, MzIndexOptimized, MzRegionPreference, OwnedRegionOpinion}; use mz_ore::iter::IteratorExt; use mz_repr::{Datum, Diff, RowArena, SharedRow, Timestamp}; use mz_timely_util::containers::PreallocatingCapacityContainerBuilder; @@ -39,7 +39,7 @@ use crate::typedefs::{FlatKeyValSpineDefault, 
RowRowSpine}; pub(super) fn construct( worker: &mut timely::worker::Worker, config: &LoggingConfig, - event_queue: EventQueue>, + event_queue: EventQueue>, ) -> BTreeMap { let interval_ms = std::cmp::max(1, config.interval.as_millis()); let worker_index = worker.index(); @@ -55,10 +55,10 @@ pub(super) fn construct( usize, Option, ); - type UpdatesRegion = <((UpdatesKey, ()), Timestamp, Diff) as MzRegionPreference>::Region; + type UpdatesRegion = + ItemRegion<<((UpdatesKey, ()), Timestamp, Diff) as MzRegionPreference>::Region>; - type CB = - PreallocatingCapacityContainerBuilder>; + type CB = PreallocatingCapacityContainerBuilder>; let (updates, token) = Some(event_queue.link).mz_replay::<_, CB, _>( scope, "reachability logs", @@ -103,7 +103,7 @@ pub(super) fn construct( ); let updates = - updates.as_collection(move |(&update_type, addr, &source, &port, ts), _| { + updates.as_collection(move |(update_type, addr, source, port, ts), _| { let row_arena = RowArena::default(); let update_type = if update_type { "source" } else { "target" }; let binding = SharedRow::get(); @@ -119,7 +119,7 @@ pub(super) fn construct( Datum::UInt64(u64::cast_from(port)), Datum::UInt64(u64::cast_from(worker_index)), Datum::String(update_type), - Datum::from(ts.copied()), + Datum::from(ts), ]; row_builder.packer().extend(key.iter().map(|k| datums[*k])); let key_row = row_builder.clone(); diff --git a/src/compute/src/logging/timely.rs b/src/compute/src/logging/timely.rs index ce329ca4b0c20..8dada0159b7cd 100644 --- a/src/compute/src/logging/timely.rs +++ b/src/compute/src/logging/timely.rs @@ -16,14 +16,13 @@ use std::time::Duration; use mz_compute_client::logging::LoggingConfig; use mz_ore::cast::CastFrom; -use mz_ore::flatcontainer::{MzOffsetOptimized, MzRegionPreference, OwnedRegionOpinion}; -use mz_ore::region::LgAllocVec; +use mz_ore::flatcontainer::{ItemRegion, MzIndexOptimized, MzRegionPreference, OwnedRegionOpinion}; use mz_repr::{Datum, Diff, Timestamp}; use 
mz_timely_util::containers::PreallocatingCapacityContainerBuilder; use mz_timely_util::replay::MzReplay; use serde::{Deserialize, Serialize}; use timely::communication::Allocate; -use timely::container::flatcontainer::FlatStack; +use timely::container::flatcontainer::{FlatStack, IntoOwned, MirrorRegion}; use timely::container::CapacityContainerBuilder; use timely::dataflow::channels::pact::Pipeline; use timely::dataflow::channels::pushers::buffer::Session; @@ -158,7 +157,7 @@ pub(super) fn construct( ) .as_collection(move |id, name| { packer.pack_slice(&[ - Datum::UInt64(u64::cast_from(*id)), + Datum::UInt64(u64::cast_from(id)), Datum::UInt64(u64::cast_from(worker_id)), Datum::String(name), ]) @@ -191,7 +190,7 @@ pub(super) fn construct( .as_collection({ move |id, address| { packer.pack_by_index(|packer, index| match index { - 0 => packer.push(Datum::UInt64(u64::cast_from(*id))), + 0 => packer.push(Datum::UInt64(u64::cast_from(id))), 1 => packer.push(Datum::UInt64(u64::cast_from(worker_id))), 2 => packer .push_list(address.iter().map(|i| Datum::UInt64(u64::cast_from(*i)))), @@ -272,7 +271,7 @@ pub(super) fn construct( ) .as_collection(move |operator, _| { packer.pack_slice(&[ - Datum::UInt64(u64::cast_from(*operator)), + Datum::UInt64(u64::cast_from(operator)), Datum::UInt64(u64::cast_from(worker_id)), ]) }); @@ -362,7 +361,7 @@ struct MessageCount { } type FlatStackFor = - FlatStack<<(D, Timestamp, Diff) as MzRegionPreference>::Region, MzOffsetOptimized>; + FlatStack::Region>, MzIndexOptimized>; type Pusher = Counter, Tee>>; type OutputSession<'a, D> = @@ -395,7 +394,23 @@ struct ChannelDatum { impl MzRegionPreference for ChannelDatum { type Owned = Self; - type Region = LgAllocVec; + type Region = MirrorRegion; +} + +impl<'a> IntoOwned<'a> for ChannelDatum { + type Owned = Self; + + fn into_owned(self) -> Self::Owned { + self + } + + fn clone_onto(self, other: &mut Self::Owned) { + *other = self; + } + + fn borrow_as(owned: &'a Self::Owned) -> Self { + *owned + } 
} #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] @@ -406,7 +421,23 @@ struct ParkDatum { impl MzRegionPreference for ParkDatum { type Owned = Self; - type Region = LgAllocVec; + type Region = MirrorRegion; +} + +impl<'a> IntoOwned<'a> for ParkDatum { + type Owned = Self; + + fn into_owned(self) -> Self::Owned { + self + } + + fn clone_onto(self, other: &mut Self::Owned) { + *other = self; + } + + fn borrow_as(owned: &'a Self::Owned) -> Self { + *owned + } } #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] @@ -417,7 +448,23 @@ struct MessageDatum { impl MzRegionPreference for MessageDatum { type Owned = Self; - type Region = LgAllocVec; + type Region = MirrorRegion; +} + +impl<'a> IntoOwned<'a> for MessageDatum { + type Owned = Self; + + fn into_owned(self) -> Self::Owned { + self + } + + fn clone_onto(self, other: &mut Self::Owned) { + *other = self; + } + + fn borrow_as(owned: &'a Self::Owned) -> Self { + *owned + } } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] @@ -428,7 +475,23 @@ struct ScheduleHistogramDatum { impl MzRegionPreference for ScheduleHistogramDatum { type Owned = Self; - type Region = LgAllocVec; + type Region = MirrorRegion; +} + +impl<'a> IntoOwned<'a> for ScheduleHistogramDatum { + type Owned = Self; + + fn into_owned(self) -> Self::Owned { + self + } + + fn clone_onto(self, other: &mut Self::Owned) { + *other = self; + } + + fn borrow_as(owned: &'a Self::Owned) -> Self { + *owned + } } /// Event handler of the demux operator. 
diff --git a/src/compute/src/typedefs.rs b/src/compute/src/typedefs.rs index 9be686d79f4a5..5076dcce3f067 100644 --- a/src/compute/src/typedefs.rs +++ b/src/compute/src/typedefs.rs @@ -11,6 +11,9 @@ #![allow(dead_code, missing_docs)] +pub use crate::row_spine::{RowRowSpine, RowSpine, RowValSpine}; +use crate::typedefs::spines::MzFlatLayout; +pub use crate::typedefs::spines::{ColKeySpine, ColValSpine}; use differential_dataflow::operators::arrange::Arranged; use differential_dataflow::operators::arrange::TraceAgent; use differential_dataflow::trace::implementations::chunker::ColumnationChunker; @@ -19,17 +22,12 @@ use differential_dataflow::trace::implementations::merge_batcher_col::Columnatio use differential_dataflow::trace::implementations::ord_neu::{FlatValSpine, OrdValBatch}; use differential_dataflow::trace::wrappers::enter::TraceEnter; use differential_dataflow::trace::wrappers::frontier::TraceFrontier; -use mz_ore::flatcontainer::{ - MzOffsetOptimized, MzRegionPreference, MzTupleABCRegion, MzTupleABRegion, -}; +use mz_ore::flatcontainer::{ItemRegion, MzIndexOptimized, MzRegionPreference}; use mz_repr::Diff; use mz_storage_types::errors::DataflowError; +use timely::container::flatcontainer::impls::tuple::{TupleABCRegion, TupleABRegion}; use timely::dataflow::ScopeParent; -pub use crate::row_spine::{RowRowSpine, RowSpine, RowValSpine}; -use crate::typedefs::spines::MzFlatLayout; -pub use crate::typedefs::spines::{ColKeySpine, ColValSpine}; - pub(crate) mod spines { use std::rc::Rc; @@ -41,7 +39,7 @@ pub(crate) mod spines { use differential_dataflow::trace::implementations::spine_fueled::Spine; use differential_dataflow::trace::implementations::{Layout, Update}; use differential_dataflow::trace::rc_blanket_impls::RcBuilder; - use mz_ore::flatcontainer::{MzOffsetOptimized, MzRegion}; + use mz_ore::flatcontainer::{MzIndex, MzIndexOptimized, MzRegion}; use mz_timely_util::containers::stack::StackWrapper; use timely::container::columnation::{Columnation, 
TimelyStack}; use timely::container::flatcontainer::FlatStack; @@ -91,10 +89,10 @@ pub(crate) mod spines { impl Update for MzFlatLayout where - KR: MzRegion, - VR: MzRegion, - TR: MzRegion, - RR: MzRegion, + KR: MzRegion, + VR: MzRegion, + TR: MzRegion, + RR: MzRegion, KR::Owned: Ord + Clone + 'static, VR::Owned: Ord + Clone + 'static, TR::Owned: Ord + Clone + Lattice + Timestamp + 'static, @@ -116,10 +114,10 @@ pub(crate) mod spines { /// to the optimized variant, we might be able to remove this implementation. impl Layout for MzFlatLayout where - KR: MzRegion, - VR: MzRegion, - TR: MzRegion, - RR: MzRegion, + KR: MzRegion, + VR: MzRegion, + TR: MzRegion, + RR: MzRegion, KR::Owned: Ord + Clone + 'static, VR::Owned: Ord + Clone + 'static, TR::Owned: Ord + Clone + Lattice + Timestamp + 'static, @@ -130,10 +128,10 @@ pub(crate) mod spines { for<'a> RR::ReadItem<'a>: Copy + Ord, { type Target = Self; - type KeyContainer = FlatStack; - type ValContainer = FlatStack; - type TimeContainer = FlatStack; - type DiffContainer = FlatStack; + type KeyContainer = FlatStack; + type ValContainer = FlatStack; + type TimeContainer = FlatStack; + type DiffContainer = FlatStack; type OffsetContainer = OffsetOptimized; } } @@ -184,15 +182,15 @@ pub type KeyValBatcher = MergeBatcher< pub type FlatKeyValBatch = OrdValBatch>; pub type FlatKeyValSpine = FlatValSpine< MzFlatLayout, - MzTupleABCRegion, T, R>, + ItemRegion, T, R>>, C, - MzOffsetOptimized, + MzIndexOptimized, >; pub type FlatKeyValSpineDefault = FlatKeyValSpine< - ::Region, - ::Region, - ::Region, - ::Region, + ItemRegion<::Region>, + ItemRegion<::Region>, + ItemRegion<::Region>, + ItemRegion<::Region>, C, >; pub type FlatKeyValAgent = TraceAgent>; diff --git a/src/ore/src/flatcontainer.rs b/src/ore/src/flatcontainer.rs index e03413b2a5b39..27b3ed586189b 100644 --- a/src/ore/src/flatcontainer.rs +++ b/src/ore/src/flatcontainer.rs @@ -20,8 +20,7 @@ use flatcontainer::{OptionRegion, Push, Region, ReserveItems, StringRegion}; 
use serde::{Deserialize, Serialize}; pub use item::ItemRegion; -pub use offset::MzOffsetOptimized; -pub use tuple::*; +pub use offset::MzIndexOptimized; /// Associate a type with a flat container region. pub trait MzRegionPreference: 'static { @@ -45,7 +44,7 @@ impl std::ops::Deref for MzIndex { /// TODO pub trait MzRegion: - Region + Region + Push<::Owned> + for<'a> Push<&'a ::Owned> + for<'a> Push<::ReadItem<'a>> @@ -56,7 +55,7 @@ pub trait MzRegion: } impl MzRegion for R where - R: Region + R: Region + Push<::Owned> + for<'a> Push<&'a ::Owned> + for<'a> Push<::ReadItem<'a>> @@ -74,276 +73,30 @@ impl MzRegion for R where pub struct OwnedRegionOpinion(std::marker::PhantomData); mod tuple { - use flatcontainer::{Index, Push, Region, ReserveItems}; + use flatcontainer::impls::tuple::*; use paste::paste; - use crate::flatcontainer::{MzIndex, MzRegion, MzRegionPreference}; + use crate::flatcontainer::MzRegionPreference; /// The macro creates the region implementation for tuples macro_rules! tuple_flatcontainer { - ($($name:ident)+) => ( - paste! { + ($($name:ident)+) => (paste! { impl<$($name: MzRegionPreference),*> MzRegionPreference for ($($name,)*) { type Owned = ($($name::Owned,)*); - type Region = []<$($name::Region,)*>; - } - - /// A region for a tuple. 
- #[allow(non_snake_case)] - #[derive(Default, Debug)] - #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] - pub struct []<$($name),*> { - $([]: $name),* - } - - #[allow(non_snake_case)] - impl<$($name: MzRegion),*> Clone for []<$($name),*> - where - $(<$name as Region>::Index: Index),* - { - #[inline] - fn clone(&self) -> Self { - Self { - $([]: self.[].clone(),)* - } - } - - #[inline] - fn clone_from(&mut self, source: &Self) { - $(self.[].clone_from(&source.[]);)* - } - } - - #[allow(non_snake_case)] - impl<$($name: MzRegion),*> Region for []<$($name),*> - where - $(<$name as Region>::Index: Index),* - { - type Owned = ($($name::Owned,)*); - type ReadItem<'a> = ($($name::ReadItem<'a>,)*) where Self: 'a; - - type Index = MzIndex; - - #[inline] - fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self - where - Self: 'a, - { - Self { - $([]: $name::merge_regions(regions.clone().map(|r| &r.[]))),* - } - } - - #[inline] fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { - ( - $(self.[].index(index),)* - ) - } - - #[inline(always)] - fn reserve_regions<'a, It>(&mut self, regions: It) - where - Self: 'a, - It: Iterator + Clone, - { - $(self.[].reserve_regions(regions.clone().map(|r| &r.[]));)* - } - - #[inline(always)] - fn clear(&mut self) { - $(self.[].clear();)* - } - - #[inline] - fn heap_size(&self, mut callback: Fn) { - $(self.[].heap_size(&mut callback);)* - } - - #[inline] - fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b> where Self: 'a { - let ($($name,)*) = item; - ( - $($name::reborrow($name),)* - ) - } - } - - #[allow(non_camel_case_types)] - #[allow(non_snake_case)] - impl<$($name, [<$name _C>]: MzRegion ),*> Push<($($name,)*)> for []<$([<$name _C>]),*> - where - $([<$name _C>]: Push<$name>),* - { - #[inline] - fn push(&mut self, item: ($($name,)*)) - -> <[]<$([<$name _C>]),*> as Region>::Index { - let ($($name,)*) = item; - $(let _index = self.[].push($name);)* - _index - } - } - - 
#[allow(non_camel_case_types)] - #[allow(non_snake_case)] - impl<'a, $($name, [<$name _C>]),*> Push<&'a ($($name,)*)> for []<$([<$name _C>]),*> - where - $([<$name _C>]: MzRegion + Push<&'a $name>),* - { - #[inline] - fn push(&mut self, item: &'a ($($name,)*)) - -> <[]<$([<$name _C>]),*> as Region>::Index { - let ($($name,)*) = item; - $(let _index = self.[].push($name);)* - _index - } - } - - #[allow(non_camel_case_types)] - #[allow(non_snake_case)] - impl<'a, $($name, [<$name _C>]),*> ReserveItems<&'a ($($name,)*)> for []<$([<$name _C>]),*> - where - $([<$name _C>]: MzRegion + ReserveItems<&'a $name>),* - { - #[inline] - fn reserve_items(&mut self, items: It) - where - It: Iterator + Clone, - { - tuple_flatcontainer!(reserve_items self items $($name)* @ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31); - } - } - - #[allow(non_camel_case_types)] - #[allow(non_snake_case)] - impl<$($name, [<$name _C>]),*> ReserveItems<($($name,)*)> for []<$([<$name _C>]),*> - where - $([<$name _C>]: MzRegion + ReserveItems<$name>),* - { - #[inline] - fn reserve_items(&mut self, items: It) - where - It: Iterator + Clone, - { - tuple_flatcontainer!(reserve_items_owned self items $($name)* @ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31); - } + type Region = []<$($name::Region,)*>; } - } - ); - (reserve_items $self:ident $items:ident $name0:ident $($name:ident)* @ $num0:tt $($num:tt)*) => { - paste! { - $self.[].reserve_items($items.clone().map(|i| &i.$num0)); - tuple_flatcontainer!(reserve_items $self $items $($name)* @ $($num)*); - } - }; - (reserve_items $self:ident $items:ident @ $($num:tt)*) => {}; - (reserve_items_owned $self:ident $items:ident $name0:ident $($name:ident)* @ $num0:tt $($num:tt)*) => { - paste! 
{ - $self.[].reserve_items($items.clone().map(|i| i.$num0)); - tuple_flatcontainer!(reserve_items_owned $self $items $($name)* @ $($num)*); - } - }; - (reserve_items_owned $self:ident $items:ident @ $($num:tt)*) => {}; -} + }); + } tuple_flatcontainer!(A); tuple_flatcontainer!(A B); tuple_flatcontainer!(A B C); tuple_flatcontainer!(A B C D); tuple_flatcontainer!(A B C D E); - - #[cfg(feature = "differential")] - mod differential { - use differential_dataflow::difference::Semigroup; - use differential_dataflow::lattice::Lattice; - use differential_dataflow::trace::implementations::merge_batcher_flat::RegionUpdate; - use differential_dataflow::trace::implementations::Update; - use timely::progress::Timestamp; - - use crate::flatcontainer::{MzRegion, MzTupleABCRegion, MzTupleABRegion}; - - impl Update for MzTupleABCRegion, TR, RR> - where - KR: MzRegion, - KR::Owned: Clone + Ord, - for<'a> KR::ReadItem<'a>: Copy + Ord, - VR: MzRegion, - VR::Owned: Clone + Ord, - for<'a> VR::ReadItem<'a>: Copy + Ord, - TR: MzRegion, - TR::Owned: Clone + Lattice + Ord + Timestamp, - for<'a> TR::ReadItem<'a>: Copy + Ord, - RR: MzRegion, - RR::Owned: Clone + Ord + Semigroup, - for<'a> RR::ReadItem<'a>: Copy + Ord, - { - type Key = KR::Owned; - type Val = VR::Owned; - type Time = TR::Owned; - type Diff = RR::Owned; - } - - impl RegionUpdate for MzTupleABCRegion, TR, RR> - where - KR: MzRegion, - for<'a> KR::ReadItem<'a>: Copy + Ord, - VR: MzRegion, - for<'a> VR::ReadItem<'a>: Copy + Ord, - TR: MzRegion, - for<'a> TR::ReadItem<'a>: Copy + Ord, - RR: MzRegion, - for<'a> RR::ReadItem<'a>: Copy + Ord, - { - type Key<'a> = KR::ReadItem<'a> where Self: 'a; - type Val<'a> = VR::ReadItem<'a> where Self: 'a; - type Time<'a> = TR::ReadItem<'a> where Self: 'a; - type TimeOwned = TR::Owned; - type Diff<'a> = RR::ReadItem<'a> where Self: 'a; - type DiffOwned = RR::Owned; - - #[inline] - fn into_parts<'a>( - ((key, val), time, diff): Self::ReadItem<'a>, - ) -> (Self::Key<'a>, Self::Val<'a>, 
Self::Time<'a>, Self::Diff<'a>) { - (key, val, time, diff) - } - - #[inline] - fn reborrow_key<'b, 'a: 'b>(item: Self::Key<'a>) -> Self::Key<'b> - where - Self: 'a, - { - KR::reborrow(item) - } - - #[inline] - fn reborrow_val<'b, 'a: 'b>(item: Self::Val<'a>) -> Self::Val<'b> - where - Self: 'a, - { - VR::reborrow(item) - } - - #[inline] - fn reborrow_time<'b, 'a: 'b>(item: Self::Time<'a>) -> Self::Time<'b> - where - Self: 'a, - { - TR::reborrow(item) - } - - #[inline] - fn reborrow_diff<'b, 'a: 'b>(item: Self::Diff<'a>) -> Self::Diff<'b> - where - Self: 'a, - { - RR::reborrow(item) - } - } - } } mod copy { - use crate::region::LgAllocVec; + use flatcontainer::MirrorRegion; use crate::flatcontainer::MzRegionPreference; @@ -351,7 +104,7 @@ mod copy { ($index_type:ty) => { impl MzRegionPreference for $index_type { type Owned = Self; - type Region = LgAllocVec; + type Region = MirrorRegion; } }; } @@ -781,6 +534,78 @@ mod item { self.storage.reserve(items.count()); } } + + #[cfg(feature = "differential")] + mod differential { + use differential_dataflow::trace::implementations::merge_batcher_flat::RegionUpdate; + use differential_dataflow::trace::implementations::Update; + + use crate::flatcontainer::{ItemRegion, MzRegion}; + + impl Update for ItemRegion + where + UR: Update + MzRegion, + UR::Owned: Clone + Ord, + for<'a> UR::ReadItem<'a>: Copy + Ord, + { + type Key = UR::Key; + type Val = UR::Val; + type Time = UR::Time; + type Diff = UR::Diff; + } + + impl RegionUpdate for ItemRegion + where + UR: RegionUpdate + MzRegion, + for<'a> UR::ReadItem<'a>: Copy + Ord, + { + type Key<'a> = UR::Key<'a> where Self: 'a; + type Val<'a> = UR::Val<'a> where Self: 'a; + type Time<'a> = UR::Time<'a> where Self: 'a; + type TimeOwned = UR::TimeOwned; + type Diff<'a> = UR::Diff<'a> where Self: 'a; + type DiffOwned = UR::DiffOwned; + + #[inline] + fn into_parts<'a>( + item: Self::ReadItem<'a>, + ) -> (Self::Key<'a>, Self::Val<'a>, Self::Time<'a>, Self::Diff<'a>) { + UR::into_parts(item) 
+ } + + #[inline] + fn reborrow_key<'b, 'a: 'b>(item: Self::Key<'a>) -> Self::Key<'b> + where + Self: 'a, + { + UR::reborrow_key(item) + } + + #[inline] + fn reborrow_val<'b, 'a: 'b>(item: Self::Val<'a>) -> Self::Val<'b> + where + Self: 'a, + { + UR::reborrow_val(item) + } + + #[inline] + fn reborrow_time<'b, 'a: 'b>(item: Self::Time<'a>) -> Self::Time<'b> + where + Self: 'a, + { + UR::reborrow_time(item) + } + + #[inline] + fn reborrow_diff<'b, 'a: 'b>(item: Self::Diff<'a>) -> Self::Diff<'b> + where + Self: 'a, + { + UR::reborrow_diff(item) + } + } + } } mod lgallocvec { @@ -967,47 +792,56 @@ mod offset { /// TODO #[derive(Default, Clone, Debug)] - pub struct MzOffsetOptimized(IndexOptimized); + pub struct MzIndexOptimized(IndexOptimized); - impl Storage for MzOffsetOptimized { + impl Storage for MzIndexOptimized { + #[inline] fn with_capacity(capacity: usize) -> Self { Self(IndexOptimized::with_capacity(capacity)) } + #[inline] fn reserve(&mut self, additional: usize) { self.0.reserve(additional) } + #[inline] fn clear(&mut self) { self.0.clear(); } + #[inline] fn heap_size(&self, callback: F) { self.0.heap_size(callback); } + #[inline] fn len(&self) -> usize { self.0.len() } + #[inline] fn is_empty(&self) -> bool { self.0.is_empty() } } - impl IndexContainer for MzOffsetOptimized { + impl IndexContainer for MzIndexOptimized { type Iter<'a> = MzOffsetOptimizedIter<>::Iter<'a>> where Self: 'a; + #[inline] fn index(&self, index: usize) -> MzIndex { MzIndex(self.0.index(index)) } + #[inline] fn push(&mut self, item: MzIndex) { self.0.push(item.0); } + #[inline] fn extend>(&mut self, iter: I) where I::IntoIter: ExactSizeIterator, @@ -1015,11 +849,21 @@ mod offset { self.0.extend(iter.into_iter().map(|item| item.0)); } + #[inline] fn iter(&self) -> Self::Iter<'_> { MzOffsetOptimizedIter(self.0.iter()) } } + impl<'a> IntoIterator for &'a MzIndexOptimized { + type Item = MzIndex; + type IntoIter = MzOffsetOptimizedIter<>::Iter<'a>>; + + fn into_iter(self) -> 
Self::IntoIter { + self.iter() + } + } + /// TODO #[derive(Clone, Copy, Debug)] pub struct MzOffsetOptimizedIter(I); @@ -1030,6 +874,7 @@ mod offset { { type Item = MzIndex; + #[inline] fn next(&mut self) -> Option { self.0.next().map(MzIndex) } diff --git a/src/repr/src/timestamp.rs b/src/repr/src/timestamp.rs index 96549a11f3031..1461541f7daef 100644 --- a/src/repr/src/timestamp.rs +++ b/src/repr/src/timestamp.rs @@ -467,15 +467,14 @@ impl columnation::Columnation for Timestamp { } mod flatcontainer { - use flatcontainer::IntoOwned; + use flatcontainer::{IntoOwned, MirrorRegion}; use mz_ore::flatcontainer::MzRegionPreference; - use mz_ore::region::LgAllocVec; use crate::Timestamp; impl MzRegionPreference for Timestamp { type Owned = Self; - type Region = LgAllocVec; + type Region = MirrorRegion; } impl<'a> IntoOwned<'a> for Timestamp { From e7a778df5292bfb7603d8287aacf7819d9251ab3 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Tue, 23 Jul 2024 14:54:21 -0400 Subject: [PATCH 8/8] Inlines, documentation, restructure LgAllocVec creation Signed-off-by: Moritz Hoffmann --- src/ore/src/flatcontainer.rs | 8 ++- src/ore/src/region.rs | 105 +++++++++++++++++++++++++---------- 2 files changed, 82 insertions(+), 31 deletions(-) diff --git a/src/ore/src/flatcontainer.rs b/src/ore/src/flatcontainer.rs index 27b3ed586189b..e862381fadd77 100644 --- a/src/ore/src/flatcontainer.rs +++ b/src/ore/src/flatcontainer.rs @@ -142,7 +142,7 @@ mod copy { impl MzRegionPreference for String { type Owned = String; - type Region = ItemRegion>; + type Region = ConsecutiveIndexPairs; } mod vec { @@ -157,7 +157,7 @@ mod vec { impl MzRegionPreference for Option { type Owned = as Region>::Owned; - type Region = ItemRegion>; + type Region = OptionRegion; } mod lgalloc { @@ -424,7 +424,7 @@ mod item { use crate::flatcontainer::MzIndex; use crate::region::LgAllocVec; - /// TODO + /// A region that stores indexes in lgalloc. 
pub struct ItemRegion { inner: R, storage: LgAllocVec, @@ -518,6 +518,7 @@ mod item { } impl, T> Push for ItemRegion { + #[inline] fn push(&mut self, item: T) -> Self::Index { let index = self.inner.push(item); self.storage.push(index); @@ -526,6 +527,7 @@ mod item { } impl, T> ReserveItems for ItemRegion { + #[inline] fn reserve_items(&mut self, items: I) where I: Iterator + Clone, diff --git a/src/ore/src/region.rs b/src/ore/src/region.rs index 3fa8017a70019..c11ffc18e8f3d 100644 --- a/src/ore/src/region.rs +++ b/src/ore/src/region.rs @@ -399,6 +399,11 @@ mod vec { use std::fmt::{Debug, Formatter}; use std::mem::{ManuallyDrop, MaybeUninit}; use std::ops::Deref; + use std::sync::atomic::AtomicUsize; + + /// Configuration variable to dynamically configure the cut over point from heap to lgalloc. + /// TODO: Wire up a configuration mechanism to set this value. + static LGALLOC_VEC_HEAP_LIMIT_BYTES: AtomicUsize = AtomicUsize::new(64 << 10); /// A fixed-length region in memory, which is either allocated from heap or lgalloc. pub struct LgAllocVec { @@ -413,49 +418,70 @@ mod vec { impl LgAllocVec { /// Create a new [`LgAllocVec`] with the specified capacity. The actual capacity of the returned /// array is at least as big as the requested capacity. + #[inline] pub fn with_capacity(capacity: usize) -> Self { // Allocate memory, fall-back to regular heap allocations if we cannot acquire memory through // lgalloc. - let (handle, boxed) = if let Ok((ptr, actual_capacity, handle)) = - lgalloc::allocate::>(capacity) - { - // We allocated sucessfully through lgalloc. - let handle = Some(handle); - // SAFETY: `ptr` is valid for constructing a slice: - // 1. Valid for reading and writing, and enough capacity. - // 2. Properly initialized (left for writing). - // 3. Not aliased. - // 4. Total size not longer than isize::MAX because lgalloc has a capacity limit. 
- let slice = - unsafe { std::slice::from_raw_parts_mut(ptr.as_ptr(), actual_capacity) }; - // SAFETY: slice is valid, and we deallocate it usinge lgalloc. - (handle, unsafe { Box::from_raw(slice) }) + let bytes = capacity * std::mem::size_of::(); + if bytes <= LGALLOC_VEC_HEAP_LIMIT_BYTES.load(std::sync::atomic::Ordering::Relaxed) { + Self::new_heap(capacity) } else { - // We failed to allocate through lgalloc, fall back to heap. - let mut vec = Vec::with_capacity(capacity); - // SAFETY: We treat all elements as uninitialized and track initialized elements - // through `self.length`. - unsafe { - vec.set_len(vec.capacity()); + match Self::try_new_lgalloc(capacity) { + Ok(vec) => vec, + Err(_) => Self::new_heap(capacity), } - (None, vec.into_boxed_slice()) - }; + } + } + + /// Construct a new instance allocated on the heap. + #[inline] + fn new_heap(capacity: usize) -> Self { + let mut vec = Vec::with_capacity(capacity); + // SAFETY: We treat all elements as uninitialized and track initialized elements + // through `self.length`. + unsafe { + vec.set_len(vec.capacity()); + } - let elements = ManuallyDrop::new(boxed); Self { + handle: None, + elements: ManuallyDrop::new(vec.into_boxed_slice()), + length: 0, + } + } + + /// Construct a new instance allocated through lgalloc, or an error should it fail. + #[inline] + fn try_new_lgalloc(capacity: usize) -> Result { + let (ptr, actual_capacity, handle) = lgalloc::allocate::>(capacity)?; + // We allocated sucessfully through lgalloc. + let handle = Some(handle); + // SAFETY: `ptr` is valid for constructing a slice: + // 1. Valid for reading and writing, and enough capacity. + // 2. Properly initialized (left for writing). + // 3. Not aliased. + // 4. Total size not longer than isize::MAX because lgalloc has a capacity limit. + let slice = unsafe { std::slice::from_raw_parts_mut(ptr.as_ptr(), actual_capacity) }; + // SAFETY: slice is valid, and we deallocate it usinge lgalloc. 
+ let boxed = unsafe { Box::from_raw(slice) }; + let elements = ManuallyDrop::new(boxed); + + Ok(Self { handle, elements, length: 0, - } + }) } /// Visit contained allocations to determine their size and capacity. + #[inline] pub fn heap_size(&self, mut callback: impl FnMut(usize, usize)) { let size_of_t = std::mem::size_of::(); callback(self.len() * size_of_t, self.capacity() * size_of_t) } /// Move an element on the array. Panics if there is no more capacity. + #[inline] pub fn push(&mut self, item: T) { if self.len() == self.capacity() { self.reserve(1); @@ -465,6 +491,7 @@ mod vec { } /// Extend the array from a slice. Increases the capacity if required. + #[inline] pub fn extend_from_slice(&mut self, slice: &[T]) where T: Clone, @@ -488,6 +515,7 @@ mod vec { } /// Extend the array from a slice of copyable elements. Increases the capacity if required. + #[inline] pub fn extend_from_copy_slice(&mut self, slice: &[T]) where T: Copy, @@ -507,6 +535,7 @@ mod vec { } /// Move elements from a vector to the array. Increases the capacity if required. + #[inline] pub fn append(&mut self, data: &mut Vec) { let count = data.len(); self.reserve(count); @@ -532,21 +561,25 @@ mod vec { } /// The number of elements in the array. + #[inline] pub fn len(&self) -> usize { self.length } /// Returns `true` if the array contains no elements. + #[inline] pub fn is_empty(&self) -> bool { self.len() == 0 } /// The number of elements this array can absorb. + #[inline] pub fn capacity(&self) -> usize { self.elements.len() } /// Remove all elements. Drops the contents, but leaves the allocation untouched. + #[inline] pub fn clear(&mut self) { let elems = &mut self.elements[..self.length]; // We are about to run the type's destructor, which may panic. Therefore we set the length @@ -561,6 +594,7 @@ mod vec { } } + /// The minimum capacity for a non-zero array. 
const MIN_NON_ZERO_CAP: usize = if std::mem::size_of::() == 1 { 8 } else if std::mem::size_of::() <= 1024 { @@ -570,18 +604,22 @@ mod vec { }; /// Grow the array to at least `new_len` elements. Reallocates the underlying storage. + #[cold] fn grow(&mut self, new_len: usize) { let new_capacity = std::cmp::max(self.capacity() * 2, new_len); let new_capacity = std::cmp::max(new_capacity, Self::MIN_NON_ZERO_CAP); - let mut new_vec = LgAllocVec::with_capacity(new_capacity); + let mut new_vec = Self::with_capacity(new_capacity); let src_ptr = self.elements.as_ptr(); let dst_ptr = new_vec.elements.as_mut_ptr(); let len = self.len(); unsafe { + // SAFETY: We forget the current contents momentarily. self.set_len(0); + // SAFETY: `src_ptr` and `dst_ptr` are valid pointers to `len` elements. std::ptr::copy_nonoverlapping(src_ptr, dst_ptr, len); + // SAFETY: Surface exactly as many elements as we just copied.. new_vec.set_len(len); } @@ -589,6 +627,7 @@ mod vec { } /// Reserve space for at least `additional` elements. The capacity is increased if necessary. + #[inline] pub fn reserve(&mut self, additional: usize) { let new_len = self.len() + additional; if new_len > self.capacity() { @@ -597,6 +636,7 @@ mod vec { } /// Iterate over the elements. + #[inline] pub fn iter(&self) -> std::slice::Iter<'_, T> { self.deref().iter() } @@ -617,14 +657,20 @@ mod vec { } impl Default for LgAllocVec { + #[inline] fn default() -> Self { - Self::with_capacity(0) + Self { + handle: None, + elements: ManuallyDrop::new(Vec::new().into_boxed_slice()), + length: 0, + } } } impl Deref for LgAllocVec { type Target = [T]; + #[inline] fn deref(&self) -> &Self::Target { // TODO: Use `slice_assume_init_ref` once stable. // Context: https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.slice_assume_init_ref @@ -641,13 +687,16 @@ mod vec { } impl Drop for LgAllocVec { + #[inline] fn drop(&mut self) { + // Clear the contents, but don't drop the allocation. 
self.clear(); + if let Some(handle) = self.handle.take() { - // Memory allocated through lgalloc + // Memory allocated through lgalloc, deallocate accordingly. lgalloc::deallocate(handle); } else { - // Regular allocation + // Regular heap allocation // SAFETY: `elements` is a sliced box allocated from the global allocator, drop it. unsafe { ManuallyDrop::drop(&mut self.elements);