Skip to content

Commit

Permalink
Switch KeyValSpine to flatcontainer
Browse files Browse the repository at this point in the history
Signed-off-by: Moritz Hoffmann <[email protected]>
  • Loading branch information
antiguru committed Jul 8, 2024
1 parent bcf8277 commit 4dd1017
Show file tree
Hide file tree
Showing 18 changed files with 826 additions and 203 deletions.
20 changes: 11 additions & 9 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 9 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,22 @@ debug = 2
# tend to get rewritten or disappear (e.g., because a PR is force pushed or gets
# merged), after which point it becomes impossible to build that historical
# version of Materialize.
[patch."https://github.com/TimelyDataflow/timely-dataflow"]
# Projects that do not reliably release to crates.io.
timely = { git = "https://github.com/MaterializeInc/timely-dataflow.git" }
timely_bytes = { git = "https://github.com/MaterializeInc/timely-dataflow.git" }
timely_communication = { git = "https://github.com/MaterializeInc/timely-dataflow.git" }
timely_container = { git = "https://github.com/MaterializeInc/timely-dataflow.git" }
timely_logging = { git = "https://github.com/MaterializeInc/timely-dataflow.git" }
[patch.crates-io]
# Projects that do not reliably release to crates.io.
timely = { git = "https://github.com/MaterializeInc/timely-dataflow.git" }
timely_bytes = { git = "https://github.com/MaterializeInc/timely-dataflow.git" }
timely_communication = { git = "https://github.com/MaterializeInc/timely-dataflow.git" }
timely_container = { git = "https://github.com/MaterializeInc/timely-dataflow.git" }
timely_logging = { git = "https://github.com/MaterializeInc/timely-dataflow.git" }
differential-dataflow = { git = "https://github.com/MaterializeInc/differential-dataflow.git" }
dogsdogsdogs = { git = "https://github.com/MaterializeInc/differential-dataflow.git" }
differential-dataflow = { git = "https://github.com/antiguru/differential-dataflow.git", branch = "consolidate_layout_merger_chunk" }
dogsdogsdogs = { git = "https://github.com/antiguru/differential-dataflow.git", branch = "consolidate_layout_merger_chunk" }

# Waiting on https://github.com/sfackler/rust-postgres/pull/752.
postgres = { git = "https://github.com/MaterializeInc/rust-postgres" }
Expand Down
3 changes: 1 addition & 2 deletions misc/cargo-vet/audits.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# cargo-vet audits file

[criteria.maintained-and-necessary]
Expand Down Expand Up @@ -281,7 +280,7 @@ version = "23.5.26"
[[audits.flatcontainer]]
who = "Moritz Hoffmann <[email protected]>"
criteria = "safe-to-deploy"
version = "0.4.1"
version = "0.5.0"

[[audits.fluent-uri]]
who = "Nikhil Benesch <[email protected]>"
Expand Down
3 changes: 2 additions & 1 deletion src/cluster/src/communication.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
use std::any::Any;
use std::cmp::Ordering;
use std::fmt::Display;
use std::sync::Arc;
use std::time::Duration;

use anyhow::Context;
Expand Down Expand Up @@ -109,7 +110,7 @@ where
}
}

match initialize_networking_from_sockets(sockets, process, workers, Box::new(|_| None)) {
match initialize_networking_from_sockets(sockets, process, workers, Arc::new(|_| None)) {
Ok((stuff, guard)) => {
info!(process = process, "successfully initialized network");
Ok((
Expand Down
2 changes: 1 addition & 1 deletion src/compute/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ mz-compute-types = { path = "../compute-types" }
mz-dyncfg = { path = "../dyncfg" }
mz-dyncfgs = { path = "../dyncfgs" }
mz-expr = { path = "../expr" }
mz-ore = { path = "../ore", features = ["async", "flatcontainer", "process", "tracing_"] }
mz-ore = { path = "../ore", features = ["async", "differential", "flatcontainer", "process", "tracing_"] }
mz-persist-client = { path = "../persist-client" }
mz-persist-types = { path = "../persist-types" }
mz-repr = { path = "../repr" }
Expand Down
65 changes: 7 additions & 58 deletions src/compute/src/extensions/arrange.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use timely::progress::Timestamp;
use timely::Container;

use crate::logging::compute::ComputeEvent;
use crate::typedefs::{KeyAgent, KeyValAgent, RowAgent, RowRowAgent, RowValAgent};
use crate::typedefs::{KeyAgent, RowAgent, RowRowAgent, RowValAgent};

/// Extension trait to arrange data.
pub trait MzArrange: MzArrangeCore
Expand Down Expand Up @@ -270,36 +270,6 @@ where
}
}

/// Arrangement size reporting for arrangements whose trace is a `KeyValAgent`.
impl<G, K, V, T, R> ArrangementSize for Arranged<G, KeyValAgent<K, V, T, R>>
where
    G: Scope<Timestamp = T>,
    G::Timestamp: Lattice + Ord + Columnation,
    K: Data + Columnation,
    V: Data + Columnation,
    T: Lattice + Timestamp,
    R: Semigroup + Ord + Columnation + 'static,
{
    /// Hands `self` to `log_arrangement_size_inner` together with a closure that
    /// walks every batch of the trace and sums up what the batch storage reports
    /// through `heap_size`.
    ///
    /// The closure yields a `(size, capacity, allocations)` triple, where
    /// `allocations` counts the `heap_size` reports that carried a non-zero
    /// capacity.
    fn log_arrangement_size(self) -> Self {
        log_arrangement_size_inner(self, |trace| {
            let mut total_size = 0;
            let mut total_capacity = 0;
            let mut allocation_count = 0;

            // Receives one `(size, capacity)` pair per `heap_size` report; a report
            // only counts as an allocation when its capacity is non-zero.
            let mut tally = |bytes, reserved| {
                total_size += bytes;
                total_capacity += reserved;
                allocation_count += usize::from(reserved > 0);
            };

            trace.map_batches(|batch| {
                // Visit each storage component of the batch in turn.
                let storage = &batch.storage;
                storage.keys.heap_size(&mut tally);
                storage.keys_offs.heap_size(&mut tally);
                storage.vals.heap_size(&mut tally);
                storage.vals_offs.heap_size(&mut tally);
                storage.times.heap_size(&mut tally);
                storage.diffs.heap_size(&mut tally);
            });

            (total_size, total_capacity, allocation_count)
        })
    }
}

impl<G, K, T, R> ArrangementSize for Arranged<G, KeyAgent<K, T, R>>
where
G: Scope<Timestamp = T>,
Expand Down Expand Up @@ -415,8 +385,8 @@ mod flatcontainer {
use differential_dataflow::lattice::Lattice;
use differential_dataflow::operators::arrange::Arranged;
use differential_dataflow::trace::TraceReader;
use mz_ore::flatcontainer::MzRegionPreference;
use timely::container::flatcontainer::{IntoOwned, Push, Region, ReserveItems};
use mz_ore::flatcontainer::{MzRegion, MzRegionPreference};
use timely::container::flatcontainer::{IntoOwned, Region};
use timely::dataflow::Scope;
use timely::progress::Timestamp;
use timely::PartialOrder;
Expand All @@ -429,31 +399,10 @@ mod flatcontainer {
Self: Clone,
G: Scope<Timestamp = T::Owned>,
G::Timestamp: Lattice + Ord + MzRegionPreference,
K: Region
+ Clone
+ Push<<K as Region>::Owned>
+ for<'a> Push<<K as Region>::ReadItem<'a>>
+ for<'a> ReserveItems<<K as Region>::ReadItem<'a>>
+ 'static,
V: Region
+ Clone
+ Push<<V as Region>::Owned>
+ for<'a> Push<<V as Region>::ReadItem<'a>>
+ for<'a> ReserveItems<<V as Region>::ReadItem<'a>>
+ 'static,
T: Region
+ Clone
+ Push<<T as Region>::Owned>
+ for<'a> Push<<T as Region>::ReadItem<'a>>
+ for<'a> ReserveItems<<T as Region>::ReadItem<'a>>
+ 'static,
R: Region
+ Clone
+ Push<<R as Region>::Owned>
+ for<'a> Push<&'a <R as Region>::Owned>
+ for<'a> Push<<R as Region>::ReadItem<'a>>
+ for<'a> ReserveItems<<R as Region>::ReadItem<'a>>
+ 'static,
K: MzRegion,
V: MzRegion,
T: MzRegion,
R: MzRegion,
K::Owned: Clone + Ord,
V::Owned: Clone + Ord,
T::Owned: Lattice + for<'a> PartialOrder<<T as Region>::ReadItem<'a>> + Timestamp,
Expand Down
2 changes: 1 addition & 1 deletion src/compute/src/logging/differential.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ pub(super) fn construct<A: Allocate>(
let stream_to_collection = |input: Stream<_, ((usize, ()), Timestamp, Diff)>, log, name| {
let mut packer = PermutedRowPacker::new(log);
input
.mz_arrange_core::<_, KeyValSpine<_, _, _, _>>(
.mz_arrange_core::<_, KeyValSpine<usize, (), Timestamp, Diff, _>>(
Pipeline,
&format!("PreArrange Differential {name}"),
)
Expand Down
7 changes: 3 additions & 4 deletions src/compute/src/logging/initialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use mz_compute_client::logging::{LogVariant, LoggingConfig};
use mz_ore::flatcontainer::{MzRegionPreference, OwnedRegionOpinion};
use mz_repr::{Diff, Timestamp};
use mz_storage_types::errors::DataflowError;
use mz_timely_util::containers::PreallocatingCapacityContainerBuilder;
use mz_timely_util::operator::CollectionExt;
use timely::communication::Allocate;
use timely::container::flatcontainer::FlatStack;
Expand Down Expand Up @@ -184,10 +185,8 @@ impl<A: Allocate + 'static> LoggingContext<'_, A> {

fn reachability_logger(&self) -> Logger<TrackerEvent> {
let event_queue = self.r_event_queue.clone();
let mut logger = BatchLogger::<
CapacityContainerBuilder<FlatStack<ReachabilityEventRegion>>,
_,
>::new(event_queue.link, self.interval_ms);
type CB = PreallocatingCapacityContainerBuilder<FlatStack<ReachabilityEventRegion>>;
let mut logger = BatchLogger::<CB, _>::new(event_queue.link, self.interval_ms);
Logger::new(
self.now,
self.start_offset,
Expand Down
8 changes: 4 additions & 4 deletions src/compute/src/logging/reachability.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ use mz_ore::cast::CastFrom;
use mz_ore::flatcontainer::{MzRegionPreference, OwnedRegionOpinion};
use mz_ore::iter::IteratorExt;
use mz_repr::{Datum, Diff, RowArena, SharedRow, Timestamp};
use mz_timely_util::containers::PreallocatingCapacityContainerBuilder;
use mz_timely_util::replay::MzReplay;
use timely::communication::Allocate;
use timely::container::flatcontainer::FlatStack;
use timely::container::CapacityContainerBuilder;
use timely::dataflow::channels::pact::Pipeline;

use crate::extensions::arrange::{MzArrange, MzArrangeCore};
Expand Down Expand Up @@ -57,7 +57,7 @@ pub(super) fn construct<A: Allocate>(
);
type UpdatesRegion = <((UpdatesKey, ()), Timestamp, Diff) as MzRegionPreference>::Region;

type CB = CapacityContainerBuilder<FlatStack<UpdatesRegion>>;
type CB = PreallocatingCapacityContainerBuilder<FlatStack<UpdatesRegion>>;
let (updates, token) = Some(event_queue.link).mz_replay::<_, CB, _>(
scope,
"reachability logs",
Expand Down Expand Up @@ -102,7 +102,7 @@ pub(super) fn construct<A: Allocate>(
);

let updates =
updates.as_collection(move |(update_type, addr, source, port, ts), _| {
updates.as_collection(move |(&update_type, addr, &source, &port, ts), _| {
let row_arena = RowArena::default();
let update_type = if update_type { "source" } else { "target" };
let binding = SharedRow::get();
Expand All @@ -118,7 +118,7 @@ pub(super) fn construct<A: Allocate>(
Datum::UInt64(u64::cast_from(port)),
Datum::UInt64(u64::cast_from(worker_index)),
Datum::String(update_type),
Datum::from(ts.clone()),
Datum::from(ts.copied()),
];
row_builder.packer().extend(key.iter().map(|k| datums[*k]));
let key_row = row_builder.clone();
Expand Down
Loading

0 comments on commit 4dd1017

Please sign in to comment.