Skip to content

Commit

Permalink
Make topological sort fully general. (#1138)
Browse files Browse the repository at this point in the history
* Make topological sort fully general.

* Detect missing provenance.

* Never gonna underflow.

---------

Co-authored-by: porcuquine <[email protected]>
  • Loading branch information
porcuquine and porcuquine authored Feb 17, 2024
1 parent 6621d24 commit f2d70f4
Showing 1 changed file with 49 additions and 20 deletions.
69 changes: 49 additions & 20 deletions src/coroutine/memoset/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -617,40 +617,57 @@ impl<F: LurkField, Q: Query<F>> Scope<Q, LogMemo<F>, F> {
// }

fn compute_provenances(&self, store: &Store<F>) -> HashMap<ZPtr<Tag, F>, ZPtr<Tag, F>> {
let base_cases = self
let mut provenances = HashMap::default();
let mut ready = HashSet::new();

let mut missing_dependency_counts: HashMap<&Ptr, usize> = self
.queries
.keys()
.filter(|key| {
if let Some(deps) = self.dependencies.get(key) {
deps.is_empty()
} else {
true
.map(|key| {
let dep_count = self.dependencies.get(key).map_or(0, |x| x.len());

if dep_count == 0 {
// Queries are ready if they have no missing dependencies.
// Initially, this will be the base cases -- which have no dependencies.
ready.insert(key);
}
(key, dep_count)
})
.collect::<HashSet<_>>();
let mut todo = base_cases;
let mut provenances = HashMap::default();

while !todo.is_empty() {
todo = self.extend_provenances(store, &mut provenances, todo);
.collect();

while !ready.is_empty() {
ready = self.extend_provenances(
store,
&mut provenances,
ready,
&mut missing_dependency_counts,
);
}

assert_eq!(
self.queries.len(),
provenances.len(),
"incomplete provenance computation (probably a forbidden cyclic query)"
);

provenances
.iter()
.map(|(k, v)| (store.hash_ptr(k), store.hash_ptr(v)))
.collect()
}

fn extend_provenances(
&self,
fn extend_provenances<'a>(
&'a self,
store: &Store<F>,
provenances: &mut HashMap<Ptr, Ptr>,
todo: HashSet<&Ptr>,
ready: HashSet<&Ptr>,
missing_dependency_counts: &mut HashMap<&'a Ptr, usize>,
) -> HashSet<&Ptr> {
let mut next = HashSet::new();

for query in todo.into_iter() {
for query in ready.into_iter() {
if provenances.get(query).is_some() {
// Skip if already complete. This should not happen if called by `compute_provenances` when computing
// all provenances from scratch, but it could happen if we compute more incrementally in the future.
continue;
};

Expand All @@ -669,9 +686,21 @@ impl<F: LurkField, Q: Query<F>> Scope<Q, LogMemo<F>, F> {

if let Some(dependents) = self.dependents.get(query) {
for dependent in dependents {
if provenances.get(dependent).is_none() {
next.insert(dependent);
}
missing_dependency_counts
.entry(dependent)
.and_modify(|missing_count| {
// NOTE: A query only becomes the `dependent` here when one of its dependencies is
// processed. Any query with `missing_count` 0 has no unprocessed dependencies to trigger
// the following update. Therefore, the underflow guarded against below should never occur
// if the implicit topological sort worked correctly. Any failure suggests the algorithm has
// been broken accidentally.
*missing_count = missing_count
.checked_sub(1)
.expect("topological sort has been broken; a dependency was processed out of order");
if *missing_count == 0 {
next.insert(dependent);
}
});
}
};

Expand Down

1 comment on commit f2d70f4

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmarks

Table of Contents

Overview

This benchmark report shows the Fibonacci GPU benchmark, run on the following hardware:
NVIDIA L4
Intel(R) Xeon(R) CPU @ 2.20GHz
32 vCPUs
125 GB RAM
Workflow run: https://github.com/lurk-lab/lurk-rs/actions/runs/7937978133

Benchmark Results

LEM Fibonacci Prove - rc = 100

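|         | ref=6621d24b3c7a9a30e4d806164aac98d7bfa97f35 | ref=f2d70f4f7c295cde6f07ae391f601a0d49c3965e |
|---------|----------------------------------------------|----------------------------------------------|
| num-100 | 1.45 s (✅ 1.00x)                             | 1.45 s (✅ 1.00x faster)                      |
| num-200 | 2.78 s (✅ 1.00x)                             | 2.77 s (✅ 1.00x faster)                      |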

LEM Fibonacci Prove - rc = 600

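|         | ref=6621d24b3c7a9a30e4d806164aac98d7bfa97f35 | ref=f2d70f4f7c295cde6f07ae391f601a0d49c3965e |
|---------|----------------------------------------------|----------------------------------------------|
| num-100 | 1.84 s (✅ 1.00x)                             | 1.84 s (✅ 1.00x faster)                      |
| num-200 | 3.06 s (✅ 1.00x)                             | 3.04 s (✅ 1.01x faster)                      |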

Made with criterion-table

Please sign in to comment.