From d839d7043dadd6f5f8c224931c2176b099be08fd Mon Sep 17 00:00:00 2001 From: konstin Date: Thu, 12 Dec 2024 16:12:14 +0100 Subject: [PATCH 1/4] Return and track affected and culprit on conflicts Whenever we either discard a version due to its dependencies or perform conflict resolution, we return the last conflict that led to discarding them. In cargo, we use this information for prioritization, which speeds up resolution (`cargo run -r -- -m pub --with-solana --filter solana-archiver-lib -t 16` goes from 90s to 20s on my machine). Configurations that are noticeably slower for the solana test case: * All incompatibilities unit propagation * Only the last root cause in unit propagation * No incompatibilities from unit propagation * No incompatibilities from `add_version` * Only affect counts (without culprit counts) * Backtracking with the same heuristic as https://github.com/astral-sh/uv/pull/9843 (backtracking once after affected hits 5) In uv, we use this to re-prioritize and backtrack when a package decision accumulated to many conflicts. Since we have our own solver loop, we add the incompatibility to our own tracking instead. Built on https://github.com/pubgrub-rs/pubgrub/pull/291 ## Benchmarks Main: ``` index commit hash: 82086e46740d7a9303216bfac093e7268a95121f index commit time: 2024-11-30T18:18:14Z index size: 32 solana in index: 32 Pub CPU time: 1215.49s == 20.26min Cargo CPU time: skipped Cargo check lock CPU time: skipped Pub check lock CPU time: skipped Wall time: 80.58s == 1.34min ``` With https://github.com/pubgrub-rs/pubgrub/pull/291: ``` index commit hash: 82086e46740d7a9303216bfac093e7268a95121f index commit time: 2024-11-30T18:18:14Z index size: 32 solana in index: 32 Pub CPU time: 467.73s == 7.80min Cargo CPU time: skipped Cargo check lock CPU time: skipped Pub check lock CPU time: skipped Wall time: 34.76s == 0.58min ``` This PR: ``` index commit hash: 82086e46740d7a9303216bfac093e7268a95121f index commit time: 2024-11-30T18:18:14Z index size: 32 solana in index: 32 Pub CPU time: 271.79s == 4.53min Cargo CPU time: skipped Cargo check lock CPU time: skipped Pub check lock CPU time: skipped Wall time: 20.17s == 0.34min ``` --- examples/caching_dependency_provider.rs | 15 ++++- src/internal/core.rs | 10 +++- src/internal/partial_solution.rs | 67 ++++++++++++++-------- src/lib.rs | 6 +- src/provider.rs | 23 +++++--- src/solver.rs | 76 +++++++++++++++++++++++-- tests/proptest.rs | 22 +++++-- 7 files changed, 168 insertions(+), 51 deletions(-) diff --git a/examples/caching_dependency_provider.rs b/examples/caching_dependency_provider.rs index f6798a29..e9728902 100644 --- a/examples/caching_dependency_provider.rs +++ b/examples/caching_dependency_provider.rs @@ -2,7 +2,10 @@ use std::cell::RefCell; -use pubgrub::{resolve, Dependencies, DependencyProvider, OfflineDependencyProvider, Ranges}; +use pubgrub::{ + resolve, Dependencies, DependencyProvider, OfflineDependencyProvider, + PackageResolutionStatistics, Ranges, +}; type NumVS = Ranges; @@ -57,8 +60,14 @@ impl> DependencyProvider for CachingDependenc type Priority = DP::Priority; - fn prioritize(&self, package: &DP::P, ranges: &DP::VS) -> Self::Priority { - self.remote_dependencies.prioritize(package, ranges) + fn prioritize( + &self, + package: &Self::P, + range: &Self::VS, + package_statistics: &PackageResolutionStatistics, + ) -> Self::Priority { + self.remote_dependencies + .prioritize(package, range, package_statistics) } type Err = DP::Err; diff --git a/src/internal/core.rs b/src/internal/core.rs index 6f70d04c..51de3c65 100644 --- a/src/internal/core.rs +++ b/src/internal/core.rs @@ -106,11 +106,16 @@ impl State { /// Unit propagation is the core mechanism of the solving algorithm. /// CF + /// + /// For each package with a satisfied incompatibility, returns the package and the root cause + /// incompatibility. #[cold] + #[allow(clippy::type_complexity)] // Type definitions don't support impl trait. pub(crate) fn unit_propagation( &mut self, package: Id, - ) -> Result<(), NoSolutionError> { + ) -> Result, IncompDpId)>, NoSolutionError> { + let mut root_causes = Vec::new(); self.unit_propagation_buffer.clear(); self.unit_propagation_buffer.push(package); while let Some(current_package) = self.unit_propagation_buffer.pop() { @@ -168,6 +173,7 @@ impl State { .map_err(|terminal_incompat_id| { self.build_derivation_tree(terminal_incompat_id) })?; + root_causes.push((package, root_cause)); self.unit_propagation_buffer.clear(); self.unit_propagation_buffer.push(package_almost); // Add to the partial solution with incompat as cause. @@ -183,7 +189,7 @@ impl State { } } // If there are no more changed packages, unit propagation is done. - Ok(()) + Ok(root_causes) } /// Return the root cause or the terminal incompatibility. diff --git a/src/internal/partial_solution.rs b/src/internal/partial_solution.rs index a00b70d4..a8405756 100644 --- a/src/internal/partial_solution.rs +++ b/src/internal/partial_solution.rs @@ -268,7 +268,7 @@ impl PartialSolution { #[cold] pub(crate) fn pick_highest_priority_pkg( &mut self, - prioritizer: impl Fn(Id, &DP::VS) -> DP::Priority, + mut prioritizer: impl FnMut(Id, &DP::VS) -> DP::Priority, ) -> Option> { let check_all = self.changed_this_decision_level == self.current_decision_level.0.saturating_sub(1) as usize; @@ -304,7 +304,21 @@ impl PartialSolution { .map(|(&p, pa)| match &pa.assignments_intersection { AssignmentsIntersection::Decision((_, v, _)) => (p, v.clone()), AssignmentsIntersection::Derivations(_) => { - panic!("Derivations in the Decision part") + let mut context = String::new(); + for (id, assignment) in self + .package_assignments + .iter() + .take(self.current_decision_level.0 as usize) + { + context.push_str(&format!( + " * {:?} {:?}\n", + id, assignment.assignments_intersection + )); + } + panic!( + "Derivations in the Decision part. Decision level {}\n{}", + self.current_decision_level.0, context + ) } }) } @@ -362,34 +376,39 @@ impl PartialSolution { version: DP::V, new_incompatibilities: std::ops::Range>, store: &Arena>, - ) { + ) -> Option> { if !self.has_ever_backtracked { - // Nothing has yet gone wrong during this resolution. This call is unlikely to be the first problem. + // Fast path: Nothing has yet gone wrong during this resolution. This call is unlikely to be the first problem. // So let's live with a little bit of risk and add the decision without checking the dependencies. // The worst that can happen is we will have to do a full backtrack which only removes this one decision. log::info!("add_decision: {package:?} @ {version} without checking dependencies"); self.add_decision(package, version); + return None; + } + + // Check if any of the dependencies preclude deciding on this crate version. + let package_term = Term::exact(version.clone()); + let relation = |incompat: IncompId| { + store[incompat].relation(|p| { + // The current package isn't part of the package assignments yet. + if p == package { + Some(&package_term) + } else { + self.term_intersection_for_package(p) + } + }) + }; + if let Some(satisfied) = Id::range_to_iter(new_incompatibilities) + .find(|incompat| relation(*incompat) == Relation::Satisfied) + { + log::info!( + "rejecting decision {package:?} @ {version} because its dependencies conflict" + ); + Some(satisfied) } else { - // Check if any of the new dependencies preclude deciding on this crate version. - let exact = Term::exact(version.clone()); - let not_satisfied = |incompat: &Incompatibility| { - incompat.relation(|p| { - if p == package { - Some(&exact) - } else { - self.term_intersection_for_package(p) - } - }) != Relation::Satisfied - }; - - // Check none of the dependencies (new_incompatibilities) - // would create a conflict (be satisfied). - if store[new_incompatibilities].iter().all(not_satisfied) { - log::info!("add_decision: {package:?} @ {version}"); - self.add_decision(package, version); - } else { - log::info!("not adding {package:?} @ {version} because of its dependencies",); - } + log::info!("adding decision: {package:?} @ {version}"); + self.add_decision(package, version); + None } } diff --git a/src/lib.rs b/src/lib.rs index cc1c943f..1d906c27 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,7 +71,7 @@ //! and [SemanticVersion] for versions. //! This may be done quite easily by implementing the three following functions. //! ``` -//! # use pubgrub::{DependencyProvider, Dependencies, SemanticVersion, Ranges, DependencyConstraints, Map}; +//! # use pubgrub::{DependencyProvider, Dependencies, SemanticVersion, Ranges, DependencyConstraints, Map, PackageResolutionStatistics}; //! # use std::error::Error; //! # use std::borrow::Borrow; //! # use std::convert::Infallible; @@ -86,7 +86,7 @@ //! } //! //! type Priority = usize; -//! fn prioritize(&self, package: &String, range: &SemVS) -> Self::Priority { +//! fn prioritize(&self, package: &String, range: &SemVS, conflicts_counts: &PackageResolutionStatistics) -> Self::Priority { //! unimplemented!() //! } //! @@ -227,7 +227,7 @@ pub use report::{ DefaultStringReportFormatter, DefaultStringReporter, DerivationTree, Derived, External, ReportFormatter, Reporter, }; -pub use solver::{resolve, Dependencies, DependencyProvider}; +pub use solver::{resolve, Dependencies, DependencyProvider, PackageResolutionStatistics}; pub use term::Term; pub use type_aliases::{DependencyConstraints, Map, SelectedDependencies, Set}; pub use version::{SemanticVersion, VersionParseError}; diff --git a/src/provider.rs b/src/provider.rs index 83268795..aad87175 100644 --- a/src/provider.rs +++ b/src/provider.rs @@ -2,6 +2,7 @@ use std::cmp::Reverse; use std::collections::BTreeMap; use std::convert::Infallible; +use crate::solver::PackageResolutionStatistics; use crate::{Dependencies, DependencyConstraints, DependencyProvider, Map, Package, VersionSet}; /// A basic implementation of [DependencyProvider]. @@ -92,15 +93,23 @@ impl DependencyProvider for OfflineDependencyProvide .and_then(|versions| versions.keys().rev().find(|v| range.contains(v)).cloned())) } - type Priority = Reverse; + type Priority = (u32, Reverse); #[inline] - fn prioritize(&self, package: &P, range: &VS) -> Self::Priority { - Reverse( - self.dependencies - .get(package) - .map(|versions| versions.keys().filter(|v| range.contains(v)).count()) - .unwrap_or(0), + fn prioritize( + &self, + package: &Self::P, + range: &Self::VS, + package_statistics: &PackageResolutionStatistics, + ) -> Self::Priority { + ( + package_statistics.conflict_count(), + Reverse( + self.dependencies + .get(package) + .map(|versions| versions.keys().filter(|v| range.contains(v)).count()) + .unwrap_or(0), + ), ) } diff --git a/src/solver.rs b/src/solver.rs index a8aff4bf..ea6c129b 100644 --- a/src/solver.rs +++ b/src/solver.rs @@ -68,6 +68,31 @@ use log::{debug, info}; use crate::internal::{Id, Incompatibility, State}; use crate::{DependencyConstraints, Map, Package, PubGrubError, SelectedDependencies, VersionSet}; +/// Statistics on how often a package conflicted with other packages. +#[derive(Debug, Default, Clone)] +pub struct PackageResolutionStatistics { + unit_propagation_affected: u32, + unit_propagation_culprit: u32, + dependencies_affected: u32, + dependencies_culprit: u32, +} + +impl PackageResolutionStatistics { + /// The number of conflicts this package was involved in. + /// + /// Processing packages with a high conflict count earlier usually speeds up resolution. + /// + /// Whenever a package is part of the root cause incompatibility of a conflict, we increase its + /// count by one. Since the structure of the incompatibilities may change, this count too may + /// change in the future. + pub fn conflict_count(&self) -> u32 { + self.unit_propagation_affected + + self.unit_propagation_culprit + + self.dependencies_affected + + self.dependencies_culprit + } +} + /// Main function of the library. /// Finds a set of packages satisfying dependency bounds for a given package + version pair. #[cold] @@ -77,6 +102,7 @@ pub fn resolve( version: impl Into, ) -> Result, PubGrubError> { let mut state: State = State::init(package.clone(), version.into()); + let mut conflict_tracker: Map, PackageResolutionStatistics> = Map::default(); let mut added_dependencies: Map, Set> = Map::default(); let mut next = state.root_package; loop { @@ -88,7 +114,22 @@ pub fn resolve( "unit_propagation: {:?} = '{}'", &next, state.package_store[next] ); - state.unit_propagation(next)?; + let root_causes = state.unit_propagation(next)?; + for (affected, incompat) in root_causes { + conflict_tracker + .entry(affected) + .or_default() + .unit_propagation_affected += 1; + for (conflict_package, _) in state.incompatibility_store[incompat].iter() { + if conflict_package == affected { + continue; + } + conflict_tracker + .entry(conflict_package) + .or_default() + .unit_propagation_culprit += 1; + } + } debug!( "Partial solution after unit propagation: {}", @@ -97,7 +138,11 @@ pub fn resolve( let Some(highest_priority_pkg) = state.partial_solution.pick_highest_priority_pkg(|p, r| { - dependency_provider.prioritize(&state.package_store[p], r) + dependency_provider.prioritize( + &state.package_store[p], + r, + conflict_tracker.entry(p).or_default(), + ) }) else { return Ok(state @@ -174,9 +219,23 @@ pub fn resolve( let dep_incompats = state.add_incompatibility_from_dependencies(p, v.clone(), dependencies); - state - .partial_solution - .add_version(p, v, dep_incompats, &state.incompatibility_store); + if let Some(conflict) = state.partial_solution.add_version( + p, + v, + dep_incompats, + &state.incompatibility_store, + ) { + conflict_tracker.entry(p).or_default().dependencies_affected += 1; + for (incompat_package, _) in state.incompatibility_store[conflict].iter() { + if incompat_package == p { + continue; + } + conflict_tracker + .entry(incompat_package) + .or_default() + .unit_propagation_culprit += 1; + } + } } else { // `dep_incompats` are already in `incompatibilities` so we know there are not satisfied // terms and can add the decision directly. @@ -254,7 +313,12 @@ pub trait DependencyProvider { /// /// Note: the resolver may call this even when the range has not changed, /// if it is more efficient for the resolvers internal data structures. - fn prioritize(&self, package: &Self::P, range: &Self::VS) -> Self::Priority; + fn prioritize( + &self, + package: &Self::P, + range: &Self::VS, + package_conflicts_counts: &PackageResolutionStatistics, + ) -> Self::Priority; /// The type returned from `prioritize`. The resolver does not care what type this is /// as long as it can pick a largest one and clone it. /// diff --git a/tests/proptest.rs b/tests/proptest.rs index 65d4753b..74305aad 100644 --- a/tests/proptest.rs +++ b/tests/proptest.rs @@ -13,8 +13,8 @@ use proptest::string::string_regex; use pubgrub::{ resolve, DefaultStringReporter, Dependencies, DependencyProvider, DerivationTree, External, - OfflineDependencyProvider, Package, PubGrubError, Ranges, Reporter, SelectedDependencies, - VersionSet, + OfflineDependencyProvider, Package, PackageResolutionStatistics, PubGrubError, Ranges, + Reporter, SelectedDependencies, VersionSet, }; use crate::sat_dependency_provider::SatResolve; @@ -49,8 +49,13 @@ impl DependencyProvider for OldestVersionsDependency type Priority = as DependencyProvider>::Priority; - fn prioritize(&self, package: &P, range: &VS) -> Self::Priority { - self.0.prioritize(package, range) + fn prioritize( + &self, + package: &Self::P, + range: &Self::VS, + package_statistics: &PackageResolutionStatistics, + ) -> Self::Priority { + self.0.prioritize(package, range, package_statistics) } type Err = Infallible; @@ -104,8 +109,13 @@ impl DependencyProvider for TimeoutDependencyProvider Self::Priority { - self.dp.prioritize(package, range) + fn prioritize( + &self, + package: &Self::P, + range: &Self::VS, + package_statistics: &PackageResolutionStatistics, + ) -> Self::Priority { + self.dp.prioritize(package, range, package_statistics) } type Err = DP::Err; From 087bcdf506c070b3538b84c1281d094d60358e40 Mon Sep 17 00:00:00 2001 From: konstin Date: Mon, 16 Dec 2024 16:43:50 +0100 Subject: [PATCH 2/4] Use smallvec for root causes --- src/internal/core.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/internal/core.rs b/src/internal/core.rs index 51de3c65..9ef59746 100644 --- a/src/internal/core.rs +++ b/src/internal/core.rs @@ -114,8 +114,8 @@ impl State { pub(crate) fn unit_propagation( &mut self, package: Id, - ) -> Result, IncompDpId)>, NoSolutionError> { - let mut root_causes = Vec::new(); + ) -> Result, IncompDpId)>, NoSolutionError> { + let mut root_causes = SmallVec::default(); self.unit_propagation_buffer.clear(); self.unit_propagation_buffer.push(package); while let Some(current_package) = self.unit_propagation_buffer.pop() { From 541efb5d4ec6ee0dc8497f113915412a909b68ff Mon Sep 17 00:00:00 2001 From: konstin Date: Wed, 18 Dec 2024 20:30:05 +0100 Subject: [PATCH 3/4] Add more docs --- src/internal/partial_solution.rs | 1 + src/solver.rs | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/internal/partial_solution.rs b/src/internal/partial_solution.rs index a8405756..97ee5cfe 100644 --- a/src/internal/partial_solution.rs +++ b/src/internal/partial_solution.rs @@ -304,6 +304,7 @@ impl PartialSolution { .map(|(&p, pa)| match &pa.assignments_intersection { AssignmentsIntersection::Decision((_, v, _)) => (p, v.clone()), AssignmentsIntersection::Derivations(_) => { + // The invariant on the order in `self.package_assignments` was broken. let mut context = String::new(); for (id, assignment) in self .package_assignments diff --git a/src/solver.rs b/src/solver.rs index ea6c129b..562f6d23 100644 --- a/src/solver.rs +++ b/src/solver.rs @@ -71,6 +71,18 @@ use crate::{DependencyConstraints, Map, Package, PubGrubError, SelectedDependenc /// Statistics on how often a package conflicted with other packages. #[derive(Debug, Default, Clone)] pub struct PackageResolutionStatistics { + // We track these fields separately but currently don't expose them separately to keep the + // stable API slim. Please be encouraged to try different combinations of them and report if + // you find better metrics that should be exposed. + // + // Say we have packages A and B, A having higher priority than B. We first decide A and then B, + // and then find B to conflict with A. We call be B "affected" and A "culprit" since the + // decisions for B is being rejected due to the decision we made for A earlier. + // + // If B is rejected due to its dependencies conflicting with A, we increase + // `dependencies_affected` for B and for `dependencies_culprit` A. If B is rejected in unit + // through an incompatibility with B, we increase `unit_propagation_affected` for B and for + // `unit_propagation_culprit` A. unit_propagation_affected: u32, unit_propagation_culprit: u32, dependencies_affected: u32, From ec324e88c68d7326c529b7c24171cbb9d8ab4bf1 Mon Sep 17 00:00:00 2001 From: konstin Date: Wed, 18 Dec 2024 20:30:08 +0100 Subject: [PATCH 4/4] Review --- src/solver.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solver.rs b/src/solver.rs index 562f6d23..e6ba6249 100644 --- a/src/solver.rs +++ b/src/solver.rs @@ -245,7 +245,7 @@ pub fn resolve( conflict_tracker .entry(incompat_package) .or_default() - .unit_propagation_culprit += 1; + .dependencies_culprit += 1; } } } else {