Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: speed up PrefixRecord loading #984

Merged
merged 10 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 19 additions & 18 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ tag-prefix = ""
lto = true

[workspace.dependencies]
anyhow = "1.0.92"
anyhow = "1.0.94"
archspec = "0.1.3"
assert_matches = "1.5.0"
async-compression = { version = "0.4.17", features = [
Expand All @@ -33,27 +33,27 @@ async-compression = { version = "0.4.17", features = [
"zstd",
] }
async-fd-lock = "0.2.0"
fs4 = "0.11.0"
fs4 = "0.12.0"
async-trait = "0.1.83"
axum = { version = "0.7.7", default-features = false, features = [
"tokio",
"http1",
] }
base64 = "0.22.1"
bindgen = "0.69.5"
bindgen = "0.71.1"
blake2 = "0.10.6"
bytes = "1.8.0"
bzip2 = "0.4.4"
bzip2 = "0.5.0"
cache_control = "0.2.0"
cfg-if = "1.0"
chrono = { version = "0.4.38", default-features = false, features = [
chrono = { version = "0.4.39", default-features = false, features = [
"std",
"serde",
"alloc",
] }
clap = { version = "4.5.20", features = ["derive"] }
clap = { version = "4.5.23", features = ["derive"] }
cmake = "0.1.51"
console = { version = "0.15.8", features = ["windows-console-colors"] }
console = { version = "0.15.10", features = ["windows-console-colors"] }
criterion = "0.5"
dashmap = "6.1.0"
difference = "2.0.0"
Expand All @@ -74,11 +74,11 @@ google-cloud-auth = { version = "0.17.1", default-features = false }
google-cloud-token = "0.1.2"
hex = "0.4.3"
hex-literal = "0.4.1"
http = "1.1"
http = "1.2"
http-cache-semantics = "2.1.0"
humansize = "2.1.3"
humantime = "2.1.0"
indexmap = "2.6.0"
indexmap = "2.7.0"
indicatif = "0.17.8"
insta = { version = "1.41.1" }
itertools = "0.13.0"
Expand All @@ -99,14 +99,15 @@ once_cell = "1.20.2"
ouroboros = "0.18.4"
parking_lot = "0.12.3"
pathdiff = "0.2.2"
pep440_rs = { version = "0.7.2" }
pep440_rs = { version = "0.7.3" }
pep508_rs = { version = "0.9.1" }
percent-encoding = "2.3.1"
pin-project-lite = "0.2.15"
plist = "1"
purl = { version = "0.1.3", features = ["serde"] }
quote = "1.0.37"
rand = "0.8.5"
rayon = "1.10.0"
reflink-copy = "0.1.20"
regex = "1.11.1"
reqwest = { version = "0.12.9", default-features = false }
Expand All @@ -118,7 +119,7 @@ rmp-serde = { version = "1.3.0" }
rstest = { version = "0.23.0" }
rstest_reuse = "0.7.0"
simd-json = { version = "0.14.2", features = ["serde_impl"] }
serde = { version = "1.0.214" }
serde = { version = "1.0.216" }
serde_json = { version = "1.0.132" }
serde_repr = "0.1"
serde-value = "0.7.0"
Expand All @@ -137,23 +138,23 @@ smallvec = { version = "1.13.2", features = [
strum = { version = "0.26.3", features = ["derive"] }
superslice = "1.0.0"
syn = "2.0.86"
sysinfo = "0.32.0"
sysinfo = "0.33.0"
tar = "0.4.42"
tempdir = "0.3.7"
tempfile = "3.13.0"
temp-env = "0.3.6"
test-log = "0.2.16"
thiserror = "1.0"
tokio = { version = "1.41.0", default-features = false }
tokio-stream = "0.1.16"
tokio-util = "0.7.12"
tower = { version = "0.5.1", default-features = false }
thiserror = "2.0"
tokio = { version = "1.42.0", default-features = false }
tokio-stream = "0.1.17"
tokio-util = "0.7.13"
tower = { version = "0.5.2", default-features = false }
tower-http = { version = "0.6.1", default-features = false }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", default-features = false }
tracing-test = { version = "0.2.5" }
trybuild = { version = "1.0.101" }
typed-path = { version = "0.9.3" }
typed-path = { version = "0.10.0" }
url = { version = "2.5.2" }
uuid = { version = "1.11.0", default-features = false }
walkdir = "2.5.0"
Expand Down
2 changes: 1 addition & 1 deletion crates/rattler-bin/src/commands/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ pub async fn create(opt: Opt) -> anyhow::Result<()> {
.collect::<Result<Vec<_>, _>>()?;

// Determine the packages that are currently installed in the environment.
let installed_packages = PrefixRecord::collect_from_prefix(&target_prefix)?;
let installed_packages = PrefixRecord::collect_from_prefix::<PrefixRecord>(&target_prefix)?;

// For each channel/subdirectory combination, download and cache the
// `repodata.json` that should be available from the corresponding Url. The
Expand Down
18 changes: 12 additions & 6 deletions crates/rattler/src/install/clobber_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,8 @@ mod tests {
],
);

let prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();

for record in prefix_records {
if record.repodata_record.package_record.name.as_normalized() == "clobber-1" {
Expand Down Expand Up @@ -878,7 +879,8 @@ mod tests {

println!("== RUNNING UPDATE");

let mut prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let mut prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
prefix_records.sort_by(|a, b| {
a.repodata_record
.package_record
Expand Down Expand Up @@ -974,7 +976,8 @@ mod tests {
&["clobber.txt", "another-clobber.txt"],
);

let mut prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let mut prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
prefix_records.sort_by(|a, b| {
a.repodata_record
.package_record
Expand Down Expand Up @@ -1063,7 +1066,8 @@ mod tests {
],
);

let mut prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let mut prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
prefix_records.sort_by(|a, b| {
a.repodata_record
.package_record
Expand Down Expand Up @@ -1232,7 +1236,8 @@ mod tests {
)
.await;

let prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();

// remove one of the clobbering files
let transaction = transaction::Transaction::<PrefixRecord, RepoDataRecord> {
Expand Down Expand Up @@ -1299,7 +1304,8 @@ mod tests {
)
.await;

let prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();

// remove one of the clobbering files
let transaction = transaction::Transaction::<PrefixRecord, RepoDataRecord> {
Expand Down
3 changes: 2 additions & 1 deletion crates/rattler/src/install/link_script.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,8 @@ mod tests {
assert!(target_prefix.path().join("i-was-post-linked").exists());

// unlink the package
let prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let transaction = transaction::Transaction::<PrefixRecord, RepoDataRecord> {
operations: vec![TransactionOperation::Remove(prefix_records[0].clone())],
python_info: None,
Expand Down
2 changes: 1 addition & 1 deletion crates/rattler_cache/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ url.workspace = true
thiserror.workspace = true
reqwest-middleware.workspace = true
digest.workspace = true
fs4 = { workspace = true, features = ["fs-err-tokio"] }
fs4 = { workspace = true, features = ["fs-err3-tokio", "tokio"] }
simple_spawn_blocking = { version = "1.0.0", path = "../simple_spawn_blocking", features = ["tokio"] }

[dev-dependencies]
Expand Down
5 changes: 5 additions & 0 deletions crates/rattler_conda_types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ repository.workspace = true
license.workspace = true
readme.workspace = true

[features]
default = ["rayon"]

[dependencies]
chrono = { workspace = true }
file_url = { path = "../file_url", version = "0.2.0" }
Expand Down Expand Up @@ -39,6 +42,8 @@ url = { workspace = true, features = ["serde"] }
indexmap = { workspace = true }
rattler_redaction = { version = "0.1.4", path = "../rattler_redaction" }
dirs = { workspace = true }
rayon = { workspace = true, optional = true }
fs-err = { workspace = true }

[dev-dependencies]
rand = { workspace = true }
Expand Down
55 changes: 48 additions & 7 deletions crates/rattler_conda_types/benches/prefix_record_from_path.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,65 @@
use std::{fs, path::PathBuf};
use rattler_conda_types::RecordFromPath;
use std::{
fs::{self},
path::{Path, PathBuf},
};

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rattler_conda_types::PrefixRecord;
use rattler_conda_types::{PackageRecord, PrefixRecord};

fn process_json_files_from_dir(dir: PathBuf) {
fn process_json_files_from_dir(dir: &Path) {
let entries = fs::read_dir(dir).expect("Directory not found");

for entry in entries {
let entry = entry.expect("Unable to read entry");
let path = entry.path();

PrefixRecord::from_path(path).unwrap();
black_box(PrefixRecord::from_path(path).unwrap());
}
}

/// Benchmark helper: collects the records found under `dir` through
/// `PrefixRecord::collect_from_prefix`, materialized as `PrefixRecord` values.
///
/// # Panics
///
/// Panics if `collect_from_prefix` returns an error.
fn load_as_prefix_record(dir: &Path) -> Vec<PrefixRecord> {
    // The element type is selected by the annotation rather than a turbofish.
    let records: Vec<PrefixRecord> = PrefixRecord::collect_from_prefix(dir).unwrap();
    // Route the result through `black_box` so the optimizer cannot discard the load.
    black_box(records)
}

/// Benchmark helper: collects the records found under `dir` through
/// `PrefixRecord::collect_from_prefix`, materialized as `PackageRecord` values
/// instead of full `PrefixRecord`s.
///
/// # Panics
///
/// Panics if `collect_from_prefix` returns an error.
fn load_as_package_record(dir: &Path) -> Vec<PackageRecord> {
    // The element type is selected by the annotation rather than a turbofish.
    let records: Vec<PackageRecord> = PrefixRecord::collect_from_prefix(dir).unwrap();
    // Route the result through `black_box` so the optimizer cannot discard the load.
    black_box(records)
}

fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("process_json_files", |b| {
let manifest_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let test_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../test-data/conda-meta");

let mut super_long_file: PrefixRecord =
PrefixRecord::from_path(test_dir.join("tk-8.6.13-h5083fa2_1.json")).unwrap();
// duplicate data until we have 20k paths
let files = super_long_file.files.clone();
while super_long_file.files.len() < 20_000 {
super_long_file.files.extend(files.clone());
}

let tempfile = tempfile::NamedTempFile::new().unwrap();
serde_json::to_writer(&tempfile, &super_long_file).unwrap();

c.bench_function("load_prefix_record_serially", |b| {
b.iter(|| {
process_json_files_from_dir(&test_dir);
});
});
c.bench_function("load_as_prefix_record", |b| {
b.iter(|| load_as_prefix_record(&test_dir));
});
c.bench_function("load_as_package_record", |b| {
b.iter(|| load_as_package_record(&test_dir));
});

let path = tempfile.path();
c.bench_function("load_long_prefix_record", |b| {
b.iter(|| black_box(PrefixRecord::from_path(path).unwrap()));
});

c.bench_function("load_long_package_record", |b| {
b.iter(|| {
process_json_files_from_dir(black_box(manifest_dir.join("../../test-data/conda-meta")));
black_box(PackageRecord::from_path(path).unwrap());
});
});
}
Expand Down
4 changes: 2 additions & 2 deletions crates/rattler_conda_types/src/environment_yaml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ impl EnvironmentYaml {
/// Reads the contents of a file at the given path and parses it as an
/// `environment.yaml` file.
pub fn from_path(path: &Path) -> std::io::Result<Self> {
let contents = std::fs::read_to_string(path)?;
let contents = fs_err::read_to_string(path)?;
Self::from_yaml_str(&contents)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
}
Expand All @@ -103,7 +103,7 @@ impl EnvironmentYaml {

/// Write the contents of this `environment.yaml` file to the given path.
pub fn to_path(&self, path: &Path) -> std::io::Result<()> {
std::fs::write(path, self.to_yaml_string())
fs_err::write(path, self.to_yaml_string())
}

/// Converts the contents of this `environment.yaml` file to a string.
Expand Down
3 changes: 2 additions & 1 deletion crates/rattler_conda_types/src/explicit_environment_spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
//! To create an explicit environment file, you can use the `conda list --explicit` command.

use crate::{ParsePlatformError, Platform};
use fs_err::{self as fs, File};
use serde::{Deserialize, Serialize};
use std::{fs, fs::File, io::Read, path::Path, str::FromStr};
use std::{io::Read, path::Path, str::FromStr};
use url::Url;

/// An [`ExplicitEnvironmentSpec`] represents an explicit environment specification. Packages are
Expand Down
3 changes: 2 additions & 1 deletion crates/rattler_conda_types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ pub use repo_data::{
compute_package_url,
patches::{PackageRecordPatch, PatchInstructions, RepoDataPatch},
sharded::{Shard, ShardedRepodata, ShardedSubdirInfo},
ChannelInfo, ConvertSubdirError, PackageRecord, RepoData, ValidatePackageRecordsError,
ChannelInfo, ConvertSubdirError, PackageRecord, RecordFromPath, RepoData,
ValidatePackageRecordsError,
};
pub use repo_data_record::RepoDataRecord;
pub use run_export::RunExportKind;
Expand Down
Loading
Loading