Skip to content

Commit

Permalink
feat: speed up PrefixRecord loading (#984)
Browse files Browse the repository at this point in the history
  • Loading branch information
wolfv authored Dec 16, 2024
1 parent 2f571eb commit 35658e9
Show file tree
Hide file tree
Showing 15 changed files with 159 additions and 55 deletions.
36 changes: 18 additions & 18 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ tag-prefix = ""
lto = true

[workspace.dependencies]
anyhow = "1.0.92"
anyhow = "1.0.94"
archspec = "0.1.3"
assert_matches = "1.5.0"
async-compression = { version = "0.4.17", features = [
Expand All @@ -33,27 +33,27 @@ async-compression = { version = "0.4.17", features = [
"zstd",
] }
async-fd-lock = "0.2.0"
fs4 = "0.11.0"
fs4 = "0.12.0"
async-trait = "0.1.83"
axum = { version = "0.7.7", default-features = false, features = [
"tokio",
"http1",
] }
base64 = "0.22.1"
bindgen = "0.69.5"
bindgen = "0.71.1"
blake2 = "0.10.6"
bytes = "1.8.0"
bzip2 = "0.4.4"
bzip2 = "0.5.0"
cache_control = "0.2.0"
cfg-if = "1.0"
chrono = { version = "0.4.38", default-features = false, features = [
chrono = { version = "0.4.39", default-features = false, features = [
"std",
"serde",
"alloc",
] }
clap = { version = "4.5.20", features = ["derive"] }
clap = { version = "4.5.23", features = ["derive"] }
cmake = "0.1.51"
console = { version = "0.15.8", features = ["windows-console-colors"] }
console = { version = "0.15.10", features = ["windows-console-colors"] }
criterion = "0.5"
dashmap = "6.1.0"
difference = "2.0.0"
Expand All @@ -74,11 +74,11 @@ google-cloud-auth = { version = "0.17.1", default-features = false }
google-cloud-token = "0.1.2"
hex = "0.4.3"
hex-literal = "0.4.1"
http = "1.1"
http = "1.2"
http-cache-semantics = "2.1.0"
humansize = "2.1.3"
humantime = "2.1.0"
indexmap = "2.6.0"
indexmap = "2.7.0"
indicatif = "0.17.8"
insta = { version = "1.41.1" }
itertools = "0.13.0"
Expand All @@ -99,7 +99,7 @@ once_cell = "1.20.2"
ouroboros = "0.18.4"
parking_lot = "0.12.3"
pathdiff = "0.2.2"
pep440_rs = { version = "0.7.2" }
pep440_rs = { version = "0.7.3" }
pep508_rs = { version = "0.9.1" }
percent-encoding = "2.3.1"
pin-project-lite = "0.2.15"
Expand All @@ -119,7 +119,7 @@ rmp-serde = { version = "1.3.0" }
rstest = { version = "0.23.0" }
rstest_reuse = "0.7.0"
simd-json = { version = "0.14.2", features = ["serde_impl"] }
serde = { version = "1.0.214" }
serde = { version = "1.0.216" }
serde_json = { version = "1.0.132" }
serde_repr = "0.1"
serde-value = "0.7.0"
Expand All @@ -138,23 +138,23 @@ smallvec = { version = "1.13.2", features = [
strum = { version = "0.26.3", features = ["derive"] }
superslice = "1.0.0"
syn = "2.0.86"
sysinfo = "0.32.0"
sysinfo = "0.33.0"
tar = "0.4.42"
tempdir = "0.3.7"
tempfile = "3.13.0"
temp-env = "0.3.6"
test-log = "0.2.16"
thiserror = "1.0"
tokio = { version = "1.41.0", default-features = false }
tokio-stream = "0.1.16"
tokio-util = "0.7.12"
tower = { version = "0.5.1", default-features = false }
thiserror = "2.0"
tokio = { version = "1.42.0", default-features = false }
tokio-stream = "0.1.17"
tokio-util = "0.7.13"
tower = { version = "0.5.2", default-features = false }
tower-http = { version = "0.6.1", default-features = false }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", default-features = false }
tracing-test = { version = "0.2.5" }
trybuild = { version = "1.0.101" }
typed-path = { version = "0.9.3" }
typed-path = { version = "0.10.0" }
url = { version = "2.5.2" }
uuid = { version = "1.11.0", default-features = false }
walkdir = "2.5.0"
Expand Down
2 changes: 1 addition & 1 deletion crates/rattler-bin/src/commands/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ pub async fn create(opt: Opt) -> anyhow::Result<()> {
.collect::<Result<Vec<_>, _>>()?;

// Determine the packages that are currently installed in the environment.
let installed_packages = PrefixRecord::collect_from_prefix(&target_prefix)?;
let installed_packages = PrefixRecord::collect_from_prefix::<PrefixRecord>(&target_prefix)?;

// For each channel/subdirectory combination, download and cache the
// `repodata.json` that should be available from the corresponding Url. The
Expand Down
18 changes: 12 additions & 6 deletions crates/rattler/src/install/clobber_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,8 @@ mod tests {
],
);

let prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();

for record in prefix_records {
if record.repodata_record.package_record.name.as_normalized() == "clobber-1" {
Expand Down Expand Up @@ -878,7 +879,8 @@ mod tests {

println!("== RUNNING UPDATE");

let mut prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let mut prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
prefix_records.sort_by(|a, b| {
a.repodata_record
.package_record
Expand Down Expand Up @@ -974,7 +976,8 @@ mod tests {
&["clobber.txt", "another-clobber.txt"],
);

let mut prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let mut prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
prefix_records.sort_by(|a, b| {
a.repodata_record
.package_record
Expand Down Expand Up @@ -1063,7 +1066,8 @@ mod tests {
],
);

let mut prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let mut prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
prefix_records.sort_by(|a, b| {
a.repodata_record
.package_record
Expand Down Expand Up @@ -1232,7 +1236,8 @@ mod tests {
)
.await;

let prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();

// remove one of the clobbering files
let transaction = transaction::Transaction::<PrefixRecord, RepoDataRecord> {
Expand Down Expand Up @@ -1299,7 +1304,8 @@ mod tests {
)
.await;

let prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();

// remove one of the clobbering files
let transaction = transaction::Transaction::<PrefixRecord, RepoDataRecord> {
Expand Down
3 changes: 2 additions & 1 deletion crates/rattler/src/install/link_script.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,8 @@ mod tests {
assert!(target_prefix.path().join("i-was-post-linked").exists());

// unlink the package
let prefix_records = PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let prefix_records: Vec<PrefixRecord> =
PrefixRecord::collect_from_prefix(target_prefix.path()).unwrap();
let transaction = transaction::Transaction::<PrefixRecord, RepoDataRecord> {
operations: vec![TransactionOperation::Remove(prefix_records[0].clone())],
python_info: None,
Expand Down
2 changes: 1 addition & 1 deletion crates/rattler_cache/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ url.workspace = true
thiserror.workspace = true
reqwest-middleware.workspace = true
digest.workspace = true
fs4 = { workspace = true, features = ["fs-err-tokio"] }
fs4 = { workspace = true, features = ["fs-err3-tokio", "tokio"] }
simple_spawn_blocking = { version = "1.0.0", path = "../simple_spawn_blocking", features = ["tokio"] }
rayon = { workspace = true }

Expand Down
5 changes: 5 additions & 0 deletions crates/rattler_conda_types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ repository.workspace = true
license.workspace = true
readme.workspace = true

[features]
default = ["rayon"]

[dependencies]
chrono = { workspace = true }
file_url = { path = "../file_url", version = "0.2.0" }
Expand Down Expand Up @@ -39,6 +42,8 @@ url = { workspace = true, features = ["serde"] }
indexmap = { workspace = true }
rattler_redaction = { version = "0.1.4", path = "../rattler_redaction" }
dirs = { workspace = true }
rayon = { workspace = true, optional = true }
fs-err = { workspace = true }

[dev-dependencies]
rand = { workspace = true }
Expand Down
55 changes: 48 additions & 7 deletions crates/rattler_conda_types/benches/prefix_record_from_path.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,65 @@
use std::{fs, path::PathBuf};
use rattler_conda_types::RecordFromPath;
use std::{
fs::{self},
path::{Path, PathBuf},
};

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rattler_conda_types::PrefixRecord;
use rattler_conda_types::{PackageRecord, PrefixRecord};

fn process_json_files_from_dir(dir: PathBuf) {
fn process_json_files_from_dir(dir: &Path) {
let entries = fs::read_dir(dir).expect("Directory not found");

for entry in entries {
let entry = entry.expect("Unable to read entry");
let path = entry.path();

PrefixRecord::from_path(path).unwrap();
black_box(PrefixRecord::from_path(path).unwrap());
}
}

/// Benchmark helper that collects the records of the prefix at `dir` as full
/// `PrefixRecord` values.
///
/// The collection is done by `PrefixRecord::collect_from_prefix`; any error it
/// reports aborts the benchmark through `unwrap`. The resulting vector is
/// passed through `black_box` before it is returned.
fn load_as_prefix_record(dir: &Path) -> Vec<PrefixRecord> {
    let records: Vec<PrefixRecord> = PrefixRecord::collect_from_prefix(dir).unwrap();
    black_box(records)
}

/// Benchmark helper that collects the records of the prefix at `dir`, but
/// deserializes each one as a `PackageRecord` instead of a `PrefixRecord`.
///
/// The collection is done by `PrefixRecord::collect_from_prefix`; any error it
/// reports aborts the benchmark through `unwrap`. The resulting vector is
/// passed through `black_box` before it is returned.
fn load_as_package_record(dir: &Path) -> Vec<PackageRecord> {
    let records: Vec<PackageRecord> = PrefixRecord::collect_from_prefix(dir).unwrap();
    black_box(records)
}

fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("process_json_files", |b| {
let manifest_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let test_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../test-data/conda-meta");

let mut super_long_file: PrefixRecord =
PrefixRecord::from_path(test_dir.join("tk-8.6.13-h5083fa2_1.json")).unwrap();
// duplicate data until we have 20k paths
let files = super_long_file.files.clone();
while super_long_file.files.len() < 20_000 {
super_long_file.files.extend(files.clone());
}

let tempfile = tempfile::NamedTempFile::new().unwrap();
serde_json::to_writer(&tempfile, &super_long_file).unwrap();

c.bench_function("load_prefix_record_serially", |b| {
b.iter(|| {
process_json_files_from_dir(&test_dir);
});
});
c.bench_function("load_as_prefix_record", |b| {
b.iter(|| load_as_prefix_record(&test_dir));
});
c.bench_function("load_as_package_record", |b| {
b.iter(|| load_as_package_record(&test_dir));
});

let path = tempfile.path();
c.bench_function("load_long_prefix_record", |b| {
b.iter(|| black_box(PrefixRecord::from_path(path).unwrap()));
});

c.bench_function("load_long_package_record", |b| {
b.iter(|| {
process_json_files_from_dir(black_box(manifest_dir.join("../../test-data/conda-meta")));
black_box(PackageRecord::from_path(path).unwrap());
});
});
}
Expand Down
4 changes: 2 additions & 2 deletions crates/rattler_conda_types/src/environment_yaml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ impl EnvironmentYaml {
/// Reads the contents of a file at the given path and parses it as an
/// `environment.yaml` file.
pub fn from_path(path: &Path) -> std::io::Result<Self> {
let contents = std::fs::read_to_string(path)?;
let contents = fs_err::read_to_string(path)?;
Self::from_yaml_str(&contents)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
}
Expand All @@ -103,7 +103,7 @@ impl EnvironmentYaml {

/// Write the contents of this `environment.yaml` file to the given path.
pub fn to_path(&self, path: &Path) -> std::io::Result<()> {
std::fs::write(path, self.to_yaml_string())
fs_err::write(path, self.to_yaml_string())
}

/// Converts the contents of this `environment.yaml` file to a string.
Expand Down
3 changes: 2 additions & 1 deletion crates/rattler_conda_types/src/explicit_environment_spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
//! To create an explicit environment file, you can use the `conda list --explicit` command.
use crate::{ParsePlatformError, Platform};
use fs_err::{self as fs, File};
use serde::{Deserialize, Serialize};
use std::{fs, fs::File, io::Read, path::Path, str::FromStr};
use std::{io::Read, path::Path, str::FromStr};
use url::Url;

/// An [`ExplicitEnvironmentSpec`] represents an explicit environment specification. Packages are
Expand Down
3 changes: 2 additions & 1 deletion crates/rattler_conda_types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ pub use repo_data::{
compute_package_url,
patches::{PackageRecordPatch, PatchInstructions, RepoDataPatch},
sharded::{Shard, ShardedRepodata, ShardedSubdirInfo},
ChannelInfo, ConvertSubdirError, PackageRecord, RepoData, ValidatePackageRecordsError,
ChannelInfo, ConvertSubdirError, PackageRecord, RecordFromPath, RepoData,
ValidatePackageRecordsError,
};
pub use repo_data_record::RepoDataRecord;
pub use run_export::RunExportKind;
Expand Down
Loading

0 comments on commit 35658e9

Please sign in to comment.