From d7230cf138637a5de25d048b8392b26adfdefb76 Mon Sep 17 00:00:00 2001 From: Ruben Arts Date: Wed, 6 Nov 2024 11:36:54 +0100 Subject: [PATCH] perf: Quick prefix validation check (#2400) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pr will add a hash of the lock file to the `EnvironmentFile` (`.pixi/envs/default/conda-meta/pixi`) It will look like this: ```json { "manifest_path": "/home/rarts/dev/pixi/pixi.toml", "environment_name": "default", "pixi_version": "0.34.0", "environment_lock_file_hash": "4f36ee620f10329d" } ``` And that hash will be compared with the current lockfile on `pixi run` `pixi shell` `pixi shell-hook`. ### Profile result ```shell ❯ hyperfine "pixi run echo" "old-pixi run echo" Benchmark 1: pixi run echo Time (mean ± σ): 381.6 ms ± 22.1 ms [User: 193.8 ms, System: 246.3 ms] Range (min … max): 344.5 ms … 414.3 ms 10 runs Benchmark 2: old-pixi run echo Time (mean ± σ): 868.2 ms ± 58.2 ms [User: 480.0 ms, System: 557.0 ms] Range (min … max): 791.1 ms … 950.8 ms 10 runs Summary pixi run echo ran 2.28 ± 0.20 times faster than old-pixi run echo ``` > [!NOTE] > The remaining `381ms` is the activation which is fixed by https://github.com/prefix-dev/pixi/pull/2367 ### UX - It's turned on by default - You can request a re-validate on `pixi run/shell/shell-hook` with `--revalidate` - All commands designed to update the lock file or `pixi install` will always re-validate. ### TODO: - [x] : Add tests: chosen python integration tests as I was to fed up with the extreme amount of time spent on writing tests in Rust - [x] : use Enum instead of booleans. Using `UpdateMode::QuickValidate` and `UpdateMode::Revalidate` - [x] : Document behavior - [x] : Extend logic to cli. `--revalidate` --------- Co-authored-by: Hofer-Julian <30049909+Hofer-Julian@users.noreply.github.com> --- docs/advanced/explain_info_command.md | 2 +- docs/features/environment.md | 27 +++- docs/reference/cli.md | 4 + src/cli/add.rs | 4 +- src/cli/cli_config.rs | 14 ++ src/cli/install.rs | 9 +- src/cli/project/channel/add.rs | 2 + src/cli/project/channel/remove.rs | 2 + src/cli/project/platform/add.rs | 2 + src/cli/project/platform/remove.rs | 2 + src/cli/remove.rs | 2 + src/cli/run.rs | 16 ++- src/cli/shell.rs | 2 + src/cli/shell_hook.rs | 11 +- src/environment.rs | 175 +++++++++++++++++------ src/lib.rs | 2 +- src/lock_file/mod.rs | 18 ++- src/lock_file/records_by_name.rs | 8 +- src/lock_file/update.rs | 97 ++++++++++--- tbump.toml | 4 + tests/integration_python/common.py | 8 ++ tests/integration_python/test_run_cli.py | 98 ++++++++++++- tests/integration_rust/common/mod.rs | 7 +- tests/integration_rust/install_tests.rs | 13 +- 24 files changed, 436 insertions(+), 93 deletions(-) diff --git a/docs/advanced/explain_info_command.md b/docs/advanced/explain_info_command.md index 49aa29876..dabfed2ae 100644 --- a/docs/advanced/explain_info_command.md +++ b/docs/advanced/explain_info_command.md @@ -57,7 +57,7 @@ In that case, if pixi cannot find the `__cuda` virtual package on your machine t ### Cache dir The directory where pixi stores its cache. -Checkout the [cache documentation](../features/environment.md#caching) for more information. +Checkout the [cache documentation](../features/environment.md#caching-packages) for more information. ### Auth storage diff --git a/docs/features/environment.md b/docs/features/environment.md index e99aa2d69..2e0124e67 100644 --- a/docs/features/environment.md +++ b/docs/features/environment.md @@ -40,6 +40,31 @@ These directories are conda environments, and you can use them as such, but you Pixi will always make sure the environment is in sync with the `pixi.lock` file. If this is not the case then all the commands that use the environment will automatically update the environment, e.g. `pixi run`, `pixi shell`. +### Environment Installation Metadata +On environment installation, pixi will write a small file to the environment that contains some metadata about installation. +This file is called `pixi` and is located in the `conda-meta` folder of the environment. +This file contains the following information: +- `manifest_path`: The path to the manifest file that describes the project used to create this environment +- `environment_name`: The name of the environment +- `pixi_version`: The version of pixi that was used to create this environment +- `environment_lock_file_hash`: The hash of the `pixi.lock` file that was used to create this environment + +```json +{ + "manifest_path": "/home/user/dev/pixi/pixi.toml", + "environment_name": "default", + "pixi_version": "0.34.0", + "environment_lock_file_hash": "4f36ee620f10329d" +} +``` + +The `environment_lock_file_hash` is used to check if the environment is in sync with the `pixi.lock` file. +If the hash of the `pixi.lock` file is different from the hash in the `pixi` file, pixi will update the environment. + +This is used to speedup activation, in order to trigger a full revalidation pass `--revalidate` to the `pixi run` or `pixi shell` command. +A broken environment would typically not be found with a hash comparison, but a revalidation would reinstall the environment. +By default, all lock file modifying commands will always use the revalidation and on `pixi install` it always revalidates. + ### Cleaning up If you want to clean up the environments, you can simply delete the `.pixi/envs` directory, and pixi will recreate the environments when needed. @@ -186,7 +211,7 @@ For the `[pypi-dependencies]`, `uv` implements `sdist` building to retrieve the For this building step, `pixi` requires to first install `python` in the (conda)`[dependencies]` section of the `pixi.toml` file. This will always be slower than the pure conda solves. So for the best pixi experience you should stay within the `[dependencies]` section of the `pixi.toml` file. -## Caching +## Caching packages Pixi caches all previously downloaded packages in a cache folder. This cache folder is shared between all pixi projects and globally installed tools. diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 26d970299..03e248fcd 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -208,6 +208,8 @@ You cannot run `pixi run source setup.bash` as `source` is not available in the - `--locked`: only install if the `pixi.lock` is up-to-date with the [manifest file](project_configuration.md)[^1]. It can also be controlled by the `PIXI_LOCKED` environment variable (example: `PIXI_LOCKED=true`). Conflicts with `--frozen`. - `--environment (-e)`: The environment to run the task in, if none are provided the default environment will be used or a selector will be given to select the right environment. - `--clean-env`: Run the task in a clean environment, this will remove all environment variables of the shell environment except for the ones pixi sets. THIS DOESN't WORK ON `Windows`. +- `--revalidate`: Revalidate the full environment, instead of checking the lock file hash. [more info](../features/environment.md#environment-installation-metadata) + ```shell pixi run python pixi run cowpy "Hey pixi user" @@ -636,6 +638,7 @@ To exit the pixi shell, simply run `exit`. - `--frozen`: install the environment as defined in the lock file, doesn't update `pixi.lock` if it isn't up-to-date with [manifest file](project_configuration.md). It can also be controlled by the `PIXI_FROZEN` environment variable (example: `PIXI_FROZEN=true`). - `--locked`: only install if the `pixi.lock` is up-to-date with the [manifest file](project_configuration.md)[^1]. It can also be controlled by the `PIXI_LOCKED` environment variable (example: `PIXI_LOCKED=true`). Conflicts with `--frozen`. - `--environment (-e)`: The environment to activate the shell in, if none are provided the default environment will be used or a selector will be given to select the right environment. +- `--revalidate`: Revalidate the full environment, instead of checking lock file hash. [more info](../features/environment.md#environment-installation-metadata) ```shell pixi shell @@ -664,6 +667,7 @@ This command prints the activation script of an environment. - `--environment (-e)`: The environment to activate, if none are provided the default environment will be used or a selector will be given to select the right environment. - `--json`: Print all environment variables that are exported by running the activation script as JSON. When specifying this option, `--shell` is ignored. +- `--revalidate`: Revalidate the full environment, instead of checking lock file hash. [more info](../features/environment.md#environment-installation-metadata) ```shell pixi shell-hook diff --git a/src/cli/add.rs b/src/cli/add.rs index 066a03b7d..35b4d53bd 100644 --- a/src/cli/add.rs +++ b/src/cli/add.rs @@ -22,7 +22,7 @@ use crate::{ cli::cli_config::{DependencyConfig, PrefixUpdateConfig, ProjectConfig}, environment::verify_prefix_location_unchanged, load_lock_file, - lock_file::{filter_lock_file, LockFileDerivedData, UpdateContext}, + lock_file::{filter_lock_file, LockFileDerivedData, UpdateContext, UpdateMode}, project::{grouped_environment::GroupedEnvironment, DependencyType, Project}, }; @@ -294,7 +294,7 @@ pub async fn execute(args: Args) -> miette::Result<()> { && default_environment_is_affected { updated_lock_file - .prefix(&project.default_environment()) + .prefix(&project.default_environment(), UpdateMode::Revalidate) .await?; } diff --git a/src/cli/cli_config.rs b/src/cli/cli_config.rs index e8084e245..9fe27b09e 100644 --- a/src/cli/cli_config.rs +++ b/src/cli/cli_config.rs @@ -1,5 +1,6 @@ use crate::cli::has_specs::HasSpecs; use crate::environment::LockFileUsage; +use crate::lock_file::UpdateMode; use crate::DependencyType; use crate::Project; use clap::Parser; @@ -98,6 +99,10 @@ pub struct PrefixUpdateConfig { #[clap(flatten)] pub config: ConfigCli, + + /// Run the complete environment validation. This will reinstall a broken environment. + #[arg(long)] + pub revalidate: bool, } impl PrefixUpdateConfig { pub fn lock_file_usage(&self) -> LockFileUsage { @@ -114,6 +119,15 @@ impl PrefixUpdateConfig { pub(crate) fn no_install(&self) -> bool { self.no_install || self.no_lockfile_update } + + /// Which `[UpdateMode]` to use + pub(crate) fn update_mode(&self) -> UpdateMode { + if self.revalidate { + UpdateMode::Revalidate + } else { + UpdateMode::QuickValidate + } + } } #[derive(Parser, Debug, Default)] pub struct DependencyConfig { diff --git a/src/cli/install.rs b/src/cli/install.rs index 2a0af222e..62c1aa020 100644 --- a/src/cli/install.rs +++ b/src/cli/install.rs @@ -1,5 +1,6 @@ use crate::cli::cli_config::ProjectConfig; use crate::environment::update_prefix; +use crate::lock_file::UpdateMode; use crate::Project; use clap::Parser; use fancy_display::FancyDisplay; @@ -52,7 +53,13 @@ pub async fn execute(args: Args) -> miette::Result<()> { let environment = project.environment_from_name_or_env_var(Some(env))?; // Update the prefix by installing all packages - update_prefix(&environment, args.lock_file_usage.into(), false).await?; + update_prefix( + &environment, + args.lock_file_usage.into(), + false, + UpdateMode::Revalidate, + ) + .await?; installed_envs.push(environment.name().clone()); } diff --git a/src/cli/project/channel/add.rs b/src/cli/project/channel/add.rs index 98735edc3..e4137df81 100644 --- a/src/cli/project/channel/add.rs +++ b/src/cli/project/channel/add.rs @@ -1,5 +1,6 @@ use crate::{ environment::{update_prefix, LockFileUsage}, + lock_file::UpdateMode, Project, }; @@ -16,6 +17,7 @@ pub async fn execute(mut project: Project, args: AddRemoveArgs) -> miette::Resul &project.default_environment(), LockFileUsage::Update, args.no_install, + UpdateMode::Revalidate, ) .await?; project.save()?; diff --git a/src/cli/project/channel/remove.rs b/src/cli/project/channel/remove.rs index b93b773f2..b78306981 100644 --- a/src/cli/project/channel/remove.rs +++ b/src/cli/project/channel/remove.rs @@ -1,3 +1,4 @@ +use crate::lock_file::UpdateMode; use crate::{ environment::{update_prefix, LockFileUsage}, Project, @@ -16,6 +17,7 @@ pub async fn execute(mut project: Project, args: AddRemoveArgs) -> miette::Resul &project.default_environment(), LockFileUsage::Update, args.no_install, + UpdateMode::Revalidate, ) .await?; project.save()?; diff --git a/src/cli/project/platform/add.rs b/src/cli/project/platform/add.rs index eda05acf0..79382e2eb 100644 --- a/src/cli/project/platform/add.rs +++ b/src/cli/project/platform/add.rs @@ -2,6 +2,7 @@ use std::str::FromStr; use crate::{ environment::{update_prefix, LockFileUsage}, + lock_file::UpdateMode, Project, }; use clap::Parser; @@ -48,6 +49,7 @@ pub async fn execute(mut project: Project, args: Args) -> miette::Result<()> { &project.default_environment(), LockFileUsage::Update, args.no_install, + UpdateMode::Revalidate, ) .await?; project.save()?; diff --git a/src/cli/project/platform/remove.rs b/src/cli/project/platform/remove.rs index ed63344ba..0cc22137f 100644 --- a/src/cli/project/platform/remove.rs +++ b/src/cli/project/platform/remove.rs @@ -1,5 +1,6 @@ use std::str::FromStr; +use crate::lock_file::UpdateMode; use crate::{ environment::{update_prefix, LockFileUsage}, Project, @@ -47,6 +48,7 @@ pub async fn execute(mut project: Project, args: Args) -> miette::Result<()> { &project.default_environment(), LockFileUsage::Update, args.no_install, + UpdateMode::Revalidate, ) .await?; project.save()?; diff --git a/src/cli/remove.rs b/src/cli/remove.rs index d9c1a0cff..8bea71e24 100644 --- a/src/cli/remove.rs +++ b/src/cli/remove.rs @@ -6,6 +6,7 @@ use crate::DependencyType; use crate::Project; use crate::cli::cli_config::{DependencyConfig, PrefixUpdateConfig, ProjectConfig}; +use crate::lock_file::UpdateMode; use super::has_specs::HasSpecs; @@ -82,6 +83,7 @@ pub async fn execute(args: Args) -> miette::Result<()> { &project.default_environment(), prefix_update_config.lock_file_usage(), prefix_update_config.no_install, + UpdateMode::Revalidate, ) .await?; } diff --git a/src/cli/run.rs b/src/cli/run.rs index f2841eb52..725eea267 100644 --- a/src/cli/run.rs +++ b/src/cli/run.rs @@ -1,12 +1,11 @@ -use std::collections::hash_map::Entry; -use std::collections::HashSet; -use std::convert::identity; -use std::{collections::HashMap, string::String}; - use clap::Parser; use dialoguer::theme::ColorfulTheme; use itertools::Itertools; use miette::{Diagnostic, IntoDiagnostic}; +use std::collections::hash_map::Entry; +use std::collections::HashSet; +use std::convert::identity; +use std::{collections::HashMap, string::String}; use crate::cli::cli_config::{PrefixUpdateConfig, ProjectConfig}; use crate::environment::verify_prefix_location_unchanged; @@ -169,7 +168,12 @@ pub async fn execute(args: Args) -> miette::Result<()> { Entry::Occupied(env) => env.into_mut(), Entry::Vacant(entry) => { // Ensure there is a valid prefix - lock_file.prefix(&executable_task.run_environment).await?; + lock_file + .prefix( + &executable_task.run_environment, + args.prefix_update_config.update_mode(), + ) + .await?; let command_env = get_task_env( &executable_task.run_environment, diff --git a/src/cli/shell.rs b/src/cli/shell.rs index cac1d2d61..1b8d7cbb0 100644 --- a/src/cli/shell.rs +++ b/src/cli/shell.rs @@ -9,6 +9,7 @@ use rattler_shell::{ }; use crate::cli::cli_config::{PrefixUpdateConfig, ProjectConfig}; +use crate::lock_file::UpdateMode; use crate::{ activation::CurrentEnvVarBehavior, environment::update_prefix, project::virtual_packages::verify_current_platform_has_required_virtual_packages, prompt, @@ -241,6 +242,7 @@ pub async fn execute(args: Args) -> miette::Result<()> { &environment, args.prefix_update_config.lock_file_usage(), false, + UpdateMode::QuickValidate, ) .await?; diff --git a/src/cli/shell_hook.rs b/src/cli/shell_hook.rs index 5da49c455..6236451ff 100644 --- a/src/cli/shell_hook.rs +++ b/src/cli/shell_hook.rs @@ -10,12 +10,12 @@ use rattler_shell::{ use serde::Serialize; use serde_json; -use crate::cli::cli_config::{PrefixUpdateConfig, ProjectConfig}; -use crate::project::HasProjectRef; +use crate::activation::CurrentEnvVarBehavior; +use crate::environment::update_prefix; use crate::{ - activation::{get_activator, CurrentEnvVarBehavior}, - environment::update_prefix, - project::Environment, + activation::get_activator, + cli::cli_config::{PrefixUpdateConfig, ProjectConfig}, + project::{Environment, HasProjectRef}, Project, }; @@ -113,6 +113,7 @@ pub async fn execute(args: Args) -> miette::Result<()> { &environment, args.prefix_update_config.lock_file_usage(), false, + args.prefix_update_config.update_mode(), ) .await?; diff --git a/src/environment.rs b/src/environment.rs index a5c941d20..77e2ecd9a 100644 --- a/src/environment.rs +++ b/src/environment.rs @@ -1,13 +1,14 @@ -use std::{ - collections::HashMap, - convert::identity, - io::ErrorKind, - path::{Path, PathBuf}, - sync::Arc, +use crate::{ + install_pypi, + lock_file::{UpdateLockFileOptions, UpdateMode, UvResolutionContext}, + prefix::Prefix, + project::{grouped_environment::GroupedEnvironment, Environment, HasProjectRef}, + rlimit::try_increase_rlimit_to_sensible, + Project, }; - use dialoguer::theme::ColorfulTheme; use fancy_display::FancyDisplay; +use fs_err as fs; use miette::{IntoDiagnostic, WrapErr}; use pixi_consts::consts; use pixi_manifest::{EnvironmentName, FeaturesExt, SystemRequirements}; @@ -17,20 +18,23 @@ use rattler::{ package_cache::PackageCache, }; use rattler_conda_types::{Platform, PrefixRecord, RepoDataRecord}; +use rattler_lock::Package::{Conda, Pypi}; use rattler_lock::{PypiIndexes, PypiPackageData, PypiPackageEnvironmentData}; use reqwest_middleware::ClientWithMiddleware; use serde::{Deserialize, Serialize}; +use std::hash::{Hash, Hasher}; +use std::{ + collections::HashMap, + convert::identity, + io, + io::ErrorKind, + path::{Path, PathBuf}, + sync::Arc, +}; use tokio::sync::Semaphore; use uv_distribution_types::{InstalledDist, Name}; -use crate::{ - install_pypi, - lock_file::{UpdateLockFileOptions, UvResolutionContext}, - prefix::Prefix, - project::{grouped_environment::GroupedEnvironment, Environment, HasProjectRef}, - rlimit::try_increase_rlimit_to_sensible, - Project, -}; +use xxhash_rust::xxh3::Xxh3; /// Verify the location of the prefix folder is not changed so the applied /// prefix path is still valid. Errors when there is a file system error or the @@ -102,13 +106,22 @@ async fn prefix_location_changed( } } +// Write the contents to the file at the given path. +fn write_file, C: AsRef<[u8]>>(path: P, contents: C) -> io::Result<()> { + // Verify existence of parent + if let Some(parent) = path.as_ref().parent() { + fs::create_dir_all(parent)?; + } + + fs::write(path, contents) +} + /// Create the prefix location file. /// Give it the environment path to place it. fn create_prefix_location_file(environment_dir: &Path) -> miette::Result<()> { let prefix_file_path = environment_dir .join("conda-meta") .join(consts::PREFIX_FILE_NAME); - tracing::info!("Creating prefix file at: {}", prefix_file_path.display()); let parent_dir = prefix_file_path.parent().ok_or_else(|| { miette::miette!( @@ -120,18 +133,17 @@ fn create_prefix_location_file(environment_dir: &Path) -> miette::Result<()> { if parent_dir.exists() { let contents = parent_dir.to_string_lossy(); - let path = Path::new(&prefix_file_path); // Read existing contents to determine if an update is necessary - if path.exists() { - let existing_contents = std::fs::read_to_string(path).into_diagnostic()?; + if prefix_file_path.exists() { + let existing_contents = fs::read_to_string(&prefix_file_path).into_diagnostic()?; if existing_contents == contents { tracing::info!("No update needed for the prefix file."); return Ok(()); } } - // Write new contents to the prefix file - std::fs::write(path, &*contents).into_diagnostic()?; + write_file(&prefix_file_path, contents.as_bytes()).into_diagnostic()?; + tracing::info!("Prefix file updated with: '{}'.", contents); } Ok(()) @@ -142,43 +154,80 @@ fn create_prefix_location_file(environment_dir: &Path) -> miette::Result<()> { fn create_history_file(environment_dir: &Path) -> miette::Result<()> { let history_file = environment_dir.join("conda-meta").join("history"); - tracing::info!( - "Checking if history file exists: {}", - history_file.display() - ); + tracing::info!("Verify history file exists: {}", history_file.display()); - let binding = history_file.clone(); - let parent = binding - .parent() - .ok_or_else(|| miette::miette!("cannot find parent of '{}'", binding.display()))?; + write_file( + history_file, + "// not relevant for pixi but for `conda run -p`", + ) + .into_diagnostic() +} - if parent.exists() && !history_file.exists() { - tracing::info!("Creating history file: {}", history_file.display()); - std::fs::write( - history_file, - "// not relevant for pixi but for `conda run -p`", - ) - .into_diagnostic()?; +#[derive(Debug, Hash, Serialize, Deserialize, PartialEq, Eq)] +pub struct LockedEnvironmentHash(String); +impl LockedEnvironmentHash { + pub(crate) fn from_environment( + environment: rattler_lock::Environment, + platform: Platform, + ) -> Self { + let mut hasher = Xxh3::new(); + + if let Some(packages) = environment.packages(platform) { + for package in packages { + // Always has the url or path + package + .url_or_path() + .into_owned() + .to_string() + .hash(&mut hasher); + + match package { + // A select set of fields are used to hash the package + Conda(pack) => { + if let Some(sha) = pack.package_record().sha256 { + sha.hash(&mut hasher); + } else if let Some(md5) = pack.package_record().md5 { + md5.hash(&mut hasher); + } + } + Pypi(pack) => { + pack.is_editable().hash(&mut hasher); + pack.extras().hash(&mut hasher); + } + } + } + } + + LockedEnvironmentHash(format!("{:x}", hasher.finish())) } - Ok(()) } +/// Information about the environment that was used to create the environment. #[derive(Serialize, Deserialize)] pub(crate) struct EnvironmentFile { + /// The path to the manifest file that was used to create the environment. pub(crate) manifest_path: PathBuf, + /// The name of the environment. pub(crate) environment_name: String, + /// The version of the pixi that was used to create the environment. pub(crate) pixi_version: String, + /// The hash of the lock file that was used to create the environment. + pub(crate) environment_lock_file_hash: LockedEnvironmentHash, +} + +/// The path to the environment file in the `conda-meta` directory of the environment. +fn environment_file_path(environment_dir: &Path) -> PathBuf { + environment_dir + .join(consts::CONDA_META_DIR) + .join(consts::ENVIRONMENT_FILE_NAME) } /// Write information about the environment to a file in the environment -/// directory. This can be useful for other tools that only know the environment -/// directory to find the original project. +/// directory. Used by the prefix updating to validate if it needs to be updated. pub(crate) fn write_environment_file( environment_dir: &Path, env_file: EnvironmentFile, ) -> miette::Result { - let path = environment_dir - .join("conda-meta") - .join(consts::ENVIRONMENT_FILE_NAME); + let path = environment_file_path(environment_dir); let parent = path .parent() @@ -195,6 +244,45 @@ pub(crate) fn write_environment_file( Ok(path) } +/// Reading the environment file of the environment. +/// Removing it if it's not valid. +pub(crate) fn read_environment_file( + environment_dir: &Path, +) -> miette::Result> { + let path = environment_file_path(environment_dir); + + let contents = match std::fs::read_to_string(&path) { + Ok(contents) => contents, + Err(e) if e.kind() == ErrorKind::NotFound => { + tracing::debug!("Environment file not yet found at: {:?}", path); + return Ok(None); + } + Err(e) => { + tracing::debug!( + "Failed to read environment file at: {:?}, error: {}, will try to remove it.", + path, + e + ); + let _ = std::fs::remove_file(&path); + return Err(e).into_diagnostic(); + } + }; + let env_file: EnvironmentFile = match serde_json::from_str(&contents) { + Ok(env_file) => env_file, + Err(e) => { + tracing::debug!( + "Failed to read environment file at: {:?}, error: {}, will try to remove it.", + path, + e + ); + let _ = std::fs::remove_file(&path); + return Ok(None); + } + }; + + Ok(Some(env_file)) +} + /// Runs the following checks to make sure the project is in a sane state: /// 1. It verifies that the prefix location is unchanged. /// 2. It verifies that the system requirements are met. @@ -292,6 +380,7 @@ pub async fn update_prefix( environment: &Environment<'_>, lock_file_usage: LockFileUsage, mut no_install: bool, + update_mode: UpdateMode, ) -> miette::Result<()> { let current_platform = environment.best_platform(); let project = environment.project(); @@ -316,7 +405,7 @@ pub async fn update_prefix( // Get the locked environment from the lock-file. if !no_install { - lock_file.prefix(environment).await?; + lock_file.prefix(environment, update_mode).await?; } Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 59592ea1c..5470fa23d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ pub mod environment; mod global; mod install_pypi; mod install_wheel; -mod lock_file; +pub mod lock_file; mod prefix; mod project; mod prompt; diff --git a/src/lock_file/mod.rs b/src/lock_file/mod.rs index 01d379ba3..c70aca2e0 100644 --- a/src/lock_file/mod.rs +++ b/src/lock_file/mod.rs @@ -7,17 +7,21 @@ mod update; mod utils; use miette::{IntoDiagnostic, WrapErr}; -pub use outdated::OutdatedEnvironments; -pub use package_identifier::PypiPackageIdentifier; +pub(crate) use outdated::OutdatedEnvironments; +pub(crate) use package_identifier::PypiPackageIdentifier; use rattler_conda_types::RepoDataRecord; use rattler_lock::{LockFile, PypiPackageData, PypiPackageEnvironmentData}; -pub use records_by_name::{PypiRecordsByName, RepoDataRecordsByName}; -pub use resolve::{ +pub(crate) use records_by_name::{PypiRecordsByName, RepoDataRecordsByName}; +pub(crate) use resolve::{ conda::resolve_conda, pypi::resolve_pypi, uv_resolution_context::UvResolutionContext, }; -pub use satisfiability::{verify_environment_satisfiability, verify_platform_satisfiability}; -pub use update::{LockFileDerivedData, UpdateContext, UpdateLockFileOptions}; -pub use utils::filter_lock_file; +pub use satisfiability::{ + verify_environment_satisfiability, verify_platform_satisfiability, EnvironmentUnsat, + PlatformUnsat, +}; +pub(crate) use update::{LockFileDerivedData, UpdateContext}; +pub use update::{UpdateLockFileOptions, UpdateMode}; +pub(crate) use utils::filter_lock_file; use crate::Project; diff --git a/src/lock_file/records_by_name.rs b/src/lock_file/records_by_name.rs index 82178efea..201669ad7 100644 --- a/src/lock_file/records_by_name.rs +++ b/src/lock_file/records_by_name.rs @@ -8,8 +8,8 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; use std::hash::Hash; -pub type RepoDataRecordsByName = DependencyRecordsByName; -pub type PypiRecordsByName = DependencyRecordsByName; +pub(crate) type RepoDataRecordsByName = DependencyRecordsByName; +pub(crate) type PypiRecordsByName = DependencyRecordsByName; /// A trait required from the dependencies stored in DependencyRecordsByName pub(crate) trait HasNameVersion { @@ -49,8 +49,8 @@ impl HasNameVersion for RepoDataRecord { /// A struct that holds both a ``Vec` of `DependencyRecord` and a mapping from name to index. #[derive(Clone, Debug)] -pub struct DependencyRecordsByName { - pub records: Vec, +pub(crate) struct DependencyRecordsByName { + pub(crate) records: Vec, by_name: HashMap, } diff --git a/src/lock_file/update.rs b/src/lock_file/update.rs index ae318372f..5cdb996dd 100644 --- a/src/lock_file/update.rs +++ b/src/lock_file/update.rs @@ -1,17 +1,3 @@ -use std::{ - borrow::Cow, - collections::{HashMap, HashSet, VecDeque}, - fmt::Write, - future::{ready, Future}, - iter, - path::PathBuf, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, - time::{Duration, Instant}, -}; - use barrier_cell::BarrierCell; use fancy_display::FancyDisplay; use futures::{future::Either, stream::FuturesUnordered, FutureExt, StreamExt, TryFutureExt}; @@ -34,12 +20,27 @@ use rattler_conda_types::{Arch, Channel, MatchSpec, Platform, RepoDataRecord}; use rattler_lock::{LockFile, PypiIndexes, PypiPackageData, PypiPackageEnvironmentData}; use rattler_repodata_gateway::{Gateway, RepoData}; use rattler_solve::ChannelPriority; +use std::cmp::PartialEq; +use std::{ + borrow::Cow, + collections::{HashMap, HashSet, VecDeque}, + fmt::Write, + future::{ready, Future}, + iter, + path::PathBuf, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; use thiserror::Error; use tokio::sync::Semaphore; use tracing::Instrument; use url::Url; use uv_normalize::ExtraName; +use crate::environment::{read_environment_file, LockedEnvironmentHash}; use crate::repodata::Repodata; use crate::{ activation::CurrentEnvVarBehavior, @@ -121,6 +122,20 @@ pub struct LockFileDerivedData<'p> { pub io_concurrency_limit: IoConcurrencyLimit, } +/// The mode to use when updating a prefix. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum UpdateMode { + /// Validate if the prefix is up-to-date. + /// Using a fast and simple validation method. + /// Used for skipping the update if the prefix is already up-to-date, in activating commands. + /// Like `pixi shell` or `pixi run`. + QuickValidate, + /// Force a prefix install without running the short validation. + /// Used for updating the prefix when the lock-file likely out of date. + /// Like `pixi install` or `pixi update`. + Revalidate, +} + impl<'p> LockFileDerivedData<'p> { /// Write the lock-file to disk. pub(crate) fn write_to_disk(&self) -> miette::Result<()> { @@ -131,22 +146,71 @@ impl<'p> LockFileDerivedData<'p> { .context("failed to write lock-file to disk") } + fn locked_environment_hash( + &self, + environment: &Environment<'p>, + ) -> miette::Result { + let locked_environment = self + .lock_file + .environment(environment.name().as_str()) + .ok_or_else(|| UpdateError::LockFileMissingEnv(environment.name().clone()))?; + Ok(LockedEnvironmentHash::from_environment( + locked_environment, + environment.best_platform(), + )) + } + /// Returns the up-to-date prefix for the given environment. - pub async fn prefix(&mut self, environment: &Environment<'p>) -> miette::Result { - // Save an environment file to the environment directory + pub async fn prefix( + &mut self, + environment: &Environment<'p>, + update_mode: UpdateMode, + ) -> miette::Result { + // Check if the prefix is already up-to-date by validating the hash with the environment file + let hash = self.locked_environment_hash(environment)?; + if update_mode == UpdateMode::QuickValidate { + if let Ok(Some(environment_file)) = read_environment_file(&environment.dir()) { + if environment_file.environment_lock_file_hash == hash { + tracing::info!( + "Environment '{}' is up-to-date with lock file hash", + environment.name().fancy_display() + ); + return Ok(Prefix::new(environment.dir())); + } + } else { + tracing::debug!( + "Environment file not found or parsable for '{}'", + environment.name().fancy_display() + ); + } + } + + // Get the up-to-date prefix + let prefix = self.update_prefix(environment).await?; + + // Save an environment file to the environment directory after the update. + // Avoiding writing the cache away before the update is done. write_environment_file( &environment.dir(), EnvironmentFile { manifest_path: environment.project().manifest_path(), environment_name: environment.name().to_string(), pixi_version: consts::PIXI_VERSION.to_string(), + environment_lock_file_hash: hash, }, )?; + Ok(prefix) + } + + /// Returns the up-to-date prefix for the given environment. + async fn update_prefix(&mut self, environment: &Environment<'p>) -> miette::Result { + // If we previously updated this environment, early out. if let Some(prefix) = self.updated_pypi_prefixes.get(environment.name()) { return Ok(prefix.clone()); } + tracing::info!("Updating prefix"); // Get the prefix with the conda packages installed. let platform = environment.best_platform(); let (prefix, python_status) = self.conda_prefix(environment).await?; @@ -173,6 +237,7 @@ impl<'p> LockFileDerivedData<'p> { Some(context) => context.clone(), }; + // TODO: This can be really slow (~200ms for pixi on @ruben-arts machine). let env_variables = self .project .get_activated_environment_variables(environment, CurrentEnvVarBehavior::Exclude) diff --git a/tbump.toml b/tbump.toml index 92e62b692..6510ccee9 100644 --- a/tbump.toml +++ b/tbump.toml @@ -71,6 +71,10 @@ src = "tests/integration_python/common.py" search = "None => \"{current_version}\"," src = "crates/pixi_consts/src/consts.rs" +[[file]] +search = "\"pixi_version\": \"{current_version}\"," +src = "docs/features/environment.md" + [[field]] # the name of the field name = "candidate" diff --git a/tests/integration_python/common.py b/tests/integration_python/common.py index 47f08f855..ed127a04e 100644 --- a/tests/integration_python/common.py +++ b/tests/integration_python/common.py @@ -101,3 +101,11 @@ def is_binary(path: Path) -> bool: textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F}) with open(path, "rb") as f: return bool(f.read(2048).translate(None, textchars)) + + +def pixi_dir(project_root: Path) -> Path: + return project_root.joinpath(".pixi") + + +def default_env_path(project_root: Path) -> Path: + return pixi_dir(project_root).joinpath("envs", "default") diff --git a/tests/integration_python/test_run_cli.py b/tests/integration_python/test_run_cli.py index b26abe7e4..7fbc3d848 100644 --- a/tests/integration_python/test_run_cli.py +++ b/tests/integration_python/test_run_cli.py @@ -1,5 +1,5 @@ from pathlib import Path -from .common import verify_cli_command, ExitCode +from .common import verify_cli_command, ExitCode, default_env_path ALL_PLATFORMS = '["linux-64", "osx-64", "win-64", "linux-ppc64le", "linux-aarch64"]' @@ -57,3 +57,99 @@ def test_run_in_shell(pixi: Path, tmp_path: Path) -> None: stdout_contains=["a", "a1"], env=env, ) + + +def test_using_prefix_validation(pixi: Path, tmp_path: Path, dummy_channel_1: str) -> None: + manifest = tmp_path.joinpath("pixi.toml") + toml = f""" + [project] + name = "test" + channels = ["{dummy_channel_1}"] + platforms = ["linux-64", "osx-64", "osx-arm64", "win-64"] + + [dependencies] + dummy-a = "*" + """ + manifest.write_text(toml) + + # Run the install + verify_cli_command( + [pixi, "install", "--manifest-path", manifest], + ) + + # Validate creation of the pixi file with the hash + pixi_file = default_env_path(tmp_path).joinpath("conda-meta").joinpath("pixi") + assert pixi_file.exists() + assert "environment_lock_file_hash" in pixi_file.read_text() + + # Break environment on purpose + dummy_a_meta_files = default_env_path(tmp_path).joinpath("conda-meta").glob("dummy-a*.json") + + for file in dummy_a_meta_files: + path = Path(file) + if path.exists(): + path.unlink() # Removes the file + + # Run simple script, which shouldn't reinstall + verify_cli_command( + [pixi, "run", "--manifest-path", manifest, "echo", "hello"], + stdout_contains="hello", + ) + + # Validate that the dummy-a files still don't exist + for file in dummy_a_meta_files: + assert not Path(file).exists() + + # Run an actual re-install + verify_cli_command( + [pixi, "install", "--manifest-path", manifest], + ) + + # Validate the files are back + for file in dummy_a_meta_files: + # All dummy-a files should be back as `install` will ignore the hash + assert Path(file).exists() + + +def test_prefix_revalidation(pixi: Path, tmp_path: Path, dummy_channel_1: str) -> None: + manifest = tmp_path.joinpath("pixi.toml") + toml = f""" + [project] + name = "test" + channels = ["{dummy_channel_1}"] + platforms = ["linux-64", "osx-64", "osx-arm64", "win-64"] + + [dependencies] + dummy-a = "*" + """ + manifest.write_text(toml) + + # Run the installation + verify_cli_command( + [pixi, "install", "--manifest-path", manifest], + ExitCode.SUCCESS, + ) + + # Validate creation of the pixi file with the hash + pixi_file = default_env_path(tmp_path).joinpath("conda-meta").joinpath("pixi") + assert pixi_file.exists() + assert "environment_lock_file_hash" in pixi_file.read_text() + + # Break environment on purpose + dummy_a_meta_files = default_env_path(tmp_path).joinpath("conda-meta").glob("dummy-a*.json") + + for file in dummy_a_meta_files: + path = Path(file) + if path.exists(): + path.unlink() # Removes the file + + # Run with revalidation to force reinstallation + verify_cli_command( + [pixi, "run", "--manifest-path", manifest, "--revalidate", "echo", "hello"], + ExitCode.SUCCESS, + stdout_contains="hello", + ) + + # Validate that the dummy-a files are reinstalled + for file in dummy_a_meta_files: + assert Path(file).exists() diff --git a/tests/integration_rust/common/mod.rs b/tests/integration_rust/common/mod.rs index bcf635412..54f60842d 100644 --- a/tests/integration_rust/common/mod.rs +++ b/tests/integration_rust/common/mod.rs @@ -12,6 +12,7 @@ use std::{ use indicatif::ProgressDrawTarget; use miette::{Context, Diagnostic, IntoDiagnostic}; +use pixi::lock_file::UpdateMode; use pixi::{ cli::{ add, @@ -304,6 +305,7 @@ impl PixiControl { no_install: true, lock_file_usage: LockFileUsageArgs::default(), config: Default::default(), + revalidate: false, }, editable: false, }, @@ -323,6 +325,7 @@ impl PixiControl { no_install: true, lock_file_usage: LockFileUsageArgs::default(), config: Default::default(), + revalidate: false, }, }, } @@ -418,7 +421,9 @@ impl PixiControl { // Construct the task environment if not already created. let task_env = match task_env.as_ref() { None => { - lock_file.prefix(&task.run_environment).await?; + lock_file + .prefix(&task.run_environment, UpdateMode::Revalidate) + .await?; let env = get_task_env(&task.run_environment, args.clean_env).await?; task_env.insert(env) } diff --git a/tests/integration_rust/install_tests.rs b/tests/integration_rust/install_tests.rs index d99bb3bb8..9cf48484e 100644 --- a/tests/integration_rust/install_tests.rs +++ b/tests/integration_rust/install_tests.rs @@ -6,6 +6,7 @@ use crate::common::{LockFileExt, PixiControl}; use pixi::cli::cli_config::{PrefixUpdateConfig, ProjectConfig}; use pixi::cli::{run, run::Args, LockFileUsageArgs}; use pixi::environment::LockFileUsage; +use pixi::lock_file::UpdateMode; use pixi::Project; use pixi_config::{Config, DetachedEnvironments}; use pixi_consts::consts; @@ -445,7 +446,6 @@ async fn test_channels_changed() { } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] -#[cfg_attr(not(feature = "slow_integration_tests"), ignore)] async fn install_conda_meta_history() { let pixi = PixiControl::new().unwrap(); pixi.init().await.unwrap(); @@ -569,9 +569,14 @@ async fn test_old_lock_install() { "tests/data/satisfiability/old_lock_file/pyproject.toml", )) .unwrap(); - pixi::environment::update_prefix(&project.default_environment(), LockFileUsage::Update, false) - .await - .unwrap(); + pixi::environment::update_prefix( + &project.default_environment(), + LockFileUsage::Update, + false, + UpdateMode::Revalidate, + ) + .await + .unwrap(); assert_eq!( lock_str, std::fs::read_to_string("tests/data/satisfiability/old_lock_file/pixi.lock").unwrap()