Skip to content

Commit

Permalink
feat: find installed distributions (#52)
Browse files Browse the repository at this point in the history
A simple implementation of #7.

I'm sure there are lots of edge cases that are not covered by this code.
  • Loading branch information
baszalmstra authored Oct 19, 2023
1 parent 7896409 commit aeec00b
Show file tree
Hide file tree
Showing 67 changed files with 2,896 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ repos:
- id: codespell
args: [--ignore-words=.codespell-whitelist.txt]
exclude: Cargo.lock
exclude: 'crates/rattler_installs_packages/vendor/.*'
exclude: 'crates/rattler_installs_packages/vendor/.*|test-data/'
60 changes: 60 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion crates/rattler_installs_packages/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@ url = { version = "2.4.1", features = ["serde"] }
zip = "0.6.6"
resolvo = { version = "0.1.0", optional = true }
which = "4.4.2"
pathdiff = "0.2.1"

[dev-dependencies]
criterion = "0.5"
insta = { version = "1.33.0", features = ["ron"] }
insta = { version = "1.33.0", features = ["ron", "redactions"] }
miette = { version = "5.10.0", features = ["fancy"] }
once_cell = "1.18.0"
tokio = { version = "1.32.0", features = ["rt", "macros", "rt-multi-thread"] }
Expand Down
6 changes: 3 additions & 3 deletions crates/rattler_installs_packages/src/artifact.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ fn parse_format_metadata_and_check_version(
version_field: &str,
) -> miette::Result<RFC822ish> {
let input: &str = std::str::from_utf8(input).into_diagnostic()?;
let mut parsed = RFC822ish::parse(input)?;
let mut parsed = RFC822ish::from_str(input).into_diagnostic()?;

let version = parsed.take(version_field)?;
if !version.starts_with("1.") {
Expand Down Expand Up @@ -304,7 +304,7 @@ impl InstallPaths {
}

impl Wheel {
/// Unpacks a wheel to the given fileystem.
/// Unpacks a wheel to the given filesystem.
/// TODO: Write better docs.
/// The following functionality is still missing:
/// - Checking and writing of RECORD file
Expand Down Expand Up @@ -375,7 +375,7 @@ struct WheelPathTransformer<'a> {
/// Whether the wheel is a purelib or a platlib.
root_is_purelib: bool,

/// The location in the fileystem where to place files from the data directory.
/// The location in the filesystem where to place files from the data directory.
paths: &'a InstallPaths,
}

Expand Down
2 changes: 1 addition & 1 deletion crates/rattler_installs_packages/src/core_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ impl TryFrom<&[u8]> for WheelCoreMetadata {

fn parse_common(input: &[u8]) -> miette::Result<(PackageName, Version, RFC822ish)> {
let input = String::from_utf8_lossy(input);
let mut parsed = RFC822ish::parse(&input)?;
let mut parsed = RFC822ish::from_str(&input).into_diagnostic()?;

static NEXT_MAJOR_METADATA_VERSION: Lazy<Version> =
Lazy::new(|| Version::from_str("3").unwrap());
Expand Down
155 changes: 155 additions & 0 deletions crates/rattler_installs_packages/src/distribution_finder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
//! This module implements logic to locate so-called Python distributions (installed artifacts)
//! in an environment.
//!
//! The implementation is based on the specification at <https://packaging.python.org/en/latest/specifications/recording-installed-packages>,
//! which in turn is based on [PEP 376](https://peps.python.org/pep-0376/) and [PEP 627](https://peps.python.org/pep-0627/).
use crate::{rfc822ish::RFC822ish, InstallPaths, NormalizedPackageName, PackageName};
use itertools::Itertools;
use pep440_rs::Version;
use serde::{Deserialize, Serialize};
use std::{
ffi::OsStr,
path::{Path, PathBuf},
str::FromStr,
};
use thiserror::Error;

/// Information about a distribution found by `find_distributions_in_venv`.
///
/// Each value describes a single `.dist-info` directory whose name could be parsed as
/// `{name}-{version}.dist-info`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Distribution {
/// The name of the distribution, parsed from the `.dist-info` directory name and converted
/// to its normalized form.
pub name: NormalizedPackageName,

/// The version of the distribution, parsed from the `.dist-info` directory name.
pub version: Version,

/// The installer that was responsible for installing the distribution. This is the trimmed
/// content of the `INSTALLER` file, or `None` if that file could not be read.
pub installer: Option<String>,

/// The path to the .dist-info directory relative to the root of the environment. If no
/// relative path can be computed the path is stored as it was found on disk.
pub dist_info: PathBuf,

/// The specific tags of the distribution that was installed or `None` if this information
/// could not be retrieved. The tags are the values of the `Tag` fields of the `WHEEL` file;
/// the value is `None` when the `.dist-info` directory contains no `WHEEL` file.
pub tags: Option<Vec<String>>,
}

/// An error that can occur when running `find_distributions_in_venv`.
#[derive(Debug, Error)]
pub enum FindDistributionError {
/// An IO error occurred, e.g. while listing a directory or while reading a `WHEEL` file.
/// The underlying error is forwarded transparently.
#[error(transparent)]
IoError(#[from] std::io::Error),

/// Failed to parse a WHEEL file. Holds the path of the offending file and the parse error
/// that was reported for it.
#[error("failed to parse '{0}'")]
FailedToParseWheel(PathBuf, #[source] <RFC822ish as FromStr>::Err),
}

/// Locates the python distributions (packages) that have been installed in the virtualenv rooted at
/// `root`.
pub fn find_distributions_in_venv(
root: &Path,
paths: &InstallPaths,
) -> Result<Vec<Distribution>, FindDistributionError> {
// We will look for distributions in the purelib/platlib directories
let locations = [paths.mapping.get("purelib"), paths.mapping.get("platlib")]
.into_iter()
.filter_map(|p| Some(root.join(p?)))
.unique()
.filter(|p| p.is_dir())
.collect_vec();

// Iterate over all the entries in the in the locations and look for .dist-info entries.
let mut result = Vec::new();
for location in locations {
for entry in location.read_dir()? {
let entry = entry?;
if entry.file_type()?.is_dir() {
if let Some(dist) = analyze_distribution(entry.path())? {
result.push(Distribution {
dist_info: pathdiff::diff_paths(&dist.dist_info, root)
.unwrap_or(dist.dist_info),
..dist
})
}
}
}
}

Ok(result)
}

/// Inspects the directory at `path` and, if it is a `.dist-info` directory of a python
/// distribution (package), returns the information that could be gathered about it.
///
/// Returns `Ok(None)` when the directory name is not of the form `{name}-{version}.dist-info`
/// or when either the name or the version cannot be parsed.
///
/// # Errors
///
/// Fails if a `WHEEL` file is present but cannot be read or parsed.
fn analyze_distribution(path: PathBuf) -> Result<Option<Distribution>, FindDistributionError> {
    // The directory name must be valid UTF-8 and carry the `.dist-info` suffix.
    let Some(stem) = path
        .file_name()
        .and_then(OsStr::to_str)
        .and_then(|file_name| file_name.strip_suffix(".dist-info"))
    else {
        return Ok(None);
    };

    // Everything before the first dash is the name, the remainder is the version.
    let Some((raw_name, raw_version)) = stem.split_once('-') else {
        return Ok(None);
    };

    // Directories with an unparsable name or version are silently skipped.
    let name = match PackageName::from_str(raw_name) {
        Ok(parsed) => parsed,
        Err(_) => return Ok(None),
    };
    let version = match Version::from_str(raw_version) {
        Ok(parsed) => parsed,
        Err(_) => return Ok(None),
    };

    // The INSTALLER file is optional; any failure to read it simply yields `None`.
    let installer = match std::fs::read_to_string(path.join("INSTALLER")) {
        Ok(contents) => Some(contents.trim().to_owned()),
        Err(_) => None,
    };

    // The tags are recorded in the WHEEL file, which may be absent.
    let wheel_path = path.join("WHEEL");
    let tags = if !wheel_path.is_file() {
        None
    } else {
        let contents = std::fs::read_to_string(&wheel_path)?;
        match RFC822ish::from_str(&contents) {
            Ok(mut parsed) => Some(parsed.take_all("Tag")),
            Err(err) => {
                return Err(FindDistributionError::FailedToParseWheel(wheel_path, err));
            }
        }
    };

    Ok(Some(Distribution {
        name: name.into(),
        version,
        installer,
        dist_info: path,
        tags,
    }))
}

// Snapshot test for `find_distributions_in_venv` that runs against the checked-in
// `test-data/find_distributions` directory.
#[cfg(test)]
mod test {
use super::*;

#[test]
fn test_find_distributions() {
// Describe the virtual environment. The fixture lives two levels above the crate manifest.
let venv_path =
Path::new(env!("CARGO_MANIFEST_DIR")).join("../../test-data/find_distributions/");
// NOTE(review): presumably `(3, 8)` is the python version and `true` selects the Windows
// layout — confirm against `InstallPaths::for_venv`.
let install_paths = InstallPaths::for_venv((3, 8), true);

// Find all distributions
let mut distributions = find_distributions_in_venv(&venv_path, &install_paths).unwrap();

// Sort to get consistent ordering across platforms
distributions.sort_by(|a, b| a.name.cmp(&b.name));

// Rewrite backslashes in `dist_info` to forward slashes before comparing against the
// snapshot so that the path separator does not affect the result.
insta::assert_ron_snapshot!(distributions, {
"[].dist_info" => insta::dynamic_redaction(move |value, _path| {
value.as_str().unwrap().replace("\\", "/")
}),
});
}
}
3 changes: 3 additions & 0 deletions crates/rattler_installs_packages/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ pub mod fs;
mod resolve;
pub mod tags;

mod distribution_finder;

#[cfg(feature = "resolvo")]
pub use resolve::{resolve, PinnedPackage};

Expand All @@ -35,6 +37,7 @@ pub use artifact_name::{
ArtifactName, BuildTag, InnerAsArtifactName, ParseArtifactNameError, SDistFormat, SDistName,
WheelName,
};
pub use distribution_finder::{find_distributions_in_venv, Distribution, FindDistributionError};
pub use env_markers::Pep508EnvMakers;
pub use extra::Extra;
pub use package_name::{NormalizedPackageName, PackageName, ParsePackageNameError};
Expand Down
14 changes: 12 additions & 2 deletions crates/rattler_installs_packages/src/package_name.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use miette::Diagnostic;
use regex::Regex;
use serde::{Serialize, Serializer};
use serde_with::DeserializeFromStr;
use serde_with::{DeserializeFromStr, SerializeDisplay};
use std::cmp::Ordering;
use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};
Expand Down Expand Up @@ -104,7 +104,9 @@ impl Serialize for PackageName {
/// A normalized package name. This is a string that is guaranteed to be a valid python package string,
/// as described in [PEP 503 (Normalized Names)](https://www.python.org/dev/peps/pep-0503/#normalized-names).
#[repr(transparent)]
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[derive(
Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, SerializeDisplay, DeserializeFromStr,
)]
pub struct NormalizedPackageName(Box<str>);

impl From<PackageName> for NormalizedPackageName {
Expand Down Expand Up @@ -135,6 +137,14 @@ impl NormalizedPackageName {
}
}

impl FromStr for NormalizedPackageName {
    type Err = ParsePackageNameError;

    /// Parses `s` as a [`PackageName`] and converts the result into its normalized form. Any
    /// error reported while parsing the package name is returned as is.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        PackageName::from_str(s).map(Self::from)
    }
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
14 changes: 9 additions & 5 deletions crates/rattler_installs_packages/src/rfc822ish.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// Implementation comes from https://github.com/njsmith/posy/blob/main/src/vocab/rfc822ish.rs
// Licensed under MIT or Apache-2.0

use miette::IntoDiagnostic;
use std::collections::HashMap;
use std::str::FromStr;

pub type Fields = HashMap<String, Vec<String>>;

Expand Down Expand Up @@ -88,10 +88,6 @@ peg::parser! {
}

impl RFC822ish {
pub fn parse(input: &str) -> miette::Result<RFC822ish> {
rfc822ish_parser::rfc822ish(input).into_diagnostic()
}

pub fn take_all(&mut self, key: &str) -> Vec<String> {
match self.fields.remove(&key.to_ascii_lowercase()) {
Some(vec) => vec,
Expand All @@ -115,3 +111,11 @@ impl RFC822ish {
}
}
}

impl FromStr for RFC822ish {
// The error is the parse error of the `peg` generated parser, positioned by line/column.
type Err = peg::error::ParseError<peg::str::LineCol>;

/// Parses `s` by delegating to the `rfc822ish` rule of `rfc822ish_parser` and returns the
/// outcome of the parser unchanged.
fn from_str(s: &str) -> Result<Self, Self::Err> {
rfc822ish_parser::rfc822ish(s)
}
}
Loading

0 comments on commit aeec00b

Please sign in to comment.