Skip to content

Commit

Permalink
feat: switch to compressed mapping (prefix-dev#1335)
Browse files Browse the repository at this point in the history
  • Loading branch information
nichmor authored May 16, 2024
1 parent 070d0e4 commit 66ce6cf
Show file tree
Hide file tree
Showing 10 changed files with 1,998 additions and 913 deletions.
1,504 changes: 1,139 additions & 365 deletions examples/conda_mapping/pixi.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions examples/conda_mapping/pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ python = "~=3.11.0"
scipy = "~=1.11.4"
boltons = "*"
jupyter-ros = { version = "*", channel = "robostack" }
jupyter-amphion = {version = "*", channel = "robostack"}

[pypi-dependencies]
black = { version = "~=23.10", extras = ["jupyter"] }
Expand Down
3 changes: 2 additions & 1 deletion examples/conda_mapping/robostack_mapping.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{
"jupyter-ros": "my-name-from-mapping"
"jupyter-ros": "my-name-from-mapping",
"jupyter-amphion": null
}
34 changes: 28 additions & 6 deletions examples/conda_mapping/test_conda_mapping.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,40 @@
import yaml

# This test verifies that we generate the right purls for our packages
# We use one remote mapping for conda-forge channel
# and one local mapping for robostack channel

PACKAGE_NAME_TO_TEST = {"boltons": "my-boltons-name", "jupyter-ros": "my-name-from-mapping"}

# For packages that are present in local-mapping
# we verify if source=project-defined-mapping qualifier is present in purl
# so purl should look like this:
# pkg:pypi/my-boltons-name?source=project-defined-mapping

PACKAGE_NAME_TO_TEST = {
"boltons": "my-boltons-name?source=project-defined-mapping",
"jupyter-ros": "my-name-from-mapping?source=project-defined-mapping"
}



# We test if having a null for conda name
# will mark a conda package as not a pypi package
# and will not add any purls for it
# "jupyter-amphion": null
PACKAGE_NAME_SHOULD_BE_NULL = ("jupyter-amphion",)

if __name__ == "__main__":
# this will test if we map correctly our packages
# we have one remote mapping for conda-forge
# and one local mapping for robostack

if __name__ == "__main__":
with open("pixi.lock") as pixi_lock:
lock = yaml.safe_load(pixi_lock)

expected_packages = [
package for package in lock["packages"] if package["name"] in PACKAGE_NAME_TO_TEST
]

assert len(expected_packages) == 2
expected_null_packages = [
package for package in lock["packages"] if package["name"] in PACKAGE_NAME_SHOULD_BE_NULL
]

for package in expected_packages:
package_name = package["name"]
Expand All @@ -29,3 +47,7 @@
expected_purl = f"pkg:pypi/{PACKAGE_NAME_TO_TEST[package_name]}"

assert purls[0] == expected_purl


for package in expected_null_packages:
assert "purls" not in package
958 changes: 501 additions & 457 deletions examples/pypi/pixi.lock

Large diffs are not rendered by default.

38 changes: 4 additions & 34 deletions src/lock_file/package_identifier.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{project::manifest::python::PyPiPackageName, pypi_mapping};
use crate::project::manifest::python::PyPiPackageName;
use pep508_rs::{Requirement, VersionOrUrl};
use rattler_conda_types::{PackageUrl, RepoDataRecord};
use std::{collections::HashSet, str::FromStr};
Expand Down Expand Up @@ -32,51 +32,21 @@ impl PypiPackageIdentifier {
result: &mut Vec<Self>,
) -> Result<(), ConversionError> {
// Check the PURLs for a python package.
let mut has_pypi_purl = false;
for purl in record.package_record.purls.iter() {
if let Some(entry) = Self::try_from_purl(purl, &record.package_record.version.as_str())?
if let Some(entry) =
Self::convert_from_purl(purl, &record.package_record.version.as_str())?
{
result.push(entry);
has_pypi_purl = true;
}
}

// If there is no pypi purl, but the package is a conda-forge package, we just assume that
// the name of the package is equivalent to the name of the python package.
if !has_pypi_purl && pypi_mapping::is_conda_forge_record(record) {
// Convert the conda package names to pypi package names. If the conversion fails we
// just assume that its not a valid python package.
let name = PackageName::from_str(record.package_record.name.as_source()).ok();
let version =
pep440_rs::Version::from_str(&record.package_record.version.as_str()).ok();
if let (Some(name), Some(version)) = (name, version) {
result.push(PypiPackageIdentifier {
name: PyPiPackageName::from_normalized(name),
version,
url: record.url.clone(),
// TODO: We can't really tell which python extras are enabled in a conda package.
extras: Default::default(),
})
}
}

Ok(())
}

// /// Given a list of conda package records, extract the python packages that will be installed
// /// when these conda packages are installed.
// pub fn from_records(records: &[RepoDataRecord]) -> Result<Vec<Self>, ConversionError> {
// let mut result = Vec::new();
// for record in records {
// Self::from_record_into(record, &mut result)?;
// }
// Ok(result)
// }

/// Tries to construct an instance from a generic PURL.
///
/// The `fallback_version` is used if the PURL does not contain a version.
pub fn try_from_purl(
pub fn convert_from_purl(
package_url: &PackageUrl,
fallback_version: &str,
) -> Result<Option<Self>, ConversionError> {
Expand Down
70 changes: 45 additions & 25 deletions src/pypi_mapping/custom_pypi_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@ use async_once_cell::OnceCell;
use crate::pypi_mapping::MappingLocation;

use super::{
prefix_pypi_name_mapping::{self},
build_pypi_purl_from_package_record, is_conda_forge_record, prefix_pypi_name_mapping,
MappingMap, Reporter,
};

pub async fn fetch_mapping_from_url(
pub async fn fetch_mapping_from_url<T>(
client: &ClientWithMiddleware,
url: &Url,
) -> miette::Result<HashMap<String, String>> {
) -> miette::Result<T>
where
T: serde::de::DeserializeOwned,
{
let response = client
.get(url.clone())
.send()
Expand All @@ -34,8 +37,7 @@ pub async fn fetch_mapping_from_url(
));
}

let mapping_by_name: HashMap<String, String> =
response.json().await.into_diagnostic().context(format!(
let mapping_by_name: T = response.json().await.into_diagnostic().context(format!(
"failed to parse pypi name mapping located at {}. Please make sure that it's a valid json",
url
))?;
Expand All @@ -46,11 +48,11 @@ pub async fn fetch_mapping_from_url(
pub async fn fetch_custom_mapping(
client: &ClientWithMiddleware,
mapping_url: &MappingMap,
) -> miette::Result<&'static HashMap<String, HashMap<String, String>>> {
static MAPPING: OnceCell<HashMap<String, HashMap<String, String>>> = OnceCell::new();
) -> miette::Result<&'static HashMap<String, HashMap<String, Option<String>>>> {
static MAPPING: OnceCell<HashMap<String, HashMap<String, Option<String>>>> = OnceCell::new();
MAPPING
.get_or_try_init(async {
let mut mapping_url_to_name: HashMap<String, HashMap<String, String>> =
let mut mapping_url_to_name: HashMap<String, HashMap<String, Option<String>>> =
Default::default();

for (name, url) in mapping_url.iter() {
Expand Down Expand Up @@ -83,10 +85,12 @@ pub async fn fetch_custom_mapping(
let contents = std::fs::read_to_string(path)
.into_diagnostic()
.context(format!("mapping on {path:?} could not be loaded"))?;
let data: HashMap<String, String> = serde_json::from_str(&contents)
.unwrap_or_else(|_| {
panic!("Failed to parse JSON mapping located at {path:?}")
});
let data: HashMap<String, Option<String>> = serde_json::from_str(&contents)
.into_diagnostic()
.context(format!(
"Failed to parse JSON mapping located at {}",
path.display()
))?;

mapping_url_to_name.insert(name.to_string(), data);
}
Expand Down Expand Up @@ -149,7 +153,7 @@ pub async fn amend_pypi_purls(
/// a conda-forge package.
fn amend_pypi_purls_for_record(
record: &mut RepoDataRecord,
custom_mapping: &'static HashMap<String, HashMap<String, String>>,
custom_mapping: &'static HashMap<String, HashMap<String, Option<String>>>,
) -> miette::Result<()> {
// If the package already has a pypi name we can stop here.
if record
Expand All @@ -161,27 +165,43 @@ fn amend_pypi_purls_for_record(
return Ok(());
}

// If this package is a conda-forge package or user specified a custom channel mapping
// we can try to guess the pypi name from the conda name
if custom_mapping.contains_key(&record.channel) {
if let Some(mapped_channel) = custom_mapping.get(&record.channel) {
if let Some(mapped_name) =
mapped_channel.get(record.package_record.name.as_normalized())
{
record.package_record.purls.push(
PackageUrl::new(String::from("pypi"), mapped_name)
.expect("valid pypi package url"),
);
let mut not_a_pypi = false;

// we verify if we have package channel and name in user provided mapping
if let Some(mapped_channel) = custom_mapping.get(&record.channel) {
if let Some(mapped_name) = mapped_channel.get(record.package_record.name.as_normalized()) {
// we have a pypi name for it so we record a purl
if let Some(name) = mapped_name {
let purl = PackageUrl::builder(String::from("pypi"), name.to_string())
.with_qualifier("source", "project-defined-mapping")
.expect("valid qualifier");

record
.package_record
.purls
.push(purl.build().expect("valid pypi package url"));
} else {
not_a_pypi = true;
}
}
}

// if we don't have it and its channel is conda-forge
// we assume that it's the pypi package
if !not_a_pypi && record.package_record.purls.is_empty() && is_conda_forge_record(record) {
// Convert the conda package names to pypi package names. If the conversion fails we
// just assume that it's not a valid python package.
if let Some(purl) = build_pypi_purl_from_package_record(&record.package_record) {
record.package_record.purls.push(purl);
}
}

Ok(())
}

pub fn _amend_only_custom_pypi_purls(
conda_packages: &mut [RepoDataRecord],
custom_mapping: &'static HashMap<String, HashMap<String, String>>,
custom_mapping: &'static HashMap<String, HashMap<String, Option<String>>>,
) -> miette::Result<()> {
for record in conda_packages.iter_mut() {
amend_pypi_purls_for_record(record, custom_mapping)?;
Expand Down
38 changes: 34 additions & 4 deletions src/pypi_mapping/mod.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use std::{collections::HashMap, path::PathBuf, str::FromStr, sync::Arc};

use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions};
use rattler_conda_types::RepoDataRecord;
use rattler_conda_types::{PackageRecord, PackageUrl, RepoDataRecord};
use reqwest_middleware::ClientBuilder;
use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
use url::Url;

use crate::config::get_cache_dir;

mod custom_pypi_mapping;
pub mod custom_pypi_mapping;
pub mod prefix_pypi_name_mapping;

pub trait Reporter: Send + Sync {
Expand All @@ -19,19 +19,34 @@ pub trait Reporter: Send + Sync {

pub type ChannelName = String;

type MappingMap = HashMap<ChannelName, MappingLocation>;
pub type MappingMap = HashMap<ChannelName, MappingLocation>;

#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum MappingLocation {
Path(PathBuf),
Url(Url),
}

/// Represents the source of the conda-to-pypi name mapping.
///
/// The mapping is either user-defined (`Custom`) or comes from
/// prefix.dev (`Prefix`).
pub enum MappingSource {
/// User-defined mapping: a `MappingMap`, i.e. a map from channel name to
/// the `MappingLocation` (path or URL) of that channel's mapping.
Custom { mapping: MappingMap },
/// The mapping from prefix.dev.
Prefix,
}

impl MappingSource {
    /// Returns a clone of the user-defined [`MappingMap`] when `self` is
    /// `MappingSource::Custom`; every other variant yields `None`.
    pub fn custom(&self) -> Option<MappingMap> {
        if let MappingSource::Custom { mapping } = self {
            Some(mapping.clone())
        } else {
            None
        }
    }
}

pub async fn amend_pypi_purls(
client: reqwest::Client,
mapping_source: &MappingSource,
Expand Down Expand Up @@ -78,3 +93,18 @@ pub fn is_conda_forge_record(record: &RepoDataRecord) -> bool {
/// Returns `true` when the path component of `url` starts with `/conda-forge`.
pub fn is_conda_forge_url(url: &Url) -> bool {
    const CONDA_FORGE_PREFIX: &str = "/conda-forge";

    let path = url.path();
    path.starts_with(CONDA_FORGE_PREFIX)
}

/// Builds a pypi purl for a `PackageRecord`.
///
/// The resulting purl has the form `pkg:pypi/aiofiles`.
///
/// Returns `None` when the conda package name cannot be parsed as a
/// `pep508_rs::PackageName` or when the record's version cannot be parsed as
/// a `pep440_rs::Version`.
///
/// # Panics
///
/// Panics if the purl builder rejects the resulting package url.
pub fn build_pypi_purl_from_package_record(package_record: &PackageRecord) -> Option<PackageUrl> {
    let pypi_name = pep508_rs::PackageName::from_str(package_record.name.as_source()).ok()?;

    // The version does not end up in the purl; it is parsed only to check
    // that it is a valid PEP 440 version.
    let _validated_version =
        pep440_rs::Version::from_str(&package_record.version.as_str()).ok()?;

    let purl = PackageUrl::builder(String::from("pypi"), pypi_name.to_string())
        .build()
        .expect("valid pypi package url");

    Some(purl)
}
Loading

0 comments on commit 66ce6cf

Please sign in to comment.