diff --git a/src/git.rs b/src/git.rs index 57d2e45..028ba28 100644 --- a/src/git.rs +++ b/src/git.rs @@ -1,6 +1,7 @@ use std::{ cmp::Ordering, collections::BTreeSet, + fmt::{self, Display}, path::{Path, PathBuf}, process::Command, str, @@ -12,6 +13,9 @@ use url::Url; use crate::package::Package; +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct GitUrl(Url); + #[derive(Debug)] pub struct GitRepository { repo_dir: PathBuf, @@ -32,21 +36,15 @@ pub struct GitTag<'a> { } impl GitRepository { - pub fn obtain(dir: &Path, url: Url) -> Result<Self> { - let normalized_url = normalize_url(url)?; - - let name = format!( - "{}-{}", - normalized_url.host().unwrap(), - normalized_url.path().replace('/', "-") - ); + pub fn obtain(dir: &Path, GitUrl(url): GitUrl) -> Result<Self> { + let name = format!("{}-{}", url.host().unwrap(), url.path().replace('/', "-")); let repo_dir = dir.join(name); if !repo_dir.try_exists()? { let out = Command::new("git") .arg("clone") .arg("--filter=blob:none") .arg("--") - .arg(normalized_url.to_string()) + .arg(url.to_string()) .arg(&repo_dir) .env("GIT_TERMINAL_PROMPT", "0") .output()?; @@ -227,28 +225,40 @@ impl<'a> Ord for GitTag<'a> { } } -fn normalize_url(url: Url) -> Result<Url> { - ensure!( - matches!(url.scheme(), "http" | "https"), - "Bad repository scheme" - ); - let host = url - .host() - .context("repository doesn't have a `host`")? - .to_string(); - - Ok(if host == "github.com" || host.starts_with("gitlab.") { - let mut url = url; - let mut path = url.path().strip_prefix('/').unwrap().split('/'); - url.set_path(&format!( - "/{}/{}.git", - path.next().context("repository is missing user/org")?, - path.next() - .context("repository is missing repo name")? 
- .trim_end_matches(".git") - )); - url - } else { - url - }) +impl TryFrom<Url> for GitUrl { + type Error = anyhow::Error; + + fn try_from(url: Url) -> Result<Self> { + ensure!( + matches!(url.scheme(), "http" | "https"), + "Bad repository scheme" + ); + let host = url + .host() + .context("repository doesn't have a `host`")? + .to_string(); + + Ok(Self( + if host == "github.com" || host.starts_with("gitlab.") { + let mut url = url; + let mut path = url.path().strip_prefix('/').unwrap().split('/'); + url.set_path(&format!( + "/{}/{}.git", + path.next().context("repository is missing user/org")?, + path.next() + .context("repository is missing repo name")? + .trim_end_matches(".git") + )); + url + } else { + url + }, + )) + } +} + +impl Display for GitUrl { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Display::fmt(&self.0, f) + } } diff --git a/src/main.rs b/src/main.rs index 1b9bb67..d9bfec1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,14 @@ use std::{ env, fs, io::{self as std_io, Read}, + path::Path, str, }; use anyhow::{ensure, Context, Result}; use cargo_lock::{package::SourceKind, Checksum, Lockfile}; use cargo_toml::Manifest; +use git::GitUrl; use serde::Deserialize; use sha2::{Digest as _, Sha256}; use url::Url; @@ -31,6 +33,8 @@ const USER_AGENT: &str = concat!( ")" ); +const CRATES_IO_INDEX: &str = "https://github.com/rust-lang/crates.io-index"; + #[derive(Debug, Deserialize)] struct CargoVcsInfo { git: CargoGitVcsInfo, @@ -42,15 +46,21 @@ struct CargoGitVcsInfo { sha1: String, } +#[derive(Debug)] +struct ResolvedPackage { + lock_info: cargo_lock::Package, + registry_crate: RegistryCrate, + repository_url: GitUrl, + cargo_vcs_info: Option<CargoVcsInfo>, +} + fn main() -> Result<()> { - let client = reqwest::blocking::Client::builder() + let http_client = reqwest::blocking::Client::builder() .user_agent(USER_AGENT) .build()?; let default_toolchain = self::rustup::default_toolchain(); - let crates_io_index = 
"https://github.com/rust-lang/crates.io-index".parse::<Url>()?; - let current_dir = env::current_dir()?; let temp_dir = env::temp_dir().join(env!("CARGO_PKG_NAME")); let crates_dir = temp_dir.join("crates"); @@ -66,255 +76,285 @@ fn main() -> Result<()> { let lock = Lockfile::load(lock).context("decode Cargo.lock")?; - for package in lock.packages { - // - // Check that it's the official crates.io registry - // - - let Some(source) = package.source else { - println!("package {} doesn't have a source", package.name); - continue; - }; - - if source.kind() != &SourceKind::Registry { - continue; - } - if source.url() != &crates_io_index { - println!( - "package {} isn't part of the official crates.io registry", - package.name - ); - continue; - }; + for lock_info in lock.packages { + let name = lock_info.name.clone(); + let version = lock_info.version.clone(); - // - // Download the package - // - - let registry_crate = match RegistryCrate::obtain( - &client, - &crates_dir, - package.name.as_str(), - &package.version, - ) { - Ok(registry_crate) => registry_crate, + let resolved_package = match resolve_package(&http_client, &crates_dir, lock_info) { + Ok(resolved_package) => resolved_package, Err(err) => { - println!( - "Couldn't obtain package {} v{} err={:?}", - package.name, package.version, err - ); + println!("Couldn't resolve package {name} v{version} err={err:?}"); continue; } }; - let registry_crate_package = registry_crate.package(); - - // - // Verify the package checksum - // - - match package.checksum { - Some(Checksum::Sha256(expected_sha256_hash)) => { - let mut sha256 = Sha256::new(); - std_io::copy(&mut registry_crate_package.raw_reader()?, &mut sha256)?; - let sha256 = sha256.finalize(); - - ensure!( - <[u8; 32]>::from(sha256) == expected_sha256_hash, - "package {} digest doesn't match", - package.name - ); - } - None => { - println!("package {} doesn't have a checksum", package.name); - } + + let mut git_repository = + match 
GitRepository::obtain(&repos_dir, resolved_package.repository_url.clone()) { + Ok(git_repository) => git_repository, + Err(err) => { + println!( + "Couldn't obtain git repository for {} v{} err={:?} url={}", + resolved_package.lock_info.name, + resolved_package.lock_info.version, + err, + resolved_package.repository_url + ); + continue; + } + }; + + if let Err(err) = + analyze_package(&default_toolchain, &resolved_package, &mut git_repository) + { + println!( + "Couldn't analyze package for {} v{} err={:?} url={}", + resolved_package.lock_info.name, + resolved_package.lock_info.version, + err, + resolved_package.repository_url + ); } + } - // - // Read `.cargo_vcs_info.json` and `Cargo.toml` - // + Ok(()) +} - let mut cargo_vcs_info = None; - let mut cargo_toml = None; +fn resolve_package( + http_client: &reqwest::blocking::Client, + cache_dir: &Path, + lock_info: cargo_lock::Package, +) -> Result<ResolvedPackage> { + // + // Check that it's the official crates.io registry + // + + let source = lock_info + .source + .as_ref() + .context("package doesn't have a `source`")?; + ensure!( + source.kind() == &SourceKind::Registry, + "package source isn't Registry" + ); + ensure!( + source.url().as_str() == CRATES_IO_INDEX, + "package isn't part of the official crates.io registry" + ); - let mut tar = registry_crate_package.archive_reader()?; - for entry in tar.entries()? { - let mut entry = entry?; - let path = entry - .path()? - .to_str() - .context("entry path isn't utf-8")? 
- .to_owned(); + // + // Download the package + // + + let registry_crate = RegistryCrate::obtain( + http_client, + cache_dir, + lock_info.name.as_str(), + &lock_info.version, + ) + .context("couldn't obtain package")?; + let registry_crate_package = registry_crate.package(); + + // + // Verify the package checksum + // + + match lock_info.checksum { + Some(Checksum::Sha256(expected_sha256_hash)) => { + let mut sha256 = Sha256::new(); + std_io::copy(&mut registry_crate_package.raw_reader()?, &mut sha256)?; + let sha256 = sha256.finalize(); + + ensure!( + <[u8; 32]>::from(sha256) == expected_sha256_hash, + "package {} digest doesn't match", + lock_info.name + ); + } + None => { + println!("package {} doesn't have a checksum", lock_info.name); + } + } - // TODO: verify that the `.tar` doesn't contain multiple directories + // + // Read `.cargo_vcs_info.json` and `Cargo.toml` + // - if path.ends_with(".cargo_vcs_info.json") { - ensure!( - cargo_vcs_info.is_none(), - "`.cargo_vcs_info.json` encountered multiple times" - ); + let mut cargo_vcs_info = None; + let mut cargo_toml = None; - cargo_vcs_info = serde_json::from_reader::<_, CargoVcsInfo>(&mut entry).ok(); - } else if path.ends_with("Cargo.toml") { - if cargo_toml.is_some() { - println!("`Cargo.toml` encountered multiple times"); - } + let mut tar = registry_crate_package.archive_reader()?; + for entry in tar.entries()? { + let mut entry = entry?; + let path = entry + .path()? + .to_str() + .context("entry path isn't utf-8")? + .to_owned(); - let mut manifest = String::new(); - entry.read_to_string(&mut manifest)?; - cargo_toml = Some(Manifest::from_str(&manifest)?); - } - } + // TODO: verify that the `.tar` doesn't contain multiple directories - let cargo_toml = cargo_toml.context("`Cargo.toml` not found")?; - let Some(repository) = cargo_toml - .package - .context("Package metadata missing")? 
- .repository - else { - println!( - "Package {} is missing `repository` attribute in Cargo.toml", - package.name + if path.ends_with(".cargo_vcs_info.json") { + ensure!( + cargo_vcs_info.is_none(), + "`.cargo_vcs_info.json` encountered multiple times" ); - continue; - }; - // - // Clone repository - // - - let repository_url = repository - .get()? - .parse::<Url>() - .context("repository isn't a valid url")?; - let mut git_repository = match GitRepository::obtain(&repos_dir, repository_url.clone()) { - Ok(git_repository) => git_repository, - Err(err) => { - println!( - "Couldn't obtain git repository for {} v{} err={:?} url={}", - package.name, package.version, err, repository_url - ); - continue; + cargo_vcs_info = serde_json::from_reader::<_, CargoVcsInfo>(&mut entry).ok(); + } else if path.ends_with("Cargo.toml") { + if cargo_toml.is_some() { + println!("`Cargo.toml` encountered multiple times"); } - }; - - // - // Get git tags - // - let tags = git_repository.tags().context("obtain git tags")?; + let mut manifest = String::new(); + entry.read_to_string(&mut manifest)?; + cargo_toml = Some(Manifest::from_str(&manifest)?); + } + } - // - // Find a matching tag - // + let manifest = cargo_toml.context("`Cargo.toml` not found")?; + let repository = manifest + .package + .as_ref() + .context("Package metadata missing")? + .repository + .as_ref() + .context("missing `repository` attribute in Cargo.toml")?; + + // + // Clone repository + // + + let repository_url = repository + .get()? + .parse::<Url>() + .context("repository isn't a valid url")? 
+ .try_into() + .context("repository url isn't valid")?; + + Ok(ResolvedPackage { + lock_info, + registry_crate, + repository_url, + cargo_vcs_info, + }) +} - let commit = match tags.find_tag_for_version(package.name.as_str(), package.version.clone()) - { - Some(tag) => { - let commit = tag.commit()?; - - if let Some(cargo_vcs_info) = &cargo_vcs_info { - if cargo_vcs_info.git.sha1 != commit { - println!( - "Commit between crates.io tarball and git tag doesn't match for {} v{}", - package.name, package.version - ); - } +fn analyze_package( + default_toolchain: &str, + resolved_package: &ResolvedPackage, + git_repository: &mut GitRepository, +) -> Result<()> { + let ResolvedPackage { + lock_info, + registry_crate, + repository_url: _, + cargo_vcs_info, + } = resolved_package; + + let registry_crate_package = registry_crate.package(); + + // + // Get git tags + // + + let tags = git_repository.tags().context("obtain git tags")?; + + // + // Find a matching tag + // + + let commit = match tags.find_tag_for_version(lock_info.name.as_str(), lock_info.version.clone()) + { + Some(tag) => { + let commit = tag.commit()?; + + if let Some(cargo_vcs_info) = &cargo_vcs_info { + if cargo_vcs_info.git.sha1 != commit { + println!( + "Commit between crates.io tarball and git tag doesn't match for {} v{}", + lock_info.name, lock_info.version + ); } - - commit } - None => { - if tags.is_empty() { - println!("Package {} has no tags in git repository", package.name); - } else { - println!("Found NO tag match with package {}", package.name); - } - match &cargo_vcs_info { - Some(cargo_vcs_info) => cargo_vcs_info.git.sha1.clone(), - None => { - println!("Couldn't determine commit for crate {}", package.name); - continue; - } - } + commit + } + None => { + if tags.is_empty() { + println!("Package {} has no tags in git repository", lock_info.name); + } else { + println!("Found NO tag match with package {}", lock_info.name); } - }; - // - // Checkout the commit in the repo - // - - let 
git_repository_checkout = match git_repository.checkout(&commit) { - Ok(git_repository_checkout) => git_repository_checkout, - Err(err) => { + cargo_vcs_info + .as_ref() + .context("couldn't determine commit matching registry release")? + .git + .sha1 + .clone() + } + }; + + // + // Checkout the commit in the repo + // + + let git_repository_checkout = git_repository + .checkout(&commit) + .context("couldn't checkout commit")?; + + // + // Create local package + // + + let repository_package = git_repository_checkout + .crate_package( + default_toolchain, + lock_info.name.as_str(), + &lock_info.version, + ) + .context("couldn't package")?; + + // + // Hash file contents + // + + let repository_package_contents = repository_package + .contents() + .context("calculate repository package contents")?; + let registry_package_contents = registry_crate_package + .contents() + .context("calculate registry crate package contents")?; + + // + // Compare hashes + // + + let comparison = + PackageContents::compare(&repository_package_contents, &registry_package_contents); + for outcome in comparison { + match outcome { + PackageComparison::Equal(_) => continue, + PackageComparison::Different(path) => { println!( - "Couldn't checkout commit {} for package {} v{} err={:?}", - commit, package.name, package.version, err + "Package {} has mismatching file hashes for {}", + lock_info.name, + path.display() ); - continue; } - }; - - // - // Create local package - // - - let repository_package = match git_repository_checkout.crate_package( - &default_toolchain, - package.name.as_str(), - &package.version, - ) { - Ok(repository_package) => repository_package, - Err(err) => { + PackageComparison::OnlyLeft(path) => { println!( - "Couldn't package {} v{} err={:?}", - package.name, package.version, err + "Package {} has file {} in our release but not in crates.io tarball", + lock_info.name, + path.display() ); - continue; } - }; - - // - // Hash file contents - // - - let 
repository_package_contents = repository_package - .contents() - .context("calculate repository package contents")?; - let registry_package_contents = registry_crate_package - .contents() - .context("calculate registry crate package contents")?; - - // - // Compare hashes - // - - let comparison = - PackageContents::compare(&repository_package_contents, &registry_package_contents); - for outcome in comparison { - match outcome { - PackageComparison::Equal(_) => continue, - PackageComparison::Different(path) => { - println!( - "Package {} has mismatching file hashes for {}", - package.name, - path.display() - ); - } - PackageComparison::OnlyLeft(path) => { - println!( - "Package {} has file {} in our release but not in crates.io tarball", - package.name, - path.display() - ); - } - PackageComparison::OnlyRight(path) => { - println!( - "Package {} has file {} in crates.io release but not ours", - package.name, - path.display() - ); - } + PackageComparison::OnlyRight(path) => { + println!( + "Package {} has file {} in crates.io release but not ours", + lock_info.name, + path.display() + ); } } } diff --git a/src/registry.rs b/src/registry.rs index 6dd0004..1975b3a 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -9,6 +9,7 @@ use semver::Version; use crate::package::Package; +#[derive(Debug)] pub struct RegistryCrate { crate_file: PathBuf, }