From ed868d5f53eae65150b54c41dd5814d75b7720a6 Mon Sep 17 00:00:00 2001 From: Gabriel de Perthuis Date: Mon, 27 May 2024 18:21:47 +0200 Subject: [PATCH 1/4] Add tests that trip up due to incomplete cargo-metadata logic Two of them fail currently, the other doesn't but is still useful to validate the logic. --- .../unused-kebab-spec/Cargo.lock | 25 +++++++++++ .../unused-kebab-spec/Cargo.toml | 9 ++++ .../unused-kebab-spec/src/lib.rs | 0 .../unused-renamed-in-registry/Cargo.lock | 16 +++++++ .../unused-renamed-in-registry/Cargo.toml | 9 ++++ .../unused-renamed-in-registry/src/lib.rs | 0 .../unused-renamed-in-spec/Cargo.lock | 16 +++++++ .../unused-renamed-in-spec/Cargo.toml | 9 ++++ .../unused-renamed-in-spec/src/lib.rs | 0 src/search_unused.rs | 42 +++++++++++++++++++ 10 files changed, 126 insertions(+) create mode 100644 integration-tests/unused-kebab-spec/Cargo.lock create mode 100644 integration-tests/unused-kebab-spec/Cargo.toml create mode 100644 integration-tests/unused-kebab-spec/src/lib.rs create mode 100644 integration-tests/unused-renamed-in-registry/Cargo.lock create mode 100644 integration-tests/unused-renamed-in-registry/Cargo.toml create mode 100644 integration-tests/unused-renamed-in-registry/src/lib.rs create mode 100644 integration-tests/unused-renamed-in-spec/Cargo.lock create mode 100644 integration-tests/unused-renamed-in-spec/Cargo.toml create mode 100644 integration-tests/unused-renamed-in-spec/src/lib.rs diff --git a/integration-tests/unused-kebab-spec/Cargo.lock b/integration-tests/unused-kebab-spec/Cargo.lock new file mode 100644 index 0000000..cac8f4b --- /dev/null +++ b/integration-tests/unused-kebab-spec/Cargo.lock @@ -0,0 +1,25 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "log-once" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aba6f00890254f37df9778e2df6042249f543ab1789cbe001c50b4debe264a8c" +dependencies = [ + "log", +] + +[[package]] +name = "unused-kebab-spec" +version = "0.1.0" +dependencies = [ + "log-once", +] diff --git a/integration-tests/unused-kebab-spec/Cargo.toml b/integration-tests/unused-kebab-spec/Cargo.toml new file mode 100644 index 0000000..7c30aa9 --- /dev/null +++ b/integration-tests/unused-kebab-spec/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "unused-kebab-spec" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +log-once = "0.3.1" diff --git a/integration-tests/unused-kebab-spec/src/lib.rs b/integration-tests/unused-kebab-spec/src/lib.rs new file mode 100644 index 0000000..e69de29 diff --git a/integration-tests/unused-renamed-in-registry/Cargo.lock b/integration-tests/unused-renamed-in-registry/Cargo.lock new file mode 100644 index 0000000..a84a221 --- /dev/null +++ b/integration-tests/unused-renamed-in-registry/Cargo.lock @@ -0,0 +1,16 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "unused-renamed-in-registry" +version = "0.1.0" +dependencies = [ + "xml-rs", +] + +[[package]] +name = "xml-rs" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193" diff --git a/integration-tests/unused-renamed-in-registry/Cargo.toml b/integration-tests/unused-renamed-in-registry/Cargo.toml new file mode 100644 index 0000000..b8dfa92 --- /dev/null +++ b/integration-tests/unused-renamed-in-registry/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "unused-renamed-in-registry" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +xml-rs = "0.8.14" diff --git a/integration-tests/unused-renamed-in-registry/src/lib.rs b/integration-tests/unused-renamed-in-registry/src/lib.rs new file mode 100644 index 0000000..e69de29 diff --git a/integration-tests/unused-renamed-in-spec/Cargo.lock b/integration-tests/unused-renamed-in-spec/Cargo.lock new file mode 100644 index 0000000..ca9e92a --- /dev/null +++ b/integration-tests/unused-renamed-in-spec/Cargo.lock @@ -0,0 +1,16 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "unused-renamed-in-spec" +version = "0.1.0" +dependencies = [ + "log", +] diff --git a/integration-tests/unused-renamed-in-spec/Cargo.toml b/integration-tests/unused-renamed-in-spec/Cargo.toml new file mode 100644 index 0000000..7cd86cc --- /dev/null +++ b/integration-tests/unused-renamed-in-spec/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "unused-renamed-in-spec" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +tracing = { version = "0.4.14", package = "log" } diff --git a/integration-tests/unused-renamed-in-spec/src/lib.rs b/integration-tests/unused-renamed-in-spec/src/lib.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/search_unused.rs b/src/search_unused.rs index 4b88329..0ae1ad9 100644 --- a/src/search_unused.rs +++ b/src/search_unused.rs @@ -769,6 +769,48 @@ fn test_crate_renaming_works() -> anyhow::Result<()> { Ok(()) } +#[test] +fn test_unused_renamed_in_registry() -> anyhow::Result<()> { + // when a lib like xml-rs is exposed with a different name, + // cargo-machete reports the unused spec properly. + let analysis = find_unused( + &PathBuf::from(TOP_LEVEL).join("./integration-tests/unused-renamed-in-registry/Cargo.toml"), + UseCargoMetadata::Yes, + )? + .expect("no error during processing"); + assert_eq!(analysis.unused, &["xml-rs".to_string()]); + + Ok(()) +} + +#[test] +fn test_unused_renamed_in_spec() -> anyhow::Result<()> { + // when a lib is renamed through key = { package = … }, + // cargo-machete reports the unused spec properly. + let analysis = find_unused( + &PathBuf::from(TOP_LEVEL).join("./integration-tests/unused-renamed-in-spec/Cargo.toml"), + UseCargoMetadata::Yes, + )? + .expect("no error during processing"); + assert_eq!(analysis.unused, &["tracing".to_string()]); + + Ok(()) +} + +#[test] +fn test_unused_kebab_spec() -> anyhow::Result<()> { + // when a lib is renamed through key = { package = … }, + // cargo-machete reports the unused spec properly. + let analysis = find_unused( + &PathBuf::from(TOP_LEVEL).join("./integration-tests/unused-kebab-spec/Cargo.toml"), + UseCargoMetadata::Yes, + )? + .expect("no error during processing"); + assert_eq!(analysis.unused, &["log-once".to_string()]); + + Ok(()) +} + #[test] fn test_ignore_deps_works() { // ensure that ignored deps listed in Cargo.toml package.metadata.cargo-machete.ignored are From ee8dcfcdeceafec576565404e77602df0cde6aea Mon Sep 17 00:00:00 2001 From: Gabriel de Perthuis Date: Mon, 27 May 2024 17:52:01 +0200 Subject: [PATCH 2/4] Rewrite cargo-metadata support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should make cargo machete --with-metadata much more accurate. The previous version was able to get crate names (snake_case names usable in source files) from cargo metadata, but was unable to go back to the dependency spec written in Cargo.toml from that. As a result, --fix was broken. The mapping is complex because the published package can use a different name (see https://lib.rs/crates/md-5 for an example), the dependency spec can rename it again (through key = { package = … }), and keys with dashes are mapped to crate names with underscores. Two of these cases were broken, all of them now work. This also removes a hack that attempted to extract package names from cargo SourceIds. Those are opaque and have changed as recently as Rust 1.78 (Cargo.lock v4). --- src/search_unused.rs | 148 +++++++++++++++++++++---------------------- 1 file changed, 74 insertions(+), 74 deletions(-) diff --git a/src/search_unused.rs b/src/search_unused.rs index 0ae1ad9..590d4be 100644 --- a/src/search_unused.rs +++ b/src/search_unused.rs @@ -7,7 +7,7 @@ use grep::{ use log::{debug, trace}; use rayon::prelude::*; use std::{ - collections::HashSet, + collections::{BTreeMap, HashSet}, error::{self, Error}, path::{Path, PathBuf}, }; @@ -193,9 +193,9 @@ struct Search { impl Search { fn new(crate_name: &str) -> anyhow::Result { - let snaked = crate_name.replace('-', "_"); + assert!(!crate_name.contains('-')); - let line_matcher = RegexMatcher::new_line_matcher(&make_line_regexp(&snaked))?; + let line_matcher = RegexMatcher::new_line_matcher(&make_line_regexp(crate_name))?; let line_searcher = SearcherBuilder::new() .binary_detection(BinaryDetection::quit(b'\x00')) .line_terminator(LineTerminator::byte(b'\n')) @@ -204,7 +204,7 @@ impl Search { let multiline_matcher = RegexMatcherBuilder::new() .multi_line(true) - .build(&make_multiline_regexp(&snaked))?; + .build(&make_multiline_regexp(crate_name))?; let multiline_searcher = SearcherBuilder::new() .binary_detection(BinaryDetection::quit(b'\x00')) .multi_line(true) @@ -343,72 +343,72 @@ pub(crate) fn find_unused( // TODO extend to dev dependencies + build dependencies, and be smarter in the grouping of // searched paths - let dependencies_names: Vec<_> = if let Some(resolve) = analysis + // Maps dependency name (the name of the key in the Cargo.toml dependency + // table, can have dashes, not necessarily the name in the crate registry) + // to crate name (extern crate, snake case) + let dependencies: BTreeMap = if let Some(metadata) = analysis .metadata .as_ref() - .and_then(|metadata| metadata.resolve.as_ref()) + .filter(|metadata| metadata.resolve.is_some()) { - resolve - .nodes - .iter() - .find(|node| { - // Note: this is dependent on rustc, so it's a bit fragile, and may break with - // nightly versions of the compiler (see cargo-machete issue #104). - - // The node id can have multiple representations: - // - on rust 1.77 and before, something like "aa 0.1.0 (path+file:///tmp/aa)". - // - on rust 1.78+, something like: - // - "path+file:///home/ben/cargo-machete/integration-tests/aa#0.1.0" - // - "path+file:///home/ben/cargo-machete/integration-tests/directory#aa@0.1.0" - // - // See https://doc.rust-lang.org/cargo/reference/pkgid-spec.html for the full - // spec. If this breaks in the future (>= March 2024), consider using - // `cargo-util-schemas`. - let repr = &node.id.repr; - - let package_found = if repr.contains(' ') { - // Rust < 1.78, take everything up to the space. - node.id.repr.split(' ').next() // e.g. "aa" - } else { - // Rust >= 1.78. Oh boy. - let mut temp = None; - - let mut slash_split = node.id.repr.split('/').peekable(); - - while let Some(subset) = slash_split.next() { - // Continue until the last part of the path. - if slash_split.peek().is_some() { - continue; - } - - // If there's no next slash separator, we've reached the end, and subset is - // one of: - // - aa#0.1.0 - // - directory#aa@0.1.0 - if subset.contains('@') { - let mut hash_split = subset.split('#'); - // Skip everything before the hash. - hash_split.next(); - // Now in the rest, take everything up to the @. - temp = hash_split.next().and_then(|end| end.split('@').next()); - } else { - // Otherwise, take everything up to the #. - temp = subset.split('#').next(); - } - } - - temp - }; - - package_found.map_or(false, |node_package_name| node_package_name == package_name) - }) - .expect("the current package must be in the dependency graph") - .deps - .iter() - .map(|node_dep| node_dep.name.clone()) - .collect() + let resolve = metadata.resolve.as_ref().unwrap(); + if let Some(ref root) = resolve.root { + // This gives us resolved dependencies, in crate form + let root_node = resolve + .nodes + .iter() + .find(|node| node.id == *root) + .expect("root should be resolved by cargo-metadata"); + // This gives us the original dependency table + // May have more than resolved if some were never enabled + let root_package = metadata + .packages + .iter() + .find(|pkg| pkg.id == *root) + .expect("root should appear under cargo-metadata packages"); + // For every resolved dependency: + // look it up in the package list to find the name (the one in registries) + // look up that name in dependencies of the root_package; + // find if it uses a different key through the rename field + root_node + .deps + .iter() + .map(|dep| { + let crate_name = dep.name.clone(); + let dep_pkg = metadata + .packages + .iter() + .find(|pkg| pkg.id == dep.pkg) + .expect( + "resolved dependencies should appear under cargo-metadata packages", + ); + let mut dep_spec_it = root_package.dependencies.iter().filter(|dep_spec| { + dep_spec.name == dep_pkg.name + && dep_spec.source.as_ref() == dep_pkg.source.as_ref().map(|s| &s.repr) + }); + // The dependency can appear more than once, for example if it is both + // a dependency and a dev-dependency (often with more features enabled). + // We'll assume cargo enforces consistency. + let dep_spec = dep_spec_it + .next() + .expect("resolved dependency should have a matching dependency spec"); + // If the dependency was renamed, through key = { package = … }, + // the original key is in dep_spec.rename + let dep_key = dep_spec.rename.clone().unwrap_or(dep_spec.name.clone()); + (dep_key, crate_name) + }) + .collect() + } else { + // No root -> virtual workspace, empty map + Default::default() + } } else { - analysis.manifest.dependencies.keys().cloned().collect() + analysis + .manifest + .dependencies + .keys() + .map(|k| (k.clone(), k.replace('-', "_"))) + .collect() }; // Keep a side-list of ignored dependencies (likely false positives). @@ -430,14 +430,14 @@ pub(crate) fn find_unused( IgnoredButUsed(String), } - let results: Vec = dependencies_names + let results: Vec = dependencies .into_par_iter() - .filter_map(|name| { - let mut search = Search::new(&name).expect("constructing grep context"); + .filter_map(|(dep_name, crate_name)| { + let mut search = Search::new(&crate_name).expect("constructing grep context"); let mut found_once = false; for path in &paths { - trace!("looking for {} in {}", name, path.to_string_lossy(),); + trace!("looking for {} in {}", crate_name, path.to_string_lossy(),); match search.search_path(path) { Ok(true) => { found_once = true; @@ -451,14 +451,14 @@ pub(crate) fn find_unused( } if !found_once { - if ignored.contains(&name) || workspace_ignored.contains(&name) { + if ignored.contains(&dep_name) || workspace_ignored.contains(&dep_name) { return None; } - Some(SingleDepResult::Unused(name)) + Some(SingleDepResult::Unused(dep_name)) } else { - if ignored.contains(&name) { - return Some(SingleDepResult::IgnoredButUsed(name)); + if ignored.contains(&dep_name) { + return Some(SingleDepResult::IgnoredButUsed(dep_name)); } None From 3f311b076fa7d4b359b004b7dc4501f478c2e96e Mon Sep 17 00:00:00 2001 From: Gabriel de Perthuis Date: Mon, 27 May 2024 21:48:05 +0200 Subject: [PATCH 3/4] Don't enforce the dep_spec source matching the resolved source With [patch] sections, this may not be the case. --- src/search_unused.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/search_unused.rs b/src/search_unused.rs index 590d4be..d365c83 100644 --- a/src/search_unused.rs +++ b/src/search_unused.rs @@ -382,10 +382,10 @@ pub(crate) fn find_unused( .expect( "resolved dependencies should appear under cargo-metadata packages", ); - let mut dep_spec_it = root_package.dependencies.iter().filter(|dep_spec| { - dep_spec.name == dep_pkg.name - && dep_spec.source.as_ref() == dep_pkg.source.as_ref().map(|s| &s.repr) - }); + let mut dep_spec_it = root_package + .dependencies + .iter() + .filter(|dep_spec| dep_spec.name == dep_pkg.name); // The dependency can appear more than once, for example if it is both // a dependency and a dev-dependency (often with more features enabled). // We'll assume cargo enforces consistency. From 1953e8f3c7cf663abec48d714fe05a7c59656d76 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 23 Aug 2024 12:02:28 +0200 Subject: [PATCH 4/4] Address review comments --- src/search_unused.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/search_unused.rs b/src/search_unused.rs index d365c83..0c9a217 100644 --- a/src/search_unused.rs +++ b/src/search_unused.rs @@ -180,7 +180,7 @@ fn collect_paths(dir_path: &Path, analysis: &PackageAnalysis) -> Vec { /// Performs search of the given crate name with the following strategy: first try to use the line /// matcher, then the multiline matcher if the line matcher failed. /// -/// Splitting the single line matcher from the multiline matcher makes maintainance of the regular +/// Splitting the single line matcher from the multiline matcher makes maintenance of the regular /// expressions simpler (oh well), and likely faster too since most use statements will be caught /// by the single line matcher. struct Search { @@ -346,12 +346,11 @@ pub(crate) fn find_unused( // Maps dependency name (the name of the key in the Cargo.toml dependency // table, can have dashes, not necessarily the name in the crate registry) // to crate name (extern crate, snake case) - let dependencies: BTreeMap = if let Some(metadata) = analysis + let dependencies: BTreeMap = if let Some((metadata, resolve)) = analysis .metadata .as_ref() - .filter(|metadata| metadata.resolve.is_some()) + .and_then(|metadata| metadata.resolve.as_ref().map(|resolve| (metadata, resolve))) { - let resolve = metadata.resolve.as_ref().unwrap(); if let Some(ref root) = resolve.root { // This gives us resolved dependencies, in crate form let root_node = resolve @@ -382,19 +381,25 @@ pub(crate) fn find_unused( .expect( "resolved dependencies should appear under cargo-metadata packages", ); + let mut dep_spec_it = root_package .dependencies .iter() .filter(|dep_spec| dep_spec.name == dep_pkg.name); + // The dependency can appear more than once, for example if it is both // a dependency and a dev-dependency (often with more features enabled). // We'll assume cargo enforces consistency. let dep_spec = dep_spec_it .next() .expect("resolved dependency should have a matching dependency spec"); + // If the dependency was renamed, through key = { package = … }, - // the original key is in dep_spec.rename - let dep_key = dep_spec.rename.clone().unwrap_or(dep_spec.name.clone()); + // the original key is in dep_spec.rename. + let dep_key = dep_spec + .rename + .clone() + .unwrap_or_else(|| dep_spec.name.clone()); (dep_key, crate_name) }) .collect() @@ -799,8 +804,7 @@ fn test_unused_renamed_in_spec() -> anyhow::Result<()> { #[test] fn test_unused_kebab_spec() -> anyhow::Result<()> { - // when a lib is renamed through key = { package = … }, - // cargo-machete reports the unused spec properly. + // when a lib uses kebab naming, cargo-machete reports the unused spec properly. let analysis = find_unused( &PathBuf::from(TOP_LEVEL).join("./integration-tests/unused-kebab-spec/Cargo.toml"), UseCargoMetadata::Yes,