From 55ee7c86afc487f31aebad0e11e416f549936a15 Mon Sep 17 00:00:00 2001 From: Filip Tibell Date: Thu, 13 Jun 2024 23:01:10 +0200 Subject: [PATCH 1/3] Improve arch detection algorithm, add more tests --- lib/descriptor/arch.rs | 85 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 8 deletions(-) diff --git a/lib/descriptor/arch.rs b/lib/descriptor/arch.rs index e2932da..1036205 100644 --- a/lib/descriptor/arch.rs +++ b/lib/descriptor/arch.rs @@ -2,12 +2,23 @@ use std::env::consts::ARCH as CURRENT_ARCH; use super::{executable_parsing::parse_executable, OS}; +// Matching substrings - these can be partial matches, eg. "wordwin64" will match as "win64" #[rustfmt::skip] -const ARCH_KEYWORDS: [(Arch, &[&str]); 4] = [ +const ARCH_SUBSTRINGS: [(Arch, &[&str]); 4] = [ (Arch::Arm64, &["aarch64", "arm64", "armv9"]), - (Arch::X64, &["x86-64", "x86_64", "x64", "amd64", "win64", "win-x64"]), - (Arch::Arm32, &["arm", "arm32", "armv7"]), - (Arch::X86, &["x86", "i686", "i386", "win32", "win-x86"]), + (Arch::X64, &["x86-64", "x86_64", "amd64", "win64", "win-x64"]), + (Arch::Arm32, &["arm32", "armv7"]), + (Arch::X86, &["i686", "i386", "win32", "win-x86"]), +]; + +// Matching words - these must be exact word matches, eg. "tarmac" will not match as "arm" +// Note that these can not contain word separators like "-" or "_", since they're stripped +#[rustfmt::skip] +const ARCH_WORDS: [(Arch, &[&str]); 4] = [ + (Arch::Arm64, &[]), + (Arch::X64, &["x64", "win"]), + (Arch::Arm32, &["arm"]), + (Arch::X86, &["x86"]), ]; /** @@ -49,7 +60,9 @@ impl Arch { pub fn detect(search_string: impl AsRef) -> Option { let lowercased = search_string.as_ref().to_lowercase(); - for (arch, keywords) in ARCH_KEYWORDS { + // Try to find a substring match first, these are generally longer and + // contain more symbol-like characters, less likely to be a false positive + for (arch, keywords) in ARCH_SUBSTRINGS { for keyword in keywords { if lowercased.contains(keyword) { return Some(arch); @@ -57,6 +70,19 @@ impl Arch { } } + // Try to find a strict keyword given as a standalone word in our search string + if let Some(arch) = lowercased.split(char_is_word_separator).find_map(|part| { + ARCH_WORDS.iter().find_map(|(arch, keywords)| { + if keywords.contains(&part) { + Some(*arch) + } else { + None + } + }) + }) { + return Some(arch); + }; + /* HACK: If nothing else matched, but the search string contains "universal", we may have found a macOS universal binary, which is compatible with both @@ -99,6 +125,10 @@ impl Arch { } } +fn char_is_word_separator(c: char) -> bool { + c == '-' || c == '_' || c.is_ascii_whitespace() +} + #[cfg(test)] mod tests { #![allow(clippy::uninlined_format_args)] @@ -107,13 +137,27 @@ mod tests { use super::*; #[test] - fn keywords_are_lowercase() { - for (toolchain, keywords) in ARCH_KEYWORDS { + fn substrings_and_words_are_lowercase() { + for (arch, keywords) in ARCH_SUBSTRINGS.into_iter().chain(ARCH_WORDS.into_iter()) { for keyword in keywords { assert_eq!( keyword.to_string(), keyword.to_lowercase(), - "Arch keyword for {:?} is not lowercase: {}", + "Arch substring / word for {:?} is not lowercase: {}", + arch, + keyword + ); + } + } + } + + #[test] + fn words_do_not_contain_word_separators() { + for (toolchain, keywords) in ARCH_WORDS { + for keyword in keywords { + assert!( + !keyword.contains(char_is_word_separator), + "Arch keyword for {:?} contains word separator: {}", toolchain, keyword ); @@ -172,4 +216,29 @@ mod tests { fn detect_arch_universal() { assert_eq!(Arch::detect("APP-macos-universal-VER"), Some(Arch::X64)); } + + #[test] + fn real_tool_specs() { + const REAL_TOOLS: [(&str, Option); 10] = [ + ("stylua-linux-x86_64-musl", Some(Arch::X64)), + ("remodel-0.11.0-linux-x86_64", Some(Arch::X64)), + ("rojo-0.6.0-alpha.1-win64", Some(Arch::X64)), + ("lune-0.6.7-windows-aarch64", Some(Arch::Arm64)), + ("darklua-linux-aarch64", Some(Arch::Arm64)), + ("tarmac-0.7.5-macos", None), + ("sentry-cli-Darwin-universal", Some(Arch::X64)), + ("sentry-cli-linux-i686-2.32.1", Some(Arch::X86)), + ( + "just-1.28.0-armv7-unknown-linux-musleabihf", + Some(Arch::Arm32), + ), + ( + "just-1.28.0-arm-unknown-linux-musleabihf", + Some(Arch::Arm32), + ), + ]; + for (tool, expected) in REAL_TOOLS { + assert_eq!(Arch::detect(tool), expected, "Tool: {tool}"); + } + } } From 00e02874344552b8e46029a0c86d82797fdbdf11 Mon Sep 17 00:00:00 2001 From: Filip Tibell Date: Thu, 13 Jun 2024 23:04:56 +0200 Subject: [PATCH 2/3] Clarify constants naming --- lib/descriptor/arch.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/descriptor/arch.rs b/lib/descriptor/arch.rs index 1036205..bcd4ae5 100644 --- a/lib/descriptor/arch.rs +++ b/lib/descriptor/arch.rs @@ -2,7 +2,8 @@ use std::env::consts::ARCH as CURRENT_ARCH; use super::{executable_parsing::parse_executable, OS}; -// Matching substrings - these can be partial matches, eg. "wordwin64" will match as "win64" +// Matching substrings - these can be partial matches, eg. "wordwin64" will match as x64 arch +// These will take priority over full word matches, and should be as precise as possible #[rustfmt::skip] const ARCH_SUBSTRINGS: [(Arch, &[&str]); 4] = [ (Arch::Arm64, &["aarch64", "arm64", "armv9"]), @@ -11,10 +12,10 @@ const ARCH_SUBSTRINGS: [(Arch, &[&str]); 4] = [ (Arch::X86, &["i686", "i386", "win32", "win-x86"]), ]; -// Matching words - these must be exact word matches, eg. "tarmac" will not match as "arm" +// Matching words - these must be full word matches, eg. "tarmac" will not match as arm arch // Note that these can not contain word separators like "-" or "_", since they're stripped #[rustfmt::skip] -const ARCH_WORDS: [(Arch, &[&str]); 4] = [ +const ARCH_FULL_WORDS: [(Arch, &[&str]); 4] = [ (Arch::Arm64, &[]), (Arch::X64, &["x64", "win"]), (Arch::Arm32, &["arm"]), @@ -72,7 +73,7 @@ impl Arch { // Try to find a strict keyword given as a standalone word in our search string if let Some(arch) = lowercased.split(char_is_word_separator).find_map(|part| { - ARCH_WORDS.iter().find_map(|(arch, keywords)| { + ARCH_FULL_WORDS.iter().find_map(|(arch, keywords)| { if keywords.contains(&part) { Some(*arch) } else { @@ -138,7 +139,10 @@ mod tests { #[test] fn substrings_and_words_are_lowercase() { - for (arch, keywords) in ARCH_SUBSTRINGS.into_iter().chain(ARCH_WORDS.into_iter()) { + for (arch, keywords) in ARCH_SUBSTRINGS + .into_iter() + .chain(ARCH_FULL_WORDS.into_iter()) + { for keyword in keywords { assert_eq!( keyword.to_string(), @@ -153,7 +157,7 @@ mod tests { #[test] fn words_do_not_contain_word_separators() { - for (toolchain, keywords) in ARCH_WORDS { + for (toolchain, keywords) in ARCH_FULL_WORDS { for keyword in keywords { assert!( !keyword.contains(char_is_word_separator), From 2ebb00d7ac59b05b4381c12b7a59296088aecce5 Mon Sep 17 00:00:00 2001 From: Filip Tibell Date: Thu, 13 Jun 2024 23:06:19 +0200 Subject: [PATCH 3/3] Update changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5696fec..7e8426b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### Fixed + +- Fixed `tarmac` not installing correctly on non-arm systems due to its name containing `arm` + ## `0.1.1` - June 9th, 2024 ### Fixed