From 9a9aabc178b3d28d534b867e171bd0ef6ab0f148 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 23 Oct 2023 12:29:17 -0700 Subject: [PATCH] refactor: Reduce code size of testing tokens if they're a number This commit is a tiny win in compiled code size of a final binary including `clap` which shaves off 19k of compiled code locally. Previously tokens were checked if they were a number by using `.parse::().is_ok()`, but parsing floats is relatively heavyweight in terms of code size. This replaces the check with a more naive "does this string have lots of ascii digits" check where the compiled size of this check should be much smaller. --- clap_builder/src/parser/parser.rs | 6 ++-- clap_lex/src/lib.rs | 46 +++++++++++++++++++++++++++---- clap_lex/tests/parsed.rs | 31 +++++++++++++-------- clap_lex/tests/shorts.rs | 8 +++--- 4 files changed, 67 insertions(+), 24 deletions(-) diff --git a/clap_builder/src/parser/parser.rs b/clap_builder/src/parser/parser.rs index fcde4a86e4b..4b24eb0de20 100644 --- a/clap_builder/src/parser/parser.rs +++ b/clap_builder/src/parser/parser.rs @@ -643,7 +643,7 @@ impl<'cmd> Parser<'cmd> { if self.cmd[current_positional.get_id()].is_allow_hyphen_values_set() || (self.cmd[current_positional.get_id()].is_allow_negative_numbers_set() - && next.is_number()) + && next.is_negative_number()) { // If allow hyphen, this isn't a new arg. debug!("Parser::is_new_arg: Allow hyphen"); @@ -841,7 +841,7 @@ impl<'cmd> Parser<'cmd> { #[allow(clippy::blocks_in_if_conditions)] if matches!(parse_state, ParseState::Opt(opt) | ParseState::Pos(opt) - if self.cmd[opt].is_allow_hyphen_values_set() || (self.cmd[opt].is_allow_negative_numbers_set() && short_arg.is_number())) + if self.cmd[opt].is_allow_hyphen_values_set() || (self.cmd[opt].is_allow_negative_numbers_set() && short_arg.is_negative_number())) { debug!("Parser::parse_short_args: prior arg accepts hyphenated values",); return Ok(ParseResult::MaybeHyphenValue); @@ -851,7 +851,7 @@ impl<'cmd> Parser<'cmd> { .get(&pos_counter) .map(|arg| arg.is_allow_negative_numbers_set()) .unwrap_or_default() - && short_arg.is_number() + && short_arg.is_negative_number() { debug!("Parser::parse_short_arg: negative number"); return Ok(ParseResult::MaybeHyphenValue); diff --git a/clap_lex/src/lib.rs b/clap_lex/src/lib.rs index d519a10c562..581595a2c5e 100644 --- a/clap_lex/src/lib.rs +++ b/clap_lex/src/lib.rs @@ -300,10 +300,14 @@ impl<'s> ParsedArg<'s> { self.inner == "--" } - /// Does the argument look like a number - pub fn is_number(&self) -> bool { + /// Does the argument look like a negative number? + /// + /// This won't parse the number in full but attempts to see if this looks + /// like something along the lines of `-3`, `-0.3`, or `-33.03` + pub fn is_negative_number(&self) -> bool { self.to_value() - .map(|s| s.parse::().is_ok()) + .ok() + .and_then(|s| Some(is_number(s.strip_prefix('-')?))) .unwrap_or_default() } @@ -408,8 +412,8 @@ impl<'s> ShortFlags<'s> { /// Does the short flag look like a number /// /// Ideally call this before doing any iterator - pub fn is_number(&self) -> bool { - self.invalid_suffix.is_none() && self.utf8_prefix.as_str().parse::().is_ok() + pub fn is_negative_number(&self) -> bool { + self.invalid_suffix.is_none() && is_number(self.utf8_prefix.as_str()) } /// Advance the iterator, returning the next short flag on success @@ -466,3 +470,35 @@ fn split_nonutf8_once(b: &OsStr) -> (&str, Option<&OsStr>) { } } } + +fn is_number(arg: &str) -> bool { + // Return true if this looks like an integer or a float where it's all + // digits plus an optional single dot after some digits. + // + // For floats allow forms such as `1.`, `1.2`, `1.2e10`, etc. + let mut seen_dot = false; + let mut position_of_e = None; + for (i, c) in arg.as_bytes().iter().enumerate() { + match c { + // Digits are always valid + b'0'..=b'9' => {} + + // Allow a `.`, but only one, only if it comes before an + // optional exponent, and only if it's not the first character. + b'.' if !seen_dot && position_of_e.is_none() && i > 0 => seen_dot = true, + + // Allow an exponent `e` but only at most one after the first + // character. + b'e' if position_of_e.is_none() && i > 0 => position_of_e = Some(i), + + _ => return false, + } + } + + // Disallow `-1e` which isn't a valid float since it doesn't actually have + // an exponent. + match position_of_e { + Some(i) => i != arg.len() - 1, + None => true, + } +} diff --git a/clap_lex/tests/parsed.rs b/clap_lex/tests/parsed.rs index 133974cb8fa..4938e1792a9 100644 --- a/clap_lex/tests/parsed.rs +++ b/clap_lex/tests/parsed.rs @@ -120,12 +120,14 @@ fn to_short() { #[test] fn is_negative_number() { - let raw = clap_lex::RawArgs::new(["bin", "-10.0"]); - let mut cursor = raw.cursor(); - assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin"))); - let next = raw.next(&mut cursor).unwrap(); + for number in ["-10.0", "-1", "-100", "-3.5", "-1e10", "-1.3e10"] { + let raw = clap_lex::RawArgs::new(["bin", number]); + let mut cursor = raw.cursor(); + assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin"))); + let next = raw.next(&mut cursor).unwrap(); - assert!(next.is_number()); + assert!(next.is_negative_number()); + } } #[test] @@ -135,17 +137,22 @@ fn is_positive_number() { assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin"))); let next = raw.next(&mut cursor).unwrap(); - assert!(next.is_number()); + assert!(!next.is_negative_number()); } #[test] fn is_not_number() { - let raw = clap_lex::RawArgs::new(["bin", "--10.0"]); - let mut cursor = raw.cursor(); - assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin"))); - let next = raw.next(&mut cursor).unwrap(); - - assert!(!next.is_number()); + for number in ["--10.0", "-..", "-2..", "-e", "-1e", "-1e10.2", "-.2"] { + let raw = clap_lex::RawArgs::new(["bin", number]); + let mut cursor = raw.cursor(); + assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin"))); + let next = raw.next(&mut cursor).unwrap(); + + assert!( + !next.is_negative_number(), + "`{number}` is mistakenly classified as a number" + ); + } } #[test] diff --git a/clap_lex/tests/shorts.rs b/clap_lex/tests/shorts.rs index 538afa460e5..63e0e18b908 100644 --- a/clap_lex/tests/shorts.rs +++ b/clap_lex/tests/shorts.rs @@ -151,23 +151,23 @@ fn is_exhausted_empty() { } #[test] -fn is_number() { +fn is_negative_number() { let raw = clap_lex::RawArgs::new(["bin", "-1.0"]); let mut cursor = raw.cursor(); assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin"))); let next = raw.next(&mut cursor).unwrap(); let shorts = next.to_short().unwrap(); - assert!(shorts.is_number()); + assert!(shorts.is_negative_number()); } #[test] -fn is_not_number() { +fn is_not_negaitve_number() { let raw = clap_lex::RawArgs::new(["bin", "-hello"]); let mut cursor = raw.cursor(); assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin"))); let next = raw.next(&mut cursor).unwrap(); let shorts = next.to_short().unwrap(); - assert!(!shorts.is_number()); + assert!(!shorts.is_negative_number()); }