From d3b3a6a0f080cf7537765a9208c23190f965e8f7 Mon Sep 17 00:00:00 2001 From: Ludwig Stecher Date: Wed, 3 Aug 2022 18:34:02 +0200 Subject: [PATCH] Add diagnostics for [.], [codepoint] and [cp] --- pomsky-lib/src/error/diagnostics.rs | 4 +- pomsky-lib/src/error/parse_error.rs | 2 +- pomsky-lib/src/exprs/char_class/char_group.rs | 27 +++++++----- pomsky-lib/src/parse/parsers.rs | 41 ++++++++++--------- pomsky-lib/src/warning.rs | 8 ++++ pomsky-lib/tests/testcases/basics/cp.txt | 5 +++ pomsky-lib/tests/testcases/basics/dot.txt | 6 +++ .../tests/testcases/errors/unexpected_dot.txt | 6 --- .../tests/testcases/regex-diagnostics/dot.txt | 4 +- 9 files changed, 61 insertions(+), 42 deletions(-) create mode 100644 pomsky-lib/tests/testcases/basics/cp.txt create mode 100644 pomsky-lib/tests/testcases/basics/dot.txt delete mode 100644 pomsky-lib/tests/testcases/errors/unexpected_dot.txt diff --git a/pomsky-lib/src/error/diagnostics.rs b/pomsky-lib/src/error/diagnostics.rs index 1e81102..9cd4669 100644 --- a/pomsky-lib/src/error/diagnostics.rs +++ b/pomsky-lib/src/error/diagnostics.rs @@ -94,9 +94,7 @@ impl Diagnostic { Some(format!("Switch the numbers: {}-{}", part2.trim(), part1.trim())) } ParseErrorKind::Dot => Some( - "The dot is deprecated. Use `Codepoint` to match any code point, \ - or `![n]` to exclude line breaks" - .into(), + "Use `Codepoint` to match any code point, or `![n]` to exclude line breaks".into(), ), #[cfg(feature = "suggestions")] ParseErrorKind::CharClass(CharClassError::UnknownNamedClass { diff --git a/pomsky-lib/src/error/parse_error.rs b/pomsky-lib/src/error/parse_error.rs index 1dcdabd..876d34c 100644 --- a/pomsky-lib/src/error/parse_error.rs +++ b/pomsky-lib/src/error/parse_error.rs @@ -61,7 +61,7 @@ pub(crate) enum ParseErrorKind { UnknownToken, #[error(transparent)] LexErrorWithMessage(LexErrorMsg), - #[error("Unexpected dot")] + #[error("The dot is not supported")] // this is for a dot *not* enclosed in brackets Dot, #[error("Unexpected keyword `{}`", .0)] KeywordAfterLet(String), diff --git a/pomsky-lib/src/exprs/char_class/char_group.rs b/pomsky-lib/src/exprs/char_class/char_group.rs index 10ea888..b871133 100644 --- a/pomsky-lib/src/exprs/char_class/char_group.rs +++ b/pomsky-lib/src/exprs/char_class/char_group.rs @@ -8,7 +8,7 @@ use std::fmt::Write; -use crate::error::CharClassError; +use crate::{error::CharClassError, warning::DeprecationWarning}; use super::unicode::{Category, CodeBlock, OtherProperties, Script}; @@ -59,10 +59,13 @@ impl CharGroup { /// If the name is uppercase (and not `R`), we just assume that it is a /// Unicode category, script or block. This needs to be fixed at one /// point! - pub(crate) fn try_from_group_name(name: &str, negative: bool) -> Result { + pub(crate) fn try_from_group_name( + name: &str, + negative: bool, + ) -> Result<(Self, Option), CharClassError> { Ok(match name { _ if name == "ascii" || name.starts_with("ascii_") => { - CharGroup::Items(super::ascii::parse_ascii_group(name, negative)?) + (CharGroup::Items(super::ascii::parse_ascii_group(name, negative)?), None) } "codepoint" | "cp" | "." if negative => { @@ -74,13 +77,17 @@ impl CharGroup { return Err(CharClassError::Keyword(name.to_string())); } - "codepoint" | "cp" => CharGroup::CodePoint, - "." => CharGroup::Dot, - - _ => CharGroup::Items(vec![GroupItem::Named { - name: super::unicode::parse_group_name(name)?, - negative, - }]), + "codepoint" => (CharGroup::CodePoint, Some(DeprecationWarning::Codepoint)), + "cp" => (CharGroup::CodePoint, Some(DeprecationWarning::Cp)), + "." => (CharGroup::Dot, Some(DeprecationWarning::Dot)), + + _ => ( + CharGroup::Items(vec![GroupItem::Named { + name: super::unicode::parse_group_name(name)?, + negative, + }]), + None, + ), }) } diff --git a/pomsky-lib/src/parse/parsers.rs b/pomsky-lib/src/parse/parsers.rs index f9714b6..b5d5c44 100644 --- a/pomsky-lib/src/parse/parsers.rs +++ b/pomsky-lib/src/parse/parsers.rs @@ -60,7 +60,7 @@ pub(super) fn parse_modified<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Ru Disable, } - try_map2( + try_map_spanned( pair( many0(alt(( map( @@ -124,7 +124,7 @@ pub(super) fn parse_modified<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Ru } pub(super) fn parse_or<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Rule<'i>> { - try_map2( + try_map_spanned( pair(opt(Token::Pipe), separated_list0(Token::Pipe, parse_sequence)), |(leading_pipe, mut rules)| { if rules.len() == 1 { @@ -171,7 +171,7 @@ pub(super) fn parse_fixes<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Rule< let span = span.join(rule.span()); Rule::Lookaround(Box::new(Lookaround::new(rule, kind, span))) }), - try_map2( + try_map_spanned( pair(parse_atom, many0(parse_repetition)), |(mut rule, repetitions)| { if repetitions.len() > 64 { @@ -408,23 +408,30 @@ pub(super) fn parse_char_class<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, fn parse_char_group<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, CharGroup> { let span1 = input.span(); + let mut warnings = vec![]; - let (input, ranges) = many0(alt(( + let (mut input, ranges) = many0(alt(( parse_chars_or_range, - parse_dot, - try_map( - pair(opt(Token::Not), Token::Identifier), - |(not, (s, _))| { - // FIXME: When this fails on a negative item, the span of the exclamation mark - // is used instead of the identifier's span - CharGroup::try_from_group_name(s, not.is_some()) - .map_err(ParseErrorKind::CharClass) + try_map_spanned( + pair(opt(Token::Not), alt((Token::Identifier, Token::Dot))), + |(not, (s, span))| { + let (char_group, dw) = CharGroup::try_from_group_name(s, not.is_some()) + .map_err(|e| ParseErrorKind::CharClass(e).at(span))?; + + if let Some(dw) = dw { + warnings.push(Warning { kind: WarningKind::Deprecation(dw), span }) + } + Ok(char_group) }, nom::Err::Failure, ), err(|| ParseErrorKind::CharClass(CharClassError::Invalid)), )))(input)?; + for warning in warnings { + input.add_warning(warning); + } + let mut iter = ranges.into_iter(); let mut class = iter.next().unwrap_or_else(|| CharGroup::Items(vec![])); @@ -436,12 +443,6 @@ pub(super) fn parse_char_class<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Ok((input, class)) } - fn parse_dot<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, CharGroup> { - let (mut input, (_, span)) = Token::Dot.parse(input)?; - input.add_warning(WarningKind::Deprecation(DeprecationWarning::Dot).at(span)); - Ok((input, CharGroup::Dot)) - } - try_map( tuple(( Token::OpenBracket, @@ -539,7 +540,7 @@ pub(super) fn parse_range<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Rule< map( pair( "range", - try_map2( + try_map_spanned( pair( cut(separated_pair(Token::String, Token::Dash, Token::String)), opt(parse_base), @@ -747,7 +748,7 @@ where } } -fn try_map2<'i, 'b, O1, O2, P, M, EM>( +fn try_map_spanned<'i, 'b, O1, O2, P, M, EM>( mut parser: P, mut map: M, err_kind: EM, diff --git a/pomsky-lib/src/warning.rs b/pomsky-lib/src/warning.rs index 0884028..fb27e6f 100644 --- a/pomsky-lib/src/warning.rs +++ b/pomsky-lib/src/warning.rs @@ -56,6 +56,10 @@ pub enum DeprecationWarning { OldEndLiteral, /// The `[.]` dot Dot, + /// `[codepoint]` + Codepoint, + /// `[cp]` + Cp, } impl fmt::Display for DeprecationWarning { @@ -71,6 +75,10 @@ impl fmt::Display for DeprecationWarning { "The dot is deprecated. Use `Codepoint` (or `C`) to match any character;\n\ Use `![n]` to match anything except for line breaks.", ), + DeprecationWarning::Codepoint => { + f.write_str("`[codepoint]` is deprecated. Use `Codepoint` instead.") + } + DeprecationWarning::Cp => f.write_str("`[cp]` is deprecated. Use `C` instead."), } } } diff --git a/pomsky-lib/tests/testcases/basics/cp.txt b/pomsky-lib/tests/testcases/basics/cp.txt new file mode 100644 index 0000000..78681f8 --- /dev/null +++ b/pomsky-lib/tests/testcases/basics/cp.txt @@ -0,0 +1,5 @@ +[cp] +----- +[\s\S] +WARNING: `[cp]` is deprecated. Use `C` instead. + at 1..3 \ No newline at end of file diff --git a/pomsky-lib/tests/testcases/basics/dot.txt b/pomsky-lib/tests/testcases/basics/dot.txt new file mode 100644 index 0000000..62d15a9 --- /dev/null +++ b/pomsky-lib/tests/testcases/basics/dot.txt @@ -0,0 +1,6 @@ +[.] +----- +. +WARNING: The dot is deprecated. Use `Codepoint` (or `C`) to match any character; +Use `![n]` to match anything except for line breaks. + at 1..2 \ No newline at end of file diff --git a/pomsky-lib/tests/testcases/errors/unexpected_dot.txt b/pomsky-lib/tests/testcases/errors/unexpected_dot.txt deleted file mode 100644 index 139f3a9..0000000 --- a/pomsky-lib/tests/testcases/errors/unexpected_dot.txt +++ /dev/null @@ -1,6 +0,0 @@ -#! expect=error -. ------ -ERROR: Unexpected dot -HELP: The dot is deprecated. Use `Codepoint` to match any code point, or `![n]` to exclude line breaks -SPAN: 0..1 \ No newline at end of file diff --git a/pomsky-lib/tests/testcases/regex-diagnostics/dot.txt b/pomsky-lib/tests/testcases/regex-diagnostics/dot.txt index 139f3a9..3df18dd 100644 --- a/pomsky-lib/tests/testcases/regex-diagnostics/dot.txt +++ b/pomsky-lib/tests/testcases/regex-diagnostics/dot.txt @@ -1,6 +1,6 @@ #! expect=error . ----- -ERROR: Unexpected dot -HELP: The dot is deprecated. Use `Codepoint` to match any code point, or `![n]` to exclude line breaks +ERROR: The dot is not supported +HELP: Use `Codepoint` to match any code point, or `![n]` to exclude line breaks SPAN: 0..1 \ No newline at end of file