From d3b3a6a0f080cf7537765a9208c23190f965e8f7 Mon Sep 17 00:00:00 2001
From: Ludwig Stecher <ludwig.stecher@gmx.de>
Date: Wed, 3 Aug 2022 18:34:02 +0200
Subject: [PATCH] Add diagnostics for [.], [codepoint] and [cp]

---
 pomsky-lib/src/error/diagnostics.rs           |  4 +-
 pomsky-lib/src/error/parse_error.rs           |  2 +-
 pomsky-lib/src/exprs/char_class/char_group.rs | 27 +++++++-----
 pomsky-lib/src/parse/parsers.rs               | 41 ++++++++++---------
 pomsky-lib/src/warning.rs                     |  8 ++++
 pomsky-lib/tests/testcases/basics/cp.txt      |  5 +++
 pomsky-lib/tests/testcases/basics/dot.txt     |  6 +++
 .../tests/testcases/errors/unexpected_dot.txt |  6 ---
 .../tests/testcases/regex-diagnostics/dot.txt |  4 +-
 9 files changed, 61 insertions(+), 42 deletions(-)
 create mode 100644 pomsky-lib/tests/testcases/basics/cp.txt
 create mode 100644 pomsky-lib/tests/testcases/basics/dot.txt
 delete mode 100644 pomsky-lib/tests/testcases/errors/unexpected_dot.txt

diff --git a/pomsky-lib/src/error/diagnostics.rs b/pomsky-lib/src/error/diagnostics.rs
index 1e81102..9cd4669 100644
--- a/pomsky-lib/src/error/diagnostics.rs
+++ b/pomsky-lib/src/error/diagnostics.rs
@@ -94,9 +94,7 @@ impl Diagnostic {
                 Some(format!("Switch the numbers: {}-{}", part2.trim(), part1.trim()))
             }
             ParseErrorKind::Dot => Some(
-                "The dot is deprecated. Use `Codepoint` to match any code point, \
-                or `![n]` to exclude line breaks"
-                    .into(),
+                "Use `Codepoint` to match any code point, or `![n]` to exclude line breaks".into(),
             ),
             #[cfg(feature = "suggestions")]
             ParseErrorKind::CharClass(CharClassError::UnknownNamedClass {
diff --git a/pomsky-lib/src/error/parse_error.rs b/pomsky-lib/src/error/parse_error.rs
index 1dcdabd..876d34c 100644
--- a/pomsky-lib/src/error/parse_error.rs
+++ b/pomsky-lib/src/error/parse_error.rs
@@ -61,7 +61,7 @@ pub(crate) enum ParseErrorKind {
     UnknownToken,
     #[error(transparent)]
     LexErrorWithMessage(LexErrorMsg),
-    #[error("Unexpected dot")]
+    #[error("The dot is not supported")] // only for a dot *outside* brackets; `[.]` just warns
     Dot,
     #[error("Unexpected keyword `{}`", .0)]
     KeywordAfterLet(String),
diff --git a/pomsky-lib/src/exprs/char_class/char_group.rs b/pomsky-lib/src/exprs/char_class/char_group.rs
index 10ea888..b871133 100644
--- a/pomsky-lib/src/exprs/char_class/char_group.rs
+++ b/pomsky-lib/src/exprs/char_class/char_group.rs
@@ -8,7 +8,7 @@
 
 use std::fmt::Write;
 
-use crate::error::CharClassError;
+use crate::{error::CharClassError, warning::DeprecationWarning};
 
 use super::unicode::{Category, CodeBlock, OtherProperties, Script};
 
@@ -59,10 +59,13 @@ impl CharGroup {
     /// If the name is uppercase (and not `R`), we just assume that it is a
     /// Unicode category, script or block. This needs to be fixed at one
     /// point!
-    pub(crate) fn try_from_group_name(name: &str, negative: bool) -> Result<Self, CharClassError> {
+    pub(crate) fn try_from_group_name(
+        name: &str,
+        negative: bool,
+    ) -> Result<(Self, Option<DeprecationWarning>), CharClassError> {
         Ok(match name {
             _ if name == "ascii" || name.starts_with("ascii_") => {
-                CharGroup::Items(super::ascii::parse_ascii_group(name, negative)?)
+                (CharGroup::Items(super::ascii::parse_ascii_group(name, negative)?), None)
             }
 
             "codepoint" | "cp" | "." if negative => {
@@ -74,13 +77,17 @@ impl CharGroup {
                 return Err(CharClassError::Keyword(name.to_string()));
             }
 
-            "codepoint" | "cp" => CharGroup::CodePoint,
-            "." => CharGroup::Dot,
-
-            _ => CharGroup::Items(vec![GroupItem::Named {
-                name: super::unicode::parse_group_name(name)?,
-                negative,
-            }]),
+            "codepoint" => (CharGroup::CodePoint, Some(DeprecationWarning::Codepoint)),
+            "cp" => (CharGroup::CodePoint, Some(DeprecationWarning::Cp)),
+            "." => (CharGroup::Dot, Some(DeprecationWarning::Dot)),
+
+            _ => (
+                CharGroup::Items(vec![GroupItem::Named {
+                    name: super::unicode::parse_group_name(name)?,
+                    negative,
+                }]),
+                None,
+            ),
         })
     }
 
diff --git a/pomsky-lib/src/parse/parsers.rs b/pomsky-lib/src/parse/parsers.rs
index f9714b6..b5d5c44 100644
--- a/pomsky-lib/src/parse/parsers.rs
+++ b/pomsky-lib/src/parse/parsers.rs
@@ -60,7 +60,7 @@ pub(super) fn parse_modified<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Ru
         Disable,
     }
 
-    try_map2(
+    try_map_spanned(
         pair(
             many0(alt((
                 map(
@@ -124,7 +124,7 @@ pub(super) fn parse_modified<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Ru
 }
 
 pub(super) fn parse_or<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Rule<'i>> {
-    try_map2(
+    try_map_spanned(
         pair(opt(Token::Pipe), separated_list0(Token::Pipe, parse_sequence)),
         |(leading_pipe, mut rules)| {
             if rules.len() == 1 {
@@ -171,7 +171,7 @@ pub(super) fn parse_fixes<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Rule<
             let span = span.join(rule.span());
             Rule::Lookaround(Box::new(Lookaround::new(rule, kind, span)))
         }),
-        try_map2(
+        try_map_spanned(
             pair(parse_atom, many0(parse_repetition)),
             |(mut rule, repetitions)| {
                 if repetitions.len() > 64 {
@@ -408,23 +408,30 @@ pub(super) fn parse_char_class<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b,
 
     fn parse_char_group<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, CharGroup> {
         let span1 = input.span();
+        let mut warnings = vec![];
 
-        let (input, ranges) = many0(alt((
+        let (mut input, ranges) = many0(alt((
             parse_chars_or_range,
-            parse_dot,
-            try_map(
-                pair(opt(Token::Not), Token::Identifier),
-                |(not, (s, _))| {
-                    // FIXME: When this fails on a negative item, the span of the exclamation mark
-                    // is used instead of the identifier's span
-                    CharGroup::try_from_group_name(s, not.is_some())
-                        .map_err(ParseErrorKind::CharClass)
+            try_map_spanned(
+                pair(opt(Token::Not), alt((Token::Identifier, Token::Dot))),
+                |(not, (s, span))| {
+                    let (char_group, dw) = CharGroup::try_from_group_name(s, not.is_some())
+                        .map_err(|e| ParseErrorKind::CharClass(e).at(span))?;
+
+                    if let Some(dw) = dw {
+                        warnings.push(Warning { kind: WarningKind::Deprecation(dw), span })
+                    }
+                    Ok(char_group)
                 },
                 nom::Err::Failure,
             ),
             err(|| ParseErrorKind::CharClass(CharClassError::Invalid)),
         )))(input)?;
 
+        for warning in warnings {
+            input.add_warning(warning);
+        }
+
         let mut iter = ranges.into_iter();
         let mut class = iter.next().unwrap_or_else(|| CharGroup::Items(vec![]));
 
@@ -436,12 +443,6 @@ pub(super) fn parse_char_class<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b,
         Ok((input, class))
     }
 
-    fn parse_dot<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, CharGroup> {
-        let (mut input, (_, span)) = Token::Dot.parse(input)?;
-        input.add_warning(WarningKind::Deprecation(DeprecationWarning::Dot).at(span));
-        Ok((input, CharGroup::Dot))
-    }
-
     try_map(
         tuple((
             Token::OpenBracket,
@@ -539,7 +540,7 @@ pub(super) fn parse_range<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Rule<
     map(
         pair(
             "range",
-            try_map2(
+            try_map_spanned(
                 pair(
                     cut(separated_pair(Token::String, Token::Dash, Token::String)),
                     opt(parse_base),
@@ -747,7 +748,7 @@ where
     }
 }
 
-fn try_map2<'i, 'b, O1, O2, P, M, EM>(
+fn try_map_spanned<'i, 'b, O1, O2, P, M, EM>(
     mut parser: P,
     mut map: M,
     err_kind: EM,
diff --git a/pomsky-lib/src/warning.rs b/pomsky-lib/src/warning.rs
index 0884028..fb27e6f 100644
--- a/pomsky-lib/src/warning.rs
+++ b/pomsky-lib/src/warning.rs
@@ -56,6 +56,10 @@ pub enum DeprecationWarning {
     OldEndLiteral,
     /// The `[.]` dot
     Dot,
+    /// `[codepoint]`
+    Codepoint,
+    /// `[cp]`
+    Cp,
 }
 
 impl fmt::Display for DeprecationWarning {
@@ -71,6 +75,10 @@ impl fmt::Display for DeprecationWarning {
                 "The dot is deprecated. Use `Codepoint` (or `C`) to match any character;\n\
                 Use `![n]` to match anything except for line breaks.",
             ),
+            DeprecationWarning::Codepoint => {
+                f.write_str("`[codepoint]` is deprecated. Use `Codepoint` instead.")
+            }
+            DeprecationWarning::Cp => f.write_str("`[cp]` is deprecated. Use `C` instead."),
         }
     }
 }
diff --git a/pomsky-lib/tests/testcases/basics/cp.txt b/pomsky-lib/tests/testcases/basics/cp.txt
new file mode 100644
index 0000000..78681f8
--- /dev/null
+++ b/pomsky-lib/tests/testcases/basics/cp.txt
@@ -0,0 +1,5 @@
+[cp]
+-----
+[\s\S]
+WARNING: `[cp]` is deprecated. Use `C` instead.
+  at 1..3
\ No newline at end of file
diff --git a/pomsky-lib/tests/testcases/basics/dot.txt b/pomsky-lib/tests/testcases/basics/dot.txt
new file mode 100644
index 0000000..62d15a9
--- /dev/null
+++ b/pomsky-lib/tests/testcases/basics/dot.txt
@@ -0,0 +1,6 @@
+[.]
+-----
+.
+WARNING: The dot is deprecated. Use `Codepoint` (or `C`) to match any character;
+Use `![n]` to match anything except for line breaks.
+  at 1..2
\ No newline at end of file
diff --git a/pomsky-lib/tests/testcases/errors/unexpected_dot.txt b/pomsky-lib/tests/testcases/errors/unexpected_dot.txt
deleted file mode 100644
index 139f3a9..0000000
--- a/pomsky-lib/tests/testcases/errors/unexpected_dot.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-#! expect=error
-.
------
-ERROR: Unexpected dot
-HELP: The dot is deprecated. Use `Codepoint` to match any code point, or `![n]` to exclude line breaks
-SPAN: 0..1
\ No newline at end of file
diff --git a/pomsky-lib/tests/testcases/regex-diagnostics/dot.txt b/pomsky-lib/tests/testcases/regex-diagnostics/dot.txt
index 139f3a9..3df18dd 100644
--- a/pomsky-lib/tests/testcases/regex-diagnostics/dot.txt
+++ b/pomsky-lib/tests/testcases/regex-diagnostics/dot.txt
@@ -1,6 +1,6 @@
 #! expect=error
 .
 -----
-ERROR: Unexpected dot
-HELP: The dot is deprecated. Use `Codepoint` to match any code point, or `![n]` to exclude line breaks
+ERROR: The dot is not supported
+HELP: Use `Codepoint` to match any code point, or `![n]` to exclude line breaks
 SPAN: 0..1
\ No newline at end of file