Skip to content

Commit

Permalink
Add diagnostics for [.], [codepoint] and [cp]
Browse files Browse the repository at this point in the history
  • Loading branch information
Aloso committed Aug 3, 2022
1 parent bce3d1b commit d3b3a6a
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 42 deletions.
4 changes: 1 addition & 3 deletions pomsky-lib/src/error/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,7 @@ impl Diagnostic {
Some(format!("Switch the numbers: {}-{}", part2.trim(), part1.trim()))
}
ParseErrorKind::Dot => Some(
"The dot is deprecated. Use `Codepoint` to match any code point, \
or `![n]` to exclude line breaks"
.into(),
"Use `Codepoint` to match any code point, or `![n]` to exclude line breaks".into(),
),
#[cfg(feature = "suggestions")]
ParseErrorKind::CharClass(CharClassError::UnknownNamedClass {
Expand Down
2 changes: 1 addition & 1 deletion pomsky-lib/src/error/parse_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ pub(crate) enum ParseErrorKind {
UnknownToken,
#[error(transparent)]
LexErrorWithMessage(LexErrorMsg),
#[error("Unexpected dot")]
#[error("The dot is not supported")] // this is for a dot *not* enclosed in brackets
Dot,
#[error("Unexpected keyword `{}`", .0)]
KeywordAfterLet(String),
Expand Down
27 changes: 17 additions & 10 deletions pomsky-lib/src/exprs/char_class/char_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use std::fmt::Write;

use crate::error::CharClassError;
use crate::{error::CharClassError, warning::DeprecationWarning};

use super::unicode::{Category, CodeBlock, OtherProperties, Script};

Expand Down Expand Up @@ -59,10 +59,13 @@ impl CharGroup {
/// If the name is uppercase (and not `R`), we just assume that it is a
/// Unicode category, script or block. This needs to be fixed at some
/// point!
pub(crate) fn try_from_group_name(name: &str, negative: bool) -> Result<Self, CharClassError> {
pub(crate) fn try_from_group_name(
name: &str,
negative: bool,
) -> Result<(Self, Option<DeprecationWarning>), CharClassError> {
Ok(match name {
_ if name == "ascii" || name.starts_with("ascii_") => {
CharGroup::Items(super::ascii::parse_ascii_group(name, negative)?)
(CharGroup::Items(super::ascii::parse_ascii_group(name, negative)?), None)
}

"codepoint" | "cp" | "." if negative => {
Expand All @@ -74,13 +77,17 @@ impl CharGroup {
return Err(CharClassError::Keyword(name.to_string()));
}

"codepoint" | "cp" => CharGroup::CodePoint,
"." => CharGroup::Dot,

_ => CharGroup::Items(vec![GroupItem::Named {
name: super::unicode::parse_group_name(name)?,
negative,
}]),
"codepoint" => (CharGroup::CodePoint, Some(DeprecationWarning::Codepoint)),
"cp" => (CharGroup::CodePoint, Some(DeprecationWarning::Cp)),
"." => (CharGroup::Dot, Some(DeprecationWarning::Dot)),

_ => (
CharGroup::Items(vec![GroupItem::Named {
name: super::unicode::parse_group_name(name)?,
negative,
}]),
None,
),
})
}

Expand Down
41 changes: 21 additions & 20 deletions pomsky-lib/src/parse/parsers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub(super) fn parse_modified<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Ru
Disable,
}

try_map2(
try_map_spanned(
pair(
many0(alt((
map(
Expand Down Expand Up @@ -124,7 +124,7 @@ pub(super) fn parse_modified<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Ru
}

pub(super) fn parse_or<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Rule<'i>> {
try_map2(
try_map_spanned(
pair(opt(Token::Pipe), separated_list0(Token::Pipe, parse_sequence)),
|(leading_pipe, mut rules)| {
if rules.len() == 1 {
Expand Down Expand Up @@ -171,7 +171,7 @@ pub(super) fn parse_fixes<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Rule<
let span = span.join(rule.span());
Rule::Lookaround(Box::new(Lookaround::new(rule, kind, span)))
}),
try_map2(
try_map_spanned(
pair(parse_atom, many0(parse_repetition)),
|(mut rule, repetitions)| {
if repetitions.len() > 64 {
Expand Down Expand Up @@ -408,23 +408,30 @@ pub(super) fn parse_char_class<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b,

fn parse_char_group<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, CharGroup> {
let span1 = input.span();
let mut warnings = vec![];

let (input, ranges) = many0(alt((
let (mut input, ranges) = many0(alt((
parse_chars_or_range,
parse_dot,
try_map(
pair(opt(Token::Not), Token::Identifier),
|(not, (s, _))| {
// FIXME: When this fails on a negative item, the span of the exclamation mark
// is used instead of the identifier's span
CharGroup::try_from_group_name(s, not.is_some())
.map_err(ParseErrorKind::CharClass)
try_map_spanned(
pair(opt(Token::Not), alt((Token::Identifier, Token::Dot))),
|(not, (s, span))| {
let (char_group, dw) = CharGroup::try_from_group_name(s, not.is_some())
.map_err(|e| ParseErrorKind::CharClass(e).at(span))?;

if let Some(dw) = dw {
warnings.push(Warning { kind: WarningKind::Deprecation(dw), span })
}
Ok(char_group)
},
nom::Err::Failure,
),
err(|| ParseErrorKind::CharClass(CharClassError::Invalid)),
)))(input)?;

for warning in warnings {
input.add_warning(warning);
}

let mut iter = ranges.into_iter();
let mut class = iter.next().unwrap_or_else(|| CharGroup::Items(vec![]));

Expand All @@ -436,12 +443,6 @@ pub(super) fn parse_char_class<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b,
Ok((input, class))
}

fn parse_dot<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, CharGroup> {
let (mut input, (_, span)) = Token::Dot.parse(input)?;
input.add_warning(WarningKind::Deprecation(DeprecationWarning::Dot).at(span));
Ok((input, CharGroup::Dot))
}

try_map(
tuple((
Token::OpenBracket,
Expand Down Expand Up @@ -539,7 +540,7 @@ pub(super) fn parse_range<'i, 'b>(input: Input<'i, 'b>) -> PResult<'i, 'b, Rule<
map(
pair(
"range",
try_map2(
try_map_spanned(
pair(
cut(separated_pair(Token::String, Token::Dash, Token::String)),
opt(parse_base),
Expand Down Expand Up @@ -747,7 +748,7 @@ where
}
}

fn try_map2<'i, 'b, O1, O2, P, M, EM>(
fn try_map_spanned<'i, 'b, O1, O2, P, M, EM>(
mut parser: P,
mut map: M,
err_kind: EM,
Expand Down
8 changes: 8 additions & 0 deletions pomsky-lib/src/warning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ pub enum DeprecationWarning {
OldEndLiteral,
/// The `[.]` dot
Dot,
/// `[codepoint]`
Codepoint,
/// `[cp]`
Cp,
}

impl fmt::Display for DeprecationWarning {
Expand All @@ -71,6 +75,10 @@ impl fmt::Display for DeprecationWarning {
"The dot is deprecated. Use `Codepoint` (or `C`) to match any character;\n\
Use `![n]` to match anything except for line breaks.",
),
DeprecationWarning::Codepoint => {
f.write_str("`[codepoint]` is deprecated. Use `Codepoint` instead.")
}
DeprecationWarning::Cp => f.write_str("`[cp]` is deprecated. Use `C` instead."),
}
}
}
5 changes: 5 additions & 0 deletions pomsky-lib/tests/testcases/basics/cp.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[cp]
-----
[\s\S]
WARNING: `[cp]` is deprecated. Use `C` instead.
at 1..3
6 changes: 6 additions & 0 deletions pomsky-lib/tests/testcases/basics/dot.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[.]
-----
.
WARNING: The dot is deprecated. Use `Codepoint` (or `C`) to match any character;
Use `![n]` to match anything except for line breaks.
at 1..2
6 changes: 0 additions & 6 deletions pomsky-lib/tests/testcases/errors/unexpected_dot.txt

This file was deleted.

4 changes: 2 additions & 2 deletions pomsky-lib/tests/testcases/regex-diagnostics/dot.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! expect=error
.
-----
ERROR: Unexpected dot
HELP: The dot is deprecated. Use `Codepoint` to match any code point, or `![n]` to exclude line breaks
ERROR: The dot is not supported
HELP: Use `Codepoint` to match any code point, or `![n]` to exclude line breaks
SPAN: 0..1

0 comments on commit d3b3a6a

Please sign in to comment.