Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to change error constructor #445

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions book/src/attributes/logos.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ This can be changed by using `#[logos(error = ErrorType)]` attribute on the enum
The type `ErrorType` can be any type that implements `Clone`, `PartialEq`,
`Default` and `From<E>` for each callback's error type.

`ErrorType` must implement the `Default` trait because invalid tokens, i.e.,
literals that do not match any variant, will produce `Err(ErrorType::default())`.

Here is an example using a custom error type:

```rust,no_run,noplayground
Expand All @@ -46,6 +43,24 @@ You can add error variants to `LexingError`,
and implement `From<E>` for each error type `E` that could
be returned by a callback. See [callbacks](../callbacks.md).

`ErrorType` must implement the `Default` trait because invalid tokens, i.e.,
literals that do not match any variant, will produce `Err(ErrorType::default())`.

Alternatively, you can supply a callback using the syntax
`#[logos(error(ErrorType, callback = ...))]`, which lets you include information
from the lexer, such as the span where the error occurred:

```rust,no_run,noplayground
#[derive(Logos)]
#[logos(error(Range<usize>, callback = |lex| lex.span()))]
enum Token {
#[token("a")]
A,
#[token("b")]
B,
}
```

## Specifying path to logos

You can force the derive macro to use a different path to `Logos`'s crate
Expand Down
13 changes: 10 additions & 3 deletions examples/custom_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ use std::num::ParseIntError;
#[derive(Default, Debug, Clone, PartialEq)]
enum LexingError {
InvalidInteger(String),
NonAsciiCharacter(char),
#[default]
NonAsciiCharacter,
Other,
}

/// Error type returned by calling `lex.slice().parse()` to u8.
Expand All @@ -29,8 +30,14 @@ impl From<ParseIntError> for LexingError {
}
}

impl LexingError {
    /// Error constructor handed to `#[logos(error(LexingError, ...))]`.
    ///
    /// Wraps the first character of the lexer's current slice in
    /// `NonAsciiCharacter`. Panics if the slice is empty.
    fn from_lexer<'src>(lex: &mut logos::Lexer<'src, Token>) -> Self {
        let mut chars = lex.slice().chars();
        let offending = chars.next().unwrap();
        Self::NonAsciiCharacter(offending)
    }
}

#[derive(Debug, Logos, PartialEq)]
#[logos(error = LexingError)]
#[logos(error(LexingError, LexingError::from_lexer))]
#[logos(skip r"[ \t]+")]
enum Token {
#[regex(r"[a-zA-Z]+")]
Expand Down Expand Up @@ -58,7 +65,7 @@ fn main() {
assert_eq!(lex.next(), Some(Ok(Token::Word)));
assert_eq!(lex.slice(), "J");

assert_eq!(lex.next(), Some(Err(LexingError::NonAsciiCharacter)));
assert_eq!(lex.next(), Some(Err(LexingError::NonAsciiCharacter('é'))));
assert_eq!(lex.slice(), "é");

assert_eq!(lex.next(), Some(Ok(Token::Word)));
Expand Down
22 changes: 21 additions & 1 deletion logos-codegen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ pub fn generate(input: TokenStream) -> TokenStream {

debug!("Parsing additional options (extras, source, ...)");

let error_type = parser.error_type.take();
let (error_type, error_callback) = parser::ErrorType::unwrap(parser.error_type.take());
let extras = parser.extras.take();
let source = parser
.source
Expand All @@ -231,6 +231,24 @@ pub fn generate(input: TokenStream) -> TokenStream {
.take()
.unwrap_or_else(|| parse_quote!(::logos));

let make_error_impl = match error_callback {
Some(leaf::Callback::Label(label)) => Some(quote! {
fn make_error(lex: &mut #logos_path::Lexer<'s, Self>) -> #error_type {
#label(lex)
}
}),
Some(leaf::Callback::Inline(inline)) => {
let leaf::InlineCallback { arg, body, .. } = *inline;

Some(quote! {
fn make_error(#arg: &mut #logos_path::Lexer<'s, Self>) -> #error_type {
#body
}
})
}
_ => None,
};

let generics = parser.generics();
let this = quote!(#name #generics);

Expand All @@ -246,6 +264,8 @@ pub fn generate(input: TokenStream) -> TokenStream {
fn lex(lex: &mut #logos_path::Lexer<'s, Self>) {
#body
}

#make_error_impl
}
}
};
Expand Down
71 changes: 71 additions & 0 deletions logos-codegen/src/parser/error_type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
use proc_macro2::{Span, TokenStream};
use syn::spanned::Spanned;
use syn::Ident;

use crate::leaf::Callback;
use crate::parser::nested::NestedValue;
use crate::parser::Parser;
use crate::util::MaybeVoid;

/// Error type declared on the enum through the `error` option of
/// `#[logos(...)]`, together with an optional callback used to construct it.
pub struct ErrorType {
/// Tokens spelling out the error type.
pub ty: TokenStream,
/// Callback given alongside the type, if any; `None` when only a type was given.
pub callback: Option<Callback>,
}

impl ErrorType {
    /// Creates an error-type description for `ty` with no callback attached.
    pub fn new(ty: TokenStream) -> Self {
        Self { ty, callback: None }
    }

    /// Applies one named nested attribute (`name = value` / `name(...)`) to
    /// this error type.
    ///
    /// Only `callback = ...` is accepted. A callback that fails to parse, a
    /// second `callback`, a `callback` without `= ...`, or any other name is
    /// reported through `parser.err` rather than returned.
    pub fn named_attr(&mut self, name: Ident, value: NestedValue, parser: &mut Parser) {
        match (name.to_string().as_str(), value) {
            ("callback", NestedValue::Assign(tokens)) => {
                let span = tokens.span();
                let callback = match parser.parse_callback(tokens) {
                    Some(callback) => callback,
                    None => {
                        parser.err("Not a valid callback", span);
                        return;
                    }
                };

                if let Some(previous) = self.callback.replace(callback) {
                    // `Span::join` yields `None` whenever the spans cannot be
                    // merged (always the case on non-nightly compilers), so
                    // fall back to the callback's own span instead of
                    // panicking inside the derive macro.
                    let duplicate = span.join(name.span()).unwrap_or(span);

                    parser
                        .err("Callback has been already set", duplicate)
                        .err("Previous callback set here", previous.span());
                }
            }
            ("callback", _) => {
                parser.err("Expected: callback = ...", name.span());
            }
            (unknown, _) => {
                parser.err(
                    format!(
                        "\
                        Unknown nested attribute: {}\n\
                        \n\
                        Expected one of: callback\
                        ",
                        unknown
                    ),
                    name.span(),
                );
            }
        }
    }

    /// Splits an optional `ErrorType` into its type and callback.
    ///
    /// Returns `(MaybeVoid::Void, None)` when no error type was declared.
    pub fn unwrap(opt: Option<Self>) -> (MaybeVoid, Option<Callback>) {
        match opt {
            Some(Self { ty, callback }) => (MaybeVoid::Some(ty), callback),
            None => (MaybeVoid::Void, None),
        }
    }

    /// Span of the error type's tokens, used when reporting diagnostics.
    pub fn span(&self) -> Span {
        self.ty.span()
    }
}
66 changes: 63 additions & 3 deletions logos-codegen/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ use crate::util::{expect_punct, MaybeVoid};
use crate::LOGOS_ATTR;

mod definition;
mod error_type;
mod ignore_flags;
mod nested;
mod subpattern;
mod type_params;

pub use self::definition::{Definition, Literal};
pub use self::error_type::ErrorType;
pub use self::ignore_flags::IgnoreFlags;
use self::nested::{AttributeParser, Nested, NestedValue};
pub use self::subpattern::Subpatterns;
Expand All @@ -28,7 +30,7 @@ pub struct Parser {
pub source: Option<TokenStream>,
pub skips: Vec<Literal>,
pub extras: MaybeVoid,
pub error_type: MaybeVoid,
pub error_type: Option<ErrorType>,
pub subpatterns: Subpatterns,
pub logos_path: Option<TokenStream>,
types: TypeParams,
Expand Down Expand Up @@ -108,14 +110,72 @@ impl Parser {
NestedValue::Assign(value) => {
let span = value.span();

if let MaybeVoid::Some(previous) = parser.error_type.replace(value) {
let error_ty = ErrorType::new(value);

if let Some(previous) = parser.error_type.replace(error_ty) {
parser
.err("Error type can be defined only once", span)
.err("Previous definition here", previous.span());
}
}
NestedValue::Group(value) => {
let span = value.span();
let mut nested = AttributeParser::new(value);
let ty = match nested.parsed::<Type>() {
Some(Ok(ty)) => ty,
Some(Err(e)) => {
parser.err(e.to_string(), e.span());
return;
}
None => {
parser.err("Expected #[logos(error(SomeType))]", span);
return;
}
};

let mut error_type = {
use quote::ToTokens;
ErrorType::new(ty.into_token_stream())
};

for (position, next) in nested.enumerate() {
match next {
Nested::Unexpected(tokens) => {
parser.err("Unexpected token in attribute", tokens.span());
}
Nested::Unnamed(tokens) => match position {
0 => error_type.callback = parser.parse_callback(tokens),
_ => {
parser.err(
"\
Expected a named argument at this position\n\
\n\
hint: If you are trying to define a callback here use: callback = ...\
",
tokens.span(),
);
}
},
Nested::Named(name, value) => {
error_type.named_attr(name, value, parser);
}
}
}

if let Some(previous) = parser.error_type.replace(error_type) {
parser
.err("Error type can be defined only once", span)
.err("Previous definition here", previous.span());
}
}
_ => {
parser.err("Expected: #[logos(error = SomeType)]", span);
parser.err(
concat!(
"Expected: #[logos(error = SomeType)] or ",
"#[logos(error(SomeType[, callback))]"
),
span,
);
}
}),
("extras", |parser, span, value| match value {
Expand Down
4 changes: 2 additions & 2 deletions src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,11 +378,11 @@ where
self.token_end = self.source.find_boundary(self.token_end);
#[cfg(not(feature = "forbid_unsafe"))]
{
self.token = core::mem::ManuallyDrop::new(Some(Err(Token::Error::default())));
self.token = core::mem::ManuallyDrop::new(Some(Err(Token::make_error(self))));
}
#[cfg(feature = "forbid_unsafe")]
{
self.token = Some(Err(Token::Error::default()));
self.token = Some(Err(Token::make_error(self)));
}
}

Expand Down
6 changes: 6 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ pub trait Logos<'source>: Sized {
) -> Lexer<'source, Self> {
Lexer::with_extras(source, extras)
}

/// Create a new error. The default implementation uses `Error::default()`. If you want to make
/// your own, use `#[logos(error(ErrorType, callback = ...))]`.
fn make_error(_lexer: &mut Lexer<'source, Self>) -> Self::Error {
Self::Error::default()
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
fn make_error(_lexer: &mut Lexer<'source, Self>) -> Self::Error {
Self::Error::default()
}
#[inline(always)]
fn make_error(_lexer: &mut Lexer<'source, Self>) -> Self::Error {
Self::Error::default()
}

We can probably try this to see if that helps the compiler.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Performance is unchanged, unfortunately:

group                                         default_before                         default_changes
-----                                         --------------                         ---------------
count_ok/identifiers                          1.20   620.0±18.12ns  1198.2 MB/sec    1.00   518.1±28.66ns  1433.8 MB/sec
count_ok/keywords_operators_and_punctators    1.12  1731.6±95.53ns  1173.6 MB/sec    1.00  1552.2±150.95ns  1309.3 MB/sec
count_ok/strings                              1.00    410.6±6.04ns  2023.2 MB/sec    1.40   574.0±12.94ns  1447.1 MB/sec
iterate/identifiers                           1.15   594.5±30.62ns  1249.7 MB/sec    1.00   518.0±27.87ns  1434.3 MB/sec
iterate/keywords_operators_and_punctators     1.04  1623.9±49.04ns  1251.5 MB/sec    1.00  1559.1±111.50ns  1303.5 MB/sec
iterate/strings                               1.00   412.3±32.70ns  2014.8 MB/sec    1.39   571.8±16.91ns  1452.7 MB/sec

}

/// Type that can be returned from a callback, informing the `Lexer`, to skip
Expand Down
11 changes: 9 additions & 2 deletions tests/tests/custom_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use tests::assert_lex;
enum LexingError {
NumberTooLong,
NumberNotEven(u32),
UnrecognisedCharacter(char),
#[default]
Other,
}
Expand All @@ -19,6 +20,12 @@ impl From<ParseIntError> for LexingError {
}
}

impl LexingError {
fn unrecognised_character<'src>(lexer: &mut logos::Lexer<'src, Token<'src>>) -> Self {
Self::UnrecognisedCharacter(lexer.slice().chars().next().unwrap())
}
}

fn parse_number(input: &str) -> Result<u32, LexingError> {
let num = input.parse::<u32>()?;
if num % 2 == 0 {
Expand All @@ -29,7 +36,7 @@ fn parse_number(input: &str) -> Result<u32, LexingError> {
}

#[derive(Logos, Debug, Clone, Copy, PartialEq)]
#[logos(error = LexingError)]
#[logos(error(LexingError, LexingError::unrecognised_character))]
enum Token<'a> {
#[regex(r"[0-9]+", |lex| parse_number(lex.slice()))]
Number(u32),
Expand All @@ -51,7 +58,7 @@ fn test() {
"1111111111111111111111111111111111111111111111111111111",
13..68,
),
(Err(LexingError::Other), ",", 68..69),
(Err(LexingError::UnrecognisedCharacter(',')), ",", 68..69),
],
);
}
Loading