diff --git a/book/src/attributes/logos.md b/book/src/attributes/logos.md index 38475df9..d27d9bfe 100644 --- a/book/src/attributes/logos.md +++ b/book/src/attributes/logos.md @@ -33,9 +33,6 @@ This can be changed by using `#[logos(error = ErrorType)]` attribute on the enum The type `ErrorType` can be any type that implements `Clone`, `PartialEq`, `Default` and `From<E>` for each callback's error type. -`ErrorType` must implement the `Default` trait because invalid tokens, i.e., -literals that do not match any variant, will produce `Err(ErrorType::default())`. - For example, here is an example using a custom error type: ```rust,no_run,noplayground @@ -46,6 +43,24 @@ You can add error variants to `LexingError`, and implement `From<E>` for each error type `E` that could be returned by a callback. See [callbacks](../callbacks.md). +`ErrorType` must implement the `Default` trait because invalid tokens, i.e., +literals that do not match any variant, will produce `Err(ErrorType::default())`. + +Alternatively, you can provide a callback with the alternate syntax +`#[logos(error(ErrorType, callback = ...))]`, which allows you to include information +from the lexer such as the span where the error occurred: + +```rust,no_run,noplayground +#[derive(Logos)] +#[logos(error(Range<usize>, callback = |lex| lex.span()))] +enum Token { + #[token("a")] + A, + #[token("b")] + B, +} +``` + ## Specifying path to logos You can force the derive macro to use a different path to `Logos`'s crate diff --git a/examples/custom_error.rs b/examples/custom_error.rs index 757ca0db..a36a2bfe 100644 --- a/examples/custom_error.rs +++ b/examples/custom_error.rs @@ -14,8 +14,9 @@ use std::num::ParseIntError; #[derive(Default, Debug, Clone, PartialEq)] enum LexingError { InvalidInteger(String), + NonAsciiCharacter(char), #[default] - NonAsciiCharacter, + Other, } /// Error type returned by calling `lex.slice().parse()` to u8. 
@@ -29,8 +30,14 @@ impl From for LexingError { } } +impl LexingError { + fn from_lexer<'src>(lex: &mut logos::Lexer<'src, Token>) -> Self { + LexingError::NonAsciiCharacter(lex.slice().chars().next().unwrap()) + } +} + #[derive(Debug, Logos, PartialEq)] -#[logos(error = LexingError)] +#[logos(error(LexingError, LexingError::from_lexer))] #[logos(skip r"[ \t]+")] enum Token { #[regex(r"[a-zA-Z]+")] @@ -58,7 +65,7 @@ fn main() { assert_eq!(lex.next(), Some(Ok(Token::Word))); assert_eq!(lex.slice(), "J"); - assert_eq!(lex.next(), Some(Err(LexingError::NonAsciiCharacter))); + assert_eq!(lex.next(), Some(Err(LexingError::NonAsciiCharacter('é')))); assert_eq!(lex.slice(), "é"); assert_eq!(lex.next(), Some(Ok(Token::Word))); diff --git a/logos-codegen/src/lib.rs b/logos-codegen/src/lib.rs index 2b2d3db2..0b5c2fff 100644 --- a/logos-codegen/src/lib.rs +++ b/logos-codegen/src/lib.rs @@ -216,7 +216,7 @@ pub fn generate(input: TokenStream) -> TokenStream { debug!("Parsing additional options (extras, source, ...)"); - let error_type = parser.error_type.take(); + let (error_type, error_callback) = parser::ErrorType::unwrap(parser.error_type.take()); let extras = parser.extras.take(); let source = parser .source @@ -231,6 +231,30 @@ pub fn generate(input: TokenStream) -> TokenStream { .take() .unwrap_or_else(|| parse_quote!(::logos)); + let make_error_impl = match error_callback { + Some(leaf::Callback::Label(label)) => Some(quote! { + #[inline] + fn make_error(mut lex: &mut #logos_path::Lexer<'s, Self>) { + use #logos_path::internal::LexerInternal; + let error = #label(&mut lex); + lex.set(Err(error)); + } + }), + Some(leaf::Callback::Inline(inline)) => { + let leaf::InlineCallback { arg, body, .. } = *inline; + + Some(quote! 
{ + #[inline] + fn make_error(#arg: &mut #logos_path::Lexer<'s, Self>) { + use #logos_path::internal::LexerInternal; + let error = #body; + #arg.set(Err(error)) + } + }) + } + _ => None, + }; + let generics = parser.generics(); let this = quote!(#name #generics); @@ -246,6 +270,8 @@ pub fn generate(input: TokenStream) -> TokenStream { fn lex(lex: &mut #logos_path::Lexer<'s, Self>) { #body } + + #make_error_impl } } }; diff --git a/logos-codegen/src/parser/error_type.rs b/logos-codegen/src/parser/error_type.rs new file mode 100644 index 00000000..e5ccaa0f --- /dev/null +++ b/logos-codegen/src/parser/error_type.rs @@ -0,0 +1,71 @@ +use proc_macro2::{Span, TokenStream}; +use syn::spanned::Spanned; +use syn::Ident; + +use crate::leaf::Callback; +use crate::parser::nested::NestedValue; +use crate::parser::Parser; +use crate::util::MaybeVoid; + +pub struct ErrorType { + pub ty: TokenStream, + pub callback: Option, +} + +impl ErrorType { + pub fn new(ty: TokenStream) -> Self { + Self { ty, callback: None } + } + + pub fn named_attr(&mut self, name: Ident, value: NestedValue, parser: &mut Parser) { + match (name.to_string().as_str(), value) { + ("callback", NestedValue::Assign(tokens)) => { + let span = tokens.span(); + let callback = match parser.parse_callback(tokens) { + Some(callback) => callback, + None => { + parser.err("Not a valid callback", span); + return; + } + }; + + if let Some(previous) = self.callback.replace(callback) { + parser + .err( + "Callback has been already set", + span.join(name.span()).unwrap(), + ) + .err("Previous callback set here", previous.span()); + } + } + ("callback", _) => { + parser.err("Expected: callback = ...", name.span()); + } + (unknown, _) => { + parser.err( + format!( + "\ + Unknown nested attribute: {}\n\ + \n\ + Expected one of: callback\ + ", + unknown + ), + name.span(), + ); + } + } + } + + pub fn unwrap(opt: Option) -> (MaybeVoid, Option) { + if let Some(Self { ty, callback }) = opt { + (MaybeVoid::Some(ty), callback) + } 
else { + (MaybeVoid::Void, None) + } + } + + pub fn span(&self) -> Span { + self.ty.span() + } +} diff --git a/logos-codegen/src/parser/mod.rs b/logos-codegen/src/parser/mod.rs index 3ad7202e..9fb60cd3 100644 --- a/logos-codegen/src/parser/mod.rs +++ b/logos-codegen/src/parser/mod.rs @@ -10,12 +10,14 @@ use crate::util::{expect_punct, MaybeVoid}; use crate::LOGOS_ATTR; mod definition; +mod error_type; mod ignore_flags; mod nested; mod subpattern; mod type_params; pub use self::definition::{Definition, Literal}; +pub use self::error_type::ErrorType; pub use self::ignore_flags::IgnoreFlags; use self::nested::{AttributeParser, Nested, NestedValue}; pub use self::subpattern::Subpatterns; @@ -28,7 +30,7 @@ pub struct Parser { pub source: Option, pub skips: Vec, pub extras: MaybeVoid, - pub error_type: MaybeVoid, + pub error_type: Option, pub subpatterns: Subpatterns, pub logos_path: Option, types: TypeParams, @@ -108,14 +110,72 @@ impl Parser { NestedValue::Assign(value) => { let span = value.span(); - if let MaybeVoid::Some(previous) = parser.error_type.replace(value) { + let error_ty = ErrorType::new(value); + + if let Some(previous) = parser.error_type.replace(error_ty) { + parser + .err("Error type can be defined only once", span) + .err("Previous definition here", previous.span()); + } + } + NestedValue::Group(value) => { + let span = value.span(); + let mut nested = AttributeParser::new(value); + let ty = match nested.parsed::() { + Some(Ok(ty)) => ty, + Some(Err(e)) => { + parser.err(e.to_string(), e.span()); + return; + } + None => { + parser.err("Expected #[logos(error(SomeType))]", span); + return; + } + }; + + let mut error_type = { + use quote::ToTokens; + ErrorType::new(ty.into_token_stream()) + }; + + for (position, next) in nested.enumerate() { + match next { + Nested::Unexpected(tokens) => { + parser.err("Unexpected token in attribute", tokens.span()); + } + Nested::Unnamed(tokens) => match position { + 0 => error_type.callback = 
parser.parse_callback(tokens), + _ => { + parser.err( + "\ + Expected a named argument at this position\n\ + \n\ + hint: If you are trying to define a callback here use: callback = ...\ + ", + tokens.span(), + ); + } + }, + Nested::Named(name, value) => { + error_type.named_attr(name, value, parser); + } + } + } + + if let Some(previous) = parser.error_type.replace(error_type) { parser .err("Error type can be defined only once", span) .err("Previous definition here", previous.span()); } } _ => { - parser.err("Expected: #[logos(error = SomeType)]", span); + parser.err( + concat!( + "Expected: #[logos(error = SomeType)] or ", + "#[logos(error(SomeType[, callback))]" + ), + span, + ); } }), ("extras", |parser, span, value| match value { diff --git a/src/internal.rs b/src/internal.rs index 3a26f7c5..dddc5b2c 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -72,7 +72,7 @@ impl<'s, T: Logos<'s>> CallbackResult<'s, (), T> for bool { { match self { true => lex.set(Ok(c(()))), - false => lex.set(Err(T::Error::default())), + false => T::make_error(lex), } } } @@ -85,7 +85,7 @@ impl<'s, P, T: Logos<'s>> CallbackResult<'s, P, T> for Option

{ { match self { Some(product) => lex.set(Ok(c(product))), - None => lex.set(Err(T::Error::default())), + None => T::make_error(lex), } } } diff --git a/src/lexer.rs b/src/lexer.rs index 22fd3b5a..273d2065 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -376,14 +376,7 @@ where #[inline] fn error(&mut self) { self.token_end = self.source.find_boundary(self.token_end); - #[cfg(not(feature = "forbid_unsafe"))] - { - self.token = core::mem::ManuallyDrop::new(Some(Err(Token::Error::default()))); - } - #[cfg(feature = "forbid_unsafe")] - { - self.token = Some(Err(Token::Error::default())); - } + Token::make_error(self); } #[inline] diff --git a/src/lib.rs b/src/lib.rs index 6d58a490..3ebee366 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,6 +76,13 @@ pub trait Logos<'source>: Sized { ) -> Lexer<'source, Self> { Lexer::with_extras(source, extras) } + + #[inline(always)] + #[doc(hidden)] + fn make_error(lexer: &mut Lexer<'source, Self>) { + use internal::LexerInternal as _; + lexer.set(Err(Self::Error::default())) + } } /// Type that can be returned from a callback, informing the `Lexer`, to skip diff --git a/tests/tests/custom_error.rs b/tests/tests/custom_error.rs index 0a0e7353..a2b93fce 100644 --- a/tests/tests/custom_error.rs +++ b/tests/tests/custom_error.rs @@ -6,6 +6,7 @@ use tests::assert_lex; enum LexingError { NumberTooLong, NumberNotEven(u32), + UnrecognisedCharacter(char), #[default] Other, } @@ -19,6 +20,12 @@ impl From for LexingError { } } +impl LexingError { + fn unrecognised_character<'src>(lexer: &mut logos::Lexer<'src, Token<'src>>) -> Self { + Self::UnrecognisedCharacter(lexer.slice().chars().next().unwrap()) + } +} + fn parse_number(input: &str) -> Result { let num = input.parse::()?; if num % 2 == 0 { @@ -29,7 +36,7 @@ fn parse_number(input: &str) -> Result { } #[derive(Logos, Debug, Clone, Copy, PartialEq)] -#[logos(error = LexingError)] +#[logos(error(LexingError, LexingError::unrecognised_character))] enum Token<'a> { #[regex(r"[0-9]+", |lex| 
parse_number(lex.slice()))] Number(u32), @@ -51,7 +58,7 @@ fn test() { "1111111111111111111111111111111111111111111111111111111", 13..68, ), - (Err(LexingError::Other), ",", 68..69), + (Err(LexingError::UnrecognisedCharacter(',')), ",", 68..69), ], ); }