From 0b533407a94c7b74e8f3550bd12f87f87e9ec099 Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Tue, 3 Dec 2024 00:43:38 +1100 Subject: [PATCH 01/14] Added initial functionality to error callback --- logos-codegen/src/lib.rs | 20 ++++++++++++++++++++ logos-codegen/src/parser/mod.rs | 20 ++++++++++++++++++++ src/lexer.rs | 4 ++-- src/lib.rs | 6 ++++++ 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/logos-codegen/src/lib.rs b/logos-codegen/src/lib.rs index 2b2d3db2..47e69e19 100644 --- a/logos-codegen/src/lib.rs +++ b/logos-codegen/src/lib.rs @@ -231,6 +231,24 @@ pub fn generate(input: TokenStream) -> TokenStream { .take() .unwrap_or_else(|| parse_quote!(::logos)); + let make_error_impl = match parser.error_callback.take() { + Some(leaf::Callback::Label(label)) => Some(quote! { + fn make_error(lex: &#logos_path::Lexer<'s, Self>) -> #error_type { + #label(lex) + } + }), + Some(leaf::Callback::Inline(inline)) => { + let leaf::InlineCallback { arg, body, .. } = *inline; + + Some(quote! 
{ + fn make_error(#arg: &#logos_path::Lexer<'s, Self>) -> #error_type { + #body + } + }) + } + _ => None, + }; + let generics = parser.generics(); let this = quote!(#name #generics); @@ -246,6 +264,8 @@ pub fn generate(input: TokenStream) -> TokenStream { fn lex(lex: &mut #logos_path::Lexer<'s, Self>) { #body } + + #make_error_impl } } }; diff --git a/logos-codegen/src/parser/mod.rs b/logos-codegen/src/parser/mod.rs index 3ad7202e..85832a6f 100644 --- a/logos-codegen/src/parser/mod.rs +++ b/logos-codegen/src/parser/mod.rs @@ -29,6 +29,7 @@ pub struct Parser { pub skips: Vec, pub extras: MaybeVoid, pub error_type: MaybeVoid, + pub error_callback: Option, pub subpatterns: Subpatterns, pub logos_path: Option, types: TypeParams, @@ -118,6 +119,25 @@ impl Parser { parser.err("Expected: #[logos(error = SomeType)]", span); } }), + ("error_callback", |parser, span, value| match value { + NestedValue::Assign(value) => { + let callback = match parser.parse_callback(value) { + Some(callback) => callback, + None => { + parser.err("Not a valid callback", span); + return; + } + }; + if let Some(previous) = parser.error_callback.replace(callback) { + parser + .err("Error callback can be defined only once", span) + .err("Previous definition here", previous.span()); + } + } + _ => { + parser.err("Expected #[logos(error_callback = ...)]", span); + } + }), ("extras", |parser, span, value| match value { NestedValue::Assign(value) => { let span = value.span(); diff --git a/src/lexer.rs b/src/lexer.rs index 22fd3b5a..c1b7458d 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -378,11 +378,11 @@ where self.token_end = self.source.find_boundary(self.token_end); #[cfg(not(feature = "forbid_unsafe"))] { - self.token = core::mem::ManuallyDrop::new(Some(Err(Token::Error::default()))); + self.token = core::mem::ManuallyDrop::new(Some(Err(Token::make_error(&self)))); } #[cfg(feature = "forbid_unsafe")] { - self.token = Some(Err(Token::Error::default())); + self.token = 
Some(Err(Token::make_error(&self))); } } diff --git a/src/lib.rs b/src/lib.rs index 6d58a490..27d3f847 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,6 +76,12 @@ pub trait Logos<'source>: Sized { ) -> Lexer<'source, Self> { Lexer::with_extras(source, extras) } + + /// Create a new error. The default implementation uses `Error::default()`. If you want to make + /// your own, use `#[logos(error_callback = ...)]` + fn make_error(_lexer: &Lexer<'source, Self>) -> Self::Error { + Self::Error::default() + } } /// Type that can be returned from a callback, informing the `Lexer`, to skip From 63169b53067b6230d0b98cbba125833da315ed9a Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Tue, 3 Dec 2024 01:15:34 +1100 Subject: [PATCH 02/14] Changed Logos::make_error to take a mutable reference --- logos-codegen/src/lib.rs | 4 ++-- src/lexer.rs | 4 ++-- src/lib.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/logos-codegen/src/lib.rs b/logos-codegen/src/lib.rs index 47e69e19..70700867 100644 --- a/logos-codegen/src/lib.rs +++ b/logos-codegen/src/lib.rs @@ -233,7 +233,7 @@ pub fn generate(input: TokenStream) -> TokenStream { let make_error_impl = match parser.error_callback.take() { Some(leaf::Callback::Label(label)) => Some(quote! { - fn make_error(lex: &#logos_path::Lexer<'s, Self>) -> #error_type { + fn make_error(lex: &mut #logos_path::Lexer<'s, Self>) -> #error_type { #label(lex) } }), @@ -241,7 +241,7 @@ pub fn generate(input: TokenStream) -> TokenStream { let leaf::InlineCallback { arg, body, .. } = *inline; Some(quote! 
{ - fn make_error(#arg: &#logos_path::Lexer<'s, Self>) -> #error_type { + fn make_error(#arg: &mut #logos_path::Lexer<'s, Self>) -> #error_type { #body } }) diff --git a/src/lexer.rs b/src/lexer.rs index c1b7458d..3f7df71c 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -378,11 +378,11 @@ where self.token_end = self.source.find_boundary(self.token_end); #[cfg(not(feature = "forbid_unsafe"))] { - self.token = core::mem::ManuallyDrop::new(Some(Err(Token::make_error(&self)))); + self.token = core::mem::ManuallyDrop::new(Some(Err(Token::make_error(self)))); } #[cfg(feature = "forbid_unsafe")] { - self.token = Some(Err(Token::make_error(&self))); + self.token = Some(Err(Token::make_error(self))); } } diff --git a/src/lib.rs b/src/lib.rs index 27d3f847..b209a91e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,7 +79,7 @@ pub trait Logos<'source>: Sized { /// Create a new error. The default implementation uses `Error::default()`. If you want to make /// your own, use `#[logos(error_callback = ...)]` - fn make_error(_lexer: &Lexer<'source, Self>) -> Self::Error { + fn make_error(_lexer: &mut Lexer<'source, Self>) -> Self::Error { Self::Error::default() } } From fe62bcbf8457c3357b06ed9ad8ffeb1fc17b12e3 Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Tue, 3 Dec 2024 01:16:19 +1100 Subject: [PATCH 03/14] Added error_callback usage to custom_error.rs --- examples/custom_error.rs | 12 ++++++++++-- examples/t.rs | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 examples/t.rs diff --git a/examples/custom_error.rs b/examples/custom_error.rs index 757ca0db..7aa936b4 100644 --- a/examples/custom_error.rs +++ b/examples/custom_error.rs @@ -14,8 +14,9 @@ use std::num::ParseIntError; #[derive(Default, Debug, Clone, PartialEq)] enum LexingError { InvalidInteger(String), + NonAsciiCharacter(char), #[default] - NonAsciiCharacter, + Other, } /// Error type returned by calling 
`lex.slice().parse()` to u8. @@ -29,8 +30,15 @@ impl From for LexingError { } } +impl LexingError { + fn from_lexer<'src>(lex: &mut logos::Lexer<'src, Token>) -> Self { + LexingError::NonAsciiCharacter(lex.slice().chars().next().unwrap()) + } +} + #[derive(Debug, Logos, PartialEq)] #[logos(error = LexingError)] +#[logos(error_callback = LexingError::from_lexer)] #[logos(skip r"[ \t]+")] enum Token { #[regex(r"[a-zA-Z]+")] @@ -58,7 +66,7 @@ fn main() { assert_eq!(lex.next(), Some(Ok(Token::Word))); assert_eq!(lex.slice(), "J"); - assert_eq!(lex.next(), Some(Err(LexingError::NonAsciiCharacter))); + assert_eq!(lex.next(), Some(Err(LexingError::NonAsciiCharacter('é')))); assert_eq!(lex.slice(), "é"); assert_eq!(lex.next(), Some(Ok(Token::Word))); diff --git a/examples/t.rs b/examples/t.rs new file mode 100644 index 00000000..f27da8d5 --- /dev/null +++ b/examples/t.rs @@ -0,0 +1,17 @@ +use logos::Logos; + +#[derive(Logos, Debug)] +#[logos(error = String)] +#[logos(error_callback = |lex| { + format!("Syntax error at {:?}: unrecognised character '{}'", lex.span(), lex.slice()) +})] +enum Token { + #[token("a")] + A, + #[token("b")] + B, +} + +fn main() { + println!("{:?}", Token::lexer("ababcab").collect::>()) +} From 5d80583c295872ed0745d286e6ee5d4de84670e5 Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Tue, 3 Dec 2024 01:29:53 +1100 Subject: [PATCH 04/14] changed custom_error test to use error_callback --- tests/tests/custom_error.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/tests/custom_error.rs b/tests/tests/custom_error.rs index 0a0e7353..c6f15da9 100644 --- a/tests/tests/custom_error.rs +++ b/tests/tests/custom_error.rs @@ -6,6 +6,7 @@ use tests::assert_lex; enum LexingError { NumberTooLong, NumberNotEven(u32), + UnrecognisedCharacter(char), #[default] Other, } @@ -19,6 +20,12 @@ impl From for LexingError { } } +impl LexingError { + fn 
unrecognised_character<'src>(lexer: &mut logos::Lexer<'src, Token<'src>>) -> Self { + Self::UnrecognisedCharacter(lexer.slice().chars().next().unwrap()) + } +} + fn parse_number(input: &str) -> Result { let num = input.parse::()?; if num % 2 == 0 { @@ -30,6 +37,7 @@ fn parse_number(input: &str) -> Result { #[derive(Logos, Debug, Clone, Copy, PartialEq)] #[logos(error = LexingError)] +#[logos(error_callback = LexingError::unrecognised_character)] enum Token<'a> { #[regex(r"[0-9]+", |lex| parse_number(lex.slice()))] Number(u32), @@ -51,7 +59,7 @@ fn test() { "1111111111111111111111111111111111111111111111111111111", 13..68, ), - (Err(LexingError::Other), ",", 68..69), + (Err(LexingError::UnrecognisedCharacter(',')), ",", 68..69), ], ); } From 2ee7e149fbe839dc6d2811918865afe3c5547c5b Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Tue, 3 Dec 2024 01:33:20 +1100 Subject: [PATCH 05/14] Updated book --- book/src/attributes/logos.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/book/src/attributes/logos.md b/book/src/attributes/logos.md index 38475df9..93ec3603 100644 --- a/book/src/attributes/logos.md +++ b/book/src/attributes/logos.md @@ -34,7 +34,8 @@ The type `ErrorType` can be any type that implements `Clone`, `PartialEq`, `Default` and `From` for each callback's error type. `ErrorType` must implement the `Default` trait because invalid tokens, i.e., -literals that do not match any variant, will produce `Err(ErrorType::default())`. +literals that do not match any variant, will produce `Err(ErrorType::default())`, +unless you specify a different constructor with `#[logos(error_callback = ...)]`. 
For example, here is an example using a custom error type: From a0a358eca6e97020d8ee71219e2c15ae5b058873 Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Tue, 3 Dec 2024 13:15:25 +1100 Subject: [PATCH 06/14] Removed temporary example --- examples/t.rs | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 examples/t.rs diff --git a/examples/t.rs b/examples/t.rs deleted file mode 100644 index f27da8d5..00000000 --- a/examples/t.rs +++ /dev/null @@ -1,17 +0,0 @@ -use logos::Logos; - -#[derive(Logos, Debug)] -#[logos(error = String)] -#[logos(error_callback = |lex| { - format!("Syntax error at {:?}: unrecognised character '{}'", lex.span(), lex.slice()) -})] -enum Token { - #[token("a")] - A, - #[token("b")] - B, -} - -fn main() { - println!("{:?}", Token::lexer("ababcab").collect::>()) -} From 582cfcc8fa3ee2fb5177df28541036d1ef36c99c Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Tue, 3 Dec 2024 13:17:35 +1100 Subject: [PATCH 07/14] Removed error_callback attr, added to error `#[logos(error = SomeType)]` `#[logos(error_callback = callback)]` is now expressed as `#[logos(error(SomeType, callback))]` --- examples/custom_error.rs | 3 +- logos-codegen/src/lib.rs | 4 +- logos-codegen/src/parser/error_type.rs | 71 ++++++++++++++++++++++++++ logos-codegen/src/parser/mod.rs | 70 +++++++++++++++++++------ tests/tests/custom_error.rs | 3 +- 5 files changed, 130 insertions(+), 21 deletions(-) create mode 100644 logos-codegen/src/parser/error_type.rs diff --git a/examples/custom_error.rs b/examples/custom_error.rs index 7aa936b4..a36a2bfe 100644 --- a/examples/custom_error.rs +++ b/examples/custom_error.rs @@ -37,8 +37,7 @@ impl LexingError { } #[derive(Debug, Logos, PartialEq)] -#[logos(error = LexingError)] -#[logos(error_callback = LexingError::from_lexer)] +#[logos(error(LexingError, LexingError::from_lexer))] #[logos(skip r"[ 
\t]+")] enum Token { #[regex(r"[a-zA-Z]+")] diff --git a/logos-codegen/src/lib.rs b/logos-codegen/src/lib.rs index 70700867..b6af8ac2 100644 --- a/logos-codegen/src/lib.rs +++ b/logos-codegen/src/lib.rs @@ -216,7 +216,7 @@ pub fn generate(input: TokenStream) -> TokenStream { debug!("Parsing additional options (extras, source, ...)"); - let error_type = parser.error_type.take(); + let (error_type, error_callback) = parser::ErrorType::unwrap(parser.error_type.take()); let extras = parser.extras.take(); let source = parser .source @@ -231,7 +231,7 @@ pub fn generate(input: TokenStream) -> TokenStream { .take() .unwrap_or_else(|| parse_quote!(::logos)); - let make_error_impl = match parser.error_callback.take() { + let make_error_impl = match error_callback { Some(leaf::Callback::Label(label)) => Some(quote! { fn make_error(lex: &mut #logos_path::Lexer<'s, Self>) -> #error_type { #label(lex) diff --git a/logos-codegen/src/parser/error_type.rs b/logos-codegen/src/parser/error_type.rs new file mode 100644 index 00000000..e5ccaa0f --- /dev/null +++ b/logos-codegen/src/parser/error_type.rs @@ -0,0 +1,71 @@ +use proc_macro2::{Span, TokenStream}; +use syn::spanned::Spanned; +use syn::Ident; + +use crate::leaf::Callback; +use crate::parser::nested::NestedValue; +use crate::parser::Parser; +use crate::util::MaybeVoid; + +pub struct ErrorType { + pub ty: TokenStream, + pub callback: Option, +} + +impl ErrorType { + pub fn new(ty: TokenStream) -> Self { + Self { ty, callback: None } + } + + pub fn named_attr(&mut self, name: Ident, value: NestedValue, parser: &mut Parser) { + match (name.to_string().as_str(), value) { + ("callback", NestedValue::Assign(tokens)) => { + let span = tokens.span(); + let callback = match parser.parse_callback(tokens) { + Some(callback) => callback, + None => { + parser.err("Not a valid callback", span); + return; + } + }; + + if let Some(previous) = self.callback.replace(callback) { + parser + .err( + "Callback has been already set", + 
span.join(name.span()).unwrap_or(span), + ) + .err("Previous callback set here", previous.span()); + } + } + ("callback", _) => { + parser.err("Expected: callback = ...", name.span()); + } + (unknown, _) => { + parser.err( + format!( + "\ + Unknown nested attribute: {}\n\ + \n\ + Expected one of: callback\ + ", + unknown + ), + name.span(), + ); + } + } + } + + pub fn unwrap(opt: Option) -> (MaybeVoid, Option) { + if let Some(Self { ty, callback }) = opt { + (MaybeVoid::Some(ty), callback) + } else { + (MaybeVoid::Void, None) + } + } + + pub fn span(&self) -> Span { + self.ty.span() + } +} diff --git a/logos-codegen/src/parser/mod.rs b/logos-codegen/src/parser/mod.rs index 85832a6f..9fb60cd3 100644 --- a/logos-codegen/src/parser/mod.rs +++ b/logos-codegen/src/parser/mod.rs @@ -10,12 +10,14 @@ use crate::util::{expect_punct, MaybeVoid}; use crate::LOGOS_ATTR; mod definition; +mod error_type; mod ignore_flags; mod nested; mod subpattern; mod type_params; pub use self::definition::{Definition, Literal}; +pub use self::error_type::ErrorType; pub use self::ignore_flags::IgnoreFlags; use self::nested::{AttributeParser, Nested, NestedValue}; pub use self::subpattern::Subpatterns; @@ -28,8 +30,7 @@ pub struct Parser { pub source: Option, pub skips: Vec, pub extras: MaybeVoid, - pub error_type: MaybeVoid, - pub error_callback: Option, + pub error_type: Option, pub subpatterns: Subpatterns, pub logos_path: Option, types: TypeParams, @@ -109,33 +110,72 @@ impl Parser { NestedValue::Assign(value) => { let span = value.span(); - if let MaybeVoid::Some(previous) = parser.error_type.replace(value) { + let error_ty = ErrorType::new(value); + + if let Some(previous) = parser.error_type.replace(error_ty) { parser .err("Error type can be defined only once", span) .err("Previous definition here", previous.span()); } } - _ => { - parser.err("Expected: #[logos(error = SomeType)]", span); - } - }), - ("error_callback", |parser, span, value| match value { - NestedValue::Assign(value) => { - let
callback = match parser.parse_callback(value) { - Some(callback) => callback, + NestedValue::Group(value) => { + let span = value.span(); + let mut nested = AttributeParser::new(value); + let ty = match nested.parsed::() { + Some(Ok(ty)) => ty, + Some(Err(e)) => { + parser.err(e.to_string(), e.span()); + return; + } None => { - parser.err("Not a valid callback", span); + parser.err("Expected #[logos(error(SomeType))]", span); return; } }; - if let Some(previous) = parser.error_callback.replace(callback) { + + let mut error_type = { + use quote::ToTokens; + ErrorType::new(ty.into_token_stream()) + }; + + for (position, next) in nested.enumerate() { + match next { + Nested::Unexpected(tokens) => { + parser.err("Unexpected token in attribute", tokens.span()); + } + Nested::Unnamed(tokens) => match position { + 0 => error_type.callback = parser.parse_callback(tokens), + _ => { + parser.err( + "\ + Expected a named argument at this position\n\ + \n\ + hint: If you are trying to define a callback here use: callback = ...\ + ", + tokens.span(), + ); + } + }, + Nested::Named(name, value) => { + error_type.named_attr(name, value, parser); + } + } + } + + if let Some(previous) = parser.error_type.replace(error_type) { parser - .err("Error callback can be defined only once", span) + .err("Error type can be defined only once", span) .err("Previous definition here", previous.span()); } } _ => { - parser.err("Expected #[logos(error_callback = ...)]", span); + parser.err( + concat!( + "Expected: #[logos(error = SomeType)] or ", + "#[logos(error(SomeType[, callback]))]" + ), + span, + ); } }), ("extras", |parser, span, value| match value { diff --git a/tests/tests/custom_error.rs b/tests/tests/custom_error.rs index c6f15da9..a2b93fce 100644 --- a/tests/tests/custom_error.rs +++ b/tests/tests/custom_error.rs @@ -36,8 +36,7 @@ fn parse_number(input: &str) -> Result { } #[derive(Logos, Debug, Clone, Copy, PartialEq)] -#[logos(error = LexingError)] -#[logos(error_callback =
LexingError::unrecognised_character)] +#[logos(error(LexingError, LexingError::unrecognised_character))] enum Token<'a> { #[regex(r"[0-9]+", |lex| parse_number(lex.slice()))] Number(u32), From 783459736dc9f718f37f1f7be5838a593f7eb90b Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Tue, 3 Dec 2024 13:23:02 +1100 Subject: [PATCH 08/14] Edit book to reflect syntax changes --- book/src/attributes/logos.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/book/src/attributes/logos.md b/book/src/attributes/logos.md index 93ec3603..4412f401 100644 --- a/book/src/attributes/logos.md +++ b/book/src/attributes/logos.md @@ -35,7 +35,7 @@ The type `ErrorType` can be any type that implements `Clone`, `PartialEq`, `ErrorType` must implement the `Default` trait because invalid tokens, i.e., literals that do not match any variant, will produce `Err(ErrorType::default())`, -unless you specify a different constructor with `#[logos(error_callback = ...)]`. +unless you provide a callback with the alternate syntax `#[logos(error(ErrorType, callback = ...))]` For example, here is an example using a custom error type: From ac220e4c40bccf2d0318959081414aff16b32f31 Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Sun, 8 Dec 2024 02:33:53 +1100 Subject: [PATCH 09/14] Added mini example to book --- book/src/attributes/logos.md | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/book/src/attributes/logos.md b/book/src/attributes/logos.md index 4412f401..d27d9bfe 100644 --- a/book/src/attributes/logos.md +++ b/book/src/attributes/logos.md @@ -33,10 +33,6 @@ This can be changed by using `#[logos(error = ErrorType)]` attribute on the enum The type `ErrorType` can be any type that implements `Clone`, `PartialEq`, `Default` and `From` for each callback's error type. 
-`ErrorType` must implement the `Default` trait because invalid tokens, i.e., -literals that do not match any variant, will produce `Err(ErrorType::default())`, -unless you provide a callback with the alternate syntax `#[logos(error(ErrorType, callback = ...))]` - For example, here is an example using a custom error type: ```rust,no_run,noplayground @@ -47,6 +43,24 @@ You can add error variants to `LexingError`, and implement `From` for each error type `E` that could be returned by a callback. See [callbacks](../callbacks.md). +`ErrorType` must implement the `Default` trait because invalid tokens, i.e., +literals that do not match any variant, will produce `Err(ErrorType::default())`. + +Alternatively, you can provide a callback with the alternate syntax +`#[logos(error(ErrorType, callback = ...))]`, which allows you to include information +from the lexer such as the span where the error occurred: + +```rust,no_run,noplayground +#[derive(Logos)] +#[logos(error(Range, callback = |lex| lex.span()))] +enum Token { + #[token("a")] + A, + #[token("b")] + B, +} +``` + ## Specifying path to logos You can force the derive macro to use a different path to `Logos`'s crate From d7b1e7c537755fae5d2dfb2c7d237f26830fcbbd Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Wed, 11 Dec 2024 22:58:06 +1100 Subject: [PATCH 10/14] Added #[inline(always)] --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index b209a91e..388ad0fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,6 +79,7 @@ pub trait Logos<'source>: Sized { /// Create a new error. The default implementation uses `Error::default()`. 
If you want to make /// your own, use `#[logos(error_callback = ...)]` + #[inline(always)] fn make_error(_lexer: &mut Lexer<'source, Self>) -> Self::Error { Self::Error::default() } From 227eed453ee78be50c4a7e3fd1965a0d922853b6 Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Wed, 11 Dec 2024 23:45:22 +1100 Subject: [PATCH 11/14] Fixed performance regression --- logos-codegen/src/lib.rs | 14 ++++++++++---- src/internal.rs | 2 ++ src/lexer.rs | 9 +++++++-- src/lib.rs | 7 +++---- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/logos-codegen/src/lib.rs b/logos-codegen/src/lib.rs index b6af8ac2..ecf56df8 100644 --- a/logos-codegen/src/lib.rs +++ b/logos-codegen/src/lib.rs @@ -233,16 +233,22 @@ pub fn generate(input: TokenStream) -> TokenStream { let make_error_impl = match error_callback { Some(leaf::Callback::Label(label)) => Some(quote! { - fn make_error(lex: &mut #logos_path::Lexer<'s, Self>) -> #error_type { - #label(lex) + #[inline] + fn make_error(mut lex: &mut #logos_path::Lexer<'s, Self>) { + use #logos_path::internal::LexerInternal; + let error = #label(&mut lex); + lex.set_error(error); } }), Some(leaf::Callback::Inline(inline)) => { let leaf::InlineCallback { arg, body, .. } = *inline; Some(quote! { - fn make_error(#arg: &mut #logos_path::Lexer<'s, Self>) -> #error_type { - #body + #[inline] + fn make_error(#arg: &mut #logos_path::Lexer<'s, Self>) { + use #logos_path::internal::LexerInternal; + let error = #body; + #arg.set_error(error) } }) } diff --git a/src/internal.rs b/src/internal.rs index 3a26f7c5..76f37163 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -37,6 +37,8 @@ pub trait LexerInternal<'source> { /// Guarantee that `token_end` is at char boundary for `&str`. 
fn error(&mut self); + fn set_error(&mut self, error: >::Error); + fn end(&mut self); fn set( diff --git a/src/lexer.rs b/src/lexer.rs index 3f7df71c..83cc8a21 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -376,13 +376,18 @@ where #[inline] fn error(&mut self) { self.token_end = self.source.find_boundary(self.token_end); + Token::make_error(self); + } + + #[inline] + fn set_error(&mut self, error: Token::Error) { #[cfg(not(feature = "forbid_unsafe"))] { - self.token = core::mem::ManuallyDrop::new(Some(Err(Token::make_error(self)))); + self.token = core::mem::ManuallyDrop::new(Some(Err(error))); } #[cfg(feature = "forbid_unsafe")] { - self.token = Some(Err(Token::make_error(self))); + self.token = Some(Err(error)); } } diff --git a/src/lib.rs b/src/lib.rs index 388ad0fc..00dada51 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,11 +77,10 @@ pub trait Logos<'source>: Sized { Lexer::with_extras(source, extras) } - /// Create a new error. The default implementation uses `Error::default()`. 
If you want to make - /// your own, use `#[logos(error_callback = ...)]` #[inline(always)] - fn make_error(_lexer: &mut Lexer<'source, Self>) -> Self::Error { - Self::Error::default() + fn make_error(lexer: &mut Lexer<'source, Self>) { + use internal::LexerInternal as _; + lexer.set_error(Self::Error::default()) } } From 98358963add5b14ca10618b5a11cd8dc3242e492 Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Wed, 11 Dec 2024 23:46:15 +1100 Subject: [PATCH 12/14] Added #[doc(hidden)] to Logos::make_error --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 00dada51..cbd0db9b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,6 +78,7 @@ pub trait Logos<'source>: Sized { } #[inline(always)] + #[doc(hidden)] fn make_error(lexer: &mut Lexer<'source, Self>) { use internal::LexerInternal as _; lexer.set_error(Self::Error::default()) From ac45573a7b90a2e59e19ed5f9fb1d08d747a9d93 Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Thu, 12 Dec 2024 02:42:32 +1100 Subject: [PATCH 13/14] Removed redundant method `set_error` Replaced usages with `set` --- logos-codegen/src/lib.rs | 4 ++-- src/internal.rs | 2 -- src/lexer.rs | 12 ------------ src/lib.rs | 2 +- 4 files changed, 3 insertions(+), 17 deletions(-) diff --git a/logos-codegen/src/lib.rs b/logos-codegen/src/lib.rs index ecf56df8..0b5c2fff 100644 --- a/logos-codegen/src/lib.rs +++ b/logos-codegen/src/lib.rs @@ -237,7 +237,7 @@ pub fn generate(input: TokenStream) -> TokenStream { fn make_error(mut lex: &mut #logos_path::Lexer<'s, Self>) { use #logos_path::internal::LexerInternal; let error = #label(&mut lex); - lex.set_error(error); + lex.set(Err(error)); } }), Some(leaf::Callback::Inline(inline)) => { @@ -248,7 +248,7 @@ pub fn generate(input: TokenStream) -> TokenStream { fn make_error(#arg: &mut #logos_path::Lexer<'s, Self>) { use 
#logos_path::internal::LexerInternal; let error = #body; - #arg.set_error(error) + #arg.set(Err(error)) } }) } diff --git a/src/internal.rs b/src/internal.rs index 76f37163..3a26f7c5 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -37,8 +37,6 @@ pub trait LexerInternal<'source> { /// Guarantee that `token_end` is at char boundary for `&str`. fn error(&mut self); - fn set_error(&mut self, error: >::Error); - fn end(&mut self); fn set( diff --git a/src/lexer.rs b/src/lexer.rs index 83cc8a21..273d2065 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -379,18 +379,6 @@ where Token::make_error(self); } - #[inline] - fn set_error(&mut self, error: Token::Error) { - #[cfg(not(feature = "forbid_unsafe"))] - { - self.token = core::mem::ManuallyDrop::new(Some(Err(error))); - } - #[cfg(feature = "forbid_unsafe")] - { - self.token = Some(Err(error)); - } - } - #[inline] fn end(&mut self) { self.token = Default::default(); diff --git a/src/lib.rs b/src/lib.rs index cbd0db9b..3ebee366 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -81,7 +81,7 @@ pub trait Logos<'source>: Sized { #[doc(hidden)] fn make_error(lexer: &mut Lexer<'source, Self>) { use internal::LexerInternal as _; - lexer.set_error(Self::Error::default()) + lexer.set(Err(Self::Error::default())) } } From 6ae71c4687439f73e05cb6cffd74135bb7287eec Mon Sep 17 00:00:00 2001 From: mysteriouslyseeing <61419567+mysteriouslyseeing@users.noreply.github.com> Date: Thu, 12 Dec 2024 02:44:49 +1100 Subject: [PATCH 14/14] CallbackResults now use Token::make_error --- src/internal.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/internal.rs b/src/internal.rs index 3a26f7c5..dddc5b2c 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -72,7 +72,7 @@ impl<'s, T: Logos<'s>> CallbackResult<'s, (), T> for bool { { match self { true => lex.set(Ok(c(()))), - false => lex.set(Err(T::Error::default())), + false => T::make_error(lex), } } } @@ -85,7 +85,7 @@ impl<'s, P, T: Logos<'s>> CallbackResult<'s, P, T> for 
Option<P> { { match self { Some(product) => lex.set(Ok(c(product))), - None => lex.set(Err(T::Error::default())), + None => T::make_error(lex), } } }