diff --git a/src/lexer.rs b/src/lexer.rs index f5b12b2d..f8db4ce3 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -194,7 +194,7 @@ where fn clone(&self) -> Self { Lexer { extras: self.extras.clone(), - token: self.token.clone(), + token: ManuallyDrop::new(None), ..*self } } diff --git a/tests/tests/clone.rs b/tests/tests/clone.rs new file mode 100644 index 00000000..01a5fcc6 --- /dev/null +++ b/tests/tests/clone.rs @@ -0,0 +1,62 @@ +use std::cell::Cell; + +use logos::Logos as _; +use logos_derive::Logos; + +#[derive(Logos, Clone, Debug, PartialEq)] +pub enum Token { + #[token("a", |_| Evil::default())] + Evil(Evil), +} + +#[derive(Debug, Default, PartialEq)] +pub struct Evil(Box>); + +impl Clone for Evil { + fn clone(&self) -> Self { + self.0.set(self.0.get() + 1); + Self::default() + } +} + +#[test] +fn test_it_works_without_cloning() { + let mut lexer = Token::lexer("aaa"); + assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default())))); + assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default())))); + assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default())))); + assert_eq!(lexer.next(), None); +} + +#[test] +fn test_clone_ub() { + let mut lexer = Token::lexer("a"); + assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default())))); + + // In logos 0.14.0, this causes use-after-free (UB), + // because `Clone` dereferences the value returned by the last call to `lexer.next()`, + // which got deallocated. + // A real-life example where this could happen is with `Rc`. + // Note that it may still pass `cargo test`, it will fail with Miri. + let mut lexer2 = lexer.clone(); + + assert_eq!(lexer2.next(), None); +} + +#[test] +fn test_clone_leak() { + let mut lexer = Token::lexer("a"); + let Some(Ok(Token::Evil(evil))) = lexer.next() else { + panic!("Expected Token::Evil"); + }; + assert_eq!(evil.0.get(), 0); + + // In logos 0.14.0, this causes a memory leak because `evil` is cloned with `lexer`. + // This produces `evil.0.get() == 1`. It will fail even on `cargo test`. + let mut lexer2 = lexer.clone(); + assert_eq!(evil.0.get(), 0); + + assert_eq!(lexer2.next(), None); + let _ = evil.clone(); + assert_eq!(evil.0.get(), 1); +} diff --git a/tests/tests/string.rs b/tests/tests/string.rs new file mode 100644 index 00000000..53d82e87 --- /dev/null +++ b/tests/tests/string.rs @@ -0,0 +1,118 @@ +use logos::{Lexer, Logos as _}; +use logos_derive::Logos; + +#[derive(Logos, Clone, Debug, PartialEq)] +#[logos(skip " ")] +pub enum Token { + #[regex(r#""([^"\\]+|\\.)*""#, lex_single_line_string)] + String(String), +} + +#[test] +fn test_it_works_without_cloning() { + let mut lexer = Token::lexer(r#""Hello, world!" "foo😀bar\nbaz \x3F\u{1234}""#); + assert_eq!( + lexer.next(), + Some(Ok(Token::String("Hello, world!".to_string()))) + ); + assert_eq!( + lexer.next(), + Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) + ); + assert_eq!(lexer.next(), None); +} + +#[test] +fn test_it_works_with_cloning() { + let mut lexer = Token::lexer(r#""Hello, world!" "foo😀bar\nbaz \x3F\u{1234}""#); + let mut lexer2 = lexer.clone(); + assert_eq!( + lexer2.next(), + Some(Ok(Token::String("Hello, world!".to_string()))) + ); + let mut lexer3 = lexer2.clone(); + let mut lexer4 = lexer3.clone(); + assert_eq!( + lexer3.next(), + Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) + ); + assert_eq!( + lexer4.next(), + Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) + ); + assert_eq!(lexer4.next(), None); + let mut lexer5 = lexer.clone(); + assert_eq!( + lexer5.next(), + Some(Ok(Token::String("Hello, world!".to_string()))) + ); + assert_eq!( + lexer5.next(), + Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) + ); + assert_eq!( + lexer.next(), + Some(Ok(Token::String("Hello, world!".to_string()))) + ); + assert_eq!(lexer5.next(), None); + assert_eq!( + lexer2.next(), + Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) + ); + assert_eq!(lexer2.next(), None); + assert_eq!(lexer3.next(), None); + assert_eq!( + lexer.next(), + Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string()))) + ); + assert_eq!(lexer.next(), None); +} + +// Not important +pub fn lex_single_line_string(lexer: &mut Lexer) -> Result { + let mut string = String::new(); + let mut chars = lexer.slice()[1..lexer.slice().len() - 1].chars(); + while let Some(c) = chars.next() { + match c { + '\\' => { + let c = chars.next().ok_or(())?; + match c { + '\n' => {} + 'n' => string.push('\n'), + 'r' => string.push('\r'), + 't' => string.push('\t'), + '0' => string.push('\0'), + '\'' | '"' | '\\' => string.push(c), + 'x' => { + let mut hex = String::new(); + hex.push(chars.next().ok_or(())?); + hex.push(chars.next().ok_or(())?); + let code = u8::from_str_radix(&hex, 16).map_err(|_| ())?; + if code > 0x7F { + return Err(()); + } + string.push(code as char); + } + 'u' => { + if chars.next() != Some('{') { + return Err(()); + } + let mut hex = String::new(); + for _ in 0..6 { + let c = chars.next().ok_or(())?; + if c == '}' { + break; + } + hex.push(c); + } + let code = u32::from_str_radix(&hex, 16).map_err(|_| ())?; + string.push(char::from_u32(code).ok_or(())?); + } + _ => return Err(()), + } + } + _ => string.push(c), + } + } + Ok(string) +}