From 57719e6bafec6113ad23130429b9d52e96a86f41 Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Sun, 5 May 2024 19:49:01 +0200 Subject: [PATCH] fix: fixed unexpected successful logfmt parsing of single-word input (#248) --- Cargo.lock | 4 +- Cargo.toml | 2 +- src/logfmt/de.rs | 256 ++++++++++++++++++++------------------------ src/logfmt/error.rs | 20 +--- src/logfmt/mod.rs | 2 + src/logfmt/raw.rs | 2 +- src/model.rs | 30 +++++- 7 files changed, 152 insertions(+), 164 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4941e9a9..e913395f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -611,7 +611,7 @@ checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" [[package]] name = "encstr" -version = "0.29.2-alpha.2" +version = "0.29.2-alpha.3" [[package]] name = "enum-map" @@ -768,7 +768,7 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "hl" -version = "0.29.2-alpha.2" +version = "0.29.2-alpha.3" dependencies = [ "atoi", "bincode", diff --git a/Cargo.toml b/Cargo.toml index ddb23141..88f6638b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ members = [".", "crate/encstr"] [workspace.package] repository = "https://github.com/pamburus/hl" authors = ["Pavel Ivanov "] -version = "0.29.2-alpha.2" +version = "0.29.2-alpha.3" edition = "2021" license = "MIT" diff --git a/src/logfmt/de.rs b/src/logfmt/de.rs index 7618776c..156b5bba 100644 --- a/src/logfmt/de.rs +++ b/src/logfmt/de.rs @@ -3,7 +3,7 @@ use std::{ str, }; -use serde::de::{self, DeserializeSeed, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, VariantAccess, Visitor}; +use serde::de::{self, DeserializeSeed, IntoDeserializer, MapAccess, SeqAccess, Visitor}; use serde::Deserialize; use super::error::{Error, Result}; @@ -356,18 +356,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { where V: Visitor<'de>, { - if self.parser.peek() == Some(b'"') { - visitor.visit_enum(self.parse_string(false)?.into_deserializer()) - } else if self.parser.next() == Some(b'{') { - let value = visitor.visit_enum(Enum::new(self))?; - if self.parser.next() == Some(b'}') { - Ok(value) - } else { - Err(Error::ExpectedMapEnd) - } - } else { - Err(Error::ExpectedEnum) - } + visitor.visit_enum(self.parse_string(false)?.into_deserializer()) } #[inline] @@ -532,7 +521,9 @@ impl<'de> Parser<'de> { } let s = &self.input[start..self.index]; - self.next(); + if self.next() != Some(b'=') { + return Err(Error::ExpectedKeyValueDelimiter); + } if unicode { return Ok(str::from_utf8(s).map_err(|_| Error::InvalidUnicodeCodePoint)?); @@ -802,67 +793,6 @@ impl<'de, 'a> MapAccess<'de> for KeyValueSequence<'a, 'de> { } } -struct Enum<'a, 'de: 'a> { - de: &'a mut Deserializer<'de>, -} - -impl<'a, 'de> Enum<'a, 'de> { - #[inline] - fn new(de: &'a mut Deserializer<'de>) -> Self { - Enum { de } - } -} - -impl<'de, 'a> EnumAccess<'de> for Enum<'a, 'de> { - type Error = Error; - type Variant = Self; - - #[inline] - fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> - where - V: DeserializeSeed<'de>, - { - let val = seed.deserialize(&mut *self.de)?; - if self.de.parser.next() == Some(b'=') { - Ok((val, self)) - } else { - Err(Error::ExpectedMapKeyValueDelimiter) - } - } -} - -impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> { - type Error = Error; - - fn unit_variant(self) -> Result<()> { - Err(Error::ExpectedString) - } - - #[inline] - fn newtype_variant_seed(self, seed: T) -> Result - where - T: DeserializeSeed<'de>, - { - seed.deserialize(self.de) - } - - #[inline] - fn tuple_variant(self, _len: usize, visitor: V) -> Result - where - V: Visitor<'de>, - { - de::Deserializer::deserialize_seq(self.de, visitor) - } - - #[inline] - fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result - where - V: Visitor<'de>, - { - de::Deserializer::deserialize_map(self.de, visitor) - } -} - pub enum Reference<'b, 'c, T> where T: ?Sized + 'static, @@ -984,73 +914,117 @@ static KEY: [u8; 256] = { // --- -#[test] -fn test_struct_no_escape() { - #[derive(Deserialize, PartialEq, Debug)] - struct Test { - int: u32, - str1: String, - str2: String, - } - - let j = r#"int=42 str1=a str2="b c""#; - let expected = Test { - int: 42, - str1: "a".to_string(), - str2: "b c".to_string(), - }; - assert_eq!(expected, from_str(j).unwrap()); -} +#[cfg(test)] +mod tests { + use super::{super::raw::RawValue, *}; + use std::collections::HashMap; + + #[test] + fn test_struct_no_escape() { + #[derive(Deserialize, PartialEq, Debug)] + struct Test { + int: u32, + str1: String, + str2: String, + } -#[test] -fn test_struct_escape() { - #[derive(Deserialize, PartialEq, Debug)] - struct Test { - int: u32, - str1: String, - str2: String, - } - - let j = r#"int=0 str1="b=c" str2="a\nb""#; - let expected = Test { - int: 0, - str1: "b=c".to_string(), - str2: "a\nb".to_string(), - }; - assert_eq!(expected, from_str(j).unwrap()); -} + let j = r#"int=42 str1=a str2="b c""#; + let expected = Test { + int: 42, + str1: "a".to_string(), + str2: "b c".to_string(), + }; + assert_eq!(expected, from_str(j).unwrap()); + } -#[test] -fn test_hex_escape() { - #[derive(Deserialize, PartialEq, Debug)] - struct Test { - int: u32, - str1: String, - str2: String, - } - - let j = r#"int=0 str1="\u001b[3m" str2="a""#; - let expected = Test { - int: 0, - str1: "\x1b[3m".to_string(), - str2: "a".to_string(), - }; - assert_eq!(expected, from_str(j).unwrap()); -} + #[test] + fn test_struct_escape() { + #[derive(Deserialize, PartialEq, Debug)] + struct Test { + int: u32, + str1: String, + str2: String, + } + + let j = r#"int=0 str1="b=c" str2="a\nb""#; + let expected = Test { + int: 0, + str1: "b=c".to_string(), + str2: "a\nb".to_string(), + }; + assert_eq!(expected, from_str(j).unwrap()); + } + + #[test] + fn test_hex_escape() { + #[derive(Deserialize, PartialEq, Debug)] + struct Test { + int: u32, + str1: String, + str2: String, + } + + let j = r#"int=0 str1="\u001b[3m" str2="a""#; + let expected = Test { + int: 0, + str1: "\x1b[3m".to_string(), + str2: "a".to_string(), + }; + assert_eq!(expected, from_str(j).unwrap()); + } + + #[test] + fn test_raw() { + #[derive(Deserialize)] + struct Test<'a> { + int: i32, + str1: String, + #[serde(borrow)] + str2: &'a RawValue, + } + + let j = r#"int=-42 str1=a str2="b \nc""#; + let parsed: Test = from_str(j).unwrap(); + assert_eq!(parsed.int, -42); + assert_eq!(parsed.str1, "a"); + assert_eq!(parsed.str2.get(), r#""b \nc""#); + } + + #[test] + fn test_single_word() { + let result = from_str::>(r#"word"#); + assert_eq!(result, Err(Error::ExpectedKeyValueDelimiter)); + assert_eq!(result.unwrap_err().to_string(), "expected key-value delimiter"); + } -#[test] -fn test_raw() { - #[derive(Deserialize)] - struct Test<'a> { - int: i32, - str1: String, - #[serde(borrow)] - str2: &'a super::raw::RawValue, - } - - let j = r#"int=-42 str1=a str2="b \nc""#; - let parsed: Test = from_str(j).unwrap(); - assert_eq!(parsed.int, -42); - assert_eq!(parsed.str1, "a"); - assert_eq!(parsed.str2.get(), r#""b \nc""#); + #[test] + fn test_raw_enum() { + #[derive(Deserialize, PartialEq, Debug)] + enum TestEnum { + A, + B, + C, + } + + let val: TestEnum = from_str("B").unwrap(); + assert_eq!(val, TestEnum::B); + } + + #[test] + fn test_raw_struct_with_enum() { + #[derive(Deserialize, PartialEq, Debug)] + enum TestEnum { + A, + B, + C, + } + + #[derive(Deserialize, PartialEq, Debug)] + struct TestStruct { + v: TestEnum, + } + + let val: TestStruct = from_str("v=B").unwrap(); + assert_eq!(val, TestStruct { v: TestEnum::B }); + } } diff --git a/src/logfmt/error.rs b/src/logfmt/error.rs index 1b666933..6e996e62 100644 --- a/src/logfmt/error.rs +++ b/src/logfmt/error.rs @@ -4,22 +4,15 @@ use serde::de; pub type Result = std::result::Result; -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Error { Eof, ExpectedBoolean, ExpectedInteger, ExpectedNull, ExpectedString, - ExpectedArray, - ExpectedArrayDelimiter, - ExpectedArrayEnd, - ExpectedMap, - ExpectedMapDelimiter, - ExpectedMapKeyValueDelimiter, - ExpectedMapEnd, - ExpectedEnum, ExpectedKey, + ExpectedKeyValueDelimiter, Syntax, InvalidEscape, LoneLeadingSurrogateInHexEscape, @@ -40,15 +33,8 @@ impl fmt::Display for Error { Self::ExpectedInteger => f.write_str("expected integer"), Self::ExpectedNull => f.write_str("expected null"), Self::ExpectedString => f.write_str("expected string"), - Self::ExpectedArray => f.write_str("expected array"), - Self::ExpectedArrayDelimiter => f.write_str("expected space or array end"), - Self::ExpectedArrayEnd => f.write_str("expected array end"), - Self::ExpectedMap => f.write_str("expected map"), - Self::ExpectedMapDelimiter => f.write_str("expected space or map end"), - Self::ExpectedMapKeyValueDelimiter => f.write_str("expected equal sign"), - Self::ExpectedMapEnd => f.write_str("expected map end"), - Self::ExpectedEnum => f.write_str("expected enum"), Self::ExpectedKey => f.write_str("expected key"), + Self::ExpectedKeyValueDelimiter => f.write_str("expected key-value delimiter"), Self::Syntax => f.write_str("syntax error"), Self::InvalidEscape => f.write_str("invalid escape sequence"), Self::LoneLeadingSurrogateInHexEscape => f.write_str("lone leading surrogate in hex escape"), diff --git a/src/logfmt/mod.rs b/src/logfmt/mod.rs index 370488e4..88bd38ca 100644 --- a/src/logfmt/mod.rs +++ b/src/logfmt/mod.rs @@ -3,3 +3,5 @@ pub mod error; pub mod raw; pub use de::{from_slice, from_str}; +#[allow(unused_imports)] +pub use error::Error; diff --git a/src/logfmt/raw.rs b/src/logfmt/raw.rs index 28740ed3..a4e06b0d 100644 --- a/src/logfmt/raw.rs +++ b/src/logfmt/raw.rs @@ -110,7 +110,7 @@ impl<'de: 'a, 'a> Deserialize<'de> for &'a RawValue { type Value = &'de RawValue; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - write!(formatter, "any valid JSON value") + write!(formatter, "any valid logfmt value") } fn visit_map(self, mut visitor: V) -> Result diff --git a/src/model.rs b/src/model.rs index 83888d46..a744fd45 100644 --- a/src/model.rs +++ b/src/model.rs @@ -968,6 +968,15 @@ where Self::Logfmt(stream) => stream.next(), } } + + #[inline] + pub fn collect_vec(&mut self) -> Vec>> { + let mut result = Vec::new(); + while let Some(item) = self.next() { + result.push(item); + } + result + } } // --- @@ -1701,6 +1710,7 @@ const RAW_RECORD_FIELDS_CAPACITY: usize = RECORD_EXTRA_CAPACITY + MAX_PREDEFINED #[cfg(test)] mod tests { use super::*; + use crate::logfmt; use maplit::hashmap; #[test] @@ -2003,9 +2013,25 @@ mod tests { assert_eq!(filter.apply(&record), true); } + #[test] + fn test_parse_single_word() { + let result = try_parse("test"); + assert!(result.is_err()); + assert!(matches!( + result.err(), + Some(Error::LogfmtParseError(logfmt::Error::ExpectedKeyValueDelimiter)) + )); + } + fn parse(s: &str) -> Record { - let raw = RawRecord::parser().parse(s.as_bytes()).next().unwrap().unwrap().record; + try_parse(s).unwrap() + } + + fn try_parse(s: &str) -> Result { + let items = RawRecord::parser().parse(s.as_bytes()).collect_vec(); + assert_eq!(items.len(), 1); + let raw = items.into_iter().next().unwrap()?.record; let parser = Parser::new(ParserSettings::default()); - parser.parse(raw) + Ok(parser.parse(raw)) } }