From 2e775df0c138b0202e9162152a8cf9f958c89fe6 Mon Sep 17 00:00:00 2001 From: Pavel Ivanov Date: Sun, 25 Feb 2024 12:19:34 +0100 Subject: [PATCH] new: added support for logfmt format --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/app.rs | 38 +- src/error.rs | 2 + src/formatting.rs | 193 ++++----- src/index.rs | 57 +-- src/lib.rs | 1 + src/logfmt/de.rs | 994 ++++++++++++++++++++++++++++++++++++++++++++ src/logfmt/error.rs | 97 +++++ src/logfmt/mod.rs | 5 + src/logfmt/raw.rs | 557 +++++++++++++++++++++++++ src/model.rs | 544 ++++++++++++++++++++---- 12 files changed, 2243 insertions(+), 249 deletions(-) create mode 100644 src/logfmt/de.rs create mode 100644 src/logfmt/error.rs create mode 100644 src/logfmt/mod.rs create mode 100644 src/logfmt/raw.rs diff --git a/Cargo.lock b/Cargo.lock index c4b3f44d..0e3c586f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -738,7 +738,7 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "hl" -version = "0.26.2" +version = "0.27.0-beta.1" dependencies = [ "atoi", "bincode", diff --git a/Cargo.toml b/Cargo.toml index 4e193458..4f1bb5d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ categories = ["command-line-utilities"] description = "Utility for viewing json-formatted log files." keywords = ["cli", "human", "log"] name = "hl" -version = "0.26.2" +version = "0.27.0-beta.1" edition = "2021" build = "build.rs" diff --git a/src/app.rs b/src/app.rs index 3bb28e93..640c0072 100644 --- a/src/app.rs +++ b/src/app.rs @@ -21,7 +21,6 @@ use crossbeam_channel::{self as channel, Receiver, RecvError, RecvTimeoutError, use crossbeam_utils::thread; use itertools::{izip, Itertools}; use platform_dirs::AppDirs; -use serde_json as json; use sha2::{Digest, Sha256}; use std::num::{NonZeroU32, NonZeroUsize}; @@ -35,7 +34,6 @@ use crate::input::{BlockLine, Input, InputHolder, InputReference}; use crate::model::{Filter, Parser, ParserSettings, RawRecord, Record, RecordFilter, RecordWithSourceConstructor}; use crate::query::Query; use crate::scanning::{BufFactory, Delimit, Delimiter, Scanner, SearchExt, Segment, SegmentBuf, SegmentBufFactory}; -use crate::serdex::StreamDeserializerWithOffsets; use crate::settings::{Fields, Formatting}; use crate::theme::{Element, StylingPush, Theme}; use crate::timezone::Tz; @@ -206,6 +204,7 @@ impl App { cache_dir, &self.options.fields.settings.predefined, self.options.delimiter.clone(), + self.options.allow_prefix, ); let input_badges = self.input_badges(inputs.iter().map(|x| &x.reference)); @@ -580,6 +579,7 @@ impl App { &self.options.buffer_size, &self.options.max_message_size, &self.options.fields.settings.predefined, + &self.options.allow_prefix, ), )?; Ok(hasher.finalize().into()) @@ -771,45 +771,31 @@ impl<'a, Formatter: RecordWithSourceFormatter, Filter: RecordFilter> SegmentProc continue; } - let extra_prefix = if self.options.allow_prefix { - line.split(|c| *c == b'{').next().unwrap() - } else { - b"" - }; - - let xn = extra_prefix.len(); - let json_data = &line[xn..]; - let stream = json::Deserializer::from_slice(json_data).into_iter::(); - let mut stream = StreamDeserializerWithOffsets(stream); + let mut stream = RawRecord::parser().allow_prefix(self.options.allow_prefix).parse(line); let mut parsed_some = false; let mut produced_some = false; - while let Some(Ok((record, offsets))) = stream.next() { + let mut last_offset = 0; + while let Some(Ok(ar)) = stream.next() { + last_offset = ar.offsets.end; if parsed_some { buf.push(b'\n'); } parsed_some = true; - let record = self.parser.parse(record); + let record = self.parser.parse(ar.record); if record.matches(&self.filter) { let begin = buf.len(); buf.extend(prefix.as_bytes()); - buf.extend(extra_prefix); - if let Some(back) = extra_prefix.last() { - if *back != b' ' { - buf.push(b' '); - } + buf.extend(ar.prefix); + if ar.prefix.last().map(|&x| x == b' ') == Some(false) { + buf.push(b' '); } - self.formatter - .format_record(buf, record.with_source(&line[xn + offsets.start..xn + offsets.end])); + self.formatter.format_record(buf, record.with_source(ar.source)); let end = buf.len(); observer.observe_record(&record, begin..end); produced_some = true; } } - let remainder = if parsed_some { - &line[xn + stream.0.byte_offset()..] - } else { - line - }; + let remainder = if parsed_some { &line[last_offset..] } else { line }; if remainder.len() != 0 && self.show_unparsed() { if !parsed_some { buf.extend(prefix.as_bytes()); diff --git a/src/error.rs b/src/error.rs index aab6de4b..a9a538e0 100644 --- a/src/error.rs +++ b/src/error.rs @@ -76,6 +76,8 @@ pub enum Error { UnsupportedFormatForIndexing { path: PathBuf, format: String }, #[error("failed to parse json: {0}")] JsonParseError(#[from] serde_json::Error), + #[error("failed to parse logfmt: {0}")] + LogfmtParseError(#[from] crate::logfmt::error::Error), #[error(transparent)] TryFromIntError(#[from] TryFromIntError), #[error(transparent)] diff --git a/src/formatting.rs b/src/formatting.rs index 155cd614..fd706c45 100644 --- a/src/formatting.rs +++ b/src/formatting.rs @@ -1,22 +1,16 @@ // std imports use std::sync::Arc; -// third-party imports -use json::{de::Read, de::StrRead, value::RawValue}; -use serde_json as json; - // local imports -use crate::datefmt; -use crate::filtering::IncludeExcludeSetting; -use crate::fmtx; -use crate::model; -use crate::settings::Formatting; -use crate::theme; -use crate::IncludeExcludeKeyFilter; -use datefmt::DateTimeFormatter; -use fmtx::{aligned_left, centered}; -use model::{Caller, Level}; -use theme::{Element, StylingPush, Theme}; +use crate::{ + datefmt::DateTimeFormatter, + filtering::IncludeExcludeSetting, + fmtx::{aligned_left, centered}, + model::{self, Caller, Level, RawValue, ValueKind}, + settings::Formatting, + theme::{Element, StylingPush, Theme}, + IncludeExcludeKeyFilter, +}; // --- @@ -152,22 +146,17 @@ impl RecordFormatter { // // message text // - if let Some(text) = rec.message { + if let Some(text) = &rec.message { s.batch(|buf| buf.push(b' ')); - s.element(Element::Message, |s| self.format_message(s, text)); + s.element(Element::Message, |s| self.format_message(s, *text)); } // // fields // let mut some_fields_hidden = false; for (k, v) in rec.fields() { - if !self.hide_empty_fields - || match v.get() { - r#""""# | "null" | "{}" | "[]" => false, - _ => true, - } - { - some_fields_hidden |= !self.format_field(s, k, v, Some(&self.fields)); + if !self.hide_empty_fields || v.is_empty() { + some_fields_hidden |= !self.format_field(s, k, *v, Some(&self.fields)); } } if some_fields_hidden { @@ -205,51 +194,43 @@ impl RecordFormatter { }); } - fn format_field>( + fn format_field<'a, S: StylingPush>( &self, s: &mut S, key: &str, - value: &RawValue, + value: RawValue<'a>, filter: Option<&IncludeExcludeKeyFilter>, ) -> bool { let mut fv = FieldFormatter::new(self); fv.format(s, key, value, filter, IncludeExcludeSetting::Unspecified) } - fn format_value>(&self, s: &mut S, value: &RawValue) { + fn format_value<'a, S: StylingPush>(&self, s: &mut S, value: RawValue<'a>) { let mut fv = FieldFormatter::new(self); fv.format_value(s, value, None, IncludeExcludeSetting::Unspecified); } - fn format_message>(&self, s: &mut S, value: &RawValue) { - match value.get().as_bytes()[0] { - b'"' => { - s.element(Element::Message, |s| { - s.batch(|buf| format_str_unescaped(buf, value.get())) - }); + fn format_message<'a, S: StylingPush>(&self, s: &mut S, value: RawValue<'a>) { + match value.kind() { + ValueKind::QuotedString | ValueKind::String => { + s.element(Element::Message, |s| s.batch(|buf| value.format_as_str(buf))); } - b'0'..=b'9' | b'-' | b'+' | b'.' => { - s.element(Element::Number, |s| { - s.batch(|buf| buf.extend_from_slice(value.get().as_bytes())) - }); + ValueKind::Number => { + s.element(Element::Number, |s| s.batch(|buf| value.format_readable(buf))); } - b't' | b'f' => { - s.element(Element::Boolean, |s| { - s.batch(|buf| buf.extend_from_slice(value.get().as_bytes())) - }); + ValueKind::Boolean => { + s.element(Element::Boolean, |s| s.batch(|buf| value.format_readable(buf))); } - b'n' => { - s.element(Element::Null, |s| { - s.batch(|buf| buf.extend_from_slice(value.get().as_bytes())) - }); + ValueKind::Null => { + s.element(Element::Null, |s| s.batch(|buf| value.format_readable(buf))); } - b'{' => { + ValueKind::Object => { s.element(Element::Object, |s| { - let item = json::from_str::(value.get()).unwrap(); + let item = value.parse_object().unwrap(); s.batch(|buf| buf.push(b'{')); let mut has_some = false; for (k, v) in item.fields.iter() { - has_some |= self.format_field(s, k, v, None) + has_some |= self.format_field(s, k, *v, None) } s.batch(|buf| { if has_some { @@ -259,21 +240,18 @@ impl RecordFormatter { }); }); } - b'[' => { - let item = json::from_str::>(value.get()).unwrap(); + ValueKind::Array => { + let item = value.parse_array::<256>().unwrap(); let is_byte_string = item .iter() - .map(|&v| { - let v = v.get().as_bytes(); - only_digits(v) && (v.len() < 3 || (v.len() == 3 && v <= &b"255"[..])) - }) + .map(|&v| v.is_byte_code()) .position(|x| x == false) .is_none(); if is_byte_string { s.batch(|buf| buf.extend_from_slice(b"b'")); s.element(Element::Message, |s| { for item in item.iter() { - let b = atoi::atoi::(item.get().as_bytes()).unwrap(); + let b = item.parse_byte_code(); if b >= 32 { s.batch(|buf| buf.push(b)); } else { @@ -298,17 +276,12 @@ impl RecordFormatter { } else { first = false; } - self.format_value(s, v); + self.format_value(s, *v); } - s.batch(|buf| buf.push(b']')); + s.batch(|buf| buf.push(b']')) }); } } - _ => { - s.element(Element::Message, |s| { - s.batch(|buf| buf.extend_from_slice(value.get().as_bytes())) - }); - } }; } } @@ -321,13 +294,6 @@ impl RecordWithSourceFormatter for RecordFormatter { // --- -fn format_str_unescaped(buf: &mut Buf, s: &str) { - let mut reader = StrRead::new(&s[1..]); - reader.parse_str_raw(buf).unwrap(); -} - -// --- - struct FieldFormatter<'a> { rf: &'a RecordFormatter, } @@ -341,7 +307,7 @@ impl<'a> FieldFormatter<'a> { &mut self, s: &mut S, key: &str, - value: &'a RawValue, + value: RawValue<'a>, filter: Option<&IncludeExcludeKeyFilter>, setting: IncludeExcludeSetting, ) -> bool { @@ -371,9 +337,7 @@ impl<'a> FieldFormatter<'a> { if self.rf.unescape_fields { self.format_value(s, value, filter, setting); } else { - s.element(Element::String, |s| { - s.batch(|buf| buf.extend_from_slice(value.get().as_bytes())) - }); + s.element(Element::String, |s| s.batch(|buf| value.format_as_json_str(buf))); } true } @@ -381,42 +345,34 @@ impl<'a> FieldFormatter<'a> { fn format_value>( &mut self, s: &mut S, - value: &'a RawValue, + value: RawValue<'a>, filter: Option<&IncludeExcludeKeyFilter>, setting: IncludeExcludeSetting, ) { - match value.get().as_bytes()[0] { - b'"' => { + match value.kind() { + ValueKind::String | ValueKind::QuotedString => { s.element(Element::String, |s| { s.batch(|buf| { - buf.extend_from_slice(self.rf.cfg.punctuation.string_opening_quote.as_bytes()); - format_str_unescaped(buf, value.get()); - buf.extend_from_slice(self.rf.cfg.punctuation.string_closing_quote.as_bytes()); + value.format_as_str(buf); }) }); } - b'0'..=b'9' | b'-' | b'+' | b'.' => { - s.element(Element::Number, |s| { - s.batch(|buf| buf.extend_from_slice(value.get().as_bytes())) - }); + ValueKind::Number => { + s.element(Element::Number, |s| s.batch(|buf| value.format_readable(buf))); } - b't' | b'f' => { - s.element(Element::Boolean, |s| { - s.batch(|buf| buf.extend_from_slice(value.get().as_bytes())) - }); + ValueKind::Boolean => { + s.element(Element::Boolean, |s| s.batch(|buf| value.format_readable(buf))); } - b'n' => { - s.element(Element::Null, |s| { - s.batch(|buf| buf.extend_from_slice(value.get().as_bytes())) - }); + ValueKind::Null => { + s.element(Element::Null, |s| s.batch(|buf| value.format_readable(buf))); } - b'{' => { - let item = json::from_str::(value.get()).unwrap(); + ValueKind::Object => { + let item = value.parse_object().unwrap(); s.element(Element::Object, |s| { s.batch(|buf| buf.push(b'{')); let mut some_fields_hidden = false; for (k, v) in item.fields.iter() { - some_fields_hidden |= !self.format(s, k, v, filter, setting); + some_fields_hidden |= !self.format(s, k, *v, filter, setting); } if some_fields_hidden { s.element(Element::Ellipsis, |s| s.batch(|buf| buf.extend_from_slice(b" ..."))); @@ -429,9 +385,9 @@ impl<'a> FieldFormatter<'a> { }); }); } - b'[' => { + ValueKind::Array => { s.element(Element::Array, |s| { - let item = json::from_str::>(value.get()).unwrap(); + let item = value.parse_array::<32>().unwrap(); s.batch(|buf| buf.push(b'[')); let mut first = true; for v in item.iter() { @@ -440,24 +396,15 @@ impl<'a> FieldFormatter<'a> { } else { first = false; } - self.format_value(s, v, None, IncludeExcludeSetting::Unspecified); + self.format_value(s, *v, None, IncludeExcludeSetting::Unspecified); } s.batch(|buf| buf.push(b']')); }); } - _ => { - s.element(Element::String, |s| { - s.batch(|buf| buf.extend_from_slice(value.get().as_bytes())) - }); - } }; } } -fn only_digits(b: &[u8]) -> bool { - b.iter().position(|&b| !b.is_ascii_digit()).is_none() -} - const HEXDIGIT: [u8; 16] = [ b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'a', b'b', b'c', b'd', b'e', b'f', ]; @@ -465,15 +412,17 @@ const HEXDIGIT: [u8; 16] = [ #[cfg(test)] mod tests { use super::*; - use crate::model::Record; - use crate::theme::Theme; - use crate::themecfg::testing; - use crate::timestamp::Timestamp; - use crate::timezone::Tz; - use crate::{error::Error, settings::Punctuation}; + use crate::{ + datefmt::LinuxDateFormat, + error::Error, + model::{Record, RecordFields}, + settings::Punctuation, + theme::Theme, + themecfg::testing, + timestamp::Timestamp, + timezone::Tz, + }; use chrono::{Offset, Utc}; - use datefmt::LinuxDateFormat; - use json::value::RawValue; use serde_json as json; fn format(rec: &Record) -> Result { @@ -494,19 +443,25 @@ mod tests { Ok(String::from_utf8(buf)?) } + fn json_raw_value(s: &str) -> Box { + json::value::RawValue::from_string(s.into()).unwrap() + } + #[test] fn test_nested_objects() { assert_eq!( format(&Record { ts: Some(Timestamp::new("2000-01-02T03:04:05.123Z", None)), - message: Some(RawValue::from_string(r#""tm""#.into()).unwrap().as_ref()), + message: Some(RawValue::Json(&json_raw_value(r#""tm""#))), level: Some(Level::Debug), logger: Some("tl"), caller: Some(Caller::Text("tc")), - extra: heapless::Vec::from_slice(&[ - ("ka", RawValue::from_string(r#"{"va":{"kb":42}}"#.into()).unwrap().as_ref()), - ]).unwrap(), - extrax: Vec::default(), + fields: RecordFields{ + head: heapless::Vec::from_slice(&[ + ("ka", RawValue::Json(&json_raw_value(r#"{"va":{"kb":42}}"#))), + ]).unwrap(), + tail: Vec::default(), + }, predefined: heapless::Vec::default(), }).unwrap(), String::from("\u{1b}[0;2;3m00-01-02 03:04:05.123 \u{1b}[0;36m|\u{1b}[0;95mDBG\u{1b}[0;36m|\u{1b}[0;2;3m \u{1b}[0;2;4mtl:\u{1b}[0;2;3m \u{1b}[0;1;39mtm \u{1b}[0;32mka\u{1b}[0;2m:\u{1b}[0;33m{ \u{1b}[0;32mva\u{1b}[0;2m:\u{1b}[0;33m{ \u{1b}[0;32mkb\u{1b}[0;2m:\u{1b}[0;94m42\u{1b}[0;33m } }\u{1b}[0;2;3m @ tc\u{1b}[0m"), diff --git a/src/index.rs b/src/index.rs index ac8cdcea..36e8ef60 100644 --- a/src/index.rs +++ b/src/index.rs @@ -28,7 +28,6 @@ use crossbeam_channel::RecvError; use crossbeam_utils::thread; use itertools::izip; use serde::{Deserialize, Serialize}; -use serde_json as json; use sha2::{Digest, Sha256}; // local imports @@ -129,6 +128,7 @@ pub struct Indexer { dir: PathBuf, parser: Parser, delimiter: Delimiter, + allow_prefix: bool, } impl Indexer { @@ -140,6 +140,7 @@ impl Indexer { dir: PathBuf, fields: &PredefinedFields, delimiter: Delimiter, + allow_prefix: bool, ) -> Self { Self { concurrency, @@ -148,6 +149,7 @@ impl Indexer { dir, parser: Parser::new(ParserSettings::new(&fields, empty(), false)), delimiter, + allow_prefix, } } @@ -347,36 +349,37 @@ impl Indexer { let data = strip(data, b'\r'); let mut ts = None; if data.len() != 0 { - let prefix = data.split(|c| *c == b'{').next().unwrap(); - let data = &data[prefix.len()..]; - match json::from_slice::(data) { - Ok(rec) => { - let rec = self.parser.parse(rec); - let mut flags = 0; - match rec.level { - Some(Level::Debug) => { - flags |= schema::FLAG_LEVEL_DEBUG; + let mut stream = RawRecord::parser().allow_prefix(self.allow_prefix).parse(data); + while let Some(item) = stream.next() { + match item { + Ok(ar) => { + let rec = self.parser.parse(ar.record); + let mut flags = 0; + match rec.level { + Some(Level::Debug) => { + flags |= schema::FLAG_LEVEL_DEBUG; + } + Some(Level::Info) => { + flags |= schema::FLAG_LEVEL_INFO; + } + Some(Level::Warning) => { + flags |= schema::FLAG_LEVEL_WARNING; + } + Some(Level::Error) => { + flags |= schema::FLAG_LEVEL_ERROR; + } + None => (), } - Some(Level::Info) => { - flags |= schema::FLAG_LEVEL_INFO; + ts = rec.ts.and_then(|ts| ts.unix_utc()).map(|ts| ts.into()); + if ts < prev_ts { + sorted = false; } - Some(Level::Warning) => { - flags |= schema::FLAG_LEVEL_WARNING; - } - Some(Level::Error) => { - flags |= schema::FLAG_LEVEL_ERROR; - } - None => (), + prev_ts = ts; + stat.add_valid(ts, flags); } - ts = rec.ts.and_then(|ts| ts.unix_utc()).map(|ts| ts.into()); - if ts < prev_ts { - sorted = false; + _ => { + stat.add_invalid(); } - prev_ts = ts; - stat.add_valid(ts, flags); - } - _ => { - stat.add_invalid(); } } } diff --git a/src/lib.rs b/src/lib.rs index 3e11425c..24e120f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,6 +24,7 @@ mod eseq; mod filtering; mod formatting; mod fsmon; +mod logfmt; mod model; mod pool; mod replay; diff --git a/src/logfmt/de.rs b/src/logfmt/de.rs new file mode 100644 index 00000000..39344470 --- /dev/null +++ b/src/logfmt/de.rs @@ -0,0 +1,994 @@ +use std::{ + ops::{AddAssign, Deref, MulAssign, Neg}, + str, +}; + +use serde::de::{self, DeserializeSeed, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, VariantAccess, Visitor}; +use serde::Deserialize; + +use super::error::{Error, Result}; + +#[inline] +pub fn from_str<'a, T>(s: &'a str) -> Result +where + T: Deserialize<'a>, +{ + from_slice(s.as_bytes()) +} + +#[inline] +pub fn from_slice<'a, T>(s: &'a [u8]) -> Result +where + T: Deserialize<'a>, +{ + let mut deserializer = Deserializer::from_slice(s); + let t = T::deserialize(&mut deserializer)?; + if deserializer.parser.tail().is_empty() { + Ok(t) + } else { + Err(Error::TrailingCharacters) + } +} + +// --- + +pub struct Deserializer<'de> { + scratch: Vec, + parser: Parser<'de>, +} + +impl<'de> Deserializer<'de> { + #[inline] + pub fn from_str(input: &'de str) -> Self { + Self::from_slice(input.as_bytes()) + } + + #[inline] + pub fn from_slice(input: &'de [u8]) -> Self { + Deserializer { + scratch: Vec::new(), + parser: Parser { + input, + index: 0, + key: true, + }, + } + } + + #[inline] + pub fn parse_str_to_buf(&mut self, buf: &mut Vec) -> Result<()> { + match self.parser.parse_value(buf, false) { + Ok(Reference::Borrowed(b)) => { + buf.extend(b.as_bytes()); + Ok(()) + } + Ok(Reference::Copied(_)) => Ok(()), + Err(e) => Err(e), + } + } +} + +impl<'de> Deserializer<'de> { + #[inline] + fn parse_bool(&mut self) -> Result { + self.parser.parse_bool() + } + + #[inline] + fn parse_unsigned(&mut self) -> Result + where + T: AddAssign + MulAssign + From, + { + self.parser.parse_unsigned() + } + + #[inline] + fn parse_signed(&mut self) -> Result + where + T: Neg + AddAssign + MulAssign + From, + { + self.parser.parse_signed() + } + + #[inline] + fn parse_string<'s>(&'s mut self, ignore: bool) -> Result> { + self.scratch.clear(); + self.parser.parse_string(&mut self.scratch, ignore) + } + + #[inline] + fn deserialize_raw_value(&mut self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + self.parser.deserialize_raw_value(visitor) + } +} + +impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { + type Error = Error; + + #[inline] + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + #[inline] + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_bool(self.parse_bool()?) + } + + #[inline] + fn deserialize_i8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i8(self.parse_signed()?) + } + + #[inline] + fn deserialize_i16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i16(self.parse_signed()?) + } + + #[inline] + fn deserialize_i32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i32(self.parse_signed()?) + } + + #[inline] + fn deserialize_i64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i64(self.parse_signed()?) + } + + #[inline] + fn deserialize_u8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u8(self.parse_unsigned()?) + } + + #[inline] + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u16(self.parse_unsigned()?) + } + + #[inline] + fn deserialize_u32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u32(self.parse_unsigned()?) + } + + #[inline] + fn deserialize_u64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u64(self.parse_unsigned()?) + } + + fn deserialize_f32(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + unimplemented!() + } + + fn deserialize_f64(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + unimplemented!() + } + + fn deserialize_char(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + unimplemented!() + } + + #[inline] + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self.parse_string(false)? { + Reference::Borrowed(b) => visitor.visit_borrowed_str(b), + Reference::Copied(c) => visitor.visit_str(c), + } + } + + #[inline] + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_bytes(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + unimplemented!() + } + + fn deserialize_byte_buf(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + unimplemented!() + } + + #[inline] + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if self.parser.input.starts_with(b"null") { + self.parser.input = &self.parser.input["null".len()..]; + visitor.visit_none() + } else { + visitor.visit_some(self) + } + } + + #[inline] + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if self.parser.input.starts_with(b"null") { + self.parser.input = &self.parser.input["null".len()..]; + visitor.visit_unit() + } else { + Err(Error::ExpectedNull) + } + } + + #[inline] + fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_unit(visitor) + } + + #[inline] + fn deserialize_newtype_struct(self, name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + if name == super::raw::TOKEN { + return self.deserialize_raw_value(visitor); + } + + visitor.visit_newtype_struct(self) + } + + #[inline] + fn deserialize_seq(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + unimplemented!() + } + + #[inline] + fn deserialize_tuple(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + #[inline] + fn deserialize_tuple_struct(self, _name: &'static str, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + #[inline] + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + Ok(visitor.visit_map(KeyValueSequence::new(self))?) + } + + #[inline] + fn deserialize_struct( + self, + _name: &'static str, + _fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_map(visitor) + } + + #[inline] + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + if self.parser.peek() == Some(b'"') { + visitor.visit_enum(self.parse_string(false)?.into_deserializer()) + } else if self.parser.next() == Some(b'{') { + let value = visitor.visit_enum(Enum::new(self))?; + if self.parser.next() == Some(b'}') { + Ok(value) + } else { + Err(Error::ExpectedMapEnd) + } + } else { + Err(Error::ExpectedEnum) + } + } + + #[inline] + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + #[inline] + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_any(visitor) + } +} + +// --- + +struct Parser<'de> { + input: &'de [u8], + index: usize, + key: bool, +} + +impl<'de> Parser<'de> { + #[inline] + fn peek(&mut self) -> Option { + if self.index < self.input.len() { + Some(self.input[self.index]) + } else { + None + } + } + + #[inline] + fn next(&mut self) -> Option { + if self.index < self.input.len() { + let ch = self.input[self.index]; + self.index += 1; + Some(ch) + } else { + None + } + } + + #[inline] + fn parse_bool(&mut self) -> Result { + if self.tail().starts_with(b"true") { + self.advance(4); + self.key = !self.key; + Ok(true) + } else if self.tail().starts_with(b"false") { + self.advance(5); + self.key = !self.key; + Ok(false) + } else { + Err(Error::ExpectedBoolean) + } + } + + fn parse_unsigned(&mut self) -> Result + where + T: AddAssign + MulAssign + From, + { + let mut int = match self.next() { + Some(ch @ b'0'..=b'9') => T::from(ch - b'0'), + _ => { + return Err(Error::ExpectedInteger); + } + }; + loop { + match self.peek() { + Some(ch @ b'0'..=b'9') => { + self.advance(1); + int *= T::from(10); + int += T::from(ch - b'0'); + } + _ => { + self.key = !self.key; + return Ok(int); + } + } + } + } + + fn parse_signed(&mut self) -> Result + where + T: Neg + AddAssign + MulAssign + From, + { + let mut negative = false; + if self.peek() == Some(b'-') { + negative = true; + self.advance(1); + } + + let mut int = match self.next() { + Some(ch @ b'0'..=b'9') => T::from((ch - b'0') as i8), + _ => { + return Err(Error::ExpectedInteger); + } + }; + loop { + match self.peek() { + Some(ch @ b'0'..=b'9') => { + self.advance(1); + int *= T::from(10); + int += T::from((ch - b'0') as i8); + } + _ => { + self.key = !self.key; + if negative { + int = -int; + } + return Ok(int); + } + } + } + } + + fn skip_garbage(&mut self) { + if let Some(i) = self.tail().iter().position(|&c| c > b' ') { + self.advance(i); + } else { + self.index = self.input.len(); + } + } + + fn parse_string<'s>(&'s mut self, scratch: &'s mut Vec, ignore: bool) -> Result> { + if self.key { + self.key = false; + self.parse_key().map(Reference::Borrowed) + } else { + self.parse_value(scratch, ignore) + } + } + + fn parse_key(&mut self) -> Result<&'de str> { + self.skip_garbage(); + + let start = self.index; + let mut unicode = false; + + while self.index < self.input.len() { + let c = self.input[self.index]; + match c { + b'=' => { + break; + } + b'"' => { + return Err(Error::ExpectedKey); + } + b'\x00'..=b' ' => { + self.key = true; + break; + } + b'\x80'..=b'\xFF' => { + unicode = true; + self.index += 1; + } + _ => { + self.index += 1; + } + } + } + + if self.index == start { + return Err(Error::ExpectedKey); + } + + let s = &self.input[start..self.index]; + self.next(); + + if unicode { + return Ok(str::from_utf8(s).map_err(|_| Error::InvalidUnicodeCodePoint)?); + } + + Ok(unsafe { str::from_utf8_unchecked(s) }) + } + + fn parse_value<'s>(&'s mut self, scratch: &'s mut Vec, ignore: bool) -> Result> { + match self.peek() { + Some(b'"') => self.parse_quoted_value(scratch, ignore), + _ => self.parse_unquoted_value().map(Reference::Borrowed), + } + } + + fn parse_unquoted_value(&mut self) -> Result<&'de str> { + self.skip_garbage(); + + let start = self.index; + let mut unicode = false; + + while self.index < self.input.len() { + let c = self.input[self.index]; + match c { + b'\x00'..=b' ' => { + break; + } + b'"' | b'=' => { + return Err(Error::UnexpectedByte(c)); + } + b'\x80'..=b'\xFF' => { + unicode = true; + self.index += 1; + } + _ => { + self.index += 1; + } + } + } + + self.key = true; + if self.index == start { + return Ok(""); + } + + let s = &self.input[start..self.index]; + + if unicode { + return Ok(str::from_utf8(s).map_err(|_| Error::InvalidUnicodeCodePoint)?); + } + + Ok(unsafe { str::from_utf8_unchecked(s) }) + } + + fn parse_quoted_value<'s>(&'s mut self, scratch: &'s mut Vec, ignore: bool) -> Result> { + self.next(); + let mut no_escapes = true; + let mut start = self.index; + + loop { + while self.index < self.input.len() && !ESCAPE[self.input[self.index] as usize] { + self.advance(1); + } + if self.index == self.input.len() { + return Err(Error::Eof); + } + match self.input[self.index] { + b'"' => { + self.key = true; + if no_escapes { + let borrowed = &self.input[start..self.index]; + self.advance(1); + return Ok(Reference::Borrowed(unsafe { str::from_utf8_unchecked(borrowed) })); + } + + if !ignore { + scratch.extend_from_slice(&self.input[start..self.index]); + } + self.advance(1); + + return if !ignore { + Ok(Reference::Copied(unsafe { str::from_utf8_unchecked(scratch) })) + } else { + Ok(Reference::Borrowed(unsafe { + str::from_utf8_unchecked(&self.input[self.index..self.index]) + })) + }; + } + b'\\' => { + no_escapes = false; + if !ignore { + scratch.extend_from_slice(&self.input[start..self.index]); + } + self.advance(1); + self.parse_escape(scratch, ignore)?; + start = self.index; + } + _ => { + self.advance(1); + return Err(Error::UnexpectedControlCharacter); + } + } + } + } + + fn parse_escape(&mut self, scratch: &mut Vec, ignore: bool) -> Result<()> { + let Some(ch) = self.next() else { + return Err(Error::Eof); + }; + + match ch { + b'"' => scratch.push(b'"'), + b'\\' => scratch.push(b'\\'), + b'/' => scratch.push(b'/'), + b'b' => scratch.push(b'\x08'), + b'f' => scratch.push(b'\x0c'), + b'n' => scratch.push(b'\n'), + b'r' => scratch.push(b'\r'), + b't' => scratch.push(b'\t'), + b'u' => { + let c = match self.decode_hex_escape()? { + 0xDC00..=0xDFFF => { + return Err(Error::LoneLeadingSurrogateInHexEscape); + } + + n1 @ 0xD800..=0xDBFF => { + if self.peek() == Some(b'\\') { + self.next(); + } else { + return Err(Error::UnexpectedEndOfHexEscape); + } + + if self.peek() == Some(b'u') { + self.next(); + } else { + return Err(Error::UnexpectedEndOfHexEscape); + } + + let n2 = self.decode_hex_escape()?; + + if n2 < 0xDC00 || n2 > 0xDFFF { + return Err(Error::LoneLeadingSurrogateInHexEscape); + } + + let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000; + + match char::from_u32(n) { + Some(c) => c, + None => { + return Err(Error::InvalidUnicodeCodePoint); + } + } + } + + n => char::from_u32(n as u32).unwrap(), + }; + + if !ignore { + scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes()); + } + } + _ => { + return Err(Error::InvalidEscape); + } + } + + Ok(()) + } + + fn deserialize_raw_value(&mut self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let start_index = self.index; + self.ignore_value()?; + let raw = &self.input[start_index..self.index]; + visitor.visit_map(super::raw::BorrowedRawDeserializer { + raw_value: Some(unsafe { str::from_utf8_unchecked(raw) }), + }) + } + + fn ignore_value(&mut self) -> Result<()> { + let mut scratch = Vec::new(); + self.parse_string(&mut scratch, true).map(|_| ()) + } + + fn decode_hex_escape(&mut self) -> Result { + if self.input.len() < 4 { + self.input = &self.input[self.input.len()..]; + return Err(Error::Eof); + } + + let mut n = 0; + for i in 0..4 { + let ch = decode_hex_val(self.input[i]); + match ch { + None => { + self.input = &self.input[i..]; + return Err(Error::InvalidEscape); + } + Some(val) => { + n = (n << 4) + val; + } + } + } + self.input = &self.input[4..]; + Ok(n) + } + + #[inline] + fn tail(&self) -> &'de [u8] { + &self.input[self.index..] + } + + #[inline] + fn advance(&mut self, n: usize) { + self.index += n; + } +} + +// --- + +struct KeyValueSequence<'a, 'de: 'a> { + de: &'a mut Deserializer<'de>, +} + +impl<'a, 'de> KeyValueSequence<'a, 'de> { + fn new(de: &'a mut Deserializer<'de>) -> Self { + KeyValueSequence { de } + } +} + +impl<'de, 'a> SeqAccess<'de> for KeyValueSequence<'a, 'de> { + type Error = Error; + + fn next_element_seed(&mut self, _seed: T) -> Result> + where + T: DeserializeSeed<'de>, + { + unimplemented!() + } +} + +impl<'de, 'a> MapAccess<'de> for KeyValueSequence<'a, 'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: DeserializeSeed<'de>, + { + if self.de.parser.tail().len() == 0 { + return Ok(None); + } + seed.deserialize(&mut *self.de).map(Some) + } + + #[inline] + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + seed.deserialize(&mut *self.de) + } +} + +struct Enum<'a, 'de: 'a> { + de: &'a mut Deserializer<'de>, +} + +impl<'a, 'de> Enum<'a, 'de> { + #[inline] + fn new(de: &'a mut Deserializer<'de>) -> Self { + Enum { de } + } +} + +impl<'de, 'a> EnumAccess<'de> for Enum<'a, 'de> { + type Error = Error; + type Variant = Self; + + #[inline] + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> + where + V: DeserializeSeed<'de>, + { + let val = seed.deserialize(&mut *self.de)?; + if self.de.parser.next() == Some(b'=') { + Ok((val, self)) + } else { + Err(Error::ExpectedMapKeyValueDelimiter) + } + } +} + +impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> { + type Error = Error; + + fn unit_variant(self) -> Result<()> { + Err(Error::ExpectedString) + } + + #[inline] + fn newtype_variant_seed(self, seed: T) -> Result + where + T: DeserializeSeed<'de>, + { + seed.deserialize(self.de) + } + + #[inline] + fn tuple_variant(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_seq(self.de, visitor) + } + + #[inline] + fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_map(self.de, visitor) + } +} + +pub enum Reference<'b, 'c, T> +where + T: ?Sized + 'static, +{ + Borrowed(&'b T), + Copied(&'c T), +} + +impl<'b, 'c, T> Deref for Reference<'b, 'c, T> +where + T: ?Sized + 'static, +{ + type Target = T; + + #[inline] + fn deref(&self) -> &Self::Target { + match *self { + Reference::Borrowed(b) => b, + Reference::Copied(c) => c, + } + } +} + +#[inline] +fn decode_hex_val(val: u8) -> Option { + let n = HEX[val as usize] as u16; + if n == 255 { + None + } else { + Some(n) + } +} + +// Lookup table of bytes that must be escaped. A value of true at index i means +// that byte i requires an escape sequence in the input. +static ESCAPE: [bool; 256] = { + const CT: bool = true; // control character \x00..=\x1F + const QU: bool = true; // quote \x22 + const BS: bool = true; // backslash \x5C + const __: bool = false; // allow unescaped + [ + // 1 2 3 4 5 6 7 8 9 A B C D E F + CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 0 + CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 1 + __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4 + __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F + ] +}; + +static HEX: [u8; 256] = { + const __: u8 = 255; // not a hex digit + [ + // 1 2 3 4 5 6 7 8 9 A B C D E F + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2 + 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, __, __, __, __, __, __, // 3 + __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5 + __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9 + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F + ] +}; + +// --- + +#[test] +fn test_struct_no_escape() { + #[derive(Deserialize, PartialEq, Debug)] + struct Test { + int: u32, + str1: String, + str2: String, + } + + let j = r#"int=42 str1=a str2="b c""#; + let expected = Test { + int: 42, + str1: "a".to_string(), + str2: "b c".to_string(), + }; + assert_eq!(expected, from_str(j).unwrap()); +} + +#[test] +fn test_struct_escape() { + #[derive(Deserialize, PartialEq, Debug)] + struct Test { + int: u32, + str1: String, + str2: String, + } + + let j = r#"int=0 str1="b=c" str2="a\nb""#; + let expected = Test { + int: 0, + str1: "b=c".to_string(), + str2: "a\nb".to_string(), + }; + assert_eq!(expected, from_str(j).unwrap()); +} +#[test] +fn test_raw() { + #[derive(Deserialize)] + struct Test<'a> { + int: i32, + str1: String, + #[serde(borrow)] + str2: &'a super::raw::RawValue, + } + + let j = r#"int=-42 str1=a str2="b \nc""#; + let parsed: Test = from_str(j).unwrap(); + assert_eq!(parsed.int, -42); + assert_eq!(parsed.str1, "a"); + assert_eq!(parsed.str2.get(), r#""b \nc""#); +} diff --git a/src/logfmt/error.rs b/src/logfmt/error.rs new file mode 100644 index 00000000..1e1213c5 --- /dev/null +++ b/src/logfmt/error.rs @@ -0,0 +1,97 @@ +use std::{error, fmt}; + +use serde::de; + +pub type Result = std::result::Result; + +#[derive(Debug)] +pub enum Error { + Eof, + ExpectedBoolean, + ExpectedInteger, + ExpectedNull, + ExpectedString, + ExpectedArray, + ExpectedArrayDelimiter, + ExpectedArrayEnd, + ExpectedMap, + ExpectedMapDelimiter, + ExpectedMapKeyValueDelimiter, + ExpectedMapEnd, + ExpectedEnum, + ExpectedKey, + Syntax, + InvalidEscape, + LoneLeadingSurrogateInHexEscape, + UnexpectedEndOfHexEscape, + InvalidUnicodeCodePoint, + TrailingCharacters, + UnexpectedControlCharacter, + UnexpectedByte(u8), + Custom(String), +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Eof => f.write_str("unexpected end of input"), + Self::ExpectedBoolean => f.write_str("expected boolean"), + Self::ExpectedInteger => f.write_str("expected integer"), + Self::ExpectedNull => f.write_str("expected null"), + Self::ExpectedString => f.write_str("expected string"), + Self::ExpectedArray => f.write_str("expected array"), + Self::ExpectedArrayDelimiter => f.write_str("expected space or array end"), + Self::ExpectedArrayEnd => f.write_str("expected array end"), + Self::ExpectedMap => f.write_str("expected map"), + Self::ExpectedMapDelimiter => f.write_str("expected space or map end"), + Self::ExpectedMapKeyValueDelimiter => f.write_str("expected equal sign"), + Self::ExpectedMapEnd => f.write_str("expected map end"), + Self::ExpectedEnum => f.write_str("expected enum"), + Self::ExpectedKey => f.write_str("expected key"), + Self::Syntax => f.write_str("syntax error"), + Self::InvalidEscape => f.write_str("invalid escape sequence"), + Self::LoneLeadingSurrogateInHexEscape => f.write_str("lone leading surrogate in hex escape"), + Self::UnexpectedEndOfHexEscape => f.write_str("unexpected end of hex escape"), + Self::InvalidUnicodeCodePoint => f.write_str("invalid unicode code point"), + Self::TrailingCharacters => f.write_str("trailing characters"), + Self::UnexpectedControlCharacter => f.write_str("unexpected control character"), + Self::UnexpectedByte(byte) => write!(f, "unexpected byte: {}", byte), + Self::Custom(msg) => f.write_str(msg), + } + } +} + +impl serde::de::StdError for Error { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + None + } +} + +impl de::Error for Error { + #[cold] + fn custom(msg: T) -> Error { + Self::Custom(msg.to_string()) + } + + #[cold] + fn invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self { + Error::custom(format_args!("invalid type: {}, expected {}", Unexpected(unexp), exp,)) + } + + #[cold] + fn invalid_value(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self { + Error::custom(format_args!("invalid value: {}, expected {}", Unexpected(unexp), exp,)) + } +} + +struct Unexpected<'a>(de::Unexpected<'a>); + +impl<'a> fmt::Display for Unexpected<'a> { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + match self.0 { + de::Unexpected::Unit => formatter.write_str("null"), + de::Unexpected::Float(value) => write!(formatter, "floating point `{}`", value), + unexp => fmt::Display::fmt(&unexp, formatter), + } + } +} diff --git a/src/logfmt/mod.rs b/src/logfmt/mod.rs new file mode 100644 index 00000000..370488e4 --- /dev/null +++ b/src/logfmt/mod.rs @@ -0,0 +1,5 @@ +pub mod de; +pub mod error; +pub mod raw; + +pub use de::{from_slice, from_str}; diff --git a/src/logfmt/raw.rs b/src/logfmt/raw.rs new file mode 100644 index 00000000..28740ed3 --- /dev/null +++ b/src/logfmt/raw.rs @@ -0,0 +1,557 @@ +use core::{ + fmt::{self, Debug, Display}, + mem, +}; +use serde::{ + de::{ + self, value::BorrowedStrDeserializer, Deserialize, DeserializeSeed, Deserializer, IntoDeserializer, MapAccess, + Unexpected, Visitor, + }, + forward_to_deserialize_any, + ser::{Serialize, SerializeStruct, Serializer}, +}; + +use super::error::Error; + +pub struct RawValue { + v: str, +} + +impl RawValue { + fn from_borrowed(v: &str) -> &Self { + unsafe { mem::transmute::<&str, &RawValue>(v) } + } + + fn from_owned(v: Box) -> Box { + unsafe { mem::transmute::, Box>(v) } + } + + fn into_owned(raw_value: Box) -> Box { + unsafe { mem::transmute::, Box>(raw_value) } + } +} + +impl Clone for Box { + fn clone(&self) -> Self { + (**self).to_owned() + } +} + +impl ToOwned for RawValue { + type Owned = Box; + + fn to_owned(&self) -> Self::Owned { + RawValue::from_owned(self.v.to_owned().into_boxed_str()) + } +} + +impl Default for Box { + fn default() -> Self { + RawValue::from_borrowed("null").to_owned() + } +} + +impl Debug for RawValue { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter + .debug_tuple("RawValue") + .field(&format_args!("{}", &self.v)) + .finish() + } +} + +impl Display for RawValue { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(&self.v) + } +} + +impl RawValue { + pub fn from_string(s: String) -> Result, Error> { + let borrowed = super::de::from_str::<&Self>(&s)?; + if borrowed.v.len() < s.len() { + return Ok(borrowed.to_owned()); + } + Ok(Self::from_owned(s.into_boxed_str())) + } + + pub fn get(&self) -> &str { + &self.v + } +} + +impl From> for Box { + fn from(raw_value: Box) -> Self { + RawValue::into_owned(raw_value) + } +} + +pub const TOKEN: &str = "$serde_logfmt::private::RawValue"; + +impl Serialize for RawValue { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut s = serializer.serialize_struct(TOKEN, 1)?; + s.serialize_field(TOKEN, &self.v)?; + s.end() + } +} + +impl<'de: 'a, 'a> Deserialize<'de> for &'a RawValue { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct ReferenceVisitor; + + impl<'de> Visitor<'de> for ReferenceVisitor { + type Value = &'de RawValue; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "any valid JSON value") + } + + fn visit_map(self, mut visitor: V) -> Result + where + V: MapAccess<'de>, + { + let value = visitor.next_key::()?; + if value.is_none() { + return Err(de::Error::invalid_type(Unexpected::Map, &self)); + } + visitor.next_value_seed(ReferenceFromString) + } + } + + deserializer.deserialize_newtype_struct(TOKEN, ReferenceVisitor) + } +} + +impl<'de> Deserialize<'de> for Box { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct BoxedVisitor; + + impl<'de> Visitor<'de> for BoxedVisitor { + type Value = Box; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "any valid logfmt value") + } + + fn visit_map(self, mut visitor: V) -> Result + where + V: MapAccess<'de>, + { + let value = visitor.next_key::()?; + if value.is_none() { + return Err(de::Error::invalid_type(Unexpected::Map, &self)); + } + visitor.next_value_seed(BoxedFromString) + } + } + + deserializer.deserialize_newtype_struct(TOKEN, BoxedVisitor) + } +} + +struct RawKey; + +impl<'de> Deserialize<'de> for RawKey { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct FieldVisitor; + + impl<'de> Visitor<'de> for FieldVisitor { + type Value = (); + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("raw value") + } + + fn visit_str(self, s: &str) -> Result<(), E> + where + E: de::Error, + { + if s == TOKEN { + Ok(()) + } else { + Err(de::Error::custom("unexpected raw value")) + } + } + } + + deserializer.deserialize_identifier(FieldVisitor)?; + Ok(RawKey) + } +} + +pub struct ReferenceFromString; + +impl<'de> DeserializeSeed<'de> for ReferenceFromString { + type Value = &'de RawValue; + + fn deserialize(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_str(self) + } +} + +impl<'de> Visitor<'de> for ReferenceFromString { + type Value = &'de RawValue; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("raw value") + } + + fn visit_borrowed_str(self, s: &'de str) -> Result + where + E: de::Error, + { + Ok(RawValue::from_borrowed(s)) + } +} + +pub struct BoxedFromString; + +impl<'de> DeserializeSeed<'de> for BoxedFromString { + type Value = Box; + + fn deserialize(self, deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_str(self) + } +} + +impl<'de> Visitor<'de> for BoxedFromString { + type Value = Box; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("raw value") + } + + fn visit_str(self, s: &str) -> Result + where + E: de::Error, + { + Ok(RawValue::from_owned(s.to_owned().into_boxed_str())) + } +} + +struct RawKeyDeserializer; + +impl<'de> Deserializer<'de> for RawKeyDeserializer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + visitor.visit_borrowed_str(TOKEN) + } + + forward_to_deserialize_any! { + bool u8 u16 u32 u64 u128 i8 i16 i32 i64 i128 f32 f64 char str string seq + bytes byte_buf map struct option unit newtype_struct ignored_any + unit_struct tuple_struct tuple enum identifier + } +} + +pub struct OwnedRawDeserializer { + pub raw_value: Option, +} + +impl<'de> MapAccess<'de> for OwnedRawDeserializer { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result, Error> + where + K: de::DeserializeSeed<'de>, + { + if self.raw_value.is_none() { + return Ok(None); + } + seed.deserialize(RawKeyDeserializer).map(Some) + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: de::DeserializeSeed<'de>, + { + seed.deserialize(self.raw_value.take().unwrap().into_deserializer()) + } +} + +pub struct BorrowedRawDeserializer<'de> { + pub raw_value: Option<&'de str>, +} + +impl<'de> MapAccess<'de> for BorrowedRawDeserializer<'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result, Error> + where + K: de::DeserializeSeed<'de>, + { + if self.raw_value.is_none() { + return Ok(None); + } + seed.deserialize(RawKeyDeserializer).map(Some) + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: de::DeserializeSeed<'de>, + { + seed.deserialize(BorrowedStrDeserializer::new(self.raw_value.take().unwrap())) + } +} + +impl<'de> IntoDeserializer<'de, Error> for &'de RawValue { + type Deserializer = &'de RawValue; + + fn into_deserializer(self) -> Self::Deserializer { + self + } +} + +impl<'de> Deserializer<'de> for &'de RawValue { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_any(visitor) + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_bool(visitor) + } + + fn deserialize_i8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_i8(visitor) + } + + fn deserialize_i16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_i16(visitor) + } + + fn deserialize_i32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_i32(visitor) + } + + fn deserialize_i64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_i64(visitor) + } + + fn deserialize_i128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_i128(visitor) + } + + fn deserialize_u8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_u8(visitor) + } + + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_u16(visitor) + } + + fn deserialize_u32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_u32(visitor) + } + + fn deserialize_u64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_u64(visitor) + } + + fn deserialize_u128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_u128(visitor) + } + + fn deserialize_f32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_f32(visitor) + } + + fn deserialize_f64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_f64(visitor) + } + + fn deserialize_char(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_char(visitor) + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_str(visitor) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_string(visitor) + } + + fn deserialize_bytes(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_bytes(visitor) + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_byte_buf(visitor) + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_option(visitor) + } + + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_unit(visitor) + } + + fn deserialize_unit_struct(self, name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_unit_struct(name, visitor) + } + + fn deserialize_newtype_struct(self, name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_newtype_struct(name, visitor) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_seq(visitor) + } + + fn deserialize_tuple(self, len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_tuple(len, visitor) + } + + fn deserialize_tuple_struct(self, name: &'static str, len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_tuple_struct(name, len, visitor) + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_map(visitor) + } + + fn deserialize_struct( + self, + name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_struct(name, fields, visitor) + } + + fn deserialize_enum( + self, + name: &'static str, + variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_enum(name, variants, visitor) + } + + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_identifier(visitor) + } + + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + super::de::Deserializer::from_str(&self.v).deserialize_ignored_any(visitor) + } +} diff --git a/src/model.rs b/src/model.rs index c4f7fcd2..40ab050c 100644 --- a/src/model.rs +++ b/src/model.rs @@ -3,18 +3,24 @@ use std::collections::HashMap; use std::fmt; use std::iter::IntoIterator; use std::marker::PhantomData; +use std::ops::Range; // third-party imports use chrono::{DateTime, Utc}; -use json::value::RawValue; use regex::Regex; use serde::de::{Deserialize, Deserializer, MapAccess, SeqAccess, Visitor}; -use serde_json as json; +use serde_json::{ + self as json, + de::{Read, StrRead}, +}; use wildflower::Pattern; // local imports use crate::error::{Error, Result}; +use crate::fmtx::Push; use crate::level; +use crate::logfmt; +use crate::serdex::StreamDeserializerWithOffsets; use crate::settings::PredefinedFields; use crate::timestamp::Timestamp; use crate::types::FieldKind; @@ -25,25 +31,237 @@ pub use level::Level; // --- +#[derive(Clone, Copy)] +pub enum RawValue<'a> { + Json(&'a json::value::RawValue), + Logfmt(&'a logfmt::raw::RawValue), +} + +impl<'a> RawValue<'a> { + #[inline] + pub fn kind(&self) -> ValueKind { + match self { + Self::Json(value) => { + let bytes = value.get().as_bytes(); + if bytes.len() == 0 { + return ValueKind::Null; + } + match bytes[0] { + b'"' => ValueKind::QuotedString, + b'0'..=b'9' | b'-' | b'+' | b'.' => ValueKind::Number, + b'{' => ValueKind::Object, + b'[' => ValueKind::Array, + b't' | b'f' => ValueKind::Boolean, + _ => ValueKind::Null, + } + } + Self::Logfmt(value) => { + let looks_like_number = || { + let mut s = value.get(); + let mut n_dots = 0; + if s.starts_with('-') { + s = &s[1..]; + } + s.len() < 40 + && s.as_bytes().iter().all(|&x| { + if x == b'.' { + n_dots += 1; + n_dots <= 1 + } else { + x.is_ascii_digit() + } + }) + }; + + if !value.get().is_empty() && value.get().as_bytes()[0] == b'"' { + ValueKind::QuotedString + } else if value.get() == "false" || value.get() == "true" { + ValueKind::Boolean + } else if value.get() == "null" { + ValueKind::Null + } else if looks_like_number() { + ValueKind::Number + } else { + ValueKind::String + } + } + } + } + + #[inline] + pub fn is_empty(&self) -> bool { + match self { + Self::Json(value) => match value.get() { + r#""""# | "null" | "{}" | "[]" => false, + _ => true, + }, + Self::Logfmt(value) => value.get().is_empty(), + } + } + + #[inline] + pub fn raw_str(&self) -> &'a str { + match self { + Self::Json(value) => value.get(), + Self::Logfmt(value) => value.get(), + } + } + + #[inline] + pub fn format_as_json_str>(&self, buf: &mut B) { + match self { + Self::Json(value) => buf.extend_from_slice(value.get().as_bytes()), + Self::Logfmt(value) => { + if value.get().is_empty() { + buf.push(b'"'); + buf.push(b'"'); + } else if value.get().as_bytes()[0] == b'"' { + buf.extend_from_slice(value.get().as_bytes()); + } else { + buf.push(b'"'); + buf.extend_from_slice(value.get().as_bytes()); + buf.push(b'"'); + } + } + } + } + + #[inline] + pub fn format_as_str(&self, buf: &mut Vec) { + match self { + Self::Json(value) => { + let mut reader = StrRead::new(&value.get()[1..]); + reader.parse_str_raw(buf).unwrap(); + } + Self::Logfmt(value) => { + logfmt::de::Deserializer::from_str(value.get()) + .parse_str_to_buf(buf) + .unwrap(); + } + } + } + + #[inline] + pub fn format_readable(&self, buf: &mut Vec) { + match self { + Self::Json(value) => { + if value.get().as_bytes().first() == Some(&b'"') { + self.format_as_str(buf) + } else { + buf.extend_from_slice(value.get().as_bytes()); + } + } + Self::Logfmt(value) => { + if value.get().as_bytes().first() == Some(&b'"') { + self.format_as_str(buf) + } else { + buf.extend_from_slice(value.get().as_bytes()); + } + } + } + } + + #[inline] + pub fn parse_object(&self) -> Result> { + match self { + Self::Json(value) => json::from_str::(value.get()).map_err(Error::JsonParseError), + Self::Logfmt(value) => logfmt::from_str::(value.get()).map_err(Error::LogfmtParseError), + } + } + + #[inline] + pub fn parse_array(&self) -> Result> { + match self { + Self::Json(value) => json::from_str::>(value.get()).map_err(Error::JsonParseError), + Self::Logfmt(value) => logfmt::from_str::>(value.get()).map_err(Error::LogfmtParseError), + } + } + + #[inline] + pub fn parse>(&self) -> Result { + match self { + Self::Json(value) => json::from_str(value.get()).map_err(Error::JsonParseError), + Self::Logfmt(value) => logfmt::from_str(value.get()).map_err(Error::LogfmtParseError), + } + } + + #[inline] + pub fn is_byte_code(&self) -> bool { + match self { + Self::Json(value) => { + let v = value.get().as_bytes(); + match v.len() { + 1 => v[0].is_ascii_digit(), + 2 => v[0].is_ascii_digit() && v[1].is_ascii_digit(), + 3 => &b"100"[..] <= v && v <= &b"255"[..], + _ => false, + } + } + Self::Logfmt(_) => false, + } + } + + #[inline] + pub fn parse_byte_code(&self) -> u8 { + match self { + Self::Json(value) => match value.get().as_bytes() { + [a] => a - b'0', + [a, b] => (a - b'0') * 10 + (b - b'0'), + [a, b, c] => (a - b'0') * 100 + (b - b'0') * 10 + (c - b'0'), + _ => 0, + }, + Self::Logfmt(_) => 0, + } + } +} + +impl<'a> From<&'a json::value::RawValue> for RawValue<'a> { + #[inline(always)] + fn from(value: &'a json::value::RawValue) -> Self { + Self::Json(value) + } +} + +impl<'a> From<&'a logfmt::raw::RawValue> for RawValue<'a> { + #[inline(always)] + fn from(value: &'a logfmt::raw::RawValue) -> Self { + Self::Logfmt(value) + } +} + +// --- + +#[derive(Clone, Debug, Copy, PartialEq, Eq)] +pub enum ValueKind { + String, + QuotedString, + Number, + Object, + Array, + Boolean, + Null, +} + +// --- + pub struct Record<'a> { pub ts: Option>, - pub message: Option<&'a RawValue>, + pub message: Option>, pub level: Option, pub logger: Option<&'a str>, pub caller: Option>, - pub(crate) extra: heapless::Vec<(&'a str, &'a RawValue), RECORD_EXTRA_CAPACITY>, - pub(crate) extrax: Vec<(&'a str, &'a RawValue)>, - pub(crate) predefined: heapless::Vec<(&'a str, &'a RawValue), MAX_PREDEFINED_FIELDS>, + pub(crate) fields: RecordFields<'a>, + pub(crate) predefined: heapless::Vec<(&'a str, RawValue<'a>), MAX_PREDEFINED_FIELDS>, } impl<'a> Record<'a> { #[inline(always)] - pub fn fields(&self) -> impl Iterator { - self.extra.iter().chain(self.extrax.iter()) + pub fn fields(&self) -> impl Iterator)> { + self.fields.head.iter().chain(self.fields.tail.iter()) } #[inline(always)] - pub fn fields_for_search(&self) -> impl Iterator { + pub fn fields_for_search(&self) -> impl Iterator)> { self.fields().chain(self.predefined.iter()) } @@ -59,17 +277,24 @@ impl<'a> Record<'a> { level: None, logger: None, caller: None, - extra: heapless::Vec::new(), - extrax: if capacity > RECORD_EXTRA_CAPACITY { - Vec::with_capacity(capacity - RECORD_EXTRA_CAPACITY) - } else { - Vec::new() + fields: RecordFields { + head: heapless::Vec::new(), + tail: if capacity > RECORD_EXTRA_CAPACITY { + Vec::with_capacity(capacity - RECORD_EXTRA_CAPACITY) + } else { + Vec::new() + }, }, predefined: heapless::Vec::new(), } } } +pub struct RecordFields<'a> { + pub(crate) head: heapless::Vec<(&'a str, RawValue<'a>), RECORD_EXTRA_CAPACITY>, + pub(crate) tail: Vec<(&'a str, RawValue<'a>)>, +} + // --- pub trait RecordWithSourceConstructor { @@ -286,14 +511,14 @@ impl ParserSettings { } #[inline(always)] - fn apply<'a>(&self, key: &'a str, value: &'a RawValue, to: &mut Record<'a>, pc: &mut PriorityController) { + fn apply<'a>(&self, key: &'a str, value: RawValue<'a>, to: &mut Record<'a>, pc: &mut PriorityController) { self.blocks[0].apply(self, key, value, to, pc, true); } #[inline(always)] fn apply_each<'a, 'i, I>(&self, items: I, to: &mut Record<'a>) where - I: IntoIterator, + I: IntoIterator)>, 'a: 'i, { let mut pc = PriorityController::default(); @@ -303,11 +528,11 @@ impl ParserSettings { #[inline(always)] fn apply_each_ctx<'a, 'i, I>(&self, items: I, to: &mut Record<'a>, pc: &mut PriorityController) where - I: IntoIterator, + I: IntoIterator)>, 'a: 'i, { for (key, value) in items { - self.apply(key, value, to, pc) + self.apply(key, *value, to, pc) } } } @@ -324,7 +549,7 @@ impl ParserSettingsBlock { &self, ps: &ParserSettings, key: &'a str, - value: &'a RawValue, + value: RawValue<'a>, to: &mut Record<'a>, pc: &mut PriorityController, is_root: bool, @@ -353,9 +578,9 @@ impl ParserSettingsBlock { return; } } - match to.extra.push((key, value)) { + match to.fields.head.push((key, value)) { Ok(_) => {} - Err(value) => to.extrax.push(value), + Err(value) => to.fields.tail.push(value), } } @@ -368,11 +593,11 @@ impl ParserSettingsBlock { ctx: &mut PriorityController, is_root: bool, ) where - I: IntoIterator, + I: IntoIterator)>, 'a: 'i, { for (key, value) in items { - self.apply(ps, key, value, to, ctx, is_root) + self.apply(ps, key, *value, to, ctx, is_root) } } } @@ -428,10 +653,10 @@ enum FieldSettings { } impl FieldSettings { - fn apply<'a>(&self, ps: &ParserSettings, value: &'a RawValue, to: &mut Record<'a>) { + fn apply<'a>(&self, ps: &ParserSettings, value: RawValue<'a>, to: &mut Record<'a>) { match *self { Self::Time => { - let s = value.get(); + let s = value.raw_str(); let s = if s.as_bytes()[0] == b'"' { &s[1..s.len() - 1] } else { s }; let ts = Timestamp::new(s, None); if ps.pre_parse_time { @@ -441,19 +666,17 @@ impl FieldSettings { } } Self::Level(i) => { - to.level = json::from_str(value.get()) - .ok() - .and_then(|x: &'a str| ps.level[i].get(x).cloned()); + to.level = value.parse().ok().and_then(|x: &'a str| ps.level[i].get(x).cloned()); } - Self::Logger => to.logger = json::from_str(value.get()).ok(), + Self::Logger => to.logger = value.parse().ok(), Self::Message => to.message = Some(value), - Self::Caller => to.caller = json::from_str(value.get()).ok().map(|x| Caller::Text(x)), + Self::Caller => to.caller = value.parse().ok().map(|x| Caller::Text(x)), Self::CallerFile => match &mut to.caller { None => { - to.caller = json::from_str(value.get()).ok().map(|x| Caller::FileLine(x, "")); + to.caller = value.parse().ok().map(|x| Caller::FileLine(x, "")); } Some(Caller::FileLine(file, _)) => { - if let Some(value) = json::from_str(value.get()).ok() { + if let Some(value) = value.parse().ok() { *file = value } } @@ -461,15 +684,17 @@ impl FieldSettings { }, Self::CallerLine => match &mut to.caller { None => { - to.caller = Some(Caller::FileLine("", value.get())); + to.caller = Some(Caller::FileLine("", value.raw_str())); } - Some(Caller::FileLine(_, line)) => { - if value.get().bytes().next().map_or(false, |x| x.is_ascii_digit()) { - *line = value.get() - } else if let Some(value) = json::from_str(value.get()).ok() { - *line = value + Some(Caller::FileLine(_, line)) => match value.kind() { + ValueKind::Number => *line = value.raw_str(), + ValueKind::String => { + if let Some(value) = value.parse().ok() { + *line = value + } } - } + _ => {} + }, _ => {} }, Self::Nested(_) => {} @@ -480,19 +705,19 @@ impl FieldSettings { fn apply_ctx<'a>( &self, ps: &ParserSettings, - value: &'a RawValue, + value: RawValue<'a>, to: &mut Record<'a>, ctx: &mut PriorityController, ) { match *self { - Self::Nested(nested) => { - let s = value.get(); - if s.len() > 0 && s.as_bytes()[0] == b'{' { - if let Ok(record) = json::from_str::(s) { + Self::Nested(nested) => match value.kind() { + ValueKind::Object => { + if let Ok(record) = value.parse::() { ps.blocks[nested].apply_each_ctx(ps, record.fields(), to, ctx, false); } } - } + _ => {} + }, _ => self.apply(ps, value, to), } } @@ -537,14 +762,22 @@ impl Parser { // --- pub struct RawRecord<'a> { - fields: heapless::Vec<(&'a str, &'a RawValue), RAW_RECORD_FIELDS_CAPACITY>, - fieldsx: Vec<(&'a str, &'a RawValue)>, + fields: RawRecordFields<'a>, +} + +pub struct RawRecordFields<'a> { + head: heapless::Vec<(&'a str, RawValue<'a>), RAW_RECORD_FIELDS_CAPACITY>, + tail: Vec<(&'a str, RawValue<'a>)>, } impl<'a> RawRecord<'a> { - #[inline(always)] - pub fn fields(&self) -> impl Iterator { - self.fields.iter().chain(self.fieldsx.iter()) + #[inline] + pub fn fields(&self) -> impl Iterator)> { + self.fields.head.iter().chain(self.fields.tail.iter()) + } + + pub fn parser() -> RawRecordParser { + RawRecordParser::new() } } @@ -553,44 +786,205 @@ impl<'de: 'a, 'a> Deserialize<'de> for RawRecord<'a> { where D: Deserializer<'de>, { - Ok(deserializer.deserialize_map(RawRecordVisitor::new())?) + Ok(deserializer.deserialize_map(RawRecordVisitor::::new())?) + } +} + +// --- + +pub struct RawRecordParser { + allow_prefix: bool, +} + +impl RawRecordParser { + pub fn new() -> Self { + Self { allow_prefix: false } + } + + pub fn allow_prefix(self, value: bool) -> Self { + Self { allow_prefix: value } + } + + pub fn parse<'a>(&self, line: &'a [u8]) -> RawRecordStream, impl RawRecordIterator<'a>> { + let prefix = if self.allow_prefix && line.last() == Some(&b'}') { + line.split(|c| *c == b'{').next().unwrap() + } else { + b"" + }; + + let xn = prefix.len(); + let data = &line[xn..]; + + if data.first().map(|&x| x == b'{') == Some(false) { + RawRecordStream::Logfmt(RawRecordLogfmtStream { + line, + prefix, + done: false, + }) + } else { + RawRecordStream::Json(RawRecordJsonStream { + line, + prefix, + delegate: StreamDeserializerWithOffsets(json::Deserializer::from_slice(data).into_iter::()), + }) + } + } +} + +// --- + +pub enum RawRecordStream { + Json(Json), + Logfmt(Logfmt), +} + +impl<'a, Json, Logfmt> RawRecordStream +where + Json: RawRecordIterator<'a>, + Logfmt: RawRecordIterator<'a>, +{ + pub fn next(&mut self) -> Option>> { + match self { + Self::Json(stream) => stream.next(), + Self::Logfmt(stream) => stream.next(), + } } } // --- -struct RawRecordVisitor<'a> { - marker: PhantomData RawRecord<'a>>, +pub trait RawRecordIterator<'a> { + fn next(&mut self) -> Option>>; } -impl<'a> RawRecordVisitor<'a> { +// --- + +pub struct AnnotatedRawRecord<'a> { + pub prefix: &'a [u8], + pub record: RawRecord<'a>, + pub source: &'a [u8], + pub offsets: Range, +} + +// --- + +struct RawRecordJsonStream<'a, 'de, R> { + line: &'a [u8], + prefix: &'a [u8], + delegate: StreamDeserializerWithOffsets<'de, R, RawRecord<'a>>, +} + +impl<'a, 'de: 'a, R> RawRecordIterator<'a> for RawRecordJsonStream<'a, 'de, R> +where + R: serde_json::de::Read<'de>, +{ + fn next(&mut self) -> Option>> { + let pl = self.prefix.len(); + self.delegate.next().map(|res| { + res.map(|(record, range)| { + let range = range.start + pl..range.end + pl; + AnnotatedRawRecord { + prefix: self.prefix, + record, + source: &self.line[range.start..range.end], + offsets: range, + } + }) + .map_err(Error::JsonParseError) + }) + } +} + +// --- + +struct RawRecordLogfmtStream<'a> { + line: &'a [u8], + prefix: &'a [u8], + done: bool, +} + +impl<'a> RawRecordIterator<'a> for RawRecordLogfmtStream<'a> { + fn next(&mut self) -> Option>> { + if self.done { + return None; + } + + self.done = true; + match logfmt::from_slice::(self.line) { + Ok(record) => Some(Ok(AnnotatedRawRecord { + prefix: self.prefix, + record: record.0, + source: self.line, + offsets: 0..self.line.len(), + })), + Err(err) => Some(Err(err.into())), + } + } +} + +// --- + +struct RawRecordVisitor<'a, RV> +where + RV: ?Sized + 'a, +{ + marker: PhantomData (RawRecord<'a>, &'a RV)>, +} + +impl<'a, RV> RawRecordVisitor<'a, RV> +where + RV: ?Sized + 'a, +{ #[inline(always)] fn new() -> Self { Self { marker: PhantomData } } } -impl<'de: 'a, 'a> Visitor<'de> for RawRecordVisitor<'a> { +impl<'de: 'a, 'a, RV> Visitor<'de> for RawRecordVisitor<'a, RV> +where + RV: ?Sized + 'a, + &'a RV: Deserialize<'de> + 'a, + RawValue<'a>: std::convert::From<&'a RV>, +{ type Value = RawRecord<'a>; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("object json") } fn visit_map>(self, mut access: M) -> std::result::Result { - let mut fields = heapless::Vec::new(); + let mut head = heapless::Vec::new(); let count = access.size_hint().unwrap_or(0); - let mut fieldsx = match count > RAW_RECORD_FIELDS_CAPACITY { + let mut tail = match count > RAW_RECORD_FIELDS_CAPACITY { false => Vec::new(), true => Vec::with_capacity(count - RAW_RECORD_FIELDS_CAPACITY), }; while let Some(Some(key)) = access.next_key::<&'a str>().ok() { - match fields.push((key, access.next_value()?)) { + let value: &RV = access.next_value()?; + match head.push((key, value.into())) { Ok(_) => {} - Err(value) => fieldsx.push(value), + Err(value) => tail.push(value), } } - Ok(RawRecord { fields, fieldsx }) + Ok(RawRecord { + fields: RawRecordFields { head, tail }, + }) + } +} + +// --- + +pub struct LogfmtRawRecord<'a>(pub RawRecord<'a>); + +impl<'de: 'a, 'a> Deserialize<'de> for LogfmtRawRecord<'a> { + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + Ok(Self( + deserializer.deserialize_map(RawRecordVisitor::::new())?, + )) } } @@ -845,20 +1239,19 @@ impl FieldFilter { } } - fn match_value_partial(&self, subkey: KeyMatcher, value: &RawValue) -> bool { - let bytes = value.get().as_bytes(); - if bytes[0] != b'{' { + fn match_value_partial<'a>(&self, subkey: KeyMatcher, value: RawValue<'a>) -> bool { + if value.kind() != ValueKind::Object { return false; } - let item = json::from_str::(value.get()).unwrap(); + let item = value.parse_object().unwrap(); for (k, v) in item.fields.iter() { match subkey.match_key(*k) { None => { continue; } Some(KeyMatch::Full) => { - return self.match_value(Some(v.get()), v.get().starts_with('"')); + return self.match_value(Some(v.raw_str()), v.kind() == ValueKind::String); } Some(KeyMatch::Partial(subkey)) => { return self.match_value_partial(subkey, *v); @@ -882,7 +1275,7 @@ impl RecordFilter for FieldFilter { } FieldKind::Message => { if let Some(message) = record.message { - self.match_value(Some(message.get()), true) + self.match_value(Some(message.raw_str()), true) } else { false } @@ -908,8 +1301,8 @@ impl RecordFilter for FieldFilter { match self.match_custom_key(*k) { None => {} Some(KeyMatch::Full) => { - let escaped = v.get().starts_with('"'); - if self.match_value(Some(v.get()), escaped) { + let escaped = v.kind() == ValueKind::QuotedString; + if self.match_value(Some(v.raw_str()), escaped) { return true; } } @@ -1003,7 +1396,7 @@ impl RecordFilter for Filter { // --- pub struct Object<'a> { - pub fields: heapless::Vec<(&'a str, &'a RawValue), 32>, + pub fields: heapless::Vec<(&'a str, RawValue<'a>), 32>, } struct ObjectVisitor<'a> { @@ -1025,8 +1418,8 @@ impl<'de: 'a, 'a> Visitor<'de> for ObjectVisitor<'a> { fn visit_map>(self, mut access: A) -> std::result::Result { let mut fields = heapless::Vec::new(); while let Some(key) = access.next_key::<&'a str>()? { - let value = access.next_value()?; - fields.push((key, value)).ok(); + let value: &json::value::RawValue = access.next_value()?; + fields.push((key, value.into())).ok(); } Ok(Object { fields }) @@ -1043,13 +1436,13 @@ impl<'de: 'a, 'a> Deserialize<'de> for Object<'a> { } pub struct Array<'a, const N: usize> { - items: heapless::Vec<&'a RawValue, N>, - more: Vec<&'a RawValue>, + items: heapless::Vec, N>, + more: Vec>, } impl<'a, const N: usize> Array<'a, N> { #[inline(always)] - pub fn iter(&self) -> impl Iterator { + pub fn iter(&self) -> impl Iterator> { self.items.iter().chain(self.more.iter()) } } @@ -1073,7 +1466,8 @@ impl<'de: 'a, 'a, const N: usize> Visitor<'de> for ArrayVisitor<'a, N> { fn visit_seq>(self, mut access: A) -> std::result::Result { let mut items = heapless::Vec::new(); let mut more = Vec::new(); - while let Some(item) = access.next_element()? { + while let Some(item) = access.next_element::<&json::value::RawValue>()? { + let item = item.into(); match items.push(item) { Ok(()) => {} Err(item) => more.push(item),