Skip to content

Commit

Permalink
refactor: replace time pre-parsing with lazy parsing (#629)
Browse files: browse the repository at this point in the history
  • Loading branch information
pamburus authored Dec 24, 2024
1 parent afb73b2 commit ff0e512
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 37 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ members = [".", "crate/encstr"]
[workspace.package]
repository = "https://github.com/pamburus/hl"
authors = ["Pavel Ivanov <[email protected]>"]
version = "0.30.1-alpha.1"
version = "0.30.1-alpha.2"
edition = "2021"
license = "MIT"

Expand Down
2 changes: 1 addition & 1 deletion benches/parse-and-format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn benchmark(c: &mut Criterion) {
for theme in ["universal", "classic"] {
c.bench_function(format!("{}/{}", name, theme), |b| {
let settings = Settings::default();
let parser = Parser::new(ParserSettings::new(&settings.fields.predefined, empty(), false, None));
let parser = Parser::new(ParserSettings::new(&settings.fields.predefined, empty(), None));
let formatter = RecordFormatter::new(
Arc::new(Theme::embedded(theme).unwrap()),
DateTimeFormatter::new(
Expand Down
1 change: 0 additions & 1 deletion src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,6 @@ impl App {
Parser::new(ParserSettings::new(
&self.options.fields.settings.predefined,
&self.options.fields.settings.ignore,
self.options.filter.since.is_some() || self.options.filter.until.is_some() || self.options.follow,
self.options.unix_ts_unit,
))
}
Expand Down
7 changes: 1 addition & 6 deletions src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,12 +268,7 @@ where
buffer_size: settings.buffer_size.into(),
max_message_size: settings.max_message_size.into(),
dir,
parser: Parser::new(ParserSettings::new(
&settings.fields,
empty(),
false,
settings.unix_ts_unit,
)),
parser: Parser::new(ParserSettings::new(&settings.fields, empty(), settings.unix_ts_unit)),
delimiter: settings.delimiter,
allow_prefix: settings.allow_prefix,
format: settings.format,
Expand Down
11 changes: 2 additions & 9 deletions src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,6 @@ impl RecordFilter for RecordFilterNone {
// ---

pub struct ParserSettings {
pre_parse_time: bool,
unix_ts_unit: Option<UnixTimestampUnit>,
level: Vec<(HashMap<String, Level>, Option<Level>)>,
blocks: Vec<ParserSettingsBlock>,
Expand All @@ -504,11 +503,9 @@ impl ParserSettings {
pub fn new<'a, I: IntoIterator<Item = &'a String>>(
predefined: &PredefinedFields,
ignore: I,
pre_parse_time: bool,
unix_ts_unit: Option<UnixTimestampUnit>,
) -> Self {
let mut result = Self {
pre_parse_time,
unix_ts_unit,
level: Vec::new(),
blocks: vec![ParserSettingsBlock::default()],
Expand Down Expand Up @@ -618,7 +615,7 @@ impl ParserSettings {
impl Default for ParserSettings {
#[inline]
fn default() -> Self {
Self::new(&PredefinedFields::default(), Vec::new(), false, None)
Self::new(&PredefinedFields::default(), Vec::new(), None)
}
}

Expand Down Expand Up @@ -743,11 +740,7 @@ impl FieldSettings {
let s = value.raw_str();
let s = if s.as_bytes()[0] == b'"' { &s[1..s.len() - 1] } else { s };
let ts = Timestamp::new(s).with_unix_unit(ps.unix_ts_unit);
if ps.pre_parse_time {
to.ts = Some(ts.preparsed())
} else {
to.ts = Some(ts);
}
to.ts = Some(ts);
true
}
Self::Level(i) => {
Expand Down
69 changes: 51 additions & 18 deletions src/timestamp.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// stdlib imports
use std::cell::OnceCell;

// third-party imports
use chrono::{DateTime, Duration, FixedOffset, NaiveDateTime};

Expand All @@ -9,15 +12,15 @@ use crate::app::UnixTimestampUnit;
#[derive(Debug)]
pub struct Timestamp<'a> {
raw: &'a str,
parsed: Option<Option<DateTime<FixedOffset>>>,
parsed: OnceCell<Option<DateTime<FixedOffset>>>,
unix_unit: Option<UnixTimestampUnit>,
}

impl<'a> Timestamp<'a> {
pub fn new(value: &'a str) -> Self {
Self {
raw: value,
parsed: None,
parsed: OnceCell::new(),
unix_unit: None,
}
}
Expand All @@ -26,27 +29,27 @@ impl<'a> Timestamp<'a> {
self.raw
}

pub fn preparsed(&self) -> Self {
pub fn with_unix_unit(self, unit: Option<UnixTimestampUnit>) -> Self {
Self {
raw: self.raw,
parsed: Some(self.parse()),
unix_unit: self.unix_unit,
parsed: if unit == self.unix_unit {
self.parsed
} else {
OnceCell::new()
},
unix_unit: unit,
}
}

pub fn with_unix_unit(&self, unit: Option<UnixTimestampUnit>) -> Self {
Self {
raw: self.raw,
parsed: if unit == self.unix_unit { self.parsed } else { None },
unix_unit: unit,
}
pub fn parsed(&self) -> &Option<DateTime<FixedOffset>> {
self.parsed.get_or_init(|| self.reparse())
}

pub fn parse(&self) -> Option<DateTime<FixedOffset>> {
if let Some(parsed) = self.parsed {
return parsed;
}
*self.parsed()
}

fn reparse(&self) -> Option<DateTime<FixedOffset>> {
if let Ok(ts) = self.raw.parse() {
Some(ts)
} else if let Some(nt) = guess_number_type(self.raw.as_bytes()) {
Expand Down Expand Up @@ -101,7 +104,7 @@ impl<'a> Timestamp<'a> {
}

pub fn unix_utc(&self) -> Option<(i64, u32)> {
self.parse()
self.parsed()
.and_then(|ts| Some((ts.timestamp(), ts.timestamp_subsec_nanos())))
}
}
Expand Down Expand Up @@ -490,12 +493,42 @@ mod tests {

#[test]
fn test_parse() {
let test = |s, unix_timestamp, tz| {
let ts = Timestamp::new(s).parse().unwrap();
let test = |s, unit, unix_timestamp, nanos, tz| {
let ts = Timestamp::new(s).with_unix_unit(unit).parse().unwrap();
assert_eq!(ts.timestamp(), unix_timestamp);
assert_eq!(ts.timezone().local_minus_utc(), tz);
assert_eq!(ts.timestamp_subsec_nanos(), nanos);
};
test("2020-08-21 07:20:48", 1597994448, 0);
test("2020-08-21 07:20:48", None, 1597994448, 0, 0);
test("1597994448", None, 1597994448, 0, 0);
test("1597994448123", None, 1597994448, 123000000, 0);
test("1597994448123456", None, 1597994448, 123456000, 0);
test("1597994448123456789", None, 1597994448, 123456789, 0);
test("1597994448.123", None, 1597994448, 123000000, 0);
test("1597994448.123456", None, 1597994448, 123456000, 0);
test("1597994448.123456789", None, 1597994448, 123456789, 0);
test("-1.123456789", None, -2, 1000000000 - 123456789, 0);
test(
"1597994448123.456789",
Some(UnixTimestampUnit::Milliseconds),
1597994448,
123456789,
0,
);
test(
"1597994448123456.789",
Some(UnixTimestampUnit::Microseconds),
1597994448,
123456789,
0,
);
test(
"1597994448123456789.0",
Some(UnixTimestampUnit::Nanoseconds),
1597994448,
123456789,
0,
);
}

#[test]
Expand Down

0 comments on commit ff0e512

Please sign in to comment.