Skip to content

Commit

Permalink
new: added --unix-timestamp-unit option
Browse files Browse the repository at this point in the history
  • Loading branch information
pamburus committed Mar 23, 2024
1 parent 8459f09 commit 90743ed
Show file tree
Hide file tree
Showing 11 changed files with 240 additions and 108 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ categories = ["command-line-utilities"]
description = "Utility for viewing json-formatted log files."
keywords = ["cli", "human", "log"]
name = "hl"
version = "0.27.0-beta.4.4"
version = "0.27.0-beta.4.5"
edition = "2021"
build = "build.rs"

Expand Down
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ bench:
@cargo bench
.PHONY: bench

## Show usage of the binary
usage: build
@env -i ./target/debug/hl --help
.PHONY: usage

## Clean build artifacts
clean:
@cargo clean
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ Options:
-Z, --time-zone <TIME_ZONE> Time zone name, see column "TZ identifier" at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones [env: HL_TIME_ZONE=] [default: UTC]
-L, --local Use local time zone, overrides --time-zone option
--no-local Disable local time zone, overrides --local option
--unix-timestamp-unit <UNIX_TIMESTAMP_UNIT> Unix timestamp unit, [auto, s, ms, us, ns] [env: HL_UNIX_TIMESTAMP_UNIT=] [default: auto] [possible values: auto, s, ms, us, ns]
-e, --hide-empty-fields Hide empty fields, applies for null, string, object and array fields only [env: HL_HIDE_EMPTY_FIELDS=]
-E, --show-empty-fields Show empty fields, overrides --hide-empty-fields option [env: HL_SHOW_EMPTY_FIELDS=]
--input-info <INPUT_INFO> Show input number and/or input filename before each message [default: auto] [possible values: auto, none, full, compact, minimal]
Expand All @@ -480,6 +481,7 @@ Options:
-o, --output <OUTPUT> Output file
--delimiter <DELIMITER> Log message delimiter, [NUL, CR, LF, CRLF] or any custom string
--dump-index Dump index metadata and exit
--debug Print debug error messages that can help with troubleshooting
--help Print help
-V, --version Print version
```
Expand Down
79 changes: 50 additions & 29 deletions src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::convert::{TryFrom, TryInto};
use std::fs;
use std::io::{BufWriter, Write};
use std::iter::repeat;
use std::num::NonZeroUsize;
use std::ops::Range;
use std::path::PathBuf;
use std::rc::Rc;
Expand All @@ -21,15 +22,14 @@ use crossbeam_channel::{self as channel, Receiver, RecvError, RecvTimeoutError,
use crossbeam_utils::thread;
use itertools::{izip, Itertools};
use platform_dirs::AppDirs;
use sha2::{Digest, Sha256};
use std::num::{NonZeroU32, NonZeroUsize};
use serde::{Deserialize, Serialize};

// local imports
use crate::datefmt::{DateTimeFormat, DateTimeFormatter};
use crate::fmtx::aligned_left;
use crate::formatting::{RawRecordFormatter, RecordFormatter, RecordWithSourceFormatter};
use crate::fsmon::{self, EventKind};
use crate::index::{Indexer, Timestamp};
use crate::index::{Indexer, IndexerSettings, Timestamp};
use crate::input::{BlockLine, Input, InputHolder, InputReference};
use crate::model::{Filter, Parser, ParserSettings, RawRecord, Record, RecordFilter, RecordWithSourceConstructor};
use crate::query::Query;
Expand Down Expand Up @@ -64,9 +64,11 @@ pub struct Options {
pub sync_interval: Duration,
pub input_info: Option<InputInfo>,
pub dump_index: bool,
pub debug: bool,
pub app_dirs: Option<AppDirs>,
pub tail: u64,
pub delimiter: Delimiter,
pub unix_ts_unit: Option<UnixTimestampUnit>,
}

impl Options {
Expand All @@ -93,6 +95,35 @@ pub enum InputInfo {
Minimal,
}

// ---

/// Unit in which a numeric Unix timestamp is expressed.
///
/// `Options::unix_ts_unit` stores this wrapped in an `Option`.
/// NOTE(review): `None` presumably corresponds to the `auto` choice of the
/// `--unix-timestamp-unit` option — confirm against the CLI layer.
#[derive(Eq, PartialEq, Copy, Clone, Debug, Serialize, Deserialize)]
pub enum UnixTimestampUnit {
/// The timestamp counts seconds.
Seconds,
/// The timestamp counts milliseconds.
Milliseconds,
/// The timestamp counts microseconds.
Microseconds,
/// The timestamp counts nanoseconds.
Nanoseconds,
}

impl UnixTimestampUnit {
    // Inclusive bounds accepted for a timestamp expressed in seconds.
    // NOTE(review): these look like the Unix-second values of 0001-01-01T00:00:00Z
    // and 9999-12-31T23:59:59Z — confirm.
    const TS_UNIX_AUTO_S_MIN: i64 = -62135596800;
    const TS_UNIX_AUTO_S_MAX: i64 = 253402300799;
    // The same bounds scaled by 1000 for milliseconds ...
    const TS_UNIX_AUTO_MS_MIN: i64 = Self::TS_UNIX_AUTO_S_MIN * 1000;
    const TS_UNIX_AUTO_MS_MAX: i64 = Self::TS_UNIX_AUTO_S_MAX * 1000;
    // ... and by 1000 once more for microseconds.
    const TS_UNIX_AUTO_US_MIN: i64 = Self::TS_UNIX_AUTO_MS_MIN * 1000;
    const TS_UNIX_AUTO_US_MAX: i64 = Self::TS_UNIX_AUTO_MS_MAX * 1000;

    /// Guesses the unit of a raw Unix timestamp from its magnitude.
    ///
    /// The candidate ranges are tried from the coarsest unit to the finest and the
    /// first one containing `ts` wins: seconds, then milliseconds, then microseconds.
    /// Any value outside all three ranges is reported as nanoseconds.
    pub fn guess(ts: i64) -> Self {
        let fits = |min: i64, max: i64| (min..=max).contains(&ts);

        if fits(Self::TS_UNIX_AUTO_S_MIN, Self::TS_UNIX_AUTO_S_MAX) {
            Self::Seconds
        } else if fits(Self::TS_UNIX_AUTO_MS_MIN, Self::TS_UNIX_AUTO_MS_MAX) {
            Self::Milliseconds
        } else if fits(Self::TS_UNIX_AUTO_US_MIN, Self::TS_UNIX_AUTO_US_MAX) {
            Self::Microseconds
        } else {
            Self::Nanoseconds
        }
    }
}
// ---

pub struct App {
options: Options,
}
Expand Down Expand Up @@ -188,7 +219,15 @@ impl App {

fn sort(&self, inputs: Vec<InputHolder>, output: &mut Output) -> Result<()> {
let mut output = BufWriter::new(output);
let param_hash = hex::encode(self.parameters_hash()?);
let indexer_settings = IndexerSettings::new(
self.options.buffer_size.try_into()?,
self.options.max_message_size.try_into()?,
&self.options.fields.settings.predefined,
self.options.delimiter.clone(),
self.options.allow_prefix,
self.options.unix_ts_unit,
);
let param_hash = hex::encode(indexer_settings.hash()?);
let cache_dir = self
.options
.app_dirs
Expand All @@ -197,16 +236,8 @@ impl App {
.unwrap_or_else(|| PathBuf::from(".cache"))
.join(param_hash);
fs::create_dir_all(&cache_dir)?;
let indexer = Indexer::new(
self.options.concurrency,
NonZeroU32::try_from(self.options.buffer_size)?.try_into()?,
NonZeroU32::try_from(self.options.max_message_size)?.try_into()?,
cache_dir,
&self.options.fields.settings.predefined,
self.options.delimiter.clone(),
self.options.allow_prefix,
);

let indexer = Indexer::new(self.options.concurrency, cache_dir, indexer_settings);
let input_badges = self.input_badges(inputs.iter().map(|x| &x.reference));

let inputs = inputs
Expand Down Expand Up @@ -309,8 +340,11 @@ impl App {
if let Some(ts) = &record.ts {
if let Some(unix_ts) = ts.unix_utc() {
items.push((unix_ts.into(), location));
} else {
eprintln!("skipped message because timestamp cannot be parsed: {:#?}", ts)
} else if self.options.debug {
eprintln!(
"skipped a message because its timestamp could not be parsed: {:#?}",
ts.raw()
)
}
}
},
Expand Down Expand Up @@ -571,25 +605,12 @@ impl App {
Ok(())
}

/// Computes a SHA-256 digest of the options that are handed to the indexer.
///
/// The hex form of this digest names the cache sub-directory used by `sort`, so every
/// option that is passed on to the indexer has to take part in it. The delimiter and
/// the unix timestamp unit are included for that reason, matching the set of fields
/// hashed by `IndexerSettings::hash`; leaving them out would let runs with different
/// delimiters or timestamp units share one cache directory.
///
/// # Errors
///
/// Returns an error if `bincode` fails to serialize the options into the hasher.
fn parameters_hash(&self) -> Result<[u8; 32]> {
    let mut hasher = Sha256::new();
    bincode::serialize_into(
        &mut hasher,
        &(
            &self.options.buffer_size,
            &self.options.max_message_size,
            &self.options.fields.settings.predefined,
            &self.options.delimiter,
            &self.options.allow_prefix,
            &self.options.unix_ts_unit,
        ),
    )?;
    Ok(hasher.finalize().into())
}

/// Builds a `Parser` from the current options.
///
/// The parser settings receive the predefined and ignored field settings, a flag that
/// is set when a `since` or `until` filter is present or follow mode is on, and the
/// configured unix timestamp unit.
fn parser(&self) -> Parser {
    let field_settings = &self.options.fields.settings;
    let filter = &self.options.filter;
    let filtered_by_time_or_following = filter.since.is_some() || filter.until.is_some() || self.options.follow;

    let settings = ParserSettings::new(
        &field_settings.predefined,
        &field_settings.ignore,
        filtered_by_time_or_following,
        self.options.unix_ts_unit,
    );
    Parser::new(settings)
}

Expand Down
2 changes: 1 addition & 1 deletion src/formatting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ mod tests {
fn test_nested_objects() {
assert_eq!(
format(&Record {
ts: Some(Timestamp::new("2000-01-02T03:04:05.123Z", None)),
ts: Some(Timestamp::new("2000-01-02T03:04:05.123Z")),
message: Some(RawValue::Json(&json_raw_value(r#""tm""#))),
level: Some(Level::Debug),
logger: Some("tl"),
Expand Down
74 changes: 60 additions & 14 deletions src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use std::fmt::{self, Display};
use std::fs::File;
use std::io::{Read, Write};
use std::iter::empty;
use std::num::NonZeroU32;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
Expand All @@ -30,6 +31,7 @@ use itertools::izip;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};

use crate::app::UnixTimestampUnit;
// local imports
use crate::error::{Error, Result};
use crate::index_capnp as schema;
Expand Down Expand Up @@ -120,6 +122,53 @@ impl std::ops::Sub for Timestamp {

// ---

/// Settings handed to `Indexer::new`.
///
/// All fields take part in `IndexerSettings::hash`, in declaration order.
pub struct IndexerSettings<'a> {
// Copied into the indexer's `buffer_size` by `Indexer::new`.
buffer_size: NonZeroU32,
// Copied into the indexer's `max_message_size` by `Indexer::new`.
max_message_size: NonZeroU32,
// Predefined fields configuration, passed on to the indexer's parser settings.
fields: &'a PredefinedFields,
// Copied into the indexer's `delimiter` by `Indexer::new`.
delimiter: Delimiter,
// Copied into the indexer's `allow_prefix` by `Indexer::new`.
allow_prefix: bool,
// Unix timestamp unit passed on to the indexer's parser settings.
// NOTE(review): `None` presumably means the unit is auto-detected — confirm.
unix_ts_unit: Option<UnixTimestampUnit>,
}

impl<'a> IndexerSettings<'a> {
pub fn new(
buffer_size: NonZeroU32,
max_message_size: NonZeroU32,
fields: &'a PredefinedFields,
delimiter: Delimiter,
allow_prefix: bool,
unix_ts_unit: Option<UnixTimestampUnit>,
) -> Self {
Self {
buffer_size,
max_message_size,
fields,
delimiter,
allow_prefix,
unix_ts_unit,
}
}

pub fn hash(&self) -> Result<[u8; 32]> {
let mut hasher = Sha256::new();
bincode::serialize_into(
&mut hasher,
&(
&self.buffer_size,
&self.max_message_size,
&self.fields,
&self.delimiter,
&self.allow_prefix,
&self.unix_ts_unit,
),
)?;
Ok(hasher.finalize().into())
}
}

// ---

/// Allows log files indexing to enable message sorting.
pub struct Indexer {
concurrency: usize,
Expand All @@ -133,23 +182,20 @@ pub struct Indexer {

impl Indexer {
/// Returns a new Indexer with the given parameters.
pub fn new(
concurrency: usize,
buffer_size: u32,
max_message_size: u32,
dir: PathBuf,
fields: &PredefinedFields,
delimiter: Delimiter,
allow_prefix: bool,
) -> Self {
pub fn new(concurrency: usize, dir: PathBuf, settings: IndexerSettings<'_>) -> Self {
Self {
concurrency,
buffer_size,
max_message_size,
buffer_size: settings.buffer_size.into(),
max_message_size: settings.max_message_size.into(),
dir,
parser: Parser::new(ParserSettings::new(&fields, empty(), false)),
delimiter,
allow_prefix,
parser: Parser::new(ParserSettings::new(
&settings.fields,
empty(),
false,
settings.unix_ts_unit,
)),
delimiter: settings.delimiter,
allow_prefix: settings.allow_prefix,
}
}

Expand Down
Loading

0 comments on commit 90743ed

Please sign in to comment.