From 604d39c7de69cbe42a8c5562d634b6512d4dfb21 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Tue, 19 Nov 2024 15:33:52 +0100 Subject: [PATCH 1/4] chg: [unifiedlog_iterator] support jsonl output and more params --- examples/unifiedlog_iterator/src/main.rs | 189 +++++++++++++++-------- 1 file changed, 126 insertions(+), 63 deletions(-) diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs index bd82e41..54bc75a 100644 --- a/examples/unifiedlog_iterator/src/main.rs +++ b/examples/unifiedlog_iterator/src/main.rs @@ -40,6 +40,15 @@ struct Args { /// Path to output file. Any directories must already exist #[clap(short, long, default_value = "")] output: String, + + /// Output format. Options: csv, jsonl. Default is autodetect. + #[clap(short, long, default_value = "auto")] + format: String, + + /// Append to output file + /// If false, will overwrite output file + #[clap(short, long, default_value = "false")] + append: bool, } fn main() { @@ -49,9 +58,18 @@ fn main() { .expect("Failed to initialize simple logger"); let args = Args::parse(); - let mut writer = construct_writer(&args.output).unwrap(); - // Create headers for CSV file - output_header(&mut writer).unwrap(); + let output_format = if args.format.is_empty() || args.format == "auto" { + std::path::Path::new(&args.output) + .extension() + .and_then(std::ffi::OsStr::to_str) + .unwrap_or("csv") + .to_string() + } else { + args.format.clone() + }; + + + let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap(); if args.input != "" { parse_log_archive(&args.input, &mut writer); @@ -61,7 +79,7 @@ fn main() { } // Parse a provided directory path. Currently, expect the path to follow macOS log collect structure -fn parse_log_archive(path: &str, writer: &mut Writer>) { +fn parse_log_archive(path: &str, writer: &mut OutputWriter) { let mut archive_path = PathBuf::from(path); // Parse all UUID files which contain strings and other metadata @@ -92,7 +110,7 @@ fn parse_log_archive(path: &str, writer: &mut Writer>) { } // Parse a live macOS system -fn parse_live_system(writer: &mut Writer>) { +fn parse_live_system(writer: &mut OutputWriter) { let strings = collect_strings_system().unwrap(); let shared_strings = collect_shared_strings_system().unwrap(); let timesync_data = collect_timesync_system().unwrap(); @@ -115,7 +133,7 @@ fn parse_trace_file( shared_strings_results: &[SharedCacheStrings], timesync_data: &[TimesyncBoot], path: &str, - writer: &mut Writer>, + writer: &mut OutputWriter, ) { // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries) // Some log entries have Oversize strings located in different tracev3 files. @@ -302,7 +320,7 @@ fn iterate_chunks( strings_data: &[UUIDText], shared_strings: &[SharedCacheStrings], timesync_data: &[TimesyncBoot], - writer: &mut Writer>, + writer: &mut OutputWriter, oversize_strings: &mut UnifiedLogData, ) -> usize { let log_bytes = fs::read(path).unwrap(); @@ -338,71 +356,116 @@ fn iterate_chunks( count } -fn construct_writer(output_path: &str) -> Result>, Box> { - let writer = if output_path != "" { - Box::new( - OpenOptions::new() - .append(true) - .create(true) - .open(output_path)?, - ) as Box - } else { - Box::new(io::stdout()) as Box - }; - Ok(Writer::from_writer(writer)) +pub struct OutputWriter { + writer: OutputWriterEnum, } -// Create csv file and create headers -fn output_header(writer: &mut Writer>) -> Result<(), Box> { - writer.write_record(&[ - "Timestamp", - "Event Type", - "Log Type", - "Subsystem", - "Thread ID", - "PID", - "EUID", - "Library", - "Library UUID", - "Activity ID", - "Category", - "Process", - "Process UUID", - "Message", - "Raw Message", - "Boot UUID", - "System Timezone Name", - ])?; - writer.flush()?; - Ok(()) +enum OutputWriterEnum { + Csv(Writer>), + Json(Box), } +impl OutputWriter { + pub fn new(output_path: &str, output_format: &str, append: bool) -> Result> { + let writer: Box = if output_path != "" { + Box::new( + OpenOptions::new() + .write(true) + .create(true) + .truncate(!append) + .append(append) + .open(output_path)?, + ) + } else { + Box::new(io::stdout()) + }; + + let writer_enum = match output_format { + "csv" => { + let mut csv_writer = Writer::from_writer(writer); + // Write CSV headers + csv_writer.write_record(&[ + "Timestamp", + "Event Type", + "Log Type", + "Subsystem", + "Thread ID", + "PID", + "EUID", + "Library", + "Library UUID", + "Activity ID", + "Category", + "Process", + "Process UUID", + "Message", + "Raw Message", + "Boot UUID", + "System Timezone Name", + ])?; + csv_writer.flush()?; + OutputWriterEnum::Csv(csv_writer) + } + "jsonl" => OutputWriterEnum::Json(writer), + _ => { + eprintln!("Unsupported output format: {}", output_format); + std::process::exit(1); + }, + }; + + Ok(OutputWriter { + writer: writer_enum, + }) + } + + pub fn write_record(&mut self, record: &LogData) -> Result<(), Box> { + match &mut self.writer { + OutputWriterEnum::Csv(csv_writer) => { + let date_time = Utc.timestamp_nanos(record.time as i64); + csv_writer.write_record(&[ + date_time.to_rfc3339_opts(SecondsFormat::Millis, true), + record.event_type.to_owned(), + record.log_type.to_owned(), + record.subsystem.to_owned(), + record.thread_id.to_string(), + record.pid.to_string(), + record.euid.to_string(), + record.library.to_owned(), + record.library_uuid.to_owned(), + record.activity_id.to_string(), + record.category.to_owned(), + record.process.to_owned(), + record.process_uuid.to_owned(), + record.message.to_owned(), + record.raw_message.to_owned(), + record.boot_uuid.to_owned(), + record.timezone_name.to_owned(), + ])?; + } + OutputWriterEnum::Json(json_writer) => { + writeln!(json_writer, "{}", serde_json::to_string(record).unwrap())?; + } + } + Ok(()) + } + + pub fn flush(&mut self) -> Result<(), Box> { + match &mut self.writer { + OutputWriterEnum::Csv(csv_writer) => csv_writer.flush()?, + OutputWriterEnum::Json(json_writer) => json_writer.flush()?, + } + Ok(()) + } +} + + // Append or create csv file fn output( results: &Vec, - writer: &mut Writer>, + writer: &mut OutputWriter, ) -> Result<(), Box> { for data in results { - let date_time = Utc.timestamp_nanos(data.time as i64); - writer.write_record(&[ - date_time.to_rfc3339_opts(SecondsFormat::Millis, true), - data.event_type.to_owned(), - data.log_type.to_owned(), - data.subsystem.to_owned(), - data.thread_id.to_string(), - data.pid.to_string(), - data.euid.to_string(), - data.library.to_owned(), - data.library_uuid.to_owned(), - data.activity_id.to_string(), - data.category.to_owned(), - data.process.to_owned(), - data.process_uuid.to_owned(), - data.message.to_owned(), - data.raw_message.to_owned(), - data.boot_uuid.to_owned(), - data.timezone_name.to_owned(), - ])?; + writer.write_record(&data)?; } writer.flush()?; Ok(()) From 8e9ffb45839e32ed47546d977273d1e1ab7d3d41 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Tue, 19 Nov 2024 15:43:12 +0100 Subject: [PATCH 2/4] fix: [unifiedlog_iterator] fix forgotten serde dependency --- examples/unifiedlog_iterator/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/unifiedlog_iterator/Cargo.toml b/examples/unifiedlog_iterator/Cargo.toml index 24372c5..a2dcea6 100644 --- a/examples/unifiedlog_iterator/Cargo.toml +++ b/examples/unifiedlog_iterator/Cargo.toml @@ -10,5 +10,6 @@ simplelog = "0.12.2" csv = "1.3.0" chrono = "0.4.38" log = "0.4.22" +serde_json = "1.0.122" macos-unifiedlogs = {path = "../../"} clap = {version = "4.5.18", features = ["derive"]} \ No newline at end of file From c9eb4be2c0afc86e9ac4f33096ca650498ccbc2c Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Wed, 27 Nov 2024 14:01:08 +0100 Subject: [PATCH 3/4] fix: [unifiedlog_iterator] syntax corrections + parser fix --- examples/unifiedlog_iterator/src/main.rs | 45 ++++++++++++------------ src/parser.rs | 2 +- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs index 54bc75a..3e08d64 100644 --- a/examples/unifiedlog_iterator/src/main.rs +++ b/examples/unifiedlog_iterator/src/main.rs @@ -68,10 +68,9 @@ fn main() { args.format.clone() }; - let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap(); - if args.input != "" { + if !args.input.is_empty() { parse_log_archive(&args.input, &mut writer); } else if args.live != "false" { parse_live_system(&mut writer); @@ -160,7 +159,7 @@ fn parse_trace_file( eprintln!("Parsing: {}", full_path); if data.path().exists() { - let count = iterate_chunks( + let count = iterate_chunks( &full_path, &mut missing_data, string_results, @@ -190,7 +189,7 @@ fn parse_trace_file( eprintln!("Parsing: {}", full_path); if data.path().exists() { - let count = iterate_chunks( + let count = iterate_chunks( &full_path, &mut missing_data, string_results, @@ -220,7 +219,7 @@ fn parse_trace_file( eprintln!("Parsing: {}", full_path); if data.path().exists() { - let count = iterate_chunks( + let count = iterate_chunks( &full_path, &mut missing_data, string_results, @@ -249,7 +248,7 @@ fn parse_trace_file( eprintln!("Parsing: {}", full_path); if data.path().exists() { - let count = iterate_chunks( + let count = iterate_chunks( &full_path, &mut missing_data, string_results, @@ -273,7 +272,7 @@ fn parse_trace_file( if archive_path.exists() { eprintln!("Parsing: logdata.LiveData.tracev3"); - let count = iterate_chunks( + let count = iterate_chunks( &archive_path.display().to_string(), &mut missing_data, string_results, @@ -294,8 +293,7 @@ fn parse_trace_file( // Since we have all Oversize entries now. Go through any log entries that we were not able to build before for mut leftover_data in missing_data { // Add all of our previous oversize data to logs for lookups - leftover_data - .oversize = oversize_strings.oversize.clone(); + leftover_data.oversize = oversize_strings.oversize.clone(); // Exclude_missing = false // If we fail to find any missing data its probably due to the logs rolling @@ -346,7 +344,10 @@ fn iterate_chunks( count += results.len(); oversize_strings.oversize = chunk.oversize; output(&results, writer).unwrap(); - if missing_logs.catalog_data.is_empty() && missing_logs.header.is_empty() && missing_logs.oversize.is_empty() { + if missing_logs.catalog_data.is_empty() + && missing_logs.header.is_empty() + && missing_logs.oversize.is_empty() + { continue; } // Track possible missing log data due to oversize strings being in another file @@ -361,13 +362,17 @@ pub struct OutputWriter { } enum OutputWriterEnum { - Csv(Writer>), + Csv(Box>>), Json(Box), } impl OutputWriter { - pub fn new(output_path: &str, output_format: &str, append: bool) -> Result> { - let writer: Box = if output_path != "" { + pub fn new( + output_path: &str, + output_format: &str, + append: bool, + ) -> Result> { + let writer: Box = if !output_path.is_empty() { Box::new( OpenOptions::new() .write(true) @@ -384,7 +389,7 @@ impl OutputWriter { "csv" => { let mut csv_writer = Writer::from_writer(writer); // Write CSV headers - csv_writer.write_record(&[ + csv_writer.write_record([ "Timestamp", "Event Type", "Log Type", @@ -404,13 +409,13 @@ impl OutputWriter { "System Timezone Name", ])?; csv_writer.flush()?; - OutputWriterEnum::Csv(csv_writer) + OutputWriterEnum::Csv(Box::new(csv_writer)) } "jsonl" => OutputWriterEnum::Json(writer), _ => { eprintln!("Unsupported output format: {}", output_format); std::process::exit(1); - }, + } }; Ok(OutputWriter { @@ -458,14 +463,10 @@ impl OutputWriter { } } - // Append or create csv file -fn output( - results: &Vec, - writer: &mut OutputWriter, -) -> Result<(), Box> { +fn output(results: &Vec, writer: &mut OutputWriter) -> Result<(), Box> { for data in results { - writer.write_record(&data)?; + writer.write_record(data)?; } writer.flush()?; Ok(()) diff --git a/src/parser.rs b/src/parser.rs index 09cc789..618b752 100755 --- a/src/parser.rs +++ b/src/parser.rs @@ -105,7 +105,7 @@ pub fn collect_strings(path: &str) -> Result, ParserError> { let entries = paths .flat_map(|path| { - path.inspect_err(|err| { + path.map_err(|err| { error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",) }) .ok() From 200ee974f8d7f5a83764d34aabbc4336abeb18fc Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Wed, 27 Nov 2024 14:02:31 +0100 Subject: [PATCH 4/4] fix: [parser] cargo fmt fix --- src/parser.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 618b752..dbfdafa 100755 --- a/src/parser.rs +++ b/src/parser.rs @@ -105,9 +105,9 @@ pub fn collect_strings(path: &str) -> Result, ParserError> { let entries = paths .flat_map(|path| { - path.map_err(|err| { - error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",) - }) + path.map_err( + |err| error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",), + ) .ok() }) .collect::>();