diff --git a/Cargo.toml b/Cargo.toml index 9f0a356..be683a1 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,19 +7,19 @@ edition = "2021" [dependencies] nom = "7.1.3" -serde_json = "1.0.128" -serde = { version = "1.0.210", features = ["derive"] } +serde_json = "1.0.133" +serde = { version = "1.0.215", features = ["derive"] } log = "0.4.22" lz4_flex = "0.11.3" byteorder = "1.5.0" plist = "1.7.0" -regex = "1.10.6" +regex = "1.11.1" base64 = "0.22.1" chrono = "0.4.38" [dev-dependencies] simplelog = "0.12.2" -csv = "1.3.0" +csv = "1.3.1" chrono = "0.4.38" criterion = "0.5.1" anyhow = "1.0.93" diff --git a/benches/big_sur_benchmark.rs b/benches/big_sur_benchmark.rs index 63f80e7..71649bd 100644 --- a/benches/big_sur_benchmark.rs +++ b/benches/big_sur_benchmark.rs @@ -5,8 +5,6 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and limitations under the License. -use std::path::PathBuf; - use criterion::{criterion_group, criterion_main, Criterion}; use macos_unifiedlogs::{ dsc::SharedCacheStrings, @@ -15,6 +13,7 @@ use macos_unifiedlogs::{ unified_log::UnifiedLogData, uuidtext::UUIDText, }; +use std::{collections::HashMap, path::PathBuf}; fn big_sur_parse_log(path: &str) { let _ = parse_log(&path).unwrap(); @@ -24,7 +23,7 @@ fn bench_build_log( log_data: &UnifiedLogData, string_results: &Vec, shared_strings_results: &Vec, - timesync_data: &Vec, + timesync_data: &HashMap, exclude_missing: bool, ) { let (_, _) = build_log( diff --git a/benches/high_sierra_benchmark.rs b/benches/high_sierra_benchmark.rs index c91a43b..fbb34f3 100644 --- a/benches/high_sierra_benchmark.rs +++ b/benches/high_sierra_benchmark.rs @@ -5,8 +5,6 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and limitations under the License. -use std::path::PathBuf; - use criterion::{criterion_group, criterion_main, Criterion}; use macos_unifiedlogs::{ dsc::SharedCacheStrings, @@ -15,6 +13,7 @@ use macos_unifiedlogs::{ unified_log::UnifiedLogData, uuidtext::UUIDText, }; +use std::{collections::HashMap, path::PathBuf}; fn high_sierra_parse_log(path: &str) { let _ = parse_log(&path).unwrap(); } @@ -23,7 +22,7 @@ fn bench_build_log( log_data: &UnifiedLogData, string_results: &Vec, shared_strings_results: &Vec, - timesync_data: &Vec, + timesync_data: &HashMap, exclude_missing: bool, ) { let (_, _) = build_log( diff --git a/benches/monterey_benchmark.rs b/benches/monterey_benchmark.rs index 954ffa0..bb03b47 100644 --- a/benches/monterey_benchmark.rs +++ b/benches/monterey_benchmark.rs @@ -5,7 +5,7 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and limitations under the License. -use std::path::PathBuf; +use std::{collections::HashMap, path::PathBuf}; use criterion::{criterion_group, criterion_main, Criterion}; use macos_unifiedlogs::{ @@ -23,7 +23,7 @@ fn bench_build_log( log_data: &UnifiedLogData, string_results: &Vec, shared_strings_results: &Vec, - timesync_data: &Vec, + timesync_data: &HashMap, exclude_missing: bool, ) { let (_, _) = build_log( diff --git a/examples/parse_tracev3/src/main.rs b/examples/parse_tracev3/src/main.rs index 350f99e..c6a06b5 100644 --- a/examples/parse_tracev3/src/main.rs +++ b/examples/parse_tracev3/src/main.rs @@ -6,13 +6,10 @@ // See the License for the specific language governing permissions and limitations under the License. use log::LevelFilter; -use macos_unifiedlogs::dsc::SharedCacheStrings; use macos_unifiedlogs::parser::{build_log, parse_log}; -use macos_unifiedlogs::timesync::TimesyncBoot; use macos_unifiedlogs::unified_log::LogData; -use macos_unifiedlogs::uuidtext::UUIDText; - use simplelog::{Config, SimpleLogger}; +use std::collections::HashMap; use std::env; use std::error::Error; use std::fs::OpenOptions; @@ -39,9 +36,9 @@ fn parse_trace_file(path: &str) { let log_data = parse_log(path).unwrap(); let filename = Path::new(path); // Pass empty UUID, UUID cache, timesync files - let string_results: Vec = Vec::new(); - let shared_strings_results: Vec = Vec::new(); - let timesync_data: Vec = Vec::new(); + let string_results = Vec::new(); + let shared_strings_results = Vec::new(); + let timesync_data = HashMap::new(); let exclude_missing = false; // We only get minimal data since we dont have the log metadata diff --git a/examples/unifiedlog_iterator/src/main.rs b/examples/unifiedlog_iterator/src/main.rs index 3e08d64..5b3cd40 100644 --- a/examples/unifiedlog_iterator/src/main.rs +++ b/examples/unifiedlog_iterator/src/main.rs @@ -17,6 +17,7 @@ use macos_unifiedlogs::timesync::TimesyncBoot; use macos_unifiedlogs::unified_log::{LogData, UnifiedLogData}; use macos_unifiedlogs::uuidtext::UUIDText; use simplelog::{Config, SimpleLogger}; +use std::collections::HashMap; use std::error::Error; use std::fs::OpenOptions; use std::io::Write; @@ -130,7 +131,7 @@ fn parse_live_system(writer: &mut OutputWriter) { fn parse_trace_file( string_results: &[UUIDText], shared_strings_results: &[SharedCacheStrings], - timesync_data: &[TimesyncBoot], + timesync_data: &HashMap, path: &str, writer: &mut OutputWriter, ) { @@ -317,7 +318,7 @@ fn iterate_chunks( missing: &mut Vec, strings_data: &[UUIDText], shared_strings: &[SharedCacheStrings], - timesync_data: &[TimesyncBoot], + timesync_data: &HashMap, writer: &mut OutputWriter, oversize_strings: &mut UnifiedLogData, ) -> usize { diff --git a/examples/unifiedlog_parser/src/main.rs b/examples/unifiedlog_parser/src/main.rs index 8ce6e9e..51d4a70 100755 --- a/examples/unifiedlog_parser/src/main.rs +++ b/examples/unifiedlog_parser/src/main.rs @@ -16,11 +16,12 @@ use macos_unifiedlogs::timesync::TimesyncBoot; use macos_unifiedlogs::unified_log::{LogData, UnifiedLogData}; use macos_unifiedlogs::uuidtext::UUIDText; use simplelog::{Config, SimpleLogger}; +use std::collections::HashMap; use std::error::Error; -use std::{fs, io}; use std::fs::OpenOptions; use std::io::Write; use std::path::PathBuf; +use std::{fs, io}; use clap::Parser; use csv::Writer; @@ -101,7 +102,7 @@ fn parse_live_system(writer: &mut Writer>) { &shared_strings, ×ync_data, "/private/var/db/diagnostics", - writer + writer, ); eprintln!("\nFinished parsing Unified Log data."); @@ -112,9 +113,9 @@ fn parse_live_system(writer: &mut Writer>) { fn parse_trace_file( string_results: &[UUIDText], shared_strings_results: &[SharedCacheStrings], - timesync_data: &[TimesyncBoot], + timesync_data: &HashMap, path: &str, - writer: &mut Writer> + writer: &mut Writer>, ) { // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries) // Some log entries have Oversize strings located in different tracev3 files. @@ -318,8 +319,7 @@ fn parse_trace_file( // Since we have all Oversize entries now. Go through any log entries that we were not able to build before for mut leftover_data in missing_data { // Add all of our previous oversize data to logs for lookups - leftover_data - .oversize = oversize_strings.oversize.clone(); + leftover_data.oversize = oversize_strings.oversize.clone(); // Exclude_missing = false // If we fail to find any missing data its probably due to the logs rolling @@ -340,10 +340,12 @@ fn parse_trace_file( fn construct_writer(output_path: &str) -> Result>, Box> { let writer = if output_path != "" { - Box::new(OpenOptions::new() - .append(true) - .create(true) - .open(output_path)?) as Box + Box::new( + OpenOptions::new() + .append(true) + .create(true) + .open(output_path)?, + ) as Box } else { Box::new(io::stdout()) as Box }; @@ -376,7 +378,10 @@ fn output_header(writer: &mut Writer>) -> Result<(), Box, writer: &mut Writer>) -> Result<(), Box> { +fn output( + results: &Vec, + writer: &mut Writer>, +) -> Result<(), Box> { for data in results { let date_time = Utc.timestamp_nanos(data.time as i64); writer.write_record(&[ diff --git a/examples/unifiedlog_parser_json/src/main.rs b/examples/unifiedlog_parser_json/src/main.rs index d5fb1b3..46a67b7 100644 --- a/examples/unifiedlog_parser_json/src/main.rs +++ b/examples/unifiedlog_parser_json/src/main.rs @@ -15,6 +15,7 @@ use macos_unifiedlogs::timesync::TimesyncBoot; use macos_unifiedlogs::unified_log::{LogData, UnifiedLogData}; use macos_unifiedlogs::uuidtext::UUIDText; use simplelog::{Config, SimpleLogger}; +use std::collections::HashMap; use std::error::Error; use std::fs; use std::fs::OpenOptions; @@ -105,7 +106,7 @@ fn parse_live_system() { fn parse_trace_file( string_results: &[UUIDText], shared_strings_results: &[SharedCacheStrings], - timesync_data: &[TimesyncBoot], + timesync_data: &HashMap, path: &str, ) { // We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries) diff --git a/src/parser.rs b/src/parser.rs index dbfdafa..f051bdc 100755 --- a/src/parser.rs +++ b/src/parser.rs @@ -12,6 +12,7 @@ use crate::error::ParserError; use crate::timesync::TimesyncBoot; use crate::unified_log::{LogData, UnifiedLogData}; use crate::uuidtext::UUIDText; +use std::collections::HashMap; use std::fs; /// Parse the UUID files on a live system @@ -27,7 +28,7 @@ pub fn collect_shared_strings_system() -> Result, Parser } /// Parse the timesync files on a live system -pub fn collect_timesync_system() -> Result, ParserError> { +pub fn collect_timesync_system() -> Result, ParserError> { let timesync = String::from("/private/var/db/diagnostics/timesync"); collect_timesync(×ync) } @@ -65,7 +66,7 @@ pub fn iter_log<'a>( unified_log_data: &'a UnifiedLogData, strings_data: &'a [UUIDText], shared_strings: &'a [SharedCacheStrings], - timesync_data: &'a [TimesyncBoot], + timesync_data: &'a HashMap, exclude_missing: bool, ) -> Result, UnifiedLogData)> + 'a, regex::Error> { LogData::iter_log( @@ -84,7 +85,7 @@ pub fn build_log( unified_data: &UnifiedLogData, strings_data: &[UUIDText], shared_strings: &[SharedCacheStrings], - timesync_data: &[TimesyncBoot], + timesync_data: &HashMap, exclude_missing: bool, ) -> (Vec, UnifiedLogData) { LogData::build_log( @@ -292,7 +293,7 @@ pub fn collect_shared_strings(path: &str) -> Result, Par } /// Parse all timesync files in provided directory -pub fn collect_timesync(path: &str) -> Result, ParserError> { +pub fn collect_timesync(path: &str) -> Result, ParserError> { let paths_results = fs::read_dir(path); let paths = match paths_results { @@ -306,7 +307,7 @@ pub fn collect_timesync(path: &str) -> Result, ParserError> { } }; - let mut timesync_data_vec: Vec = Vec::new(); + let mut timesync_data: HashMap = HashMap::new(); // Start process to read and parse all timesync files for path in paths { let data = match path { @@ -340,8 +341,8 @@ pub fn collect_timesync(path: &str) -> Result, ParserError> { ); let timesync_results = TimesyncBoot::parse_timesync_data(&buffer); - match timesync_results { - Ok((_, mut timesync)) => timesync_data_vec.append(&mut timesync), + let timesync_map = match timesync_results { + Ok((_, results)) => results, Err(err) => { error!( "[macos-unifiedlogs] Failed to parse timesync file {}: {:?}", @@ -350,9 +351,21 @@ pub fn collect_timesync(path: &str) -> Result, ParserError> { ); continue; } + }; + + /* + * If a macOS system has been online for a long time. macOS will create a new timesync file with the same boot UUID + * So we check if we already have an existing UUID and if we do, we just add the data to the existing data we have + */ + for (key, mut value) in timesync_map { + if let Some(exiting_boot) = timesync_data.get_mut(&key) { + exiting_boot.timesync.append(&mut value.timesync); + continue; + } + timesync_data.insert(key, value); } } - Ok(timesync_data_vec) + Ok(timesync_data) } #[cfg(test)] @@ -388,19 +401,77 @@ mod tests { let timesync_data = collect_timesync(&test_path.display().to_string()).unwrap(); assert_eq!(timesync_data.len(), 5); - assert_eq!(timesync_data[0].signature, 48048); - assert_eq!(timesync_data[0].unknown, 0); assert_eq!( - timesync_data[0].boot_uuid, + timesync_data + .get("9A6A3124274A44B29ABF2BC9E4599B3B") + .unwrap() + .signature, + 48048 + ); + assert_eq!( + timesync_data + .get("9A6A3124274A44B29ABF2BC9E4599B3B") + .unwrap() + .unknown, + 0 + ); + assert_eq!( + timesync_data + .get("9A6A3124274A44B29ABF2BC9E4599B3B") + .unwrap() + .boot_uuid, "9A6A3124274A44B29ABF2BC9E4599B3B" ); - assert_eq!(timesync_data[0].timesync.len(), 5); - assert_eq!(timesync_data[0].daylight_savings, 0); - assert_eq!(timesync_data[0].boot_time, 1642302206000000000); - assert_eq!(timesync_data[0].header_size, 48); - assert_eq!(timesync_data[0].timebase_denominator, 1); - assert_eq!(timesync_data[0].timebase_numerator, 1); - assert_eq!(timesync_data[0].timezone_offset_mins, 0); + assert_eq!( + timesync_data + .get("9A6A3124274A44B29ABF2BC9E4599B3B") + .unwrap() + .timesync + .len(), + 5 + ); + assert_eq!( + timesync_data + .get("9A6A3124274A44B29ABF2BC9E4599B3B") + .unwrap() + .daylight_savings, + 0 + ); + assert_eq!( + timesync_data + .get("9A6A3124274A44B29ABF2BC9E4599B3B") + .unwrap() + .boot_time, + 1642302206000000000 + ); + assert_eq!( + timesync_data + .get("9A6A3124274A44B29ABF2BC9E4599B3B") + .unwrap() + .header_size, + 48 + ); + assert_eq!( + timesync_data + .get("9A6A3124274A44B29ABF2BC9E4599B3B") + .unwrap() + .timebase_denominator, + 1 + ); + assert_eq!( + timesync_data + .get("9A6A3124274A44B29ABF2BC9E4599B3B") + .unwrap() + .timebase_numerator, + 1 + ); + assert_eq!( + timesync_data + .get("9A6A3124274A44B29ABF2BC9E4599B3B") + .unwrap() + .timezone_offset_mins, + 0 + ); } #[test] diff --git a/src/timesync.rs b/src/timesync.rs index fca7d20..79f1334 100755 --- a/src/timesync.rs +++ b/src/timesync.rs @@ -10,6 +10,7 @@ use nom::bytes::complete::take; use nom::number::complete::{be_u128, le_i64, le_u16, le_u32, le_u64}; use nom::Needed; use serde::{Deserialize, Serialize}; +use std::collections::HashMap; use std::mem::size_of; #[derive(Debug, Serialize, Deserialize)] @@ -39,8 +40,8 @@ pub struct Timesync { impl TimesyncBoot { /// Parse the Unified Log timesync files - pub fn parse_timesync_data(data: &[u8]) -> nom::IResult<&[u8], Vec> { - let mut timesync_data: Vec = Vec::new(); + pub fn parse_timesync_data(data: &[u8]) -> nom::IResult<&[u8], HashMap> { + let mut timesync_data: HashMap = HashMap::new(); let mut input = data; let mut timesync_boot = TimesyncBoot { @@ -67,7 +68,11 @@ impl TimesyncBoot { input = timesync_input; } else { if timesync_boot.signature != 0 { - timesync_data.push(timesync_boot); + if let Some(existing_boot) = timesync_data.get_mut(×ync_boot.boot_uuid) { + existing_boot.timesync.append(&mut timesync_boot.timesync); + } else { + timesync_data.insert(timesync_boot.boot_uuid.clone(), timesync_boot); + } } let (timesync_input, timesync_boot_data) = TimesyncBoot::parse_timesync_boot(input)?; @@ -75,7 +80,11 @@ impl TimesyncBoot { input = timesync_input; } } - timesync_data.push(timesync_boot); + if let Some(existing_boot) = timesync_data.get_mut(×ync_boot.boot_uuid) { + existing_boot.timesync.append(&mut timesync_boot.timesync); + } else { + timesync_data.insert(timesync_boot.boot_uuid.clone(), timesync_boot); + } Ok((input, timesync_data)) } @@ -171,7 +180,7 @@ impl TimesyncBoot { /// Calculate timestamp for firehose log entry pub fn get_timestamp( - timesync_data: &[TimesyncBoot], + timesync_data: &HashMap, boot_uuid: &str, firehose_log_delta_time: u64, firehose_preamble_time: u64, @@ -201,15 +210,9 @@ impl TimesyncBoot { let mut timesync_continous_time = 0; let mut timesync_walltime = 0; - let mut larger_time = false; - // Apple Intel uses 1/1 as the timebase let mut timebase_adjustment = 1.0; - for timesync in timesync_data { - if boot_uuid != timesync.boot_uuid { - continue; - } - + if let Some(timesync) = timesync_data.get(boot_uuid) { if timesync.timebase_numerator == 125 && timesync.timebase_denominator == 3 { // For Apple Silicon (ARM) we need to adjust the mach time by multiplying by 125.0/3.0 to get the accurate nanosecond count timebase_adjustment = 125.0 / 3.0; @@ -227,17 +230,12 @@ impl TimesyncBoot { timesync_continous_time = timesync_record.kernel_time; timesync_walltime = timesync_record.walltime; } - larger_time = true; break; } timesync_continous_time = timesync_record.kernel_time; timesync_walltime = timesync_record.walltime; } - // We should only break once we encountered a timesync_record.kernel_time greater than the firehose_log_delta_time - if larger_time { - break; - } } let continous_time = (firehose_log_delta_time as f64).mul_add( @@ -269,7 +267,14 @@ mod tests { let (_, timesync_data) = TimesyncBoot::parse_timesync_data(&buffer).unwrap(); assert_eq!(timesync_data.len(), 5); - assert_eq!(timesync_data[0].timesync.len(), 5); + assert_eq!( + timesync_data + .get("9A6A3124274A44B29ABF2BC9E4599B3B") + .unwrap() + .timesync + .len(), + 5 + ); } #[test] diff --git a/src/unified_log.rs b/src/unified_log.rs index c93892e..3525df0 100755 --- a/src/unified_log.rs +++ b/src/unified_log.rs @@ -9,6 +9,8 @@ //! //! Provides a simple library to parse the macOS Unified Log format. +use std::collections::HashMap; + use crate::catalog::CatalogChunk; use crate::chunks::firehose::activity::FirehoseActivity; use crate::chunks::firehose::firehose_log::{Firehose, FirehoseItemInfo, FirehosePreamble}; @@ -51,7 +53,7 @@ struct LogIterator<'a> { unified_log_data: &'a UnifiedLogData, strings_data: &'a [UUIDText], shared_strings: &'a [SharedCacheStrings], - timesync_data: &'a [TimesyncBoot], + timesync_data: &'a HashMap, exclude_missing: bool, message_re: Regex, catalog_data_iterator_index: usize, @@ -61,7 +63,7 @@ impl<'a> LogIterator<'a> { unified_log_data: &'a UnifiedLogData, strings_data: &'a [UUIDText], shared_strings: &'a [SharedCacheStrings], - timesync_data: &'a [TimesyncBoot], + timesync_data: &'a HashMap, exclude_missing: bool, ) -> Result { /* @@ -747,7 +749,7 @@ impl LogData { unified_log_data: &'a UnifiedLogData, strings_data: &'a [UUIDText], shared_strings: &'a [SharedCacheStrings], - timesync_data: &'a [TimesyncBoot], + timesync_data: &'a HashMap, exclude_missing: bool, ) -> Result, UnifiedLogData)> + 'a, regex::Error> { LogIterator::new( @@ -765,7 +767,7 @@ impl LogData { unified_log_data: &UnifiedLogData, strings_data: &[UUIDText], shared_strings: &[SharedCacheStrings], - timesync_data: &[TimesyncBoot], + timesync_data: &HashMap, exclude_missing: bool, ) -> (Vec, UnifiedLogData) { let mut log_data_vec: Vec = Vec::new(); @@ -960,7 +962,7 @@ mod tests { parser::{collect_shared_strings, collect_strings, collect_timesync, iter_log, parse_log}, unified_log::UnifiedLogCatalogData, }; - use std::{fs, path::PathBuf}; + use std::{collections::HashMap, fs, path::PathBuf}; #[test] fn test_parse_unified_log() { @@ -987,7 +989,8 @@ mod tests { let buffer = fs::read(test_path).unwrap(); let (_, results) = LogData::parse_unified_log(&buffer).unwrap(); - let iter = iter_log(&results, &[], &[], &[], false).unwrap(); + let map = HashMap::new(); + let iter = iter_log(&results, &[], &[], &map, false).unwrap(); for (entry, remaining) in iter { assert!(entry.len() > 1000); assert!(remaining.catalog_data.is_empty());