Commit 21a286f

Merge pull request #32 from cvandeplas/main

puffyCid authored Nov 29, 2024
2 parents 7fd1d1a + 200ee97 commit 21a286f
Showing 3 changed files with 143 additions and 78 deletions.
1 change: 1 addition & 0 deletions examples/unifiedlog_iterator/Cargo.toml
@@ -10,5 +10,6 @@ simplelog = "0.12.2"
csv = "1.3.0"
chrono = "0.4.38"
log = "0.4.22"
serde_json = "1.0.122"
macos-unifiedlogs = {path = "../../"}
clap = {version = "4.5.18", features = ["derive"]}
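The new serde_json dependency backs the jsonl output mode introduced in main.rs below. As a minimal sketch of what one emitted line looks like, assuming serde with the derive feature is also available (Record here is a stand-in for the crate's LogData, not a real type from this repo):

use serde::Serialize;

// Stand-in for macos-unifiedlogs' LogData; fields are illustrative only.
#[derive(Serialize)]
struct Record<'a> {
    message: &'a str,
    pid: u64,
}

fn main() {
    let record = Record { message: "example entry", pid: 42 };
    // JSONL means exactly one serialized record per line.
    println!("{}", serde_json::to_string(&record).unwrap());
}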
214 changes: 139 additions & 75 deletions examples/unifiedlog_iterator/src/main.rs
@@ -40,6 +40,15 @@ struct Args {
/// Path to output file. Any directories must already exist
#[clap(short, long, default_value = "")]
output: String,

/// Output format. Options: csv, jsonl. Default is autodetect.
#[clap(short, long, default_value = "auto")]
format: String,

/// Append to output file
/// If false, will overwrite output file
#[clap(short, long, default_value = "false")]
append: bool,
}
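The two new options in isolation, as a self-contained sketch: the field names and attributes mirror the diff, the remaining Args fields are omitted, and the binary name passed to parse_from is hypothetical.

use clap::Parser;

#[derive(Parser, Debug)]
struct Args {
    /// Output format. Options: csv, jsonl. Default is autodetect.
    #[clap(short, long, default_value = "auto")]
    format: String,

    /// Append to output file instead of overwriting it.
    #[clap(short, long, default_value = "false")]
    append: bool,
}

fn main() {
    // --append is a flag (clap's SetTrue action for bool fields),
    // so it takes no value on the command line.
    let args = Args::parse_from(["unifiedlog_iterator", "--format", "jsonl", "--append"]);
    assert_eq!(args.format, "jsonl");
    assert!(args.append);
}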

fn main() {
Expand All @@ -49,19 +58,27 @@ fn main() {
.expect("Failed to initialize simple logger");

let args = Args::parse();
let mut writer = construct_writer(&args.output).unwrap();
// Create headers for CSV file
output_header(&mut writer).unwrap();
let output_format = if args.format.is_empty() || args.format == "auto" {
std::path::Path::new(&args.output)
.extension()
.and_then(std::ffi::OsStr::to_str)
.unwrap_or("csv")
.to_string()
} else {
args.format.clone()
};

if args.input != "" {
let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap();

if !args.input.is_empty() {
parse_log_archive(&args.input, &mut writer);
} else if args.live != "false" {
parse_live_system(&mut writer);
}
}
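The autodetect branch above keys the output format off the file extension of --output, falling back to csv when there is no extension, which also covers the stdout case. The same logic pulled out as a standalone function for illustration (detect_format is a hypothetical name, not part of the diff):

use std::path::Path;

fn detect_format(output: &str) -> String {
    Path::new(output)
        .extension()
        .and_then(std::ffi::OsStr::to_str)
        .unwrap_or("csv")
        .to_string()
}

fn main() {
    assert_eq!(detect_format("logs.jsonl"), "jsonl");
    assert_eq!(detect_format("logs.csv"), "csv");
    assert_eq!(detect_format(""), "csv"); // no path means stdout, which defaults to CSV
}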

// Parse a provided directory path. Currently expects the path to follow the macOS log collect structure
fn parse_log_archive(path: &str, writer: &mut Writer<Box<dyn Write>>) {
fn parse_log_archive(path: &str, writer: &mut OutputWriter) {
let mut archive_path = PathBuf::from(path);

// Parse all UUID files which contain strings and other metadata
@@ -92,7 +109,7 @@ fn parse_log_archive(path: &str, writer: &mut Writer<Box<dyn Write>>) {
}

// Parse a live macOS system
fn parse_live_system(writer: &mut Writer<Box<dyn Write>>) {
fn parse_live_system(writer: &mut OutputWriter) {
let strings = collect_strings_system().unwrap();
let shared_strings = collect_shared_strings_system().unwrap();
let timesync_data = collect_timesync_system().unwrap();
Expand All @@ -115,7 +132,7 @@ fn parse_trace_file(
shared_strings_results: &[SharedCacheStrings],
timesync_data: &[TimesyncBoot],
path: &str,
writer: &mut Writer<Box<dyn Write>>,
writer: &mut OutputWriter,
) {
// We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries)
// Some log entries have Oversize strings located in different tracev3 files.
Expand All @@ -142,7 +159,7 @@ fn parse_trace_file(
eprintln!("Parsing: {}", full_path);

if data.path().exists() {
let count = iterate_chunks(
&full_path,
&mut missing_data,
string_results,
@@ -172,7 +189,7 @@ fn parse_trace_file(
eprintln!("Parsing: {}", full_path);

if data.path().exists() {
let count = iterate_chunks(
&full_path,
&mut missing_data,
string_results,
@@ -202,7 +219,7 @@ fn parse_trace_file(
eprintln!("Parsing: {}", full_path);

if data.path().exists() {
let count = iterate_chunks(
&full_path,
&mut missing_data,
string_results,
@@ -231,7 +248,7 @@ fn parse_trace_file(
eprintln!("Parsing: {}", full_path);

if data.path().exists() {
let count = iterate_chunks(
&full_path,
&mut missing_data,
string_results,
Expand All @@ -255,7 +272,7 @@ fn parse_trace_file(
if archive_path.exists() {
eprintln!("Parsing: logdata.LiveData.tracev3");

let count = iterate_chunks(
&archive_path.display().to_string(),
&mut missing_data,
string_results,
Expand All @@ -276,8 +293,7 @@ fn parse_trace_file(
// Since we have all Oversize entries now, go through any log entries that we were not able to build before
for mut leftover_data in missing_data {
// Add all of our previous oversize data to logs for lookups
leftover_data.oversize = oversize_strings.oversize.clone();

// Exclude_missing = false
// If we fail to find any missing data it's probably due to the logs rolling
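A toy illustration of the leftover/Oversize pattern the comments above describe: entries whose Oversize strings live in another tracev3 file are stashed on a first pass and rebuilt once every file has contributed its strings. This is not the crate's API, just the shape of the strategy:

use std::collections::HashMap;

fn main() {
    let mut oversize: HashMap<u64, String> = HashMap::new();
    let mut leftovers: Vec<u64> = Vec::new();

    // First pass: an entry in file A references Oversize string 7,
    // which actually lives in file B.
    oversize.insert(1, String::from("from file A"));
    let wanted = 7u64;
    if !oversize.contains_key(&wanted) {
        leftovers.push(wanted); // cannot build this entry yet
    }

    // File B is parsed later and contributes the missing string.
    oversize.insert(7, String::from("from file B"));

    // Second pass: retry leftovers now that all Oversize data is known.
    for id in leftovers {
        match oversize.get(&id) {
            Some(s) => println!("recovered entry {id}: {s}"),
            None => println!("entry {id} lost, the logs probably rolled"),
        }
    }
}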
Expand All @@ -302,7 +318,7 @@ fn iterate_chunks(
strings_data: &[UUIDText],
shared_strings: &[SharedCacheStrings],
timesync_data: &[TimesyncBoot],
writer: &mut Writer<Box<dyn Write>>,
writer: &mut OutputWriter,
oversize_strings: &mut UnifiedLogData,
) -> usize {
let log_bytes = fs::read(path).unwrap();
Expand All @@ -328,7 +344,10 @@ fn iterate_chunks(
count += results.len();
oversize_strings.oversize = chunk.oversize;
output(&results, writer).unwrap();
if missing_logs.catalog_data.is_empty()
    && missing_logs.header.is_empty()
    && missing_logs.oversize.is_empty()
{
continue;
}
// Track possible missing log data due to oversize strings being in another file
Expand All @@ -338,71 +357,116 @@ fn iterate_chunks(
count
}

fn construct_writer(output_path: &str) -> Result<Writer<Box<dyn Write>>, Box<dyn Error>> {
let writer = if output_path != "" {
Box::new(
OpenOptions::new()
.append(true)
.create(true)
.open(output_path)?,
) as Box<dyn Write>
} else {
Box::new(io::stdout()) as Box<dyn Write>
};
Ok(Writer::from_writer(writer))
pub struct OutputWriter {
writer: OutputWriterEnum,
}

// Create csv file and create headers
fn output_header(writer: &mut Writer<Box<dyn Write>>) -> Result<(), Box<dyn Error>> {
writer.write_record(&[
"Timestamp",
"Event Type",
"Log Type",
"Subsystem",
"Thread ID",
"PID",
"EUID",
"Library",
"Library UUID",
"Activity ID",
"Category",
"Process",
"Process UUID",
"Message",
"Raw Message",
"Boot UUID",
"System Timezone Name",
])?;
writer.flush()?;
Ok(())
enum OutputWriterEnum {
Csv(Box<Writer<Box<dyn Write>>>),
Json(Box<dyn Write>),
}

impl OutputWriter {
pub fn new(
output_path: &str,
output_format: &str,
append: bool,
) -> Result<Self, Box<dyn Error>> {
let writer: Box<dyn Write> = if !output_path.is_empty() {
Box::new(
OpenOptions::new()
.write(true)
.create(true)
.truncate(!append)
.append(append)
.open(output_path)?,
)
} else {
Box::new(io::stdout())
};

let writer_enum = match output_format {
"csv" => {
let mut csv_writer = Writer::from_writer(writer);
// Write CSV headers
csv_writer.write_record([
"Timestamp",
"Event Type",
"Log Type",
"Subsystem",
"Thread ID",
"PID",
"EUID",
"Library",
"Library UUID",
"Activity ID",
"Category",
"Process",
"Process UUID",
"Message",
"Raw Message",
"Boot UUID",
"System Timezone Name",
])?;
csv_writer.flush()?;
OutputWriterEnum::Csv(Box::new(csv_writer))
}
"jsonl" => OutputWriterEnum::Json(writer),
_ => {
eprintln!("Unsupported output format: {}", output_format);
std::process::exit(1);
}
};

Ok(OutputWriter {
writer: writer_enum,
})
}

pub fn write_record(&mut self, record: &LogData) -> Result<(), Box<dyn Error>> {
match &mut self.writer {
OutputWriterEnum::Csv(csv_writer) => {
let date_time = Utc.timestamp_nanos(record.time as i64);
csv_writer.write_record(&[
date_time.to_rfc3339_opts(SecondsFormat::Millis, true),
record.event_type.to_owned(),
record.log_type.to_owned(),
record.subsystem.to_owned(),
record.thread_id.to_string(),
record.pid.to_string(),
record.euid.to_string(),
record.library.to_owned(),
record.library_uuid.to_owned(),
record.activity_id.to_string(),
record.category.to_owned(),
record.process.to_owned(),
record.process_uuid.to_owned(),
record.message.to_owned(),
record.raw_message.to_owned(),
record.boot_uuid.to_owned(),
record.timezone_name.to_owned(),
])?;
}
OutputWriterEnum::Json(json_writer) => {
writeln!(json_writer, "{}", serde_json::to_string(record).unwrap())?;
}
}
Ok(())
}

pub fn flush(&mut self) -> Result<(), Box<dyn Error>> {
match &mut self.writer {
OutputWriterEnum::Csv(csv_writer) => csv_writer.flush()?,
OutputWriterEnum::Json(json_writer) => json_writer.flush()?,
}
Ok(())
}
}
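For reference, a compilable miniature of the sink selection used in OutputWriter::new: stdout when no path is given, truncate or append driven by the flag so the two are never set together. The OpenOptions chain mirrors the code above; the CSV payload is made up:

use csv::Writer;
use std::error::Error;
use std::fs::OpenOptions;
use std::io::{self, Write};

fn open_sink(path: &str, append: bool) -> Result<Box<dyn Write>, Box<dyn Error>> {
    Ok(if path.is_empty() {
        Box::new(io::stdout())
    } else {
        Box::new(
            OpenOptions::new()
                .write(true)
                .create(true)
                .truncate(!append) // never true at the same time as append
                .append(append)
                .open(path)?,
        )
    })
}

fn main() -> Result<(), Box<dyn Error>> {
    // An empty path selects stdout, exactly as in the example binary.
    let mut w = Writer::from_writer(open_sink("", false)?);
    w.write_record(["Timestamp", "Message"])?;
    w.write_record(["2024-11-29T00:00:00.000Z", "hello"])?;
    w.flush()?;
    Ok(())
}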

// Append or create csv file
fn output(
results: &Vec<LogData>,
writer: &mut Writer<Box<dyn Write>>,
) -> Result<(), Box<dyn Error>> {
fn output(results: &Vec<LogData>, writer: &mut OutputWriter) -> Result<(), Box<dyn Error>> {
for data in results {
let date_time = Utc.timestamp_nanos(data.time as i64);
writer.write_record(&[
date_time.to_rfc3339_opts(SecondsFormat::Millis, true),
data.event_type.to_owned(),
data.log_type.to_owned(),
data.subsystem.to_owned(),
data.thread_id.to_string(),
data.pid.to_string(),
data.euid.to_string(),
data.library.to_owned(),
data.library_uuid.to_owned(),
data.activity_id.to_string(),
data.category.to_owned(),
data.process.to_owned(),
data.process_uuid.to_owned(),
data.message.to_owned(),
data.raw_message.to_owned(),
data.boot_uuid.to_owned(),
data.timezone_name.to_owned(),
])?;
writer.write_record(data)?;
}
writer.flush()?;
Ok(())
}
6 changes: 3 additions & 3 deletions src/parser.rs
@@ -105,9 +105,9 @@ pub fn collect_strings(path: &str) -> Result<Vec<UUIDText>, ParserError> {

let entries = paths
.flat_map(|path| {
path.inspect_err(|err| {
error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",)
})
path.map_err(
|err| error!("[macos-unifiedlogs] Failed to get directory entry: {err:?}",),
)
.ok()
})
.collect::<Vec<_>>();
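Both variants shown in this hunk behave the same here: inspect_err logs the error and passes the Result through untouched, while map_err logs it while discarding the error value; either way the chain ends in .ok(). A standalone comparison (inspect_err needs Rust 1.76 or newer):

fn main() {
    let r: Result<u32, &str> = Err("boom");

    // Logs the error, keeps the Result intact, then drops it via ok().
    let a = r.inspect_err(|e| eprintln!("inspect_err saw: {e}")).ok();

    // Logs the error while mapping it to (), then drops it via ok().
    let b: Option<u32> = r.map_err(|e| eprintln!("map_err saw: {e}")).ok();

    assert_eq!(a, None);
    assert_eq!(b, None);
}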
