Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move regex to lazy lock #37

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions src/message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// See the License for the specific language governing permissions and limitations under the License.

use std::mem::size_of;
use std::sync::LazyLock;

use crate::chunks::firehose::firehose_log::FirehoseItemInfo;
use crate::decoders::decoder;
Expand All @@ -27,11 +28,37 @@ const OCTAL_TYPES: [&str; 2] = ["o", "O"];
const ERROR_TYPES: [&str; 1] = ["m"];
const STRING_TYPES: [&str; 6] = ["c", "s", "@", "S", "C", "P"];

/*
Crazy Regex to try to get all log message formatters
Formatters are based off of printf formatters with additional Apple values
( # start of capture group 1
% # literal "%"
(?: # first option

(?:{[^}]+}?) # Get String formatters with %{<variable>}<variable> values. Ex: %{public}#llx with team ID %{public}@
(?:[-+0#]{0,5}) # optional flags
(?:\d+|\*)? # width
(?:\.(?:\d+|\*))? # precision
(?:h|hh|l|ll|t|q|w|I|z|I32|I64)? # size
[cCdiouxXeEfgGaAnpsSZPm@}] # type

| # OR get regular string formatters, ex: %s, %d

(?:[-+0 #]{0,5}) # optional flags
(?:\d+|\*)? # width
(?:\.(?:\d+|\*))? # precision
(?:h|hh|l|ll|w|I|t|q|z|I32|I64)? # size
[cCdiouxXeEfgGaAnpsSZPm@%] # type
))
*/
/// Lazily compiled regex that matches printf-style format specifiers, including the
/// `%{...}` prefixed variants, inside a log format string.
/// The pattern is compiled on first access; that first access panics if compilation fails.
static MESSAGE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(%(?:(?:\{[^}]+}?)(?:[-+0#]{0,5})(?:\d+|\*)?(?:\.(?:\d+|\*))?(?:h|hh|l|ll|w|I|z|t|q|I32|I64)?[cmCdiouxXeEfgGaAnpsSZP@}]|(?:[-+0 #]{0,5})(?:\d+|\*)?(?:\.(?:\d+|\*))?(?:h|hh|l||q|t|ll|w|I|z|I32|I64)?[cmCdiouxXeEfgGaAnpsSZP@%]))",).expect("failed to compile message regex")
});

/// Format the Unified Log message entry based on the parsed log items. Formatting follows the C language printf formatting process
pub fn format_firehose_log_message(
format_string: String,
item_message: &Vec<FirehoseItemInfo>,
message_re: &Regex,
) -> String {
let mut log_message = format_string;
let mut format_and_message_vec: Vec<FormatAndMessage> = Vec::new();
Expand All @@ -56,7 +83,7 @@ pub fn format_firehose_log_message(
if log_message.is_empty() {
return item_message[0].message_strings.to_owned();
}
let results = message_re.find_iter(&log_message);
let results = MESSAGE_REGEX.find_iter(&log_message);

let mut item_index = 0;
for formatter in results {
Expand Down Expand Up @@ -1098,7 +1125,6 @@ mod tests {
format_alignment_right_space, format_firehose_log_message, format_left, format_right,
parse_float, parse_formatter, parse_int, parse_signpost_format, parse_type_formatter,
};
use regex::Regex;

#[test]
fn test_format_firehose_log_message() {
Expand All @@ -1109,9 +1135,8 @@ mod tests {
item_type: 34,
item_size: 0,
});
let message_re = Regex::new(r"(%(?:(?:\{[^}]+}?)(?:[-+0#]{0,5})(?:\d+|\*)?(?:\.(?:\d+|\*))?(?:h|hh|l|ll|w|I|z|t|q|I32|I64)?[cmCdiouxXeEfgGaAnpsSZP@%}]|(?:[-+0 #]{0,5})(?:\d+|\*)?(?:\.(?:\d+|\*))?(?:h|hh|l||q|t|ll|w|I|z|I32|I64)?[cmCdiouxXeEfgGaAnpsSZP@%]))").unwrap();

let log_string = format_firehose_log_message(test_data, &item_message, &message_re);
let log_string = format_firehose_log_message(test_data, &item_message);
assert_eq!(log_string, "opendirectoryd (build 796.100) launched...")
}

Expand Down
2 changes: 1 addition & 1 deletion src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ pub fn iter_log<'a>(
shared_strings: &'a [SharedCacheStrings],
timesync_data: &'a [TimesyncBoot],
exclude_missing: bool,
) -> Result<impl Iterator<Item = (Vec<LogData>, UnifiedLogData)> + 'a, regex::Error> {
) -> impl Iterator<Item = (Vec<LogData>, UnifiedLogData)> + 'a {
LogData::iter_log(
unified_log_data,
strings_data,
Expand Down
63 changes: 8 additions & 55 deletions src/unified_log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ use crate::util::{extract_string, padding_size, unixepoch_to_iso};
use crate::uuidtext::UUIDText;
use log::{error, warn};
use nom::bytes::complete::take;
use regex::Regex;
use serde::Serialize;

#[derive(Debug, Clone)]
Expand All @@ -53,63 +52,25 @@ struct LogIterator<'a> {
shared_strings: &'a [SharedCacheStrings],
timesync_data: &'a [TimesyncBoot],
exclude_missing: bool,
message_re: Regex,
catalog_data_iterator_index: usize,
}

impl<'a> LogIterator<'a> {
fn new(
unified_log_data: &'a UnifiedLogData,
strings_data: &'a [UUIDText],
shared_strings: &'a [SharedCacheStrings],
timesync_data: &'a [TimesyncBoot],
exclude_missing: bool,
) -> Result<Self, regex::Error> {
/*
Crazy Regex to try to get all log message formatters
Formatters are based off of printf formatters with additional Apple values
( # start of capture group 1
% # literal "%"
(?: # first option

(?:{[^}]+}?) # Get String formatters with %{<variable>}<variable> values. Ex: %{public}#llx with team ID %{public}@
(?:[-+0#]{0,5}) # optional flags
(?:\d+|\*)? # width
(?:\.(?:\d+|\*))? # precision
(?:h|hh|l|ll|t|q|w|I|z|I32|I64)? # size
[cCdiouxXeEfgGaAnpsSZPm@}] # type

| # OR get regular string formatters, ex: %s, %d

(?:[-+0 #]{0,5}) # optional flags
(?:\d+|\*)? # width
(?:\.(?:\d+|\*))? # precision
(?:h|hh|l|ll|w|I|t|q|z|I32|I64)? # size
[cCdiouxXeEfgGaAnpsSZPm@%] # type
))
*/
let message_re_result = Regex::new(
r"(%(?:(?:\{[^}]+}?)(?:[-+0#]{0,5})(?:\d+|\*)?(?:\.(?:\d+|\*))?(?:h|hh|l|ll|w|I|z|t|q|I32|I64)?[cmCdiouxXeEfgGaAnpsSZP@}]|(?:[-+0 #]{0,5})(?:\d+|\*)?(?:\.(?:\d+|\*))?(?:h|hh|l||q|t|ll|w|I|z|I32|I64)?[cmCdiouxXeEfgGaAnpsSZP@%]))",
);
let message_re = match message_re_result {
Ok(message_re) => message_re,
Err(err) => {
error!(
"Failed to compile regex for printf format parsing: {:?}",
err
);
return Err(err);
}
};

Ok(LogIterator {
) -> Self {
LogIterator {
unified_log_data,
strings_data,
shared_strings,
timesync_data,
exclude_missing,
message_re,
catalog_data_iterator_index: 0,
})
}
}
}

Expand Down Expand Up @@ -226,14 +187,12 @@ impl Iterator for LogIterator<'_> {
format_firehose_log_message(
results.format_string,
&oversize_strings,
&self.message_re,
)
} else {
// Format and map the log strings with the message format string found in the UUIDText or shared string file
format_firehose_log_message(
results.format_string,
&firehose.message.item_info,
&self.message_re,
)
};
// If we are tracking missing data (due to it being stored in another log file), add the missing data to a vec so it can be tracked and parsed again once we have all the data
Expand Down Expand Up @@ -311,7 +270,6 @@ impl Iterator for LogIterator<'_> {
let log_message = format_firehose_log_message(
results.format_string,
&firehose.message.item_info,
&self.message_re,
);

if self.exclude_missing
Expand Down Expand Up @@ -374,14 +332,12 @@ impl Iterator for LogIterator<'_> {
format_firehose_log_message(
results.format_string,
&oversize_strings,
&self.message_re,
)
} else {
// Format and map the log strings with the message format string found in the UUIDText or shared string file
format_firehose_log_message(
results.format_string,
&firehose.message.item_info,
&self.message_re,
)
};
if self.exclude_missing
Expand Down Expand Up @@ -455,7 +411,6 @@ impl Iterator for LogIterator<'_> {
let log_message = format_firehose_log_message(
results.format_string,
&firehose.message.item_info,
&self.message_re,
);

if self.exclude_missing
Expand Down Expand Up @@ -749,7 +704,7 @@ impl LogData {
shared_strings: &'a [SharedCacheStrings],
timesync_data: &'a [TimesyncBoot],
exclude_missing: bool,
) -> Result<impl Iterator<Item = (Vec<LogData>, UnifiedLogData)> + 'a, regex::Error> {
) -> impl Iterator<Item = (Vec<LogData>, UnifiedLogData)> + 'a {
LogIterator::new(
unified_log_data,
strings_data,
Expand All @@ -776,15 +731,13 @@ impl LogData {
oversize: Vec::new(),
};

let Ok(log_iterator) = LogIterator::new(
let log_iterator = LogIterator::new(
unified_log_data,
strings_data,
shared_strings,
timesync_data,
exclude_missing,
) else {
return (log_data_vec, missing_unified_log_data_vec);
};
);
for (mut log_data, mut missing_unified_log) in log_iterator {
log_data_vec.append(&mut log_data);
missing_unified_log_data_vec
Expand Down Expand Up @@ -987,7 +940,7 @@ mod tests {
let buffer = fs::read(test_path).unwrap();

let (_, results) = LogData::parse_unified_log(&buffer).unwrap();
let iter = iter_log(&results, &[], &[], &[], false).unwrap();
let iter = iter_log(&results, &[], &[], &[], false);
for (entry, remaining) in iter {
assert!(entry.len() > 1000);
assert!(remaining.catalog_data.is_empty());
Expand Down