diff --git a/src/sysdiagnose/parsers/logarchive.py b/src/sysdiagnose/parsers/logarchive.py
index e80ab0b..24812e5 100644
--- a/src/sysdiagnose/parsers/logarchive.py
+++ b/src/sysdiagnose/parsers/logarchive.py
@@ -206,7 +206,7 @@ def parse_folder_to_file(input_folder: str, output_file: str) -> bool:
     def __convert_using_native_logparser(input_folder: str, output_file: str) -> list:
         with open(output_file, 'w') as f_out:
             # output to stdout and not to a file as we need to convert the output to a unified format
-            cmd_array = ['/usr/bin/log', 'show', input_folder, '--style', 'ndjson']
+            cmd_array = ['/usr/bin/log', 'show', input_folder, '--style', 'ndjson', '--info', '--debug', '--signpost']
             # read each line, convert line by line and write the output directly to the new file
             # this approach limits memory consumption
             for line in LogarchiveParser.__execute_cmd_and_yield_result(cmd_array):
diff --git a/src/sysdiagnose/parsers/mobilebackup.py b/src/sysdiagnose/parsers/mobilebackup.py
new file mode 100644
index 0000000..a9cb28d
--- /dev/null
+++ b/src/sysdiagnose/parsers/mobilebackup.py
@@ -0,0 +1,77 @@
+#! /usr/bin/env python3
+
+import glob
+import os
+from sysdiagnose.utils.base import BaseParserInterface
+from sysdiagnose.utils import misc
+from datetime import datetime
+import re
+
+
+class MobileBackupParser(BaseParserInterface):
+    description = "Parsing mobilebackup plist file"
+    format = 'jsonl'
+
+    def __init__(self, config: dict, case_id: str):
+        super().__init__(__file__, config, case_id)
+
+    def get_log_files(self) -> list:
+        log_files_globs = [
+            'logs/MobileBackup/com.apple.MobileBackup.plist'
+        ]
+        log_files = []
+        for log_files_glob in log_files_globs:
+            log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob)))
+
+        return log_files
+
+    def execute(self) -> list | dict:
+        result = []
+        for logfile in self.get_log_files():
+            json_data = misc.load_plist_file_as_json(logfile)
+            # add LastOnConditionEvents that contain errors
+            for i, event in enumerate(json_data.get('LastOnConditionEvents', [])):
+                # "2023-02-22T10:24:49.051-08:00|158966.533|1|1|0|1|1|0|120|73683|15023|0|MBErrorDomain|209|2|0|0|0|0"
+                parts = event.split('|')
+                timestamp = datetime.fromisoformat(parts[0])
+
+                item = {
+                    'datetime': timestamp.isoformat(timespec='microseconds'),
+                    'timestamp': timestamp.timestamp(),
+                    'domain': parts[12],
+                    'code': int(parts[13]),
+                    # TODO understand the meaning of the other fields
+                }
+                try:
+                    backupstateinfo = json_data['BackupStateInfo']['errors'][i]
+                    item.update(backupstateinfo)
+                except (KeyError, IndexError):
+                    # could not find a correlating BackupStateInfo entry
+                    pass
+                result.append(item)
+
+            # add PreflightSizing entries, which do not have a timestamp of their own
+            for key, value in json_data.get('PreflightSizing', {}).items():
+                timestamp = self.sysdiagnose_creation_datetime
+                item = {
+                    'type': 'MobileBackup PreflightSizing entry',
+                    'datetime': timestamp.isoformat(timespec='microseconds'),
+                    'timestamp': timestamp.timestamp(),
+                    'timestamp_desc': 'sysdiagnose creation time',
+                    'key': key,
+                    'size': value,
+                }
+                try:
+                    item['bundle_id'] = re.search(r'[^-]+Domain[^-]*-(.+)$', key).group(1)
+                except Exception:
+                    # key does not contain a bundle id
+                    pass
+                try:
+                    item['domain'] = re.search(r'([^-]+Domain[^-]*)', key).group(1)
+                except Exception:
+                    # key does not contain a domain
+                    pass
+
+                result.append(item)
+
+        return result
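A standalone sketch of the LastOnConditionEvents parsing above, using the sample entry quoted in the parser's comment (field 0 is an ISO 8601 timestamp, fields 12 and 13 carry the error domain and code; the remaining fields are still a TODO):

import datetime

event = '2023-02-22T10:24:49.051-08:00|158966.533|1|1|0|1|1|0|120|73683|15023|0|MBErrorDomain|209|2|0|0|0|0'
parts = event.split('|')
timestamp = datetime.datetime.fromisoformat(parts[0])
print(timestamp.timestamp())      # 1677090289.051 (epoch seconds, offset-aware)
print(parts[12], int(parts[13]))  # MBErrorDomain 209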
diff --git a/src/sysdiagnose/parsers/security_sysdiagnose.py b/src/sysdiagnose/parsers/security_sysdiagnose.py
index 92df066..92d76d3 100644
--- a/src/sysdiagnose/parsers/security_sysdiagnose.py
+++ b/src/sysdiagnose/parsers/security_sysdiagnose.py
@@ -1,16 +1,12 @@
 import os
 import re
 from sysdiagnose.utils.base import BaseParserInterface, logger
-
-# TODO make a security sysdiagnose analyser exporting in time based jsonl for timeline.
-# - client_trust: date
-# - client_transparency: date
-# - client_pcs: date
-# - client_local: date
+from datetime import datetime
 
 
 class SecuritySysdiagnoseParser(BaseParserInterface):
     description = "Parsing security-sysdiagnose.txt file containing keychain information"
+    format = 'jsonl'
 
     def __init__(self, config: dict, case_id: str):
         super().__init__(__file__, config, case_id)
@@ -24,19 +20,17 @@ def get_log_files(self) -> list:
         ]
         return [os.path.join(self.case_data_subfolder, log_files) for log_files in log_files]
 
-    def execute(self) -> list | dict:
+    def execute(self) -> list:
         log_files = self.get_log_files()
         if not log_files:
             return {'errors': ['No security-sysdiagnose.txt file present']}
 
-        return SecuritySysdiagnoseParser.parse_file(log_files[0])
-
-    def parse_file(path: str) -> dict:
-        json_result = {'errors': []}
-        with open(path, "r") as f:
+        json_result = {'errors': [], 'events': [], 'meta': {}}
+        with open(log_files[0], "r") as f:
             buffer = []
             buffer_section = None
+            # TODO cleanup way of passing results, as this was just a small refactor from an old way of working
             for line in f:
                 line = line.rstrip()
                 if line == '':
@@ -74,7 +68,19 @@ def parse_file(path: str) -> dict:
 
             # call the last buffer
             SecuritySysdiagnoseParser.process_buffer(buffer, buffer_section, json_result)
-        return json_result
+
+        # transform the 'meta' into one jsonl entry
+        timestamp = self.sysdiagnose_creation_datetime
+        item = {
+            'timestamp': timestamp.timestamp(),
+            'datetime': timestamp.isoformat(timespec='microseconds'),
+            'timestamp_desc': 'sysdiagnose_creation_datetime',
+            'section': 'metadata'
+        }
+        item.update(json_result['meta'])
+        json_result['events'].append(item)
+
+        return json_result['events']
 
     def process_buffer(buffer: list, section: str, json_result: dict):
         """
@@ -98,7 +104,7 @@ def process_buffer_circle(buffer: list, json_result: dict):
         we keep it and just add the lines as list to the result.
         TODO consider to parse the circle section in more detail
         """
-        json_result['circle'] = buffer
+        json_result['meta']['circle'] = buffer
 
     def process_buffer_engine_state(buffer: list, json_result: dict):
         """
@@ -106,7 +112,7 @@ def process_buffer_engine_state(buffer: list, json_result: dict):
         line_format_local = r'^(\w+) \{([^\}]+)\} \[([0-9]+)\] (\w+)'  # noqa F841
         # LATER consider splitting up the line format
-        json_result['engine'] = buffer
+        json_result['meta']['engine'] = buffer
         pass
 
     def process_buffer_keychain_state(buffer: list, json_result: dict):
         """
@@ -114,7 +120,7 @@ def process_buffer_keychain_state(buffer: list, json_result: dict):
         process the buffer for the homekit section
         """
         section = buffer.pop(0).split(' ').pop(0).lower()
-        json_result[section] = []
+        json_result['meta'][section] = []
         for line in buffer:
             # parse the csv line with key=value structure
             # unfortunately value can be { foo,bar }, so splitting on comma is not an option.
@@ -146,7 +152,7 @@ def process_buffer_keychain_state(buffer: list, json_result: dict):
                 i += 1
             # process the last key value pair
             row[key] = line[start:]
-            json_result[section].append(row)
+            json_result['meta'][section].append(row)
 
     def process_buffer_analytics(buffer: list, json_result: dict):
         """
@@ -160,22 +166,24 @@ def process_buffer_client(buffer: list, json_result: dict):
         process the buffer for the client section
         """
         section = f"client_{buffer.pop(0).split(':').pop(1).lower().strip()}"
-        json_result[section] = []
         if buffer[0].startswith('No data'):
             return
 
         i = 0
         while i < len(buffer):
             line = buffer[i]
-            row = {}
-            row['date'] = line[:25]  # 25 chars = 'YYYY-mm-dd HH:MM:SS +0000'
-            end = line.find(': ', 26)
-            row['result'] = line[26:end]
-            start = end + 2
-            end = line.find(' - ', end + 2)
-            row['type'] = line[start:end]
-            row['attributes'] = {}
-            attribute_string = line[end + 16:]  # 16 chars = ' - Attributes: {'
+            match = re.search(r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} [+-]\d{4}) ([^:]+): (.+?) - Attributes: {(.*)', line)
+            timestamp = datetime.fromisoformat(match.group(1))
+            row = {
+                'timestamp': timestamp.timestamp(),
+                'datetime': timestamp.isoformat(timespec='microseconds'),
+                'section': section,
+                'result': match.group(2),
+                'event': match.group(3),
+                'attributes': {}
+            }
+            attribute_string = match.group(4)
+
+            # while next rows do not start with a date, they are part of the attributes
             try:
                 while not re.search(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', buffer[i + 1]):
@@ -189,7 +197,7 @@ def process_buffer_client(buffer: list, json_result: dict):
 
             for key, value in attribute_pairs:
                 row['attributes'][key.strip()] = value.strip()
-            json_result[section].append(row)
+            json_result['events'].append(row)
             i += 1
 
     def process_buffer_keys_and_values(buffer: list, json_result: dict):
@@ -197,7 +205,7 @@ def process_buffer_keys_and_values(buffer: list, json_result: dict):
         process the buffer for the values section
         """
         section = buffer.pop(0)
-        json_result[section] = {}
+        json_result['meta'][section] = {}
 
         i = 0
         while i < len(buffer):
@@ -209,5 +217,5 @@ def process_buffer_keys_and_values(buffer: list, json_result: dict):
             except IndexError:
                 pass
             key, value = line.split(': ', 1)
-            json_result[section][key.strip()] = value.strip()
+            json_result['meta'][section][key.strip()] = value.strip()
             i += 1
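The rewritten process_buffer_client() above replaces fixed character offsets with a single regex. A standalone sketch of that match on an illustrative line (reconstructed from the values asserted in the updated tests further down; strptime with %z is used here as a version-independent way to parse the ' +0000' offset):

import re
from datetime import datetime

line = '2023-05-24 19:55:51 +0000 EventSoftFailure: OTAPKIEvent - Attributes: {product : iPhone OS, build : 19H349}'
match = re.search(r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} [+-]\d{4}) ([^:]+): (.+?) - Attributes: {(.*)', line)
timestamp = datetime.strptime(match.group(1), '%Y-%m-%d %H:%M:%S %z')
print(timestamp.timestamp())           # 1684958151.0
print(match.group(2), match.group(3))  # EventSoftFailure OTAPKIEvent
print(match.group(4))                  # product : iPhone OS, build : 19H349}  (trailing brace included)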
diff --git a/tests/test_parsers_mobilebackup.py b/tests/test_parsers_mobilebackup.py
new file mode 100644
index 0000000..afdf10d
--- /dev/null
+++ b/tests/test_parsers_mobilebackup.py
@@ -0,0 +1,25 @@
+from sysdiagnose.parsers.mobilebackup import MobileBackupParser
+from tests import SysdiagnoseTestCase
+import unittest
+import os
+
+
+class TestParsersBackup(SysdiagnoseTestCase):
+
+    def test_mobilebackup(self):
+        for case_id, case in self.sd.cases().items():
+            p = MobileBackupParser(self.sd.config, case_id=case_id)
+            files = p.get_log_files()
+            if not files:  # this case may not contain a MobileBackup plist
+                continue
+
+            p.save_result(force=True)
+            self.assertTrue(os.path.isfile(p.output_file))
+
+            result = p.get_result()
+            for item in result:
+                self.assertTrue('timestamp' in item)
+
+
+if __name__ == '__main__':
+    unittest.main()
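The bundle_id/domain extraction that MobileBackupParser applies to PreflightSizing keys, as a standalone sketch; the two keys below are hypothetical, assuming the usual iOS backup '<Domain>[-<bundle id>]' key naming (e.g. HomeDomain, AppDomain-<bundle id>):

import re

for key in ['HomeDomain', 'AppDomain-com.example.app']:
    match = re.search(r'[^-]+Domain[^-]*-(.+)$', key)
    bundle_id = match.group(1) if match else None            # None, then 'com.example.app'
    domain = re.search(r'([^-]+Domain[^-]*)', key).group(1)  # 'HomeDomain', then 'AppDomain'
    print(key, domain, bundle_id)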
diff --git a/tests/test_parsers_security_sysdiagnose.py b/tests/test_parsers_security_sysdiagnose.py
index 6cf7969..b608862 100644
--- a/tests/test_parsers_security_sysdiagnose.py
+++ b/tests/test_parsers_security_sysdiagnose.py
@@ -16,9 +16,8 @@ def test_get_security_sysdiagnose(self):
         self.assertTrue(os.path.isfile(p.output_file))
 
         result = p.get_result()
-        if result:
-            # test for no errors
-            self.assertEqual(result.get('errors'), [])
+        for item in result:
+            self.assertTrue('timestamp' in item)
 
     def test_process_buffer_keychain_state(self):
         input = [
@@ -26,11 +25,13 @@ def test_process_buffer_keychain_state(self):
             'rapport: accc=,acct=69AAAFE9-A7FA-4BF3-922E-A14C33F11924,agrp=com.apple.rapport,cdat=2023-05-24 19:56:14 +0000,gena={length = 33, bytes = 0xe3456d6f 64656c49 6950686f 6e65392c ... 6973696f 6e494409 },invi=1,labl=iPhone,mdat=2023-05-24 19:56:14 +0000,musr={length = 0, bytes = 0x},pdmn=ck,sha1={length = 20, bytes = 0x1490ff273a003ef4089c46beb3731eb04754c7e5},svce=RPIdentity-SameAccountDevice,sync=1,tomb=0,vwht=Home',
         ]
         expected_output = {
-            'rapport': [
-                {'accc': '', 'acct': '69AAAFE9-A7FA-4BF3-922E-A14C33F11924', 'agrp': 'com.apple.rapport', 'cdat': '2023-05-24 19:56:14 +0000', 'gena': '{length = 33, bytes = 0xe3456d6f 64656c49 6950686f 6e65392c ... 6973696f 6e494409 }', 'invi': '1', 'labl': 'iPhone', 'mdat': '2023-05-24 19:56:14 +0000', 'musr': '{length = 0, bytes = 0x}', 'pdmn': 'ck', 'sha1': '{length = 20, bytes = 0x1490ff273a003ef4089c46beb3731eb04754c7e5}', 'svce': 'RPIdentity-SameAccountDevice', 'sync': '1', 'tomb': '0', 'vwht': 'Home'}
-            ]
+            'meta': {
+                'rapport': [
+                    {'accc': '', 'acct': '69AAAFE9-A7FA-4BF3-922E-A14C33F11924', 'agrp': 'com.apple.rapport', 'cdat': '2023-05-24 19:56:14 +0000', 'gena': '{length = 33, bytes = 0xe3456d6f 64656c49 6950686f 6e65392c ... 6973696f 6e494409 }', 'invi': '1', 'labl': 'iPhone', 'mdat': '2023-05-24 19:56:14 +0000', 'musr': '{length = 0, bytes = 0x}', 'pdmn': 'ck', 'sha1': '{length = 20, bytes = 0x1490ff273a003ef4089c46beb3731eb04754c7e5}', 'svce': 'RPIdentity-SameAccountDevice', 'sync': '1', 'tomb': '0', 'vwht': 'Home'}
+                ]
+            }
         }
-        result = {}
+        result = {'meta': {}}
         SecuritySysdiagnoseParser.process_buffer_keychain_state(input, result)
         self.maxDiff = None
         self.assertDictEqual(result, expected_output)
@@ -44,12 +45,12 @@ def test_process_buffer_client(self):
             '), errorDomain : MITMErrorDomain, modelid : iPhone9,3, errorCode : 0 }'
         ]
         expected_output = {
-            'client_trust': [
-                {'date': '2023-05-24 19:55:51 +0000', 'result': 'EventSoftFailure', 'type': 'OTAPKIEvent', 'attributes': {'product': 'iPhone OS', 'build': '19H349', 'errorDomain': 'NSOSStatusErrorDomain', 'modelid': 'iPhone9,3', 'errorCode': '-67694'}},
-                {'date': '2023-05-24 19:57:58 +0000', 'result': 'EventSoftFailure', 'type': 'MitmDetectionEvent', 'attributes': {'product': 'iPhone OS', 'build': '19H349', 'overallScore': '0', 'timeSinceLastReset': '127', 'rootUsages': '( foo, bar )', 'errorDomain': 'MITMErrorDomain', 'modelid': 'iPhone9,3', 'errorCode': '0'}}
+            'events': [
+                {'datetime': '2023-05-24T19:55:51.000000+00:00', 'timestamp': 1684958151.0, 'section': 'client_trust', 'result': 'EventSoftFailure', 'event': 'OTAPKIEvent', 'attributes': {'product': 'iPhone OS', 'build': '19H349', 'errorDomain': 'NSOSStatusErrorDomain', 'modelid': 'iPhone9,3', 'errorCode': '-67694'}},
+                {'datetime': '2023-05-24T19:57:58.000000+00:00', 'timestamp': 1684958278.0, 'section': 'client_trust', 'result': 'EventSoftFailure', 'event': 'MitmDetectionEvent', 'attributes': {'product': 'iPhone OS', 'build': '19H349', 'overallScore': '0', 'timeSinceLastReset': '127', 'rootUsages': '( foo, bar )', 'errorDomain': 'MITMErrorDomain', 'modelid': 'iPhone9,3', 'errorCode': '0'}}
             ]
         }
-        result = {}
+        result = {'events': []}
         SecuritySysdiagnoseParser.process_buffer_client(input, result)
         self.maxDiff = None
         self.assertDictEqual(result, expected_output)
@@ -60,11 +61,13 @@ def test_process_buffer_keys_and_values(self):
             '~PCS-Notes-tomb: {length = 7741, capacity = 7741, bytes = 0x30821e39314c300e0c0a4170706c6963 ... 8df3270d7d823100}'
         ]
        expected_output = {
-            'values': {
-                '~PCS-Notes-tomb': '{length = 7741, capacity = 7741, bytes = 0x30821e39314c300e0c0a4170706c6963 ... 8df3270d7d823100}'
+            'meta': {
+                'values': {
+                    '~PCS-Notes-tomb': '{length = 7741, capacity = 7741, bytes = 0x30821e39314c300e0c0a4170706c6963 ... 8df3270d7d823100}'
+                }
             }
         }
-        result = {}
+        result = {'meta': {}}
         SecuritySysdiagnoseParser.process_buffer_keys_and_values(input, result)
         self.maxDiff = None
         self.assertDictEqual(result, expected_output)
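For reference, one of the events asserted above, serialized the way a jsonl output implies (one JSON object per line; this assumes the usual json.dumps-per-event convention behind format = 'jsonl', the writer itself is not part of this diff):

import json

event = {'datetime': '2023-05-24T19:55:51.000000+00:00', 'timestamp': 1684958151.0,
         'section': 'client_trust', 'result': 'EventSoftFailure', 'event': 'OTAPKIEvent',
         'attributes': {'product': 'iPhone OS', 'build': '19H349', 'errorDomain': 'NSOSStatusErrorDomain',
                        'modelid': 'iPhone9,3', 'errorCode': '-67694'}}
print(json.dumps(event))  # one line of the parser's .jsonl output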