From 22350732a768d9400ccb39bb8bf0de6e332332cd Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Wed, 18 Dec 2024 10:58:04 +0100 Subject: [PATCH 1/3] chg: more timeline based output --- src/sysdiagnose/parsers/appinstallation.py | 1 + src/sysdiagnose/parsers/lockdownd.py | 2 +- src/sysdiagnose/parsers/olddsc.py | 25 ++++++++++++++++++---- src/sysdiagnose/parsers/psthread.py | 22 +++++++++++++------ src/sysdiagnose/parsers/sys.py | 13 ++++++++--- tests/test_parsers_olddsc.py | 10 +++++---- tests/test_parsers_psthread.py | 6 +++--- tests/test_parsers_sys.py | 8 +++---- 8 files changed, 62 insertions(+), 25 deletions(-) diff --git a/src/sysdiagnose/parsers/appinstallation.py b/src/sysdiagnose/parsers/appinstallation.py index 00c1f10..ad8800a 100644 --- a/src/sysdiagnose/parsers/appinstallation.py +++ b/src/sysdiagnose/parsers/appinstallation.py @@ -18,6 +18,7 @@ class AppInstallationParser(BaseParserInterface): description = "Parsing app installation logs" + format = 'jsonl' def __init__(self, config: dict, case_id: str): super().__init__(__file__, config, case_id) diff --git a/src/sysdiagnose/parsers/lockdownd.py b/src/sysdiagnose/parsers/lockdownd.py index c3f3945..c28a855 100644 --- a/src/sysdiagnose/parsers/lockdownd.py +++ b/src/sysdiagnose/parsers/lockdownd.py @@ -2,7 +2,7 @@ import glob import os from sysdiagnose.utils.base import BaseParserInterface -from datetime import datetime, timezone +from datetime import datetime import re diff --git a/src/sysdiagnose/parsers/olddsc.py b/src/sysdiagnose/parsers/olddsc.py index 90db7cc..6a5e389 100644 --- a/src/sysdiagnose/parsers/olddsc.py +++ b/src/sysdiagnose/parsers/olddsc.py @@ -8,11 +8,13 @@ import glob import os from sysdiagnose.utils.misc import load_plist_file_as_json -from sysdiagnose.utils.base import BaseParserInterface +from sysdiagnose.utils.base import BaseParserInterface, logger class OldDscParser(BaseParserInterface): description = "Parsing olddsc files" + format = 'jsonl' + json_pretty = False 
def __init__(self, config: dict, case_id: str): @@ -28,10 +30,25 @@ def get_log_files(self) -> dict: return log_files - def execute(self) -> list | dict: + def execute(self) -> list: + timestamp_dict = {} + timestamp = self.sysdiagnose_creation_datetime + timestamp_dict['timestamp'] = timestamp.timestamp() + timestamp_dict['datetime'] = timestamp.isoformat(timespec='microseconds') + timestamp_dict['timestamp_desc'] = 'sysdiagnose creation' + + entries = [] + # we're not doing anything with + # - Unslid_Base_Address + # - Cache_UUID_String + # only acting on Binaries list for log_file in self.get_log_files(): - return OldDscParser.parse_file(log_file) - return {'error': ['No olddsc files present']} + for entry in OldDscParser.parse_file(log_file).get('Binaries', []): + entry.update(timestamp_dict) + entries.append(entry) + if not entries: + logger.warning('No olddsc files present') + return entries def parse_file(path: str) -> list | dict: try: diff --git a/src/sysdiagnose/parsers/psthread.py b/src/sysdiagnose/parsers/psthread.py index 65dacdb..e8ebb4b 100644 --- a/src/sysdiagnose/parsers/psthread.py +++ b/src/sysdiagnose/parsers/psthread.py @@ -12,11 +12,13 @@ import glob import os import re -from sysdiagnose.utils.base import BaseParserInterface +from sysdiagnose.utils.base import BaseParserInterface, logger +from sysdiagnose.utils.misc import snake_case class PsThreadParser(BaseParserInterface): description = "Parsing ps_thread.txt file" + format = 'jsonl' def __init__(self, config: dict, case_id: str): super().__init__(__file__, config, case_id) @@ -31,8 +33,15 @@ def get_log_files(self) -> list: return log_files - def execute(self) -> list | dict: - # TODO not really easy to conver to timebased jsonl, as the timestamp is complex to compute. + def execute(self) -> list: + # not really easy to convert to true timebased jsonl, as the timestamp is complex to compute.
+ # so we just fall back to the sysdiagnose creation timestamp + timestamp_dict = {} + timestamp = self.sysdiagnose_creation_datetime + timestamp_dict['timestamp'] = timestamp.timestamp() + timestamp_dict['datetime'] = timestamp.isoformat(timespec='microseconds') + timestamp_dict['timestamp_desc'] = 'sysdiagnose creation' + timestamp_dict['THREADS'] = 1 result = [] try: @@ -47,11 +56,11 @@ def execute(self) -> list | dict: result.append(row) patterns = line.strip().split(None, header_length - 1) - row = {'THREADS': 1} + row = timestamp_dict.copy() # merge last entries together, as last entry may contain spaces for col in range(header_length): # try to cast as int, float and fallback to string - col_name = header[col] + col_name = snake_case(header[col]) try: row[col_name] = int(patterns[col]) continue @@ -67,4 +76,5 @@ def execute(self) -> list | dict: result.append(row) return result except IndexError: - return {'error': 'No ps_thread.txt file present'} + logger.warning('No ps_thread.txt file present') + return [] diff --git a/src/sysdiagnose/parsers/sys.py b/src/sysdiagnose/parsers/sys.py index c31dd3c..a519de2 100644 --- a/src/sysdiagnose/parsers/sys.py +++ b/src/sysdiagnose/parsers/sys.py @@ -9,11 +9,12 @@ import os import glob import sysdiagnose.utils.misc as misc -from sysdiagnose.utils.base import BaseParserInterface +from sysdiagnose.utils.base import BaseParserInterface, logger class SystemVersionParser(BaseParserInterface): description = "Parsing SystemVersion plist file" + format = 'jsonl' def __init__(self, config: dict, case_id: str): super().__init__(__file__, config, case_id) @@ -30,9 +31,15 @@ def get_log_files(self) -> list: def execute(self) -> list | dict: try: - return SystemVersionParser.parse_file(self.get_log_files()[0]) + entry = SystemVersionParser.parse_file(self.get_log_files()[0]) + timestamp = self.sysdiagnose_creation_datetime + entry['timestamp_desc'] = 'sysdiagnose creation' + entry['timestamp'] = timestamp.timestamp() + 
entry['datetime'] = timestamp.isoformat(timespec='microseconds') + return [entry] except IndexError: - return {'error': 'No SystemVersion.plist file present'} + logger.warning('No SystemVersion.plist file present') + return [] def parse_file(path: str) -> list | dict: return misc.load_plist_file_as_json(path) diff --git a/tests/test_parsers_olddsc.py b/tests/test_parsers_olddsc.py index 15933c1..aa1a3c2 100644 --- a/tests/test_parsers_olddsc.py +++ b/tests/test_parsers_olddsc.py @@ -16,10 +16,12 @@ def test_parse_olddsc_file(self): self.assertTrue(os.path.isfile(p.output_file)) result = p.get_result() - self.assertTrue('Unslid_Base_Address' in result) - self.assertTrue('Cache_UUID_String' in result) - self.assertTrue('Binaries' in result) - self.assertTrue(len(result['Binaries']) > 0) + for entry in result: + self.assertTrue('Load_Address' in entry) + # self.assertTrue('Unslid_Base_Address' in result) + # self.assertTrue('Cache_UUID_String' in result) + # self.assertTrue('Binaries' in result) + # self.assertTrue(len(result['Binaries']) > 0) if __name__ == '__main__': diff --git a/tests/test_parsers_psthread.py b/tests/test_parsers_psthread.py index 0cdf6ad..c29147f 100644 --- a/tests/test_parsers_psthread.py +++ b/tests/test_parsers_psthread.py @@ -18,9 +18,9 @@ def test_parse_psthread(self): result = p.get_result() if result: # not all logs contain data for item in result: - self.assertTrue('COMMAND' in item) - self.assertTrue('PID' in item) - self.assertTrue('USER' in item) + self.assertTrue('command' in item) + self.assertTrue('pid' in item) + self.assertTrue('user' in item) if __name__ == '__main__': diff --git a/tests/test_parsers_sys.py b/tests/test_parsers_sys.py index 33d5991..2e18c1a 100644 --- a/tests/test_parsers_sys.py +++ b/tests/test_parsers_sys.py @@ -18,10 +18,10 @@ def test_getProductInfo(self): result = p.get_result() self.assertGreater(len(result), 0) - - self.assertTrue(result.keys() | self.productinfo_keys == result.keys()) # check if the 
result contains at least the following keys - self.assertTrue('iPhone OS' in result['ProductName']) - self.assertTrue(result['BuildID']) + for item in result: + self.assertTrue(item.keys() | self.productinfo_keys == item.keys()) # check if the result contains at least the following keys + self.assertTrue('iPhone OS' in item['ProductName']) + self.assertTrue(item['BuildID']) if __name__ == '__main__': From 3639083c98d47996cc93a534afcfdd4b62ec89c9 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Wed, 18 Dec 2024 11:08:59 +0100 Subject: [PATCH 2/3] more todos and fixmes --- src/sysdiagnose/parsers/brctl.py | 3 +-- src/sysdiagnose/parsers/transparency.py | 1 + src/sysdiagnose/parsers/wifi_known_networks.py | 1 + src/sysdiagnose/parsers/wifinetworks.py | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/sysdiagnose/parsers/brctl.py b/src/sysdiagnose/parsers/brctl.py index 8fa623a..58e4abc 100644 --- a/src/sysdiagnose/parsers/brctl.py +++ b/src/sysdiagnose/parsers/brctl.py @@ -8,9 +8,8 @@ import os from sysdiagnose.utils.base import BaseParserInterface -# TODO brctl analyser for boot_history section -> timeline - +# TODO brctl analyser for boot_history section -> timeline class BrctlParser(BaseParserInterface): description = "Parsing brctl files" diff --git a/src/sysdiagnose/parsers/transparency.py b/src/sysdiagnose/parsers/transparency.py index fcbdc0c..8e9e8df 100644 --- a/src/sysdiagnose/parsers/transparency.py +++ b/src/sysdiagnose/parsers/transparency.py @@ -4,6 +4,7 @@ from sysdiagnose.utils.base import BaseParserInterface, logger +# FIXME convert to timeline class TransparencyParser(BaseParserInterface): description = "Parsing transparency.log json file" diff --git a/src/sysdiagnose/parsers/wifi_known_networks.py b/src/sysdiagnose/parsers/wifi_known_networks.py index 2eaac06..8488bb3 100644 --- a/src/sysdiagnose/parsers/wifi_known_networks.py +++ b/src/sysdiagnose/parsers/wifi_known_networks.py @@ -12,6 +12,7 @@ from 
sysdiagnose.utils.base import BaseParserInterface, logger +# FIXME convert to timeline - warning: format changes from one iOS version to another class WifiKnownNetworksParser(BaseParserInterface): description = "Parsing Known Wifi Networks plist file" diff --git a/src/sysdiagnose/parsers/wifinetworks.py b/src/sysdiagnose/parsers/wifinetworks.py index a6c0cc6..e94b4e7 100644 --- a/src/sysdiagnose/parsers/wifinetworks.py +++ b/src/sysdiagnose/parsers/wifinetworks.py @@ -11,6 +11,7 @@ from sysdiagnose.utils.base import BaseParserInterface +# FIXME convert to timeline class WifiNetworksParser(BaseParserInterface): description = "Parsing com.apple.wifi plist files" From b808f58bacaa09987b467c927e7511261860abf4 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Wed, 18 Dec 2024 11:38:28 +0100 Subject: [PATCH 3/3] fix: [tests] fix broken tests --- src/sysdiagnose/analysers/ps_everywhere.py | 16 ++++++++-------- src/sysdiagnose/analysers/ps_matrix.py | 22 +++++++++++----------- src/sysdiagnose/parsers/ps.py | 7 ++++--- src/sysdiagnose/utils/misc.py | 2 +- tests/test_parsers_ps.py | 22 +++++++++++----------- 5 files changed, 35 insertions(+), 34 deletions(-) diff --git a/src/sysdiagnose/analysers/ps_everywhere.py b/src/sysdiagnose/analysers/ps_everywhere.py index 8ca55af..b0dc1df 100644 --- a/src/sysdiagnose/analysers/ps_everywhere.py +++ b/src/sysdiagnose/analysers/ps_everywhere.py @@ -27,31 +27,31 @@ def execute(self): # processes with full path and parameters, no threads ps_json = PsParser(self.config, self.case_id).get_result() - self.all_ps.update([p['COMMAND'] for p in ps_json]) + self.all_ps.update([p['command'] for p in ps_json]) print(f"{len(self.all_ps)} entries after ps") # processes with full path and parameters psthread_json = PsThreadParser(self.config, self.case_id).get_result() - self.all_ps.update([p['COMMAND'] for p in psthread_json]) + self.all_ps.update([p['command'] for p in psthread_json]) print(f"{len(self.all_ps)} entries after psthread") # 
processes with full path, no parameters, with threads spindumpnosymbols_json = SpindumpNoSymbolsParser(self.config, self.case_id).get_result() for p in spindumpnosymbols_json: - if 'Process' not in p: + if 'process' not in p: continue try: - self.add_if_full_command_is_not_in_set(p['Path']) - # all_ps.add(f"{p['Path']}::#{len(p['threads'])}") # count is different than in taskinfo + self.add_if_full_command_is_not_in_set(p['path']) + # all_ps.add(f"{p['path']}::#{len(p['threads'])}") # count is different than in taskinfo except KeyError: - if p['Process'] == 'kernel_task [0]': + if p['process'] == 'kernel_task [0]': self.all_ps.add('/kernel') # is similar to the other formats else: - self.add_if_full_command_is_not_in_set(p['Process']) # backup uption to keep trace of this anomaly + self.add_if_full_command_is_not_in_set(p['process']) # backup option to keep trace of this anomaly for t in p['threads']: try: - self.add_if_full_command_is_not_in_set(f"{p['Path']}::{t['ThreadName']}") + self.add_if_full_command_is_not_in_set(f"{p['path']}::{t['thread_name']}") except KeyError: pass print(f"{len(self.all_ps)} entries after spindumpnosymbols") diff --git a/src/sysdiagnose/analysers/ps_matrix.py b/src/sysdiagnose/analysers/ps_matrix.py index 422f296..f605063 100644 --- a/src/sysdiagnose/analysers/ps_matrix.py +++ b/src/sysdiagnose/analysers/ps_matrix.py @@ -22,11 +22,11 @@ def execute(self): all_pids = set() ps_json = PsParser(self.config, self.case_id).get_result() - ps_dict = {int(p['PID']): p for p in ps_json} + ps_dict = {int(p['pid']): p for p in ps_json} all_pids.update(ps_dict.keys()) psthread_json = PsThreadParser(self.config, self.case_id).get_result() - psthread_dict = {int(p['PID']): p for p in psthread_json} + psthread_dict = {int(p['pid']): p for p in psthread_json} all_pids.update(psthread_dict.keys()) taskinfo_json = TaskinfoParser(self.config, self.case_id).get_result() @@ -35,7 +35,7 @@ def execute(self): if 'pid' not in p: continue
taskinfo_dict[int(p['pid'])] = { - 'PID': p['pid'] + 'pid': p['pid'] } all_pids.update(taskinfo_dict.keys()) @@ -44,12 +44,12 @@ def execute(self): spindumpnosymbols_json = SpindumpNoSymbolsParser(self.config, self.case_id).get_result() spindumpnosymbols_dict = {} for p in spindumpnosymbols_json: - if 'Process' not in p: + if 'process' not in p: continue - spindumpnosymbols_dict[int(p['PID'])] = { - 'PID': p['PID'], - 'PPID': p.get('PPID', ''), - 'COMMAND': p.get('Path', ''), + spindumpnosymbols_dict[int(p['pid'])] = { + 'pid': p['pid'], + 'ppid': p.get('ppid', ''), + 'command': p.get('path', ''), } matrix = {} @@ -57,13 +57,13 @@ def execute(self): all_pids.sort() for pid in all_pids: matrix[pid] = { - 'cmd': ps_dict.get(pid, {}).get('COMMAND'), + 'cmd': ps_dict.get(pid, {}).get('command'), } # '%CPU', '%MEM', 'F', 'NI', # 'PRI', 'RSS', # 'STARTED', 'STAT', 'TIME', 'TT', 'USER', 'VSZ' - for col in ['PID']: + for col in ['pid']: ps_val = str(ps_dict.get(pid, {}).get(col)) psthread_val = str(psthread_dict.get(pid, {}).get(col)) taskinfo_val = str(taskinfo_dict.get(pid, {}).get(col)) @@ -75,7 +75,7 @@ def execute(self): else: # different matrix[pid][col] = f"{ps_val} != {psthread_val} != {taskinfo_val} != {spindump_val}" - for col in ['PPID']: + for col in ['ppid']: ps_val = str(ps_dict.get(pid, {}).get(col)) psthread_val = str(psthread_dict.get(pid, {}).get(col)) spindump_val = str(spindumpnosymbols_dict.get(pid, {}).get(col)) diff --git a/src/sysdiagnose/parsers/ps.py b/src/sysdiagnose/parsers/ps.py index 486e55c..34ce81b 100644 --- a/src/sysdiagnose/parsers/ps.py +++ b/src/sysdiagnose/parsers/ps.py @@ -6,6 +6,7 @@ # from sysdiagnose.utils.base import BaseParserInterface, logger +from sysdiagnose.utils.misc import snake_case import glob import os import re @@ -49,7 +50,7 @@ def parse_file(self, filename): # merge last entries together, as last entry may contain spaces for col in range(header_length): # try to cast as int, float and fallback to string - col_name = 
header[col] + col_name = snake_case(header[col]) try: entry[col_name] = int(patterns[col]) continue @@ -81,10 +82,10 @@ def exclude_known_goods(processes: dict, known_good: dict) -> list[dict]: dict: The updated list of processes with known good processes excluded. """ - known_good_cmd = [x['COMMAND'] for x in known_good] + known_good_cmd = [x['command'] for x in known_good] for proc in processes: - if proc['COMMAND'] in known_good_cmd: + if proc['command'] in known_good_cmd: processes.remove(proc) return processes diff --git a/src/sysdiagnose/utils/misc.py b/src/sysdiagnose/utils/misc.py index 3567d8e..a36bb41 100644 --- a/src/sysdiagnose/utils/misc.py +++ b/src/sysdiagnose/utils/misc.py @@ -144,4 +144,4 @@ def find_bytes(d): def snake_case(s): # lowercase and replace non a-z characters as _ - return re.sub(r'[^a-zA-Z0-9]', '_', s.lower()) + return re.sub(r'[^a-zA-Z0-9%]', '_', s.lower()) diff --git a/tests/test_parsers_ps.py b/tests/test_parsers_ps.py index ab2a236..f738f91 100644 --- a/tests/test_parsers_ps.py +++ b/tests/test_parsers_ps.py @@ -21,9 +21,9 @@ def test_parse_ps(self): if p.get_result(): # not all logs contain data for item in p.get_result(): - self.assertTrue('COMMAND' in item) - self.assertTrue('PID' in item) - self.assertTrue('USER' in item) + self.assertTrue('command' in item) + self.assertTrue('pid' in item) + self.assertTrue('user' in item) def test_parse_ps_lower_than_v16(self): input = [ @@ -32,7 +32,7 @@ def test_parse_ps_lower_than_v16(self): ] expected_result = [ { - 'USER': 'root', 'UID': 0, 'PID': 1, 'PPID': 0, '%CPU': 0.0, '%MEM': 0.4, 'PRI': 37, 'NI': 0, 'VSZ': 4226848, 'RSS': 8912, 'WCHAN': '-', 'TT': '??', 'STAT': 'Ss', 'STARTED': '14Jan19', 'TIME': '7:27.40', 'COMMAND': '/sbin/launchd with space', + 'user': 'root', 'uid': 0, 'pid': 1, 'ppid': 0, '%cpu': 0.0, '%mem': 0.4, 'pri': 37, 'ni': 0, 'vsz': 4226848, 'rss': 8912, 'wchan': '-', 'tt': '??', 'stat': 'Ss', 'started': '14Jan19', 'time': '7:27.40', 'command': '/sbin/launchd with 
space', 'timestamp_desc': 'sysdiagnose creation', 'timestamp': 1.0, 'datetime': '1970-01-01T00:00:01.000000+00:00' @@ -54,7 +54,7 @@ def test_parse_ps_newer_than_v16(self): ] expected_result = [ { - 'USER': 'root', 'UID': 0, 'PRSNA': '-', 'PID': 1, 'PPID': 0, 'F': 4004, '%CPU': 0.0, '%MEM': 0.0, 'PRI': 0, 'NI': 0, 'VSZ': 0, 'RSS': 0, 'WCHAN': '-', 'TT': '??', 'STAT': '?s', 'STARTED': 'Tue09PM', 'TIME': '0:00.00', 'COMMAND': '/sbin/launchd', + 'user': 'root', 'uid': 0, 'prsna': '-', 'pid': 1, 'ppid': 0, 'f': 4004, '%cpu': 0.0, '%mem': 0.0, 'pri': 0, 'ni': 0, 'vsz': 0, 'rss': 0, 'wchan': '-', 'tt': '??', 'stat': '?s', 'started': 'Tue09PM', 'time': '0:00.00', 'command': '/sbin/launchd', 'timestamp_desc': 'sysdiagnose creation', 'timestamp': 1.0, 'datetime': '1970-01-01T00:00:01.000000+00:00' @@ -71,16 +71,16 @@ def test_parse_ps_newer_than_v16(self): def test_ps_exclude_known_goods(self): processes = [ - {'COMMAND': 'good', 'PID': 1}, - {'COMMAND': 'bad', 'PID': 2}, - {'COMMAND': 'unknown', 'PID': 3} + {'command': 'good', 'pid': 1}, + {'command': 'bad', 'pid': 2}, + {'command': 'unknown', 'pid': 3} ] known_good = [ - {'COMMAND': 'good', 'PID': 1} + {'command': 'good', 'pid': 1} ] expected_result = [ - {'COMMAND': 'bad', 'PID': 2}, - {'COMMAND': 'unknown', 'PID': 3} + {'command': 'bad', 'pid': 2}, + {'command': 'unknown', 'pid': 3} ] result = PsParser.exclude_known_goods(processes, known_good) self.assertEqual(result, expected_result)