From 94ee14993b765f1de362171de9f313dc886f977e Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Wed, 28 Aug 2024 10:06:02 +0200 Subject: [PATCH] chg: [all] parsers and analysers are now classes --- .vscode/launch.json | 36 +- analysers/apps.py | 60 +- analysers/demo_analyser.py | 30 +- analysers/ps_everywhere.py | 183 +++-- analysers/ps_matrix.py | 109 ++- analysers/timeliner.py | 268 ++++--- analysers/wifi_geolocation.py | 154 ++-- analysers/wifi_geolocation_kml.py | 186 +++-- analysers/yarascan.py | 186 +++-- parsers/accessibility_tcc.py | 46 +- parsers/appinstallation.py | 35 +- parsers/brctl.py | 694 +++++++++---------- parsers/containermanager.py | 33 +- parsers/crashlogs.py | 58 +- parsers/demo_parser.py | 77 +- parsers/itunesstore.py | 43 +- parsers/logarchive.py | 420 +++++------ parsers/mobileactivation.py | 40 +- parsers/mobileinstallation.py | 33 +- parsers/networkextension.py | 34 +- parsers/networkextensioncache.py | 34 +- parsers/olddsc.py | 37 +- parsers/plists.py | 95 ++- parsers/powerlogs.py | 66 +- parsers/ps.py | 127 ++-- parsers/psthread.py | 86 +-- parsers/remotectl_dumpstate.py | 32 +- parsers/security_sysdiagnose.py | 417 ++++++----- parsers/shutdownlogs.py | 108 ++- parsers/spindumpnosymbols.py | 361 +++++----- parsers/swcutil.py | 194 +++--- parsers/sys.py | 70 +- parsers/taskinfo.py | 110 +-- parsers/uuid2path.py | 72 +- parsers/wifi_known_networks.py | 56 +- parsers/wifinetworks.py | 63 +- parsers/wifiscan.py | 36 +- parsers/wifisecurity.py | 102 +-- sysdiagnose.py | 166 ++--- tests/__init__.py | 52 +- tests/test_analysers_apps.py | 24 +- tests/test_analysers_ps_everywhere.py | 23 + tests/test_analysers_ps_matrix.py | 23 + tests/test_analysers_timeliner.py | 27 +- tests/test_analysers_wifi_geolocation.py | 27 +- tests/test_analysers_wifi_geolocation_kml.py | 21 +- tests/test_analysers_yarascan.py | 26 + tests/test_parsers.py | 53 +- tests/test_parsers_accessibility_tcc.py | 17 +- tests/test_parsers_appinstallation.py | 17 +- tests/test_parsers_brctl.py | 14 +- tests/test_parsers_containermanager.py | 16 +- tests/test_parsers_itunesstore.py | 16 +- tests/test_parsers_logarchive.py | 49 +- tests/test_parsers_mobileactivation.py | 16 +- tests/test_parsers_mobileinstallation.py | 16 +- tests/test_parsers_networkextension.py | 17 +- tests/test_parsers_networkextensioncache.py | 15 +- tests/test_parsers_olddsc.py | 16 +- tests/test_parsers_plist.py | 19 +- tests/test_parsers_powerlogs.py | 16 +- tests/test_parsers_ps.py | 26 +- tests/test_parsers_psthread.py | 15 +- tests/test_parsers_remotectl_dumpstate.py | 20 +- tests/test_parsers_security_sysdiagnose.py | 26 +- tests/test_parsers_shutdownlogs.py | 15 +- tests/test_parsers_spindumpnosymbols.py | 19 +- tests/test_parsers_swcutil.py | 15 +- tests/test_parsers_sys.py | 15 +- tests/test_parsers_taskinfo.py | 15 +- tests/test_parsers_uuid2path.py | 16 +- tests/test_parsers_wifi_known_networks.py | 15 +- tests/test_parsers_wifinetworks.py | 18 +- tests/test_parsers_wifiscan.py | 17 +- tests/test_parsers_wifisecurity.py | 15 +- utils/base.py | 159 +++++ utils/tabbasedhierarchy.py | 3 +- 77 files changed, 3172 insertions(+), 2714 deletions(-) create mode 100644 tests/test_analysers_ps_everywhere.py create mode 100644 tests/test_analysers_ps_matrix.py create mode 100644 tests/test_analysers_yarascan.py create mode 100644 utils/base.py diff --git a/.vscode/launch.json b/.vscode/launch.json index 4820abf..09ac084 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -30,11 +30,11 @@ "cwd": "${workspaceFolder}/" 
}, { - "name": "Python Debugger: analyse list", + "name": "Python Debugger: analysers list", "type": "debugpy", "request": "launch", "program": "${workspaceFolder}/sysdiagnose.py", - "args": "analyse list", + "args": "analysers", "cwd": "${workspaceFolder}/" }, { @@ -53,6 +53,14 @@ "args": "-c 1 analyse timeliner", "cwd": "${workspaceFolder}/" }, + { + "name": "Python Debugger: analyse demo_analyser", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/sysdiagnose.py", + "args": "-c 1 analyse demo_analyser", + "cwd": "${workspaceFolder}/" + }, { "name": "Python Debugger: analyse apps 1", "type": "debugpy", @@ -93,6 +101,22 @@ "args": "-c 1 analyse timeliner", "cwd": "${workspaceFolder}/" }, + { + "name": "Python Debugger: analyse yarascan", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/sysdiagnose.py", + "args": "-c 1 analyse yarascan", + "cwd": "${workspaceFolder}/" + }, + { + "name": "Python Debugger: list parsers", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/sysdiagnose.py", + "args": "parsers", + "cwd": "${workspaceFolder}/" + }, { "name": "Python Debugger: parse demo_parser", "type": "debugpy", @@ -133,6 +157,14 @@ "args": "-c 2 parse networkextension", "cwd": "${workspaceFolder}/" }, + { + "name": "Python Debugger: parse psthread", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/sysdiagnose.py", + "args": "-c 1 parse psthread", + "cwd": "${workspaceFolder}/" + }, { "name": "Python Debugger: parse ps", "type": "debugpy", diff --git a/analysers/apps.py b/analysers/apps.py index afbf015..51af288 100644 --- a/analysers/apps.py +++ b/analysers/apps.py @@ -3,28 +3,30 @@ # For Python3 # Author: Emiliern Le Jamtel -import os -import json import re +from utils.base import BaseAnalyserInterface +from parsers.accessibility_tcc import AccessibilityTccParser +from parsers.brctl import BrctlParser +from parsers.itunesstore import iTunesStoreParser +from parsers.logarchive import LogarchiveParser -analyser_description = 'Get list of Apps installed on the device' -analyser_format = 'json' -uses_parsers = ['accessibility_tcc', 'brctl', 'itunesstore', 'logarchive'] # not used yet, but just interesting to keep track +class AppsAnalyser(BaseAnalyserInterface): + description = 'Get list of Apps installed on the device' + format = 'json' -# TODO this code is terribly slow. I would expect this is due to all the if key in lookups. It took 49 seconds for case 1 + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + # TODO this code is terribly slow. I would expect this is due to all the if key in lookups. 
It took 49 seconds for case 1 + def execute(self): + ''' + Go through all json files in the folder and generate the json list of apps + ''' + apps = {} + # TODO add a check to see if the files exist, and if necessary, call the parsers (or ask the user to call them), or maybe using a flag in the function call -def analyse_path(case_folder: str, output_file: str = 'apps.json') -> bool: - ''' - Go through all json files in the folder and generate the json list of apps - ''' - - apps = {} - # TODO add a check to see if the files exist, and if necessary, call the parsers (or ask the user to call them), or maybe using a flag in the function call - - with open(os.path.join(case_folder, 'accessibility_tcc.json'), 'r') as f: - json_data = json.load(f) + json_data = AccessibilityTccParser(self.config, self.case_id).get_result() if json_data and not json_data.get('error'): for entry in json_data['access']: if entry['client'] not in apps: @@ -35,8 +37,7 @@ def analyse_path(case_folder: str, output_file: str = 'apps.json') -> bool: except KeyError: apps[entry['client']]['services'] = [entry['service']] - with open(os.path.join(case_folder, 'brctl.json'), 'r') as f: - json_data = json.load(f) + json_data = BrctlParser(self.config, self.case_id).get_result() if json_data and not json_data.get('error'): # directly going to the list of apps for entry in json_data['app_library_id']: @@ -50,8 +51,7 @@ def analyse_path(case_folder: str, output_file: str = 'apps.json') -> bool: apps[entry]['found'].append('brctl') - with open(os.path.join(case_folder, 'itunesstore.json'), 'r') as f: - json_data = json.load(f) + json_data = iTunesStoreParser(self.config, self.case_id).get_result() if json_data and not json_data.get('error'): # directly going to the list of apps for entry in json_data['application_id']: @@ -60,19 +60,17 @@ def analyse_path(case_folder: str, output_file: str = 'apps.json') -> bool: else: apps[entry['bundle_id']]['found'].append('itunesstore') - re_bundle_id_pattern = r'(([a-zA-Z0-9-_]+\.)+[a-zA-Z0-9-_]+)' - # list files in here - with open(os.path.join(case_folder, 'logarchive.json'), 'r') as f: - for line in f: # jsonl format + re_bundle_id_pattern = r'(([a-zA-Z0-9-_]+\.)+[a-zA-Z0-9-_]+)' + # list files in here + json_entries = LogarchiveParser(self.config, self.case_id).get_result() + for entry in json_entries: try: - entry = json.loads(line) # skip empty entries if entry['subsystem'] == '': continue - except KeyError: # last line of the native logarchive.json file - continue - except json.decoder.JSONDecodeError: # last lines of the native logarchive.json file + except KeyError: # last line of the native logarchive.jsonl file continue + # extract app/bundle id or process name from the subsystem field if not re.search(r'^' + re_bundle_id_pattern + r'$', entry['subsystem']): # extract foo.bar.hello from the substing if it is in that format @@ -101,8 +99,4 @@ def analyse_path(case_folder: str, output_file: str = 'apps.json') -> bool: if 'logarchive' not in apps[entry['subsystem']]['found']: apps[entry['subsystem']]['found'].append('logarchive') - with open(output_file, 'w') as f: - json.dump(apps, f, indent=4, sort_keys=True) - print(f"Apps list written to {output_file}") - # print(json.dumps(apps, indent=4)) - return + return apps diff --git a/analysers/demo_analyser.py b/analysers/demo_analyser.py index ee8fcf0..d1b1574 100644 --- a/analysers/demo_analyser.py +++ b/analysers/demo_analyser.py @@ -3,18 +3,26 @@ # For Python3 # DEMO - Skeleton -import json +from utils.base import 
BaseAnalyserInterface -analyser_description = "Do something useful (DEMO)" -analyser_format = "json" +class DemoAnalyser(BaseAnalyserInterface): + description = "Do something useful (DEMO)" + # format = "json" # by default json + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) -def analyse_path(case_folder: str, output_file: str = "demo-analyser.json") -> bool: - """ - Generate the timeline and save it to filename - """ - print("DO SOMETHING HERE") - with open(output_file, 'w') as f: - json.dump({"Hello": "World"}, f, indent=4) - return + def execute(self): + """ + This method is responsible for executing the functionality of the class. + + Load parsers here, and use the parser.get_result() to get the data. + By doing so you will get the parser output even if it never ran before. + """ + print("DO SOMETHING HERE") + + # json_data = p_fooparser.get_result() + + result = {'foo': 'bar'} + return result diff --git a/analysers/ps_everywhere.py b/analysers/ps_everywhere.py index b20156a..3e67733 100644 --- a/analysers/ps_everywhere.py +++ b/analysers/ps_everywhere.py @@ -1,129 +1,128 @@ #! /usr/bin/env python3 -import json -import os - -analyser_description = "List all processes we can find a bit everywhere." -analyser_format = "json" - - -def analyse_path(case_folder: str, output_file: str = "ps_everywhere.json") -> bool: - all_ps = set() - - # the order of below is important: we want to have the most detailed information first - # - first processes with full path and parameters - # - then processes with full path and no parameters - # - then processes no full path and no parameters - - # processes with full path and parameters, no threads - with open(os.path.join(case_folder, "ps.json"), "r") as f: - ps_json = json.load(f) - all_ps.update([p['COMMAND'] for p in ps_json]) - print(f"{len(all_ps)} entries after ps") - - # processes with full path and parameters - with open(os.path.join(case_folder, "psthread.json"), "r") as f: - psthread_json = json.load(f) - all_ps.update([p['COMMAND'] for p in psthread_json]) - print(f"{len(all_ps)} entries after psthread") - - # processes with full path, no parameters, with threads - with open(os.path.join(case_folder, "spindumpnosymbols.json"), "r") as f: - spindumpnosymbols_json = json.load(f) +from utils.base import BaseAnalyserInterface +from parsers.ps import PsParser +from parsers.psthread import PsThreadParser +from parsers.spindumpnosymbols import SpindumpNoSymbolsParser +from parsers.shutdownlogs import ShutdownLogsParser +from parsers.logarchive import LogarchiveParser +from parsers.uuid2path import UUID2PathParser +from parsers.taskinfo import TaskinfoParser +from parsers.remotectl_dumpstate import RemotectlDumpstateParser + + +class PsEverywhereAnalyser(BaseAnalyserInterface): + description = "List all processes we can find a bit everywhere." 
+ format = "json" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + self.all_ps = set() + + def execute(self): + # the order of below is important: we want to have the most detailed information first + # - first processes with full path and parameters + # - then processes with full path and no parameters + # - then processes no full path and no parameters + + # processes with full path and parameters, no threads + ps_json = PsParser(self.config, self.case_id).get_result() + self.all_ps.update([p['COMMAND'] for p in ps_json]) + print(f"{len(self.all_ps)} entries after ps") + + # processes with full path and parameters + + psthread_json = PsThreadParser(self.config, self.case_id).get_result() + self.all_ps.update([p['COMMAND'] for p in psthread_json]) + print(f"{len(self.all_ps)} entries after psthread") + + # processes with full path, no parameters, with threads + spindumpnosymbols_json = SpindumpNoSymbolsParser(self.config, self.case_id).get_result() for p in spindumpnosymbols_json['processes']: try: - add_if_full_command_is_not_in_set(p['Path'], all_ps) + self.add_if_full_command_is_not_in_set(p['Path']) # all_ps.add(f"{p['Path']}::#{len(p['threads'])}") # count is different than in taskinfo except KeyError: if p['Process'] == 'kernel_task [0]': - all_ps.add('/kernel') # is similar to the other formats + self.all_ps.add('/kernel') # is similar to the other formats else: - add_if_full_command_is_not_in_set(p['Process'], all_ps) # backup uption to keep trace of this anomaly + self.add_if_full_command_is_not_in_set(p['Process']) # backup uption to keep trace of this anomaly for t in p['threads']: try: - add_if_full_command_is_not_in_set(f"{p['Path']}::{t['ThreadName']}", all_ps) + self.add_if_full_command_is_not_in_set(f"{p['Path']}::{t['ThreadName']}") except KeyError: pass - print(f"{len(all_ps)} entries after spindumpnosymbols") + print(f"{len(self.all_ps)} entries after spindumpnosymbols") - # processes with full path, no parameters, no threads - with open(os.path.join(case_folder, "shutdownlogs.json"), "r") as f: - shutdownlogs_json = json.load(f) + # processes with full path, no parameters, no threads + shutdownlogs_json = ShutdownLogsParser(self.config, self.case_id).get_result() for section in shutdownlogs_json.values(): # not using 'path' but 'command', as the path being appended by the UUID will be counter productive to normalisation for p in section: - add_if_full_command_is_not_in_set(p['command'], all_ps) - print(f"{len(all_ps)} entries after shutdownlogs") + self.add_if_full_command_is_not_in_set(p['command']) + print(f"{len(self.all_ps)} entries after shutdownlogs") - # processes with full path, no parameters, no threads - with open(os.path.join(case_folder, "logarchive.json"), "r") as f: + # processes with full path, no parameters, no threads logarchive_procs = set() - for line in f: - event = json.loads(line) + for event in LogarchiveParser(self.config, self.case_id).get_result(): try: logarchive_procs.add(event['process']) except KeyError: pass for entry in logarchive_procs: - add_if_full_command_is_not_in_set(entry, all_ps) - print(f"{len(all_ps)} entries after logarchive") + self.add_if_full_command_is_not_in_set(entry) + print(f"{len(self.all_ps)} entries after logarchive") - # processes with full path, no parameters, no threads - with open(os.path.join(case_folder, "uuid2path.json"), "r") as f: - uuid2path_json = json.load(f) + # processes with full path, no parameters, no threads + uuid2path_json = 
UUID2PathParser(self.config, self.case_id).get_result() for item in uuid2path_json.values(): - add_if_full_command_is_not_in_set(item, all_ps) - print(f"{len(all_ps)} entries after uuid2path") + self.add_if_full_command_is_not_in_set(item) + print(f"{len(self.all_ps)} entries after uuid2path") - # processes no full path, no parameters, with threads - with open(os.path.join(case_folder, "taskinfo.json"), "r") as f: - taskinfo_json = json.load(f) + # processes no full path, no parameters, with threads + taskinfo_json = TaskinfoParser(self.config, self.case_id).get_result() # p['name'] is the short version of COMMAND, so incompatible with the other formats. # on the other hand it may contain valuable stuff, so we use it in 2 formats # - name::#num_of_threads # - name::thread name for p in taskinfo_json['tasks']: - add_if_full_path_is_not_in_set(p['name'], all_ps) + self.add_if_full_path_is_not_in_set(p['name']) # add_if_full_path_is_not_in_set(f"{p['name']}::#{len(p['threads'])}") # count is different than in spindumpnosymbols for t in p['threads']: try: - add_if_full_path_is_not_in_set(f"{p['name']}::{t['thread name']}", all_ps) + self.add_if_full_path_is_not_in_set(f"{p['name']}::{t['thread name']}") except KeyError: pass - print(f"{len(all_ps)} entries after taskinfo") - - # processes no full path, no parameters, no threads - with open(os.path.join(case_folder, "remotectl_dumpstate.json"), "r") as f: - remotectl_dumpstate_json = json.load(f) - for p in remotectl_dumpstate_json['Local device']['Services']: - add_if_full_path_is_not_in_set(p, all_ps) - - print(f"{len(all_ps)} entries after remotectl_dumpstate") - - all_ps = list(all_ps) - all_ps.sort() - with open(output_file, 'w') as f: - json.dump(all_ps, f, indent=4) - return - - -def add_if_full_path_is_not_in_set(name: str, all_ps: set): - for item in all_ps: - # no need to add it in the following cases - if item.endswith(name): - return - if item.split('::').pop(0).endswith(name): - return - if '::' not in item and item.split(' ').pop(0).endswith(name): - # this will but with commands that have a space, but looking at data this should not happend often - return - all_ps.add(name) - - -def add_if_full_command_is_not_in_set(name: str, all_ps: set): - for item in all_ps: - if item.startswith(name): - # no need to add it - return - all_ps.add(name) + print(f"{len(self.all_ps)} entries after taskinfo") + + # processes no full path, no parameters, no threads + remotectl_dumpstate_json = RemotectlDumpstateParser(self.config, self.case_id).get_result() + if remotectl_dumpstate_json: + for p in remotectl_dumpstate_json['Local device']['Services']: + self.add_if_full_path_is_not_in_set(p) + + print(f"{len(self.all_ps)} entries after remotectl_dumpstate") + + self.all_ps = list(self.all_ps) + self.all_ps.sort() + return self.all_ps + + def add_if_full_path_is_not_in_set(self, name: str): + for item in self.all_ps: + # no need to add it in the following cases + if item.endswith(name): + return + if item.split('::').pop(0).endswith(name): + return + if '::' not in item and item.split(' ').pop(0).endswith(name): + # this will but with commands that have a space, but looking at data this should not happend often + return + self.all_ps.add(name) + + def add_if_full_command_is_not_in_set(self, name: str): + for item in self.all_ps: + if item.startswith(name): + # no need to add it + return + self.all_ps.add(name) diff --git a/analysers/ps_matrix.py b/analysers/ps_matrix.py index ca1956a..49a0e05 100644 --- a/analysers/ps_matrix.py +++ 
b/analysers/ps_matrix.py @@ -2,32 +2,34 @@ # make a matrix comparing, and showing visually # TODO improve ps_matrix as it's not very useful right now -import json -import os import pandas as pd from tabulate import tabulate +from utils.base import BaseAnalyserInterface +from parsers.ps import PsParser +from parsers.psthread import PsThreadParser +from parsers.taskinfo import TaskinfoParser +from parsers.spindumpnosymbols import SpindumpNoSymbolsParser -analyser_description = "Makes a matrix comparing ps, psthread, taskinfo" -analyser_format = "txt" -uses_parsers = ['ps', 'psthread', 'taskinfo', 'spindumpnosymbols'] +class PsMatrixAnalyser(BaseAnalyserInterface): + description = "Makes a matrix comparing ps, psthread, taskinfo" + format = "txt" + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) -def analyse_path(case_folder: str, output_file: str = "ps_matrix.txt") -> bool: - all_pids = set() + def execute(self): + all_pids = set() - with open(os.path.join(case_folder, "ps.json"), "r") as f: - ps_json = json.load(f) + ps_json = PsParser(self.config, self.case_id).get_result() ps_dict = {int(p['PID']): p for p in ps_json} all_pids.update(ps_dict.keys()) - with open(os.path.join(case_folder, "psthread.json"), "r") as f: - psthread_json = json.load(f) + psthread_json = PsThreadParser(self.config, self.case_id).get_result() psthread_dict = {int(p['PID']): p for p in psthread_json} all_pids.update(psthread_dict.keys()) - with open(os.path.join(case_folder, "taskinfo.json"), "r") as f: - taskinfo_json = json.load(f) + taskinfo_json = TaskinfoParser(self.config, self.case_id).get_result() taskinfo_dict = {} for p in taskinfo_json['tasks']: taskinfo_dict[int(p['pid'])] = { @@ -35,10 +37,9 @@ def analyse_path(case_folder: str, output_file: str = "ps_matrix.txt") -> bool: } all_pids.update(taskinfo_dict.keys()) - # not possible to use shutdownlogs as we're looking at different timeframes + # not possible to use shutdownlogs as we're looking at different timeframes - with open(os.path.join(case_folder, "spindumpnosymbols.json"), "r") as f: - spindumpnosymbols_json = json.load(f) + spindumpnosymbols_json = SpindumpNoSymbolsParser(self.config, self.case_id).get_result() spindumpnosymbols_dict = {} for p in spindumpnosymbols_json['processes']: spindumpnosymbols_dict[int(p['PID'])] = { @@ -47,43 +48,39 @@ def analyse_path(case_folder: str, output_file: str = "ps_matrix.txt") -> bool: 'COMMAND': p.get('Path', ''), } - matrix = {} - all_pids = list(all_pids) - all_pids.sort() - for pid in all_pids: - matrix[pid] = { - 'cmd': ps_dict.get(pid, {}).get('COMMAND'), - } - - # '%CPU', '%MEM', 'F', 'NI', - # 'PRI', 'RSS', - # 'STARTED', 'STAT', 'TIME', 'TT', 'USER', 'VSZ' - for col in ['PID']: - ps_val = str(ps_dict.get(pid, {}).get(col)) - psthread_val = str(psthread_dict.get(pid, {}).get(col)) - taskinfo_val = str(taskinfo_dict.get(pid, {}).get(col)) - spindump_val = str(spindumpnosymbols_dict.get(pid, {}).get(col)) - - cmpr = ps_val == psthread_val == taskinfo_val == spindump_val - if cmpr: - matrix[pid][col] = True - else: # different - matrix[pid][col] = f"{ps_val} != {psthread_val} != {taskinfo_val} != {spindump_val}" - - for col in ['PPID']: - ps_val = str(ps_dict.get(pid, {}).get(col)) - psthread_val = str(psthread_dict.get(pid, {}).get(col)) - spindump_val = str(spindumpnosymbols_dict.get(pid, {}).get(col)) - - cmpr = ps_val == psthread_val == spindump_val - if cmpr: - matrix[pid][col] = True - else: # different - matrix[pid][col] = f"{ps_val} != {psthread_val} 
!= {spindump_val}" - - # LATER consider filtering the table to only show differences - # print(tabulate(pd.DataFrame(matrix).T, headers='keys', tablefmt='psql')) - with open(output_file, 'w') as f: - f.write(tabulate(pd.DataFrame(matrix).T, headers='keys', tablefmt='psql')) - - return + matrix = {} + all_pids = list(all_pids) + all_pids.sort() + for pid in all_pids: + matrix[pid] = { + 'cmd': ps_dict.get(pid, {}).get('COMMAND'), + } + + # '%CPU', '%MEM', 'F', 'NI', + # 'PRI', 'RSS', + # 'STARTED', 'STAT', 'TIME', 'TT', 'USER', 'VSZ' + for col in ['PID']: + ps_val = str(ps_dict.get(pid, {}).get(col)) + psthread_val = str(psthread_dict.get(pid, {}).get(col)) + taskinfo_val = str(taskinfo_dict.get(pid, {}).get(col)) + spindump_val = str(spindumpnosymbols_dict.get(pid, {}).get(col)) + + cmpr = ps_val == psthread_val == taskinfo_val == spindump_val + if cmpr: + matrix[pid][col] = True + else: # different + matrix[pid][col] = f"{ps_val} != {psthread_val} != {taskinfo_val} != {spindump_val}" + + for col in ['PPID']: + ps_val = str(ps_dict.get(pid, {}).get(col)) + psthread_val = str(psthread_dict.get(pid, {}).get(col)) + spindump_val = str(spindumpnosymbols_dict.get(pid, {}).get(col)) + + cmpr = ps_val == psthread_val == spindump_val + if cmpr: + matrix[pid][col] = True + else: # different + matrix[pid][col] = f"{ps_val} != {psthread_val} != {spindump_val}" + + # LATER consider filtering the table to only show differences + return tabulate(pd.DataFrame(matrix).T, headers='keys', tablefmt='psql') diff --git a/analysers/timeliner.py b/analysers/timeliner.py index 913fd9a..8df71af 100644 --- a/analysers/timeliner.py +++ b/analysers/timeliner.py @@ -4,29 +4,36 @@ # Script to extract timestamp and generate a timesketch output # Author: david@autopsit.org # -# Important note: timestamp are in microseconds! standard epoch is in seconds. # FIXME is this correct? -import os -import json from datetime import datetime, timezone -from parsers.logarchive import convert_unifiedlog_time_to_datetime +from parsers.logarchive import LogarchiveParser +from parsers.mobileactivation import MobileActivationParser +from parsers.powerlogs import PowerLogsParser +from parsers.swcutil import SwcutilParser +from parsers.accessibility_tcc import AccessibilityTccParser +from parsers.shutdownlogs import ShutdownLogsParser +from parsers.wifisecurity import WifiSecurityParser +from parsers.wifi_known_networks import WifiKnownNetworksParser from collections.abc import Generator +from utils.base import BaseAnalyserInterface -analyser_description = 'Generate a Timesketch compatible timeline' -analyser_format = 'jsonl' +class TimelinerAnalyser(BaseAnalyserInterface): + description = 'Generate a Timesketch compatible timeline' + format = 'jsonl' + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) -# Timesketch format: -# https://timesketch.org/guides/user/import-from-json-csv/ -# Mandatory: timestamps must be in microseconds !!! -# {"message": "A message","timestamp": 123456789,"datetime": "2015-07-24T19:01:01+00:00","timestamp_desc": "Write time","extra_field_1": "foo"} + # Timesketch format: + # https://timesketch.org/guides/user/import-from-json-csv/ + # Mandatory: timestamps must be in microseconds !!! 
+ # {"message": "A message","timestamp": 123456789,"datetime": "2015-07-24T19:01:01+00:00","timestamp_desc": "Write time","extra_field_1": "foo"} -def __extract_ts_mobileactivation(case_folder: str) -> Generator[dict, None, None]: - try: - filename = 'mobileactivation.json' - with open(os.path.join(case_folder, filename), 'r') as fd: - data = json.load(fd) + def __extract_ts_mobileactivation(self) -> Generator[dict, None, None]: + try: + p = MobileActivationParser(self.config, self.case_id) + data = p.get_result() for event in data: ts_event = { 'message': 'Mobile Activation', @@ -41,78 +48,27 @@ def __extract_ts_mobileactivation(case_folder: str) -> Generator[dict, None, Non # FIXME what should we do? the log file (now) contains nice timestamps, do we want to extract less, but summarized, data? continue yield ts_event - except Exception as e: - print(f"ERROR while extracting timestamp from {filename}. Reason: {str(e)}") - - -def __extract_ts_powerlogs(case_folder: str) -> Generator[dict, None, None]: - try: - filename = 'powerlogs.json' - with open(os.path.join(case_folder, filename), 'r') as fd: - data = json.load(fd) + except Exception as e: + print(f"ERROR while extracting timestamp from mobileactivation file. Reason: {str(e)}") + def __extract_ts_powerlogs(self) -> Generator[dict, None, None]: + try: + p = PowerLogsParser(self.config, self.case_id) + data = p.get_result() # extract tables of interest - for entry in __powerlogs__PLProcessMonitorAgent_EventPoint_ProcessExit(data): + for entry in TimelinerAnalyser.__powerlogs__PLProcessMonitorAgent_EventPoint_ProcessExit(data): yield entry - for entry in __powerlogs__PLProcessMonitorAgent_EventBackward_ProcessExitHistogram(data): + for entry in TimelinerAnalyser.__powerlogs__PLProcessMonitorAgent_EventBackward_ProcessExitHistogram(data): yield entry - for entry in __powerlogs__PLAccountingOperator_EventNone_Nodes(data): + for entry in TimelinerAnalyser.__powerlogs__PLAccountingOperator_EventNone_Nodes(data): yield entry - except Exception as e: - print(f"ERROR while extracting timestamp from {filename}. 
Reason: {str(e)}") - - -def __powerlogs__PLProcessMonitorAgent_EventPoint_ProcessExit(jdata): - proc_exit = jdata['PLProcessMonitorAgent_EventPoint_ProcessExit'] - for proc in proc_exit: - timestamp = datetime.fromtimestamp(proc['timestamp'], tz=timezone.utc) - - extra_field = '' - if 'IsPermanent' in proc.keys(): - extra_field = 'Is permanent: %d' % proc['IsPermanent'] - ts_event = { - 'message': proc['ProcessName'], - 'timestamp': proc['timestamp'] * 1000000, - 'datetime': timestamp.isoformat(), - 'timestamp_desc': 'Process Exit with reason code: %d reason namespace %d' % (proc['ReasonCode'], proc['ReasonNamespace']), - 'extra_field_1': extra_field - } - yield ts_event - - -def __powerlogs__PLProcessMonitorAgent_EventBackward_ProcessExitHistogram(jdata): - events = jdata['PLProcessMonitorAgent_EventBackward_ProcessExitHistogram'] - for event in events: - timestamp = datetime.fromtimestamp(event['timestamp'], tz=timezone.utc) - ts_event = { - 'message': event['ProcessName'], - 'timestamp': event['timestamp'] * 1000000, - 'datetime': timestamp.isoformat(), - 'timestamp_desc': 'Process Exit with reason code: %d reason namespace %d' % (event['ReasonCode'], event['ReasonNamespace']), - 'extra_field_1': 'Crash frequency: [0-5s]: %d, [5-10s]: %d, [10-60s]: %d, [60s+]: %d' % (event['0s-5s'], event['5s-10s'], event['10s-60s'], event['60s+']) - } - yield ts_event - - -def __powerlogs__PLAccountingOperator_EventNone_Nodes(jdata): - eventnone = jdata['PLAccountingOperator_EventNone_Nodes'] - for event in eventnone: - timestamp = datetime.fromtimestamp(event['timestamp'], tz=timezone.utc) - ts_event = { - 'message': event['Name'], - 'timestamp': event['timestamp'] * 1000000, - 'datetime': timestamp.isoformat(), - 'timestamp_desc': 'PLAccountingOperator Event', - 'extra_field_1': 'Is permanent: %d' % event['IsPermanent'] - } - yield ts_event + except Exception as e: + print(f"ERROR while extracting timestamp from powerlogs. Reason: {str(e)}") - -def __extract_ts_swcutil(case_folder: str) -> Generator[dict, None, None]: - filename = 'swcutil.json' - try: - with open(os.path.join(case_folder, filename), 'r') as fd: - data = json.load(fd) + def __extract_ts_swcutil(self) -> Generator[dict, None, None]: + try: + p = SwcutilParser(self.config, self.case_id) + data = p.get_result() if 'db' in data.keys(): for service in data['db']: try: @@ -129,15 +85,13 @@ def __extract_ts_swcutil(case_folder: str) -> Generator[dict, None, None]: # some entries do not have a Last Checked or timestamp field # print(f"WARNING {filename} while extracting timestamp from {(service['Service'])} - {(service['App ID'])}. Record not inserted.") pass - except Exception as e: - print(f"ERROR while extracting timestamp from {filename}. Reason {str(e)}") - + except Exception as e: + print(f"ERROR while extracting timestamp from swcutil. 
Reason {str(e)}") -def __extract_ts_accessibility_tcc(case_folder: str) -> Generator[dict, None, None]: - filename = 'accessibility_tcc.json' - try: - with open(os.path.join(case_folder, filename), 'r') as fd: - data = json.load(fd) + def __extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]: + try: + p = AccessibilityTccParser(self.config, self.case_id) + data = p.get_result() if 'access' in data.keys(): for access in data['access']: # create timeline entry @@ -150,15 +104,13 @@ def __extract_ts_accessibility_tcc(case_folder: str) -> Generator[dict, None, No 'extra_field_1': 'client: %s' % access['client'] } yield ts_event - except Exception as e: - print(f"ERROR while extracting timestamp from {filename}. Reason {str(e)}") - + except Exception as e: + print(f"ERROR while extracting timestamp from accessibility_tcc. Reason {str(e)}") -def __extract_ts_shutdownlogs(case_folder: str) -> Generator[dict, None, None]: - filename = 'shutdownlogs.json' - try: - with open(os.path.join(case_folder, filename), 'r') as fd: - data = json.load(fd) + def __extract_ts_shutdownlogs(self) -> Generator[dict, None, None]: + try: + p = ShutdownLogsParser(self.config, self.case_id) + data = p.get_result() for ts, processes in data.items(): try: # create timeline entries @@ -174,19 +126,17 @@ def __extract_ts_shutdownlogs(case_folder: str) -> Generator[dict, None, None]: yield ts_event except Exception as e: print(f"WARNING: shutdownlog entry not parsed: {ts}. Reason: {str(e)}") - except Exception as e: - print(f"ERROR while extracting timestamp from {filename}. Reason: {str(e)}") - - -def __extract_ts_logarchive(case_folder: str) -> Generator[dict, None, None]: - logarchive_file = os.path.join(case_folder, 'logarchive.json') - try: - with open(logarchive_file, 'r') as fd: - for line in fd: + except Exception as e: + print(f"ERROR while extracting timestamp from shutdownlog. Reason: {str(e)}") + + def __extract_ts_logarchive(self) -> Generator[dict, None, None]: + try: + p = LogarchiveParser(self.config, self.case_id) + data = p.get_result() + for trace in data: try: - trace = json.loads(line) # create timeline entry - timestamp = convert_unifiedlog_time_to_datetime(trace['time']) + timestamp = LogarchiveParser.convert_unifiedlog_time_to_datetime(trace['time']) ts_event = { 'message': trace['message'], 'timestamp': timestamp.timestamp() * 1000000, @@ -197,15 +147,13 @@ def __extract_ts_logarchive(case_folder: str) -> Generator[dict, None, None]: yield ts_event except KeyError as e: print(f"WARNING: trace not parsed: {trace}. Error {e}") - except Exception as e: - print(f"ERROR while extracting timestamp from {logarchive_file}. Reason: {str(e)}") + except Exception as e: + print(f"ERROR while extracting timestamp from logarchive. Reason: {str(e)}") - -def __extract_ts_wifisecurity(case_folder: str) -> Generator[dict, None, None]: - filename = 'wifisecurity.json' - try: - with open(os.path.join(case_folder, filename), 'r') as fd: - data = json.load(fd) + def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]: + try: + p = WifiSecurityParser(self.config, self.case_id) + data = p.get_result() for wifi in data: # create timeline entry ctimestamp = datetime.strptime(wifi['cdat'], '%Y-%m-%d %H:%M:%S %z') @@ -230,15 +178,13 @@ def __extract_ts_wifisecurity(case_folder: str) -> Generator[dict, None, None]: 'extra_field_1': wifi['accc'] } yield ts_event - except Exception as e: - print(f"ERROR while extracting timestamp from {filename}. 
Reason {str(e)}") - + except Exception as e: + print(f"ERROR while extracting timestamp from wifisecurity. Reason {str(e)}") -def __extract_ts_wifi_known_networks(case_folder: str) -> Generator[dict, None, None]: - filename = 'wifi_known_networks.json' - try: - with open(os.path.join(case_folder, filename), 'r') as fd: - data = json.load(fd) + def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: + try: + p = WifiKnownNetworksParser(self.config, self.case_id) + data = p.get_result() for item in data.values(): ssid = item['SSID'] # WIFI added @@ -291,23 +237,57 @@ def __extract_ts_wifi_known_networks(case_folder: str) -> Generator[dict, None, # some wifi networks do not have a password modification date # print(f"ERROR {filename} while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.") pass - except Exception as e: - print(f"ERROR while extracting timestamp from {filename}. Reason {str(e)}") - - -def analyse_path(case_folder: str, output_file: str = 'timeliner.jsonl') -> bool: - # Get all the functions that start with '__extract_ts_' - # and call these with the case_folder as parameter - # do this using generators, as this eats much less memory and is just so much more efficient - try: - with open(output_file, 'w') as f: - for func in globals(): - if func.startswith('__extract_ts_'): - for event in globals()[func](case_folder): # call the function - line = json.dumps(event) - f.write(line) - f.write('\n') - except Exception as e: - print(f"ERROR: impossible to save timeline to {output_file}. Reason: {str(e)}") - return False - return True + except Exception as e: + print(f"ERROR while extracting timestamp from wifi_known_networks. Reason {str(e)}") + + def execute(self): + # Get all the functions that start with '__extract_ts_' + # and call these with the case_folder as parameter + # do this using generators, as this eats much less memory and is just so much more efficient + for func in dir(self): + if func.startswith(f"_{self.__class__.__name__}__extract_ts_"): + for event in getattr(self, func)(): # call the function + yield event + + def __powerlogs__PLProcessMonitorAgent_EventPoint_ProcessExit(jdata): + proc_exit = jdata.get('PLProcessMonitorAgent_EventPoint_ProcessExit', []) + for proc in proc_exit: + timestamp = datetime.fromtimestamp(proc['timestamp'], tz=timezone.utc) + + extra_field = '' + if 'IsPermanent' in proc.keys(): + extra_field = 'Is permanent: %d' % proc['IsPermanent'] + ts_event = { + 'message': proc['ProcessName'], + 'timestamp': proc['timestamp'] * 1000000, + 'datetime': timestamp.isoformat(), + 'timestamp_desc': 'Process Exit with reason code: %d reason namespace %d' % (proc['ReasonCode'], proc['ReasonNamespace']), + 'extra_field_1': extra_field + } + yield ts_event + + def __powerlogs__PLProcessMonitorAgent_EventBackward_ProcessExitHistogram(jdata): + events = jdata.get('PLProcessMonitorAgent_EventBackward_ProcessExitHistogram', []) + for event in events: + timestamp = datetime.fromtimestamp(event['timestamp'], tz=timezone.utc) + ts_event = { + 'message': event['ProcessName'], + 'timestamp': event['timestamp'] * 1000000, + 'datetime': timestamp.isoformat(), + 'timestamp_desc': 'Process Exit with reason code: %d reason namespace %d' % (event['ReasonCode'], event['ReasonNamespace']), + 'extra_field_1': 'Crash frequency: [0-5s]: %d, [5-10s]: %d, [10-60s]: %d, [60s+]: %d' % (event['0s-5s'], event['5s-10s'], event['10s-60s'], event['60s+']) + } + yield ts_event + + def __powerlogs__PLAccountingOperator_EventNone_Nodes(jdata): + 
eventnone = jdata.get('PLAccountingOperator_EventNone_Nodes', []) + for event in eventnone: + timestamp = datetime.fromtimestamp(event['timestamp'], tz=timezone.utc) + ts_event = { + 'message': event['Name'], + 'timestamp': event['timestamp'] * 1000000, + 'datetime': timestamp.isoformat(), + 'timestamp_desc': 'PLAccountingOperator Event', + 'extra_field_1': 'Is permanent: %d' % event['IsPermanent'] + } + yield ts_event diff --git a/analysers/wifi_geolocation.py b/analysers/wifi_geolocation.py index dd3e3af..f9e2381 100644 --- a/analysers/wifi_geolocation.py +++ b/analysers/wifi_geolocation.py @@ -3,82 +3,88 @@ # For Python3 # Author: Aaron Kaplan -import sys import json import dateutil.parser import os import gpxpy import gpxpy.gpx - -sys.path.append('..') # noqa: E402 - - -analyser_description = "Generate GPS Exchange (GPX) of wifi geolocations" -analyser_format = "gpx" - - -def analyse_path(case_folder: str, output_file: str = "wifi-geolocations.gpx") -> bool: - potential_source_files = ['wifinetworks/WiFi_com.apple.wifi.known-networks.plist.json', 'plists/WiFi_com.apple.wifi.known-networks.plist.json', 'wifi_known_networks.json'] - input_file_path = None - for fname in potential_source_files: - input_file_path = os.path.join(case_folder, fname) - if os.path.isfile(input_file_path): - break - if not input_file_path: - # TODO we could call the parser and generate the file for us...and then parse it... - raise FileNotFoundError(f"Could not find any of the potential source files: {potential_source_files}.") - - # we have a valid file_path and can generate the gpx file - with open(input_file_path, 'r') as f: - json_data = json.load(f) - return generate_gpx_from_known_networks_json(json_data=json_data, output_file=output_file) - - -def generate_gpx_from_known_networks_json(json_data: str, output_file: str): - # Create new GPX object - gpx = gpxpy.gpx.GPX() - - for network_name, network_data in json_data.items(): - ssid = network_data.get('SSID', network_name) - # timestamps are always tricky - timestamp_str = network_data.get('AddedAt', '') - if not timestamp_str: - timestamp_str = network_data.get('JoinedByUserAt', '') # second best attempt - if not timestamp_str: - timestamp_str = network_data.get('UpdatedAt', '') # third best attempt - # Convert ISO 8601 format to datetime - add_reason = network_data.get("AddReason", '') - - try: - timestamp = dateutil.parser.parse(timestamp_str) - except Exception as e: - print(f"Error converting timestamp. Reason: {str(e)}. Timestamp was: {str(timestamp_str)}. 
Assuming Jan 1st 1970.") - timestamp = dateutil.parser.parse('1970-01-01') # begin of epoch - - bssid = network_data.get('__OSSpecific__', {}).get('BSSID', '') - channel = network_data.get('__OSSpecific__', {}).get('CHANNEL', '') - for bss in network_data.get('BSSList', []): - lat = bss.get('LocationLatitude', '') - lon = bss.get('LocationLongitude', '') - location_accuracy = bss.get('LocationAccuracy', '') - - description = f'''BSSID: {bssid} -Channel: {channel} -Timestamp: {timestamp_str} -LocationAccuracy: {location_accuracy} -Latitude: {lat} -Longitude: {lon} -Reason for Adding: {add_reason}''' - - # Create new waypoint - waypoint = gpxpy.gpx.GPXWaypoint(latitude=lat, longitude=lon, time=timestamp) - waypoint.name = ssid - waypoint.description = description - - # Add waypoint to gpx file - gpx.waypoints.append(waypoint) - - # Save gpx file - with open(output_file, 'w') as f: - f.write(gpx.to_xml()) - return +from utils.base import BaseAnalyserInterface + + +class WifiGeolocationAnalyser(BaseAnalyserInterface): + description = "Generate GPS Exchange (GPX) of wifi geolocations" + format = "gpx" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + + def get_result(self, force: bool = False): + raise NotImplementedError("This function is not compatible with this module.") + + def save_result(self, force: bool = False, indent=None): + self.execute() + + def execute(self): + potential_source_files = ['wifinetworks/WiFi_com.apple.wifi.known-networks.plist.json', 'plists/WiFi_com.apple.wifi.known-networks.plist.json', 'wifi_known_networks.json'] + input_file_path = None + for fname in potential_source_files: + input_file_path = os.path.join(self.case_parsed_data_folder, fname) + if os.path.isfile(input_file_path): + break + if not input_file_path: + # TODO we could call the parser and generate the file for us...and then parse it... + raise FileNotFoundError(f"Could not find any of the potential source files: {potential_source_files}.") + + # we have a valid file_path and can generate the gpx file + with open(input_file_path, 'r') as f: + json_data = json.load(f) + return WifiGeolocationAnalyser.generate_gpx_from_known_networks_json(json_data=json_data, output_file=self.output_file) + + def generate_gpx_from_known_networks_json(json_data: str, output_file: str): + # Create new GPX object + gpx = gpxpy.gpx.GPX() + + for network_name, network_data in json_data.items(): + ssid = network_data.get('SSID', network_name) + # timestamps are always tricky + timestamp_str = network_data.get('AddedAt', '') + if not timestamp_str: + timestamp_str = network_data.get('JoinedByUserAt', '') # second best attempt + if not timestamp_str: + timestamp_str = network_data.get('UpdatedAt', '') # third best attempt + # Convert ISO 8601 format to datetime + add_reason = network_data.get("AddReason", '') + + try: + timestamp = dateutil.parser.parse(timestamp_str) + except Exception as e: + print(f"Error converting timestamp. Reason: {str(e)}. Timestamp was: {str(timestamp_str)}. 
Assuming Jan 1st 1970.") + timestamp = dateutil.parser.parse('1970-01-01') # begin of epoch + + bssid = network_data.get('__OSSpecific__', {}).get('BSSID', '') + channel = network_data.get('__OSSpecific__', {}).get('CHANNEL', '') + for bss in network_data.get('BSSList', []): + lat = bss.get('LocationLatitude', '') + lon = bss.get('LocationLongitude', '') + location_accuracy = bss.get('LocationAccuracy', '') + + description = f'''BSSID: {bssid} + Channel: {channel} + Timestamp: {timestamp_str} + LocationAccuracy: {location_accuracy} + Latitude: {lat} + Longitude: {lon} + Reason for Adding: {add_reason}''' + + # Create new waypoint + waypoint = gpxpy.gpx.GPXWaypoint(latitude=lat, longitude=lon, time=timestamp) + waypoint.name = ssid + waypoint.description = description + + # Add waypoint to gpx file + gpx.waypoints.append(waypoint) + + # Save gpx file + with open(output_file, 'w') as f: + f.write(gpx.to_xml()) + return diff --git a/analysers/wifi_geolocation_kml.py b/analysers/wifi_geolocation_kml.py index 502098a..6d9d8c6 100644 --- a/analysers/wifi_geolocation_kml.py +++ b/analysers/wifi_geolocation_kml.py @@ -3,99 +3,95 @@ # For Python3 # Author: Aaron Kaplan -import json -import dateutil.parser -import os import xml.etree.ElementTree as ET - - -analyser_description = "Generate KML file for wifi geolocations" -analyser_format = "kml" - - -def analyse_path(case_folder: str, output_file: str = "wifi-geolocations.kml") -> bool: - potential_source_files = ['wifinetworks/WiFi_com.apple.wifi.known-networks.plist.json', 'plists/WiFi_com.apple.wifi.known-networks.plist.json', 'wifi_known_networks.json'] - input_file_path = None - for fname in potential_source_files: - input_file_path = os.path.join(case_folder, fname) - if os.path.isfile(input_file_path): - break - if not input_file_path: - # TODO we could call the parser and generate the file for us...and then parse it... - raise FileNotFoundError(f"Could not find any of the potential source files: {potential_source_files}.") - - # we have a valid file_path and can generate the gpx file - with open(input_file_path, 'r') as f: - json_data = json.load(f) - return generate_kml_from_known_networks_json(json_data=json_data, output_file=output_file) - - -# LATER merge this and wifi_geolocation.py to share as much common code as possible -def generate_kml_from_known_networks_json(json_data: str, output_file: str): - # Create new KML root - kml = ET.Element('kml', xmlns='http://www.opengis.net/kml/2.2') - document = ET.SubElement(kml, 'Document') - - # Add tour elements - tour = ET.SubElement(document, 'gx:Tour') - ET.SubElement(tour, 'name').text = 'WiFi Tour' - playlist = ET.SubElement(tour, 'gx:Playlist') - - for network_name, network_data in json_data.items(): - ssid = network_data.get('SSID', network_name) - # timestamps are always tricky - timestamp_str = network_data.get('AddedAt', '') - if not timestamp_str: - timestamp_str = network_data.get('JoinedByUserAt', '') # second best attempt - if not timestamp_str: - timestamp_str = network_data.get('UpdatedAt', '') # third best attempt - # Convert ISO 8601 format to datetime - add_reason = network_data.get("AddReason", '') - - try: - timestamp = dateutil.parser.parse(timestamp_str) - except Exception as e: - print(f"Error converting timestamp. Reason: {str(e)}. Timestamp was: {str(timestamp_str)}. 
Assuming Jan 1st 1970.") - timestamp = dateutil.parser.parse('1970-01-01') # begin of epoch - - bssid = network_data.get('__OSSpecific__', {}).get('BSSID', '') - channel = network_data.get('__OSSpecific__', {}).get('CHANNEL', '') - for bss in network_data.get('BSSList', []): - lat = bss.get('LocationLatitude', '') - lon = bss.get('LocationLongitude', '') - location_accuracy = bss.get('LocationAccuracy', '') - - description = f'''BSSID: {bssid} -Channel: {channel} -Timestamp: {timestamp_str} -LocationAccuracy: {location_accuracy} -Latitude: {lat} -Longitude: {lon} -Reason for Adding: {add_reason}''' - - # Create new waypoint - placemark = ET.SubElement(document, 'Placemark') - ET.SubElement(placemark, 'name').text = ssid - point = ET.SubElement(placemark, 'Point') - ET.SubElement(point, 'coordinates').text = f"{lon},{lat},0" - - et_description = ET.SubElement(placemark, 'description') - et_description.text = description - - # Add to tour playlist # TODO ideally the toor should be generated in the same order as the timestamps - flyto = ET.SubElement(playlist, 'gx:FlyTo') - ET.SubElement(flyto, 'gx:duration').text = '5.0' # Duration of each flyto - ET.SubElement(flyto, 'gx:flyToMode').text = 'smooth' - camera = ET.SubElement(flyto, 'Camera') - ET.SubElement(camera, 'longitude').text = str(lon) - ET.SubElement(camera, 'latitude').text = str(lat) - ET.SubElement(camera, 'altitude').text = '500' # Camera altitude - ET.SubElement(camera, 'heading').text = '0' - ET.SubElement(camera, 'tilt').text = '45' - ET.SubElement(camera, 'roll').text = '0' - - # Convert the ElementTree to a string and save it to a file - tree = ET.ElementTree(kml) - tree.write(output_file) - - return +from utils.base import BaseAnalyserInterface +from parsers.wifi_known_networks import WifiKnownNetworksParser + + +class WifiGeolocationKmlAnalyser(BaseAnalyserInterface): + description = "Generate KML file for wifi geolocations" + format = "kml" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + + def get_result(self, force: bool = False): + raise NotImplementedError("This function is not compatible with this module.") + + def save_result(self, force: bool = False, indent=None): + self.execute() + + def execute(self): + json_data = WifiKnownNetworksParser(self.config, self.case_id).get_result() + # we have a valid file_path and can generate the gpx file + return WifiGeolocationKmlAnalyser.generate_kml_from_known_networks_json(json_data=json_data, output_file=self.output_file) + + # LATER merge this and wifi_geolocation.py to share as much common code as possible + def generate_kml_from_known_networks_json(json_data: str, output_file: str): + # Create new KML root + kml = ET.Element('kml', xmlns='http://www.opengis.net/kml/2.2') + document = ET.SubElement(kml, 'Document') + + # Add tour elements + tour = ET.SubElement(document, 'gx:Tour') + ET.SubElement(tour, 'name').text = 'WiFi Tour' + playlist = ET.SubElement(tour, 'gx:Playlist') + + for network_name, network_data in json_data.items(): + ssid = network_data.get('SSID', network_name) + # timestamps are always tricky + timestamp_str = network_data.get('AddedAt', '') + if not timestamp_str: + timestamp_str = network_data.get('JoinedByUserAt', '') # second best attempt + if not timestamp_str: + timestamp_str = network_data.get('UpdatedAt', '') # third best attempt + # Convert ISO 8601 format to datetime + add_reason = network_data.get("AddReason", '') + + # try: + # timestamp = dateutil.parser.parse(timestamp_str) + # except 
Exception as e: + # print(f"Error converting timestamp. Reason: {str(e)}. Timestamp was: {str(timestamp_str)}. Assuming Jan 1st 1970.") + # timestamp = dateutil.parser.parse('1970-01-01') # begin of epoch + + bssid = network_data.get('__OSSpecific__', {}).get('BSSID', '') + channel = network_data.get('__OSSpecific__', {}).get('CHANNEL', '') + for bss in network_data.get('BSSList', []): + lat = bss.get('LocationLatitude', '') + lon = bss.get('LocationLongitude', '') + location_accuracy = bss.get('LocationAccuracy', '') + + description = f'''BSSID: {bssid} + Channel: {channel} + Timestamp: {timestamp_str} + LocationAccuracy: {location_accuracy} + Latitude: {lat} + Longitude: {lon} + Reason for Adding: {add_reason}''' + + # Create new waypoint + placemark = ET.SubElement(document, 'Placemark') + ET.SubElement(placemark, 'name').text = ssid + point = ET.SubElement(placemark, 'Point') + ET.SubElement(point, 'coordinates').text = f"{lon},{lat},0" + + et_description = ET.SubElement(placemark, 'description') + et_description.text = description + + # Add to tour playlist # TODO ideally the toor should be generated in the same order as the timestamps + flyto = ET.SubElement(playlist, 'gx:FlyTo') + ET.SubElement(flyto, 'gx:duration').text = '5.0' # Duration of each flyto + ET.SubElement(flyto, 'gx:flyToMode').text = 'smooth' + camera = ET.SubElement(flyto, 'Camera') + ET.SubElement(camera, 'longitude').text = str(lon) + ET.SubElement(camera, 'latitude').text = str(lat) + ET.SubElement(camera, 'altitude').text = '500' # Camera altitude + ET.SubElement(camera, 'heading').text = '0' + ET.SubElement(camera, 'tilt').text = '45' + ET.SubElement(camera, 'roll').text = '0' + + # Convert the ElementTree to a string and save it to a file + tree = ET.ElementTree(kml) + tree.write(output_file) + + return diff --git a/analysers/yarascan.py b/analysers/yarascan.py index a03b267..16f900e 100644 --- a/analysers/yarascan.py +++ b/analysers/yarascan.py @@ -1,16 +1,10 @@ -import json import yara import os import glob import threading import queue +from utils.base import BaseAnalyserInterface -analyser_description = "Scan the case folder using YARA rules ('./yara' or SYSDIAGNOSE_YARA_RULES_PATH)" -analyser_format = "json" - -yara_rules_path = os.getenv('SYSDIAGNOSE_YARA_RULES_PATH', './yara') - -# FIXME currently only looks in parsed_data folder, not the cases folder. Requires a revamp of all analysers. # These are the commonly used external variables that can be used in the YARA rules externals = { @@ -20,66 +14,84 @@ 'filetype': '', # just a stub to allow some rules to load 'owner': '', # just a stub to allow some rules to load } -# Question: What is the impact of externals? (single threaded) -# - timing without externals at all : 1m30 - we discard a few (useful?) rules, so faster? ... -# 45s multithreaded (have 2 large files and 16 threads) -# - timing without externals per file : 1m30 - loaded empty externals, just to ensure rules are equivalent -# 47s multithreaded (have 2 large files and 16 threads) -# - timing with externals per file : 4m - delays caused by the many yara.compile calls. 
-# - timing with externals per file MT: 1m multithreaded (have 2 large files and 16 threads) - - -def analyse_path(case_folder: str, output_file: str = "yarascan.json") -> bool: - results = {'errors': [], 'matches': []} - - if not os.path.isdir(yara_rules_path): - raise FileNotFoundError(f"Could not find the YARA rules folder: {yara_rules_path}") - - rule_files, errors = get_valid_yara_rule_files(yara_rules_path) - if errors: - results['errors'] = errors - if len(rule_files) == 0: - results['errors'].append(f"No valid YARA rules were present in the YARA rules folder: {yara_rules_path}") - rule_filepaths = {} # we need to convert the list of rule files to a dictionary for yara.compile - for rule_file in rule_files: - namespace = rule_file[len(yara_rules_path):].strip(os.path.sep) - rule_filepaths[namespace] = rule_file - - matches, errors = scan_directory(case_folder, rule_filepaths, ignore_files=[output_file]) - if errors: - results['errors'].extend(errors) - results['matches'] = matches - - with open(output_file, 'w') as f: - json.dump(results, f, indent=4) - return - - -def get_valid_yara_rule_files(rules_path: str) -> tuple[list, list]: - rule_files_to_test = glob.glob(os.path.join(yara_rules_path, '**', '*.yar'), recursive=True) - rule_files_validated = [] - errors = [] - for rule_file in rule_files_to_test: - if not os.path.isfile(rule_file): - continue - print(f"Loading YARA rule: {rule_file}") - try: - yara.compile(filepath=rule_file, externals=externals) - # if we reach this point, the rule is valid - rule_files_validated.append(rule_file) - except yara.SyntaxError as e: - print(f"Error compiling rule {rule_file}: {str(e)}") - errors.append(f"Error compiling rule {rule_file}: {str(e)}") - continue - except yara.Error as e: - print(f"Error compiling rule {rule_file}: {str(e)}") - errors.append(f"Error loading rule {rule_file}: {str(e)}") - continue - - return rule_files_validated, errors - - -def scan_directory(directory: str, rule_filepaths: dict, ignore_files: list) -> tuple[list, list]: + + +class YaraAnalyser(BaseAnalyserInterface): + description = "Scan the case folder using YARA rules ('./yara' or SYSDIAGNOSE_YARA_RULES_PATH)" + format = "json" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + self.yara_rules_path = os.getenv('SYSDIAGNOSE_YARA_RULES_PATH', './yara') + + # Question: What is the impact of externals? (single threaded) + # - timing without externals at all : 1m30 - we discard a few (useful?) rules, so faster? ... + # 45s multithreaded (have 2 large files and 16 threads) + # - timing without externals per file : 1m30 - loaded empty externals, just to ensure rules are equivalent + # 47s multithreaded (have 2 large files and 16 threads) + # - timing with externals per file : 4m - delays caused by the many yara.compile calls. 
+ # - timing with externals per file MT: 1m multithreaded (have 2 large files and 16 threads) + + def execute(self): + results = {'errors': [], 'matches': []} + + if not os.path.isdir(self.yara_rules_path): + raise FileNotFoundError(f"Could not find the YARA rules folder: {self.yara_rules_path}") + + rule_files, errors = self.get_valid_yara_rule_files(self.yara_rules_path) + if errors: + results['errors'] = errors + if len(rule_files) == 0: + results['errors'].append(f"No valid YARA rules were present in the YARA rules folder: {self.yara_rules_path}") + rule_filepaths = {} # we need to convert the list of rule files to a dictionary for yara.compile + for rule_file in rule_files: + namespace = rule_file[len(self.yara_rules_path):].strip(os.path.sep) + rule_filepaths[namespace] = rule_file + + matches, errors = scan_directory( + [ + self.case_parsed_data_folder, + self.case_data_folder + ], + rule_filepaths, + ignore_files=[ + self.output_file, # don't match on ourselves + ], + ignore_folders=[ + glob.glob(os.path.join(self.case_data_subfolder, 'system_logs.logarchive')).pop(), # irrelevant for YARA rules + ] + ) + if errors: + results['errors'].extend(errors) + results['matches'] = matches + + return results + + def get_valid_yara_rule_files(self, rules_path: str) -> tuple[list, list]: + rule_files_to_test = glob.glob(os.path.join(self.yara_rules_path, '**', '*.yar'), recursive=True) + rule_files_validated = [] + errors = [] + for rule_file in rule_files_to_test: + if not os.path.isfile(rule_file): + continue + print(f"Loading YARA rule: {rule_file}") + try: + yara.compile(filepath=rule_file, externals=externals) + # if we reach this point, the rule is valid + rule_files_validated.append(rule_file) + except yara.SyntaxError as e: + print(f"Error compiling rule {rule_file}: {str(e)}") + errors.append(f"Error compiling rule {rule_file}: {str(e)}") + continue + except yara.Error as e: + print(f"Error compiling rule {rule_file}: {str(e)}") + errors.append(f"Error loading rule {rule_file}: {str(e)}") + continue + + return rule_files_validated, errors + + +def scan_directory(directories: list, rule_filepaths: dict, ignore_files: list, ignore_folders: list) -> tuple[list, list]: results_lock = threading.Lock() matches = {} errors = [] @@ -88,14 +100,33 @@ def scan_directory(directory: str, rule_filepaths: dict, ignore_files: list) -> # build and fill the queue file_queue = queue.Queue() - for root, _, files in os.walk(directory): - for file in files: - if file in ignore_files: # skip the output file, as we know we may have matches on ourselves + for directory in directories: + for root, _, files in os.walk(directory): + stop = False + for ignore_folder in ignore_folders: + if root.startswith(ignore_folder): + stop = True + print(f"Skipping folder: {root}") + continue + if stop: continue - file_queue.put(os.path.join(root, file)) + for file in files: + file_full_path = os.path.join(root, file) + stop = False + for ignore_file in ignore_files: + if file_full_path.startswith(ignore_file): + stop = True + print(f"Skipping file: {file_full_path}") + continue + if stop: + continue + file_queue.put(file_full_path) # define our consumer that will run in the threads def consumer(): + # compile rules only once ... and ignore file specific externals. 
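# A minimal standalone sketch of the trade-off discussed in the timing notes above:
# compiling the YARA ruleset once with stub externals and reusing it, versus
# recompiling it for every scanned item in order to inject item-specific externals.
# The rule text and the scanned blobs are hypothetical placeholders, not project code.
import yara

sample_rule = 'rule demo { strings: $s = "sysdiagnose" condition: $s }'
stub_externals = {'filename': '', 'filepath': '', 'extension': ''}
blobs = [b'contains sysdiagnose marker', b'nothing interesting here']

# compile once, reuse for every item (the approach taken by the consumer thread below)
rules = yara.compile(source=sample_rule, externals=stub_externals)
matches_once = [rules.match(data=blob) for blob in blobs]

# recompiling per item is what caused the 4 minute single-threaded runtime noted above
matches_per_item = []
for i, blob in enumerate(blobs):
    per_item = dict(stub_externals, filename=f'blob_{i}')
    matches_per_item.append(yara.compile(source=sample_rule, externals=per_item).match(data=blob))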
Massive speedup + rules = yara.compile(filepaths=rule_filepaths, externals=externals) + while True: print(f"Consumer thread seeing {file_queue.qsize()} files in queue, and taking one") file_path = file_queue.get() @@ -104,11 +135,12 @@ def consumer(): break print(f"Scanning file: {file_path}") - # set the externals for this file - externals['filename'] = file - externals['filepath'] = file_path[len(directory) + 1:] # exclude the case root directory that installation specific - externals['extension'] = os.path.splitext(file)[1] - rules = yara.compile(filepaths=rule_filepaths, externals=externals) + # set the externals for this file - massive slowdown + # externals_local = externals.copy() + # externals_local['filename'] = file + # externals_local['filepath'] = file_path[len(directory) + 1:] # exclude the case root directory that installation specific + # externals_local['extension'] = os.path.splitext(file)[1] + # rules = yara.compile(filepaths=rule_filepaths, externals=externals_local) try: m = rules.match(file_path) if m: @@ -127,7 +159,7 @@ def consumer(): errors.append(f"Error matching file {file_path}: {e}") file_queue.task_done() # signal that the file has been processed - max_threads = os.cpu_count() or 4 # default to 4 if we can't determine the number of CPUs + max_threads = os.cpu_count() * 2 or 4 # default to 4 if we can't determine the number of CPUs # Create and start consumer threads consumer_threads = [] for _ in range(max_threads): diff --git a/parsers/accessibility_tcc.py b/parsers/accessibility_tcc.py index 62eed00..7d51934 100644 --- a/parsers/accessibility_tcc.py +++ b/parsers/accessibility_tcc.py @@ -8,36 +8,28 @@ import glob import os import utils.misc as misc -import json +from utils.base import BaseParserInterface -version_string = "sysdiagnose-Accessibility-TCC.py v2020-20-20 Version 1.0" -# ----- definition for parsing.py script -----# +class AccessibilityTccParser(BaseParserInterface): + description = "Parsing Accessibility TCC logs" -parser_description = "Parsing Accessibility TCC logs" + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + def get_log_files(self) -> list: + log_files_globs = [ + 'logs/Accessibility/TCC.db' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'logs/Accessibility/TCC.db' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + return log_files - return log_files - - -def parse_path(path: str) -> list | dict: - # only one file to parse - try: - return misc.json_serializable(sqlite2json.sqlite2struct(get_log_files(path)[0])) - except IndexError: - return {'error': 'No TCC.db file found in logs/Accessibility/ directory'} - - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - result = parse_path(path) - output_file = os.path.join(output_folder, f"{__name__.split('.')[-1]}.json") - with open(output_file, 'w') as f: - json.dump(result, f, indent=4) + def execute(self) -> list | dict: + # only one file to parse + try: + return misc.json_serializable(sqlite2json.sqlite2struct(self.get_log_files()[0])) + except IndexError: + return {'error': 'No TCC.db file found in logs/Accessibility/ directory'} diff --git a/parsers/appinstallation.py b/parsers/appinstallation.py index e1bf2d8..d531601 100644 --- a/parsers/appinstallation.py +++ 
b/parsers/appinstallation.py @@ -13,25 +13,28 @@ import glob import os import utils.misc as misc +from utils.base import BaseParserInterface -parser_description = "Parsing app installation logs" +class AppInstallationParser(BaseParserInterface): + description = "Parsing app installation logs" + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'logs/appinstallation/AppUpdates.sqlitedb', - 'logs/appinstallation/appstored.sqlitedb' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def get_log_files(self) -> list: + log_files_globs = [ + 'logs/appinstallation/AppUpdates.sqlitedb', + 'logs/appinstallation/appstored.sqlitedb' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) - return log_files + return log_files - -def parse_path(path: str) -> list | dict: - try: - return misc.json_serializable(sqlite2json.sqlite2struct(get_log_files(path)[0])) - except IndexError: - return {'error': 'No AppUpdates.sqlitedb or appstored.sqlitedb file found in logs/appinstallation/ directory'} + def execute(self) -> list | dict: + try: + return misc.json_serializable(sqlite2json.sqlite2struct(self.get_log_files()[0])) + except IndexError: + return {'error': 'No AppUpdates.sqlitedb or appstored.sqlitedb file found in logs/appinstallation/ directory'} diff --git a/parsers/brctl.py b/parsers/brctl.py index fdfd496..3d987d9 100644 --- a/parsers/brctl.py +++ b/parsers/brctl.py @@ -6,371 +6,355 @@ import json import re import os - - -parser_description = "Parsing brctl files" - - -def get_log_files(log_root_path: str) -> list: - log_folders = [ - 'brctl/' - ] - return [os.path.join(log_root_path, log_folder) for log_folder in log_folders] - - -def parse_path(path: str) -> list | dict: - try: - return parsebrctl(get_log_files(path)[0]) - except IndexError: - return {'error': 'No brctl folder found'} - - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - result = parse_path(path) - output_file = os.path.join(output_folder, f"{__name__.split('.')[-1]}.json") - with open(output_file, 'w') as f: - json.dump(result, f, indent=4) - - -def parselistfile(container_list_file): - containers = {"containers": []} - result = [] - # print(container_list_file) - with open(container_list_file[0], 'r') as f: - keys = ['id', 'localizedName', 'documents', 'Public', 'clients'] - for line in f: - line = line.strip() - line = line.replace('Mobile Documents', 'Mobile_Documents') - keys = ['id', 'localizedName', 'documents', 'Public', 'Private', 'clients'] - values = re.findall(rf"({'|'.join(keys)}):\s*([^ \[]+|\[[^\]]*\])", line) - result = {k: v.strip('[]') for k, v in values} - if result != {}: - result['documents'] = result['documents'].replace('Mobile_Documents', 'Mobile Documents') - containers['containers'].append(result) - return containers - - -def parsedumpfile(container_dump_file): - with open(container_dump_file[0], 'r') as f: - dump = {} - section = "header" - previous_line = "" - current_section = "" - for line in f: - if line.strip() == "-----------------------------------------------------": +from utils.base import BaseParserInterface + + +class BrctlParser(BaseParserInterface): + description = "Parsing brctl files" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) 
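# The same structure repeats across the converted parsers: subclass BaseParserInterface,
# set a class-level description, locate inputs in get_log_files() via self.case_data_subfolder,
# and return the parsed data from execute(). The sketch below is a hypothetical extra parser
# following that pattern; 'logs/Example/example.log*' is a made-up glob, and the attributes
# and default output handling are assumed to be supplied by utils/base.py as seen in this diff.
import glob
import os

from utils.base import BaseParserInterface


class ExampleParser(BaseParserInterface):
    description = "Parsing a hypothetical example log"
    # format = "json"  # json appears to be the default, as noted in demo_parser.py

    def __init__(self, config: dict, case_id: str):
        super().__init__(__file__, config, case_id)

    def get_log_files(self) -> list:
        log_files = []
        for pattern in ['logs/Example/example.log*']:
            log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, pattern)))
        return log_files

    def execute(self) -> list | dict:
        # return a JSON-serialisable structure; save_result() on the base class
        # is expected to write it to the parser output folder
        return [{'file': os.path.basename(f)} for f in self.get_log_files()]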
+ + def get_log_files(self) -> list: + log_folders = [ + 'brctl/' + ] + return [os.path.join(self.case_data_subfolder, log_folder) for log_folder in log_folders] + + def execute(self) -> list | dict: + try: + return BrctlParser.parse_folder(self.get_log_files()[0]) + except IndexError: + return {'error': 'No brctl folder found'} + + def parselistfile(container_list_file): + containers = {"containers": []} + result = [] + # print(container_list_file) + with open(container_list_file[0], 'r') as f: + keys = ['id', 'localizedName', 'documents', 'Public', 'clients'] + for line in f: + line = line.strip() + line = line.replace('Mobile Documents', 'Mobile_Documents') + keys = ['id', 'localizedName', 'documents', 'Public', 'Private', 'clients'] + values = re.findall(rf"({'|'.join(keys)}):\s*([^ \[]+|\[[^\]]*\])", line) + result = {k: v.strip('[]') for k, v in values} + if result != {}: + result['documents'] = result['documents'].replace('Mobile_Documents', 'Mobile Documents') + containers['containers'].append(result) + return containers + + def parsedumpfile(container_dump_file): + with open(container_dump_file[0], 'r') as f: + dump = {} + section = "header" + previous_line = "" + current_section = "" + for line in f: + if line.strip() == "-----------------------------------------------------": + dump[section] = current_section + section = previous_line.strip() + current_section = "" + else: + previous_line = line + current_section += line + if current_section != "": dump[section] = current_section - section = previous_line.strip() - current_section = "" - else: - previous_line = line - current_section += line - if current_section != "": - dump[section] = current_section - # print(dump.keys()) - - # parsing different sections - # header - header = parse_header(dump['header']) - - # boot_history - # finding key value - # loop through the keys of the dictionary - for key in dump.keys(): - # check if the key starts with "boot_history" - if key.startswith("boot_history"): - # print the key and its value - bhkey = key - # runing the parser - try: - boot_history = parse_boot_history(dump[bhkey]) - except UnboundLocalError: - boot_history = {} - - # server_state - try: - server_state = parse_server_state(dump['server_state']) - except KeyError: - server_state = {} - - # client_state - try: - client_state = parse_client_state(dump['client_state']) - except KeyError: - client_state = {} - - # system - try: - system = parse_system_scheduler(dump['system']) - except KeyError: - system = {} - - # scheduler - try: - scheduler = parse_system_scheduler(dump['scheduler']) - except KeyError: - scheduler = {} - - # containers - # finding key value - # loop through the keys of the dictionary - for key, value in dump.items(): - # check if the key contains "containers matching" - if "+ app library:" in value: - # print the key and its value - ckey = key - # creating several parser with the same data - - # applibrary - try: - applibrary = parse_app_library(dump[ckey]) - except UnboundLocalError: - applibrary = {} - - # server_items - try: - server_items = parse_server_items(dump[ckey]) - except UnboundLocalError: - server_items = {} - - # app library IDs by App ID - try: - app_library_id, app_ids = parse_apps_monitor(dump['apps monitor']) - except KeyError: - app_library_id = {} - app_ids = {} - # putting together all the parsed data - - result = { - "header": header, - "boot_history": boot_history, - "server_state": server_state, - "client_state": client_state, - "system": system, - "scheduler": scheduler, - 
"applibrary": applibrary, - "server_items": server_items, - "app_library_id": app_library_id, - "app_ids": app_ids - } - - return result - - -def parse_header(header): - # Define a regular expression to match the key-value pairs - pattern = r"(\w+):\s+(.+)(?:\n|$)" - - # Find all the matches in the content - matches = re.findall(pattern, header) - - # Create an empty dictionary to store the output - output = {} - - # Loop through the matches and add them to the output dictionary - for key, value in matches: - # If the value contains a comma, split it into a list - if "," in value: - value = value.split(", ") - # If the value contains brackets, remove them - if value.startswith("<") and value.endswith(">"): - value = value[1:-1] - # Add the key-value pair to the output dictionary - output[key] = value - - pattern = r"dump taken at (\d{2}/\d{2}/\d{4}, \d{2}:\d{2}:\d{2}) \[account=(\d+)\] \[inCarry=(\w+)\] \[home=(.+)\]" - - # Find the match in the content - match = re.search(pattern, header) - - # Check if there is a match - if match: - # save the values - output['timestamp'] = match.group(1) - output['account'] = match.group(2) - output['inCarry'] = match.group(3) - output['home'] = match.group(4) - - # Convert the output dictionary to a JSON string - output_json = json.dumps(output) - - # Print the output JSON string - return output_json - - -def parse_boot_history(boot_history): - # split the section by newline characters - lines = boot_history.split("\n") - # initialize an empty list to store the parsed lines - result = [] - # loop through each line - for line in lines: - # parse the line and append it to the result list if not None - parsed_line = parse_line_boot_history(line) - if parsed_line: - result.append(parsed_line) - # return the result list - return result - - -def parse_line_boot_history(line): - # use regular expressions to extract the fields - match = re.search(r"\[(.+?)\] OS:(.+?) CloudDocs:(.+?) BirdSchema:(.+?) 
DBSchema:(.+)", line) - if match: - # return a dictionary with the field names and values - return { - "date": match.group(1), - "OS": match.group(2), - "CloudDocs": match.group(3), - "BirdSchema": match.group(4), - "DBSchema": match.group(5) + # print(dump.keys()) + + # parsing different sections + # header + header = BrctlParser.parse_header(dump['header']) + + # boot_history + # finding key value + # loop through the keys of the dictionary + for key in dump.keys(): + # check if the key starts with "boot_history" + if key.startswith("boot_history"): + # print the key and its value + bhkey = key + # runing the parser + try: + boot_history = BrctlParser.parse_boot_history(dump[bhkey]) + except UnboundLocalError: + boot_history = {} + + # server_state + try: + server_state = BrctlParser.parse_server_state(dump['server_state']) + except KeyError: + server_state = {} + + # client_state + try: + client_state = BrctlParser.parse_client_state(dump['client_state']) + except KeyError: + client_state = {} + + # system + try: + system = BrctlParser.parse_system_scheduler(dump['system']) + except KeyError: + system = {} + + # scheduler + try: + scheduler = BrctlParser.parse_system_scheduler(dump['scheduler']) + except KeyError: + scheduler = {} + + # containers + # finding key value + # loop through the keys of the dictionary + for key, value in dump.items(): + # check if the key contains "containers matching" + if "+ app library:" in value: + # print the key and its value + ckey = key + # creating several parser with the same data + + # applibrary + try: + applibrary = BrctlParser.parse_app_library(dump[ckey]) + except UnboundLocalError: + applibrary = {} + + # server_items + try: + server_items = BrctlParser.parse_server_items(dump[ckey]) + except UnboundLocalError: + server_items = {} + + # app library IDs by App ID + try: + app_library_id, app_ids = BrctlParser.parse_apps_monitor(dump['apps monitor']) + except KeyError: + app_library_id = {} + app_ids = {} + # putting together all the parsed data + + result = { + "header": header, + "boot_history": boot_history, + "server_state": server_state, + "client_state": client_state, + "system": system, + "scheduler": scheduler, + "applibrary": applibrary, + "server_items": server_items, + "app_library_id": app_library_id, + "app_ids": app_ids } - else: - # return None if the line does not match the pattern - return None + return result -def parse_server_state(server_state): - # Define the regex pattern to match the fields and their values - pattern = r"(last-sync|nextRank|minUsedTime):(.+?)(?=\s|$)" + def parse_header(header): + # Define a regular expression to match the key-value pairs + pattern = r"(\w+):\s+(.+)(?:\n|$)" - # Use re.findall to get all the matches as a list of tuples - matches = re.findall(pattern, server_state) + # Find all the matches in the content + matches = re.findall(pattern, header) - # Initialize an empty dictionary to store the parsed data - output_dict = {} + # Create an empty dictionary to store the output + output = {} - # Loop through the matches - for match in matches: - # Get the field name and value from the tuple - field, value = match - # Replace any dashes with underscores in the field name - field = field.replace("-", "_") - # If the field is shared_db, create a nested dictionary for its value - if field == "shared_db": - value = {} - # Add the field-value pair to the output dictionary - output_dict[field] = value + # Loop through the matches and add them to the output dictionary + for key, value in matches: + # If 
the value contains a comma, split it into a list + if "," in value: + value = value.split(", ") + # If the value contains brackets, remove them + if value.startswith("<") and value.endswith(">"): + value = value[1:-1] + # Add the key-value pair to the output dictionary + output[key] = value - # Print the output dictionary - return output_dict + pattern = r"dump taken at (\d{2}/\d{2}/\d{4}, \d{2}:\d{2}:\d{2}) \[account=(\d+)\] \[inCarry=(\w+)\] \[home=(.+)\]" + # Find the match in the content + match = re.search(pattern, header) -def parse_client_state(data: str) -> dict: - # Split the data into lines - lines = data.split('\n') - - # Initialize an empty dictionary to store the parsed data - parsed_data = {} - - # Iterate over each line in the data - for line in lines: - # Use regular expressions to match key-value pairs - match = re.match(r'\s*(\w+)\s*=\s*(.*);', line) + # Check if there is a match + if match: + # save the values + output['timestamp'] = match.group(1) + output['account'] = match.group(2) + output['inCarry'] = match.group(3) + output['home'] = match.group(4) + + # Convert the output dictionary to a JSON string + output_json = json.dumps(output) + + # Print the output JSON string + return output_json + + def parse_boot_history(boot_history): + # split the section by newline characters + lines = boot_history.split("\n") + # initialize an empty list to store the parsed lines + result = [] + # loop through each line + for line in lines: + # parse the line and append it to the result list if not None + parsed_line = BrctlParser.parse_line_boot_history(line) + if parsed_line: + result.append(parsed_line) + # return the result list + return result + + def parse_line_boot_history(line): + # use regular expressions to extract the fields + match = re.search(r"\[(.+?)\] OS:(.+?) CloudDocs:(.+?) BirdSchema:(.+?) 
DBSchema:(.+)", line) if match: - key, value = match.groups() - # Remove any quotes from the value - value = value.strip('"') - # Try to convert the value to an integer or float - try: - value = int(value) - except ValueError: + # return a dictionary with the field names and values + return { + "date": match.group(1), + "OS": match.group(2), + "CloudDocs": match.group(3), + "BirdSchema": match.group(4), + "DBSchema": match.group(5) + } + else: + # return None if the line does not match the pattern + return None + + def parse_server_state(server_state): + # Define the regex pattern to match the fields and their values + pattern = r"(last-sync|nextRank|minUsedTime):(.+?)(?=\s|$)" + + # Use re.findall to get all the matches as a list of tuples + matches = re.findall(pattern, server_state) + + # Initialize an empty dictionary to store the parsed data + output_dict = {} + + # Loop through the matches + for match in matches: + # Get the field name and value from the tuple + field, value = match + # Replace any dashes with underscores in the field name + field = field.replace("-", "_") + # If the field is shared_db, create a nested dictionary for its value + if field == "shared_db": + value = {} + # Add the field-value pair to the output dictionary + output_dict[field] = value + + # Print the output dictionary + return output_dict + + def parse_client_state(data: str) -> dict: + # Split the data into lines + lines = data.split('\n') + + # Initialize an empty dictionary to store the parsed data + parsed_data = {} + + # Iterate over each line in the data + for line in lines: + # Use regular expressions to match key-value pairs + match = re.match(r'\s*(\w+)\s*=\s*(.*);', line) + if match: + key, value = match.groups() + # Remove any quotes from the value + value = value.strip('"') + # Try to convert the value to an integer or float try: - value = float(value) + value = int(value) except ValueError: - pass - # Add the key-value pair to the dictionary - parsed_data[key] = value - - return parsed_data - - -def parse_system_scheduler(input): - data = {} - lines = input.split('\n') - for line in lines: - # removing ANSI escape codes - line = re.sub(r'\x1b\[[0-9;]*m', '', line) - line = line.strip() - if line.startswith('+'): - key, value = line.split(':', 1) - key = key.strip().replace('+', '').strip() - value = value.strip() - data[key] = value - return data - - -def parse_app_library(data): - lines = data.splitlines() - matching_lines = [line for line in lines if "+ app library" in line] - - pattern = r'<(.*?)\[(\d+)\].*?ino:(\d+).*?apps:\{(.*?)\}.*?bundles:\{(.*?)\}' - matches = re.findall(pattern, '\n'.join(matching_lines)) - - result = [] - for match in matches: - library = match[0] - app_id = match[1] # noqa F841 - ino = match[2] - apps = match[3].split('; ') - bundles = match[4].split(', ') - result.append({'library': library, 'ino': ino, 'apps': apps, 'bundles': bundles}) - - return result - - -def parse_server_items(data): - lines = data.splitlines() - matching_lines = [line for line in lines if "----------------------" in line] - - app_list = [] - - for line in matching_lines: - pattern = r'-+([^\[]+)\[(\d+)\]-+' - match = re.search(pattern, line) - - if match: - library_name = match.group(1) - library_id = match.group(2) - app_list.append({'library_name': library_name, 'library_id': library_id}) - - return app_list - - -def parse_apps_monitor(data): - # Split the text into two parts - parts = data.split("=======================") - - # Extract the JSON strings from each part - json_str1 = 
parts[1].strip().replace("=", ":").replace("\\", "").replace( - "\"{(n \"", "[\"").replace("\"n)}\"", "\"]").replace(",n ", ",").replace(";", ",") - json_str2 = parts[2].strip().replace("=", ":").replace("\\", "").replace( - "\"{(n \"", "[\"").replace("\"n)}\"", "\"]").replace(",n ", ",").replace(";", ",") - - # ugly fixes - last_comma_index = json_str1.rfind(",") - json_str1_new = json_str1[:last_comma_index] + json_str1[last_comma_index + 1:] - - first_brace_index = json_str1_new.find("}") - json_str1 = json_str1_new[:first_brace_index + 1] - - # ugly fixes - last_comma_index = json_str2.rfind(",") - json_str2_new = json_str2[:last_comma_index] + json_str2[last_comma_index + 1:] - - first_brace_index = json_str2_new.find("}") - json_str2 = json_str2_new[:first_brace_index + 1] - - # Load the JSON strings into Python dictionaries - json1 = json.loads(json_str1) - json2 = json.loads(json_str2) - - return json1, json2 - - -def parsebrctl(brctl_folder): - container_list_file = [os.path.join(brctl_folder, 'brctl-container-list.txt')] - container_dump_file = [os.path.join(brctl_folder, 'brctl-dump.txt')] - if os.path.exists(container_list_file[0]) and os.path.exists(container_dump_file[0]): - brctl_parsing = {**parselistfile(container_list_file), **parsedumpfile(container_dump_file)} - return brctl_parsing - return {} + try: + value = float(value) + except ValueError: + pass + # Add the key-value pair to the dictionary + parsed_data[key] = value + + return parsed_data + + def parse_system_scheduler(input): + data = {} + lines = input.split('\n') + for line in lines: + # removing ANSI escape codes + line = re.sub(r'\x1b\[[0-9;]*m', '', line) + line = line.strip() + if line.startswith('+'): + key, value = line.split(':', 1) + key = key.strip().replace('+', '').strip() + value = value.strip() + data[key] = value + return data + + def parse_app_library(data): + lines = data.splitlines() + matching_lines = [line for line in lines if "+ app library" in line] + + pattern = r'<(.*?)\[(\d+)\].*?ino:(\d+).*?apps:\{(.*?)\}.*?bundles:\{(.*?)\}' + matches = re.findall(pattern, '\n'.join(matching_lines)) + + result = [] + for match in matches: + library = match[0] + app_id = match[1] # noqa F841 + ino = match[2] + apps = match[3].split('; ') + bundles = match[4].split(', ') + result.append({'library': library, 'ino': ino, 'apps': apps, 'bundles': bundles}) + + return result + + def parse_server_items(data): + lines = data.splitlines() + matching_lines = [line for line in lines if "----------------------" in line] + + app_list = [] + + for line in matching_lines: + pattern = r'-+([^\[]+)\[(\d+)\]-+' + match = re.search(pattern, line) + + if match: + library_name = match.group(1) + library_id = match.group(2) + app_list.append({'library_name': library_name, 'library_id': library_id}) + + return app_list + + def parse_apps_monitor(data): + # Split the text into two parts + parts = data.split("=======================") + + # Extract the JSON strings from each part + json_str1 = parts[1].strip().replace("=", ":").replace("\\", "").replace( + "\"{(n \"", "[\"").replace("\"n)}\"", "\"]").replace(",n ", ",").replace(";", ",") + json_str2 = parts[2].strip().replace("=", ":").replace("\\", "").replace( + "\"{(n \"", "[\"").replace("\"n)}\"", "\"]").replace(",n ", ",").replace(";", ",") + + # ugly fixes + last_comma_index = json_str1.rfind(",") + json_str1_new = json_str1[:last_comma_index] + json_str1[last_comma_index + 1:] + + first_brace_index = json_str1_new.find("}") + json_str1 = json_str1_new[:first_brace_index 
+ 1] + + # ugly fixes + last_comma_index = json_str2.rfind(",") + json_str2_new = json_str2[:last_comma_index] + json_str2[last_comma_index + 1:] + + first_brace_index = json_str2_new.find("}") + json_str2 = json_str2_new[:first_brace_index + 1] + + # Load the JSON strings into Python dictionaries + json1 = json.loads(json_str1) + json2 = json.loads(json_str2) + + return json1, json2 + + def parse_folder(brctl_folder): + container_list_file = [os.path.join(brctl_folder, 'brctl-container-list.txt')] + container_dump_file = [os.path.join(brctl_folder, 'brctl-dump.txt')] + if os.path.exists(container_list_file[0]) and os.path.exists(container_dump_file[0]): + brctl_parsing = {**BrctlParser.parselistfile(container_list_file), **BrctlParser.parsedumpfile(container_dump_file)} + return brctl_parsing + return {} diff --git a/parsers/containermanager.py b/parsers/containermanager.py index 0999b0d..5a7bb00 100644 --- a/parsers/containermanager.py +++ b/parsers/containermanager.py @@ -7,24 +7,27 @@ import glob import os from utils import multilinelog +from utils.base import BaseParserInterface -parser_description = "Parsing containermanagerd logs file" +class ContainerManagerParser(BaseParserInterface): + description = "Parsing containermanagerd logs file" + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'logs/MobileContainerManager/containermanagerd.log*' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def get_log_files(self) -> list: + log_files_globs = [ + 'logs/MobileContainerManager/containermanagerd.log*' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) - return log_files + return log_files - -def parse_path(path: str) -> list | dict: - result = [] - for logfile in get_log_files(path): - result.extend(multilinelog.extract_from_file(logfile)) - return result + def execute(self) -> list | dict: + result = [] + for logfile in self.get_log_files(): + result.extend(multilinelog.extract_from_file(logfile)) + return result diff --git a/parsers/crashlogs.py b/parsers/crashlogs.py index f4bd3ad..b701ea9 100644 --- a/parsers/crashlogs.py +++ b/parsers/crashlogs.py @@ -1,36 +1,44 @@ import glob import os -''' +from utils.base import BaseParserInterface -# FIXME Have a look at the interesting evidence first, see which files are there that are not on other devices -- crashes_and_spins folder - - ExcUserFault file -- crashes_and_spins/Panics subfolder -- summaries/crashes_and_spins.log -Though one as there is not necessary a fixed structure -- first line is json -- rest depends ... +class CrashLogsParser(BaseParserInterface): + ''' + # FIXME Have a look at the interesting evidence first, see which files are there that are not on other devices + - crashes_and_spins folder + - ExcUserFault file + - crashes_and_spins/Panics subfolder + - summaries/crashes_and_spins.log -Or perhaps include that in a normal log-parser. -And do the secret magic in the hunting rule -''' + Though one as there is not necessary a fixed structure + - first line is json + - rest depends ... -parser_description = "Parsing crashes folder" + Or perhaps include that in a normal log-parser. 
+ And do the secret magic in the hunting rule + ''' + description = "Parsing crashes folder" -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'crashes_and_spins/*.ips' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) - return log_files + def get_log_files(self) -> list: + log_files_globs = [ + 'crashes_and_spins/*.ips' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + return log_files -def parse_path(path: str) -> list | dict: - files = get_log_files(path) - print(f"Files: {files}") - raise NotImplementedError("not implemented yet") + def execute(self) -> list | dict: + files = self.get_log_files() + raise NotImplementedError("not implemented yet") + for file in files: + print(f"Processing file: {file}") + + def parse_file(path: str) -> list | dict: + print(f"Parsing file: {path}") diff --git a/parsers/demo_parser.py b/parsers/demo_parser.py index 4a7fdce..761074d 100644 --- a/parsers/demo_parser.py +++ b/parsers/demo_parser.py @@ -2,54 +2,47 @@ import os import json +from utils.base import BaseParserInterface -version_string = "sysdiagnose-demo-parser.py v2023-04-26 Version 1.0" -# ----- definition for parsing.py script -----# +class DemoParser(BaseParserInterface): + description = "Demo parsers" + # format = "json" # by default json -parser_description = "Demo parsers" + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) -# --------------------------------------------# + def get_log_files(self) -> list: + log_files = [ + "demo_input_file.txt" + ] + return [os.path.join(self.case_data_subfolder, log_files) for log_files in log_files] -# --------------------------------------------------------------------------- # - - -def get_log_files(log_root_path: str) -> list: - """ - Get the list of log files to be parsed - """ - log_files = [ - "demo_input_file.txt" - ] - return [os.path.join(log_root_path, log_files) for log_files in log_files] - - -def parse_path(path: str) -> list | dict: - ''' + def execute(self, path: str) -> list | dict: + ''' this is the function that will be called - ''' - json_object = {} - log_files = get_log_files(path) - for log_file in log_files: - pass - return json_object - - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - ''' - this is the function that will be called - ''' - try: + ''' json_object = {} - log_files = get_log_files(path) + log_files = self.get_log_files(path) for log_file in log_files: pass - # ideally stream to the file directly - output_folder = os.path.join(output_folder, __name__.split('.')[-1]) - os.makedirs(output_folder, exist_ok=True) - with open(os.path.join(output_folder, "demo_output.json"), "w") as f: - json.dump(json_object, f) - return True - except Exception as e: - print(f"Error: {e}") - return False + return json_object + + def parse_path_to_folder(self, path: str, output_folder: str) -> bool: + ''' + this is the function that will be called + ''' + try: + json_object = {} + log_files = self.get_log_files(path) + for log_file in log_files: + pass + # ideally stream to the file directly + output_folder = os.path.join(output_folder, __name__.split('.')[-1]) + os.makedirs(output_folder, exist_ok=True) + with open(os.path.join(output_folder, "demo_output.json"), "w") as f: + 
json.dump(json_object, f) + return True + except Exception as e: + print(f"Error: {e}") + return False diff --git a/parsers/itunesstore.py b/parsers/itunesstore.py index 863be17..79daac2 100644 --- a/parsers/itunesstore.py +++ b/parsers/itunesstore.py @@ -8,32 +8,31 @@ import glob import os import utils.misc as misc -import json +from utils.base import BaseParserInterface -parser_description = "Parsing iTunes store logs" +class iTunesStoreParser(BaseParserInterface): + description = "Parsing iTunes store logs" -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'logs/itunesstored/downloads.*.sqlitedb' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) - return log_files + def get_log_files(self) -> list: + log_files_globs = [ + 'logs/itunesstored/downloads.*.sqlitedb' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + return log_files -def parse_path(path: str) -> list | dict: - # there's only one file to parse - try: - return misc.json_serializable(sqlite2json.sqlite2struct(get_log_files(path)[0])) - except IndexError: - return {'error': 'No downloads.*.sqlitedb file found in logs/itunesstored/ directory'} + def execute(self) -> list | dict: + # there's only one file to parse + try: + return iTunesStoreParser.parse_file(self.get_log_files()[0]) + except IndexError: + return {'error': 'No downloads.*.sqlitedb file found in logs/itunesstored/ directory'} - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - result = parse_path(path) - output_file = os.path.join(output_folder, f"{__name__.split('.')[-1]}.json") - with open(output_file, 'w') as f: - json.dump(result, f, indent=4) + def parse_file(path: str) -> list | dict: + return misc.json_serializable(sqlite2json.sqlite2struct(path)) diff --git a/parsers/logarchive.py b/parsers/logarchive.py index ec25fea..7ef4b65 100644 --- a/parsers/logarchive.py +++ b/parsers/logarchive.py @@ -13,9 +13,7 @@ import subprocess from datetime import datetime, timezone from collections.abc import Generator - - -parser_description = 'Parsing system_logs.logarchive folder' +from utils.base import BaseParserInterface # --------------------------------------------# @@ -40,200 +38,234 @@ cmd_parsing_linux_test = ['unifiedlog_parser_json', '--help'] # --------------------------------------------------------------------------- # - -def get_log_files(log_root_path: str) -> list: - log_folders = [ - 'system_logs.logarchive/' - ] - return [os.path.join(log_root_path, log_folder) for log_folder in log_folders] - - -def parse_path(path: str) -> list | dict: - # OK, this is really inefficient as we're reading a file that we just wrote to a temporary folder - # but who cares, nobody uses this function anyway... 
- try: - with tempfile.TemporaryDirectory() as tmp_outpath: - parse_path_to_folder(path, tmp_outpath) - with open(os.path.join(tmp_outpath, 'logarchive.json'), 'r') as f: - return [json.loads(line) for line in f] - except IndexError: - return {'error': 'No system_logs.logarchive/ folder found in logs/ directory'} - - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - filename = get_log_files(path)[0] - try: - if (platform.system() == 'Darwin'): - __convert_using_native_logparser(filename, output_folder) +# LATER consider refactoring using yield to lower memory consumption + + +class LogarchiveParser(BaseParserInterface): + description = 'Parsing system_logs.logarchive folder' + format = 'jsonl' + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_folders = [ + 'system_logs.logarchive/' + ] + return [os.path.join(self.case_data_subfolder, log_folder) for log_folder in log_folders] + + @DeprecationWarning + def execute(self) -> list | dict: + # OK, this is really inefficient as we're reading a file that we just wrote to a temporary folder + # but who cares, nobody uses this function anyway... + try: + with tempfile.TemporaryDirectory() as tmp_outpath: + tmp_output_file = os.path.join(tmp_outpath.name, 'logarchive.tmp') + LogarchiveParser.parse_file_to_file(self.get_log_files()[0], tmp_output_file) + with open(tmp_output_file, 'r') as f: + return [json.loads(line) for line in f] + except IndexError: + return {'error': 'No system_logs.logarchive/ folder found in logs/ directory'} + + def get_result(self, force: bool = False): + if force: + # force parsing + self.save_result(force) + + if not self._result: + if not self.output_exists(): + self.save_result() + + if self.output_exists(): + # load existing output + with open(self.output_file, 'r') as f: + for line in f: + try: + yield json.loads(line) + except json.decoder.JSONDecodeError: # last lines of the native logarchive.jsonl file + continue else: - __convert_using_unifiedlogparser(filename, output_folder) - return True - except IndexError: - print('Error: No system_logs.logarchive/ folder found in logs/ directory') - return False - - -def __convert_using_native_logparser(filename: str, output_folder: str) -> list: - with open(os.path.join(output_folder, 'logarchive.json'), 'w') as f_out: - cmd_array = ['/usr/bin/log', 'show', filename, '--style', 'ndjson'] - # read each line, conver line by line and write the output directly to the new file - for line in __execute_cmd_and_yield_result(cmd_array): - try: - entry_json = convert_entry_to_unifiedlog_format(json.loads(line)) - f_out.write(json.dumps(entry_json) + '\n') - except json.JSONDecodeError as e: - print(f"WARNING: error parsing JSON {line}: {str(e)}") - except KeyError: - # last line of log does not contain 'time' field, nor the rest of the data. - # so just ignore it - pass - - -def __convert_using_unifiedlogparser(filename: str, output_folder: str) -> list: - print('WARNING: using Mandiant UnifiedLogReader to parse logs, results will be less reliable than on OS X') - # run the conversion tool, saving to a temp folder - # read the created file/files, add timestamp - # sort based on time - # save to one single file in output folder - - # first check if binary exists in PATH, if not, return an error - try: - subprocess.check_output(cmd_parsing_linux_test, universal_newlines=True) - except FileNotFoundError: - print('ERROR: UnifiedLogReader not found, please install it. 
See README.md for more information.') - return - - # really run the tool now - entries = [] - with tempfile.TemporaryDirectory() as tmp_outpath: - cmd_array = ['unifiedlog_parser_json', '--input', filename, '--output', tmp_outpath] - # run the command and get the result in our tmp_outpath folder - __execute_cmd_and_get_result(cmd_array) - # read each file, conver line by line and write the output directly to the new file - # LATER run this in multiprocessing, one per file to speed up the process - for fname_reading in os.listdir(tmp_outpath): - with open(os.path.join(tmp_outpath, fname_reading), 'r') as f: - for line in f: # jsonl format - one json object per line - try: - entry_json = convert_entry_to_unifiedlog_format(json.loads(line)) - entries.append(entry_json) - except json.JSONDecodeError as e: - print(f"WARNING: error parsing JSON {fname_reading}: {str(e)}") - # tempfolder is cleaned automatically after the block - - # sort the data as it's not sorted by default - entries.sort(key=lambda x: x['time']) - # save to file as JSONL - with open(os.path.join(output_folder, 'logarchive.json'), 'w') as f_out: - for entry in entries: - f_out.write(json.dumps(entry)) - f_out.write('\n') - - -def __execute_cmd_and_yield_result(cmd_array: list) -> Generator[dict, None, None]: - ''' - Return None if it failed or the result otherwise. - - ''' - with subprocess.Popen(cmd_array, stdout=subprocess.PIPE, universal_newlines=True) as process: - for line in iter(process.stdout.readline, ''): - yield line - - -def __execute_cmd_and_get_result(cmd_array: list, outputfile=None): - ''' - Return None if it failed or the result otherwise. - - Outfile can have 3 values: - - None: no output except return value - - sys.stdout: print to stdout - - path to a file to write to - ''' - result = [] - - with subprocess.Popen(cmd_array, stdout=subprocess.PIPE, universal_newlines=True) as process: - if outputfile is None: - for line in iter(process.stdout.readline, ''): - try: - result.append(json.loads(line)) - except Exception: - result.append(line) - elif outputfile == sys.stdout: - for line in iter(process.stdout.readline, ''): - print(line) + # should never happen, as we never keep it in memory + for entry in self._result: + yield entry + + def save_result(self, force: bool = False, indent=None): + ''' + Save the result of the parsing operation to a file in the parser output folder + ''' + if not force and self._result is not None: + # the result was already computed, just save it now + super().save_result(force, indent) else: - with open(outputfile, 'w') as outfd: - for line in iter(process.stdout.readline, ''): - outfd.write(line) - result = f'Output written to {outputfile}' - - return result - - -def convert_entry_to_unifiedlog_format(entry: dict) -> dict: - ''' - Convert the entry to unifiedlog format - ''' - # already in the Mandiant unifiedlog format - if 'event_type' in entry: - entry['datetime'] = convert_unifiedlog_time_to_datetime(entry['time']).isoformat() - return entry - ''' - jq '. 
|= keys' logarchive-native.json > native_keys.txt - sort native_keys.txt | uniq -c | sort -n > native_keys_sort_unique.txt - ''' - - mapper = { - 'creatorActivityID': 'activity_id', - 'messageType': 'log_type', - # 'source': '', # not present in the Mandiant format - # 'backtrace': '', # sub-dictionary - 'activityIdentifier': 'activity_id', - 'bootUUID': 'boot_uuid', # remove - in the UUID - 'category': 'category', - 'eventMessage': 'message', - 'eventType': 'event_type', - 'formatString': 'raw_message', - # 'machTimestamp': '', # not present in the Mandiant format - # 'parentActivityIdentifier': '', # not present in the Mandiant format - 'processID': 'pid', - 'processImagePath': 'process', - 'processImageUUID': 'process_uuid', # remove - in the UUID - 'senderImagePath': 'library', - 'senderImageUUID': 'library_uuid', # remove - in the UUID - # 'senderProgramCounter': '', # not present in the Mandiant format - 'subsystem': 'subsystem', - 'threadID': 'thread_id', - 'timestamp': 'time', # requires conversion - 'timezoneName': 'timezone_name', # ignore timezone as time and timestamp are correct - # 'traceID': '', # not present in the Mandiant format - 'userID': 'euid' - } - - new_entry = {} - for key, value in entry.items(): - if key in mapper: - new_key = mapper[key] - if 'uuid' in new_key: # remove - in UUID - new_entry[new_key] = value.replace('-', '') - else: - new_entry[new_key] = value - else: - # keep the non-matching entries - new_entry[key] = value - # convert time - new_entry['datetime'] = new_entry['time'] - new_entry['time'] = convert_native_time_to_unifiedlog_format(new_entry['time']) - - return new_entry + # no caching + LogarchiveParser.parse_file_to_file(self.get_log_files()[0], self.output_file) + def parse_file_to_file(input_file: str, output_file: str) -> bool: + try: + if (platform.system() == 'Darwin'): + LogarchiveParser.__convert_using_native_logparser(input_file, output_file) + else: + LogarchiveParser.__convert_using_unifiedlogparser(input_file, output_file) + return True + except IndexError: + print('Error: No system_logs.logarchive/ folder found in logs/ directory') + return False + + def __convert_using_native_logparser(input_file: str, output_file: str) -> list: + with open(output_file, 'w') as f_out: + cmd_array = ['/usr/bin/log', 'show', input_file, '--style', 'ndjson'] + # read each line, conver line by line and write the output directly to the new file + for line in LogarchiveParser.__execute_cmd_and_yield_result(cmd_array): + try: + entry_json = LogarchiveParser.convert_entry_to_unifiedlog_format(json.loads(line)) + f_out.write(json.dumps(entry_json) + '\n') + except json.JSONDecodeError as e: + print(f"WARNING: error parsing JSON {line}: {str(e)}") + except KeyError: + # last line of log does not contain 'time' field, nor the rest of the data. + # so just ignore it + pass + + def __convert_using_unifiedlogparser(input_file: str, output_file: str) -> list: + print('WARNING: using Mandiant UnifiedLogReader to parse logs, results will be less reliable than on OS X') + # run the conversion tool, saving to a temp folder + # read the created file/files, add timestamp + # sort based on time + # save to one single file in output folder + + # first check if binary exists in PATH, if not, return an error + try: + subprocess.check_output(cmd_parsing_linux_test, universal_newlines=True) + except FileNotFoundError: + print('ERROR: UnifiedLogReader not found, please install it. 
See README.md for more information.') + return + + # really run the tool now + entries = [] + with tempfile.TemporaryDirectory() as tmp_outpath: + cmd_array = ['unifiedlog_parser_json', '--input', input_file, '--output', tmp_outpath] + # run the command and get the result in our tmp_outpath folder + LogarchiveParser.__execute_cmd_and_get_result(cmd_array) + # read each file, conver line by line and write the output directly to the new file + # LATER run this in multiprocessing, one per file to speed up the process + for fname_reading in os.listdir(tmp_outpath): + with open(os.path.join(tmp_outpath, fname_reading), 'r') as f: + for line in f: # jsonl format - one json object per line + try: + entry_json = LogarchiveParser.convert_entry_to_unifiedlog_format(json.loads(line)) + entries.append(entry_json) + except json.JSONDecodeError as e: + print(f"WARNING: error parsing JSON {fname_reading}: {str(e)}") + # tempfolder is cleaned automatically after the block + + # sort the data as it's not sorted by default + entries.sort(key=lambda x: x['time']) + # save to file as JSONL + with open(output_file, 'w') as f_out: + for entry in entries: + f_out.write(json.dumps(entry)) + f_out.write('\n') + + def __execute_cmd_and_yield_result(cmd_array: list) -> Generator[dict, None, None]: + ''' + Return None if it failed or the result otherwise. + + ''' + with subprocess.Popen(cmd_array, stdout=subprocess.PIPE, universal_newlines=True) as process: + for line in iter(process.stdout.readline, ''): + yield line -def convert_native_time_to_unifiedlog_format(time: str) -> int: - timestamp = datetime.fromisoformat(time) - return int(timestamp.timestamp() * 1000000000) + def __execute_cmd_and_get_result(cmd_array: list, outputfile=None): + ''' + Return None if it failed or the result otherwise. + Outfile can have 3 values: + - None: no output except return value + - sys.stdout: print to stdout + - path to a file to write to + ''' + result = [] -def convert_unifiedlog_time_to_datetime(time: int) -> datetime: - # convert time to datetime object - timestamp = datetime.fromtimestamp(time / 1000000000, tz=timezone.utc) - return timestamp + with subprocess.Popen(cmd_array, stdout=subprocess.PIPE, universal_newlines=True) as process: + if outputfile is None: + for line in iter(process.stdout.readline, ''): + try: + result.append(json.loads(line)) + except Exception: + result.append(line) + elif outputfile == sys.stdout: + for line in iter(process.stdout.readline, ''): + print(line) + else: + with open(outputfile, 'w') as outfd: + for line in iter(process.stdout.readline, ''): + outfd.write(line) + result = f'Output written to {outputfile}' + + return result + + def convert_entry_to_unifiedlog_format(entry: dict) -> dict: + ''' + Convert the entry to unifiedlog format + ''' + # already in the Mandiant unifiedlog format + if 'event_type' in entry: + entry['datetime'] = LogarchiveParser.convert_unifiedlog_time_to_datetime(entry['time']).isoformat() + return entry + ''' + jq '. 
|= keys' logarchive-native.json > native_keys.txt + sort native_keys.txt | uniq -c | sort -n > native_keys_sort_unique.txt + ''' + + mapper = { + 'creatorActivityID': 'activity_id', + 'messageType': 'log_type', + # 'source': '', # not present in the Mandiant format + # 'backtrace': '', # sub-dictionary + 'activityIdentifier': 'activity_id', + 'bootUUID': 'boot_uuid', # remove - in the UUID + 'category': 'category', + 'eventMessage': 'message', + 'eventType': 'event_type', + 'formatString': 'raw_message', + # 'machTimestamp': '', # not present in the Mandiant format + # 'parentActivityIdentifier': '', # not present in the Mandiant format + 'processID': 'pid', + 'processImagePath': 'process', + 'processImageUUID': 'process_uuid', # remove - in the UUID + 'senderImagePath': 'library', + 'senderImageUUID': 'library_uuid', # remove - in the UUID + # 'senderProgramCounter': '', # not present in the Mandiant format + 'subsystem': 'subsystem', + 'threadID': 'thread_id', + 'timestamp': 'time', # requires conversion + 'timezoneName': 'timezone_name', # ignore timezone as time and timestamp are correct + # 'traceID': '', # not present in the Mandiant format + 'userID': 'euid' + } + + new_entry = {} + for key, value in entry.items(): + if key in mapper: + new_key = mapper[key] + if 'uuid' in new_key: # remove - in UUID + new_entry[new_key] = value.replace('-', '') + else: + new_entry[new_key] = value + else: + # keep the non-matching entries + new_entry[key] = value + # convert time + new_entry['datetime'] = new_entry['time'] + new_entry['time'] = LogarchiveParser.convert_native_time_to_unifiedlog_format(new_entry['time']) + + return new_entry + + def convert_native_time_to_unifiedlog_format(time: str) -> int: + timestamp = datetime.fromisoformat(time) + return int(timestamp.timestamp() * 1000000000) + + def convert_unifiedlog_time_to_datetime(time: int) -> datetime: + # convert time to datetime object + timestamp = datetime.fromtimestamp(time / 1000000000, tz=timezone.utc) + return timestamp diff --git a/parsers/mobileactivation.py b/parsers/mobileactivation.py index 73b07bd..73d3692 100644 --- a/parsers/mobileactivation.py +++ b/parsers/mobileactivation.py @@ -7,31 +7,27 @@ import glob import os from utils import multilinelog -import json +from utils.base import BaseParserInterface -parser_description = "Parsing mobileactivation logs file" +class MobileActivationParser(BaseParserInterface): + parser_description = "Parsing mobileactivation logs file" -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'logs/MobileActivation/mobileactivationd.log*' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) - return log_files + def get_log_files(self) -> list: + log_files_globs = [ + 'logs/MobileActivation/mobileactivationd.log*' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + return log_files -def parse_path(path: str) -> list | dict: - result = [] - for logfile in get_log_files(path): - result.extend(multilinelog.extract_from_file(logfile)) - return result - - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - result = parse_path(path) - output_file = os.path.join(output_folder, f"{__name__.split('.')[-1]}.json") - with open(output_file, 'w') as f: - json.dump(result, f, indent=4) + def 
execute(self) -> list | dict: + result = [] + for logfile in self.get_log_files(): + result.extend(multilinelog.extract_from_file(logfile)) + return result diff --git a/parsers/mobileinstallation.py b/parsers/mobileinstallation.py index 6d00468..7893ae2 100644 --- a/parsers/mobileinstallation.py +++ b/parsers/mobileinstallation.py @@ -7,24 +7,27 @@ import glob import os from utils import multilinelog +from utils.base import BaseParserInterface -parser_description = "Parsing mobile_installation logs file" +class MobileInstallationParser(BaseParserInterface): + description = "Parsing mobile_installation logs file" + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'logs/MobileInstallation/mobile_installation.log*' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def get_log_files(self) -> list: + log_files_globs = [ + 'logs/MobileInstallation/mobile_installation.log*' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) - return log_files + return log_files - -def parse_path(path: str) -> list | dict: - result = [] - for logfile in get_log_files(path): - result.extend(multilinelog.extract_from_file(logfile)) - return result + def execute(self) -> list | dict: + result = [] + for logfile in self.get_log_files(): + result.extend(multilinelog.extract_from_file(logfile)) + return result diff --git a/parsers/networkextension.py b/parsers/networkextension.py index 25539b1..c06be92 100644 --- a/parsers/networkextension.py +++ b/parsers/networkextension.py @@ -8,24 +8,30 @@ import utils.misc as misc import os import glob +from utils.base import BaseParserInterface -parser_description = "Parsing networkextension plist file" +class NetworkExtensionParser(BaseParserInterface): + description = "Parsing networkextension plist file" + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'logs/Networking/com.apple.networkextension.plist' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def get_log_files(self) -> list: + log_files_globs = [ + 'logs/Networking/com.apple.networkextension.plist' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) - return log_files + return log_files + def execute(self) -> list | dict: + return NetworkExtensionParser.parse_file(self.get_log_files()[0]) -def parse_path(path: str) -> list | dict: - try: - return misc.load_plist_file_as_json(get_log_files(path)[0]) - except IndexError: - return {'error': 'No com.apple.networkextension.plist file present'} + def parse_file(path: str) -> list | dict: + try: + return misc.load_plist_file_as_json(path) + except IndexError: + return {'error': 'No com.apple.networkextension.plist file present'} diff --git a/parsers/networkextensioncache.py b/parsers/networkextensioncache.py index dbac717..a726a6d 100644 --- a/parsers/networkextensioncache.py +++ b/parsers/networkextensioncache.py @@ -8,24 +8,30 @@ import glob import os import utils.misc as misc +from utils.base import BaseParserInterface -parser_description = "Parsing networkextensioncache plist 
file" +class NetworkExtensionCacheParser(BaseParserInterface): + description = "Parsing networkextensioncache plist file" + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'logs/Networking/com.apple.networkextension.cache.plist' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def get_log_files(self) -> list: + log_files_globs = [ + 'logs/Networking/com.apple.networkextension.cache.plist' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) - return log_files + return log_files + def execute(self) -> list | dict: + return NetworkExtensionCacheParser.parse_file(self.get_log_files()[0]) -def parse_path(path: str) -> list | dict: - try: - return misc.load_plist_file_as_json(get_log_files(path)[0]) - except IndexError: - return {'error': 'No com.apple.networkextension.cache.plist file present'} + def parse_file(path: str) -> list | dict: + try: + return misc.load_plist_file_as_json(path) + except IndexError: + return {'error': 'No com.apple.networkextension.cache.plist file present'} diff --git a/parsers/olddsc.py b/parsers/olddsc.py index a258c0a..56188fc 100644 --- a/parsers/olddsc.py +++ b/parsers/olddsc.py @@ -7,24 +7,31 @@ # import glob import os -import utils.misc as misc +from utils.misc import load_plist_file_as_json +from utils.base import BaseParserInterface -parser_description = "Parsing olddsc files" +class OldDscParser(BaseParserInterface): + description = "Parsing olddsc files" -def get_log_files(log_root_path: str) -> dict: - log_files_globs = [ - 'logs/olddsc/*' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) - return log_files + def get_log_files(self) -> dict: + log_files_globs = [ + 'logs/olddsc/*' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + return log_files -def parse_path(path: str) -> list | dict: - try: - return misc.load_plist_file_as_json(get_log_files(path)[0]) - except IndexError: - return {'error': 'No olddsc file present'} + def execute(self) -> list | dict: + return OldDscParser.parse_file(self.get_log_files()[0]) + + def parse_file(path: str) -> list | dict: + try: + return load_plist_file_as_json(path) + except IndexError: + return {'error': 'No olddsc file present'} diff --git a/parsers/plists.py b/parsers/plists.py index b0989dd..c27c3a0 100644 --- a/parsers/plists.py +++ b/parsers/plists.py @@ -4,42 +4,61 @@ import utils.misc as misc import os import json +from utils.base import BaseParserInterface -parser_description = "Parsing any pslist into json" - - -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - '**/*.plist' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob), recursive=True)) - - return log_files - - -def parse_path(path: str) -> dict: - result = {} - for logfile in get_log_files(path): - try: - json_data = misc.load_plist_file_as_json(logfile) - except Exception as e: - json_data = {"error": str(e)} - end_of_path = logfile[len(path):].lstrip(os.path.sep) # take 
the path after the root path - result[end_of_path] = json_data - return result - - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - output_folder = os.path.join(output_folder, __name__.split('.')[-1]) - os.makedirs(output_folder, exist_ok=True) - for logfile in get_log_files(path): - try: - json_data = misc.load_plist_file_as_json(logfile) - except Exception as e: - json_data = {"error": str(e)} - end_of_path = logfile[len(path):].lstrip(os.path.sep) # take the path after the root path - output_filename = end_of_path.replace(os.path.sep, '_') + '.json' # replace / with _ in the path - with open(os.path.join(output_folder, output_filename), 'w') as f: - json.dump(json_data, f, indent=4) + +class PlistParser(BaseParserInterface): + description = "Parsing any pslist into json" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + self.output_folder = os.path.join(self.case_parsed_data_folder, self.module_name) + + def get_log_files(self) -> list: + log_files_globs = [ + '**/*.plist' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob), recursive=True)) + + return log_files + + def execute(self) -> dict: + result = {} + for logfile in self.get_log_files(): + try: + json_data = misc.load_plist_file_as_json(logfile) + except Exception as e: + json_data = {"error": str(e)} + end_of_path = logfile[len(self.case_data_subfolder):].lstrip(os.path.sep) # take the path after the root path + result[end_of_path] = json_data + return result + + # LATER output_exists() now always returns False. This is because the output is saved in multiple files. + # we may want to change this behavior in the future, but that requires overwriting output_exists() and get_result() here + + def save_result(self, force: bool = False, indent=None): + """ + Saves the result of the parsing operation to many files in the parser output folder + + This function overrides the default save_result function to save each file in a different json file + """ + os.makedirs(self.output_folder, exist_ok=True) + if not force and self._result is not None: + # the result was already computed + for end_of_path, json_data in self._result.items(): + output_filename = end_of_path.replace(os.path.sep, '_') + '.json' # replace / with _ in the path + with open(os.path.join(self.output_folder, output_filename), 'w') as f: + f.write(json.dumps(json_data, ensure_ascii=False)) + else: + # no caching + for logfile in self.get_log_files(): + try: + json_data = misc.load_plist_file_as_json(logfile) + except Exception as e: + json_data = {"error": str(e)} + end_of_path = logfile[len(self.case_data_subfolder):].lstrip(os.path.sep) # take the path after the root path + output_filename = end_of_path.replace(os.path.sep, '_') + '.json' # replace / with _ in the path + with open(os.path.join(self.output_folder, output_filename), 'w') as f: + f.write(json.dumps(json_data, ensure_ascii=False)) diff --git a/parsers/powerlogs.py b/parsers/powerlogs.py index b0d55e9..fd12faa 100644 --- a/parsers/powerlogs.py +++ b/parsers/powerlogs.py @@ -8,37 +8,35 @@ import glob import os from utils.misc import merge_dicts -import json - - -parser_description = "Parsing powerlogs database" - - -def get_log_files(log_root_path: str) -> list: - """ - Get the list of log files to be parsed - """ - log_files_globs = [ - 'logs/powerlogs/powerlog_*', - 'logs/powerlogs/log_*' # LATER is this file of interest? 
- ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) - - return log_files - - -def parse_path(path: str) -> dict: - result = {} - for logfile in get_log_files(path): - db_json = sqlite2json.sqlite2struct(logfile) - result = merge_dicts(result, db_json) # merge both - return result - - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - result = parse_path(path) - output_file = os.path.join(output_folder, f"{__name__.split('.')[-1]}.json") - with open(output_file, 'w') as f: - json.dump(result, f, indent=4) +from utils.base import BaseParserInterface + + +class PowerLogsParser(BaseParserInterface): + description = "Parsing powerlogs database" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + """ + Get the list of log files to be parsed + """ + log_files_globs = [ + 'logs/powerlogs/powerlog_*', + 'logs/powerlogs/log_*' # LATER is this file of interest? + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + + return log_files + + def execute(self) -> list | dict: + result = {} + for logfile in self.get_log_files(): + db_json = PowerLogsParser.parse_file(logfile) + result = merge_dicts(result, db_json) # merge both + return result + + def parse_file(path: str) -> dict: + return sqlite2json.sqlite2struct(path) diff --git a/parsers/ps.py b/parsers/ps.py index dd05268..835f643 100644 --- a/parsers/ps.py +++ b/parsers/ps.py @@ -15,77 +15,76 @@ import glob import os import re - -parser_description = "Parsing ps.txt file" - - -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'ps.txt' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) - - return log_files - - -def parse_path(path: str) -> list | dict: - try: - return parse_ps(get_log_files(path)[0]) - except IndexError: - return {'error': 'No ps.txt file present'} - - -def parse_ps(filename): - result = [] - try: - with open(filename, "r") as f: - header = re.split(r"\s+", f.readline().strip()) - header_length = len(header) - - # print(f"Found header: {header}") - for line in f: - patterns = line.strip().split(None, header_length - 1) - row = {} - # merge last entries together, as last entry may contain spaces - for col in range(header_length): - # try to cast as int, float and fallback to string - col_name = header[col] - try: - row[col_name] = int(patterns[col]) - continue - except ValueError: +from utils.base import BaseParserInterface + + +class PsParser(BaseParserInterface): + description = "Parsing ps.txt file" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_files_globs = [ + 'ps.txt' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + + return log_files + + def execute(self) -> list | dict: + return PsParser.parse_file(self.get_log_files()[0]) + + def parse_file(filename): + result = [] + try: + with open(filename, "r") as f: + header = re.split(r"\s+", f.readline().strip()) + header_length = len(header) + + # print(f"Found header: {header}") + for line in f: + patterns = line.strip().split(None, header_length - 1) + row = {} + # merge last entries together, as last entry 
may contain spaces + for col in range(header_length): + # try to cast as int, float and fallback to string + col_name = header[col] try: - row[col_name] = float(patterns[col]) + row[col_name] = int(patterns[col]) + continue except ValueError: - row[col_name] = patterns[col] - result.append(row) - return result - except Exception as e: - print(f"Could not parse ps.txt: {str(e)}") - return [] - + try: + row[col_name] = float(patterns[col]) + except ValueError: + row[col_name] = patterns[col] + result.append(row) + return result + except Exception as e: + print(f"Could not parse ps.txt: {str(e)}") + return [] -def exclude_known_goods(processes: dict, known_good: dict) -> list[dict]: - """ - Exclude known good processes from the given list of processes. + def exclude_known_goods(processes: dict, known_good: dict) -> list[dict]: + """ + Exclude known good processes from the given list of processes. - Args: - processes (dict): The output from parse_ps() to check. - known_good (dict): The output of parse_ps() from a known good. + Args: + processes (dict): The output from parse_file() to check. + known_good (dict): The output of parse_file() from a known good. - Returns: - dict: The updated list of processes with known good processes excluded. - """ + Returns: + dict: The updated list of processes with known good processes excluded. + """ - known_good_cmd = [x['COMMAND'] for x in known_good] + known_good_cmd = [x['COMMAND'] for x in known_good] - for proc in processes: - if proc['COMMAND'] in known_good_cmd: - processes.remove(proc) + for proc in processes: + if proc['COMMAND'] in known_good_cmd: + processes.remove(proc) - return processes + return processes """ @@ -179,7 +178,7 @@ def main(): # parse PS file :) if options.inputfile: - processes = parse_ps(options.inputfile) + processes = PsParser.parse_file(options.inputfile) export_as_tree(processes, True) else: print("WARNING -i option is mandatory!") diff --git a/parsers/psthread.py b/parsers/psthread.py index 761808d..f6a7499 100644 --- a/parsers/psthread.py +++ b/parsers/psthread.py @@ -12,53 +12,57 @@ import glob import os import re +from utils.base import BaseParserInterface -parser_description = "Parsing ps_thread.txt file" +class PsThreadParser(BaseParserInterface): + description = "Parsing ps_thread.txt file" -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'ps_thread.txt' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) - return log_files + def get_log_files(self) -> list: + log_files_globs = [ + 'ps_thread.txt' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + return log_files -def parse_path(path: str) -> list | dict: - result = [] - try: - with open(get_log_files(path)[0], "r") as f: - header = re.split(r"\s+", f.readline().strip()) - header_length = len(header) - row = None - for line in f: - if '??' in line: - # append previous entry - if row: - result.append(row) + def execute(self) -> list | dict: + result = [] + try: + with open(self.get_log_files()[0], "r") as f: + header = re.split(r"\s+", f.readline().strip()) + header_length = len(header) + row = None + for line in f: + if '??' 
in line: + # append previous entry + if row: + result.append(row) - patterns = line.strip().split(None, header_length - 1) - row = {'THREADS': 1} - # merge last entries together, as last entry may contain spaces - for col in range(header_length): - # try to cast as int, float and fallback to string - col_name = header[col] - try: - row[col_name] = int(patterns[col]) - continue - except ValueError: + patterns = line.strip().split(None, header_length - 1) + row = {'THREADS': 1} + # merge last entries together, as last entry may contain spaces + for col in range(header_length): + # try to cast as int, float and fallback to string + col_name = header[col] try: - row[col_name] = float(patterns[col]) + row[col_name] = int(patterns[col]) + continue except ValueError: - row[col_name] = patterns[col] - else: - row['THREADS'] += 1 - # append last entry - if row: - result.append(row) - return result - except IndexError: - return {'error': 'No ps_thread.txt file present'} + try: + row[col_name] = float(patterns[col]) + except ValueError: + row[col_name] = patterns[col] + else: + row['THREADS'] += 1 + # append last entry + if row: + result.append(row) + return result + except IndexError: + return {'error': 'No ps_thread.txt file present'} diff --git a/parsers/remotectl_dumpstate.py b/parsers/remotectl_dumpstate.py index 4fb2d26..120e0c3 100644 --- a/parsers/remotectl_dumpstate.py +++ b/parsers/remotectl_dumpstate.py @@ -2,23 +2,27 @@ import glob import os from utils.tabbasedhierarchy import parse_tab_based_hierarchal_file +from utils.base import BaseParserInterface -parser_description = "Parsing remotectl_dumpstate file containing system information" +class RemotectlDumpstateParser(BaseParserInterface): + description = "Parsing remotectl_dumpstate file containing system information" -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'remotectl_dumpstate.txt' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) - return log_files + def get_log_files(self) -> list: + log_files_globs = [ + 'remotectl_dumpstate.txt' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + return log_files -def parse_path(path: str) -> list | dict: - try: - return parse_tab_based_hierarchal_file(get_log_files(path)[0]) - except IndexError: - return {'error': 'No remotectl_dumpstate.txt file present'} + def execute(self) -> list | dict: + try: + return parse_tab_based_hierarchal_file(self.get_log_files()[0]) + except IndexError: + return {'error': 'No remotectl_dumpstate.txt file present'} diff --git a/parsers/security_sysdiagnose.py b/parsers/security_sysdiagnose.py index b746065..45017af 100644 --- a/parsers/security_sysdiagnose.py +++ b/parsers/security_sysdiagnose.py @@ -1,224 +1,207 @@ import os -import json import re +from utils.base import BaseParserInterface + + +class SecuritySysdiagnoseParser(BaseParserInterface): + description = "Parsing security-sysdiagnose.txt file containing keychain information" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + """ + Get the list of log files to be parsed + """ + log_files = [ + "security-sysdiagnose.txt" + ] + return [os.path.join(self.case_data_subfolder, log_files) for log_files in log_files] + + def 
execute(self) -> list | dict: + log_files = self.get_log_files() + if not log_files: + return {'errors': ['No security-sysdiagnose.txt file present']} + + return SecuritySysdiagnoseParser.parse_file(log_files[0]) + + def parse_file(path: str) -> dict: + json_result = {'errors': []} + with open(path, "r") as f: + buffer = [] + buffer_section = None + + for line in f: + line = line.rstrip() + if line == '': + continue + elif line.startswith('ccstatus:'): + SecuritySysdiagnoseParser.process_buffer(buffer, buffer_section, json_result) + buffer_section = 'circle' + buffer = [line] + elif line.startswith('Engine state:'): + SecuritySysdiagnoseParser.process_buffer(buffer, buffer_section, json_result) + buffer_section = 'engine_state' + buffer = [] + elif line.endswith('keychain state:'): + SecuritySysdiagnoseParser.process_buffer(buffer, buffer_section, json_result) + buffer_section = 'keychain_state' + buffer = [line] + elif line.startswith('Analystics sysdiagnose'): + SecuritySysdiagnoseParser.process_buffer(buffer, buffer_section, json_result) + buffer_section = 'analytics' + buffer = [] + elif line.startswith('Client:'): + SecuritySysdiagnoseParser.process_buffer(buffer, buffer_section, json_result) + buffer_section = 'client' + buffer = [line] # this line contains the client type. (trust, cloudservices, networking, ...) + elif line.startswith('All keys and values'): + SecuritySysdiagnoseParser.process_buffer(buffer, buffer_section, json_result) + buffer_section = 'keys_and_values' + buffer = ['keysandvalues'] + elif line.startswith('All values in'): + SecuritySysdiagnoseParser.process_buffer(buffer, buffer_section, json_result) + buffer_section = 'keys_and_values' + buffer = ['values'] + else: + buffer.append(line) + + # call the last buffer + SecuritySysdiagnoseParser.process_buffer(buffer, buffer_section, json_result) + return json_result + + def process_buffer(buffer: list, section: str, json_result: dict): + """ + process the buffer for the given section + """ + if section is None: + return + function_name = f'process_buffer_{section}' + if function_name in dir(SecuritySysdiagnoseParser): + getattr(SecuritySysdiagnoseParser, function_name)(buffer, json_result) + else: + print(f"ERROR: Function {function_name} not found in the SecuritySysdiagnoseParser class.") + json_result['errors'].append(f"Cannot parse section {function_name} as it is unknown. Parser needs to be extended.") + + def process_buffer_circle(buffer: list, json_result: dict): + """ + process the buffer for the circle section + + This section is really though to process, as there are many variants. + As it contains interesting information about the circle of trust within the apple account + we keep it and just add the lines as list to the result. + TODO consider to parse the circle section in more detail + """ + json_result['circle'] = buffer + + def process_buffer_engine_state(buffer: list, json_result: dict): + """ + process the buffer for the engine section + """ + line_format_local = r'^(\w+) \{([^\}]+)\} \[([0-9]+)\] (\w+)' # noqa F841 + # LATER consider splitting up the line format + json_result['engine'] = buffer + pass + + def process_buffer_keychain_state(buffer: list, json_result: dict): + """ + process the buffer for the homekit section + """ + section = buffer.pop(0).split(' ').pop(0).lower() + json_result[section] = [] + for line in buffer: + # parse the csv line with key=value structure + # unfortunately value can be { foo,bar }, so splitting on comma is not an option. 
+ # We need to implement a more complex parser here. + start = line.find(': ') + line = line[start + 2:] + + row = {} + subsection = False + key = None + i = 0 + start = 0 + while i < len(line): + if line[i] == '}': + subsection = False + elif line[i] == '{': + subsection = True + elif key is None and line[i] == '=': + key = line[start:i] + start = i + 1 + elif not subsection and line[i] == ',': + # new key value pair will start + # process old key value pair + row[key] = line[start:i] + start = i + 1 + # start new key value pair + key = None -parser_description = "Parsing security-sysdiagnose.txt file containing keychain information" - - -def get_log_files(log_root_path: str) -> list: - """ - Get the list of log files to be parsed - """ - log_files = [ - "security-sysdiagnose.txt" - ] - return [os.path.join(log_root_path, log_files) for log_files in log_files] - - -def parse_path(path: str) -> list | dict: - json_result = {'errors': []} - log_files = get_log_files(path) - if not log_files: - return {'errors': ['No security-sysdiagnose.txt file present']} - - log_file = log_files[0] - with open(log_file, "r") as f: - buffer = [] - buffer_section = None - - for line in f: - line = line.rstrip() - if line == '': - continue - elif line.startswith('ccstatus:'): - process_buffer(buffer, buffer_section, json_result) - buffer_section = 'circle' - buffer = [line] - elif line.startswith('Engine state:'): - process_buffer(buffer, buffer_section, json_result) - buffer_section = 'engine_state' - buffer = [] - elif line.endswith('keychain state:'): - process_buffer(buffer, buffer_section, json_result) - buffer_section = 'keychain_state' - buffer = [line] - elif line.startswith('Analystics sysdiagnose'): - process_buffer(buffer, buffer_section, json_result) - buffer_section = 'analytics' - buffer = [] - elif line.startswith('Client:'): - process_buffer(buffer, buffer_section, json_result) - buffer_section = 'client' - buffer = [line] # this line contains the client type. (trust, cloudservices, networking, ...) 
- elif line.startswith('All keys and values'): - process_buffer(buffer, buffer_section, json_result) - buffer_section = 'keys_and_values' - buffer = ['keysandvalues'] - elif line.startswith('All values in'): - process_buffer(buffer, buffer_section, json_result) - buffer_section = 'keys_and_values' - buffer = ['values'] - else: - buffer.append(line) - - # call the last buffer - process_buffer(buffer, buffer_section, json_result) + i += 1 + # process the last key value pair + row[key] = line[start:] + json_result[section].append(row) + + def process_buffer_analytics(buffer: list, json_result: dict): + """ + process the buffer for the analytics section + """ + # nothing to do here pass - return json_result - - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - try: - json_object = parse_path(path) - # ideally stream to the file directly - output_folder = os.path.join(output_folder, __name__.split('.')[-1]) - os.makedirs(output_folder, exist_ok=True) - with open(os.path.join(output_folder, "security_sysdiagnose.json"), "w") as f: - json.dump(json_object, f) - return True - except Exception as e: - print(f"Error: {e}") - return False - - -def process_buffer(buffer: list, section: str, json_result: dict): - """ - process the buffer for the given section - """ - if section is None: - return - function_name = f'process_buffer_{section}' - if function_name in globals(): - globals()[function_name](buffer, json_result) - else: - print(f"ERROR: Function {function_name} not found in the globals.") - json_result['errors'].append(f"Cannot parse section {function_name} as it is unknown. Parser needs to be extended.") - - -def process_buffer_circle(buffer: list, json_result: dict): - """ - process the buffer for the circle section - - This section is really though to process, as there are many variants. - As it contains interesting information about the circle of trust within the apple account - we keep it and just add the lines as list to the result. - TODO consider to parse the circle section in more detail - """ - json_result['circle'] = buffer - - -def process_buffer_engine_state(buffer: list, json_result: dict): - """ - process the buffer for the engine section - """ - line_format_local = r'^(\w+) \{([^\}]+)\} \[([0-9]+)\] (\w+)' # noqa F841 - # LATER consider splitting up the line format - json_result['engine'] = buffer - pass - - -def process_buffer_keychain_state(buffer: list, json_result: dict): - """ - process the buffer for the homekit section - """ - section = buffer.pop(0).split(' ').pop(0).lower() - json_result[section] = [] - for line in buffer: - # parse the csv line with key=value structure - # unfortunately value can be { foo,bar }, so splitting on comma is not an option. - # We need to implement a more complex parser here. 
- start = line.find(': ') - line = line[start + 2:] - - row = {} - subsection = False - key = None + + def process_buffer_client(buffer: list, json_result: dict): + """ + process the buffer for the client section + """ + section = f"client_{buffer.pop(0).split(':').pop(1).lower().strip()}" + json_result[section] = [] + if buffer[0].startswith('No data'): + return + i = 0 - start = 0 - while i < len(line): - if line[i] == '}': - subsection = False - elif line[i] == '{': - subsection = True - elif key is None and line[i] == '=': - key = line[start:i] - start = i + 1 - elif not subsection and line[i] == ',': - # new key value pair will start - # process old key value pair - row[key] = line[start:i] - start = i + 1 - # start new key value pair - key = None + while i < len(buffer): + line = buffer[i] + row = {} + row['date'] = line[:25] # 25 chars = 'YYYY-mm-dd HH:MM:SS +0000' + end = line.find(': ', 26) + row['result'] = line[26:end] + start = end + 2 + end = line.find(' - ', end + 2) + row['type'] = line[start:end] + row['attributes'] = {} + attribute_string = line[end + 16:] # 16 chars = ' - Attributes: {' + # while next rows do not start with a date, they are part of the attributes + try: + while not re.search(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', buffer[i + 1]): + i += 1 + attribute_string += buffer[i] + except IndexError: + pass + # drop last } and split the attributes + attribute_string = attribute_string.replace('\n', '').strip()[:-1].strip() + attribute_pairs = re.findall(r'(\w+)\s*:\s*(\([^)]+\)|.+?)(?:, |$)', attribute_string) + for key, value in attribute_pairs: + row['attributes'][key.strip()] = value.strip() + + json_result[section].append(row) + i += 1 + + def process_buffer_keys_and_values(buffer: list, json_result: dict): + """ + process the buffer for the values section + """ + section = buffer.pop(0) + json_result[section] = {} + i = 0 + while i < len(buffer): + line = buffer[i] + try: + while buffer[i + 1].startswith('\t'): + i += 1 + line += '\n' + buffer[i] + except IndexError: + pass + key, value = line.split(': ', 1) + json_result[section][key.strip()] = value.strip() i += 1 - # process the last key value pair - row[key] = line[start:] - json_result[section].append(row) - - -def process_buffer_analytics(buffer: list, json_result: dict): - """ - process the buffer for the analytics section - """ - # nothing to do here - pass - - -def process_buffer_client(buffer: list, json_result: dict): - """ - process the buffer for the client section - """ - section = f"client_{buffer.pop(0).split(':').pop(1).lower().strip()}" - json_result[section] = [] - if buffer[0].startswith('No data'): - return - - i = 0 - while i < len(buffer): - line = buffer[i] - row = {} - row['date'] = line[:25] # 25 chars = 'YYYY-mm-dd HH:MM:SS +0000' - end = line.find(': ', 26) - row['result'] = line[26:end] - start = end + 2 - end = line.find(' - ', end + 2) - row['type'] = line[start:end] - row['attributes'] = {} - attribute_string = line[end + 16:] # 16 chars = ' - Attributes: {' - # while next rows do not start with a date, they are part of the attributes - try: - while not re.search(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', buffer[i + 1]): - i += 1 - attribute_string += buffer[i] - except IndexError: - pass - # drop last } and split the attributes - attribute_string = attribute_string.replace('\n', '').strip()[:-1].strip() - attribute_pairs = re.findall(r'(\w+)\s*:\s*(\([^)]+\)|.+?)(?:, |$)', attribute_string) - for key, value in attribute_pairs: - row['attributes'][key.strip()] = value.strip() - - 
json_result[section].append(row) - i += 1 - - -def process_buffer_keys_and_values(buffer: list, json_result: dict): - """ - process the buffer for the values section - """ - section = buffer.pop(0) - json_result[section] = {} - - i = 0 - while i < len(buffer): - line = buffer[i] - try: - while buffer[i + 1].startswith('\t'): - i += 1 - line += '\n' + buffer[i] - except IndexError: - pass - key, value = line.split(': ', 1) - json_result[section][key.strip()] = value.strip() - i += 1 diff --git a/parsers/shutdownlogs.py b/parsers/shutdownlogs.py index 89392af..1b219ab 100644 --- a/parsers/shutdownlogs.py +++ b/parsers/shutdownlogs.py @@ -8,71 +8,69 @@ import glob import os import re -import json - - -parser_description = "Parsing shutdown.log file" +from utils.base import BaseParserInterface CLIENTS_ARE_STILL_HERE_LINE = "these clients are still here" REMAINING_CLIENT_PID_LINE = "remaining client pid" SIGTERM_LINE = "SIGTERM" -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'system_logs.logarchive/Extra/shutdown.log' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) +class ShutdownLogsParser(BaseParserInterface): + description = "Parsing shutdown.log file" - return log_files + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + def get_log_files(self) -> list: + log_files_globs = [ + 'system_logs.logarchive/Extra/shutdown.log' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) -def parse_path(path: str) -> list | dict: - # read log file content - log_lines = "" - try: - with open(get_log_files(path)[0], "r") as f: - log_lines = f.readlines() - except IndexError: - return {'error': 'No shutdown.log file present in system_logs.logarchive/Extra/ directory'} + return log_files - parsed_data = {} - index = 0 - # go through log file - while index < len(log_lines): - # look for begining of shutdown sequence - if CLIENTS_ARE_STILL_HERE_LINE in log_lines[index]: - running_processes = {} - time_waiting = 0 - while not (SIGTERM_LINE in log_lines[index]): - if CLIENTS_ARE_STILL_HERE_LINE in log_lines[index]: - time_waiting = re.search(r'After ([\d\.]+)s,', log_lines[index]).group(1) - if (REMAINING_CLIENT_PID_LINE in log_lines[index]): - result = re.search(r".*: (\b\d+) \((.*)\).*", log_lines[index]) - pid = result.groups()[0] - binary_path = result.groups()[1] - running_processes[pid] = { - "pid": pid, - "path": binary_path, - "command": '/'.join(binary_path.split('/')[:-1]), - "time_waiting": time_waiting - } - index += 1 - # compute timestamp from SIGTERM line - result = re.search(r".*\[(\d+)\].*", log_lines[index]) - timestamp = result.groups()[0] - time = str(datetime.datetime.fromtimestamp(int(timestamp), datetime.UTC)) - # add entries - parsed_data[time] = list(running_processes.values()) - index += 1 + def execute(self) -> list | dict: + return ShutdownLogsParser.parse_file(self.get_log_files()[0]) - return parsed_data + def parse_file(path: str) -> list | dict: + # read log file content + log_lines = "" + try: + with open(path, "r") as f: + log_lines = f.readlines() + except IndexError: + return {'error': 'No shutdown.log file present in system_logs.logarchive/Extra/ directory'} + parsed_data = {} + index = 0 + # go through log file + while index < len(log_lines): + # look for begining of shutdown sequence + if CLIENTS_ARE_STILL_HERE_LINE in 
log_lines[index]: + running_processes = {} + time_waiting = 0 + while not (SIGTERM_LINE in log_lines[index]): + if CLIENTS_ARE_STILL_HERE_LINE in log_lines[index]: + time_waiting = re.search(r'After ([\d\.]+)s,', log_lines[index]).group(1) + if (REMAINING_CLIENT_PID_LINE in log_lines[index]): + result = re.search(r".*: (\b\d+) \((.*)\).*", log_lines[index]) + pid = result.groups()[0] + binary_path = result.groups()[1] + running_processes[pid] = { + "pid": pid, + "path": binary_path, + "command": '/'.join(binary_path.split('/')[:-1]), + "time_waiting": time_waiting + } + index += 1 + # compute timestamp from SIGTERM line + result = re.search(r".*\[(\d+)\].*", log_lines[index]) + timestamp = result.groups()[0] + time = str(datetime.datetime.fromtimestamp(int(timestamp), datetime.UTC)) + # add entries + parsed_data[time] = list(running_processes.values()) + index += 1 -def parse_path_to_folder(path: str, output_folder: str) -> bool: - result = parse_path(path) - output_file = os.path.join(output_folder, f"{__name__.split('.')[-1]}.json") - with open(output_file, 'w') as f: - json.dump(result, f, indent=4) + return parsed_data diff --git a/parsers/spindumpnosymbols.py b/parsers/spindumpnosymbols.py index bcdf887..d1c60fe 100644 --- a/parsers/spindumpnosymbols.py +++ b/parsers/spindumpnosymbols.py @@ -7,188 +7,189 @@ import glob import os import re - -parser_description = "Parsing spindump-nosymbols file" - - -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'spindump-nosymbols.txt' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) - - return log_files - - -def parse_path(path: str) -> list | dict: - try: - with open(get_log_files(path)[0], 'r') as f_in: - # init section - headers = [] - processes_raw = [] - status = 'headers' - - # stripping - for line in f_in: - if line.strip() == "" or line.strip() == "Heavy format: stacks are sorted by count" or line.strip() == "Use -i and -timeline to re-report with chronological sorting": - continue - elif line.strip() == "------------------------------------------------------------": - status = 'processes_raw' - continue - elif line.strip() == "Spindump binary format": - status = 'binary' - continue - elif status == 'headers': - headers.append(line.strip()) - continue - elif status == 'processes_raw': - processes_raw.append(line.strip()) - continue - - # call parsing function per section - output = parse_basic(headers) - output['processes'] = parse_processes(processes_raw) - +from utils.base import BaseParserInterface + + +class SpindumpNoSymbolsParser(BaseParserInterface): + description = "Parsing spindump-nosymbols file" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_files_globs = [ + 'spindump-nosymbols.txt' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + + return log_files + + def execute(self) -> list | dict: + return SpindumpNoSymbolsParser.parse_file(self.get_log_files()[0]) + + def parse_file(path: str) -> list | dict: + try: + with open(path, 'r') as f_in: + # init section + headers = [] + processes_raw = [] + status = 'headers' + + # stripping + for line in f_in: + if line.strip() == "" or line.strip() == "Heavy format: stacks are sorted by count" or line.strip() == "Use -i and -timeline to re-report with chronological sorting": + continue + 
elif line.strip() == "------------------------------------------------------------": + status = 'processes_raw' + continue + elif line.strip() == "Spindump binary format": + status = 'binary' + continue + elif status == 'headers': + headers.append(line.strip()) + continue + elif status == 'processes_raw': + processes_raw.append(line.strip()) + continue + + # call parsing function per section + output = SpindumpNoSymbolsParser.parse_basic(headers) + output['processes'] = SpindumpNoSymbolsParser.parse_processes(processes_raw) + + return output + except IndexError: + return {'error': 'No spindump-nosymbols.txt file present'} + + def parse_basic(data): + output = {} + for line in data: + splitted = line.split(":", 1) + if len(splitted) > 1: + output[splitted[0]] = splitted[1].strip() return output - except IndexError: - return {'error': 'No spindump-nosymbols.txt file present'} - - -def parse_basic(data): - output = {} - for line in data: - splitted = line.split(":", 1) - if len(splitted) > 1: - output[splitted[0]] = splitted[1].strip() - return output - - -def parse_processes(data): - # init - processes = [] - init = True - process = [] - for line in data: - if "Process:" in line.strip(): - if not init: - processes.append(parse_process(process)) - process = [line.strip()] + + def parse_processes(data): + # init + processes = [] + init = True + process = [] + for line in data: + if "Process:" in line.strip(): + if not init: + processes.append(SpindumpNoSymbolsParser.parse_process(process)) + process = [line.strip()] + else: + init = False + process.append(line.strip()) else: - init = False process.append(line.strip()) - else: - process.append(line.strip()) - processes.append(parse_process(process)) - return processes - - -def parse_process(data): - # init - status = 'infos' - infos = [] - threads = [] - images = [] - for line in data: - if "Thread 0x" in line.strip(): - status = "threads" - threads.append(line.strip()) - continue - elif "Binary Images:" in line.strip(): - status = "images" - continue - elif status == "infos": - infos.append(line.strip()) - continue - elif status == "threads": - threads.append(line.strip()) - continue - elif status == "images": - images.append(line.strip()) - continue - process = parse_basic(infos) - process['threads'] = parse_threads(threads) - process['images'] = parse_images(images) - # parse special substrings - process['PID'] = int(re.search(r'\[(\d+)\]', process['Process']).group(1)) - process['Process'] = process['Process'].split("[", 1)[0].strip() - try: - process['PPID'] = int(re.search(r'\[(\d+)\]', process['Parent']).group(1)) - process['Parent'] = process['Parent'].split("[", 1)[0].strip() - except KeyError: # some don't have a parent - pass - process['UID'] = 501 - return process - - -def parse_threads(data): - # init - init = True - threads = [] - thread = [] - for line in data: - if "Thread 0x" in line.strip(): - if not init: - threads.append(parse_thread(thread)) - thread = [line.strip()] + processes.append(SpindumpNoSymbolsParser.parse_process(process)) + return processes + + def parse_process(data): + # init + status = 'infos' + infos = [] + threads = [] + images = [] + for line in data: + if "Thread 0x" in line.strip(): + status = "threads" + threads.append(line.strip()) + continue + elif "Binary Images:" in line.strip(): + status = "images" + continue + elif status == "infos": + infos.append(line.strip()) + continue + elif status == "threads": + threads.append(line.strip()) + continue + elif status == "images": + images.append(line.strip()) 
+ continue + process = SpindumpNoSymbolsParser.parse_basic(infos) + process['threads'] = SpindumpNoSymbolsParser.parse_threads(threads) + process['images'] = SpindumpNoSymbolsParser.parse_images(images) + # parse special substrings + process['PID'] = int(re.search(r'\[(\d+)\]', process['Process']).group(1)) + process['Process'] = process['Process'].split("[", 1)[0].strip() + try: + process['PPID'] = int(re.search(r'\[(\d+)\]', process['Parent']).group(1)) + process['Parent'] = process['Parent'].split("[", 1)[0].strip() + except KeyError: # some don't have a parent + pass + process['UID'] = 501 + return process + + def parse_threads(data): + # init + init = True + threads = [] + thread = [] + for line in data: + if "Thread 0x" in line.strip(): + if not init: + threads.append(SpindumpNoSymbolsParser.parse_thread(thread)) + thread = [line.strip()] + else: + init = False + thread.append(line.strip()) else: - init = False thread.append(line.strip()) - else: - thread.append(line.strip()) - threads.append(parse_thread(thread)) - return threads - - -def parse_thread(data): - output = {} - # parse first line - # Thread Hex value - threadHEXregex = re.search(r"Thread 0x..", data[0]) - output['thread'] = threadHEXregex.group(0).split(" ", 1)[1] - # Thread Name / DispatchQueue - if "DispatchQueue \"" in data[0]: - dispacthregex = re.search(r"DispatchQueue(.*)\"\(", data[0]) - output['DispatchQueue'] = dispacthregex.group(0).split("\"")[1] - if "Thread name \"" in data[0]: - dispacthregex = re.search(r"Thread name\ \"(.*)\"", data[0]) - output['ThreadName'] = dispacthregex.group(0).split("\"")[1] - # priority - if "priority" in data[0]: - priorityregex = re.search(r"priority\ [0-9]+", data[0]) - output['priority'] = priorityregex.group(0).split(" ", 1)[1] - if "cpu time" in data[0]: - cputimeregex = re.search(r"cpu\ time\ (.*)\)", data[0]) - output["cputime"] = cputimeregex.group(0).split("time ", 1)[1] - - output["loaded"] = [] - - for line in data[1:]: - loaded = {} - if "+" in line: - loaded["library"] = line.split("(", 1)[1].split("+", 1)[0].strip() - loaded["int"] = line.split("(", 1)[1].split("+", 1)[1].split(")", 1)[0].strip() - loaded["hex"] = line.split("[", 1)[1][:-1].strip() - elif "truncated backtrace>" not in line: - loaded["hex"] = line.split("[", 1)[1][:-1].strip() - output["loaded"].append(loaded) - return output - - -def parse_images(data): - images = [] - for line in data: - image = {} - if line.strip() is not None: - clean = ' '.join(line.split(" ")).split() - image['start'] = clean[0] - image['end'] = clean[2] - image['image'] = clean[3] - image['UUID'] = clean[4][1:-1] - try: - image['path'] = clean[5] - except: # noqa E722 - pass - images.append(image) - return images + threads.append(SpindumpNoSymbolsParser.parse_thread(thread)) + return threads + + def parse_thread(data): + output = {} + # parse first line + # Thread Hex value + threadHEXregex = re.search(r"Thread 0x..", data[0]) + output['thread'] = threadHEXregex.group(0).split(" ", 1)[1] + # Thread Name / DispatchQueue + if "DispatchQueue \"" in data[0]: + dispacthregex = re.search(r"DispatchQueue(.*)\"\(", data[0]) + output['DispatchQueue'] = dispacthregex.group(0).split("\"")[1] + if "Thread name \"" in data[0]: + dispacthregex = re.search(r"Thread name\ \"(.*)\"", data[0]) + output['ThreadName'] = dispacthregex.group(0).split("\"")[1] + # priority + if "priority" in data[0]: + priorityregex = re.search(r"priority\ [0-9]+", data[0]) + output['priority'] = priorityregex.group(0).split(" ", 1)[1] + if "cpu time" in data[0]: + 
cputimeregex = re.search(r"cpu\ time\ (.*)\)", data[0]) + output["cputime"] = cputimeregex.group(0).split("time ", 1)[1] + + output["loaded"] = [] + + for line in data[1:]: + loaded = {} + if "+" in line: + loaded["library"] = line.split("(", 1)[1].split("+", 1)[0].strip() + loaded["int"] = line.split("(", 1)[1].split("+", 1)[1].split(")", 1)[0].strip() + loaded["hex"] = line.split("[", 1)[1][:-1].strip() + elif "truncated backtrace>" not in line: + loaded["hex"] = line.split("[", 1)[1][:-1].strip() + output["loaded"].append(loaded) + return output + + def parse_images(data): + images = [] + for line in data: + image = {} + if line.strip() is not None: + clean = ' '.join(line.split(" ")).split() + image['start'] = clean[0] + image['end'] = clean[2] + image['image'] = clean[3] + image['UUID'] = clean[4][1:-1] + try: + image['path'] = clean[5] + except: # noqa E722 + pass + images.append(image) + return images diff --git a/parsers/swcutil.py b/parsers/swcutil.py index 4615213..5eaf968 100644 --- a/parsers/swcutil.py +++ b/parsers/swcutil.py @@ -6,102 +6,98 @@ import glob import os -import json - - -parser_description = "Parsing swcutil_show file" - - -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'swcutil_show.txt' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) - - return log_files - - -def parse_path(path: str) -> list | dict: - try: - with open(get_log_files(path)[0], 'r') as f_in: - # init section - headers = [] - db = [] - network = [] - settings = [] - memory = [] - status = 'headers' - - # stripping - for line in f_in: - if line.strip() == "": - continue - if line.strip() == "=================================== DATABASE ===================================": - status = 'db' - continue - elif line.strip() == "=================================== NETWORK ====================================": - status = 'network' - continue - elif line.strip() == "=================================== SETTINGS ===================================": - status = 'settings' - continue - elif line.strip() == "================================= MEMORY USAGE =================================": - status = 'memory' - continue - elif status == 'headers': - headers.append(line.strip()) - continue - elif status == 'db': - db.append(line.strip()) - continue - elif status == 'network': - network.append(line.strip()) - continue - elif status == 'settings': - settings.append(line.strip()) - continue - elif status == 'memory': - memory.append(line.strip()) - continue - - # call parsing function per section - parsed_headers = parse_basic(headers) - parsed_db = parse_db(db) - parsed_network = parse_basic(network) - parsed_settings = parse_basic(settings) - parsed_memory = parse_basic(memory) - - return {'headers': parsed_headers, 'db': parsed_db, 'network': parsed_network, 'settings': parsed_settings, 'memory': parsed_memory} - except IndexError: - return {'error': 'No swcutil_show.txt file present'} - - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - result = parse_path(path) - output_file = os.path.join(output_folder, f"{__name__.split('.')[-1]}.json") - with open(output_file, 'w') as f: - json.dump(result, f, indent=4) - - -def parse_basic(data): - output = {} - for line in data: - splitted = line.split(":", 1) - if len(splitted) > 1: - output[splitted[0]] = splitted[1].strip() - return output - - -def parse_db(data): - # init - db = [] - db_data = [] - for line in data: - if line.strip() == 
"--------------------------------------------------------------------------------": - db.append(parse_basic(db_data)) - db_data = [] - else: - db_data.append(line.strip()) - return db +from utils.base import BaseParserInterface + + +class SwcutilParser(BaseParserInterface): + description = "Parsing swcutil_show file" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_files_globs = [ + 'swcutil_show.txt' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + + return log_files + + def execute(self) -> list | dict: + return SwcutilParser.parse_file(self.get_log_files()[0]) + + def parse_file(path: str) -> list | dict: + try: + with open(path, 'r') as f_in: + # init section + headers = [] + db = [] + network = [] + settings = [] + memory = [] + status = 'headers' + + # stripping + for line in f_in: + if line.strip() == "": + continue + if line.strip() == "=================================== DATABASE ===================================": + status = 'db' + continue + elif line.strip() == "=================================== NETWORK ====================================": + status = 'network' + continue + elif line.strip() == "=================================== SETTINGS ===================================": + status = 'settings' + continue + elif line.strip() == "================================= MEMORY USAGE =================================": + status = 'memory' + continue + elif status == 'headers': + headers.append(line.strip()) + continue + elif status == 'db': + db.append(line.strip()) + continue + elif status == 'network': + network.append(line.strip()) + continue + elif status == 'settings': + settings.append(line.strip()) + continue + elif status == 'memory': + memory.append(line.strip()) + continue + + # call parsing function per section + parsed_headers = SwcutilParser.parse_basic(headers) + parsed_db = SwcutilParser.parse_db(db) + parsed_network = SwcutilParser.parse_basic(network) + parsed_settings = SwcutilParser.parse_basic(settings) + parsed_memory = SwcutilParser.parse_basic(memory) + + return {'headers': parsed_headers, 'db': parsed_db, 'network': parsed_network, 'settings': parsed_settings, 'memory': parsed_memory} + except IndexError: + return {'error': 'No swcutil_show.txt file present'} + + def parse_basic(data): + output = {} + for line in data: + splitted = line.split(":", 1) + if len(splitted) > 1: + output[splitted[0]] = splitted[1].strip() + return output + + def parse_db(data): + # init + db = [] + db_data = [] + for line in data: + if line.strip() == "--------------------------------------------------------------------------------": + db.append(SwcutilParser.parse_basic(db_data)) + db_data = [] + else: + db_data.append(line.strip()) + return db diff --git a/parsers/sys.py b/parsers/sys.py index e00aded..6a36e09 100644 --- a/parsers/sys.py +++ b/parsers/sys.py @@ -9,35 +9,41 @@ import os import glob import utils.misc as misc - -parser_description = "Parsing SystemVersion plist file" - - -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'logs/SystemVersion/SystemVersion.plist' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) - - return log_files - - -def parse_path(path: str) -> list | dict: - try: - return misc.load_plist_file_as_json(get_log_files(path)[0]) - except IndexError: 
- return {'error': 'No SystemVersion.plist file present'} - - -''' -old code to print the values - if options.inputfile: - pl = getProductInfo(options.inputfile) - print(f"ProductName = {pl['ProductName']}") # XXX #9 FIXME: should that return the structure instead of print() ing it? - print(f"ProductVersion = {pl['ProductVersion']}") - print(f"ProductBuildVersion = {pl['ProductBuildVersion']}") - else: - print("WARNING -i option is mandatory!", file=sys.stderr) -''' +from utils.base import BaseParserInterface + + +class SystemVersionParser(BaseParserInterface): + description = "Parsing SystemVersion plist file" + + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_files_globs = [ + 'logs/SystemVersion/SystemVersion.plist' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + + return log_files + + def execute(self) -> list | dict: + try: + return SystemVersionParser.parse_file(self.get_log_files()[0]) + except IndexError: + return {'error': 'No SystemVersion.plist file present'} + + def parse_file(path: str) -> list | dict: + return misc.load_plist_file_as_json(path) + + ''' + old code to print the values + if options.inputfile: + pl = getProductInfo(options.inputfile) + print(f"ProductName = {pl['ProductName']}") # XXX #9 FIXME: should that return the structure instead of print() ing it? + print(f"ProductVersion = {pl['ProductVersion']}") + print(f"ProductBuildVersion = {pl['ProductBuildVersion']}") + else: + print("WARNING -i option is mandatory!", file=sys.stderr) + ''' diff --git a/parsers/taskinfo.py b/parsers/taskinfo.py index 71d9e64..121490c 100644 --- a/parsers/taskinfo.py +++ b/parsers/taskinfo.py @@ -11,67 +11,73 @@ import glob import os from utils import tabbasedhierarchy +from utils.base import BaseParserInterface -parser_description = "Parsing taskinfo txt file" +class TaskinfoParser(BaseParserInterface): + description = "Parsing taskinfo txt file" + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'taskinfo.txt' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def get_log_files(self) -> list: + log_files_globs = [ + 'taskinfo.txt' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) - return log_files + return log_files + def execute(self) -> dict: + return TaskinfoParser.parse_file(self.get_log_files()[0]) -def parse_path(path: str) -> dict: - processes = [] - try: - with open(get_log_files(path)[0], "r") as f: - lines = f.readlines() + def parse_file(path: str) -> dict: + processes = [] + try: + with open(path, "r") as f: + lines = f.readlines() - result = re.search(r'(num tasks: )(\d+)', lines[0]) - if (result is not None): - numb_tasks = int(result.group(2)) - else: - numb_tasks = -1 + result = re.search(r'(num tasks: )(\d+)', lines[0]) + if (result is not None): + numb_tasks = int(result.group(2)) + else: + numb_tasks = -1 - n = 1 # skip lines to right section - extracted_block = [] - while n < len(lines): - if 'thread ID:' in lines[n]: - # end of main block OR thread block detected - if 'threads:' in lines[n - 1]: - # end of main block detected - process = 
tabbasedhierarchy.parse_block(extracted_block) - # extract process id and process_name from process['process'] line - process['pid'] = int(re.search(r'\[(\d+)\]', process['process']).group(1)) - process['name'] = re.search(r'"([^"]+)"', process['process']).group(1) - process['threads'] = [] + n = 1 # skip lines to right section + extracted_block = [] + while n < len(lines): + if 'thread ID:' in lines[n]: + # end of main block OR thread block detected + if 'threads:' in lines[n - 1]: + # end of main block detected + process = tabbasedhierarchy.parse_block(extracted_block) + # extract process id and process_name from process['process'] line + process['pid'] = int(re.search(r'\[(\d+)\]', process['process']).group(1)) + process['name'] = re.search(r'"([^"]+)"', process['process']).group(1) + process['threads'] = [] + pass + else: + # start of thread_block detected + # this is also the end of the previous thread block + process['threads'].append(tabbasedhierarchy.parse_block(extracted_block)) + pass + # be ready to accept new thread block + extracted_block = [] + extracted_block.append(lines[n]) + if n >= 41058: pass - else: - # start of thread_block detected + if lines[n].strip() == "" and lines[n + 1].strip() == "": + # start of new process block detected # this is also the end of the previous thread block process['threads'].append(tabbasedhierarchy.parse_block(extracted_block)) - pass - # be ready to accept new thread block - extracted_block = [] - extracted_block.append(lines[n]) - if n >= 41058: - pass - if lines[n].strip() == "" and lines[n + 1].strip() == "": - # start of new process block detected - # this is also the end of the previous thread block - process['threads'].append(tabbasedhierarchy.parse_block(extracted_block)) - processes.append(process) - extracted_block = [] - n = n + 1 # add one more to n as we are skipping the empty line - else: - extracted_block.append(lines[n]) - n = n + 1 - return {"numb_tasks": numb_tasks, "tasks": processes} - except IndexError: - return {'error': 'No taskinfo.txt file present'} + processes.append(process) + extracted_block = [] + n = n + 1 # add one more to n as we are skipping the empty line + else: + extracted_block.append(lines[n]) + n = n + 1 + return {"numb_tasks": numb_tasks, "tasks": processes} + except IndexError: + return {'error': 'No taskinfo.txt file present'} diff --git a/parsers/uuid2path.py b/parsers/uuid2path.py index 16acf00..41beb81 100644 --- a/parsers/uuid2path.py +++ b/parsers/uuid2path.py @@ -10,35 +10,43 @@ import os import glob import utils.misc as misc - -parser_description = "Parsing UUIDToBinaryLocations plist file" - - -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'logs/tailspindb/UUIDToBinaryLocations' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) - - return log_files - - -def parse_path(path: str) -> list | dict: - try: - fname = get_log_files(path)[0] - return misc.load_plist_file_as_json(fname) - except IndexError: - return {'error': 'No UUIDToBinaryLocations file present'} - - -def printResult(data): - """ - Print the hashtable produced by getUUID2Path to console as UUID, path - """ - if data: - for uuid in data.keys(): - print(f"{str(uuid)}, {str(data[uuid])}") - print(f"\n {str(len(data.keys()))} GUIDs found\n") - return +from utils.base import BaseParserInterface + + +class UUID2PathParser(BaseParserInterface): + description = "Parsing UUIDToBinaryLocations plist file" + + def __init__(self, 
config: dict, case_id: str): + super().__init__(__file__, config, case_id) + + def get_log_files(self) -> list: + log_files_globs = [ + 'logs/tailspindb/UUIDToBinaryLocations' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + + return log_files + + def execute(self) -> list | dict: + try: + return UUID2PathParser.parse_file(self.get_log_files()[0]) + except IndexError: + return {'error': 'No UUIDToBinaryLocations file present'} + + def parse_file(path: str) -> list | dict: + try: + return misc.load_plist_file_as_json(path) + except IndexError: + return {'error': 'No UUIDToBinaryLocations file present'} + + def printResult(data): + """ + Print the hashtable produced by getUUID2Path to console as UUID, path + """ + if data: + for uuid in data.keys(): + print(f"{str(uuid)}, {str(data[uuid])}") + print(f"\n {str(len(data.keys()))} GUIDs found\n") + return diff --git a/parsers/wifi_known_networks.py b/parsers/wifi_known_networks.py index ccd2f4f..293a7ed 100644 --- a/parsers/wifi_known_networks.py +++ b/parsers/wifi_known_networks.py @@ -9,43 +9,41 @@ import os import glob import utils.misc as misc -import json +from utils.base import BaseParserInterface -parser_description = "Parsing Known Wifi Networks plist file" +class WifiKnownNetworksParser(BaseParserInterface): + description = "Parsing Known Wifi Networks plist file" -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'WiFi/com.apple.wifi.known-networks.plist' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) - return log_files + def get_log_files(self) -> list: + log_files_globs = [ + 'WiFi/com.apple.wifi.known-networks.plist' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + return log_files -def parse_path(path: str) -> list | dict: - return misc.load_plist_file_as_json(get_log_files(path)[0]) + def execute(self) -> list | dict: + return WifiKnownNetworksParser.parse_file(self.get_log_files()[0]) + def parse_file(path: str) -> list | dict: + return misc.load_plist_file_as_json(path) -def parse_path_to_folder(path: str, output_folder: str) -> bool: - result = parse_path(path) - output_file = os.path.join(output_folder, f"{__name__.split('.')[-1]}.json") - with open(output_file, 'w') as f: - json.dump(result, f, indent=4) + ''' + code usefull for future printing function + class CustomEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, Uid) or isinstance(obj, Data) or isinstance(obj, datetime): + return str(obj) + return super().default(obj) -''' -code usefull for future printing function -class CustomEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, Uid) or isinstance(obj, Data) or isinstance(obj, datetime): - return str(obj) - return super().default(obj) - - - pl = getKnownWifiNetworks(options.inputfile) - print(json.dumps(pl, indent=4, cls=CustomEncoder), file=sys.stderr) -''' + pl = getKnownWifiNetworks(options.inputfile) + print(json.dumps(pl, indent=4, cls=CustomEncoder), file=sys.stderr) + ''' diff --git a/parsers/wifinetworks.py b/parsers/wifinetworks.py index cfe8c8d..8985cab 100644 --- a/parsers/wifinetworks.py +++ b/parsers/wifinetworks.py @@ -8,48 +8,37 @@ import glob import 
utils.misc as misc import os +from utils.base import BaseParserInterface -parser_description = "Parsing com.apple.wifi plist files" +class WifiNetworksParser(BaseParserInterface): + description = "Parsing com.apple.wifi plist files" -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'WiFi/*.plist', - 'WiFi/com.apple.wifi.recent-networks.json' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) - return log_files + def get_log_files(self) -> list: + log_files_globs = [ + 'WiFi/*.plist', + 'WiFi/com.apple.wifi.recent-networks.json' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) + return log_files -def parse_file(fname: str) -> dict | list: - if fname.endswith('.json'): - with open(fname, 'r') as f: - return json.load(f) - if fname.endswith('.plist'): - return misc.load_plist_file_as_json(fname) + def execute(self) -> dict: + result = {} + for logfile in self.get_log_files(): + end_of_path = os.path.splitext(os.path.basename(logfile))[0] # keep the filename without the extension + result[end_of_path] = WifiNetworksParser.parse_file(logfile) + return result - -def parse_path(path: str) -> dict: - result = {} - for logfile in get_log_files(path): - end_of_path = logfile[len(path):].lstrip(os.path.sep) # take the path after the root path - result[end_of_path] = parse_file(logfile) - return result - - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - output_folder = os.path.join(output_folder, __name__.split('.')[-1]) - os.makedirs(output_folder, exist_ok=True) - for logfile in get_log_files(path): - try: - json_data = parse_file(logfile) - except Exception as e: - json_data = {"error": str(e)} - end_of_path = logfile[len(path):].lstrip(os.path.sep) # take the path after the root path - output_filename = end_of_path.replace(os.path.sep, '_') + '.json' # replace / with _ in the path - with open(os.path.join(output_folder, output_filename), 'w') as f: - json.dump(json_data, f, indent=4) + def parse_file(fname: str) -> dict | list: + if fname.endswith('.json'): + with open(fname, 'r') as f: + return json.load(f) + if fname.endswith('.plist'): + return misc.load_plist_file_as_json(fname) diff --git a/parsers/wifiscan.py b/parsers/wifiscan.py index ce5762e..c1bc6be 100644 --- a/parsers/wifiscan.py +++ b/parsers/wifiscan.py @@ -3,26 +3,34 @@ import glob import os import re +from utils.base import BaseParserInterface -parser_description = "Parsing wifi_scan files" +class WifiScanParser(BaseParserInterface): + description = "Parsing wifi_scan files" + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) -def get_log_files(log_root_path: str) -> list: - log_files_globs = [ - 'WiFi/wifi_scan*.txt' - ] - log_files = [] - for log_files_glob in log_files_globs: - log_files.extend(glob.glob(os.path.join(log_root_path, log_files_glob))) + def get_log_files(self) -> list: + log_files_globs = [ + 'WiFi/wifi_scan*.txt' + ] + log_files = [] + for log_files_glob in log_files_globs: + log_files.extend(glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))) - return log_files + return log_files + def execute(self) -> list | dict: + output = [] + for logfile in self.get_log_files(): + output.extend(WifiScanParser.parse_file(logfile)) + return output 
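+    # note (editorial, hedged): parse_file() below handles a single wifi_scan*.txt file,
+    # while execute() above simply concatenates the per-file results into one flat list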
-def parse_path(path: str) -> list | dict: - output = [] - for logfile in get_log_files(path): - with open(logfile, 'r') as f: + def parse_file(path: str) -> list | dict: + output = [] + with open(path, 'r') as f: for line in f: line = line.strip() # skip empty lines @@ -74,4 +82,4 @@ def parse_path(path: str) -> list | dict: index_now = index_close + 2 parsed_data[key] = value output.append(parsed_data) - return output + return output diff --git a/parsers/wifisecurity.py b/parsers/wifisecurity.py index 1d72a5f..158d5b1 100644 --- a/parsers/wifisecurity.py +++ b/parsers/wifisecurity.py @@ -5,61 +5,61 @@ # Author: david@autopsit.org import os -import json +from utils.base import BaseParserInterface -parser_description = "Parsing WiFi Security logs" +class WifiSecurityParser(BaseParserInterface): + description = "Parsing WiFi Security logs" -def get_log_files(log_root_path: str) -> list: - """ - Get the list of log files to be parsed - """ - log_files = [ - "WiFi/security.txt" - ] - return [os.path.join(log_root_path, log_files) for log_files in log_files] + def __init__(self, config: dict, case_id: str): + super().__init__(__file__, config, case_id) + def get_log_files(self) -> list: + """ + Get the list of log files to be parsed + """ + log_files = [ + "WiFi/security.txt" + ] + return [os.path.join(self.case_data_subfolder, log_files) for log_files in log_files] -def parse_path(path: str) -> list | dict: - """ - Parse ./WiFi/security.txt and extract block of interest: + def execute(self) -> list | dict: + return WifiSecurityParser.parse_file(self.get_log_files()[0]) - accc : - acct : - agrp : apple - cdat : 2023-02-09 21:10:38 +0000 - desc : - labl : - mdat : 2023-02-09 21:10:38 +0000 - musr : {length = 0, bytes = 0x} - pdmn : ck - sha1 : {length = 20, bytes = 0x98146b802675fb480dc64a8f3a7597ea70f03b46} - svce : AirPort - sync : 1 - tomb : 0 - """ - entries = [] - element = {} - try: - with open(get_log_files(path)[0], "r") as f: - for line in f: - if ' : ' in line: - key, value = line.split(" : ") - # print(f"key: {key.strip()}, value: {value.strip()}") - element[key.strip()] = value.strip() - elif element: - entries.append(element) - # print(f"appending {element}") - element = {} - except IndexError: - return {'error': 'No WiFi/security.txt file present'} - except Exception as e: - print(f"Could not parse: {get_log_files(path)[0]}. Reason: {str(e)}") - return entries + def parse_file(path: str) -> list | dict: + """ + Parse ./WiFi/security.txt and extract block of interest: - -def parse_path_to_folder(path: str, output_folder: str) -> bool: - result = parse_path(path) - output_file = os.path.join(output_folder, f"{__name__.split('.')[-1]}.json") - with open(output_file, 'w') as f: - json.dump(result, f, indent=4) + accc : + acct : + agrp : apple + cdat : 2023-02-09 21:10:38 +0000 + desc : + labl : + mdat : 2023-02-09 21:10:38 +0000 + musr : {length = 0, bytes = 0x} + pdmn : ck + sha1 : {length = 20, bytes = 0x98146b802675fb480dc64a8f3a7597ea70f03b46} + svce : AirPort + sync : 1 + tomb : 0 + """ + entries = [] + element = {} + try: + with open(path, "r") as f: + for line in f: + if ' : ' in line: + key, value = line.split(" : ") + # print(f"key: {key.strip()}, value: {value.strip()}") + element[key.strip()] = value.strip() + elif element: + entries.append(element) + # print(f"appending {element}") + element = {} + except IndexError: + return {'error': 'No WiFi/security.txt file present'} + except Exception as e: + print(f"Could not parse: {path}. 
Reason: {str(e)}") + return {'error': f'Could not parse: {path}. Reason: {str(e)}'} + return entries diff --git a/sysdiagnose.py b/sysdiagnose.py index 03e65a4..780b601 100755 --- a/sysdiagnose.py +++ b/sysdiagnose.py @@ -12,6 +12,8 @@ import tarfile import fcntl +from utils.base import BaseParserInterface, BaseAnalyserInterface, SysdiagnoseConfig + def main(): parser = argparse.ArgumentParser( @@ -93,13 +95,13 @@ def main(): sd.print_parsers_list() return elif args.parser == 'all': - parsers = list(sd.get_parsers().keys()) + parsers_list = list(sd.get_parsers().keys()) elif not sd.is_valid_parser_name(args.parser): sd.print_parsers_list() print("") exit(f"Parser '{args.parser}' does not exist, possible options are listed above.") else: - parsers = [args.parser] + parsers_list = [args.parser] if args.case_id == 'all': case_ids = sd.get_case_ids() @@ -112,7 +114,7 @@ def main(): for case_id in case_ids: print(f"Case ID: {case_id}") - for parser in parsers: + for parser in parsers_list: print(f"Parser '{parser}' for case ID '{case_id}'") try: sd.parse(parser, case_id) @@ -125,13 +127,13 @@ def main(): sd.print_analysers_list() return elif args.analyser == 'all': - analysers = list(sd.get_analysers().keys()) + analysers_list = list(sd.get_analysers().keys()) elif not sd.is_valid_analyser_name(args.analyser): sd.print_analysers_list() print("") exit(f"Analyser '{args.analyser}' does not exist, possible options are listed above.") else: - analysers = [args.analyser] + analysers_list = [args.analyser] if args.case_id == 'all': case_ids = sd.get_case_ids() @@ -144,7 +146,7 @@ def main(): for case_id in case_ids: print(f"Case ID: {case_id}") - for analyser in analysers: + for analyser in analysers_list: print(f" Analyser '{analyser}' for case ID '{case_id}'") try: sd.analyse(analyser, case_id) @@ -174,29 +176,15 @@ def analyse_parser_error(message): class Sysdiagnose: def __init__(self, cases_path=os.getenv('SYSDIAGNOSE_CASES_PATH', './cases')): - self.config_folder = os.path.dirname(os.path.abspath(__file__)) - self.parsers_folder = os.path.join(self.config_folder, "parsers") - self.analysers_folder = os.path.join(self.config_folder, "analysers") - - # case data is in current working directory by default - self.cases_root_folder = cases_path - - self.cases_file = os.path.join(self.cases_root_folder, "cases.json") - self.data_folder = os.path.join(self.cases_root_folder, "data") - self.parsed_data_folder = os.path.join(self.cases_root_folder, "parsed_data") # stay in current folder - self._cases = False # will be populated through cases() singleton method - - os.makedirs(self.cases_root_folder, exist_ok=True) - os.makedirs(self.data_folder, exist_ok=True) - os.makedirs(self.parsed_data_folder, exist_ok=True) + self.config = SysdiagnoseConfig(cases_path) def cases(self, force: bool = False) -> dict: - # singleton, so it's not loaded unless necessary + # pseudo singleton, so it's not loaded unless necessary # load cases + migration of old cases format to new format if not self._cases or force: try: - with open(self.cases_file, 'r+') as f: + with open(self.config.cases_file, 'r+') as f: try: fcntl.flock(f, fcntl.LOCK_EX) # enable lock self._cases = json.load(f) @@ -214,7 +202,7 @@ def cases(self, force: bool = False) -> dict: fcntl.flock(f, fcntl.LOCK_UN) except FileNotFoundError: self._cases = {} - with open(self.cases_file, 'w') as f: + with open(self.config.cases_file, 'w') as f: try: fcntl.flock(f, fcntl.LOCK_EX) # enable lock json.dump(self._cases, f, indent=4) @@ -299,11 +287,11 @@ def 
create_case(self, sysdiagnose_file: str, force: bool = False, case_id: bool } # create case folder - case_folder = os.path.join(self.data_folder, str(case['case_id'])) + case_folder = os.path.join(self.config.data_folder, str(case['case_id'])) os.makedirs(case_folder, exist_ok=True) # create parsed_data folder - parsed_folder = os.path.join(self.parsed_data_folder, str(case['case_id'])) + parsed_folder = os.path.join(self.config.parsed_data_folder, str(case['case_id'])) os.makedirs(parsed_folder, exist_ok=True) # extract sysdiagnose files @@ -336,21 +324,20 @@ def create_case(self, sysdiagnose_file: str, force: bool = False, case_id: bool raise Exception(f"Could not open file {new_case_json['sysdiagnose.log']}. Reason: {str(e)}") # Save JSON file - case_fname = os.path.join(self.data_folder, f"{case_id}.json") + case_fname = os.path.join(self.config.data_folder, f"{case_id}.json") with open(case_fname, 'w') as data_file: data_file.write(json.dumps(new_case_json, indent=4)) # update cases list file - extracted_files_path = os.path.join(case_folder, os.listdir(case_folder).pop()) - remotectl_dumpstate_json = remotectl_dumpstate.parse_path(extracted_files_path) + remotectl_dumpstate_json = remotectl_dumpstate.RemotectlDumpstateParser(self.config, case_id).get_result() try: case['serial_number'] = remotectl_dumpstate_json['Local device']['Properties']['SerialNumber'] case['unique_device_id'] = remotectl_dumpstate_json['Local device']['Properties']['UniqueDeviceID'] - except KeyError as e: + except (KeyError, TypeError) as e: print(f"WARNING: Could not parse remotectl_dumpstate, and therefore extract serial numbers. Error {e}") # update case with new data - with open(self.cases_file, 'r+') as f: + with open(self.config.cases_file, 'r+') as f: try: fcntl.flock(f, fcntl.LOCK_EX) # enable lock self._cases = json.load(f) # load latest version @@ -367,47 +354,32 @@ def create_case(self, sysdiagnose_file: str, force: bool = False, case_id: bool def parse(self, parser: str, case_id: str): # Load parser module - spec = importlib.util.spec_from_file_location(parser, os.path.join(self.parsers_folder, parser) + '.py') - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - - case_folder = os.path.join(self.data_folder, case_id) - if not os.path.isdir(case_folder): - print(f"Case {case_id} does not exist", file=sys.stderr) - return -1 - - extracted_files_path = os.path.join(case_folder, os.listdir(case_folder).pop()) - - if hasattr(module, 'parse_path_to_folder'): - output_folder = os.path.join(self.parsed_data_folder, case_id) - os.makedirs(output_folder, exist_ok=True) - result = module.parse_path_to_folder(path=extracted_files_path, output_folder=output_folder) - print(f'Execution finished, output saved in: {output_folder}', file=sys.stderr) - else: # if the module cannot (yet) save directly to a folder, we wrap around by doing it ourselves - # parsers that output in the result variable - # building command - result = module.parse_path(path=extracted_files_path) - # saving the parser output - output_file = os.path.join(self.parsed_data_folder, case_id, f"{parser}.json") - with open(output_file, 'w') as data_file: - data_file.write(json.dumps(result, indent=4, ensure_ascii=False)) - print(f'Execution finished, output saved in: {output_file}', file=sys.stderr) + module = importlib.import_module(f'parsers.{parser}') + parser_instance = None + # figure out the class name and create an instance of it + for attr in dir(module): + obj = getattr(module, attr) + if isinstance(obj, 
type) and issubclass(obj, BaseParserInterface) and obj is not BaseParserInterface: + parser_instance: BaseParserInterface = obj(config=self.config, case_id=case_id) + break + if not parser_instance: + raise NotImplementedError(f"Parser '{parser}' does not exist or has problems") + + parser_instance.save_result(force=True) # force parsing return 0 def analyse(self, analyser: str, case_id: str): - # Load parser module - spec = importlib.util.spec_from_file_location(analyser, os.path.join(self.analysers_folder, analyser + '.py')) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - - # building command - parse_data_path = os.path.join(self.parsed_data_folder, case_id) - if not os.path.isdir(parse_data_path): - print(f"Case {case_id} does not exist", file=sys.stderr) - return -1 - output_file = os.path.join(self.parsed_data_folder, case_id, analyser + "." + module.analyser_format) - module.analyse_path(case_folder=parse_data_path, output_file=output_file) - print(f'Execution success, output saved in: {output_file}', file=sys.stderr) + module = importlib.import_module(f'analysers.{analyser}') + analyser_instance = None + for attr in dir(module): + obj = getattr(module, attr) + if isinstance(obj, type) and issubclass(obj, BaseAnalyserInterface) and obj is not BaseAnalyserInterface: + analyser_instance: BaseAnalyserInterface = obj(config=self.config, case_id=case_id) + break + if not analyser_instance: + raise NotImplementedError(f"Analyser '{analyser}' does not exist or has problems") + + analyser_instance.save_result(force=True) # force parsing return 0 @@ -432,7 +404,7 @@ def is_valid_case_id(self, case_id): def is_valid_parser_name(self, name): if name == '__init__': return False - fname = os.path.join(self.parsers_folder, f'{name}.py') + fname = os.path.join(self.config.parsers_folder, f'{name}.py') if os.path.isfile(fname): try: spec = importlib.util.spec_from_file_location(name, fname) @@ -446,7 +418,7 @@ def is_valid_parser_name(self, name): def is_valid_analyser_name(self, name): if name == '__init__': return False - fname = os.path.join(self.analysers_folder, f'{name}.py') + fname = os.path.join(self.config.analysers_folder, f'{name}.py') if os.path.isfile(fname): try: spec = importlib.util.spec_from_file_location(name, fname) @@ -458,38 +430,46 @@ def is_valid_analyser_name(self, name): return False def get_parsers(self) -> dict: - modules = glob.glob(os.path.join(self.parsers_folder, '*.py')) - parsers = {} - for parser in modules: - if parser.endswith('__init__.py'): + modules = glob.glob(os.path.join(self.config.parsers_folder, '*.py')) + results = {} + for item in modules: + if item.endswith('__init__.py'): continue try: - name = parser[len(self.parsers_folder) + 1:-3] - spec = importlib.util.spec_from_file_location(name, parser) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - parsers[name] = module.parser_description + name = os.path.splitext(os.path.basename(item))[0] + module = importlib.import_module(f'parsers.{name}') + # figure out the class name + for attr in dir(module): + obj = getattr(module, attr) + if isinstance(obj, type) and issubclass(obj, BaseParserInterface) and obj is not BaseParserInterface: + results[name] = obj.description + break except AttributeError: continue - parsers = dict(sorted(parsers.items())) - return parsers + + results = dict(sorted(results.items())) + return results def get_analysers(self) -> dict: - modules = glob.glob(os.path.join(self.analysers_folder, '*.py')) - analysers = 
{} - for parser in modules: - if parser.endswith('__init__.py'): + modules = glob.glob(os.path.join(self.config.analysers_folder, '*.py')) + results = {} + for item in modules: + if item.endswith('__init__.py'): continue try: - name = parser[len(self.analysers_folder) + 1:-3] - spec = importlib.util.spec_from_file_location(name, parser) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - analysers[name] = module.analyser_description + name = os.path.splitext(os.path.basename(item))[0] + module = importlib.import_module(f'analysers.{name}') + # figure out the class name + for attr in dir(module): + obj = getattr(module, attr) + if isinstance(obj, type) and issubclass(obj, BaseAnalyserInterface) and obj is not BaseAnalyserInterface: + results[name] = obj.description + break except AttributeError: continue - analysers = dict(sorted(analysers.items())) - return analysers + + results = dict(sorted(results.items())) + return results def print_parsers_list(self) -> None: lines = [['all', 'Run all parsers']] diff --git a/tests/__init__.py b/tests/__init__.py index da00c62..4ce2fa1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,14 +1,56 @@ import unittest import os import glob +from sysdiagnose import Sysdiagnose class SysdiagnoseTestCase(unittest.TestCase): + @classmethod + def setUpClass(cls): + # theoretically better to use tempfile.TemporaryDirectory(), but that would prevent re-use between test runs + # differently better, and reused over multiple test runs + # if we'd autogenerated tempdirs we'd also have to figure out a system to pass on the name to the instantiated classes. (singleton?) + cls.tmp_folder = '/tmp/sysdiagnose' + os.makedirs(cls.tmp_folder, exist_ok=True) + + cases_path = os.path.join(cls.tmp_folder, 'cases') + cls.sd = Sysdiagnose(cases_path=cases_path) + + # if cases.json does not exist, create cases from testdata, + # otherwise assume cases are already created and extracted + if os.path.exists(os.path.join(cases_path, 'cases.json')): + print("Cases already exist, skipping creation") + else: + # find all folders such as + # - testdata/iOSxx/sysdiagnose_YYYY.MM.DD_HH-MM-SS-SSSS_....tar.gz + # - testdata-private/iOSxx/sysdiagnose_YYYY.MM.DD_HH-MM-SS-SSSS_....tar.gz + # this allows testing locally with more data, while keeping online tests coherent + sd_archive_files = [name for name in glob.glob('tests/testdata*/**/*.tar.gz', recursive=True)] + for archive_file in sd_archive_files: + print(f"Creating case from {archive_file}") + try: + cls.sd.create_case(archive_file) + except ValueError as ve: + # ignore errors as we know we may have multiple times the same case + print(f"Error: {ve}") + def setUp(self): - # find all folders such as - # - testdata/iOSxx/sysdiagnose_YYYY.MM.DD_HH-MM-SS-SSSS_... - # - testdata-private/iOSxx/sysdiagnose_YYYY.MM.DD_HH-MM-SS-SSSS_... - # this allows testing locally with more data, while keeping online tests coherent + self.tmp_folder = '/tmp/sysdiagnose' + cases_path = os.path.join(self.tmp_folder, 'cases') + self.sd = Sysdiagnose(cases_path=cases_path) + + def get_parsers(self): + """ + List all parsers in the parsers folder. + We can't use the self.sd.get_parsers() method as that would discard broken parsers. 
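+        Instead, the *.py files are globbed directly so that even broken parsers get exercised.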
+ """ + results = [] + modules = glob.glob(os.path.join(self.sd.config.parsers_folder, "*.py")) + for item in modules: + if item.endswith('__init__.py'): + continue + results.append(os.path.splitext(os.path.basename(item))[0]) - self.log_root_paths = [name for name in glob.glob('tests/testdata*/**/sysdiagnose_*', recursive=True) if os.path.isdir(name)] + results.sort() + return results diff --git a/tests/test_analysers_apps.py b/tests/test_analysers_apps.py index 6299297..b463e02 100644 --- a/tests/test_analysers_apps.py +++ b/tests/test_analysers_apps.py @@ -1,27 +1,21 @@ -from analysers.apps import analyse_path -from parsers import accessibility_tcc, brctl, itunesstore, logarchive +from analysers.apps import AppsAnalyser from tests import SysdiagnoseTestCase import unittest import os -import tempfile class TestAnalysersApps(SysdiagnoseTestCase): def test_analyse_apps(self): - for log_root_path in self.log_root_paths: + for case_id, case in self.sd.cases().items(): + # run the analyser + a = AppsAnalyser(self.sd.config, case_id=case_id) + a.save_result(force=True) + self.assertTrue(os.path.isfile(a.output_file)) + self.assertTrue(os.path.getsize(a.output_file) > 0) - with tempfile.TemporaryDirectory() as tmp_outpath: - # first run the parsers - accessibility_tcc.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - brctl.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - itunesstore.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - logarchive.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - - # then run the analyser - output_file = os.path.join(tmp_outpath, 'apps.json') - analyse_path(case_folder=tmp_outpath, output_file=output_file) - self.assertTrue(os.path.isfile(output_file)) + result = a.get_result() + self.assertGreater(len(result), 0) if __name__ == '__main__': diff --git a/tests/test_analysers_ps_everywhere.py b/tests/test_analysers_ps_everywhere.py new file mode 100644 index 0000000..07ad2c8 --- /dev/null +++ b/tests/test_analysers_ps_everywhere.py @@ -0,0 +1,23 @@ +from analysers.ps_everywhere import PsEverywhereAnalyser +from tests import SysdiagnoseTestCase +import unittest +import os + + +class TestAnalysersPsEverywhere(SysdiagnoseTestCase): + + def test_analyse_ps_everywhere(self): + for case_id, case in self.sd.cases().items(): + print(f"Running PsEverywhereAnalyser for {case_id}") + # run the analyser + a = PsEverywhereAnalyser(self.sd.config, case_id=case_id) + a.save_result(force=True) + self.assertTrue(os.path.isfile(a.output_file)) + self.assertTrue(os.path.getsize(a.output_file) > 0) + + result = a.get_result() + self.assertGreater(len(result), 0) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_analysers_ps_matrix.py b/tests/test_analysers_ps_matrix.py new file mode 100644 index 0000000..5a1f20a --- /dev/null +++ b/tests/test_analysers_ps_matrix.py @@ -0,0 +1,23 @@ +from analysers.ps_matrix import PsMatrixAnalyser +from tests import SysdiagnoseTestCase +import unittest +import os + + +class TestAnalysersPsMatrix(SysdiagnoseTestCase): + + def test_analyse_ps_matrix(self): + for case_id, case in self.sd.cases().items(): + print(f"Running PsMatrix for {case_id}") + # run the analyser + a = PsMatrixAnalyser(self.sd.config, case_id=case_id) + a.save_result(force=True) + self.assertTrue(os.path.isfile(a.output_file)) + self.assertTrue(os.path.getsize(a.output_file) > 0) + + result = a.get_result() + self.assertGreater(len(result), 0) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/tests/test_analysers_timeliner.py b/tests/test_analysers_timeliner.py index fe609ad..ceda167 100644 --- a/tests/test_analysers_timeliner.py +++ b/tests/test_analysers_timeliner.py @@ -1,32 +1,19 @@ -from analysers.timeliner import analyse_path -from parsers import accessibility_tcc, logarchive, mobileactivation, powerlogs, swcutil, shutdownlogs, wifisecurity, wifi_known_networks +from analysers.timeliner import TimelinerAnalyser from tests import SysdiagnoseTestCase import unittest import os -import tempfile class TestAnalysersTimeliner(SysdiagnoseTestCase): def test_analyse_timeliner(self): - for log_root_path in self.log_root_paths: + for case_id, case in self.sd.cases().items(): + print(f"Running Timeliner for {case_id}") + a = TimelinerAnalyser(self.sd.config, case_id=case_id) + a.save_result(force=True) - with tempfile.TemporaryDirectory() as tmp_outpath: - # first run the parsers - accessibility_tcc.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - logarchive.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - mobileactivation.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - powerlogs.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - shutdownlogs.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - swcutil.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - wifi_known_networks.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - wifisecurity.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - - # then run the analyser - output_file = os.path.join(tmp_outpath, 'timeliner.jsonl') - analyse_path(case_folder=tmp_outpath, output_file=output_file) - self.assertTrue(os.path.isfile(output_file)) - self.assertGreater(os.path.getsize(output_file), 0) + self.assertTrue(os.path.isfile(a.output_file)) + self.assertTrue(os.path.getsize(a.output_file) > 0) if __name__ == '__main__': diff --git a/tests/test_analysers_wifi_geolocation.py b/tests/test_analysers_wifi_geolocation.py index 275c1fe..424590c 100644 --- a/tests/test_analysers_wifi_geolocation.py +++ b/tests/test_analysers_wifi_geolocation.py @@ -1,29 +1,20 @@ -from analysers.wifi_geolocation import analyse_path -from parsers import wifi_known_networks +from analysers.wifi_geolocation import WifiGeolocationAnalyser from tests import SysdiagnoseTestCase import unittest import os -import tempfile -import re class TestAnalysersWifiGeolocation(SysdiagnoseTestCase): def test_analyse_wifi_geolocation(self): - for log_root_path in self.log_root_paths: - files = wifi_known_networks.get_log_files(log_root_path) - self.assertTrue(len(files) > 0) - with tempfile.TemporaryDirectory() as tmp_outpath: - wifi_known_networks.parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - output_file = os.path.join(tmp_outpath, 'wifi_geolocation.gpx') - analyse_path(case_folder=tmp_outpath, output_file=output_file) - self.assertTrue(os.path.isfile(output_file)) - # check if destination file contain Latitude info if source did - with open(os.path.join(tmp_outpath, 'wifi_known_networks.json'), 'r') as f_in: - if 'Latitude' in f_in.read(): - with open(output_file, 'r') as f_out: - f_data = f_out.read() - self.assertTrue(re.search(r'lat="[0-9]+\.[0-9]', f_data, flags=re.MULTILINE)) + for case_id, case in self.sd.cases().items(): + a = WifiGeolocationAnalyser(self.sd.config, case_id=case_id) + a.save_result(force=True) + + self.assertTrue(os.path.isfile(a.output_file)) + self.assertTrue(os.path.getsize(a.output_file) > 0) + + # FIXME check for something 
else within the file... if __name__ == '__main__': diff --git a/tests/test_analysers_wifi_geolocation_kml.py b/tests/test_analysers_wifi_geolocation_kml.py index dec969e..dd67b94 100644 --- a/tests/test_analysers_wifi_geolocation_kml.py +++ b/tests/test_analysers_wifi_geolocation_kml.py @@ -1,23 +1,20 @@ -from analysers.wifi_geolocation_kml import analyse_path -from parsers.wifi_known_networks import parse_path_to_folder, get_log_files +from analysers.wifi_geolocation_kml import WifiGeolocationKmlAnalyser from tests import SysdiagnoseTestCase import unittest import os -import tempfile class TestAnalysersWifiGeolocationKml(SysdiagnoseTestCase): def test_analyse_wifi_geolocation_kml(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - self.assertTrue(len(files) > 0) - with tempfile.TemporaryDirectory() as tmp_outpath: - parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - output_file = os.path.join(tmp_outpath, 'wifi_geolocation.kml') - analyse_path(case_folder=tmp_outpath, output_file=output_file) - self.assertTrue(os.path.isfile(output_file)) - # FIXME check for something else within the file... + for case_id, case in self.sd.cases().items(): + a = WifiGeolocationKmlAnalyser(self.sd.config, case_id=case_id) + a.save_result(force=True) + + self.assertTrue(os.path.isfile(a.output_file)) + self.assertTrue(os.path.getsize(a.output_file) > 0) + + # FIXME check for something else within the file... if __name__ == '__main__': diff --git a/tests/test_analysers_yarascan.py b/tests/test_analysers_yarascan.py new file mode 100644 index 0000000..91f290c --- /dev/null +++ b/tests/test_analysers_yarascan.py @@ -0,0 +1,26 @@ +from analysers.yarascan import YaraAnalyser +from tests import SysdiagnoseTestCase +import unittest +import os + + +class TestAnalysersYarascan(SysdiagnoseTestCase): + + def test_analyse_yarascan(self): + # FIXME we need to first create a yara rule + + + for case_id, case in self.sd.cases().items(): + print(f"Running Yarascan for {case_id}") + # run the analyser + a = YaraAnalyser(self.sd.config, case_id=case_id) + a.save_result(force=True) + self.assertTrue(os.path.isfile(a.output_file)) + self.assertTrue(os.path.getsize(a.output_file) > 0) + + result = a.get_result() + self.assertGreater(len(result), 0) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 5b236a0..8078f96 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -1,9 +1,7 @@ from tests import SysdiagnoseTestCase import unittest -import glob -import os import importlib.util -import sysdiagnose +from utils.base import BaseParserInterface ''' Test file structure of parsers @@ -12,30 +10,37 @@ class TestParsers(SysdiagnoseTestCase): - def list_all_parsers(self): - sd = sysdiagnose.Sysdiagnose() - modules = glob.glob(os.path.join(sd.parsers_folder, "*.py")) - for parser in modules: - if parser.endswith('__init__.py'): - continue - yield parser - def test_parsers_filestructure(self): - required_functions = ['get_log_files', 'parse_path'] # TODO add parse_path_to_folder(path: str, output_folder: str) -> bool: - required_variables = ['parser_description'] - - parsers = self.list_all_parsers() - for parser in parsers: - parser_fname = os.path.basename(parser) - spec = importlib.util.spec_from_file_location(parser_fname[:-3], parser) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) + required_functions = ['execute'] + required_variables = ['description'] + + 
print("Checking parsers for required functions and variables...") + for parser_name in self.get_parsers(): + print(f"- {parser_name}") + + module = importlib.import_module(f'parsers.{parser_name}') + # spec = importlib.util.spec_from_file_location(parser_fname[:-3], parser) + # module = importlib.util.module_from_spec(spec) + # spec.loader.exec_module(module) + + # figure out the class name + obj = None + obj_instance = None + for attr in dir(module): + obj = getattr(module, attr) + if isinstance(obj, type) and issubclass(obj, BaseParserInterface) and obj is not BaseParserInterface: + obj_instance: BaseParserInterface = obj(config=self.sd.config, case_id='1') + break + + self.assertIsNotNone(obj_instance, f'Parser {parser_name} is missing a class definition inheriting BaseParserInterface.') + # ensure the module_filename is correct, and not from a parent class + self.assertEqual(obj_instance.module_name, parser_name, f'Parser {parser_name} has incorrect module_filename. Did you add the following?\n def __init__(self, config: dict, case_id: str):\n super().__init__(__file__, config, case_id)') + + # check for required functions and variables for required_function in required_functions: - self.assertTrue(hasattr(module, required_function), f'Parser {parser_fname} is missing {required_function} function.') + self.assertTrue(hasattr(obj, required_function), f'Parser {parser_name} is missing {required_function} function.') for required_variable in required_variables: - self.assertTrue(hasattr(module, required_variable), f'Parser {parser_fname} is missing {required_variable} variable.') - print(parser) - pass + self.assertTrue(hasattr(obj, required_variable), f'Parser {parser_name} is missing {required_variable} variable.') if __name__ == '__main__': diff --git a/tests/test_parsers_accessibility_tcc.py b/tests/test_parsers_accessibility_tcc.py index a549b04..1c2fcf5 100644 --- a/tests/test_parsers_accessibility_tcc.py +++ b/tests/test_parsers_accessibility_tcc.py @@ -1,16 +1,21 @@ -from parsers.accessibility_tcc import parse_path, get_log_files +from parsers.accessibility_tcc import AccessibilityTccParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersAccessibilityTcc(SysdiagnoseTestCase): def test_get_accessibility_tcc(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - print(f'Parsing {files}') - self.assertEqual(len(files), 1) - result = parse_path(log_root_path) + for case_id, case in self.sd.cases().items(): + p = AccessibilityTccParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertTrue('admin' in result) self.assertTrue('policies' in result) self.assertTrue('active_policy' in result) diff --git a/tests/test_parsers_appinstallation.py b/tests/test_parsers_appinstallation.py index 8c32466..07ca2d3 100644 --- a/tests/test_parsers_appinstallation.py +++ b/tests/test_parsers_appinstallation.py @@ -1,18 +1,23 @@ -from parsers.appinstallation import parse_path, get_log_files +from parsers.appinstallation import AppInstallationParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersAppinstallation(SysdiagnoseTestCase): def test_get_appinstallation(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - print(f'Parsing {files}') + for case_id, case in self.sd.cases().items(): + p = 
AppInstallationParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + if not files: continue - self.assertEqual(len(files), 1) - result = parse_path(log_root_path) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertTrue('application' in result) self.assertTrue('asset' in result) self.assertTrue('client' in result) diff --git a/tests/test_parsers_brctl.py b/tests/test_parsers_brctl.py index 01381c6..1a3ff17 100644 --- a/tests/test_parsers_brctl.py +++ b/tests/test_parsers_brctl.py @@ -1,16 +1,22 @@ -from parsers.brctl import parse_path, get_log_files +from parsers.brctl import BrctlParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersBrctl(SysdiagnoseTestCase): def test_parsebrctl(self): - for log_root_path in self.log_root_paths: - folders = get_log_files(log_root_path) + for case_id, case in self.sd.cases().items(): + p = BrctlParser(self.sd.config, case_id=case_id) + folders = p.get_log_files() self.assertEqual(len(folders), 1) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + print(f'Parsing {folders}') - result = parse_path(log_root_path) + result = p.get_result() if result: self.assertTrue('containers' in result) self.assertTrue('boot_history' in result) diff --git a/tests/test_parsers_containermanager.py b/tests/test_parsers_containermanager.py index c5278fa..eb6efcd 100644 --- a/tests/test_parsers_containermanager.py +++ b/tests/test_parsers_containermanager.py @@ -1,15 +1,21 @@ -from parsers.containermanager import parse_path, get_log_files +from parsers.containermanager import ContainerManagerParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersContainermanager(SysdiagnoseTestCase): def test_parsecontainermanager(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - print(f'Parsing {files}') - result = parse_path(log_root_path) + for case_id, case in self.sd.cases().items(): + p = ContainerManagerParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() for item in result: self.assertTrue('timestamp' in item) self.assertTrue('loglevel' in item) diff --git a/tests/test_parsers_itunesstore.py b/tests/test_parsers_itunesstore.py index 42a184f..1836069 100644 --- a/tests/test_parsers_itunesstore.py +++ b/tests/test_parsers_itunesstore.py @@ -1,18 +1,24 @@ -from parsers.itunesstore import parse_path, get_log_files +from parsers.itunesstore import iTunesStoreParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersIntunesstore(SysdiagnoseTestCase): def test_get_itunesstore(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) + for case_id, case in self.sd.cases().items(): + p = iTunesStoreParser(self.sd.config, case_id=case_id) + files = p.get_log_files() if not files: continue + self.assertEqual(len(files), 1) - print(f'Parsing {files}') - result = parse_path(log_root_path) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertTrue('application_id' in result) self.assertTrue('download' in result) self.assertTrue('persistent_manager' in result) diff --git a/tests/test_parsers_logarchive.py b/tests/test_parsers_logarchive.py index c04fdae..aa05151 100644 --- 
a/tests/test_parsers_logarchive.py +++ b/tests/test_parsers_logarchive.py @@ -1,56 +1,45 @@ -from parsers.logarchive import get_log_files, parse_path, parse_path_to_folder, convert_entry_to_unifiedlog_format, convert_unifiedlog_time_to_datetime, convert_native_time_to_unifiedlog_format +from parsers.logarchive import LogarchiveParser from tests import SysdiagnoseTestCase import os -import tempfile import unittest import json class TestParsersLogarchive(SysdiagnoseTestCase): - def test_get_logs_outputdir(self): - for log_root_path in self.log_root_paths: - folders = get_log_files(log_root_path) + def test_parse_logarchive(self): + for case_id, case in self.sd.cases().items(): + print(f'Parsing logarchive for {case_id}') + p = LogarchiveParser(self.sd.config, case_id=case_id) - with tempfile.TemporaryDirectory() as tmp_outpath: - print(f'Parsing {folders} to {tmp_outpath}') - result = parse_path_to_folder(log_root_path, output_folder=tmp_outpath) - # check if folder is not empty - self.assertNotEqual(os.listdir(tmp_outpath), []) - # result should contain at least one entry (linux = stdout, mac = mention it's saved to a file) - self.assertTrue(result) + files = p.get_log_files() + self.assertTrue(len(files) > 0) - self.assertTrue(os.path.isfile(os.path.join(tmp_outpath, 'logarchive.json'))) - with open(os.path.join(tmp_outpath, 'logarchive.json'), 'r') as f: - line = f.readline() - json_data = json.loads(line) - self.assertTrue('subsystem' in json_data) - self.assertTrue('datetime' in json_data) + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) - def test_get_logs_result(self): - for log_root_path in self.log_root_paths: - folders = get_log_files(log_root_path) - print(f'Parsing {folders}') - result = parse_path(log_root_path) - self.assertGreater(len(result), 0) - self.assertTrue('subsystem' in result[0]) - self.assertTrue('datetime' in result[0]) + # we don't test getting result in memory, but check one line in the output. 
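+            # the logarchive output is expected to be JSON lines, so the first line should parse as a complete JSON record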
+ with open(p.output_file, 'r') as f: + line = f.readline() + json_data = json.loads(line) + self.assertTrue('subsystem' in json_data) + self.assertTrue('datetime' in json_data) def test_convert_native_time_to_unifiedlog(self): input = '2023-05-24 13:03:28.908085-0700' expected_output = 1684958608908084992 - result = convert_native_time_to_unifiedlog_format(input) + result = LogarchiveParser.convert_native_time_to_unifiedlog_format(input) self.assertEqual(result, expected_output) input = '2023-05-24 20:03:28.908085-0000' expected_output = 1684958608908084992 - result = convert_native_time_to_unifiedlog_format(input) + result = LogarchiveParser.convert_native_time_to_unifiedlog_format(input) self.assertEqual(result, expected_output) def test_convert_unifiedlog_time_to_datetime(self): input = 1684958608908085200 expected_output = '2023-05-24T20:03:28.908085+00:00' - result = convert_unifiedlog_time_to_datetime(input).isoformat() + result = LogarchiveParser.convert_unifiedlog_time_to_datetime(input).isoformat() self.assertEqual(result, expected_output) def test_convert_entry_to_un(self): @@ -112,7 +101,7 @@ def test_convert_entry_to_un(self): 'parentActivityIdentifier': 0, 'datetime': '2023-05-24 13:03:28.908085-0700' } - result = convert_entry_to_unifiedlog_format(input) + result = LogarchiveParser.convert_entry_to_unifiedlog_format(input) self.assertDictEqual(result, expected_output) diff --git a/tests/test_parsers_mobileactivation.py b/tests/test_parsers_mobileactivation.py index 20ce8b1..d399d7d 100644 --- a/tests/test_parsers_mobileactivation.py +++ b/tests/test_parsers_mobileactivation.py @@ -1,15 +1,21 @@ -from parsers.mobileactivation import parse_path, get_log_files +from parsers.mobileactivation import MobileActivationParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersMobileactivation(SysdiagnoseTestCase): def test_mobileactivation(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - print(f'Parsing {files}') - result = parse_path(log_root_path) + for case_id, case in self.sd.cases().items(): + p = MobileActivationParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() for item in result: self.assertTrue('timestamp' in item) self.assertTrue('loglevel' in item) diff --git a/tests/test_parsers_mobileinstallation.py b/tests/test_parsers_mobileinstallation.py index 837afb5..a7c1b7b 100644 --- a/tests/test_parsers_mobileinstallation.py +++ b/tests/test_parsers_mobileinstallation.py @@ -1,15 +1,21 @@ -from parsers.mobileinstallation import parse_path, get_log_files +from parsers.mobileinstallation import MobileInstallationParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersMobileinstallation(SysdiagnoseTestCase): def test_mobileinstallation(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - print(f'Parsing {files}') - result = parse_path(log_root_path) + for case_id, case in self.sd.cases().items(): + p = MobileInstallationParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() for item in result: self.assertTrue('timestamp' in item) self.assertTrue('loglevel' in item) diff --git a/tests/test_parsers_networkextension.py 
b/tests/test_parsers_networkextension.py index 5f2830f..f95c6aa 100644 --- a/tests/test_parsers_networkextension.py +++ b/tests/test_parsers_networkextension.py @@ -1,15 +1,22 @@ -from parsers.networkextension import parse_path, get_log_files +from parsers.networkextension import NetworkExtensionParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersNetworkExtension(SysdiagnoseTestCase): def test_networkextension(self): - for log_root_path in self.log_root_paths: - files = [log_file for log_file in get_log_files(log_root_path)] - print(f'Parsing {files}') - result = parse_path(log_root_path) + for case_id, case in self.sd.cases().items(): + p = NetworkExtensionParser(self.sd.config, case_id=case_id) + + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() # TODO below needs to be changed if https://github.com/ydkhatri/nska_deserialize/pull/3 is merged # self.assertTrue('Version' in result) seen = False diff --git a/tests/test_parsers_networkextensioncache.py b/tests/test_parsers_networkextensioncache.py index 6d94e38..b7fb0be 100644 --- a/tests/test_parsers_networkextensioncache.py +++ b/tests/test_parsers_networkextensioncache.py @@ -1,17 +1,22 @@ -from parsers.networkextensioncache import parse_path, get_log_files +from parsers.networkextensioncache import NetworkExtensionCacheParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersNetworkExtensionCache(SysdiagnoseTestCase): def test_networkextensioncache(self): - for log_root_path in self.log_root_paths: - files = [log_file for log_file in get_log_files(log_root_path)] + for case_id, case in self.sd.cases().items(): + p = NetworkExtensionCacheParser(self.sd.config, case_id=case_id) + files = p.get_log_files() if not files: continue - print(f'Parsing {files}') - result = parse_path(log_root_path) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertGreater(len(result), 0) diff --git a/tests/test_parsers_olddsc.py b/tests/test_parsers_olddsc.py index 1a81929..8b43ca2 100644 --- a/tests/test_parsers_olddsc.py +++ b/tests/test_parsers_olddsc.py @@ -1,15 +1,21 @@ -from parsers.olddsc import parse_path, get_log_files +from parsers.olddsc import OldDscParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersOlddsc(SysdiagnoseTestCase): def test_parse_olddsc_file(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - print(f'Parsing {files}') - result = parse_path(log_root_path) + for case_id, case in self.sd.cases().items(): + p = OldDscParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertTrue('Unslid_Base_Address' in result) self.assertTrue('Cache_UUID_String' in result) self.assertTrue('Binaries' in result) diff --git a/tests/test_parsers_plist.py b/tests/test_parsers_plist.py index 522570a..33f4291 100644 --- a/tests/test_parsers_plist.py +++ b/tests/test_parsers_plist.py @@ -1,16 +1,23 @@ -from parsers.plists import parse_path, get_log_files +from parsers.plists import PlistParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersPlist(SysdiagnoseTestCase): def test_get_plists(self): - for log_root_path in self.log_root_paths: - files 
= get_log_files(log_root_path) - self.assertGreater(len(files), 0) - print(f'Parsing {files}') - result = parse_path(log_root_path) + for case_id, case in self.sd.cases().items(): + p = PlistParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + # first run to store in memory + result = p.get_result() + + p.save_result(force=True) + self.assertTrue(os.path.isdir(p.output_folder)) + self.assertGreater(len(result), 0) print(result.keys()) self.assertIn('hidutil.plist', result.keys()) diff --git a/tests/test_parsers_powerlogs.py b/tests/test_parsers_powerlogs.py index 558e01c..19fd0a4 100644 --- a/tests/test_parsers_powerlogs.py +++ b/tests/test_parsers_powerlogs.py @@ -1,15 +1,21 @@ -from parsers.powerlogs import parse_path, get_log_files +from parsers.powerlogs import PowerLogsParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersPowerlogs(SysdiagnoseTestCase): def test_get_powerlogs(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - print(f'Parsing {files}') - result = parse_path(log_root_path) + for case_id, case in self.sd.cases().items(): + p = PowerLogsParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() if result: # some files are empty self.assertTrue('sqlite_sequence' in result) self.assertIsInstance(result['sqlite_sequence'], list) diff --git a/tests/test_parsers_ps.py b/tests/test_parsers_ps.py index 0f44656..4ca5a47 100644 --- a/tests/test_parsers_ps.py +++ b/tests/test_parsers_ps.py @@ -1,19 +1,25 @@ -from parsers import ps +from parsers.ps import PsParser from tests import SysdiagnoseTestCase import unittest import tempfile +import os class TestParsersPs(SysdiagnoseTestCase): def test_parse_ps(self): - for log_root_path in self.log_root_paths: - files = ps.get_log_files(log_root_path) + for case_id, case in self.sd.cases().items(): + + p = PsParser(self.sd.config, case_id=case_id) + + files = p.get_log_files() self.assertTrue(len(files) > 0) - print(f'Parsing {files}') - result = ps.parse_path(log_root_path) - if result: # not all logs contain data - for item in result: + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + if p.get_result(): # not all logs contain data + for item in p.get_result(): self.assertTrue('COMMAND' in item) self.assertTrue('PID' in item) self.assertTrue('USER' in item) @@ -29,7 +35,7 @@ def test_parse_ps_lower_than_v16(self): tmp_inputfile = tempfile.NamedTemporaryFile() with open(tmp_inputfile.name, 'w') as f: f.write('\n'.join(input)) - result = ps.parse_ps(tmp_inputfile.name) + result = PsParser.parse_file(tmp_inputfile.name) tmp_inputfile.close() self.assertEqual(result, expected_result) @@ -44,7 +50,7 @@ def test_parse_ps_newer_than_v16(self): tmp_inputfile = tempfile.NamedTemporaryFile() with open(tmp_inputfile.name, 'w') as f: f.write('\n'.join(input)) - result = ps.parse_ps(tmp_inputfile.name) + result = PsParser.parse_file(tmp_inputfile.name) tmp_inputfile.close() self.assertEqual(result, expected_result) @@ -61,7 +67,7 @@ def test_ps_exclude_known_goods(self): {'COMMAND': 'bad', 'PID': 2}, {'COMMAND': 'unknown', 'PID': 3} ] - result = ps.exclude_known_goods(processes, known_good) + result = PsParser.exclude_known_goods(processes, known_good) self.assertEqual(result, expected_result) diff --git a/tests/test_parsers_psthread.py 
b/tests/test_parsers_psthread.py index 1288b35..e0af889 100644 --- a/tests/test_parsers_psthread.py +++ b/tests/test_parsers_psthread.py @@ -1,16 +1,21 @@ -from parsers.psthread import parse_path, get_log_files +from parsers.psthread import PsThreadParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersPsthread(SysdiagnoseTestCase): def test_parse_psthread(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) + for case_id, case in self.sd.cases().items(): + p = PsThreadParser(self.sd.config, case_id=case_id) + files = p.get_log_files() self.assertTrue(len(files) > 0) - print(f'Parsing {files}') - result = parse_path(log_root_path) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() if result: # not all logs contain data for item in result: self.assertTrue('COMMAND' in item) diff --git a/tests/test_parsers_remotectl_dumpstate.py b/tests/test_parsers_remotectl_dumpstate.py index 3d35e9c..8f0cee5 100644 --- a/tests/test_parsers_remotectl_dumpstate.py +++ b/tests/test_parsers_remotectl_dumpstate.py @@ -1,16 +1,24 @@ -from parsers.remotectl_dumpstate import parse_path, get_log_files +from parsers.remotectl_dumpstate import RemotectlDumpstateParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersRemotectlDumpstate(SysdiagnoseTestCase): def test_get_remotectldumpstate(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - print(f'Parsing {files}') - parse_path(log_root_path) - # just test for no exceptions + for case_id, case in self.sd.cases().items(): + p = RemotectlDumpstateParser(self.sd.config, case_id=case_id) + + files = p.get_log_files() + self.assertEqual(len(files), 1) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() + if result: + self.assertTrue('Local device' in result) if __name__ == '__main__': diff --git a/tests/test_parsers_security_sysdiagnose.py b/tests/test_parsers_security_sysdiagnose.py index fa418a0..16087e2 100644 --- a/tests/test_parsers_security_sysdiagnose.py +++ b/tests/test_parsers_security_sysdiagnose.py @@ -1,16 +1,24 @@ -from parsers.security_sysdiagnose import parse_path, get_log_files, process_buffer_keychain_state, process_buffer_client, process_buffer_keys_and_values +from parsers.security_sysdiagnose import SecuritySysdiagnoseParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersSecuritySysdiagnose(SysdiagnoseTestCase): def test_get_security_sysdiagnose(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - print(f'Parsing {files}') - parse_path(log_root_path) - # just test for no exceptions + for case_id, case in self.sd.cases().items(): + p = SecuritySysdiagnoseParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertEqual(len(files), 1) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() + if result: + # test for no errors + self.assertEqual(result.get('errors'), []) def test_process_buffer_keychain_state(self): input = [ @@ -23,7 +31,7 @@ def test_process_buffer_keychain_state(self): ] } result = {} - process_buffer_keychain_state(input, result) + SecuritySysdiagnoseParser.process_buffer_keychain_state(input, result) self.maxDiff = None self.assertDictEqual(result, expected_output) @@ -42,7 +50,7 @@ def test_process_buffer_client(self): ] } result = {} - 
process_buffer_client(input, result) + SecuritySysdiagnoseParser.process_buffer_client(input, result) self.maxDiff = None self.assertDictEqual(result, expected_output) @@ -57,7 +65,7 @@ def test_process_buffer_keys_and_values(self): } } result = {} - process_buffer_keys_and_values(input, result) + SecuritySysdiagnoseParser.process_buffer_keys_and_values(input, result) self.maxDiff = None self.assertDictEqual(result, expected_output) diff --git a/tests/test_parsers_shutdownlogs.py b/tests/test_parsers_shutdownlogs.py index cbc1385..204f248 100644 --- a/tests/test_parsers_shutdownlogs.py +++ b/tests/test_parsers_shutdownlogs.py @@ -1,16 +1,21 @@ -from parsers.shutdownlogs import parse_path, get_log_files +from parsers.shutdownlogs import ShutdownLogsParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersShutdownlogs(SysdiagnoseTestCase): def test_parse_shutdownlog(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) + for case_id, case in self.sd.cases().items(): + p = ShutdownLogsParser(self.sd.config, case_id=case_id) + files = p.get_log_files() self.assertTrue(len(files) > 0) - print(f'Parsing {files}') - result = parse_path(log_root_path) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertGreater(len(result), 0) for shutdown in result.values(): for process in shutdown: diff --git a/tests/test_parsers_spindumpnosymbols.py b/tests/test_parsers_spindumpnosymbols.py index b2f1c3a..5e203e9 100644 --- a/tests/test_parsers_spindumpnosymbols.py +++ b/tests/test_parsers_spindumpnosymbols.py @@ -1,16 +1,21 @@ -from parsers import spindumpnosymbols +from parsers.spindumpnosymbols import SpindumpNoSymbolsParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersSpindumpnosymbols(SysdiagnoseTestCase): def test_parsespindumpNS(self): - for log_root_path in self.log_root_paths: - files = spindumpnosymbols.get_log_files(log_root_path) + for case_id, case in self.sd.cases().items(): + p = SpindumpNoSymbolsParser(self.sd.config, case_id=case_id) + files = p.get_log_files() self.assertTrue(len(files) > 0) - print(f'Parsing {files}') - result = spindumpnosymbols.parse_path(log_root_path) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertGreater(len(result), 0) self.assertTrue('OS Version' in result) self.assertGreater(len(result['processes']), 0) @@ -24,7 +29,7 @@ def test_parse_basic(self): 'Report Version: 35.1', ] expected_result = {'Date/Time': '2023-05-24 13:29:15.759 -0700', 'End time': '2023-05-24 13:29:17.757 -0700', 'OS Version': 'iPhone OS 15.7.6 (Build 19H349)', 'Architecture': 'arm64', 'Report Version': '35.1'} - result = spindumpnosymbols.parse_basic(lines) + result = SpindumpNoSymbolsParser.parse_basic(lines) self.maxDiff = None self.assertDictEqual(expected_result, result) @@ -89,7 +94,7 @@ def test_parse_process(self): {'start': '0x1bb3f9000', 'end': '0x1bb42cfff', 'image': 'libsystem_kernel.dylib', 'UUID': 'D3BAC787-09EE-3319-BE24-4115817391E2', 'path': '/usr/lib/system/libsystem_kernel.dylib'} ] } - result = spindumpnosymbols.parse_process(lines) + result = SpindumpNoSymbolsParser.parse_process(lines) self.maxDiff = None self.assertDictEqual(expected_result, result) diff --git a/tests/test_parsers_swcutil.py b/tests/test_parsers_swcutil.py index 0192f4d..a8fd3d0 100644 --- a/tests/test_parsers_swcutil.py +++ b/tests/test_parsers_swcutil.py @@ -1,15 +1,20 @@ 
-from parsers.swcutil import parse_path, get_log_files +from parsers.swcutil import SwcutilParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersSwcutil(SysdiagnoseTestCase): def test_parseswcutil(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) + for case_id, case in self.sd.cases().items(): + p = SwcutilParser(self.sd.config, case_id=case_id) + files = p.get_log_files() self.assertTrue(len(files) > 0) - print(f'Parsing {files}') - result = parse_path(log_root_path) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertGreater(len(result), 0) self.assertTrue('headers' in result) self.assertTrue('network' in result) diff --git a/tests/test_parsers_sys.py b/tests/test_parsers_sys.py index 9b5d8ba..52819f3 100644 --- a/tests/test_parsers_sys.py +++ b/tests/test_parsers_sys.py @@ -1,17 +1,22 @@ -from parsers.sys import parse_path, get_log_files +from parsers.sys import SystemVersionParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersSys(SysdiagnoseTestCase): productinfo_keys = ['ProductName', 'ProductBuildVersion', 'ProductVersion', 'BuildID', 'SystemImageID'] def test_getProductInfo(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) + for case_id, case in self.sd.cases().items(): + p = SystemVersionParser(self.sd.config, case_id=case_id) + files = p.get_log_files() self.assertTrue(len(files) > 0) - print(f'Parsing {files}') - result = parse_path(log_root_path) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertGreater(len(result), 0) self.assertTrue(result.keys() | self.productinfo_keys == result.keys()) # check if the result contains at least the following keys diff --git a/tests/test_parsers_taskinfo.py b/tests/test_parsers_taskinfo.py index 888aa2f..ee844a6 100644 --- a/tests/test_parsers_taskinfo.py +++ b/tests/test_parsers_taskinfo.py @@ -1,16 +1,21 @@ -from parsers.taskinfo import parse_path, get_log_files +from parsers.taskinfo import TaskinfoParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersTaskinfo(SysdiagnoseTestCase): def test_get_tasks(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) + for case_id, case in self.sd.cases().items(): + p = TaskinfoParser(self.sd.config, case_id=case_id) + files = p.get_log_files() self.assertTrue(len(files) > 0) - print(f'Parsing {files}') - result = parse_path(log_root_path) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertGreater(len(result), 0) self.assertGreater(result['numb_tasks'], 0) self.assertGreater(len(result['tasks']), 0) diff --git a/tests/test_parsers_uuid2path.py b/tests/test_parsers_uuid2path.py index 8c24b6c..c23e6aa 100644 --- a/tests/test_parsers_uuid2path.py +++ b/tests/test_parsers_uuid2path.py @@ -1,17 +1,21 @@ from tests import SysdiagnoseTestCase -from parsers.uuid2path import parse_path, get_log_files +from parsers.uuid2path import UUID2PathParser import unittest +import os class TestParsersUuid2path(SysdiagnoseTestCase): def test_uuid2path(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - if not files: # not all sysdiagnose dumps have this log file + for case_id, case in self.sd.cases().items(): + p = UUID2PathParser(self.sd.config, case_id=case_id) + files = 
p.get_log_files() + if not files: continue - print(f'Parsing {files}') - result = parse_path(log_root_path) + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertGreater(len(result), 0) diff --git a/tests/test_parsers_wifi_known_networks.py b/tests/test_parsers_wifi_known_networks.py index 3407e95..90c863c 100644 --- a/tests/test_parsers_wifi_known_networks.py +++ b/tests/test_parsers_wifi_known_networks.py @@ -1,16 +1,21 @@ -from parsers.wifi_known_networks import parse_path, get_log_files +from parsers.wifi_known_networks import WifiKnownNetworksParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersWifiKnownNetworks(SysdiagnoseTestCase): def test_getKnownWifiNetworks(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) + for case_id, case in self.sd.cases().items(): + p = WifiKnownNetworksParser(self.sd.config, case_id=case_id) + files = p.get_log_files() self.assertTrue(len(files) > 0) - print(f'Parsing {files}') - result = parse_path(log_root_path) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertGreater(len(result), 0) diff --git a/tests/test_parsers_wifinetworks.py b/tests/test_parsers_wifinetworks.py index 28ac24b..d16f548 100644 --- a/tests/test_parsers_wifinetworks.py +++ b/tests/test_parsers_wifinetworks.py @@ -1,17 +1,23 @@ -from parsers.wifinetworks import parse_path, get_log_files +from parsers.wifinetworks import WifiNetworksParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersWifiNetworks(SysdiagnoseTestCase): def test_parsewifinetwork(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - # self.assertTrue(len(files) > 0) - print(f'Parsing {files}') - parse_path(log_root_path) + for case_id, case in self.sd.cases().items(): + p = WifiNetworksParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() + self.assertTrue(len(result) > 0) # not sure what to assert here as there's not always a result # if result: # for key in result.keys(): diff --git a/tests/test_parsers_wifiscan.py b/tests/test_parsers_wifiscan.py index 9e880e2..d1e7311 100644 --- a/tests/test_parsers_wifiscan.py +++ b/tests/test_parsers_wifiscan.py @@ -1,16 +1,21 @@ -from parsers.wifiscan import parse_path, get_log_files +from parsers.wifiscan import WifiScanParser from tests import SysdiagnoseTestCase import unittest +import os class TestParsersWifiScan(SysdiagnoseTestCase): def test_parsewifiscan(self): - for log_root_path in self.log_root_paths: - files = get_log_files(log_root_path) - # self.assertTrue(len(files) > 0) # not all sysdiagnose have wifiscan logs - print(f'Parsing {files}') - result = parse_path(log_root_path) + for case_id, case in self.sd.cases().items(): + p = WifiScanParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() self.assertGreater(len(result), 0) self.assertTrue('total' in result[0]) diff --git a/tests/test_parsers_wifisecurity.py b/tests/test_parsers_wifisecurity.py index 7e6b556..d0b561a 100644 --- a/tests/test_parsers_wifisecurity.py +++ b/tests/test_parsers_wifisecurity.py @@ -1,16 +1,21 @@ -from 
parsers.wifisecurity import parse_path, get_log_files
+from parsers.wifisecurity import WifiSecurityParser
 from tests import SysdiagnoseTestCase
 import unittest
+import os
 
 
 class TestParsersWifiSecurity(SysdiagnoseTestCase):
 
     def test_get_wifi_security_log(self):
-        for log_root_path in self.log_root_paths:
-            files = get_log_files(log_root_path)
+        for case_id, case in self.sd.cases().items():
+            p = WifiSecurityParser(self.sd.config, case_id=case_id)
+            files = p.get_log_files()
             self.assertTrue(len(files) > 0)
-            print(f'Parsing {files}')
-            result = parse_path(log_root_path)
+
+            p.save_result(force=True)
+            self.assertTrue(os.path.isfile(p.output_file))
+
+            result = p.get_result()
             for item in result:
                 self.assertTrue('acct' in item)
                 self.assertTrue('agrp' in item)
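
All of the converted tests above exercise the same call sequence against the new class-based interface: build the parser with a SysdiagnoseConfig and a case_id, check get_log_files(), call save_result(force=True) and assert on output_file, then read the data back with get_result(). As a rough sketch of how a parser plugs into the BaseParserInterface defined in utils/base.py below — ExampleParser, the example.txt glob, the ./cases path and case id '1' are made up for illustration and are not part of this patch — a minimal implementation could look like this:

import glob
import os

from utils.base import BaseParserInterface, SysdiagnoseConfig


class ExampleParser(BaseParserInterface):
    # hypothetical parser, for illustration only
    description = 'Counts the lines of example.txt files found in the sysdiagnose dump'
    format = 'json'  # the result is written to parsed_data/<case_id>/<module>.json

    def __init__(self, config: SysdiagnoseConfig, case_id: str):
        super().__init__(__file__, config, case_id)

    def get_log_files(self) -> list:
        # the base class exposes the extracted sysdiagnose folder as case_data_subfolder
        log_files_globs = ['example.txt']
        log_files = []
        for log_files_glob in log_files_globs:
            log_files += glob.glob(os.path.join(self.case_data_subfolder, log_files_glob))
        return log_files

    def execute(self) -> list | dict:
        # called lazily by get_result(); the base class caches the result in memory
        # and writes it to self.output_file through save_result()
        result = []
        for log_file in self.get_log_files():
            with open(log_file, 'r') as f:
                result.append({'file': log_file, 'lines': len(f.readlines())})
        return result


if __name__ == '__main__':
    # assumes a case with id '1' was already created and its sysdiagnose archive extracted
    config = SysdiagnoseConfig(cases_path='./cases')
    p = ExampleParser(config, case_id='1')
    p.save_result(force=True)   # runs execute() and writes the JSON output file
    print(len(p.get_result()))  # answered from the in-memory cache, no re-parsing

This is exactly the sequence the tests rely on: save_result(force=True) parses once and writes the output file, and the subsequent get_result() is served from the cache.
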
diff --git a/utils/base.py b/utils/base.py
new file mode 100644
index 0000000..a982040
--- /dev/null
+++ b/utils/base.py
@@ -0,0 +1,159 @@
+from abc import ABC, abstractmethod
+import os
+import json
+import sys
+from pathlib import Path
+
+
+class SysdiagnoseConfig:
+    def __init__(self, cases_path: str):
+        self.config_folder = str(Path(os.path.dirname(os.path.abspath(__file__))).parent)
+        self.parsers_folder = os.path.join(self.config_folder, "parsers")
+        self.analysers_folder = os.path.join(self.config_folder, "analysers")
+
+        # case data is in current working directory by default
+        self.cases_root_folder = cases_path
+
+        self.cases_file = os.path.join(self.cases_root_folder, "cases.json")
+        self.data_folder = os.path.join(self.cases_root_folder, "data")
+        self.parsed_data_folder = os.path.join(self.cases_root_folder, "parsed_data")  # stay in current folder
+
+        os.makedirs(self.cases_root_folder, exist_ok=True)
+        os.makedirs(self.data_folder, exist_ok=True)
+        os.makedirs(self.parsed_data_folder, exist_ok=True)
+
+    def get_case_data_folder(self, case_id: str) -> str:
+        return os.path.join(self.data_folder, case_id)
+
+    def get_case_parsed_data_folder(self, case_id: str) -> str:
+        return os.path.join(self.parsed_data_folder, case_id)
+
+
+class BaseInterface(ABC):
+
+    description = ''  # implementation should set this
+    format = 'json'  # implementation should set this
+
+    def __init__(self, module_filename: str, config: SysdiagnoseConfig, case_id: str):
+        self.config = config
+
+        self.module_name = os.path.basename(module_filename).split('.')[0]
+        self.case_id = case_id
+        self.case_data_folder = config.get_case_data_folder(case_id)
+        self.case_parsed_data_folder = config.get_case_parsed_data_folder(case_id)
+
+        if not os.path.isdir(self.case_data_folder):
+            print(f"Case {case_id} does not exist", file=sys.stderr)
+            raise FileNotFoundError(f"Case {case_id} does not exist")
+
+        self.case_data_subfolder = next(os.scandir(self.case_data_folder)).path
+        self.output_file = os.path.join(self.case_parsed_data_folder, self.module_name + '.' + self.format)
+
+        self._result: dict | list = None  # empty result set, used for caching
+
+    def output_exists(self) -> bool:
+        """
+        Checks if the output file exists, which means the parser already ran.
+
+        WARNING: You may need to overwrite this method if your parser saves multiple files.
+
+        Returns:
+            bool: True if the output file exists, False otherwise.
+        """
+        return os.path.exists(self.output_file) and os.path.getsize(self.output_file) > 0
+
+    def get_result(self, force: bool = False) -> list | dict:
+        """
+        Retrieves the result of the parsing operation, and runs the parsing if necessary.
+        Also ensures the result is saved to the output_file, so it can be used as a cache.
+
+        Args:
+            force (bool, optional): If True, re-runs the parsing even if a cached result or output file exists. Defaults to False.
+
+        Returns:
+            list | dict: The parsed result as a list or dictionary.
+
+        WARNING: You may need to overwrite this method if your parser saves multiple files.
+        """
+        if force:
+            # force parsing
+            self._result = self.execute()
+            # content has changed, save it
+            self.save_result()
+
+        if self._result is None:
+            if self.output_exists():
+                # load existing output
+                with open(self.output_file, 'r') as f:
+                    if self.format == 'json':
+                        self._result = json.load(f)
+                    elif self.format == 'jsonl':
+                        self._result = [json.loads(line) for line in f]
+                    else:
+                        self._result = f.read()
+            else:
+                # output does not exist, and we don't have a result yet
+                self._result = self.execute()
+                # content has changed, save it
+                self.save_result()
+
+        return self._result
+
+    def save_result(self, force: bool = False, indent=None):
+        """
+        Saves the result of the parsing operation to a file.
+
+        Args:
+            force (bool, optional): If True, re-runs the parsing before saving, even if a cached result or output file exists. Defaults to False.
+
+        WARNING: You may need to overwrite this method if your parser saves multiple files.
+        """
+        # save to file
+        with open(self.output_file, 'w') as f:
+            if self.format == 'json':
+                # json.dumps is MUCH faster than json.dump, but less memory-efficient
+                # indent defaults to None as indenting is terribly slow
+                f.write(json.dumps(self.get_result(force), ensure_ascii=False, indent=indent))
+            elif self.format == 'jsonl':
+                for line in self.get_result(force):
+                    f.write(json.dumps(line, ensure_ascii=False, indent=indent))
+                    f.write('\n')
+            else:
+                f.write(self.get_result(force))
+
+    @abstractmethod
+    def execute(self) -> list | dict:
+        """
+        This method is responsible for executing the functionality of the class.
+
+        Returns:
+            list | dict: The result of the execution.
+        """
+
+        # When implementing a parser, make sure you use the self.get_log_files() method to get the log files,
+        # and then process those files using the magic you have implemented.
+        pass
+
+
+class BaseParserInterface(BaseInterface):
+
+    def __init__(self, module_filename: str, config: SysdiagnoseConfig, case_id: str):
+        super().__init__(module_filename, config, case_id)
+
+    @abstractmethod
+    def get_log_files(self) -> list:
+        """
+        Retrieves the log files used by this parser.
+
+        Returns:
+            list: A list of log files that exist.
+        """
+        pass
+
+
+class BaseAnalyserInterface(BaseInterface):
+    def __init__(self, module_filename: str, config: SysdiagnoseConfig, case_id: str):
+        super().__init__(module_filename, config, case_id)
diff --git a/utils/tabbasedhierarchy.py b/utils/tabbasedhierarchy.py
index c7bedea..0b97089 100644
--- a/utils/tabbasedhierarchy.py
+++ b/utils/tabbasedhierarchy.py
@@ -2,6 +2,7 @@ def parse_tab_based_hierarchal_file(path: str) -> list | dict:
+    result = {}
     with open(path, 'r') as f:
         lines = f.readlines()
         result = parse_block(lines)
@@ -9,7 +10,7 @@ def parse_block(lines: list) -> list | dict:
-    result = None
+    result = {}
     n = 0
     while n < len(lines):
         line = lines[n]