From b7cae5e94812725de6c228959a946325b5abaa0e Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Thu, 29 Aug 2024 15:55:31 +0200 Subject: [PATCH] new: [crashlogs] initial crashlogs parser + updated timeliner fixes #70 --- analysers/timeliner.py | 37 ++++++-- parsers/crashlogs.py | 159 +++++++++++++++++++++++++++++++- tests/test_parsers_crashlogs.py | 25 +++++ 3 files changed, 209 insertions(+), 12 deletions(-) create mode 100644 tests/test_parsers_crashlogs.py diff --git a/analysers/timeliner.py b/analysers/timeliner.py index 8df71af..8c4fd54 100644 --- a/analysers/timeliner.py +++ b/analysers/timeliner.py @@ -14,6 +14,7 @@ from parsers.shutdownlogs import ShutdownLogsParser from parsers.wifisecurity import WifiSecurityParser from parsers.wifi_known_networks import WifiKnownNetworksParser +from parsers.crashlogs import CrashLogsParser from collections.abc import Generator from utils.base import BaseAnalyserInterface @@ -30,7 +31,7 @@ def __init__(self, config: dict, case_id: str): # Mandatory: timestamps must be in microseconds !!! # {"message": "A message","timestamp": 123456789,"datetime": "2015-07-24T19:01:01+00:00","timestamp_desc": "Write time","extra_field_1": "foo"} - def __extract_ts_mobileactivation(self) -> Generator[dict, None, None]: + def a__extract_ts_mobileactivation(self) -> Generator[dict, None, None]: try: p = MobileActivationParser(self.config, self.case_id) data = p.get_result() @@ -51,7 +52,7 @@ def __extract_ts_mobileactivation(self) -> Generator[dict, None, None]: except Exception as e: print(f"ERROR while extracting timestamp from mobileactivation file. Reason: {str(e)}") - def __extract_ts_powerlogs(self) -> Generator[dict, None, None]: + def a__extract_ts_powerlogs(self) -> Generator[dict, None, None]: try: p = PowerLogsParser(self.config, self.case_id) data = p.get_result() @@ -65,7 +66,7 @@ def __extract_ts_powerlogs(self) -> Generator[dict, None, None]: except Exception as e: print(f"ERROR while extracting timestamp from powerlogs. Reason: {str(e)}") - def __extract_ts_swcutil(self) -> Generator[dict, None, None]: + def a__extract_ts_swcutil(self) -> Generator[dict, None, None]: try: p = SwcutilParser(self.config, self.case_id) data = p.get_result() @@ -88,7 +89,7 @@ def __extract_ts_swcutil(self) -> Generator[dict, None, None]: except Exception as e: print(f"ERROR while extracting timestamp from swcutil. Reason {str(e)}") - def __extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]: + def a__extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]: try: p = AccessibilityTccParser(self.config, self.case_id) data = p.get_result() @@ -107,7 +108,7 @@ def __extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]: except Exception as e: print(f"ERROR while extracting timestamp from accessibility_tcc. Reason {str(e)}") - def __extract_ts_shutdownlogs(self) -> Generator[dict, None, None]: + def a__extract_ts_shutdownlogs(self) -> Generator[dict, None, None]: try: p = ShutdownLogsParser(self.config, self.case_id) data = p.get_result() @@ -129,7 +130,7 @@ def __extract_ts_shutdownlogs(self) -> Generator[dict, None, None]: except Exception as e: print(f"ERROR while extracting timestamp from shutdownlog. Reason: {str(e)}") - def __extract_ts_logarchive(self) -> Generator[dict, None, None]: + def a__extract_ts_logarchive(self) -> Generator[dict, None, None]: try: p = LogarchiveParser(self.config, self.case_id) data = p.get_result() @@ -150,7 +151,7 @@ def __extract_ts_logarchive(self) -> Generator[dict, None, None]: except Exception as e: print(f"ERROR while extracting timestamp from logarchive. Reason: {str(e)}") - def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]: + def a__extract_ts_wifisecurity(self) -> Generator[dict, None, None]: try: p = WifiSecurityParser(self.config, self.case_id) data = p.get_result() @@ -181,7 +182,7 @@ def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]: except Exception as e: print(f"ERROR while extracting timestamp from wifisecurity. Reason {str(e)}") - def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: + def a__extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: try: p = WifiKnownNetworksParser(self.config, self.case_id) data = p.get_result() @@ -240,6 +241,26 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: except Exception as e: print(f"ERROR while extracting timestamp from wifi_known_networks. Reason {str(e)}") + def __extract_ts_crashlogs(self) -> Generator[dict, None, None]: + try: + p = CrashLogsParser(self.config, self.case_id) + data = p.get_result() + # process summary + for event in data.get('summary', []): + if event['datetime'] == '': + continue + timestamp = datetime.fromisoformat(event['datetime']) + ts_event = { + 'message': f"Application {event['app']} crashed.", + 'timestamp': timestamp.timestamp() * 1000000, + 'datetime': event['datetime'], + 'timestamp_desc': 'Application crash' + } + yield ts_event + # no need to also process the detailed crashes, as we already have the summary + except Exception as e: + print(f"ERROR while extracting timestamp from crashlog. Reason {str(e)}") + def execute(self): # Get all the functions that start with '__extract_ts_' # and call these with the case_folder as parameter diff --git a/parsers/crashlogs.py b/parsers/crashlogs.py index b701ea9..e78e579 100644 --- a/parsers/crashlogs.py +++ b/parsers/crashlogs.py @@ -1,6 +1,10 @@ import glob import os from utils.base import BaseParserInterface +import re +import json +from datetime import datetime, timezone +# from pycrashreport.crash_report import get_crash_report_from_file class CrashLogsParser(BaseParserInterface): @@ -26,7 +30,8 @@ def __init__(self, config: dict, case_id: str): def get_log_files(self) -> list: log_files_globs = [ - 'crashes_and_spins/*.ips' + 'crashes_and_spins/*.ips', + 'summaries/crashes_and_spins.log', ] log_files = [] for log_files_glob in log_files_globs: @@ -36,9 +41,155 @@ def get_log_files(self) -> list: def execute(self) -> list | dict: files = self.get_log_files() - raise NotImplementedError("not implemented yet") + result = { + 'summary': {}, + 'crashes': {} + } for file in files: print(f"Processing file: {file}") + if file.endswith('crashes_and_spins.log'): + result['summary'] = CrashLogsParser.parse_summary_file(file) + elif os.path.basename(file).startswith('.'): + pass + elif file.endswith('.ips'): + try: + basename = os.path.basename(file) + result['crashes'][basename] = {} + result['crashes'][basename].update(CrashLogsParser.parse_ips_file(file)) + except Exception as e: + print(f"Skipping file due to error {file}: {e}") + return result - def parse_file(path: str) -> list | dict: - print(f"Parsing file: {path}") + def parse_ips_file(path: str) -> list | dict: + # identify the type of file + result = { + 'metadata': {}, + 'report': {} + } + with open(path, 'r') as f: + result['metadata'] = json.loads(f.readline()) # first line + + lines = f.readlines() + + # next section is json structure + if lines[0].startswith('{') and lines[len(lines) - 1].strip().endswith('}'): + result['report'] = json.loads('\n'.join(lines)) + return result + + # next section is structured text + # either key: value + # or key: + # multiple lines + # key: + # multiple lines + n = 0 + while n < len(lines): + line = lines[n].strip() + + if not line: + n += 1 + continue + + if ':' in line: + key, value = line.split(':', 1) + key = key.strip() + if value.strip(): + result['report'][key] = value.strip() + else: + result['report'][key] = [] + n += 1 + while n < len(lines): + line = lines[n].strip() + if not line: # end of section + break + + if 'Thread' in key and 'crashed with ARM Thread State' in key: + if result['report'][key] == []: + result['report'][key] = {} + result['report'][key].update(CrashLogsParser.split_thread_crashes_with_arm_thread_state(line)) + elif 'Binary Images' in key: + result['report'][key].append(CrashLogsParser.split_binary_images(line)) + elif 'Thread' in key: + result['report'][key].append(CrashLogsParser.split_thread(line)) + else: + result['report'][key].append(line) + n += 1 + elif line == 'EOF': + break + else: + raise Exception(f"Parser bug: Unexpected line in crashlogs at line {n}. Line: {line}") + + n += 1 + return result + + def parse_summary_file(path: str) -> list | dict: + print(f"Parsing summary file: {path}") + result = [] + with open(path, 'r') as f: + for line in f: + if not line.startswith('/'): + continue + + app, timestamp = CrashLogsParser.metadata_from_filename(line) + path = line.split(',')[0] + entry = { + 'app': app, + 'datetime': timestamp, + 'filename': os.path.basename(path), + 'path': path, + } + result.append(entry) + return result + + def split_thread_crashes_with_arm_thread_state(line) -> dict: + elements = line.split() + result = {} + for i in range(0, len(elements), 2): + if not elements[i].endswith(':'): + break # last entry is not a valid key:value + result[elements[i][:-1]] = elements[i + 1] + return result + + def split_thread(line) -> dict: + elements = line.split() + result = { + 'id': elements[0], + 'image_name': elements[1], + 'image_base': elements[2], + 'image_offset': elements[3], + 'symbol_offset': elements[5] + } + return result + + def split_binary_images(line) -> dict: + elements = line.split() + result = { + 'image_offset_start': elements[0], + 'image_offset_end': elements[2], + 'image_name': elements[3], + 'arch': elements[4], + 'uuid': elements[5][1:-1], + 'path': elements[6], + } + return result + + def metadata_from_filename(filename: str) -> tuple[str, str]: + while True: + # option 1: YYYY-MM-DD-HHMMSS + m = re.search(r'/([^/]+)-(\d{4}-\d{2}-\d{2}-\d{6})', filename) + if m: + timestamp = datetime.strptime(m.group(2), '%Y-%m-%d-%H%M%S') + break + # option 2: YYYY-MM-DD-HH-MM-SS + m = re.search(r'/([^/]+)-(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})', filename) + if m: + timestamp = datetime.strptime(m.group(2), '%Y-%m-%d-%H-%M-%S') + break + # fallback, basename + app = os.path.basename(filename) + return app, '' + + app = m.group(1) + # FIXME timezone is from local phone time at file creation. Not UTC + timestamp = timestamp.replace(tzinfo=timezone.utc) + return app, timestamp.isoformat() diff --git a/tests/test_parsers_crashlogs.py b/tests/test_parsers_crashlogs.py new file mode 100644 index 0000000..7210b6b --- /dev/null +++ b/tests/test_parsers_crashlogs.py @@ -0,0 +1,25 @@ +from parsers.crashlogs import CrashLogsParser +from tests import SysdiagnoseTestCase +import unittest +import os + + +class TestParsersCrashlogs(SysdiagnoseTestCase): + + def test_parse_psthread(self): + for case_id, case in self.sd.cases().items(): + p = CrashLogsParser(self.sd.config, case_id=case_id) + files = p.get_log_files() + self.assertTrue(len(files) > 0) + + p.save_result(force=True) + self.assertTrue(os.path.isfile(p.output_file)) + + result = p.get_result() + self.assertTrue('summary' in result) + for item in result.get('items', []): + print(item) + + +if __name__ == '__main__': + unittest.main()