Skip to content

Commit

Permalink
fix: [crashlogs] better parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
cvandeplas committed Nov 6, 2024
1 parent d7a6723 commit e71238e
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 7 deletions.
17 changes: 14 additions & 3 deletions src/sysdiagnose/parsers/crashlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import re
import json
from datetime import datetime, timezone
from sysdiagnose.utils.misc import load_plist_string_as_json
# from pycrashreport.crash_report import get_crash_report_from_file


Expand Down Expand Up @@ -38,6 +39,11 @@ def get_log_files(self) -> list:
for log_files_glob in log_files_globs:
log_files.extend(glob.glob(os.path.join(self.case_data_folder, log_files_glob), recursive=True))

# exclude some files
exclusion_strings = ['WiFiLQMMetrics', 'OTAUpdate']
for exclusion_string in exclusion_strings:
log_files = [x for x in log_files if exclusion_string not in x]

return log_files

def execute(self) -> list | dict:
Expand Down Expand Up @@ -79,8 +85,9 @@ def parse_ips_file(path: str) -> list | dict:

def process_ips_lines(lines: list) -> dict:
'''
There are 2 main models of crashlogs:
There are multiple main models of crashlogs:
- one big entry nicely structured in json.
- one big entry nicely structured as plist
- pseudo-structured text. with multiple powerstats entries
'''
result = {}
Expand All @@ -89,6 +96,11 @@ def process_ips_lines(lines: list) -> dict:
result = json.loads('\n'.join(lines))
return result

# next section is plist structure
if lines[0].startswith('<?xml') and lines[len(lines) - 1].strip().endswith('</plist>'):
result = load_plist_string_as_json('\n'.join(lines))
return result

# next section is structured text
# either key: value
# or key:
Expand Down Expand Up @@ -227,8 +239,7 @@ def split_thread(line) -> dict:
def split_binary_images(line) -> dict:
# need to be regexp based
# option 1: image_offset_start image_offset_end image_name uuid path
m = re.search(r'\s*(\w+) -\s+([^\s]+)\s+([^<]+)<([^>]+)>\s+(.+)', line)

m = re.search(r'\s*(\w+) -\s+([^\s]+)\s+([^<]+)<([^>]+)>\s*(.*)', line)
elements = m.groups()
result = {
'image_offset_start': elements[0].strip(),
Expand Down
2 changes: 1 addition & 1 deletion src/sysdiagnose/utils/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def load_plist_file_as_json(fname: str):


def load_plist_string_as_json(plist_string: str | bytes):
    """Deserialize an in-memory plist document and return it in JSON-serializable form.

    Args:
        plist_string: The plist document, either as text or as already-encoded bytes.

    Returns:
        The result of ``json_serializable()`` applied to the deserialized plist.
    """
    # Callers may hand over text or bytes. Only text needs encoding; calling
    # .encode() unconditionally would raise AttributeError on bytes input.
    if isinstance(plist_string, str):
        plist_string = plist_string.encode()
    plist = nska_deserialize.deserialize_plist_from_string(plist_string, full_recurse_convert_nska=True, format=dict)
    return json_serializable(plist)


Expand Down
2 changes: 1 addition & 1 deletion src/sysdiagnose/utils/multilinelog.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def build_from_logentry(line):
plist_start = line.index('<?xml version')
entry['msg'] = line[:plist_start].strip()
plist_data = line[plist_start:]
entry['plist'] = misc.load_plist_string_as_json(plist_data.encode())
entry['plist'] = misc.load_plist_string_as_json(plist_data)
# LATER parse the plist content
# - extract the recursive plist
# - decode the certificates into nice JSON
Expand Down
39 changes: 37 additions & 2 deletions tests/test_parsers_crashlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def test_split_binary_images(self):
lines = [
' 0x123456000 - ??? com.apple.foo (1) <5BFC3EC3-2045-4F95-880A-DEC88832F639> /System/Library/bar',
' 0x123456000 - 0x123456fff libhello <5BFC3EC3-2045-4F95-880A-DEC88832F639> /usr/lib/hello',
'0x123456000 - 0x123456fff FooBar arm64 <5BFC3EC320454F95880ADEC88832F639> /System/Library/bar'
'0x123456000 - 0x123456fff FooBar arm64 <5BFC3EC320454F95880ADEC88832F639> /System/Library/bar',
'0x123456000 - ??? ??? <5BFC3EC3-2045-4F95-880A-DEC88832F639>',
]
expected_results = [
{'image_offset_start': '0x123456000', 'image_offset_end': '???',
Expand All @@ -59,7 +60,10 @@ def test_split_binary_images(self):
{'image_offset_start': '0x123456000', 'image_offset_end': '0x123456fff',
'image_name': 'FooBar arm64',
'uuid': '5BFC3EC320454F95880ADEC88832F639',
'path': '/System/Library/bar'}
'path': '/System/Library/bar'},
{'image_offset_start': '0x123456000', 'image_offset_end': '???',
'image_name': '???',
'uuid': '5BFC3EC3-2045-4F95-880A-DEC88832F639', 'path': ''}
]
for line, expected_result in zip(lines, expected_results, strict=True):
result = CrashLogsParser.split_binary_images(line)
Expand All @@ -82,6 +86,37 @@ def test_split_thread_crashes_with_arm_thread_state(self):
result = CrashLogsParser.split_thread_crashes_with_arm_thread_state(line)
self.assertEqual(result, expected_result)

def test_process_ips_lines_json(self):
    # JSON-formatted input: a one-line document is expected back as a dict.
    ips_lines = ['{"foo": "bar"}']
    parsed = CrashLogsParser.process_ips_lines(ips_lines)
    self.assertEqual(parsed, {"foo": "bar"})

def test_process_ips_lines_plist(self):
    # XML plist input: starts with the XML declaration and ends with </plist>,
    # so it should be routed through the plist branch of process_ips_lines.
    plist_lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">',
        '<plist version="1.0">',
        '<dict>',
        '    <key>foo</key>',
        '    <string>bar</string>',
        '</dict>',
        '</plist>',
    ]
    parsed = CrashLogsParser.process_ips_lines(plist_lines)
    self.assertEqual(parsed, {"foo": "bar"})

def test_process_ips_lines_text(self):
    # Pseudo-structured text input: a single "key: value" line is expected
    # back as a one-entry dict.
    text_lines = ['foo: bar']
    parsed = CrashLogsParser.process_ips_lines(text_lines)
    self.assertEqual(parsed, {"foo": "bar"})


if __name__ == '__main__':
unittest.main()

0 comments on commit e71238e

Please sign in to comment.