Skip to content

Commit

Permalink
Merge branch 'logging-chg' of https://github.com/EC-DIGIT-CSIRC/sysdi…
Browse files Browse the repository at this point in the history
…agnose into logging-chg
  • Loading branch information
dario-br committed Dec 19, 2024
2 parents 4e91d82 + 4286dd7 commit 785a9d2
Show file tree
Hide file tree
Showing 17 changed files with 101 additions and 61 deletions.
16 changes: 8 additions & 8 deletions src/sysdiagnose/analysers/ps_everywhere.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,31 +27,31 @@ def execute(self):

# processes with full path and parameters, no threads
ps_json = PsParser(self.config, self.case_id).get_result()
self.all_ps.update([p['COMMAND'] for p in ps_json])
self.all_ps.update([p['command'] for p in ps_json])
print(f"{len(self.all_ps)} entries after ps")

# processes with full path and parameters

psthread_json = PsThreadParser(self.config, self.case_id).get_result()
self.all_ps.update([p['COMMAND'] for p in psthread_json])
self.all_ps.update([p['command'] for p in psthread_json])
print(f"{len(self.all_ps)} entries after psthread")

# processes with full path, no parameters, with threads
spindumpnosymbols_json = SpindumpNoSymbolsParser(self.config, self.case_id).get_result()
for p in spindumpnosymbols_json:
if 'Process' not in p:
if 'process' not in p:
continue
try:
self.add_if_full_command_is_not_in_set(p['Path'])
# all_ps.add(f"{p['Path']}::#{len(p['threads'])}") # count is different than in taskinfo
self.add_if_full_command_is_not_in_set(p['path'])
# all_ps.add(f"{p['path']}::#{len(p['threads'])}") # count is different than in taskinfo
except KeyError:
if p['Process'] == 'kernel_task [0]':
if p['process'] == 'kernel_task [0]':
self.all_ps.add('/kernel') # is similar to the other formats
else:
self.add_if_full_command_is_not_in_set(p['Process']) # backup uption to keep trace of this anomaly
self.add_if_full_command_is_not_in_set(p['process']) # backup option to keep a trace of this anomaly
for t in p['threads']:
try:
self.add_if_full_command_is_not_in_set(f"{p['Path']}::{t['ThreadName']}")
self.add_if_full_command_is_not_in_set(f"{p['path']}::{t['thread_name']}")
except KeyError:
pass
print(f"{len(self.all_ps)} entries after spindumpnosymbols")
Expand Down
22 changes: 11 additions & 11 deletions src/sysdiagnose/analysers/ps_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ def execute(self):
all_pids = set()

ps_json = PsParser(self.config, self.case_id).get_result()
ps_dict = {int(p['PID']): p for p in ps_json}
ps_dict = {int(p['pid']): p for p in ps_json}
all_pids.update(ps_dict.keys())

psthread_json = PsThreadParser(self.config, self.case_id).get_result()
psthread_dict = {int(p['PID']): p for p in psthread_json}
psthread_dict = {int(p['pid']): p for p in psthread_json}
all_pids.update(psthread_dict.keys())

taskinfo_json = TaskinfoParser(self.config, self.case_id).get_result()
Expand All @@ -35,7 +35,7 @@ def execute(self):
if 'pid' not in p:
continue
taskinfo_dict[int(p['pid'])] = {
'PID': p['pid']
'pid': p['pid']
}
all_pids.update(taskinfo_dict.keys())

Expand All @@ -44,26 +44,26 @@ def execute(self):
spindumpnosymbols_json = SpindumpNoSymbolsParser(self.config, self.case_id).get_result()
spindumpnosymbols_dict = {}
for p in spindumpnosymbols_json:
if 'Process' not in p:
if 'process' not in p:
continue
spindumpnosymbols_dict[int(p['PID'])] = {
'PID': p['PID'],
'PPID': p.get('PPID', ''),
'COMMAND': p.get('Path', ''),
spindumpnosymbols_dict[int(p['pid'])] = {
'pid': p['pid'],
'ppid': p.get('ppid', ''),
'command': p.get('path', ''),
}

matrix = {}
all_pids = list(all_pids)
all_pids.sort()
for pid in all_pids:
matrix[pid] = {
'cmd': ps_dict.get(pid, {}).get('COMMAND'),
'cmd': ps_dict.get(pid, {}).get('command'),
}

# '%CPU', '%MEM', 'F', 'NI',
# 'PRI', 'RSS',
# 'STARTED', 'STAT', 'TIME', 'TT', 'USER', 'VSZ'
for col in ['PID']:
for col in ['pid']:
ps_val = str(ps_dict.get(pid, {}).get(col))
psthread_val = str(psthread_dict.get(pid, {}).get(col))
taskinfo_val = str(taskinfo_dict.get(pid, {}).get(col))
Expand All @@ -75,7 +75,7 @@ def execute(self):
else: # different
matrix[pid][col] = f"{ps_val} != {psthread_val} != {taskinfo_val} != {spindump_val}"

for col in ['PPID']:
for col in ['ppid']:
ps_val = str(ps_dict.get(pid, {}).get(col))
psthread_val = str(psthread_dict.get(pid, {}).get(col))
spindump_val = str(spindumpnosymbols_dict.get(pid, {}).get(col))
Expand Down
1 change: 1 addition & 0 deletions src/sysdiagnose/parsers/appinstallation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

class AppInstallationParser(BaseParserInterface):
description = "Parsing app installation logs"
format = 'jsonl'

def __init__(self, config: dict, case_id: str):
super().__init__(__file__, config, case_id)
Expand Down
3 changes: 1 addition & 2 deletions src/sysdiagnose/parsers/brctl.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@
import os
from sysdiagnose.utils.base import BaseParserInterface

# TODO brctl analyser for boot_history section -> timeline


# TODO brctl analyser for boot_history section -> timeline
class BrctlParser(BaseParserInterface):
description = "Parsing brctl files"

Expand Down
2 changes: 1 addition & 1 deletion src/sysdiagnose/parsers/lockdownd.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import glob
import os
from sysdiagnose.utils.base import BaseParserInterface
from datetime import datetime, timezone
from datetime import datetime
import re


Expand Down
25 changes: 21 additions & 4 deletions src/sysdiagnose/parsers/olddsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
import glob
import os
from sysdiagnose.utils.misc import load_plist_file_as_json
from sysdiagnose.utils.base import BaseParserInterface
from sysdiagnose.utils.base import BaseParserInterface, logger


class OldDscParser(BaseParserInterface):
description = "Parsing olddsc files"
format = 'jsonl'

json_pretty = False

def __init__(self, config: dict, case_id: str):
Expand All @@ -28,10 +30,25 @@ def get_log_files(self) -> dict:

return log_files

def execute(self) -> list | dict:
def execute(self) -> list:
timestamp_dict = {}
timestamp = self.sysdiagnose_creation_datetime
timestamp_dict['timestamp'] = timestamp.timestamp()
timestamp_dict['datetime'] = timestamp.isoformat(timespec='microseconds')
timestamp_dict['timestamp_desc'] = 'sysdiagnose creation'

entries = []
# we're not doing anything with
# - Unslid_Base_Address
# - Cache_UUID_String
# only acting on Binaries list
for log_file in self.get_log_files():
return OldDscParser.parse_file(log_file)
return {'error': ['No olddsc files present']}
for entry in OldDscParser.parse_file(log_file).get('Binaries', []):
entry.update(timestamp_dict)
entries.append(entry)
if not entries:
logger.warning('No olddsc files present')
return entries

def parse_file(path: str) -> list | dict:
try:
Expand Down
7 changes: 4 additions & 3 deletions src/sysdiagnose/parsers/ps.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#

from sysdiagnose.utils.base import BaseParserInterface, logger
from sysdiagnose.utils.misc import snake_case
import glob
import os
import re
Expand Down Expand Up @@ -49,7 +50,7 @@ def parse_file(self, filename):
# merge last entries together, as last entry may contain spaces
for col in range(header_length):
# try to cast as int, float and fallback to string
col_name = header[col]
col_name = snake_case(header[col])
try:
entry[col_name] = int(patterns[col])
continue
Expand Down Expand Up @@ -81,10 +82,10 @@ def exclude_known_goods(processes: dict, known_good: dict) -> list[dict]:
dict: The updated list of processes with known good processes excluded.
"""

known_good_cmd = [x['COMMAND'] for x in known_good]
known_good_cmd = [x['command'] for x in known_good]

for proc in processes:
if proc['COMMAND'] in known_good_cmd:
if proc['command'] in known_good_cmd:
processes.remove(proc)

return processes
22 changes: 16 additions & 6 deletions src/sysdiagnose/parsers/psthread.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
import glob
import os
import re
from sysdiagnose.utils.base import BaseParserInterface
from sysdiagnose.utils.base import BaseParserInterface, logger
from sysdiagnose.utils.misc import snake_case


class PsThreadParser(BaseParserInterface):
description = "Parsing ps_thread.txt file"
format = 'jsonl'

def __init__(self, config: dict, case_id: str):
super().__init__(__file__, config, case_id)
Expand All @@ -31,8 +33,15 @@ def get_log_files(self) -> list:

return log_files

def execute(self) -> list | dict:
# TODO not really easy to conver to timebased jsonl, as the timestamp is complex to compute.
def execute(self) -> list:
# not really easy to convert to true time-based jsonl, as the timestamp is complex to compute.
# so we just fall back to the sysdiagnose creation timestamp
timestamp_dict = {}
timestamp = self.sysdiagnose_creation_datetime
timestamp_dict['timestamp'] = timestamp.timestamp()
timestamp_dict['datetime'] = timestamp.isoformat(timespec='microseconds')
timestamp_dict['timestamp_desc'] = 'sysdiagnose creation'
timestamp_dict['THREADS'] = 1

result = []
try:
Expand All @@ -47,11 +56,11 @@ def execute(self) -> list | dict:
result.append(row)

patterns = line.strip().split(None, header_length - 1)
row = {'THREADS': 1}
row = timestamp_dict.copy()
# merge last entries together, as last entry may contain spaces
for col in range(header_length):
# try to cast as int, float and fallback to string
col_name = header[col]
col_name = snake_case(header[col])
try:
row[col_name] = int(patterns[col])
continue
Expand All @@ -67,4 +76,5 @@ def execute(self) -> list | dict:
result.append(row)
return result
except IndexError:
return {'error': 'No ps_thread.txt file present'}
logger.warning('No ps_thread.txt file present')
return []
13 changes: 10 additions & 3 deletions src/sysdiagnose/parsers/sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
import os
import glob
import sysdiagnose.utils.misc as misc
from sysdiagnose.utils.base import BaseParserInterface
from sysdiagnose.utils.base import BaseParserInterface, logger


class SystemVersionParser(BaseParserInterface):
description = "Parsing SystemVersion plist file"
format = 'jsonl'

def __init__(self, config: dict, case_id: str):
super().__init__(__file__, config, case_id)
Expand All @@ -30,9 +31,15 @@ def get_log_files(self) -> list:

def execute(self) -> list | dict:
try:
return SystemVersionParser.parse_file(self.get_log_files()[0])
entry = SystemVersionParser.parse_file(self.get_log_files()[0])
timestamp = self.sysdiagnose_creation_datetime
entry['timestamp_desc'] = 'sysdiagnose creation'
entry['timestamp'] = timestamp.timestamp()
entry['datetime'] = timestamp.isoformat(timespec='microseconds')
return [entry]
except IndexError:
return {'error': 'No SystemVersion.plist file present'}
logger.warning('No SystemVersion.plist file present')
return []

def parse_file(path: str) -> list | dict:
return misc.load_plist_file_as_json(path)
Expand Down
1 change: 1 addition & 0 deletions src/sysdiagnose/parsers/transparency.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from sysdiagnose.utils.base import BaseParserInterface, logger


# FIXME convert to timeline
class TransparencyParser(BaseParserInterface):

description = "Parsing transparency.log json file"
Expand Down
1 change: 1 addition & 0 deletions src/sysdiagnose/parsers/wifi_known_networks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from sysdiagnose.utils.base import BaseParserInterface, logger


# FIXME convert to timeline - warning: format changes from one iOS version to another
class WifiKnownNetworksParser(BaseParserInterface):
description = "Parsing Known Wifi Networks plist file"

Expand Down
1 change: 1 addition & 0 deletions src/sysdiagnose/parsers/wifinetworks.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from sysdiagnose.utils.base import BaseParserInterface


# FIXME convert to timeline
class WifiNetworksParser(BaseParserInterface):

description = "Parsing com.apple.wifi plist files"
Expand Down
2 changes: 1 addition & 1 deletion src/sysdiagnose/utils/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,4 @@ def find_bytes(d):

def snake_case(s):
# lowercase, then replace every character other than a-z, 0-9 and % with _
return re.sub(r'[^a-zA-Z0-9]', '_', s.lower())
return re.sub(r'[^a-zA-Z0-9%]', '_', s.lower())
10 changes: 6 additions & 4 deletions tests/test_parsers_olddsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ def test_parse_olddsc_file(self):
self.assertTrue(os.path.isfile(p.output_file))

result = p.get_result()
self.assertTrue('Unslid_Base_Address' in result)
self.assertTrue('Cache_UUID_String' in result)
self.assertTrue('Binaries' in result)
self.assertTrue(len(result['Binaries']) > 0)
for entry in result:
self.assertTrue('Load_Address' in entry)
# self.assertTrue('Unslid_Base_Address' in result)
# self.assertTrue('Cache_UUID_String' in result)
# self.assertTrue('Binaries' in result)
# self.assertTrue(len(result['Binaries']) > 0)


if __name__ == '__main__':
Expand Down
22 changes: 11 additions & 11 deletions tests/test_parsers_ps.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ def test_parse_ps(self):

if p.get_result(): # not all logs contain data
for item in p.get_result():
self.assertTrue('COMMAND' in item)
self.assertTrue('PID' in item)
self.assertTrue('USER' in item)
self.assertTrue('command' in item)
self.assertTrue('pid' in item)
self.assertTrue('user' in item)

def test_parse_ps_lower_than_v16(self):
input = [
Expand All @@ -32,7 +32,7 @@ def test_parse_ps_lower_than_v16(self):
]
expected_result = [
{
'USER': 'root', 'UID': 0, 'PID': 1, 'PPID': 0, '%CPU': 0.0, '%MEM': 0.4, 'PRI': 37, 'NI': 0, 'VSZ': 4226848, 'RSS': 8912, 'WCHAN': '-', 'TT': '??', 'STAT': 'Ss', 'STARTED': '14Jan19', 'TIME': '7:27.40', 'COMMAND': '/sbin/launchd with space',
'user': 'root', 'uid': 0, 'pid': 1, 'ppid': 0, '%cpu': 0.0, '%mem': 0.4, 'pri': 37, 'ni': 0, 'vsz': 4226848, 'rss': 8912, 'wchan': '-', 'tt': '??', 'stat': 'Ss', 'started': '14Jan19', 'time': '7:27.40', 'command': '/sbin/launchd with space',
'timestamp_desc': 'sysdiagnose creation',
'timestamp': 1.0,
'datetime': '1970-01-01T00:00:01.000000+00:00'
Expand All @@ -54,7 +54,7 @@ def test_parse_ps_newer_than_v16(self):
]
expected_result = [
{
'USER': 'root', 'UID': 0, 'PRSNA': '-', 'PID': 1, 'PPID': 0, 'F': 4004, '%CPU': 0.0, '%MEM': 0.0, 'PRI': 0, 'NI': 0, 'VSZ': 0, 'RSS': 0, 'WCHAN': '-', 'TT': '??', 'STAT': '?s', 'STARTED': 'Tue09PM', 'TIME': '0:00.00', 'COMMAND': '/sbin/launchd',
'user': 'root', 'uid': 0, 'prsna': '-', 'pid': 1, 'ppid': 0, 'f': 4004, '%cpu': 0.0, '%mem': 0.0, 'pri': 0, 'ni': 0, 'vsz': 0, 'rss': 0, 'wchan': '-', 'tt': '??', 'stat': '?s', 'started': 'Tue09PM', 'time': '0:00.00', 'command': '/sbin/launchd',
'timestamp_desc': 'sysdiagnose creation',
'timestamp': 1.0,
'datetime': '1970-01-01T00:00:01.000000+00:00'
Expand All @@ -71,16 +71,16 @@ def test_parse_ps_newer_than_v16(self):

def test_ps_exclude_known_goods(self):
processes = [
{'COMMAND': 'good', 'PID': 1},
{'COMMAND': 'bad', 'PID': 2},
{'COMMAND': 'unknown', 'PID': 3}
{'command': 'good', 'pid': 1},
{'command': 'bad', 'pid': 2},
{'command': 'unknown', 'pid': 3}
]
known_good = [
{'COMMAND': 'good', 'PID': 1}
{'command': 'good', 'pid': 1}
]
expected_result = [
{'COMMAND': 'bad', 'PID': 2},
{'COMMAND': 'unknown', 'PID': 3}
{'command': 'bad', 'pid': 2},
{'command': 'unknown', 'pid': 3}
]
result = PsParser.exclude_known_goods(processes, known_good)
self.assertEqual(result, expected_result)
Expand Down
Loading

0 comments on commit 785a9d2

Please sign in to comment.