From dc4d98f4e74d7c2db3c3b18cab3ae67ffa20586b Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Thu, 20 Jun 2024 10:02:04 +0200 Subject: [PATCH] chg: [parsers] ps parser version independent, json structure slightly changed --- parsers/ps.py | 88 ++++++++++++---------------------------- tests/test_parsers_ps.py | 35 +++++++++++++++- 2 files changed, 60 insertions(+), 63 deletions(-) diff --git a/parsers/ps.py b/parsers/ps.py index fe7ed7c..6c982a6 100644 --- a/parsers/ps.py +++ b/parsers/ps.py @@ -9,13 +9,12 @@ # - tree structure # - simplified # -import re import sys import json from optparse import OptionParser import glob import os - +import re parser_description = "Parsing ps.txt file" @@ -38,68 +37,35 @@ def parse_path(path: str) -> list | dict: return {'error': 'No ps.txt file present'} -def parse_ps(filename, ios_version=16): - processes = {} +def parse_ps(filename): + result = [] try: - with open(filename, "r") as fd: - fd.readline() # skip header line - # FIXME investigate if it's possible to rewrite it dynamically: extract header, and use header as key for the variables when parsing the line - for line in fd: - """ - iOS < 16 - USER UID PID PPID %CPU %MEM PRI NI VSZ RSS WCHAN TT STAT STARTED TIME COMMAND - root 0 1 0 0.0 0.4 37 0 4226848 8912 - ?? Ss 14Jan19 7:27.40 /sbin/launchd - - iOS > 16 - USER UID PRSNA PID PPID F %CPU %MEM PRI NI VSZ RSS WCHAN TT STAT STARTED TIME COMMAND - root 0 - 1 0 4004 0.0 0.0 0 0 0 0 - ?? ?s Tue09PM 0:00.00 /sbin/launchd - """ - patterns = re.split(r"\s+", line) # XXX FIXME? don't we need a \r" string here for the regexp? - # key of hash table is PID - if (ios_version < 16): - processes[int(patterns[2])] = { - "USER": patterns[0], - "UID": patterns[1], - "PID": int(patterns[2]), - "PPID": int(patterns[3]), - "CPU": patterns[4], - "MEM": patterns[5], - "PRI": patterns[6], - "NI": patterns[7], - "VSZ": patterns[8], - "RSS": patterns[9], - "WCHAN": patterns[10], - "TT": patterns[11], - "STAT": patterns[12], - "STARTED": patterns[13], - "TIME": patterns[14], - "COMMAND": "".join(patterns[15:])} - else: - # Note: bellow - attempt to create a regex but feel it will more lead to errors. Instead lets merge all parts of the commands (patterns[17:]) - # regex = r"(?P\w+)\s+(?P\d+)\s+(?\d+|\-)\s+(?\d+)\s+(?\d+)\s+(?\d+)\s+(?\d+\.\d+)\s+(?\d+\.\d+)\s+(?\d+)\s+(?\d+)\s+(?\d+)\s+(?\-)" - processes[int(patterns[3])] = { - "USER": patterns[0], - "UID": patterns[1], - "PRSNA": patterns[2], - "PID": int(patterns[3]), - "PPID": int(patterns[4]), - "F": patterns[5], - "CPU": patterns[6], - "MEM": patterns[7], - "PRI": patterns[8], - "NI": patterns[9], - "VSZ": patterns[10], - "RSS": patterns[11], - "WCHAN": patterns[12], - "TT": patterns[13], - "STAT": patterns[14], - "STARTED": patterns[15], - "TIME": patterns[16], - "COMMAND": "".join(patterns[17:])} - + with open(filename, "r") as f: + header = re.split(r"\s+", f.readline().strip()) + header_length = len(header) + + print(f"Found header: {header}") + for line in f: + patterns = re.split(r"\s+", line.strip()) + row = {} + # merge last entries together, as last entry may contain spaces + for col in range(header_length): + # try to cast as int, float and fallback to string + col_name = header[col] + try: + row[col_name] = int(patterns[col]) + continue + except ValueError: + try: + row[col_name] = float(patterns[col]) + except ValueError: + row[col_name] = patterns[col] + row[header[-1]] = " ".join(patterns[header_length - 1:]) + result.append(row) + return result except Exception as e: print(f"Could not parse ps.txt: {str(e)}") - return processes + return [] """ diff --git a/tests/test_parsers_ps.py b/tests/test_parsers_ps.py index 00125b8..c941d68 100644 --- a/tests/test_parsers_ps.py +++ b/tests/test_parsers_ps.py @@ -1,6 +1,7 @@ -from parsers.ps import parse_path, get_log_files +from parsers.ps import parse_path, get_log_files, parse_ps from tests import SysdiagnoseTestCase import unittest +import tempfile class TestParsersPs(SysdiagnoseTestCase): @@ -12,11 +13,41 @@ def test_parse_ps(self): print(f'Parsing {files}') result = parse_path(log_root_path) if result: # not all logs contain data - for item in result.values(): + for item in result: self.assertTrue('COMMAND' in item) self.assertTrue('PID' in item) self.assertTrue('USER' in item) + def test_parse_ps_lower_than_v16(self): + input = [ + 'USER UID PID PPID %CPU %MEM PRI NI VSZ RSS WCHAN TT STAT STARTED TIME COMMAND', + 'root 0 1 0 0.0 0.4 37 0 4226848 8912 - ?? Ss 14Jan19 7:27.40 /sbin/launchd with space' + ] + expected_result = [ + {'USER': 'root', 'UID': 0, 'PID': 1, 'PPID': 0, '%CPU': 0.0, '%MEM': 0.4, 'PRI': 37, 'NI': 0, 'VSZ': 4226848, 'RSS': 8912, 'WCHAN': '-', 'TT': '??', 'STAT': 'Ss', 'STARTED': '14Jan19', 'TIME': '7:27.40', 'COMMAND': '/sbin/launchd with space'} + ] + tmp_inputfile = tempfile.NamedTemporaryFile() + with open(tmp_inputfile.name, 'w') as f: + f.write('\n'.join(input)) + result = parse_ps(tmp_inputfile.name) + tmp_inputfile.close() + self.assertEqual(result, expected_result) + + def test_parse_ps_newer_than_v16(self): + input = [ + 'USER UID PRSNA PID PPID F %CPU %MEM PRI NI VSZ RSS WCHAN TT STAT STARTED TIME COMMAND', + 'root 0 - 1 0 4004 0.0 0.0 0 0 0 0 - ?? ?s Tue09PM 0:00.00 /sbin/launchd' + ] + expected_result = [ + {'USER': 'root', 'UID': 0, 'PRSNA': '-', 'PID': 1, 'PPID': 0, 'F': 4004, '%CPU': 0.0, '%MEM': 0.0, 'PRI': 0, 'NI': 0, 'VSZ': 0, 'RSS': 0, 'WCHAN': '-', 'TT': '??', 'STAT': '?s', 'STARTED': 'Tue09PM', 'TIME': '0:00.00', 'COMMAND': '/sbin/launchd'} + ] + tmp_inputfile = tempfile.NamedTemporaryFile() + with open(tmp_inputfile.name, 'w') as f: + f.write('\n'.join(input)) + result = parse_ps(tmp_inputfile.name) + tmp_inputfile.close() + self.assertEqual(result, expected_result) + if __name__ == '__main__': unittest.main()