Skip to content

Commit

Permalink
chg: [parsers] ps parser version independent, json structure slightly changed
Browse files Browse the repository at this point in the history
  • Loading branch information
cvandeplas committed Jun 20, 2024
1 parent 991bff3 commit 7c380cd
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 63 deletions.
88 changes: 27 additions & 61 deletions parsers/ps.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@
# - tree structure
# - simplified
#
import re
import sys
import json
from optparse import OptionParser
import glob
import os

import re

parser_description = "Parsing ps.txt file"

Expand All @@ -38,68 +37,35 @@ def parse_path(path: str) -> list | dict:
return {'error': 'No ps.txt file present'}


def _cast_ps_value(value):
    """Return *value* as an int if possible, else a float, else the original string."""
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            continue
    return value


def parse_ps(filename):
    """Parse a ``ps`` text dump into a list of per-process dictionaries.

    The first line of the file is treated as the header; its
    whitespace-separated column names become the dictionary keys, so the
    parser does not depend on a particular column layout, e.g.::

        USER UID PID PPID %CPU %MEM PRI NI VSZ RSS WCHAN TT STAT STARTED TIME COMMAND
        USER UID PRSNA PID PPID F %CPU %MEM PRI NI VSZ RSS WCHAN TT STAT STARTED TIME COMMAND

    Every column except the last is cast to ``int``, then ``float``, and
    otherwise kept as a string. The last column absorbs all remaining
    tokens of the line (joined by single spaces) and is always a string,
    because it may itself contain spaces.

    Blank lines and lines with fewer fields than the header are skipped.

    :param filename: path of the ``ps`` output file to parse
    :return: list of dictionaries, one per process line; an empty list if
        the file could not be read or parsed (the error is printed)
    """
    result = []
    try:
        with open(filename, "r") as f:
            header = re.split(r"\s+", f.readline().strip())
            header_length = len(header)

            print(f"Found header: {header}")
            for line in f:
                stripped = line.strip()
                if not stripped:
                    # blank line: nothing to parse
                    continue
                patterns = re.split(r"\s+", stripped)
                if len(patterns) < header_length:
                    # truncated line: skip it instead of failing the whole file
                    continue
                # all columns but the last map one-to-one onto a token
                row = {
                    col_name: _cast_ps_value(value)
                    for col_name, value in zip(header[:-1], patterns)
                }
                # merge last entries together, as last entry may contain spaces
                row[header[-1]] = " ".join(patterns[header_length - 1:])
                result.append(row)
            return result
    except Exception as e:
        # best-effort parser: report the problem and return no results
        print(f"Could not parse ps.txt: {str(e)}")
        return []


"""
Expand Down
35 changes: 33 additions & 2 deletions tests/test_parsers_ps.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from parsers.ps import parse_path, get_log_files
from parsers.ps import parse_path, get_log_files, parse_ps
from tests import SysdiagnoseTestCase
import unittest
import tempfile


class TestParsersPs(SysdiagnoseTestCase):
Expand All @@ -12,11 +13,41 @@ def test_parse_ps(self):
print(f'Parsing {files}')
result = parse_path(log_root_path)
if result: # not all logs contain data
for item in result.values():
for item in result:
self.assertTrue('COMMAND' in item)
self.assertTrue('PID' in item)
self.assertTrue('USER' in item)

def test_parse_ps_lower_than_v16(self):
    """Parse the column layout without PRSNA/F, with a command containing spaces."""
    ps_lines = [
        'USER UID PID PPID %CPU %MEM PRI NI VSZ RSS WCHAN TT STAT STARTED TIME COMMAND',
        'root 0 1 0 0.0 0.4 37 0 4226848 8912 - ?? Ss 14Jan19 7:27.40 /sbin/launchd with space'
    ]
    expected_result = [
        {'USER': 'root', 'UID': 0, 'PID': 1, 'PPID': 0, '%CPU': 0.0, '%MEM': 0.4, 'PRI': 37, 'NI': 0, 'VSZ': 4226848, 'RSS': 8912, 'WCHAN': '-', 'TT': '??', 'STAT': 'Ss', 'STARTED': '14Jan19', 'TIME': '7:27.40', 'COMMAND': '/sbin/launchd with space'}
    ]
    # the context manager closes (and thereby removes) the temp file even if parse_ps raises
    with tempfile.NamedTemporaryFile(mode='w') as tmp_inputfile:
        tmp_inputfile.write('\n'.join(ps_lines))
        # flush so parse_ps sees the content when it reopens the file by name
        tmp_inputfile.flush()
        result = parse_ps(tmp_inputfile.name)
    self.assertEqual(result, expected_result)

def test_parse_ps_newer_than_v16(self):
    """Parse the column layout that includes the extra PRSNA and F columns."""
    ps_lines = [
        'USER UID PRSNA PID PPID F %CPU %MEM PRI NI VSZ RSS WCHAN TT STAT STARTED TIME COMMAND',
        'root 0 - 1 0 4004 0.0 0.0 0 0 0 0 - ?? ?s Tue09PM 0:00.00 /sbin/launchd'
    ]
    expected_result = [
        {'USER': 'root', 'UID': 0, 'PRSNA': '-', 'PID': 1, 'PPID': 0, 'F': 4004, '%CPU': 0.0, '%MEM': 0.0, 'PRI': 0, 'NI': 0, 'VSZ': 0, 'RSS': 0, 'WCHAN': '-', 'TT': '??', 'STAT': '?s', 'STARTED': 'Tue09PM', 'TIME': '0:00.00', 'COMMAND': '/sbin/launchd'}
    ]
    # the context manager closes (and thereby removes) the temp file even if parse_ps raises
    with tempfile.NamedTemporaryFile(mode='w') as tmp_inputfile:
        tmp_inputfile.write('\n'.join(ps_lines))
        # flush so parse_ps sees the content when it reopens the file by name
        tmp_inputfile.flush()
        result = parse_ps(tmp_inputfile.name)
    self.assertEqual(result, expected_result)


if __name__ == '__main__':
unittest.main()

0 comments on commit 7c380cd

Please sign in to comment.