Skip to content

Commit

Permalink
chg: [containermanager] move multiline log parsing to utils fixes #67
Browse files Browse the repository at this point in the history
  • Loading branch information
cvandeplas committed May 3, 2024
1 parent 2aeb512 commit aa7a7d6
Show file tree
Hide file tree
Showing 7 changed files with 339 additions and 295 deletions.
86 changes: 2 additions & 84 deletions parsers/containermanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import glob
import json
import os
import re
from utils import multilinelog


# ----- definition for parsing.py script -----#
Expand All @@ -43,91 +43,9 @@ def get_log_files(log_root_path: str) -> list:
return log_files


# function copied from https://github.com/abrignoni/iOS-Mobile-Installation-Logs-Parser/blob/master/mib_parser.sql.py
# Month to numeric with leading zero when month < 10 function
# Function call: month = month_converter(month)


def month_converter(month):
    """Convert an English month abbreviation to a two-digit month number.

    Args:
        month: Three-letter month abbreviation such as ``'Jan'`` or ``'Dec'``.

    Returns:
        The month number as a zero-padded two-character string
        (``'01'`` through ``'12'``).

    Raises:
        ValueError: If ``month`` is not one of the known abbreviations.
    """
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    # Always format as a string so every month yields the same type; the
    # previous version returned a str below 10 and an int from 10 upwards.
    return f"{months.index(month) + 1:02d}"

# Day-to-string helper: adds a leading zero when day < 10
# Function call: day = day_converter(day)


def day_converter(day):
    """Format a day of the month as a two-digit string.

    Args:
        day: Day of the month, as an int or as a string accepted by ``int()``.

    Returns:
        The day as a zero-padded string (``'01'`` through ``'31'``).

    Raises:
        ValueError: If ``day`` cannot be converted to an integer.
    """
    # Always return a string; the previous version returned a str below 10
    # and an int from 10 upwards.
    return f"{int(day):02d}"
##


def parsecontainermanager(loglist):
    """Parse container manager log files into a dictionary of events.

    Entries may span several lines. A line is treated as the start of a new
    entry when the timestamp pattern matches it (some text followed by
    `` [`` and at least one digit). All lines up to the next such line are
    joined and handed to ``buildlogentry``.

    Args:
        loglist: Iterable of paths to log files, read as UTF-8 text.

    Returns:
        ``{"events": [...]}`` with one entry per log record, in file order.
    """
    # Compiled once instead of on every line.
    new_entry_pattern = re.compile(r"(?<=^)(.*?)(?= \[[0-9]+)")
    events = {"events": []}
    for logfile in loglist:
        with open(logfile, 'r', encoding="utf-8") as f:
            prev_lines = []
            for line in f:
                if new_entry_pattern.search(line) and prev_lines:
                    # A new entry begins: the buffered one is complete.
                    events['events'].append(buildlogentry(''.join(prev_lines)))
                    prev_lines = []
                prev_lines.append(line)
            # Flush the last entry of the file. The guard keeps an empty file
            # from sending an empty string to buildlogentry.
            if prev_lines:
                events['events'].append(buildlogentry(''.join(prev_lines)))
    return events


def buildlogentry(line):
    """Build a structured log entry from one (possibly multiline) record.

    Each field is extracted with its own regular expression and is only
    added to the result when its pattern matches, so a record that lacks a
    field no longer raises ``AttributeError``.

    Args:
        line: Full text of the record; may contain embedded newlines.

    Returns:
        A dict with any of the keys ``timestamp`` (``YYYY-MM-DD HH:MM:SS``),
        ``loglevel``, ``hexID`` and ``event_type``, plus ``msg``. When no
        message pattern matches, ``msg`` is the whole stripped record.

    Raises:
        ValueError: If a timestamp is found but does not split into the five
            expected parts, or contains an unknown month or a non-numeric day.
    """
    entry = {}

    # timestamp: everything before the first " [<digits>" on the first line
    timeregex = re.search(r"(?<=^)(.*?)(?= \[[0-9]+)", line)
    if timeregex:
        # Only the first 24 characters are split into the five date parts.
        _, month, day, time, year = timeregex.group(1)[:24].split()
        entry['timestamp'] = f"{year}-{month_converter(month)}-{day_converter(day)} {time}"

    # log level, e.g. "<notice>"
    loglevelregex = re.search(r"\<(.*?)\>", line)
    if loglevelregex:
        entry['loglevel'] = loglevelregex.group(1)

    # hex_ID, e.g. "(0x16f123000)"
    hexIDregex = re.search(r"\(0x(.*?)\)", line)
    if hexIDregex:
        entry['hexID'] = '0x' + hexIDregex.group(1)

    # event_type, e.g. "-[Class method]:"
    eventyperegex = re.search(r"\-\[(.*)(\]\:)", line)
    if eventyperegex:
        entry['event_type'] = eventyperegex.group(1)

    # msg: text after the event type when there is one, otherwise text
    # after the first ") "
    msg_pattern = r"\]\:(.*)" if 'event_type' in entry else r"\)\ (.*)"
    msgregex = re.search(msg_pattern, line, re.MULTILINE | re.DOTALL)
    # Fall back to the raw record so no information is lost.
    entry['msg'] = msgregex.group(1).strip() if msgregex else line.strip()

    return entry
return multilinelog.extract_from_file(logfile)


def main():
Expand Down
126 changes: 2 additions & 124 deletions parsers/mobileactivation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@
from docopt import docopt
import glob
import json
import misc
import os
import re
from utils import multilinelog

# ----- definition for parsing.py script -----#
# ----- DO NOT DELETE ----#
Expand All @@ -44,130 +43,9 @@ def get_log_files(log_root_path: str) -> list:
return log_files


# function copied from https://github.com/abrignoni/iOS-Mobile-Installation-Logs-Parser/blob/master/mib_parser.sql.py
# Month to numeric with leading zero when month < 10 function
# Function call: month = month_converter(month)


def month_converter(month):
    """Convert an English month abbreviation to a two-digit month number.

    Args:
        month: Three-letter month abbreviation such as ``'Jan'`` or ``'Dec'``.

    Returns:
        The month number as a zero-padded two-character string
        (``'01'`` through ``'12'``).

    Raises:
        ValueError: If ``month`` is not one of the known abbreviations.
    """
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    # Always format as a string so every month yields the same type; the
    # previous version returned a str below 10 and an int from 10 upwards.
    return f"{months.index(month) + 1:02d}"

# Day-to-string helper: adds a leading zero when day < 10
# Function call: day = day_converter(day)


def day_converter(day):
    """Format a day of the month as a two-digit string.

    Args:
        day: Day of the month, as an int or as a string accepted by ``int()``.

    Returns:
        The day as a zero-padded string (``'01'`` through ``'31'``).

    Raises:
        ValueError: If ``day`` cannot be converted to an integer.
    """
    # Always return a string; the previous version returned a str below 10
    # and an int from 10 upwards.
    return f"{int(day):02d}"
##


# Parse mobile activation log files into {"events": [...]}.
#
# Each file in `loglist` is opened as utf8 text and scanned line by line with
# a small state machine held in `status`:
#   - None          : no multiline block in progress
#   - 'act_start'   : inside a "Mobile Activation Startup" block
#   - 'plist_start' : collecting the lines of a plist that follows a header line
# The order of the if/elif branches below is significant: each line is handled
# by the first branch whose condition holds.
def parsemobactiv(loglist):
events = {"events": []}
for logfile in loglist:
with open(logfile, 'r', encoding='utf8') as f:
status = None # status tracker for multiline parsing
for line in f:
# Activation multiline parsing
# The startup banner opens a block; the banner line itself is not stored.
if not status and "____________________ Mobile Activation Startup _____________________" in line:
status = 'act_start'
act_lines = []
# A line of underscores closes the block; the collected lines become one event.
elif status == 'act_start' and "____________________________________________________________________" in line:
status = None
events['events'].append(buildlogentry_actentry(act_lines))
# Any other line inside the block is collected with surrounding whitespace removed.
elif status == 'act_start':
act_lines.append(line.strip())
# plist multiline parsing
# NOTE(review): this branch is tested before the 'plist_start' branch, so a
# line ending with ":" seen while a plist is being collected starts a new
# collection and discards the previous one - confirm this is intended.
elif line.strip().endswith(":"): # next line will be starting with <?xml
status = 'plist_start'
plist_lines = {
'line': line.strip(),
'plist': []
}
elif status == 'plist_start':
# plist lines are stored as bytes so they can be joined with b''.join below
plist_lines['plist'].append(line.encode())
if line.strip() == '</plist>': # end of plist
status = None
# end of plist, now need to parse the line and plist
# The header line is parsed like a normal entry; the plist is attached under 'plist'.
event = buildlogentry_other(plist_lines['line'])
event['plist'] = misc.load_plist_string_as_json(b''.join(plist_lines['plist']))
# LATER parse the plist
# - extract the recursive plist
# - decode the certificates into nice JSON
# - and so on with more fun for the future
events['events'].append(event)
# Every remaining non-blank line is a self-contained single-line entry.
elif line.strip() != '':
events['events'].append(buildlogentry_other(line.strip()))
# print(json.dumps(events,indent=4))
return events


def buildlogentry_actentry(lines):
    """Build one event from the lines of a "Mobile Activation Startup" block.

    The timestamp and hex ID are read from the first line only. Every line
    that contains at least one ``:`` contributes a key/value pair taken from
    its last two colon-separated segments.

    Args:
        lines: Stripped lines of the block, in file order. May be empty.

    Returns:
        A dict that always has ``loglevel`` set to ``'debug'``, has
        ``timestamp`` and ``hexID`` when the first line provides them, and
        has one key per ``key: value`` line.

    Raises:
        ValueError: If a timestamp is found but does not split into the five
            expected parts, or contains an unknown month or a non-numeric day.
    """
    event = {'loglevel': 'debug'}
    if not lines:
        # An empty block has nothing to parse; previously lines[0] raised IndexError.
        return event

    first_line = lines[0]

    # get timestamp: everything before the first " ["
    timeregex = re.search(r"(?<=^)(.*?)(?= \[)", first_line)
    if timeregex:
        _, month, day, time, year = timeregex.group(1).split()
        event['timestamp'] = f"{year}-{month_converter(month)}-{day_converter(day)} {time}"

    # hex_ID
    hexIDregex = re.search(r"\(0x(.*?)\)", first_line)
    if hexIDregex:
        event['hexID'] = '0x' + hexIDregex.group(1)

    # build event: the last two colon-separated segments are key and value
    for line in lines:
        splitted = line.split(":")
        if len(splitted) > 1:
            event[splitted[-2].strip()] = splitted[-1].strip()

    return event


def buildlogentry_other(line):
    """Build a structured event from a single-line mobile activation entry.

    Args:
        line: One stripped log line.

    Returns:
        A dict with ``timestamp`` (``YYYY-MM-DD HH:MM:SS``), ``loglevel``,
        ``hexID``, ``msg`` and, when the line has a ``-[...]:`` section,
        ``event_type``.

    Raises:
        Exception: If any mandatory field cannot be extracted. The message
            holds the offending line and the underlying reason, and the
            original error is chained as ``__cause__``.
    """
    event = {}
    try:
        # get timestamp: everything before the first " ["
        timeregex = re.search(r"(?<=^)(.*?)(?= \[)", line)
        _, month, day, time, year = timeregex.group(1).split()
        event['timestamp'] = f"{year}-{month_converter(month)}-{day_converter(day)} {time}"

        # log level
        loglevelregex = re.search(r"\<(.*?)\>", line)
        event['loglevel'] = loglevelregex.group(1)

        # hex_ID
        hexIDregex = re.search(r"\(0x(.*?)\)", line)
        event['hexID'] = '0x' + hexIDregex.group(1)

        # event_type (optional)
        eventyperegex = re.search(r"\-\[(.*)(\]\:)", line)
        if eventyperegex:
            event['event_type'] = eventyperegex.group(1)

        # msg: text after the event type when there is one, otherwise text
        # after the first ") "
        msg_pattern = r"\]\:(.*)" if 'event_type' in event else r"\)\ (.*)"
        event['msg'] = re.search(msg_pattern, line).group(1).strip()
    except Exception as e:
        message = f"Error parsing line: {line}. Reason: {str(e)}"
        print(message)
        # Re-raise with the message attached; the previous bare
        # `raise Exception from e` produced an exception with empty text.
        raise Exception(message) from e

    return event
return multilinelog.extract_from_file(logfile)


def main():
Expand Down
87 changes: 2 additions & 85 deletions parsers/mobileinstallation.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
import glob
import json
import os
import re

from utils import multilinelog

# ----- definition for parsing.py script -----#
# ----- DO NOT DELETE ----#
Expand All @@ -44,95 +43,13 @@ def get_log_files(log_root_path: str) -> list:
return log_files


# function copied from https://github.com/abrignoni/iOS-Mobile-Installation-Logs-Parser/blob/master/mib_parser.sql.py
# Month to numeric with leading zero when month < 10 function
# Function call: month = month_converter(month)


def month_converter(month):
    """Convert an English month abbreviation to a two-digit month number.

    Args:
        month: Three-letter month abbreviation such as ``'Jan'`` or ``'Dec'``.

    Returns:
        The month number as a zero-padded two-character string
        (``'01'`` through ``'12'``).

    Raises:
        ValueError: If ``month`` is not one of the known abbreviations.
    """
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    # Always format as a string so every month yields the same type; the
    # previous version returned a str below 10 and an int from 10 upwards.
    return f"{months.index(month) + 1:02d}"

# Day-to-string helper: adds a leading zero when day < 10
# Function call: day = day_converter(day)


def day_converter(day):
    """Format a day of the month as a two-digit string.

    Args:
        day: Day of the month, as an int or as a string accepted by ``int()``.

    Returns:
        The day as a zero-padded string (``'01'`` through ``'31'``).

    Raises:
        ValueError: If ``day`` cannot be converted to an integer.
    """
    # Always return a string; the previous version returned a str below 10
    # and an int from 10 upwards.
    return f"{int(day):02d}"
##


# Parse mobile installation log files into {"events": [...]}.
#
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost. The line-by-line parsing below appears to be the
# removed implementation and the `return multilinelog.extract_from_file(logfile)`
# line its replacement - confirm against the repository before relying on
# the control flow shown here.
def parsemobinstall(loglist):
events = {"events": []}
for logfile in loglist:
with open(logfile, 'r', encoding='utf8') as f:
# Buffer for the lines of a multiline entry that is still open.
prev_lines = []
for line in f:
line = line.strip()
# support multiline entries
# A line ending with '{' opens a multiline entry; keep buffering.
if line.endswith('{'):
prev_lines.append(line)
continue
if prev_lines:
prev_lines.append(line)
# A line ending with '}' closes the entry: join the buffered lines into
# one record and fall through to the normal handling below.
if line.endswith('}'):
line = '\n'.join(prev_lines)
prev_lines = []
else:
continue
# normal or previously multiline entry
# getting Timestamp - adding entry only if timestamp is present
timeregex = re.search(r"(?<=^)(.*)(?= \[)", line) # Regex for timestamp
if timeregex:
new_entry = buildlogentry(line)
events['events'].append(new_entry)
# Delegates parsing of `logfile` to the shared multilinelog helper and
# returns its result directly.
return multilinelog.extract_from_file(logfile)
return events


def buildlogentry(line):
    """Build a structured entry from one mobile installation log record.

    Args:
        line: Text of the record; multiline records are joined with ``\\n``
            by the caller.

    Returns:
        A dict with ``timestamp`` (``YYYY-MM-DD HH:MM:SS``), ``loglevel``,
        ``hexID``, ``msg`` and, when the record has a ``-[...]:`` section,
        ``event_type``.

    Raises:
        Exception: If any mandatory field cannot be extracted. The message
            holds the offending record and the underlying reason, and the
            original error is chained as ``__cause__``.
    """
    try:
        entry = {}
        # timestamp: everything before the first " [<digits>"
        timeregex = re.search(r"(?<=^)(.*?)(?= \[[0-9]+)", line)
        _, month, day, time, year = timeregex.group(1).split()
        entry['timestamp'] = f"{year}-{month_converter(month)}-{day_converter(day)} {time}"

        # log level
        loglevelregex = re.search(r"\<(.*?)\>", line)
        entry['loglevel'] = loglevelregex.group(1)

        # hex_ID
        hexIDregex = re.search(r"\(0x(.*?)\)", line)
        entry['hexID'] = '0x' + hexIDregex.group(1)

        # event_type (optional)
        eventyperegex = re.search(r"\-\[(.*)(\]\:)", line)
        if eventyperegex:
            entry['event_type'] = eventyperegex.group(1)

        # msg: text after the event type when there is one, otherwise text
        # after the first ") "
        msg_pattern = r"\]\:(.*)" if 'event_type' in entry else r"\)\ (.*)"
        entry['msg'] = re.search(msg_pattern, line).group(1).strip()
    except Exception as e:
        message = f"Error parsing line: {line}. Reason: {str(e)}"
        print(message)
        # Re-raise with the message attached; the previous bare
        # `raise Exception from e` produced an exception with empty text.
        raise Exception(message) from e

    return entry


def main():
"""
Main function, to be called when used as CLI tool
Expand Down
Loading

0 comments on commit aa7a7d6

Please sign in to comment.