Implements #11 - JSONL logging instead of printing to stdout #106

Merged: 26 commits into main from issue-11-logging on Oct 21, 2024
Changes shown are from 16 of the 26 commits.

Commits
2a285ef
First commit on Issue #11 On those modules where print statement was…
dario-br Oct 13, 2024
b4bd4ef
#11 adding logging to:
dario-br Oct 14, 2024
0e487db
#11. By default it always logs to file from INFO level. However, on C…
dario-br Oct 15, 2024
4c62518
#11
dario-br Oct 15, 2024
32a287a
#11 time in microseconds
dario-br Oct 15, 2024
e15ea9a
#11 fixing linting issues
dario-br Oct 15, 2024
8ff9082
Merge branch 'main' into issue-11-logging
cvandeplas Oct 15, 2024
3713c84
#11 trying to address review comments on PR
dario-br Oct 16, 2024
bd3e150
Merge branch 'issue-11-logging' of https://github.com/EC-DIGIT-CSIRC/…
dario-br Oct 16, 2024
2d80fed
#11 fixes last broken commit
dario-br Oct 16, 2024
c86d636
#11 added dependency to python-json-logger
dario-br Oct 16, 2024
921153d
#11 rest of commit trying to address last PR review comment
dario-br Oct 16, 2024
e7efd2c
Merge branch 'main' into issue-11-logging
dario-br Oct 16, 2024
2e6ba62
#11 indentation issue corrected
dario-br Oct 16, 2024
94fb389
#11 align the time fields with parsers output
dario-br Oct 16, 2024
60c7672
#11 fixing linting issues
dario-br Oct 16, 2024
5b34d40
#11 adding a few missing files without logging and removing print sta…
dario-br Oct 16, 2024
38a0ada
#11 forgot to click on save :) this fixes https://github.com/EC-DIGIT…
dario-br Oct 16, 2024
62354a5
#11 Addressing review comments: log parameter to the parser section a…
dario-br Oct 16, 2024
afe6b9e
#11 moves the console handler to the same level than now the log cmd …
dario-br Oct 16, 2024
d05172c
#11 one liner for console log handler
dario-br Oct 17, 2024
48a1154
#11 added logging to appinstallation parser and external script sqlit…
dario-br Oct 17, 2024
8739172
Merge branch 'main' into issue-11-logging
dario-br Oct 17, 2024
10fc4e7
#11 Added to the demo parser and analyser some extra log calls to dem…
dario-br Oct 17, 2024
d59a2f1
Merge branch 'main' into issue-11-logging
dario-br Oct 21, 2024
61c9603
Merge branch 'main' into issue-11-logging
cvandeplas Oct 21, 2024
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -29,7 +29,8 @@ dependencies = [
"pandas==2.2.2",
"numpy==2.0.0",
"nska-deserialize==1.5.1",
"yara-python==4.5.1"
"yara-python==4.5.1",
"python-json-logger==2.0.7"
]

[project.scripts]
3 changes: 2 additions & 1 deletion requirements.txt
@@ -7,4 +7,5 @@ pandas==2.2.2
numpy==2.0.0
nska-deserialize==1.5.1
yara-python==4.5.1
# pycrashreport==1.2.4
# pycrashreport==1.2.4
python-json-logger==2.0.7
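The new python-json-logger dependency is what turns log records into JSONL. A minimal sketch of how it can back a file handler; the PR's real implementation lives in src/sysdiagnose/utils/logger.py (not part of this diff), so the helper name and format string below are assumptions modeled on the get_json_handler import seen in main.py:

```python
import logging
from pythonjsonlogger import jsonlogger  # python-json-logger==2.0.7

def get_json_handler(path: str) -> logging.FileHandler:
    """Hypothetical helper mirroring sysdiagnose.utils.logger.get_json_handler."""
    handler = logging.FileHandler(path)
    # One JSON object per line (JSONL); extra={...} fields on individual
    # logger calls are merged into the same object.
    handler.setFormatter(jsonlogger.JsonFormatter('%(asctime)s %(levelname)s %(message)s'))
    return handler

logger = logging.getLogger('sysdiagnose')
logger.setLevel(logging.INFO)
logger.addHandler(get_json_handler('example.jsonl'))
logger.info('Parser started', extra={'parser': 'demo'})
# example.jsonl now holds something like:
# {"asctime": "...", "levelname": "INFO", "message": "Parser started", "parser": "demo"}
```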
7 changes: 5 additions & 2 deletions src/sysdiagnose/analysers/apps.py
@@ -4,12 +4,15 @@
# Author: Emiliern Le Jamtel

import re
import logging
from sysdiagnose.utils.base import BaseAnalyserInterface
from sysdiagnose.parsers.accessibility_tcc import AccessibilityTccParser
from sysdiagnose.parsers.brctl import BrctlParser
from sysdiagnose.parsers.itunesstore import iTunesStoreParser
from sysdiagnose.parsers.logarchive import LogarchiveParser

logger = logging.getLogger('sysdiagnose')


class AppsAnalyser(BaseAnalyserInterface):
description = 'Get list of Apps installed on the device'
@@ -88,9 +91,9 @@ def execute(self):
if matches:
new_term = matches[0]
else:
# print(f"Skipping entry: {entry['subsystem']}")
logger.debug(f"Skipping entry: {entry['subsystem']}")
continue
# print(f"New entry: {new_term} - was: {entry['subsystem']}")
logger.debug(f"New entry: {new_term} - was: {entry['subsystem']}")
entry['subsystem'] = new_term
# add it to the list
try:
3 changes: 2 additions & 1 deletion src/sysdiagnose/analysers/demo_analyser.py
@@ -3,7 +3,7 @@
# For Python3
# DEMO - Skeleton

from sysdiagnose.utils.base import BaseAnalyserInterface
from sysdiagnose.utils.base import BaseAnalyserInterface, logger


class DemoAnalyser(BaseAnalyserInterface):
@@ -21,6 +21,7 @@ def execute(self):
By doing so you will get the parser output even if it never ran before.
"""
print("DO SOMETHING HERE")
logger.info("log something here", extra={'analyser': __name__})

# json_data = p_fooparser.get_result()

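Two import styles appear across this PR: demo_analyser.py takes logger from sysdiagnose.utils.base, while timesketch.py and yarascan.py call logging.getLogger('sysdiagnose') directly. Assuming the base module's logger is itself created with the 'sysdiagnose' name (that module is not shown in this diff), both styles resolve to the identical object, because the logging module caches loggers by name:

```python
import logging

a = logging.getLogger('sysdiagnose')           # style used in timesketch.py
b = logging.getLogger('sysdiagnose')           # a second lookup, same name
# from sysdiagnose.utils.base import logger   # style used in demo_analyser.py,
#                                             # assumed to wrap the same call
assert a is b  # loggers are per-name singletons, so handlers attach only once
```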
33 changes: 18 additions & 15 deletions src/sysdiagnose/analysers/timesketch.py
@@ -17,6 +17,9 @@
from sysdiagnose.parsers.crashlogs import CrashLogsParser
from collections.abc import Generator
from sysdiagnose.utils.base import BaseAnalyserInterface
import logging

logger = logging.getLogger('sysdiagnose')


class TimesketchAnalyser(BaseAnalyserInterface):
@@ -50,7 +53,7 @@ def __extract_ts_mobileactivation(self) -> Generator[dict, None, None]:
pass
yield ts_event
except Exception as e:
print(f"ERROR while extracting timestamp from mobileactivation file. Reason: {str(e)}")
logger.exception("ERROR while extracting timestamp from mobileactivation file.")

def __extract_ts_powerlogs(self) -> Generator[dict, None, None]:
try:
@@ -91,7 +94,7 @@ def __extract_ts_powerlogs(self) -> Generator[dict, None, None]:
pass

except Exception as e:
print(f"ERROR while extracting timestamp from powerlogs. Reason: {str(e)}")
logger.exception("ERROR while extracting timestamp from powerlogs.")

def __extract_ts_swcutil(self) -> Generator[dict, None, None]:
try:
@@ -111,10 +114,10 @@
yield ts_event
except KeyError:
# some entries do not have a Last Checked or timestamp field
# print(f"WARNING {filename} while extracting timestamp from {(service['Service'])} - {(service['App ID'])}. Record not inserted.")
logger.warning(f"Error while extracting timestamp from {(service['Service'])} - {(service['App ID'])}. Record not inserted.")
pass
except Exception as e:
print(f"ERROR while extracting timestamp from swcutil. Reason {str(e)}")
logger.exception("ERROR while extracting timestamp from swcutil.")

def __extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]:
try:
@@ -135,7 +138,7 @@
}
yield ts_event
except Exception as e:
print(f"ERROR while extracting timestamp from accessibility_tcc. Reason {str(e)}")
logger.exception("ERROR while extracting timestamp from accessibility_tcc.")

def __extract_ts_shutdownlogs(self) -> Generator[dict, None, None]:
try:
@@ -153,9 +156,9 @@
}
yield ts_event
except Exception as e:
print(f"WARNING: shutdownlog entry not parsed: {event}. Reason: {str(e)}")
logger.warning(f"WARNING: shutdownlog entry not parsed: {event}", exc_info=True)
except Exception as e:
print(f"ERROR while extracting timestamp from shutdownlog. Reason: {str(e)}")
logger.exception("ERROR while extracting timestamp from shutdownlog.")

def __extract_ts_logarchive(self) -> Generator[dict, None, None]:
try:
@@ -173,9 +176,9 @@
}
yield ts_event
except KeyError as e:
print(f"WARNING: trace not parsed: {event}. Error {e}")
logger.warning(f"WARNING: trace not parsed: {event}.", exc_info=True)
except Exception as e:
print(f"ERROR while extracting timestamp from logarchive. Reason: {str(e)}")
logger.exception(f"ERROR while extracting timestamp from logarchive.")

def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]:
try:
@@ -206,7 +209,7 @@ def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]:
}
yield ts_event
except Exception as e:
print(f"ERROR while extracting timestamp from wifisecurity. Reason {str(e)}")
logger.exception("ERROR while extracting timestamp from wifisecurity.")

def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]:
try:
@@ -228,7 +231,7 @@
yield ts_event
except KeyError:
# some wifi networks do not have an AddedAt field
# print(f"ERROR {filename} while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.")
logger.warning(f"Error while extracting timestamp from {ssid}. Record not inserted.", exc_info=True)
pass

# WIFI modified
@@ -245,7 +248,7 @@
yield ts_event
except KeyError:
# some wifi networks do not have an UpdatedAt field
# print(f"ERROR {filename} while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.")
logger.warning(f"Error while extracting timestamp from {ssid}.Record not inserted.", exc_info=True)
pass

# Password for wifi modified
@@ -262,10 +265,10 @@
yield ts_event
except KeyError:
# some wifi networks do not have a password modification date
# print(f"ERROR {filename} while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.")
logger.warning(f"Error while extracting timestamp from {ssid}. Record not inserted.", exc_info=True)
pass
except Exception as e:
print(f"ERROR while extracting timestamp from wifi_known_networks. Reason {str(e)}")
logger.exception("ERROR while extracting timestamp from wifi_known_networks.")

def __extract_ts_crashlogs(self) -> Generator[dict, None, None]:
try:
@@ -288,7 +291,7 @@
# skip bug_type fields
pass
except Exception as e:
print(f"ERROR while extracting timestamp from crashlog. Reason {str(e)}")
logger.exception("ERROR while extracting timestamp from crashlog.")

def execute(self):
# Get all the functions that start with '__extract_ts_'
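A quick sketch of the two error-reporting idioms this file now uses in place of print(): logger.exception() for unexpected failures (ERROR level, traceback appended automatically) and logger.warning(..., exc_info=True) for recoverable per-record problems. The messages below reuse wording from the diff purely for illustration:

```python
import logging

logging.basicConfig(level=logging.DEBUG)  # demo setup only
logger = logging.getLogger('sysdiagnose')

try:
    1 / 0
except Exception:
    # logger.exception(): logs at ERROR and includes the traceback;
    # intended to be called from inside an except block.
    logger.exception("ERROR while extracting timestamp from mobileactivation file.")

try:
    {}['Last Checked']
except KeyError:
    # exc_info=True attaches the same traceback at any level, here WARNING,
    # for per-record failures that should not abort the whole run.
    logger.warning("Record not inserted.", exc_info=True)
```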
21 changes: 12 additions & 9 deletions src/sysdiagnose/analysers/yarascan.py
@@ -3,8 +3,11 @@
import glob
import threading
import queue
import logging
from sysdiagnose.utils.base import BaseAnalyserInterface

logger = logging.getLogger('sysdiagnose')


# These are the commonly used external variables that can be used in the YARA rules
externals = {
@@ -69,7 +72,7 @@ def execute(self):
results['matches'] = matches

if len(results['errors']) > 0:
print("Scan finished with errors. Review the results")
logger.error("Scan finished with errors. Review the results")

return results

@@ -80,17 +83,17 @@ def get_valid_yara_rule_files(self) -> tuple[list, list]:
for rule_file in rule_files_to_test:
if not os.path.isfile(rule_file):
continue
print(f"Loading YARA rule: {rule_file}")
logger.info(f"Loading YARA rule: {rule_file}")
try:
yara.compile(filepath=rule_file, externals=externals)
# if we reach this point, the rule is valid
rule_files_validated.append(rule_file)
except yara.SyntaxError as e:
print(f"Error compiling rule {rule_file}: {str(e)}")
logger.exception(f"Error compiling rule {rule_file}")
errors.append(f"Error compiling rule {rule_file}: {str(e)}")
continue
except yara.Error as e:
print(f"Error compiling rule {rule_file}: {str(e)}")
logger.exception(f"Error compiling rule {rule_file}")
errors.append(f"Error loading rule {rule_file}: {str(e)}")
continue

@@ -111,7 +114,7 @@ def scan_directory(directories: list, rule_filepaths: dict, ignore_files: list,
for ignore_folder in ignore_folders:
if root.startswith(ignore_folder):
stop = True
print(f"Skipping folder: {root}")
logger.info(f"Skipping folder: {root}")
continue
if stop:
continue
Expand All @@ -121,7 +124,7 @@ def scan_directory(directories: list, rule_filepaths: dict, ignore_files: list,
for ignore_file in ignore_files:
if file_full_path.startswith(ignore_file):
stop = True
print(f"Skipping file: {file_full_path}")
logger.info(f"Skipping file: {file_full_path}")
continue
if stop:
continue
@@ -133,13 +136,13 @@ def consumer():
rules = yara.compile(filepaths=rule_filepaths, externals=externals)

while True:
print(f"Consumer thread seeing {file_queue.qsize()} files in queue, and taking one")
logger.info(f"Consumer thread seeing {file_queue.qsize()} files in queue, and taking one")
file_path = file_queue.get()
if file_path is None:
print("Consumer thread exiting")
logger.info("Consumer thread exiting")
break

print(f"Scanning file: {file_path}")
logger.info(f"Scanning file: {file_path}")
# set the externals for this file - massive slowdown
# externals_local = externals.copy()
# externals_local['filename'] = file
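One reason the print-to-logger swap is safe in this file: consumer() runs in worker threads, and Python's logging handlers take an internal lock per emit, so concurrent threads can share one logger (and hence one JSONL file) without interleaving records. A minimal sketch of the queue/sentinel pattern the scanner uses, reduced to its logging behavior:

```python
import logging
import queue
import threading

logger = logging.getLogger('sysdiagnose')
file_queue: queue.Queue = queue.Queue()

def consumer() -> None:
    while True:
        file_path = file_queue.get()
        if file_path is None:        # sentinel: no more work
            logger.info("Consumer thread exiting")
            break
        logger.info(f"Scanning file: {file_path}")

thread = threading.Thread(target=consumer)
thread.start()
file_queue.put("example.bin")
file_queue.put(None)                 # one sentinel per consumer thread
thread.join()
```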
59 changes: 55 additions & 4 deletions src/sysdiagnose/main.py
@@ -3,6 +3,8 @@
import sys
from sysdiagnose import Sysdiagnose
import os
import time
from sysdiagnose.utils.logger import logger, get_console_handler, get_json_handler


def parse_parser_error(message):
@@ -41,11 +43,13 @@ def main():
# parse mode
parse_parser = subparsers.add_parser('parse', help='Parse a case')
parse_parser.add_argument('parser', help='Name of the parser, "all" for running all parsers, or "list" for a listing of all parsers')
parse_parser.add_argument('--log', default='WARNING', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Enables logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)')
parse_parser.error = parse_parser_error

# analyse mode
analyse_parser = subparsers.add_parser('analyse', help='Analyse a case')
analyse_parser.add_argument('analyser', help='Name of the analyser, "all" for running all analysers, or "list" for a listing of all analysers')
analyse_parser.add_argument('--log', default='WARNING', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Enables logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)')
analyse_parser.error = analyse_parser_error

# list mode
@@ -123,14 +127,37 @@ def main():
else:
case_ids = [args.case_id]

# Handle console logging
log_level = args.log.upper()
logger.addHandler(get_console_handler(log_level))

logger2file = None
for case_id in case_ids:
# Handle file logging
time_str = time.strftime("%Y%m%dT%H%M%S")
filename = f"{time_str}-parse-{case_id}.jsonl"
Review thread on the line above:

Contributor: The file is saved in the parsed_data_folder. Is there real value in adding the case_id to the filename? I would propose using something like log-{time_str}-parse.jsonl or {time_str}-log-parse.jsonl instead. Second question: does it append logs to that file? So if a user runs parser X, and later on parser Y, are the logs appended?

Contributor Author: If you run parse/analyse all, all logs are appended to the same file.

Contributor Author: See 62354a5

folder = sd.config.get_case_parsed_data_folder(case_id)
# https://stackoverflow.com/questions/13839554/how-to-change-filehandle-with-python-logging-on-the-fly-with-different-classes-a
if logger2file is None:
logger2file = get_json_handler(os.path.join(folder, filename))
logger.addHandler(logger2file)
else:
logger2file.close()
logger2file.setStream(open(os.path.join(folder, filename), 'a'))

print(f"Case ID: {case_id}")
for parser in parsers_list:
print(f"Parser '{parser}' for case ID '{case_id}'")
logger.info(f"Parser '{parser}' started", extra={'parser': parser})
try:
sd.parse(parser, case_id)
result = sd.parse(parser, case_id)
result_str = "successfully" if result == 0 else "with errors"
logger.info(f"Parser '{parser}' finished {result_str}", extra={'parser': parser, 'result': result})
except NotImplementedError:
print(f"Parser '{parser}' is not implemented yet, skipping")
logger.warning(f"Parser '{parser}' is not implemented yet, skipping", extra={'parser': parser})

if logger2file is not None:
logger2file.close()

elif args.mode == 'analyse':
# Handle analyse mode
@@ -155,14 +182,38 @@ def main():
else:
case_ids = [args.case_id]

# Handle console logging
log_level = args.log.upper()
logger.addHandler(get_console_handler(log_level))

logger2file = None
for case_id in case_ids:
# Handle file logging
time_str = time.strftime("%Y%m%dT%H%M%S")
filename = f"{time_str}-analyse-{case_id}.jsonl"
folder = sd.config.get_case_parsed_data_folder(case_id)
# https://stackoverflow.com/questions/13839554/how-to-change-filehandle-with-python-logging-on-the-fly-with-different-classes-a
if logger2file is None:
logger2file = get_json_handler(os.path.join(folder, filename))
logger.addHandler(logger2file)
else:
logger2file.close()
logger2file.setStream(open(os.path.join(folder, filename), 'a'))

print(f"Case ID: {case_id}")
for analyser in analysers_list:
print(f" Analyser '{analyser}' for case ID '{case_id}'")
logger.info(f"Analyser '{analyser}' started", extra={'analyser': analyser})
try:
sd.analyse(analyser, case_id)
result = sd.analyse(analyser, case_id)
result_str = "successfully" if result == 0 else "with errors"
logger.info(f"Analyser '{analyser}' finished {result_str}", extra={'analyser': analyser, 'result': result})
except NotImplementedError:
print(f"Analyser '{analyser}' is not implemented yet, skipping")
logger.warning(f"Analyser '{analyser}' is not implemented yet, skipping", extra={'analyser': analyser})

if logger2file is not None:
logger2file.close()

else:
parser.print_help()

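The per-case file switching above follows the linked Stack Overflow approach: keep a single handler attached to the logger and repoint its stream for each case, rather than stacking one handler per case (which would duplicate records). A stripped-down sketch of the same pattern with a plain FileHandler; the PR's get_json_handler presumably returns a comparable handler, but that is an assumption since utils/logger.py is not in this diff:

```python
import logging

logger = logging.getLogger('sysdiagnose')
logger2file = None

for case_id in ['case1', 'case2']:
    # Illustrative file name; the PR writes into the case's parsed-data folder.
    path = f"20241021T120000-parse-{case_id}.jsonl"
    if logger2file is None:
        logger2file = logging.FileHandler(path)
        logger.addHandler(logger2file)
    else:
        logger2file.close()                     # flush and close the old file
        logger2file.setStream(open(path, 'a'))  # repoint the same handler
    logger.warning(f"parsing {case_id}")        # lands in the current file

if logger2file is not None:
    logger2file.close()
```

FileHandler.close() sets the handler's stream to None before closing it, so the subsequent setStream() call (available on StreamHandler since Python 3.7) swaps in the new file without touching the closed one.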