From 2a285ef8873916a93edb126352941454ee860fe8 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Mon, 14 Oct 2024 00:01:44 +0200
Subject: [PATCH 01/20] First commit on Issue #11

On those modules where a print statement was present:
1. added a logger at module level
2. replaced print statements with the corresponding logging statement:
   info, warning, error and debug (for commented prints)
---
 src/sysdiagnose/analysers/apps.py          |  7 ++--
 src/sysdiagnose/analysers/demo_analyser.py |  4 +++
 src/sysdiagnose/analysers/timesketch.py    | 33 ++++++++++---------
 src/sysdiagnose/analysers/yarascan.py      | 21 +++++++-----
 src/sysdiagnose/parsers/crashlogs.py       |  9 +++--
 src/sysdiagnose/parsers/logarchive.py      | 13 +++++---
 src/sysdiagnose/parsers/powerlogs.py       |  7 ++--
 src/sysdiagnose/parsers/ps.py              |  7 ++--
 .../parsers/security_sysdiagnose.py        |  5 ++-
 src/sysdiagnose/parsers/wifisecurity.py    |  9 +++--
 src/sysdiagnose/utils/base.py              |  5 ++-
 11 files changed, 77 insertions(+), 43 deletions(-)

diff --git a/src/sysdiagnose/analysers/apps.py b/src/sysdiagnose/analysers/apps.py
index c6116f3..10d0e38 100644
--- a/src/sysdiagnose/analysers/apps.py
+++ b/src/sysdiagnose/analysers/apps.py
@@ -4,12 +4,15 @@
 # Author: Emiliern Le Jamtel

 import re
+import logging
 from sysdiagnose.utils.base import BaseAnalyserInterface
 from sysdiagnose.parsers.accessibility_tcc import AccessibilityTccParser
 from sysdiagnose.parsers.brctl import BrctlParser
 from sysdiagnose.parsers.itunesstore import iTunesStoreParser
 from sysdiagnose.parsers.logarchive import LogarchiveParser

+logger = logging.getLogger(__name__)
+

 class AppsAnalyser(BaseAnalyserInterface):
     description = 'Get list of Apps installed on the device'
@@ -88,9 +91,9 @@ def execute(self):
             if matches:
                 new_term = matches[0]
             else:
-                # print(f"Skipping entry: {entry['subsystem']}")
+                logger.debug(f"Skipping entry: {entry['subsystem']}")
                 continue
-            # print(f"New entry: {new_term} - was: {entry['subsystem']}")
+            logger.debug(f"New entry: {new_term} - was: {entry['subsystem']}")
             entry['subsystem'] = new_term
             # add it to the list
             try:
diff --git a/src/sysdiagnose/analysers/demo_analyser.py b/src/sysdiagnose/analysers/demo_analyser.py
index ce7abeb..3981f35 100644
--- a/src/sysdiagnose/analysers/demo_analyser.py
+++ b/src/sysdiagnose/analysers/demo_analyser.py
@@ -3,8 +3,11 @@
 # For Python3
 # DEMO - Skeleton

+import logging
 from sysdiagnose.utils.base import BaseAnalyserInterface

+logger = logging.getLogger(__name__)
+

 class DemoAnalyser(BaseAnalyserInterface):
     description = "Do something useful (DEMO)"
@@ -21,6 +24,7 @@ def execute(self):
         By doing so you will get the parser output even if it never ran before.
         """
         print("DO SOMETHING HERE")
+        logger.info("log something here")

         # json_data = p_fooparser.get_result()
diff --git a/src/sysdiagnose/analysers/timesketch.py b/src/sysdiagnose/analysers/timesketch.py
index 0540edd..39e77e2 100644
--- a/src/sysdiagnose/analysers/timesketch.py
+++ b/src/sysdiagnose/analysers/timesketch.py
@@ -17,6 +17,9 @@
 from sysdiagnose.parsers.crashlogs import CrashLogsParser
 from collections.abc import Generator
 from sysdiagnose.utils.base import BaseAnalyserInterface
+import logging
+
+logger = logging.getLogger(__name__)


 class TimesketchAnalyser(BaseAnalyserInterface):
@@ -50,7 +53,7 @@ def __extract_ts_mobileactivation(self) -> Generator[dict, None, None]:
                     pass
                 yield ts_event
         except Exception as e:
-            print(f"ERROR while extracting timestamp from mobileactivation file. Reason: {str(e)}")
+            logger.error(f"ERROR while extracting timestamp from mobileactivation file. 
Reason: {str(e)}") def __extract_ts_powerlogs(self) -> Generator[dict, None, None]: try: @@ -91,7 +94,7 @@ def __extract_ts_powerlogs(self) -> Generator[dict, None, None]: pass except Exception as e: - print(f"ERROR while extracting timestamp from powerlogs. Reason: {str(e)}") + logger.error(f"ERROR while extracting timestamp from powerlogs. Reason: {str(e)}") def __extract_ts_swcutil(self) -> Generator[dict, None, None]: try: @@ -111,10 +114,10 @@ def __extract_ts_swcutil(self) -> Generator[dict, None, None]: yield ts_event except KeyError: # some entries do not have a Last Checked or timestamp field - # print(f"WARNING {filename} while extracting timestamp from {(service['Service'])} - {(service['App ID'])}. Record not inserted.") + logger.warning(f"Error while extracting timestamp from {(service['Service'])} - {(service['App ID'])}. Record not inserted.") pass except Exception as e: - print(f"ERROR while extracting timestamp from swcutil. Reason {str(e)}") + logger.error(f"ERROR while extracting timestamp from swcutil. Reason {str(e)}") def __extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]: try: @@ -135,7 +138,7 @@ def __extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]: } yield ts_event except Exception as e: - print(f"ERROR while extracting timestamp from accessibility_tcc. Reason {str(e)}") + logger.error(f"ERROR while extracting timestamp from accessibility_tcc. Reason {str(e)}") def __extract_ts_shutdownlogs(self) -> Generator[dict, None, None]: try: @@ -153,9 +156,9 @@ def __extract_ts_shutdownlogs(self) -> Generator[dict, None, None]: } yield ts_event except Exception as e: - print(f"WARNING: shutdownlog entry not parsed: {event}. Reason: {str(e)}") + logger.warning(f"WARNING: shutdownlog entry not parsed: {event}. Reason: {str(e)}") except Exception as e: - print(f"ERROR while extracting timestamp from shutdownlog. Reason: {str(e)}") + logger.error(f"ERROR while extracting timestamp from shutdownlog. Reason: {str(e)}") def __extract_ts_logarchive(self) -> Generator[dict, None, None]: try: @@ -173,9 +176,9 @@ def __extract_ts_logarchive(self) -> Generator[dict, None, None]: } yield ts_event except KeyError as e: - print(f"WARNING: trace not parsed: {event}. Error {e}") + logger.warning(f"WARNING: trace not parsed: {event}. Error {e}") except Exception as e: - print(f"ERROR while extracting timestamp from logarchive. Reason: {str(e)}") + logger.error(f"ERROR while extracting timestamp from logarchive. Reason: {str(e)}") def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]: try: @@ -206,7 +209,7 @@ def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]: } yield ts_event except Exception as e: - print(f"ERROR while extracting timestamp from wifisecurity. Reason {str(e)}") + logger.error(f"ERROR while extracting timestamp from wifisecurity. Reason {str(e)}") def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: try: @@ -228,7 +231,7 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: yield ts_event except KeyError: # some wifi networks do not have an AddedAt field - # print(f"ERROR {filename} while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.") + logger.warning(f"Error while extracting timestamp from {ssid}. Reason: {str(e)}. 
Record not inserted.") pass # WIFI modified @@ -245,7 +248,7 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: yield ts_event except KeyError: # some wifi networks do not have an UpdatedAt field - # print(f"ERROR {filename} while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.") + logger.warning(f"Error while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.") pass # Password for wifi modified @@ -262,10 +265,10 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: yield ts_event except KeyError: # some wifi networks do not have a password modification date - # print(f"ERROR {filename} while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.") + logger.warning(f"Error while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.") pass except Exception as e: - print(f"ERROR while extracting timestamp from wifi_known_networks. Reason {str(e)}") + logger.error(f"ERROR while extracting timestamp from wifi_known_networks. Reason {str(e)}") def __extract_ts_crashlogs(self) -> Generator[dict, None, None]: try: @@ -288,7 +291,7 @@ def __extract_ts_crashlogs(self) -> Generator[dict, None, None]: # skip bug_type fields pass except Exception as e: - print(f"ERROR while extracting timestamp from crashlog. Reason {str(e)}") + logger.error(f"ERROR while extracting timestamp from crashlog. Reason {str(e)}") def execute(self): # Get all the functions that start with '__extract_ts_' diff --git a/src/sysdiagnose/analysers/yarascan.py b/src/sysdiagnose/analysers/yarascan.py index e2a8b13..28433ce 100644 --- a/src/sysdiagnose/analysers/yarascan.py +++ b/src/sysdiagnose/analysers/yarascan.py @@ -3,8 +3,11 @@ import glob import threading import queue +import logging from sysdiagnose.utils.base import BaseAnalyserInterface +logger = logging.getLogger(__name__) + # These are the commonly used external variables that can be used in the YARA rules externals = { @@ -67,7 +70,7 @@ def execute(self): results['matches'] = matches if len(results['errors']) > 0: - print("Scan finished with errors. Review the results") + logger.error("Scan finished with errors. 
Review the results") return results @@ -78,17 +81,17 @@ def get_valid_yara_rule_files(self) -> tuple[list, list]: for rule_file in rule_files_to_test: if not os.path.isfile(rule_file): continue - print(f"Loading YARA rule: {rule_file}") + logger.info(f"Loading YARA rule: {rule_file}") try: yara.compile(filepath=rule_file, externals=externals) # if we reach this point, the rule is valid rule_files_validated.append(rule_file) except yara.SyntaxError as e: - print(f"Error compiling rule {rule_file}: {str(e)}") + logger.error(f"Error compiling rule {rule_file}: {str(e)}") errors.append(f"Error compiling rule {rule_file}: {str(e)}") continue except yara.Error as e: - print(f"Error compiling rule {rule_file}: {str(e)}") + logger.error(f"Error compiling rule {rule_file}: {str(e)}") errors.append(f"Error loading rule {rule_file}: {str(e)}") continue @@ -109,7 +112,7 @@ def scan_directory(directories: list, rule_filepaths: dict, ignore_files: list, for ignore_folder in ignore_folders: if root.startswith(ignore_folder): stop = True - print(f"Skipping folder: {root}") + logger.info(f"Skipping folder: {root}") continue if stop: continue @@ -119,7 +122,7 @@ def scan_directory(directories: list, rule_filepaths: dict, ignore_files: list, for ignore_file in ignore_files: if file_full_path.startswith(ignore_file): stop = True - print(f"Skipping file: {file_full_path}") + logger.info(f"Skipping file: {file_full_path}") continue if stop: continue @@ -131,13 +134,13 @@ def consumer(): rules = yara.compile(filepaths=rule_filepaths, externals=externals) while True: - print(f"Consumer thread seeing {file_queue.qsize()} files in queue, and taking one") + logger.info(f"Consumer thread seeing {file_queue.qsize()} files in queue, and taking one") file_path = file_queue.get() if file_path is None: - print("Consumer thread exiting") + logger.info("Consumer thread exiting") break - print(f"Scanning file: {file_path}") + logger.info(f"Scanning file: {file_path}") # set the externals for this file - massive slowdown # externals_local = externals.copy() # externals_local['filename'] = file diff --git a/src/sysdiagnose/parsers/crashlogs.py b/src/sysdiagnose/parsers/crashlogs.py index 5bb5771..3894fc7 100644 --- a/src/sysdiagnose/parsers/crashlogs.py +++ b/src/sysdiagnose/parsers/crashlogs.py @@ -4,8 +4,11 @@ import re import json from datetime import datetime, timezone +import logging # from pycrashreport.crash_report import get_crash_report_from_file +logger = logging.getLogger(__name__) + class CrashLogsParser(BaseParserInterface): ''' @@ -45,7 +48,7 @@ def execute(self) -> list | dict: result = [] seen = set() for file in files: - print(f"Processing file: {file}") + logger.info(f"Processing file: {file}") if file.endswith('crashes_and_spins.log'): result.extend(CrashLogsParser.parse_summary_file(file)) elif os.path.basename(file).startswith('.'): @@ -60,7 +63,7 @@ def execute(self) -> list | dict: seen.add(ips_hash) result.append(ips) except Exception as e: - print(f"Skipping file due to error {file}: {e}") + logger.warning(f"Skipping file due to error {file}: {e}") return result def parse_ips_file(path: str) -> list | dict: @@ -127,7 +130,7 @@ def parse_ips_file(path: str) -> list | dict: return result def parse_summary_file(path: str) -> list | dict: - print(f"Parsing summary file: {path}") + logger.info(f"Parsing summary file: {path}") result = [] with open(path, 'r') as f: for line in f: diff --git a/src/sysdiagnose/parsers/logarchive.py b/src/sysdiagnose/parsers/logarchive.py index f059f85..b25cd85 100644 --- 
a/src/sysdiagnose/parsers/logarchive.py +++ b/src/sysdiagnose/parsers/logarchive.py @@ -16,7 +16,9 @@ import sys import tempfile import shutil +import logging +logger = logging.getLogger(__name__) # --------------------------------------------# # On 2023-04-13: using ndjson instead of json to avoid parsing issues. @@ -200,7 +202,7 @@ def parse_folder_to_file(input_folder: str, output_file: str) -> bool: LogarchiveParser.__convert_using_unifiedlogparser(input_folder, output_file) return True except IndexError: - print('Error: No system_logs.logarchive/ folder found in logs/ directory') + logger.error('Error: No system_logs.logarchive/ folder found in logs/ directory') return False def __convert_using_native_logparser(input_folder: str, output_file: str) -> list: @@ -214,15 +216,16 @@ def __convert_using_native_logparser(input_folder: str, output_file: str) -> lis entry_json = LogarchiveParser.convert_entry_to_unifiedlog_format(json.loads(line)) f_out.write(json.dumps(entry_json) + '\n') except json.JSONDecodeError as e: - print(f"WARNING: error parsing JSON {line}: {str(e)}") + logger.warning(f"WARNING: error parsing JSON {line}: {str(e)}") except KeyError: # last line of log does not contain 'time' field, nor the rest of the data. # so just ignore it and all the rest. # last line looks like {'count':xyz, 'finished':1} + logger.debug(f"Looks like we arrive to the end of the file: {line}") break def __convert_using_unifiedlogparser(input_folder: str, output_file: str) -> list: - print('WARNING: using Mandiant UnifiedLogReader to parse logs, results will be less reliable than on OS X') + logger.warning('WARNING: using Mandiant UnifiedLogReader to parse logs, results will be less reliable than on OS X') # run the conversion tool, saving to a temp folder # read the created file/files, add timestamp # sort based on time @@ -232,7 +235,7 @@ def __convert_using_unifiedlogparser(input_folder: str, output_file: str) -> lis try: subprocess.check_output(cmd_parsing_linux_test, universal_newlines=True) except FileNotFoundError: - print('ERROR: UnifiedLogReader not found, please install it. See README.md for more information.') + logger.error('ERROR: UnifiedLogReader not found, please install it. 
See README.md for more information.') return # really run the tool now @@ -250,7 +253,7 @@ def __convert_using_unifiedlogparser(input_folder: str, output_file: str) -> lis entry_json = LogarchiveParser.convert_entry_to_unifiedlog_format(json.loads(line)) entries.append(entry_json) except json.JSONDecodeError as e: - print(f"WARNING: error parsing JSON {fname_reading}: {str(e)}") + logger.warning(f"WARNING: error parsing JSON {fname_reading}: {str(e)}") # tempfolder is cleaned automatically after the block # sort the data as it's not sorted by default, and we need sorted data for other analysers diff --git a/src/sysdiagnose/parsers/powerlogs.py b/src/sysdiagnose/parsers/powerlogs.py index 28bccce..b33a9da 100644 --- a/src/sysdiagnose/parsers/powerlogs.py +++ b/src/sysdiagnose/parsers/powerlogs.py @@ -9,6 +9,9 @@ import os from sysdiagnose.utils.base import BaseParserInterface from datetime import datetime, timezone +import logging + +logger = logging.getLogger(__name__) class PowerLogsParser(BaseParserInterface): @@ -56,8 +59,8 @@ def execute(self) -> list: # skip "None" values and such pass - print("Skipped the following tables as there are not timestamps:") - [print(f" {table}") for table in skipped] + logger.warning("Skipped the following tables as there are not timestamps:") + [logger.warning(f" {table}") for table in skipped] return result def parse_file_to_json(path: str) -> dict: diff --git a/src/sysdiagnose/parsers/ps.py b/src/sysdiagnose/parsers/ps.py index 684334f..e60e2b7 100644 --- a/src/sysdiagnose/parsers/ps.py +++ b/src/sysdiagnose/parsers/ps.py @@ -16,6 +16,9 @@ import os import re import sys +import logging + +logger = logging.getLogger(__name__) class PsParser(BaseParserInterface): @@ -64,7 +67,7 @@ def parse_file(filename): result.append(row) return result except Exception as e: - print(f"Could not parse ps.txt: {str(e)}") + logger.error(f"Could not parse ps.txt: {str(e)}") return [] def exclude_known_goods(processes: dict, known_good: dict) -> list[dict]: @@ -99,7 +102,7 @@ def export_to_json(processes, filename="./ps.json"): with open(filename, "w") as fd: fd.write(json_ps) except Exception as e: - print(f"Impossible to dump the processes to {filename}. Reason: {str(e)}\n") + logger.error(f"Impossible to dump the processes to {filename}. Reason: {str(e)}\n") """ diff --git a/src/sysdiagnose/parsers/security_sysdiagnose.py b/src/sysdiagnose/parsers/security_sysdiagnose.py index 0739c0c..8e5f3ad 100644 --- a/src/sysdiagnose/parsers/security_sysdiagnose.py +++ b/src/sysdiagnose/parsers/security_sysdiagnose.py @@ -1,6 +1,9 @@ import os import re from sysdiagnose.utils.base import BaseParserInterface +import logging + +logger = logging.getLogger(__name__) # TODO make a security sysdiagnose analyser exporting in time based jsonl for timeline. # - client_trust: date @@ -86,7 +89,7 @@ def process_buffer(buffer: list, section: str, json_result: dict): if function_name in dir(SecuritySysdiagnoseParser): getattr(SecuritySysdiagnoseParser, function_name)(buffer, json_result) else: - print(f"ERROR: Function {function_name} not found in the SecuritySysdiagnoseParser class.") + logger.error(f"ERROR: Function {function_name} not found in the SecuritySysdiagnoseParser class.") json_result['errors'].append(f"Cannot parse section {function_name} as it is unknown. 
Parser needs to be extended.") def process_buffer_circle(buffer: list, json_result: dict): diff --git a/src/sysdiagnose/parsers/wifisecurity.py b/src/sysdiagnose/parsers/wifisecurity.py index af6b0e8..efee347 100644 --- a/src/sysdiagnose/parsers/wifisecurity.py +++ b/src/sysdiagnose/parsers/wifisecurity.py @@ -6,6 +6,9 @@ import os from sysdiagnose.utils.base import BaseParserInterface +import logging + +logger = logging.getLogger(__name__) class WifiSecurityParser(BaseParserInterface): @@ -51,15 +54,15 @@ def parse_file(path: str) -> list | dict: for line in f: if ' : ' in line: key, value = line.split(" : ") - # print(f"key: {key.strip()}, value: {value.strip()}") + logger.debug(f"key: {key.strip()}, value: {value.strip()}") element[key.strip()] = value.strip() elif element: entries.append(element) - # print(f"appending {element}") + logger.debug(f"appending {element}") element = {} except IndexError: return {'error': 'No WiFi/security.txt file present'} except Exception as e: - print(f"Could not parse: {path}. Reason: {str(e)}") + logger.error(f"Could not parse: {path}. Reason: {str(e)}") return {'error': f'Could not parse: {path}. Reason: {str(e)}'} return entries diff --git a/src/sysdiagnose/utils/base.py b/src/sysdiagnose/utils/base.py index 9535dab..179d217 100644 --- a/src/sysdiagnose/utils/base.py +++ b/src/sysdiagnose/utils/base.py @@ -6,6 +6,9 @@ from datetime import datetime import re from functools import cached_property +import logging + +logger = logging.getLogger(__name__) class SysdiagnoseConfig: @@ -47,7 +50,7 @@ def __init__(self, module_filename: str, config: SysdiagnoseConfig, case_id: str os.makedirs(self.case_parsed_data_folder, exist_ok=True) if not os.path.isdir(self.case_data_folder): - print(f"Case {case_id} does not exist", file=sys.stderr) + logger.error(f"Case {case_id} does not exist") raise FileNotFoundError(f"Case {case_id} does not exist") self.output_file = os.path.join(self.case_parsed_data_folder, self.module_name + '.' + self.format) From b4bd4ef0f30bb66ecc28d88ca9e15290855032d0 Mon Sep 17 00:00:00 2001 From: Dario Borreguero Rincon Date: Mon, 14 Oct 2024 10:38:47 +0200 Subject: [PATCH 02/20] #11 adding logging to: 1. another demo parser?? 2. 
---
 src/sysdiagnose/parsers/demo_parser.py | 6 +++++-
 src/sysdiagnose/utils/base.py          | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/sysdiagnose/parsers/demo_parser.py b/src/sysdiagnose/parsers/demo_parser.py
index b164d5e..5e1dd4c 100644
--- a/src/sysdiagnose/parsers/demo_parser.py
+++ b/src/sysdiagnose/parsers/demo_parser.py
@@ -3,6 +3,9 @@

 import os
 import json
 from sysdiagnose.utils.base import BaseParserInterface
+import logging
+
+logger = logging.getLogger(__name__)


 class DemoParser(BaseParserInterface):
@@ -25,6 +28,7 @@ def execute(self) -> list | dict:
         json_object = {}
         log_files = self.get_log_files()
         for log_file in log_files:
+            logger.info(f"Processing file {log_file}")
             pass
         return json_object
@@ -44,5 +48,5 @@ def parse_path_to_folder(self, path: str, output_folder: str) -> bool:
                 json.dump(json_object, f)
             return True
         except Exception as e:
-            print(f"Error: {e}")
+            logger.error(f"Error: {e}")
             return False
diff --git a/src/sysdiagnose/utils/base.py b/src/sysdiagnose/utils/base.py
index 179d217..e7e94fb 100644
--- a/src/sysdiagnose/utils/base.py
+++ b/src/sysdiagnose/utils/base.py
@@ -112,6 +112,7 @@ def get_result(self, force: bool = False) -> list | dict:

         if self._result is None:
             if self.output_exists():
+                logger.info("Using cached results")
                 # load existing output
                 with open(self.output_file, 'r') as f:
                     if self.format == 'json':

From 0e487dbc8835140e054a5adcf8d0ffef52fd3f04 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Tue, 15 Oct 2024 09:39:18 +0200
Subject: [PATCH 03/20] #11 Always log to file at INFO level by default; log to
 the console only at the level given by the --log command-line parameter
 (default: WARNING).

File logging rules:
- JSONL format. Note: still breaks if double quotes are added to the message.
- One log file per execution (analyse, parse), per case.
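Illustrative sketch, not part of the patch series: the setup this commit wires into main.py, reduced to its essentials. The handler levels and the json.dumps-based format string mirror the diff below; the case and file names are invented for the example.

import json
import logging

logger = logging.getLogger('sysdiagnose')
logger.setLevel(logging.INFO)

# Console handler: level comes from the new --log argument (default WARNING)
console = logging.StreamHandler()
console.setLevel(logging.WARNING)
console.setFormatter(logging.Formatter('[%(levelname)s] [%(module)s] %(message)s'))
logger.addHandler(console)

# File handler: one JSONL file per execution and per case, always at INFO
file_handler = logging.FileHandler('20241015T093918-parse-case1.jsonl')  # invented name
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(logging.Formatter(json.dumps({
    'timestamp': '%(asctime)s',
    'level': '%(levelname)s',
    'module': '%(module)s',
    'message': '%(message)s',
})))
logger.addHandler(file_handler)

logger.info("Parser 'demo_parser' started")            # written to the file only
logger.warning("Parser 'foo' is not implemented yet")  # written to file and console

Rendering the format string through json.dumps is what makes each record a JSON object, and also why a double quote inside %(message)s breaks the line: it terminates the JSON string early. That is the caveat the commit message notes, and a later commit in this series replaces this trick with a proper JSON formatter.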
--- src/sysdiagnose/analysers/apps.py | 2 +- src/sysdiagnose/analysers/demo_analyser.py | 2 +- src/sysdiagnose/analysers/timesketch.py | 2 +- src/sysdiagnose/analysers/yarascan.py | 2 +- src/sysdiagnose/main.py | 86 ++++++++++++++++++- src/sysdiagnose/parsers/crashlogs.py | 2 +- src/sysdiagnose/parsers/demo_parser.py | 2 +- src/sysdiagnose/parsers/logarchive.py | 2 +- src/sysdiagnose/parsers/powerlogs.py | 2 +- src/sysdiagnose/parsers/ps.py | 2 +- .../parsers/security_sysdiagnose.py | 2 +- src/sysdiagnose/parsers/wifisecurity.py | 2 +- src/sysdiagnose/utils/base.py | 2 +- 13 files changed, 94 insertions(+), 16 deletions(-) diff --git a/src/sysdiagnose/analysers/apps.py b/src/sysdiagnose/analysers/apps.py index 10d0e38..e95fc9c 100644 --- a/src/sysdiagnose/analysers/apps.py +++ b/src/sysdiagnose/analysers/apps.py @@ -11,7 +11,7 @@ from sysdiagnose.parsers.itunesstore import iTunesStoreParser from sysdiagnose.parsers.logarchive import LogarchiveParser -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') class AppsAnalyser(BaseAnalyserInterface): diff --git a/src/sysdiagnose/analysers/demo_analyser.py b/src/sysdiagnose/analysers/demo_analyser.py index 3981f35..740c239 100644 --- a/src/sysdiagnose/analysers/demo_analyser.py +++ b/src/sysdiagnose/analysers/demo_analyser.py @@ -6,7 +6,7 @@ import logging from sysdiagnose.utils.base import BaseAnalyserInterface -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') class DemoAnalyser(BaseAnalyserInterface): diff --git a/src/sysdiagnose/analysers/timesketch.py b/src/sysdiagnose/analysers/timesketch.py index 39e77e2..3d01c6e 100644 --- a/src/sysdiagnose/analysers/timesketch.py +++ b/src/sysdiagnose/analysers/timesketch.py @@ -19,7 +19,7 @@ from sysdiagnose.utils.base import BaseAnalyserInterface import logging -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') class TimesketchAnalyser(BaseAnalyserInterface): diff --git a/src/sysdiagnose/analysers/yarascan.py b/src/sysdiagnose/analysers/yarascan.py index 28433ce..909c362 100644 --- a/src/sysdiagnose/analysers/yarascan.py +++ b/src/sysdiagnose/analysers/yarascan.py @@ -6,7 +6,7 @@ import logging from sysdiagnose.utils.base import BaseAnalyserInterface -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') # These are the commonly used external variables that can be used in the YARA rules diff --git a/src/sysdiagnose/main.py b/src/sysdiagnose/main.py index 8674beb..8b0fa40 100644 --- a/src/sysdiagnose/main.py +++ b/src/sysdiagnose/main.py @@ -3,7 +3,12 @@ import sys from sysdiagnose import Sysdiagnose import os +import json +import logging +import time +logger = logging.getLogger('sysdiagnose') +logger.setLevel(logging.INFO) def parse_parser_error(message): sd = Sysdiagnose() @@ -22,6 +27,30 @@ def analyse_parser_error(message): sd.print_analysers_list() sys.exit(2) +def get_console_logger(level: str) -> logging.StreamHandler: + # Format + fmt_console = logging.Formatter('[%(levelname)s] [%(module)s] %(message)s') + # Console handler + ch = logging.StreamHandler() + ch.setLevel(level) + ch.setFormatter(fmt_console) + + return ch + +def get_json_logger(filename: str) -> logging.FileHandler: + # https://stackoverflow.com/questions/50144628/python-logging-into-file-as-a-dictionary-or-json + fmt_json = logging.Formatter( + json.dumps({ + 'timestamp':'%(asctime)s', + 'level': '%(levelname)s', + 'module': '%(module)s', + 'message': '%(message)s'})) + # File handler + fh = 
logging.FileHandler(filename) + fh.setLevel(logging.INFO) + fh.setFormatter(fmt_json) + + return fh def main(): parser = argparse.ArgumentParser( @@ -41,11 +70,13 @@ def main(): # parse mode parse_parser = subparsers.add_parser('parse', help='Parse a case') parse_parser.add_argument('parser', help='Name of the parser, "all" for running all parsers, or "list" for a listing of all parsers') + parse_parser.add_argument('--log', default='WARNING', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Enables logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)') parse_parser.error = parse_parser_error # analyse mode analyse_parser = subparsers.add_parser('analyse', help='Analyse a case') analyse_parser.add_argument('analyser', help='Name of the analyser, "all" for running all analysers, or "list" for a listing of all analysers') + analyse_parser.add_argument('--log', default='WARNING', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Enables logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)') analyse_parser.error = analyse_parser_error # list mode @@ -123,14 +154,37 @@ def main(): else: case_ids = [args.case_id] + # Handle console logging + log_level = args.log.upper() + logger.addHandler(get_console_logger(log_level)) + + logger2file = None for case_id in case_ids: + # Handle file logging + time_str = time.strftime("%Y%m%dT%H%M%S") + filename = f"{time_str}-parse-{case_id}.jsonl" + folder = sd.config.get_case_parsed_data_folder(case_id) + # https://stackoverflow.com/questions/13839554/how-to-change-filehandle-with-python-logging-on-the-fly-with-different-classes-a + if logger2file is None: + logger2file = get_json_logger(os.path.join(folder, filename)) + logger.addHandler(logger2file) + else: + logger2file.close() + logger2file.setStream(open(os.path.join(folder, filename), 'a')) + print(f"Case ID: {case_id}") for parser in parsers_list: print(f"Parser '{parser}' for case ID '{case_id}'") + logger.info(f"Parser '{parser}' started") try: - sd.parse(parser, case_id) + result = sd.parse(parser, case_id) + result_str = "successfully" if result == 0 else "with errors" + logger.info(f"Parser '{parser}' finished {result_str}") except NotImplementedError: - print(f"Parser '{parser}' is not implemented yet, skipping") + logger.warning(f"Parser '{parser}' is not implemented yet, skipping") + + if not logger2file is None: + logger2file.close() elif args.mode == 'analyse': # Handle analyse mode @@ -155,14 +209,38 @@ def main(): else: case_ids = [args.case_id] + # Handle console logging + log_level = args.log.upper() + logger.addHandler(get_console_logger(log_level)) + + logger2file = None for case_id in case_ids: + # Handle file logging + time_str = time.strftime("%Y%m%dT%H%M%S") + filename = f"{time_str}-analyse-{case_id}.jsonl" + folder = sd.config.get_case_parsed_data_folder(case_id) + # https://stackoverflow.com/questions/13839554/how-to-change-filehandle-with-python-logging-on-the-fly-with-different-classes-a + if logger2file is None: + logger2file = get_json_logger(os.path.join(folder, filename)) + logger.addHandler(logger2file) + else: + logger2file.close() + logger2file.setStream(open(os.path.join(folder, filename), 'a')) + print(f"Case ID: {case_id}") for analyser in analysers_list: print(f" Analyser '{analyser}' for case ID '{case_id}'") + logger.info(f"Analyser '{analyser}' started") try: - sd.analyse(analyser, case_id) + result = sd.analyse(analyser, case_id) + result_str = "successfully" if result == 0 else "with errors" + logger.info(f"Analyser '{analyser}' 
finished {result_str}") except NotImplementedError: - print(f"Analyser '{analyser}' is not implemented yet, skipping") + logger.warning(f"Analyser '{analyser}' is not implemented yet, skipping") + + if not logger2file is None: + logger2file.close() + else: parser.print_help() diff --git a/src/sysdiagnose/parsers/crashlogs.py b/src/sysdiagnose/parsers/crashlogs.py index 3894fc7..137915e 100644 --- a/src/sysdiagnose/parsers/crashlogs.py +++ b/src/sysdiagnose/parsers/crashlogs.py @@ -7,7 +7,7 @@ import logging # from pycrashreport.crash_report import get_crash_report_from_file -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') class CrashLogsParser(BaseParserInterface): diff --git a/src/sysdiagnose/parsers/demo_parser.py b/src/sysdiagnose/parsers/demo_parser.py index 5e1dd4c..4feca32 100644 --- a/src/sysdiagnose/parsers/demo_parser.py +++ b/src/sysdiagnose/parsers/demo_parser.py @@ -5,7 +5,7 @@ from sysdiagnose.utils.base import BaseParserInterface import logging -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') class DemoParser(BaseParserInterface): diff --git a/src/sysdiagnose/parsers/logarchive.py b/src/sysdiagnose/parsers/logarchive.py index b25cd85..2d87df0 100644 --- a/src/sysdiagnose/parsers/logarchive.py +++ b/src/sysdiagnose/parsers/logarchive.py @@ -18,7 +18,7 @@ import shutil import logging -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') # --------------------------------------------# # On 2023-04-13: using ndjson instead of json to avoid parsing issues. diff --git a/src/sysdiagnose/parsers/powerlogs.py b/src/sysdiagnose/parsers/powerlogs.py index b33a9da..ee0ae2d 100644 --- a/src/sysdiagnose/parsers/powerlogs.py +++ b/src/sysdiagnose/parsers/powerlogs.py @@ -11,7 +11,7 @@ from datetime import datetime, timezone import logging -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') class PowerLogsParser(BaseParserInterface): diff --git a/src/sysdiagnose/parsers/ps.py b/src/sysdiagnose/parsers/ps.py index e60e2b7..1e2c2bf 100644 --- a/src/sysdiagnose/parsers/ps.py +++ b/src/sysdiagnose/parsers/ps.py @@ -18,7 +18,7 @@ import sys import logging -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') class PsParser(BaseParserInterface): diff --git a/src/sysdiagnose/parsers/security_sysdiagnose.py b/src/sysdiagnose/parsers/security_sysdiagnose.py index 8e5f3ad..4485eac 100644 --- a/src/sysdiagnose/parsers/security_sysdiagnose.py +++ b/src/sysdiagnose/parsers/security_sysdiagnose.py @@ -3,7 +3,7 @@ from sysdiagnose.utils.base import BaseParserInterface import logging -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') # TODO make a security sysdiagnose analyser exporting in time based jsonl for timeline. 
# - client_trust: date diff --git a/src/sysdiagnose/parsers/wifisecurity.py b/src/sysdiagnose/parsers/wifisecurity.py index efee347..5122464 100644 --- a/src/sysdiagnose/parsers/wifisecurity.py +++ b/src/sysdiagnose/parsers/wifisecurity.py @@ -8,7 +8,7 @@ from sysdiagnose.utils.base import BaseParserInterface import logging -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') class WifiSecurityParser(BaseParserInterface): diff --git a/src/sysdiagnose/utils/base.py b/src/sysdiagnose/utils/base.py index e7e94fb..3ca4017 100644 --- a/src/sysdiagnose/utils/base.py +++ b/src/sysdiagnose/utils/base.py @@ -8,7 +8,7 @@ from functools import cached_property import logging -logger = logging.getLogger(__name__) +logger = logging.getLogger('sysdiagnose') class SysdiagnoseConfig: From 4c625183fe312e03529a53f2c450dec5cfb603c0 Mon Sep 17 00:00:00 2001 From: Dario Borreguero Rincon Date: Tue, 15 Oct 2024 11:36:25 +0200 Subject: [PATCH 04/20] #11 1. Use of proper JSON formatter 2. Use of extra and exc_info fields when logging. The usage of extra fields needs to be standardised --- requirements.txt | 3 ++- src/sysdiagnose/analysers/demo_analyser.py | 2 +- src/sysdiagnose/analysers/timesketch.py | 28 +++++++++++----------- src/sysdiagnose/analysers/yarascan.py | 4 ++-- src/sysdiagnose/main.py | 28 ++++++++++++---------- src/sysdiagnose/parsers/crashlogs.py | 2 +- src/sysdiagnose/parsers/demo_parser.py | 4 ++-- src/sysdiagnose/parsers/logarchive.py | 8 +++---- src/sysdiagnose/parsers/ps.py | 4 ++-- src/sysdiagnose/parsers/wifisecurity.py | 2 +- src/sysdiagnose/utils/jsonlogger.py | 9 +++++++ 11 files changed, 54 insertions(+), 40 deletions(-) create mode 100644 src/sysdiagnose/utils/jsonlogger.py diff --git a/requirements.txt b/requirements.txt index d270cf4..650ba48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ pandas==2.2.2 numpy==2.0.0 nska-deserialize==1.5.1 yara-python==4.5.1 -# pycrashreport==1.2.4 \ No newline at end of file +# pycrashreport==1.2.4 +python-json-logger==2.0.7 \ No newline at end of file diff --git a/src/sysdiagnose/analysers/demo_analyser.py b/src/sysdiagnose/analysers/demo_analyser.py index 740c239..6a9c853 100644 --- a/src/sysdiagnose/analysers/demo_analyser.py +++ b/src/sysdiagnose/analysers/demo_analyser.py @@ -24,7 +24,7 @@ def execute(self): By doing so you will get the parser output even if it never ran before. """ print("DO SOMETHING HERE") - logger.info("log something here") + logger.info("log something here", extra={'analyser': __name__}) # json_data = p_fooparser.get_result() diff --git a/src/sysdiagnose/analysers/timesketch.py b/src/sysdiagnose/analysers/timesketch.py index 3d01c6e..548f7f5 100644 --- a/src/sysdiagnose/analysers/timesketch.py +++ b/src/sysdiagnose/analysers/timesketch.py @@ -53,7 +53,7 @@ def __extract_ts_mobileactivation(self) -> Generator[dict, None, None]: pass yield ts_event except Exception as e: - logger.error(f"ERROR while extracting timestamp from mobileactivation file. Reason: {str(e)}") + logger.exception("ERROR while extracting timestamp from mobileactivation file.") def __extract_ts_powerlogs(self) -> Generator[dict, None, None]: try: @@ -94,7 +94,7 @@ def __extract_ts_powerlogs(self) -> Generator[dict, None, None]: pass except Exception as e: - logger.error(f"ERROR while extracting timestamp from powerlogs. 
Reason: {str(e)}") + logger.exception("ERROR while extracting timestamp from powerlogs.") def __extract_ts_swcutil(self) -> Generator[dict, None, None]: try: @@ -117,7 +117,7 @@ def __extract_ts_swcutil(self) -> Generator[dict, None, None]: logger.warning(f"Error while extracting timestamp from {(service['Service'])} - {(service['App ID'])}. Record not inserted.") pass except Exception as e: - logger.error(f"ERROR while extracting timestamp from swcutil. Reason {str(e)}") + logger.exception("ERROR while extracting timestamp from swcutil.") def __extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]: try: @@ -138,7 +138,7 @@ def __extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]: } yield ts_event except Exception as e: - logger.error(f"ERROR while extracting timestamp from accessibility_tcc. Reason {str(e)}") + logger.exception("ERROR while extracting timestamp from accessibility_tcc.") def __extract_ts_shutdownlogs(self) -> Generator[dict, None, None]: try: @@ -156,9 +156,9 @@ def __extract_ts_shutdownlogs(self) -> Generator[dict, None, None]: } yield ts_event except Exception as e: - logger.warning(f"WARNING: shutdownlog entry not parsed: {event}. Reason: {str(e)}") + logger.warning(f"WARNING: shutdownlog entry not parsed: {event}", exc_info=True) except Exception as e: - logger.error(f"ERROR while extracting timestamp from shutdownlog. Reason: {str(e)}") + logger.exception("ERROR while extracting timestamp from shutdownlog.") def __extract_ts_logarchive(self) -> Generator[dict, None, None]: try: @@ -176,9 +176,9 @@ def __extract_ts_logarchive(self) -> Generator[dict, None, None]: } yield ts_event except KeyError as e: - logger.warning(f"WARNING: trace not parsed: {event}. Error {e}") + logger.warning(f"WARNING: trace not parsed: {event}.", exc_info=True) except Exception as e: - logger.error(f"ERROR while extracting timestamp from logarchive. Reason: {str(e)}") + logger.exception(f"ERROR while extracting timestamp from logarchive.") def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]: try: @@ -209,7 +209,7 @@ def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]: } yield ts_event except Exception as e: - logger.error(f"ERROR while extracting timestamp from wifisecurity. Reason {str(e)}") + logger.exception("ERROR while extracting timestamp from wifisecurity.") def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: try: @@ -231,7 +231,7 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: yield ts_event except KeyError: # some wifi networks do not have an AddedAt field - logger.warning(f"Error while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.") + logger.warning(f"Error while extracting timestamp from {ssid}. Record not inserted.", exc_info=True) pass # WIFI modified @@ -248,7 +248,7 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: yield ts_event except KeyError: # some wifi networks do not have an UpdatedAt field - logger.warning(f"Error while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.") + logger.warning(f"Error while extracting timestamp from {ssid}.Record not inserted.", exc_info=True) pass # Password for wifi modified @@ -265,10 +265,10 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: yield ts_event except KeyError: # some wifi networks do not have a password modification date - logger.warning(f"Error while extracting timestamp from {ssid}. Reason: {str(e)}. 
Record not inserted.") + logger.warning(f"Error while extracting timestamp from {ssid}. Record not inserted.", exc_info=True) pass except Exception as e: - logger.error(f"ERROR while extracting timestamp from wifi_known_networks. Reason {str(e)}") + logger.exception("ERROR while extracting timestamp from wifi_known_networks.") def __extract_ts_crashlogs(self) -> Generator[dict, None, None]: try: @@ -291,7 +291,7 @@ def __extract_ts_crashlogs(self) -> Generator[dict, None, None]: # skip bug_type fields pass except Exception as e: - logger.error(f"ERROR while extracting timestamp from crashlog. Reason {str(e)}") + logger.exception("ERROR while extracting timestamp from crashlog.") def execute(self): # Get all the functions that start with '__extract_ts_' diff --git a/src/sysdiagnose/analysers/yarascan.py b/src/sysdiagnose/analysers/yarascan.py index 909c362..a8648bc 100644 --- a/src/sysdiagnose/analysers/yarascan.py +++ b/src/sysdiagnose/analysers/yarascan.py @@ -87,11 +87,11 @@ def get_valid_yara_rule_files(self) -> tuple[list, list]: # if we reach this point, the rule is valid rule_files_validated.append(rule_file) except yara.SyntaxError as e: - logger.error(f"Error compiling rule {rule_file}: {str(e)}") + logger.exception(f"Error compiling rule {rule_file}") errors.append(f"Error compiling rule {rule_file}: {str(e)}") continue except yara.Error as e: - logger.error(f"Error compiling rule {rule_file}: {str(e)}") + logger.exception(f"Error compiling rule {rule_file}") errors.append(f"Error loading rule {rule_file}: {str(e)}") continue diff --git a/src/sysdiagnose/main.py b/src/sysdiagnose/main.py index 8b0fa40..c8d3548 100644 --- a/src/sysdiagnose/main.py +++ b/src/sysdiagnose/main.py @@ -6,6 +6,7 @@ import json import logging import time +from sysdiagnose.utils.jsonlogger import SysdiagnoseJsonFormatter logger = logging.getLogger('sysdiagnose') logger.setLevel(logging.INFO) @@ -39,12 +40,15 @@ def get_console_logger(level: str) -> logging.StreamHandler: def get_json_logger(filename: str) -> logging.FileHandler: # https://stackoverflow.com/questions/50144628/python-logging-into-file-as-a-dictionary-or-json - fmt_json = logging.Formatter( - json.dumps({ - 'timestamp':'%(asctime)s', - 'level': '%(levelname)s', - 'module': '%(module)s', - 'message': '%(message)s'})) + # fmt_json = logging.Formatter( + # json.dumps({ + # 'timestamp':'%(asctime)s', + # 'level': '%(levelname)s', + # 'module': '%(module)s', + # 'message': '%(message)s'})) + fmt_json = SysdiagnoseJsonFormatter( + fmt='%(asctime)s %(levelname)s %(module)s %(message)s', + rename_fields={'asctime':'timestamp'}) # File handler fh = logging.FileHandler(filename) fh.setLevel(logging.INFO) @@ -175,13 +179,13 @@ def main(): print(f"Case ID: {case_id}") for parser in parsers_list: print(f"Parser '{parser}' for case ID '{case_id}'") - logger.info(f"Parser '{parser}' started") + logger.info(f"Parser '{parser}' started", extra={'parser': parser}) try: result = sd.parse(parser, case_id) result_str = "successfully" if result == 0 else "with errors" - logger.info(f"Parser '{parser}' finished {result_str}") + logger.info(f"Parser '{parser}' finished {result_str}", extra={'parser': parser, 'result': result}) except NotImplementedError: - logger.warning(f"Parser '{parser}' is not implemented yet, skipping") + logger.warning(f"Parser '{parser}' is not implemented yet, skipping", extra={'parser': parser}) if not logger2file is None: logger2file.close() @@ -230,13 +234,13 @@ def main(): print(f"Case ID: {case_id}") for analyser in analysers_list: 
print(f" Analyser '{analyser}' for case ID '{case_id}'") - logger.info(f"Analyser '{analyser}' started") + logger.info(f"Analyser '{analyser}' started", extra={'analyser': analyser}) try: result = sd.analyse(analyser, case_id) result_str = "successfully" if result == 0 else "with errors" - logger.info(f"Analyser '{analyser}' finished {result_str}") + logger.info(f"Analyser '{analyser}' finished {result_str}", extra={'analyser': analyser, 'result': result}) except NotImplementedError: - logger.warning(f"Analyser '{analyser}' is not implemented yet, skipping") + logger.warning(f"Analyser '{analyser}' is not implemented yet, skipping", extra={'analyser': analyser}) if not logger2file is None: logger2file.close() diff --git a/src/sysdiagnose/parsers/crashlogs.py b/src/sysdiagnose/parsers/crashlogs.py index 137915e..03a84df 100644 --- a/src/sysdiagnose/parsers/crashlogs.py +++ b/src/sysdiagnose/parsers/crashlogs.py @@ -63,7 +63,7 @@ def execute(self) -> list | dict: seen.add(ips_hash) result.append(ips) except Exception as e: - logger.warning(f"Skipping file due to error {file}: {e}") + logger.warning(f"Skipping file due to error {file}", exc_info=True) return result def parse_ips_file(path: str) -> list | dict: diff --git a/src/sysdiagnose/parsers/demo_parser.py b/src/sysdiagnose/parsers/demo_parser.py index 4feca32..1c2d662 100644 --- a/src/sysdiagnose/parsers/demo_parser.py +++ b/src/sysdiagnose/parsers/demo_parser.py @@ -28,7 +28,7 @@ def execute(self) -> list | dict: json_object = {} log_files = self.get_log_files() for log_file in log_files: - logger.info(f"Processing file {log_file}") + logger.info(f"Processing file {log_file}", extra={'parser': __name__, 'log_file': log_file}) pass return json_object @@ -48,5 +48,5 @@ def parse_path_to_folder(self, path: str, output_folder: str) -> bool: json.dump(json_object, f) return True except Exception as e: - logger.error(f"Error: {e}") + logger.exception("Error") return False diff --git a/src/sysdiagnose/parsers/logarchive.py b/src/sysdiagnose/parsers/logarchive.py index 2d87df0..47fe139 100644 --- a/src/sysdiagnose/parsers/logarchive.py +++ b/src/sysdiagnose/parsers/logarchive.py @@ -202,7 +202,7 @@ def parse_folder_to_file(input_folder: str, output_file: str) -> bool: LogarchiveParser.__convert_using_unifiedlogparser(input_folder, output_file) return True except IndexError: - logger.error('Error: No system_logs.logarchive/ folder found in logs/ directory') + logger.exception('Error: No system_logs.logarchive/ folder found in logs/ directory') return False def __convert_using_native_logparser(input_folder: str, output_file: str) -> list: @@ -216,7 +216,7 @@ def __convert_using_native_logparser(input_folder: str, output_file: str) -> lis entry_json = LogarchiveParser.convert_entry_to_unifiedlog_format(json.loads(line)) f_out.write(json.dumps(entry_json) + '\n') except json.JSONDecodeError as e: - logger.warning(f"WARNING: error parsing JSON {line}: {str(e)}") + logger.warning(f"WARNING: error parsing JSON {line}", exc_info=True) except KeyError: # last line of log does not contain 'time' field, nor the rest of the data. # so just ignore it and all the rest. @@ -235,7 +235,7 @@ def __convert_using_unifiedlogparser(input_folder: str, output_file: str) -> lis try: subprocess.check_output(cmd_parsing_linux_test, universal_newlines=True) except FileNotFoundError: - logger.error('ERROR: UnifiedLogReader not found, please install it. See README.md for more information.') + logger.exception('ERROR: UnifiedLogReader not found, please install it. 
See README.md for more information.') return # really run the tool now @@ -253,7 +253,7 @@ def __convert_using_unifiedlogparser(input_folder: str, output_file: str) -> lis entry_json = LogarchiveParser.convert_entry_to_unifiedlog_format(json.loads(line)) entries.append(entry_json) except json.JSONDecodeError as e: - logger.warning(f"WARNING: error parsing JSON {fname_reading}: {str(e)}") + logger.warning(f"WARNING: error parsing JSON {fname_reading}", exc_info=True) # tempfolder is cleaned automatically after the block # sort the data as it's not sorted by default, and we need sorted data for other analysers diff --git a/src/sysdiagnose/parsers/ps.py b/src/sysdiagnose/parsers/ps.py index 1e2c2bf..eae9b98 100644 --- a/src/sysdiagnose/parsers/ps.py +++ b/src/sysdiagnose/parsers/ps.py @@ -67,7 +67,7 @@ def parse_file(filename): result.append(row) return result except Exception as e: - logger.error(f"Could not parse ps.txt: {str(e)}") + logger.exception("Could not parse ps.txt") return [] def exclude_known_goods(processes: dict, known_good: dict) -> list[dict]: @@ -102,7 +102,7 @@ def export_to_json(processes, filename="./ps.json"): with open(filename, "w") as fd: fd.write(json_ps) except Exception as e: - logger.error(f"Impossible to dump the processes to {filename}. Reason: {str(e)}\n") + logger.exception(f"Impossible to dump the processes to {filename}") """ diff --git a/src/sysdiagnose/parsers/wifisecurity.py b/src/sysdiagnose/parsers/wifisecurity.py index 5122464..ec3fde6 100644 --- a/src/sysdiagnose/parsers/wifisecurity.py +++ b/src/sysdiagnose/parsers/wifisecurity.py @@ -63,6 +63,6 @@ def parse_file(path: str) -> list | dict: except IndexError: return {'error': 'No WiFi/security.txt file present'} except Exception as e: - logger.error(f"Could not parse: {path}. Reason: {str(e)}") + logger.exception(f"Could not parse: {path}") return {'error': f'Could not parse: {path}. 
Reason: {str(e)}'} return entries diff --git a/src/sysdiagnose/utils/jsonlogger.py b/src/sysdiagnose/utils/jsonlogger.py new file mode 100644 index 0000000..2036087 --- /dev/null +++ b/src/sysdiagnose/utils/jsonlogger.py @@ -0,0 +1,9 @@ +from pythonjsonlogger import jsonlogger +from datetime import datetime + + +class SysdiagnoseJsonFormatter(jsonlogger.JsonFormatter): + '''Custom JSON logger formatter ''' + # https://stackoverflow.com/questions/50873446/python-logger-output-dates-in-is8601-format + def formatTime(self, record, datefmt=None): + return datetime.fromtimestamp(record.created).astimezone().isoformat(timespec='milliseconds') From 32a287a6bb9850d54740d07064dfeef75993435f Mon Sep 17 00:00:00 2001 From: Dario Borreguero Rincon Date: Tue, 15 Oct 2024 12:38:26 +0200 Subject: [PATCH 05/20] #11 time in microseconds --- src/sysdiagnose/utils/jsonlogger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sysdiagnose/utils/jsonlogger.py b/src/sysdiagnose/utils/jsonlogger.py index 2036087..c9e99f6 100644 --- a/src/sysdiagnose/utils/jsonlogger.py +++ b/src/sysdiagnose/utils/jsonlogger.py @@ -6,4 +6,4 @@ class SysdiagnoseJsonFormatter(jsonlogger.JsonFormatter): '''Custom JSON logger formatter ''' # https://stackoverflow.com/questions/50873446/python-logger-output-dates-in-is8601-format def formatTime(self, record, datefmt=None): - return datetime.fromtimestamp(record.created).astimezone().isoformat(timespec='milliseconds') + return datetime.fromtimestamp(record.created).astimezone().isoformat(timespec='microseconds') From e15ea9aa26013c147574815bef266ff2165307e8 Mon Sep 17 00:00:00 2001 From: Dario Borreguero Rincon Date: Tue, 15 Oct 2024 12:53:42 +0200 Subject: [PATCH 06/20] #11 fixing linting issues --- src/sysdiagnose/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sysdiagnose/main.py b/src/sysdiagnose/main.py index c8d3548..7f6478e 100644 --- a/src/sysdiagnose/main.py +++ b/src/sysdiagnose/main.py @@ -48,7 +48,7 @@ def get_json_logger(filename: str) -> logging.FileHandler: # 'message': '%(message)s'})) fmt_json = SysdiagnoseJsonFormatter( fmt='%(asctime)s %(levelname)s %(module)s %(message)s', - rename_fields={'asctime':'timestamp'}) + rename_fields={'asctime': 'timestamp'}) # File handler fh = logging.FileHandler(filename) fh.setLevel(logging.INFO) @@ -187,7 +187,7 @@ def main(): except NotImplementedError: logger.warning(f"Parser '{parser}' is not implemented yet, skipping", extra={'parser': parser}) - if not logger2file is None: + if logger2file is not None: logger2file.close() elif args.mode == 'analyse': @@ -242,7 +242,7 @@ def main(): except NotImplementedError: logger.warning(f"Analyser '{analyser}' is not implemented yet, skipping", extra={'analyser': analyser}) - if not logger2file is None: + if logger2file is not None: logger2file.close() else: From 3713c848100f5248a685d896a5e23893a7bf66d3 Mon Sep 17 00:00:00 2001 From: Dario Borreguero Rincon Date: Wed, 16 Oct 2024 08:53:21 +0200 Subject: [PATCH 07/20] #11 trying to address review comments on PR --- src/sysdiagnose/main.py | 37 ++------------------- src/sysdiagnose/parsers/powerlogs.py | 5 +-- src/sysdiagnose/utils/base.py | 4 +-- src/sysdiagnose/utils/jsonlogger.py | 9 ------ src/sysdiagnose/utils/logger.py | 48 ++++++++++++++++++++++++++++ 5 files changed, 53 insertions(+), 50 deletions(-) delete mode 100644 src/sysdiagnose/utils/jsonlogger.py create mode 100644 src/sysdiagnose/utils/logger.py diff --git a/src/sysdiagnose/main.py b/src/sysdiagnose/main.py index 
7f6478e..a82c7db 100644 --- a/src/sysdiagnose/main.py +++ b/src/sysdiagnose/main.py @@ -3,13 +3,9 @@ import sys from sysdiagnose import Sysdiagnose import os -import json -import logging import time -from sysdiagnose.utils.jsonlogger import SysdiagnoseJsonFormatter +from sysdiagnose.utils.logger import logger, get_console_handler, get_json_handler -logger = logging.getLogger('sysdiagnose') -logger.setLevel(logging.INFO) def parse_parser_error(message): sd = Sysdiagnose() @@ -28,33 +24,6 @@ def analyse_parser_error(message): sd.print_analysers_list() sys.exit(2) -def get_console_logger(level: str) -> logging.StreamHandler: - # Format - fmt_console = logging.Formatter('[%(levelname)s] [%(module)s] %(message)s') - # Console handler - ch = logging.StreamHandler() - ch.setLevel(level) - ch.setFormatter(fmt_console) - - return ch - -def get_json_logger(filename: str) -> logging.FileHandler: - # https://stackoverflow.com/questions/50144628/python-logging-into-file-as-a-dictionary-or-json - # fmt_json = logging.Formatter( - # json.dumps({ - # 'timestamp':'%(asctime)s', - # 'level': '%(levelname)s', - # 'module': '%(module)s', - # 'message': '%(message)s'})) - fmt_json = SysdiagnoseJsonFormatter( - fmt='%(asctime)s %(levelname)s %(module)s %(message)s', - rename_fields={'asctime': 'timestamp'}) - # File handler - fh = logging.FileHandler(filename) - fh.setLevel(logging.INFO) - fh.setFormatter(fmt_json) - - return fh def main(): parser = argparse.ArgumentParser( @@ -160,7 +129,7 @@ def main(): # Handle console logging log_level = args.log.upper() - logger.addHandler(get_console_logger(log_level)) + logger.addHandler(get_console_handler(log_level)) logger2file = None for case_id in case_ids: @@ -170,7 +139,7 @@ def main(): folder = sd.config.get_case_parsed_data_folder(case_id) # https://stackoverflow.com/questions/13839554/how-to-change-filehandle-with-python-logging-on-the-fly-with-different-classes-a if logger2file is None: - logger2file = get_json_logger(os.path.join(folder, filename)) + logger2file = get_json_handler(os.path.join(folder, filename)) logger.addHandler(logger2file) else: logger2file.close() diff --git a/src/sysdiagnose/parsers/powerlogs.py b/src/sysdiagnose/parsers/powerlogs.py index ee0ae2d..718122f 100644 --- a/src/sysdiagnose/parsers/powerlogs.py +++ b/src/sysdiagnose/parsers/powerlogs.py @@ -7,11 +7,8 @@ from sysdiagnose.utils import sqlite2json import glob import os -from sysdiagnose.utils.base import BaseParserInterface +from sysdiagnose.utils.base import BaseParserInterface, logger from datetime import datetime, timezone -import logging - -logger = logging.getLogger('sysdiagnose') class PowerLogsParser(BaseParserInterface): diff --git a/src/sysdiagnose/utils/base.py b/src/sysdiagnose/utils/base.py index 3ca4017..ac87f70 100644 --- a/src/sysdiagnose/utils/base.py +++ b/src/sysdiagnose/utils/base.py @@ -6,9 +6,7 @@ from datetime import datetime import re from functools import cached_property -import logging - -logger = logging.getLogger('sysdiagnose') +from sysdiagnose.utils.logger import logger class SysdiagnoseConfig: diff --git a/src/sysdiagnose/utils/jsonlogger.py b/src/sysdiagnose/utils/jsonlogger.py deleted file mode 100644 index c9e99f6..0000000 --- a/src/sysdiagnose/utils/jsonlogger.py +++ /dev/null @@ -1,9 +0,0 @@ -from pythonjsonlogger import jsonlogger -from datetime import datetime - - -class SysdiagnoseJsonFormatter(jsonlogger.JsonFormatter): - '''Custom JSON logger formatter ''' - # 
From 2d80fed2a8a3e334f910832e9f5c03c56c7d586a Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Wed, 16 Oct 2024 09:07:23 +0200
Subject: [PATCH 08/20] #11 fixes last broken commit

---
 src/sysdiagnose/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sysdiagnose/main.py b/src/sysdiagnose/main.py
index a82c7db..76818e6 100644
--- a/src/sysdiagnose/main.py
+++ b/src/sysdiagnose/main.py
@@ -184,7 +184,7 @@ def main():
 
     # Handle console logging
     log_level = args.log.upper()
-    logger.addHandler(get_console_logger(log_level))
+    logger.addHandler(get_console_handler(log_level))
 
     logger2file = None
     for case_id in case_ids:
@@ -194,7 +194,7 @@ def main():
             folder = sd.config.get_case_parsed_data_folder(case_id)
             # https://stackoverflow.com/questions/13839554/how-to-change-filehandle-with-python-logging-on-the-fly-with-different-classes-a
             if logger2file is None:
-                logger2file = get_json_logger(os.path.join(folder, filename))
+                logger2file = get_json_handler(os.path.join(folder, filename))
                 logger.addHandler(logger2file)
             else:
                 logger2file.close()

From c86d6366fa22c966fd9f49970f963cc8739aad65 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Wed, 16 Oct 2024 09:29:42 +0200
Subject: [PATCH 09/20] #11 added dependency to python-json-logger

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index c6e74cd..14ae045 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,8 @@ dependencies = [
     "pandas==2.2.2",
    "numpy==2.0.0",
     "nska-deserialize==1.5.1",
-    "yara-python==4.5.1"
+    "yara-python==4.5.1",
+    "python-json-logger==2.0.7"
 ]
 
 [project.scripts]

From 921153dbdb400b804ca6b1f4fe9a1c515de1d480 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Wed, 16 Oct 2024 09:47:18 +0200
Subject: [PATCH 10/20] #11 rest of the commit, addressing the last PR review
 comment

---
 src/sysdiagnose/analysers/demo_analyser.py      | 5 +----
 src/sysdiagnose/parsers/crashlogs.py            | 5 +----
 src/sysdiagnose/parsers/demo_parser.py          | 5 +----
 src/sysdiagnose/parsers/logarchive.py           | 4 +---
 src/sysdiagnose/parsers/ps.py                   | 5 +----
 src/sysdiagnose/parsers/security_sysdiagnose.py | 5 +----
 src/sysdiagnose/parsers/wifisecurity.py         | 5 +----
 7 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/src/sysdiagnose/analysers/demo_analyser.py b/src/sysdiagnose/analysers/demo_analyser.py
index 6a9c853..6872728 100644
--- a/src/sysdiagnose/analysers/demo_analyser.py
+++ b/src/sysdiagnose/analysers/demo_analyser.py
@@ -3,10 +3,7 @@
 # For Python3
 # DEMO - Skeleton
 
-import logging
-from sysdiagnose.utils.base import BaseAnalyserInterface
-
-logger = logging.getLogger('sysdiagnose')
+from sysdiagnose.utils.base import BaseAnalyserInterface, logger
 
 
 class DemoAnalyser(BaseAnalyserInterface):
diff --git a/src/sysdiagnose/parsers/crashlogs.py b/src/sysdiagnose/parsers/crashlogs.py
index 03a84df..efbf410 100644
--- a/src/sysdiagnose/parsers/crashlogs.py
+++ b/src/sysdiagnose/parsers/crashlogs.py
@@ -1,14 +1,11 @@
 import glob
 import os
-from sysdiagnose.utils.base import BaseParserInterface
+from sysdiagnose.utils.base import BaseParserInterface, logger
 import re
 import json
 from datetime import datetime, timezone
-import logging
 # from pycrashreport.crash_report import get_crash_report_from_file
 
-logger = logging.getLogger('sysdiagnose')
-
 
 class CrashLogsParser(BaseParserInterface):
     '''
diff --git a/src/sysdiagnose/parsers/demo_parser.py b/src/sysdiagnose/parsers/demo_parser.py
index 1c2d662..9a596f7 100644
--- a/src/sysdiagnose/parsers/demo_parser.py
+++ b/src/sysdiagnose/parsers/demo_parser.py
@@ -2,10 +2,7 @@
 
 import os
 import json
-from sysdiagnose.utils.base import BaseParserInterface
-import logging
-
-logger = logging.getLogger('sysdiagnose')
+from sysdiagnose.utils.base import BaseParserInterface, logger
 
 
 class DemoParser(BaseParserInterface):
diff --git a/src/sysdiagnose/parsers/logarchive.py b/src/sysdiagnose/parsers/logarchive.py
index 47fe139..e80ab0b 100644
--- a/src/sysdiagnose/parsers/logarchive.py
+++ b/src/sysdiagnose/parsers/logarchive.py
@@ -7,7 +7,7 @@
 # from collections.abc import Generator
 from datetime import datetime, timezone
-from sysdiagnose.utils.base import BaseParserInterface
+from sysdiagnose.utils.base import BaseParserInterface, logger
 import glob
 import json
 import os
@@ -16,9 +16,7 @@
 import sys
 import tempfile
 import shutil
-import logging
 
-logger = logging.getLogger('sysdiagnose')
 
 # --------------------------------------------#
 # On 2023-04-13: using ndjson instead of json to avoid parsing issues.
diff --git a/src/sysdiagnose/parsers/ps.py b/src/sysdiagnose/parsers/ps.py
index eae9b98..4beeb5b 100644
--- a/src/sysdiagnose/parsers/ps.py
+++ b/src/sysdiagnose/parsers/ps.py
@@ -9,16 +9,13 @@
 # - tree structure
 # - simplified
 #
-from sysdiagnose.utils.base import BaseParserInterface
+from sysdiagnose.utils.base import BaseParserInterface, logger
 import argparse
 import glob
 import json
 import os
 import re
 import sys
-import logging
-
-logger = logging.getLogger('sysdiagnose')
 
 
 class PsParser(BaseParserInterface):
diff --git a/src/sysdiagnose/parsers/security_sysdiagnose.py b/src/sysdiagnose/parsers/security_sysdiagnose.py
index 4485eac..92df066 100644
--- a/src/sysdiagnose/parsers/security_sysdiagnose.py
+++ b/src/sysdiagnose/parsers/security_sysdiagnose.py
@@ -1,9 +1,6 @@
 import os
 import re
-from sysdiagnose.utils.base import BaseParserInterface
-import logging
-
-logger = logging.getLogger('sysdiagnose')
+from sysdiagnose.utils.base import BaseParserInterface, logger
 
 # TODO make a security sysdiagnose analyser exporting in time based jsonl for timeline.
 # - client_trust: date
diff --git a/src/sysdiagnose/parsers/wifisecurity.py b/src/sysdiagnose/parsers/wifisecurity.py
index ec3fde6..70d4e04 100644
--- a/src/sysdiagnose/parsers/wifisecurity.py
+++ b/src/sysdiagnose/parsers/wifisecurity.py
@@ -5,10 +5,7 @@
 # Author: david@autopsit.org
 
 import os
-from sysdiagnose.utils.base import BaseParserInterface
-import logging
-
-logger = logging.getLogger('sysdiagnose')
+from sysdiagnose.utils.base import BaseParserInterface, logger
 
 
 class WifiSecurityParser(BaseParserInterface):
From 2e6ba62777e5bebe0aae6b6429b19389617411df Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Wed, 16 Oct 2024 14:20:13 +0200
Subject: [PATCH 11/20] #11 indentation issue corrected

---
 src/sysdiagnose/parsers/demo_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sysdiagnose/parsers/demo_parser.py b/src/sysdiagnose/parsers/demo_parser.py
index 0a89feb..74610e3 100644
--- a/src/sysdiagnose/parsers/demo_parser.py
+++ b/src/sysdiagnose/parsers/demo_parser.py
@@ -33,4 +33,4 @@ def execute(self) -> list | dict:
             result.append(entry)
             logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file, 'entry': entry})
 
-            return result
+        return result

From 94fb389cfecff781a6d630625853006a44a63430 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Wed, 16 Oct 2024 14:38:40 +0200
Subject: [PATCH 12/20] #11 align the time fields with parsers output

---
 src/sysdiagnose/utils/logger.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sysdiagnose/utils/logger.py b/src/sysdiagnose/utils/logger.py
index cc6ed8c..10c39b6 100644
--- a/src/sysdiagnose/utils/logger.py
+++ b/src/sysdiagnose/utils/logger.py
@@ -38,8 +38,8 @@ def get_json_handler(filename: str, level: int = logging.FileHa
         level: Logging level. By default to INFO. https://docs.python.org/3/library/logging.html#logging-levels
     '''
     fmt_json = SysdiagnoseJsonFormatter(
-        fmt='%(asctime)s %(levelname)s %(module)s %(message)s',
-        rename_fields={'asctime': 'timestamp'})
+        fmt='%(created)f %(asctime)s %(levelname)s %(module)s %(message)s',
+        rename_fields={'asctime': 'datetime', 'created' : 'timestamp'})
     # File handler
     fh = logging.FileHandler(filename)
     fh.setLevel(level)
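With created renamed to timestamp and asctime to datetime, a record written by the JSON file handler should carry the same two time fields the parsers emit: a numeric epoch value and an ISO 8601 string. A rough sample line with illustrative values (the field names follow the format string above, the values are made up):

    {"timestamp": 1729081213.123456, "datetime": "2024-10-16T14:20:13.123456+02:00", "levelname": "INFO", "module": "demo_parser", "message": "Processing file ps.txt, new entry added"}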
From 60c76728ce88f6defadc6dcb01240f00512a4a21 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Wed, 16 Oct 2024 14:41:34 +0200
Subject: [PATCH 13/20] #11 fixing linting issues

---
 src/sysdiagnose/utils/logger.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sysdiagnose/utils/logger.py b/src/sysdiagnose/utils/logger.py
index 10c39b6..5593399 100644
--- a/src/sysdiagnose/utils/logger.py
+++ b/src/sysdiagnose/utils/logger.py
@@ -39,7 +39,7 @@ def get_json_handler(filename: str, level: int = logging.FileHa
     '''
     fmt_json = SysdiagnoseJsonFormatter(
         fmt='%(created)f %(asctime)s %(levelname)s %(module)s %(message)s',
-        rename_fields={'asctime': 'datetime', 'created' : 'timestamp'})
+        rename_fields={'asctime': 'datetime', 'created': 'timestamp'})
     # File handler
     fh = logging.FileHandler(filename)
     fh.setLevel(level)

From 5b34d40f75fbfd1a3d4c43dd8c03e90fc0064ca2 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Wed, 16 Oct 2024 14:51:27 +0200
Subject: [PATCH 14/20] #11 adding a few missing files without logging and
 removing print statements that came back with a merge from main.

---
 src/sysdiagnose/__init__.py                   | 4 ++--
 src/sysdiagnose/analysers/wifi_geolocation.py | 4 ++--
 src/sysdiagnose/analysers/yarascan.py         | 7 ++-----
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/sysdiagnose/__init__.py b/src/sysdiagnose/__init__.py
index fae3cda..71cbd72 100644
--- a/src/sysdiagnose/__init__.py
+++ b/src/sysdiagnose/__init__.py
@@ -7,7 +7,7 @@
 import re
 import tarfile
 import fcntl
-from sysdiagnose.utils.base import BaseParserInterface, BaseAnalyserInterface, SysdiagnoseConfig
+from sysdiagnose.utils.base import BaseParserInterface, BaseAnalyserInterface, SysdiagnoseConfig, logger
 
 
 class Sysdiagnose:
@@ -147,7 +147,7 @@ def create_case(self, sysdiagnose_file: str, force: bool = False, case_id: bool
             case['unique_device_id'] = remotectl_dumpstate_json['Local device']['Properties']['UniqueDeviceID']
             case['version'] = remotectl_dumpstate_json['Local device']['Properties']['OSVersion']
         except (KeyError, TypeError) as e:
-            print(f"WARNING: Could not parse remotectl_dumpstate, and therefore extract serial numbers. Error {e}")
+            logger.warning(f"WARNING: Could not parse remotectl_dumpstate, and therefore extract serial numbers.", exc_info=True)
 
         try:
             case['date'] = remotectl_dumpstate_parser.sysdiagnose_creation_datetime.isoformat(timespec='microseconds')
diff --git a/src/sysdiagnose/analysers/wifi_geolocation.py b/src/sysdiagnose/analysers/wifi_geolocation.py
index 8c6b732..d0f1452 100644
--- a/src/sysdiagnose/analysers/wifi_geolocation.py
+++ b/src/sysdiagnose/analysers/wifi_geolocation.py
@@ -6,7 +6,7 @@
 import dateutil.parser
 import gpxpy
 import gpxpy.gpx
-from sysdiagnose.utils.base import BaseAnalyserInterface
+from sysdiagnose.utils.base import BaseAnalyserInterface, logger
 from sysdiagnose.parsers.wifi_known_networks import WifiKnownNetworksParser
 
 
@@ -45,7 +45,7 @@ def generate_gpx_from_known_networks_json(json_data: str, output_file: str):
         try:
             timestamp = dateutil.parser.parse(timestamp_str)
         except Exception as e:
-            print(f"Error converting timestamp. Reason: {str(e)}. Timestamp was: {str(timestamp_str)}. Assuming Jan 1st 1970.")
+            logger.exception(f"Error converting timestamp. Timestamp was: {str(timestamp_str)}. Assuming Jan 1st 1970.")
             timestamp = dateutil.parser.parse('1970-01-01')  # begin of epoch
 
         bssid = network_data.get('__OSSpecific__', {}).get('BSSID', '')
diff --git a/src/sysdiagnose/analysers/yarascan.py b/src/sysdiagnose/analysers/yarascan.py
index c974337..bc1b2e4 100644
--- a/src/sysdiagnose/analysers/yarascan.py
+++ b/src/sysdiagnose/analysers/yarascan.py
@@ -3,10 +3,7 @@
 import glob
 import threading
 import queue
-import logging
-from sysdiagnose.utils.base import BaseAnalyserInterface
-
-logger = logging.getLogger('sysdiagnose')
+from sysdiagnose.utils.base import BaseAnalyserInterface, logger
 
 
 # These are the commonly used external variables that can be used in the YARA rules
@@ -39,7 +36,7 @@ def execute(self):
         results = {'errors': [], 'matches': []}
 
         if not os.path.isdir(self.yara_rules_path):
-            print(f"ERROR: Could not find the YARA rules (.yar) folder: {self.yara_rules_path}")
+            logger.error(f"ERROR: Could not find the YARA rules (.yar) folder: {self.yara_rules_path}")
             results['errors'].append(f"Could not find the YARA rules (.yar) folder: {self.yara_rules_path}")
             return results
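The patch above uses two idioms for keeping the traceback that are worth telling apart: exc_info=True attaches the active exception to a record at whatever level the call site chooses, while logger.exception() is shorthand for logger.error(..., exc_info=True) and is meant to be called only from inside an except block. A minimal sketch of the equivalence (the message and exception are stand-ins):

    import logging

    logger = logging.getLogger('sysdiagnose')

    try:
        raise KeyError('Local device')  # stand-in for the real failure
    except KeyError:
        # Same traceback, level chosen by the caller:
        logger.warning('Could not parse remotectl_dumpstate', exc_info=True)
        # Shorthand that always logs at ERROR:
        logger.exception('Could not parse remotectl_dumpstate')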
From 38a0adaa51877b42414ef819a2ddb46aa366dc68 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Wed, 16 Oct 2024 14:55:29 +0200
Subject: [PATCH 15/20] #11 forgot to click on save :) this fixes
 https://github.com/EC-DIGIT-CSIRC/sysdiagnose/pull/106#discussion_r1803042416

---
 src/sysdiagnose/analysers/timesketch.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/sysdiagnose/analysers/timesketch.py b/src/sysdiagnose/analysers/timesketch.py
index 548f7f5..14c76d8 100644
--- a/src/sysdiagnose/analysers/timesketch.py
+++ b/src/sysdiagnose/analysers/timesketch.py
@@ -16,10 +16,7 @@
 from sysdiagnose.parsers.wifi_known_networks import WifiKnownNetworksParser
 from sysdiagnose.parsers.crashlogs import CrashLogsParser
 from collections.abc import Generator
-from sysdiagnose.utils.base import BaseAnalyserInterface
-import logging
-
-logger = logging.getLogger('sysdiagnose')
+from sysdiagnose.utils.base import BaseAnalyserInterface, logger
 
 
 class TimesketchAnalyser(BaseAnalyserInterface):

From 62354a54491e0e220412b612c7e19befbb3fd5c1 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Wed, 16 Oct 2024 15:02:04 +0200
Subject: [PATCH 16/20] #11 Addressing review comments: moved the log parameter
 to the main parser section and removed case_id from the log filename.

---
 src/sysdiagnose/main.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/sysdiagnose/main.py b/src/sysdiagnose/main.py
index 76818e6..b098941 100644
--- a/src/sysdiagnose/main.py
+++ b/src/sysdiagnose/main.py
@@ -32,6 +32,7 @@ def main():
     )
     # available for all
     parser.add_argument('-c', '--case_id', required=False, default='all', help='ID of the case, or "all" for all cases (default)')
+    parser.add_argument('-l', '--log', default='WARNING', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Enables logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)')
 
     subparsers = parser.add_subparsers(dest='mode')
 
@@ -43,13 +44,11 @@ def main():
     # parse mode
     parse_parser = subparsers.add_parser('parse', help='Parse a case')
     parse_parser.add_argument('parser', help='Name of the parser, "all" for running all parsers, or "list" for a listing of all parsers')
-    parse_parser.add_argument('--log', default='WARNING', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Enables logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)')
     parse_parser.error = parse_parser_error
 
     # analyse mode
     analyse_parser = subparsers.add_parser('analyse', help='Analyse a case')
     analyse_parser.add_argument('analyser', help='Name of the analyser, "all" for running all analysers, or "list" for a listing of all analysers')
-    analyse_parser.add_argument('--log', default='WARNING', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Enables logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)')
     analyse_parser.error = analyse_parser_error
 
     # list mode
@@ -135,7 +134,7 @@ def main():
     for case_id in case_ids:
         # Handle file logging
         time_str = time.strftime("%Y%m%dT%H%M%S")
-        filename = f"{time_str}-parse-{case_id}.jsonl"
+        filename = f"{time_str}-log-parse.jsonl"
         folder = sd.config.get_case_parsed_data_folder(case_id)
         # https://stackoverflow.com/questions/13839554/how-to-change-filehandle-with-python-logging-on-the-fly-with-different-classes-a
         if logger2file is None:
@@ -190,7 +189,7 @@ def main():
     for case_id in case_ids:
         # Handle file logging
         time_str = time.strftime("%Y%m%dT%H%M%S")
-        filename = f"{time_str}-analyse-{case_id}.jsonl"
+        filename = f"{time_str}-log-analyse.jsonl"
         folder = sd.config.get_case_parsed_data_folder(case_id)
         # https://stackoverflow.com/questions/13839554/how-to-change-filehandle-with-python-logging-on-the-fly-with-different-classes-a
         if logger2file is None:
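With -l/--log on the top-level parser it applies to all modes, but, as with any argparse top-level option, it has to be given before the subcommand. Assuming the console script defined under [project.scripts] is named sysdiagnose (the entry itself is truncated above, so the name is an assumption), invocations would look like:

    sysdiagnose -l DEBUG -c 1 parse ps
    sysdiagnose --log INFO analyse timesketch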
From afe6b9e182dc2f905f2e05c81be294bcacf7d246 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Wed, 16 Oct 2024 16:52:22 +0200
Subject: [PATCH 17/20] #11 moves the console handler to the same level where
 the log cmd parameter now resides. That is, available to all modes

---
 src/sysdiagnose/main.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/sysdiagnose/main.py b/src/sysdiagnose/main.py
index b098941..74df6f5 100644
--- a/src/sysdiagnose/main.py
+++ b/src/sysdiagnose/main.py
@@ -67,6 +67,10 @@ def main():
 
     args = parser.parse_args()
 
+    # Handle console logging
+    log_level = args.log.upper()
+    logger.addHandler(get_console_handler(log_level))
+
     sd = Sysdiagnose()
 
     if args.mode == 'list':
@@ -126,10 +130,6 @@ def main():
     else:
         case_ids = [args.case_id]
 
-    # Handle console logging
-    log_level = args.log.upper()
-    logger.addHandler(get_console_handler(log_level))
-
     logger2file = None
     for case_id in case_ids:
         # Handle file logging
@@ -181,10 +181,6 @@ def main():
     else:
         case_ids = [args.case_id]
 
-    # Handle console logging
-    log_level = args.log.upper()
-    logger.addHandler(get_console_handler(log_level))
-
     logger2file = None
     for case_id in case_ids:
         # Handle file logging
From d05172ca4cb00c96aedfa811303ad9c5070a435c Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Thu, 17 Oct 2024 09:00:17 +0200
Subject: [PATCH 18/20] #11 one liner for console log handler

---
 src/sysdiagnose/main.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/sysdiagnose/main.py b/src/sysdiagnose/main.py
index 74df6f5..d79e85f 100644
--- a/src/sysdiagnose/main.py
+++ b/src/sysdiagnose/main.py
@@ -68,8 +68,7 @@ def main():
     args = parser.parse_args()
 
     # Handle console logging
-    log_level = args.log.upper()
-    logger.addHandler(get_console_handler(log_level))
+    logger.addHandler(get_console_handler(args.log.upper()))
 
     sd = Sysdiagnose()
 

From 48a11547a4bf6c21fd7e34d26c95b58709823142 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Thu, 17 Oct 2024 10:33:38 +0200
Subject: [PATCH 19/20] #11 added logging to appinstallation parser and
 external script sqlite2json

---
 src/sysdiagnose/parsers/appinstallation.py | 3 ++-
 src/sysdiagnose/utils/sqlite2json.py       | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/sysdiagnose/parsers/appinstallation.py b/src/sysdiagnose/parsers/appinstallation.py
index 77ca14d..e21223b 100644
--- a/src/sysdiagnose/parsers/appinstallation.py
+++ b/src/sysdiagnose/parsers/appinstallation.py
@@ -12,7 +12,7 @@
 import glob
 import os
 import sysdiagnose.utils.misc as misc
-from sysdiagnose.utils.base import BaseParserInterface
+from sysdiagnose.utils.base import BaseParserInterface, logger
 from datetime import datetime, timezone
 
 
@@ -57,4 +57,5 @@ def execute(self) -> list | dict:
                 pass
         return result
     except IndexError:
+        logger.exception("Index error, returning empty list")
         return []
diff --git a/src/sysdiagnose/utils/sqlite2json.py b/src/sysdiagnose/utils/sqlite2json.py
index c6a5d9c..9d8d9c8 100644
--- a/src/sysdiagnose/utils/sqlite2json.py
+++ b/src/sysdiagnose/utils/sqlite2json.py
@@ -13,6 +13,7 @@
 import json
 import sqlite3
 import argparse
+from sysdiagnose.utils.logger import logger
 
 version_string = "sqlite2json.py v2020-02-18 Version 1.0"
 
@@ -32,7 +33,7 @@ def sqlite2struct(dbpath) -> dict:
             dbstruct[table] = content
         return dbstruct
     except Exception as e:
-        print(f"Could not parse {dbpath}. Reason: {str(e)}", file=sys.stderr)
+        logger.exception(f"Could not parse {dbpath}.")
         return None
 
 
@@ -74,7 +75,7 @@ def dump2json(dbstruct, jsonpath="./db.json"):
         with open(jsonpath, "w") as fd:
             fd.write(jsontxt)
     except Exception as e:
-        print(f"Impossible to dump the UUID to Path to {jsonpath}. Reason: {str(e)}\n", file=sys.stderr)
+        logger.exception(f"Impossible to dump the UUID to Path to {jsonpath}.")
     return jsontxt
 
 # --------------------------------------------------------------------------- #
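A side benefit of logger.exception() as used here: the traceback it attaches already contains the exception message, so the old 'Reason: {str(e)}' suffix becomes redundant, and the now-unused 'as e' binding could be dropped as well, which would also keep the linter from flagging an unused variable. A sketch of the resulting pattern, with a stand-in error in place of the real sqlite work:

    import logging

    logger = logging.getLogger('sysdiagnose')
    logging.basicConfig(level=logging.INFO)

    def parse_db(dbpath: str) -> dict | None:
        try:
            raise ValueError('corrupt header')  # stand-in for the real table walk
        except Exception:
            # The attached traceback carries the reason; no str(e) needed.
            logger.exception(f"Could not parse {dbpath}.")
            return None

    parse_db('./db.sqlite')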
From 10fc4e75cecf3a855f12867856d580ae19f0837c Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Thu, 17 Oct 2024 16:26:51 +0200
Subject: [PATCH 20/20] #11 Added some extra log calls to the demo parser and
 analyser to demonstrate how to log

---
 src/sysdiagnose/analysers/demo_analyser.py | 15 +++++++++++----
 src/sysdiagnose/parsers/demo_parser.py     | 19 ++++++++++++-------
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/sysdiagnose/analysers/demo_analyser.py b/src/sysdiagnose/analysers/demo_analyser.py
index 6872728..a624461 100644
--- a/src/sysdiagnose/analysers/demo_analyser.py
+++ b/src/sysdiagnose/analysers/demo_analyser.py
@@ -20,10 +20,17 @@ def execute(self):
         Load parsers here, and use the parser.get_result() to get the data.
         By doing so you will get the parser output even if it never ran before.
         """
-        print("DO SOMETHING HERE")
-        logger.info("log something here", extra={'analyser': __name__})
-
-        # json_data = p_fooparser.get_result()
+        try:
+            print("DO SOMETHING HERE")
+            logger.info("log something here", extra={'analyser': __name__})
+            if True:
+                logger.warning("This will log a warning")
+                # logger.error("This will log an error")
+
+            # json_data = p_fooparser.get_result()
+        except Exception as e:
+            logger.exception("This will log an error with the exception information")
+            # logger.warning("This will log a warning with the exception information", exc_info=True)
 
         result = {'foo': 'bar'}
         return result
diff --git a/src/sysdiagnose/parsers/demo_parser.py b/src/sysdiagnose/parsers/demo_parser.py
index 74610e3..a72b412 100644
--- a/src/sysdiagnose/parsers/demo_parser.py
+++ b/src/sysdiagnose/parsers/demo_parser.py
@@ -26,11 +26,16 @@ def execute(self) -> list | dict:
         log_files = self.get_log_files()
         for log_file in log_files:
             entry = {}
-
-            # timestamp = datetime.strptime(item['timestamp'], '%Y-%m-%d %H:%M:%S.%f %z')
-            # entry['datetime'] = timestamp.isoformat(timespec='microseconds')
-            # entry['timestamp'] = timestamp.timestamp()
-            result.append(entry)
-            logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file, 'entry': entry})
-
+            try:
+                # timestamp = datetime.strptime(item['timestamp'], '%Y-%m-%d %H:%M:%S.%f %z')
+                # entry['datetime'] = timestamp.isoformat(timespec='microseconds')
+                # entry['timestamp'] = timestamp.timestamp()
+                result.append(entry)
+                logger.info(f"Processing file {log_file}, new entry added", extra={'log_file': log_file, 'entry': entry})
+                if not entry:
+                    logger.warning("Empty entry.")
+                    # logger.error("Empty entry.")
+            except Exception as e:
+                logger.exception("This will log an error with the exception information")
+                # logger.warning("This will log a warning with the exception information", exc_info=True)
         return result
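One runtime detail behind the demo's level calls: a record must first pass the logger's own level before any handler level is consulted. Since utils/logger.py pins the 'sysdiagnose' logger to INFO, a DEBUG console handler alone will not surface logger.debug() calls; they are filtered upstream. A self-contained sketch of that stdlib behaviour:

    import logging

    logger = logging.getLogger('sysdiagnose')
    logger.setLevel(logging.INFO)   # the gate set in utils/logger.py

    ch = logging.StreamHandler()
    ch.setLevel('DEBUG')            # handler gate, checked second
    logger.addHandler(ch)

    logger.debug('dropped: filtered by the logger level before any handler')
    logger.info('emitted: passes both the logger and the handler gate')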