From 4c625183fe312e03529a53f2c450dec5cfb603c0 Mon Sep 17 00:00:00 2001
From: Dario Borreguero Rincon
Date: Tue, 15 Oct 2024 11:36:25 +0200
Subject: [PATCH] #11 Use a proper JSON log formatter

1. Use a proper JSON formatter (python-json-logger).
2. Use 'extra' and 'exc_info' fields when logging. The usage of 'extra'
   fields still needs to be standardised.
---
 requirements.txt                           |  3 ++-
 src/sysdiagnose/analysers/demo_analyser.py |  2 +-
 src/sysdiagnose/analysers/timesketch.py    | 28 +++++++++++-----------
 src/sysdiagnose/analysers/yarascan.py      |  4 ++--
 src/sysdiagnose/main.py                    | 28 ++++++++++++----------
 src/sysdiagnose/parsers/crashlogs.py       |  2 +-
 src/sysdiagnose/parsers/demo_parser.py     |  4 ++--
 src/sysdiagnose/parsers/logarchive.py      |  8 +++----
 src/sysdiagnose/parsers/ps.py              |  4 ++--
 src/sysdiagnose/parsers/wifisecurity.py    |  2 +-
 src/sysdiagnose/utils/jsonlogger.py        |  9 +++++++
 11 files changed, 54 insertions(+), 40 deletions(-)
 create mode 100644 src/sysdiagnose/utils/jsonlogger.py

diff --git a/requirements.txt b/requirements.txt
index d270cf4..650ba48 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,5 @@ pandas==2.2.2
 numpy==2.0.0
 nska-deserialize==1.5.1
 yara-python==4.5.1
-# pycrashreport==1.2.4
\ No newline at end of file
+# pycrashreport==1.2.4
+python-json-logger==2.0.7
\ No newline at end of file
diff --git a/src/sysdiagnose/analysers/demo_analyser.py b/src/sysdiagnose/analysers/demo_analyser.py
index 740c239..6a9c853 100644
--- a/src/sysdiagnose/analysers/demo_analyser.py
+++ b/src/sysdiagnose/analysers/demo_analyser.py
@@ -24,7 +24,7 @@ def execute(self):
         By doing so you will get the parser output even if it never ran before.
         """
         print("DO SOMETHING HERE")
-        logger.info("log something here")
+        logger.info("log something here", extra={'analyser': __name__})
 
         # json_data = p_fooparser.get_result()
 
diff --git a/src/sysdiagnose/analysers/timesketch.py b/src/sysdiagnose/analysers/timesketch.py
index 3d01c6e..548f7f5 100644
--- a/src/sysdiagnose/analysers/timesketch.py
+++ b/src/sysdiagnose/analysers/timesketch.py
@@ -53,7 +53,7 @@ def __extract_ts_mobileactivation(self) -> Generator[dict, None, None]:
                 pass
             yield ts_event
         except Exception as e:
-            logger.error(f"ERROR while extracting timestamp from mobileactivation file. Reason: {str(e)}")
+            logger.exception("ERROR while extracting timestamp from mobileactivation file.")
 
     def __extract_ts_powerlogs(self) -> Generator[dict, None, None]:
         try:
@@ -94,7 +94,7 @@ def __extract_ts_powerlogs(self) -> Generator[dict, None, None]:
                 pass
 
         except Exception as e:
-            logger.error(f"ERROR while extracting timestamp from powerlogs. Reason: {str(e)}")
+            logger.exception("ERROR while extracting timestamp from powerlogs.")
 
     def __extract_ts_swcutil(self) -> Generator[dict, None, None]:
         try:
@@ -117,7 +117,7 @@ def __extract_ts_swcutil(self) -> Generator[dict, None, None]:
                     logger.warning(f"Error while extracting timestamp from {(service['Service'])} - {(service['App ID'])}. Record not inserted.")
                     pass
         except Exception as e:
-            logger.error(f"ERROR while extracting timestamp from swcutil. Reason {str(e)}")
+            logger.exception("ERROR while extracting timestamp from swcutil.")
 
     def __extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]:
         try:
@@ -138,7 +138,7 @@ def __extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]:
                 }
                 yield ts_event
         except Exception as e:
-            logger.error(f"ERROR while extracting timestamp from accessibility_tcc. Reason {str(e)}")
Reason {str(e)}") + logger.exception("ERROR while extracting timestamp from accessibility_tcc.") def __extract_ts_shutdownlogs(self) -> Generator[dict, None, None]: try: @@ -156,9 +156,9 @@ def __extract_ts_shutdownlogs(self) -> Generator[dict, None, None]: } yield ts_event except Exception as e: - logger.warning(f"WARNING: shutdownlog entry not parsed: {event}. Reason: {str(e)}") + logger.warning(f"WARNING: shutdownlog entry not parsed: {event}", exc_info=True) except Exception as e: - logger.error(f"ERROR while extracting timestamp from shutdownlog. Reason: {str(e)}") + logger.exception("ERROR while extracting timestamp from shutdownlog.") def __extract_ts_logarchive(self) -> Generator[dict, None, None]: try: @@ -176,9 +176,9 @@ def __extract_ts_logarchive(self) -> Generator[dict, None, None]: } yield ts_event except KeyError as e: - logger.warning(f"WARNING: trace not parsed: {event}. Error {e}") + logger.warning(f"WARNING: trace not parsed: {event}.", exc_info=True) except Exception as e: - logger.error(f"ERROR while extracting timestamp from logarchive. Reason: {str(e)}") + logger.exception(f"ERROR while extracting timestamp from logarchive.") def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]: try: @@ -209,7 +209,7 @@ def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]: } yield ts_event except Exception as e: - logger.error(f"ERROR while extracting timestamp from wifisecurity. Reason {str(e)}") + logger.exception("ERROR while extracting timestamp from wifisecurity.") def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: try: @@ -231,7 +231,7 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: yield ts_event except KeyError: # some wifi networks do not have an AddedAt field - logger.warning(f"Error while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.") + logger.warning(f"Error while extracting timestamp from {ssid}. Record not inserted.", exc_info=True) pass # WIFI modified @@ -248,7 +248,7 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: yield ts_event except KeyError: # some wifi networks do not have an UpdatedAt field - logger.warning(f"Error while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.") + logger.warning(f"Error while extracting timestamp from {ssid}.Record not inserted.", exc_info=True) pass # Password for wifi modified @@ -265,10 +265,10 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]: yield ts_event except KeyError: # some wifi networks do not have a password modification date - logger.warning(f"Error while extracting timestamp from {ssid}. Reason: {str(e)}. Record not inserted.") + logger.warning(f"Error while extracting timestamp from {ssid}. Record not inserted.", exc_info=True) pass except Exception as e: - logger.error(f"ERROR while extracting timestamp from wifi_known_networks. Reason {str(e)}") + logger.exception("ERROR while extracting timestamp from wifi_known_networks.") def __extract_ts_crashlogs(self) -> Generator[dict, None, None]: try: @@ -291,7 +291,7 @@ def __extract_ts_crashlogs(self) -> Generator[dict, None, None]: # skip bug_type fields pass except Exception as e: - logger.error(f"ERROR while extracting timestamp from crashlog. 
Reason {str(e)}") + logger.exception("ERROR while extracting timestamp from crashlog.") def execute(self): # Get all the functions that start with '__extract_ts_' diff --git a/src/sysdiagnose/analysers/yarascan.py b/src/sysdiagnose/analysers/yarascan.py index 909c362..a8648bc 100644 --- a/src/sysdiagnose/analysers/yarascan.py +++ b/src/sysdiagnose/analysers/yarascan.py @@ -87,11 +87,11 @@ def get_valid_yara_rule_files(self) -> tuple[list, list]: # if we reach this point, the rule is valid rule_files_validated.append(rule_file) except yara.SyntaxError as e: - logger.error(f"Error compiling rule {rule_file}: {str(e)}") + logger.exception(f"Error compiling rule {rule_file}") errors.append(f"Error compiling rule {rule_file}: {str(e)}") continue except yara.Error as e: - logger.error(f"Error compiling rule {rule_file}: {str(e)}") + logger.exception(f"Error compiling rule {rule_file}") errors.append(f"Error loading rule {rule_file}: {str(e)}") continue diff --git a/src/sysdiagnose/main.py b/src/sysdiagnose/main.py index 8b0fa40..c8d3548 100644 --- a/src/sysdiagnose/main.py +++ b/src/sysdiagnose/main.py @@ -6,6 +6,7 @@ import json import logging import time +from sysdiagnose.utils.jsonlogger import SysdiagnoseJsonFormatter logger = logging.getLogger('sysdiagnose') logger.setLevel(logging.INFO) @@ -39,12 +40,15 @@ def get_console_logger(level: str) -> logging.StreamHandler: def get_json_logger(filename: str) -> logging.FileHandler: # https://stackoverflow.com/questions/50144628/python-logging-into-file-as-a-dictionary-or-json - fmt_json = logging.Formatter( - json.dumps({ - 'timestamp':'%(asctime)s', - 'level': '%(levelname)s', - 'module': '%(module)s', - 'message': '%(message)s'})) + # fmt_json = logging.Formatter( + # json.dumps({ + # 'timestamp':'%(asctime)s', + # 'level': '%(levelname)s', + # 'module': '%(module)s', + # 'message': '%(message)s'})) + fmt_json = SysdiagnoseJsonFormatter( + fmt='%(asctime)s %(levelname)s %(module)s %(message)s', + rename_fields={'asctime':'timestamp'}) # File handler fh = logging.FileHandler(filename) fh.setLevel(logging.INFO) @@ -175,13 +179,13 @@ def main(): print(f"Case ID: {case_id}") for parser in parsers_list: print(f"Parser '{parser}' for case ID '{case_id}'") - logger.info(f"Parser '{parser}' started") + logger.info(f"Parser '{parser}' started", extra={'parser': parser}) try: result = sd.parse(parser, case_id) result_str = "successfully" if result == 0 else "with errors" - logger.info(f"Parser '{parser}' finished {result_str}") + logger.info(f"Parser '{parser}' finished {result_str}", extra={'parser': parser, 'result': result}) except NotImplementedError: - logger.warning(f"Parser '{parser}' is not implemented yet, skipping") + logger.warning(f"Parser '{parser}' is not implemented yet, skipping", extra={'parser': parser}) if not logger2file is None: logger2file.close() @@ -230,13 +234,13 @@ def main(): print(f"Case ID: {case_id}") for analyser in analysers_list: print(f" Analyser '{analyser}' for case ID '{case_id}'") - logger.info(f"Analyser '{analyser}' started") + logger.info(f"Analyser '{analyser}' started", extra={'analyser': analyser}) try: result = sd.analyse(analyser, case_id) result_str = "successfully" if result == 0 else "with errors" - logger.info(f"Analyser '{analyser}' finished {result_str}") + logger.info(f"Analyser '{analyser}' finished {result_str}", extra={'analyser': analyser, 'result': result}) except NotImplementedError: - logger.warning(f"Analyser '{analyser}' is not implemented yet, skipping") + logger.warning(f"Analyser 
+                logger.warning(f"Analyser '{analyser}' is not implemented yet, skipping", extra={'analyser': analyser})
 
         if not logger2file is None:
             logger2file.close()
diff --git a/src/sysdiagnose/parsers/crashlogs.py b/src/sysdiagnose/parsers/crashlogs.py
index 137915e..03a84df 100644
--- a/src/sysdiagnose/parsers/crashlogs.py
+++ b/src/sysdiagnose/parsers/crashlogs.py
@@ -63,7 +63,7 @@ def execute(self) -> list | dict:
                     seen.add(ips_hash)
                     result.append(ips)
             except Exception as e:
-                logger.warning(f"Skipping file due to error {file}: {e}")
+                logger.warning(f"Skipping file due to error {file}", exc_info=True)
         return result
 
     def parse_ips_file(path: str) -> list | dict:
diff --git a/src/sysdiagnose/parsers/demo_parser.py b/src/sysdiagnose/parsers/demo_parser.py
index 4feca32..1c2d662 100644
--- a/src/sysdiagnose/parsers/demo_parser.py
+++ b/src/sysdiagnose/parsers/demo_parser.py
@@ -28,7 +28,7 @@ def execute(self) -> list | dict:
         json_object = {}
         log_files = self.get_log_files()
         for log_file in log_files:
-            logger.info(f"Processing file {log_file}")
+            logger.info(f"Processing file {log_file}", extra={'parser': __name__, 'log_file': log_file})
             pass
         return json_object
 
@@ -48,5 +48,5 @@ def parse_path_to_folder(self, path: str, output_folder: str) -> bool:
                 json.dump(json_object, f)
             return True
         except Exception as e:
-            logger.error(f"Error: {e}")
+            logger.exception("Error")
             return False
diff --git a/src/sysdiagnose/parsers/logarchive.py b/src/sysdiagnose/parsers/logarchive.py
index 2d87df0..47fe139 100644
--- a/src/sysdiagnose/parsers/logarchive.py
+++ b/src/sysdiagnose/parsers/logarchive.py
@@ -202,7 +202,7 @@ def parse_folder_to_file(input_folder: str, output_file: str) -> bool:
             LogarchiveParser.__convert_using_unifiedlogparser(input_folder, output_file)
             return True
         except IndexError:
-            logger.error('Error: No system_logs.logarchive/ folder found in logs/ directory')
+            logger.exception('Error: No system_logs.logarchive/ folder found in logs/ directory')
             return False
 
     def __convert_using_native_logparser(input_folder: str, output_file: str) -> list:
@@ -216,7 +216,7 @@ def __convert_using_native_logparser(input_folder: str, output_file: str) -> lis
                     entry_json = LogarchiveParser.convert_entry_to_unifiedlog_format(json.loads(line))
                     f_out.write(json.dumps(entry_json) + '\n')
                 except json.JSONDecodeError as e:
-                    logger.warning(f"WARNING: error parsing JSON {line}: {str(e)}")
+                    logger.warning(f"WARNING: error parsing JSON {line}", exc_info=True)
                 except KeyError:
                     # last line of log does not contain 'time' field, nor the rest of the data.
                     # so just ignore it and all the rest.
@@ -235,7 +235,7 @@ def __convert_using_unifiedlogparser(input_folder: str, output_file: str) -> lis
         try:
             subprocess.check_output(cmd_parsing_linux_test, universal_newlines=True)
         except FileNotFoundError:
-            logger.error('ERROR: UnifiedLogReader not found, please install it. See README.md for more information.')
+            logger.exception('ERROR: UnifiedLogReader not found, please install it. See README.md for more information.')
             return
 
         # really run the tool now
@@ -253,7 +253,7 @@ def __convert_using_unifiedlogparser(input_folder: str, output_file: str) -> lis
                     entry_json = LogarchiveParser.convert_entry_to_unifiedlog_format(json.loads(line))
                     entries.append(entry_json)
                 except json.JSONDecodeError as e:
-                    logger.warning(f"WARNING: error parsing JSON {fname_reading}: {str(e)}")
+                    logger.warning(f"WARNING: error parsing JSON {fname_reading}", exc_info=True)
         # tempfolder is cleaned automatically after the block
 
         # sort the data as it's not sorted by default, and we need sorted data for other analysers
diff --git a/src/sysdiagnose/parsers/ps.py b/src/sysdiagnose/parsers/ps.py
index 1e2c2bf..eae9b98 100644
--- a/src/sysdiagnose/parsers/ps.py
+++ b/src/sysdiagnose/parsers/ps.py
@@ -67,7 +67,7 @@ def parse_file(filename):
                     result.append(row)
             return result
         except Exception as e:
-            logger.error(f"Could not parse ps.txt: {str(e)}")
+            logger.exception("Could not parse ps.txt")
            return []
 
    def exclude_known_goods(processes: dict, known_good: dict) -> list[dict]:
@@ -102,7 +102,7 @@ def export_to_json(processes, filename="./ps.json"):
         with open(filename, "w") as fd:
             fd.write(json_ps)
     except Exception as e:
-        logger.error(f"Impossible to dump the processes to {filename}. Reason: {str(e)}\n")
+        logger.exception(f"Impossible to dump the processes to {filename}")
 
 
 """
diff --git a/src/sysdiagnose/parsers/wifisecurity.py b/src/sysdiagnose/parsers/wifisecurity.py
index 5122464..ec3fde6 100644
--- a/src/sysdiagnose/parsers/wifisecurity.py
+++ b/src/sysdiagnose/parsers/wifisecurity.py
@@ -63,6 +63,6 @@ def parse_file(path: str) -> list | dict:
         except IndexError:
             return {'error': 'No WiFi/security.txt file present'}
         except Exception as e:
-            logger.error(f"Could not parse: {path}. Reason: {str(e)}")
+            logger.exception(f"Could not parse: {path}")
             return {'error': f'Could not parse: {path}. Reason: {str(e)}'}
         return entries
diff --git a/src/sysdiagnose/utils/jsonlogger.py b/src/sysdiagnose/utils/jsonlogger.py
new file mode 100644
index 0000000..2036087
--- /dev/null
+++ b/src/sysdiagnose/utils/jsonlogger.py
@@ -0,0 +1,9 @@
+from pythonjsonlogger import jsonlogger
+from datetime import datetime
+
+
+class SysdiagnoseJsonFormatter(jsonlogger.JsonFormatter):
+    '''Custom JSON logger formatter'''
+    # https://stackoverflow.com/questions/50873446/python-logger-output-dates-in-is8601-format
+    def formatTime(self, record, datefmt=None):
+        return datetime.fromtimestamp(record.created).astimezone().isoformat(timespec='milliseconds')
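
--
For reference, a minimal sketch of how the pieces above fit together,
assuming python-json-logger 2.0.7 and the SysdiagnoseJsonFormatter added
in this patch. The 'ps' parser name and the output file name are only
illustrative values; the patch itself passes parser/analyser names,
log_file and result via 'extra'.

    import logging

    from sysdiagnose.utils.jsonlogger import SysdiagnoseJsonFormatter

    # Attach the JSON formatter to a file handler, as get_json_logger() does.
    fmt_json = SysdiagnoseJsonFormatter(
        fmt='%(asctime)s %(levelname)s %(module)s %(message)s',
        rename_fields={'asctime': 'timestamp'})
    fh = logging.FileHandler('sysdiagnose.jsonl')  # hypothetical file name
    fh.setFormatter(fmt_json)

    logger = logging.getLogger('sysdiagnose')
    logger.setLevel(logging.INFO)
    logger.addHandler(fh)

    # Keys passed via 'extra' become top-level keys in the JSON record.
    logger.info("Parser 'ps' started", extra={'parser': 'ps'})

    # logger.exception() implies exc_info=True, so the formatter serialises
    # the traceback into an 'exc_info' key; no need to interpolate str(e).
    try:
        raise FileNotFoundError('ps.txt')
    except FileNotFoundError:
        logger.exception('Could not parse ps.txt')

Each record is written as one JSON object per line, roughly:

    {"timestamp": "2024-10-15T11:36:25.123+02:00", "levelname": "INFO",
     "module": "main", "message": "Parser 'ps' started", "parser": "ps"}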