From 1473ca8bc09d5813f12641b1b7b57f45aa24810e Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Tue, 8 Oct 2024 07:32:00 +0200
Subject: [PATCH] fix: [all] better uniformity in datetime and timestamp field

---
 src/sysdiagnose/__init__.py | 2 +-
 src/sysdiagnose/analysers/timesketch.py | 14 ++++++-------
 src/sysdiagnose/parsers/accessibility_tcc.py | 5 +++--
 src/sysdiagnose/parsers/appinstallation.py | 4 +++-
 src/sysdiagnose/parsers/crashlogs.py | 4 ++--
 src/sysdiagnose/parsers/lockdownd.py | 2 +-
 src/sysdiagnose/parsers/logarchive.py | 4 ++--
 src/sysdiagnose/parsers/powerlogs.py | 4 +++-
 src/sysdiagnose/parsers/shutdownlogs.py | 2 +-
 src/sysdiagnose/parsers/spindumpnosymbols.py | 6 +++---
 src/sysdiagnose/parsers/taskinfo.py | 4 ++--
 src/sysdiagnose/utils/misc.py | 4 ++--
 src/sysdiagnose/utils/multilinelog.py | 2 +-
 tests/test_multilinelog.py | 22 ++++++++++----------
 tests/test_parsers_logarchive.py | 2 +-
 15 files changed, 43 insertions(+), 38 deletions(-)

diff --git a/src/sysdiagnose/__init__.py b/src/sysdiagnose/__init__.py
index 24d9ee5..fae3cda 100644
--- a/src/sysdiagnose/__init__.py
+++ b/src/sysdiagnose/__init__.py
@@ -150,7 +150,7 @@ def create_case(self, sysdiagnose_file: str, force: bool = False, case_id: bool
             print(f"WARNING: Could not parse remotectl_dumpstate, and therefore extract serial numbers. Error {e}")

         try:
-            case['date'] = remotectl_dumpstate_parser.sysdiagnose_creation_datetime.isoformat()
+            case['date'] = remotectl_dumpstate_parser.sysdiagnose_creation_datetime.isoformat(timespec='microseconds')
         except Exception:
             pass

diff --git a/src/sysdiagnose/analysers/timesketch.py b/src/sysdiagnose/analysers/timesketch.py
index dde133b..0540edd 100644
--- a/src/sysdiagnose/analysers/timesketch.py
+++ b/src/sysdiagnose/analysers/timesketch.py
@@ -104,7 +104,7 @@ def __extract_ts_swcutil(self) -> Generator[dict, None, None]:
             ts_event = {
                 'message': service['Service'],
                 'timestamp': timestamp.timestamp() * 1000000,
-                'datetime': timestamp.isoformat(),
+                'datetime': timestamp.isoformat(timespec='microseconds'),
                 'timestamp_desc': 'swcutil last checked',
                 'extra_field_1': f"application: {service['App ID']}"
             }
@@ -129,7 +129,7 @@ def __extract_ts_accessibility_tcc(self) -> Generator[dict, None, None]:
             ts_event = {
                 'message': item['service'],
                 'timestamp': timestamp.timestamp() * 1000000,
-                'datetime': timestamp.isoformat(),
+                'datetime': timestamp.isoformat(timespec='microseconds'),
                 'timestamp_desc': 'Accessibility TC Last Modified',
                 'extra_field_1': f"client: {item['client']}"
             }
@@ -190,7 +190,7 @@ def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]:
             ts_event = {
                 'message': wifi['acct'],
                 'timestamp': ctimestamp.timestamp() * 1000000,
-                'datetime': ctimestamp.isoformat(),
+                'datetime': ctimestamp.isoformat(timespec='microseconds'),
                 'timestamp_desc': 'SSID added to known secured WIFI list',
                 'extra_field_1': wifi['accc']
             }
@@ -200,7 +200,7 @@ def __extract_ts_wifisecurity(self) -> Generator[dict, None, None]:
             ts_event = {
                 'message': wifi['acct'],
                 'timestamp': mtimestamp.timestamp() * 1000000,
-                'datetime': mtimestamp.isoformat(),
+                'datetime': mtimestamp.isoformat(timespec='microseconds'),
                 'timestamp_desc': 'SSID modified into the secured WIFI list',
                 'extra_field_1': wifi['accc']
             }
@@ -221,7 +221,7 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]:
             ts_event = {
                 'message': f"WIFI {ssid} added",
                 'timestamp': added.timestamp() * 1000000,
-                'datetime': added.isoformat(),
+                'datetime': added.isoformat(timespec='microseconds'),
                 'timestamp_desc': 'added in known networks plist',
                 'extra_field_1': f"Add reason: {item['AddReason']}"
             }
@@ -238,7 +238,7 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]:
             ts_event = {
                 'message': f"WIFI {ssid} updated",
                 'timestamp': updated.timestamp() * 1000000,
-                'datetime': updated.isoformat(),
+                'datetime': updated.isoformat(timespec='microseconds'),
                 'timestamp_desc': 'updated in known networks plist',
                 'extra_field_1': f"Add reason: {item['AddReason']}"
             }
@@ -255,7 +255,7 @@ def __extract_ts_wifi_known_networks(self) -> Generator[dict, None, None]:
             ts_event = {
                 'message': f"Password for WIFI {ssid} modified",
                 'timestamp': modified_password.timestamp() * 1000000,
-                'datetime': modified_password.isoformat(),
+                'datetime': modified_password.isoformat(timespec='microseconds'),
                 'timestamp_desc': 'password modified in known networks plist',
                 'extra_field_1': f"AP mode: {item['__OSSpecific__']['AP_MODE']}"
             }
diff --git a/src/sysdiagnose/parsers/accessibility_tcc.py b/src/sysdiagnose/parsers/accessibility_tcc.py
index e04e1a7..885e664 100644
--- a/src/sysdiagnose/parsers/accessibility_tcc.py
+++ b/src/sysdiagnose/parsers/accessibility_tcc.py
@@ -44,9 +44,10 @@ def execute(self) -> list | dict:
                     continue

                 try:
+                    timestamp = datetime.fromtimestamp(value['last_modified'], tz=timezone.utc)
                     value['db_table'] = key
-                    value['datetime'] = datetime.fromtimestamp(value['last_modified'], tz=timezone.utc).isoformat()
-                    value['timestamp'] = value['last_modified']
+                    value['datetime'] = timestamp.isoformat(timespec='microseconds')
+                    value['timestamp'] = timestamp.timestamp()
                     result.append(value)
                 except TypeError:
                     # skip "None" values and such
diff --git a/src/sysdiagnose/parsers/appinstallation.py b/src/sysdiagnose/parsers/appinstallation.py
index 068717e..77ca14d 100644
--- a/src/sysdiagnose/parsers/appinstallation.py
+++ b/src/sysdiagnose/parsers/appinstallation.py
@@ -47,8 +47,10 @@ def execute(self) -> list | dict:
                     continue

                 try:
+                    timestamp = datetime.fromtimestamp(value['timestamp'], tz=timezone.utc)
                     value['db_table'] = key
-                    value['datetime'] = datetime.fromtimestamp(value['timestamp'], tz=timezone.utc).isoformat()
+                    value['datetime'] = timestamp.isoformat(timespec='microseconds')
+                    value['timestamp'] = timestamp.timestamp()
                     result.append(value)
                 except TypeError:
                     # skip "None" values and such
diff --git a/src/sysdiagnose/parsers/crashlogs.py b/src/sysdiagnose/parsers/crashlogs.py
index e6bfd4f..5bb5771 100644
--- a/src/sysdiagnose/parsers/crashlogs.py
+++ b/src/sysdiagnose/parsers/crashlogs.py
@@ -122,7 +122,7 @@ def parse_ips_file(path: str) -> list | dict:

         timestamp = datetime.strptime(result['timestamp'], '%Y-%m-%d %H:%M:%S.%f %z')
         result['timestamp_orig'] = result['timestamp']
-        result['datetime'] = timestamp.isoformat()
+        result['datetime'] = timestamp.isoformat(timespec='microseconds')
         result['timestamp'] = timestamp.timestamp()

         return result
@@ -139,7 +139,7 @@ def parse_summary_file(path: str) -> list | dict:
             entry = {
                 'app_name': app,
                 'name': app,
-                'datetime': timestamp.isoformat(),
+                'datetime': timestamp.isoformat(timespec='microseconds'),
                 'timestamp': timestamp.timestamp(),
                 'filename': os.path.basename(path),
                 'path': path,
diff --git a/src/sysdiagnose/parsers/lockdownd.py b/src/sysdiagnose/parsers/lockdownd.py
index 99b2366..c7e14a8 100644
--- a/src/sysdiagnose/parsers/lockdownd.py
+++ b/src/sysdiagnose/parsers/lockdownd.py
@@ -56,7 +56,7 @@ def extract_from_list(lines: list) -> list:
                 # LATER parse the json blob that can sometimes be in the message
                 item = {
                     'timestamp': timestamp.timestamp(),
-                    'datetime': timestamp.isoformat(),
+                    'datetime': timestamp.isoformat(timespec='microseconds'),
                     'pid': int(match.group(2)),
                     'event_type': match.group(3),
                     'msg': match.group(4).strip()
diff --git a/src/sysdiagnose/parsers/logarchive.py b/src/sysdiagnose/parsers/logarchive.py
index b0d31c1..aefa17f 100644
--- a/src/sysdiagnose/parsers/logarchive.py
+++ b/src/sysdiagnose/parsers/logarchive.py
@@ -305,7 +305,7 @@ def convert_entry_to_unifiedlog_format(entry: dict) -> dict:
         # already in the Mandiant unifiedlog format
         if 'event_type' in entry:
             timestamp = LogarchiveParser.convert_unifiedlog_time_to_datetime(entry['time'])
-            entry['datetime'] = timestamp.isoformat()
+            entry['datetime'] = timestamp.isoformat(timespec='microseconds')
             entry['timestamp'] = timestamp.timestamp()
             return entry
         '''
@@ -353,7 +353,7 @@ def convert_entry_to_unifiedlog_format(entry: dict) -> dict:
                 new_entry[key] = value
         # convert time
         timestamp = datetime.fromisoformat(new_entry['time'])
-        new_entry['datetime'] = timestamp.isoformat()
+        new_entry['datetime'] = timestamp.isoformat(timespec='microseconds')
         new_entry['timestamp'] = timestamp.timestamp()
         new_entry['time'] = new_entry['timestamp'] * 1000000000

diff --git a/src/sysdiagnose/parsers/powerlogs.py b/src/sysdiagnose/parsers/powerlogs.py
index db7a5d2..28bccce 100644
--- a/src/sysdiagnose/parsers/powerlogs.py
+++ b/src/sysdiagnose/parsers/powerlogs.py
@@ -47,8 +47,10 @@ def execute(self) -> list:
                     continue

                 try:
+                    timestamp = datetime.fromtimestamp(value['timestamp'], tz=timezone.utc)
                     value['db_table'] = key
-                    value['datetime'] = datetime.fromtimestamp(value['timestamp'], tz=timezone.utc).isoformat()
+                    value['datetime'] = timestamp.isoformat(timespec='microseconds')
+                    value['timestamp'] = timestamp.timestamp()
                     result.append(value)
                 except TypeError:
                     # skip "None" values and such
diff --git a/src/sysdiagnose/parsers/shutdownlogs.py b/src/sysdiagnose/parsers/shutdownlogs.py
index 336dbba..6683666 100644
--- a/src/sysdiagnose/parsers/shutdownlogs.py
+++ b/src/sysdiagnose/parsers/shutdownlogs.py
@@ -78,7 +78,7 @@ def parse_file(path: str) -> list:
                 # add entries
                 for item in running_processes.values():
                     item['timestamp'] = timestamp.timestamp()
-                    item['datetime'] = timestamp.isoformat()
+                    item['datetime'] = timestamp.isoformat(timespec='microseconds')
                     item['event'] = f"{item['command']} is still there during shutdown after {item['time_waiting']}s"
                     events.append(item)
                 index += 1
diff --git a/src/sysdiagnose/parsers/spindumpnosymbols.py b/src/sysdiagnose/parsers/spindumpnosymbols.py
index d66a4a3..c7ae57f 100644
--- a/src/sysdiagnose/parsers/spindumpnosymbols.py
+++ b/src/sysdiagnose/parsers/spindumpnosymbols.py
@@ -77,7 +77,7 @@ def parse_basic(data: list) -> dict:
         if 'Date/Time' in output:
             timestamp = datetime.strptime(output['Date/Time'], "%Y-%m-%d %H:%M:%S.%f %z")
             output['timestamp'] = timestamp.timestamp()
-            output['datetime'] = timestamp.isoformat()
+            output['datetime'] = timestamp.isoformat(timespec='microseconds')

         return output

@@ -96,7 +96,7 @@ def parse_processes(data: list, start_timestamp: int) -> list[dict]:
                 except KeyError:  # some don't have a time since fork, like zombie processes
                     timestamp = start_time
                 process['timestamp'] = timestamp.timestamp()
-                process['datetime'] = timestamp.isoformat()
+                process['datetime'] = timestamp.isoformat(timespec='microseconds')
                 processes.append(process)
                 process_buffer = [line.strip()]
             else:
@@ -108,7 +108,7 @@ def parse_processes(data: list, start_timestamp: int) -> list[dict]:
         process = SpindumpNoSymbolsParser.parse_process(process_buffer)
         timestamp = start_time - timedelta(seconds=int(process['Time Since Fork'].rstrip('s')))
         process['timestamp'] = timestamp.timestamp()
-        process['datetime'] = timestamp.isoformat()
+        process['datetime'] = timestamp.isoformat(timespec='microseconds')
         processes.append(process)

         return processes
diff --git a/src/sysdiagnose/parsers/taskinfo.py b/src/sysdiagnose/parsers/taskinfo.py
index 88fd1fa..64627ae 100644
--- a/src/sysdiagnose/parsers/taskinfo.py
+++ b/src/sysdiagnose/parsers/taskinfo.py
@@ -42,7 +42,7 @@ def execute(self) -> list:
             numb_tasks = int(result.group(2))
             events.append({
                 'timestamp': self.sysdiagnose_creation_datetime.timestamp(),
-                'datetime': self.sysdiagnose_creation_datetime.isoformat(),
+                'datetime': self.sysdiagnose_creation_datetime.isoformat(timespec='microseconds'),
                 'tasks': numb_tasks,
                 'datetime_description': f"{numb_tasks} tasks/programs running at sysdiagnose creation time.",
             })
@@ -78,7 +78,7 @@ def execute(self) -> list:
                     seconds = int(process['run time'].split()[0])
                     timestamp = self.sysdiagnose_creation_datetime - timedelta(seconds=seconds)
                     process['timestamp'] = timestamp.timestamp()
-                    process['datetime'] = timestamp.isoformat()
+                    process['datetime'] = timestamp.isoformat(timespec='microseconds')
                     process['datetime_description'] = "Process launched at the timestamp, calculated from sysdiagnose creation time minus process run time"
                     events.append(process)
                     extracted_block = []
diff --git a/src/sysdiagnose/utils/misc.py b/src/sysdiagnose/utils/misc.py
index ef4e3df..29b5f18 100644
--- a/src/sysdiagnose/utils/misc.py
+++ b/src/sysdiagnose/utils/misc.py
@@ -102,7 +102,7 @@ def _handle_bytes(value, skip_underscore=False):

 @json_serializable.register(datetime)
 def _handle_datetime(value, skip_underscore=False):
-    return value.isoformat()
+    return value.isoformat(timespec='microseconds')


 def find_datetime(d):
@@ -114,7 +114,7 @@ def find_datetime(d):
                 if isinstance(item, dict):
                     find_datetime(item)
         elif isinstance(v, datetime.datetime):
-            d[k] = v.isoformat()
+            d[k] = v.isoformat(timespec='microseconds')
     return d


diff --git a/src/sysdiagnose/utils/multilinelog.py b/src/sysdiagnose/utils/multilinelog.py
index e2951c2..f7cb706 100644
--- a/src/sysdiagnose/utils/multilinelog.py
+++ b/src/sysdiagnose/utils/multilinelog.py
@@ -78,7 +78,7 @@ def build_from_logentry(line):
         timestamp = datetime.strptime(timestamp_str, "%a %b %d %H:%M:%S %Y")
         timestamp = timestamp.replace(tzinfo=timezone.utc)
         entry['timestamp'] = timestamp.timestamp()
-        entry['datetime'] = timestamp.isoformat()
+        entry['datetime'] = timestamp.isoformat(timespec='microseconds')

     # log level
     loglevelregex = re.search(r"\<(.*?)\>", line)
diff --git a/tests/test_multilinelog.py b/tests/test_multilinelog.py
index 4dcb394..0238bc2 100644
--- a/tests/test_multilinelog.py
+++ b/tests/test_multilinelog.py
@@ -27,7 +27,7 @@ def test_multilinelog_plist(self):
         '''
         expected_result = {
             'timestamp': 1684933084.0,
-            'datetime': '2023-05-24T12:58:04+00:00',
+            'datetime': '2023-05-24T12:58:04.000000+00:00',
             'loglevel': 'debug',
             'hexID': '0x16bf9b000',
             'event_type': 'MobileActivationDaemon handleActivationInfoWithSession:activationSignature:completionBlock:',
@@ -63,7 +63,7 @@ def test_multilinelog_curlybrackets(self):
         '''
         expected_result = {
             'timestamp': 1684933536.0,
-            'datetime': '2023-05-24T13:05:36+00:00',
+            'datetime': '2023-05-24T13:05:36.000000+00:00',
             'loglevel': 'err',
             'hexID': '0x16be43000',
             'msg': '+[MCMMetadata readAndValidateMetadataAtFileUrl:forUserIdentity:containerClass:checkClassPath:transient:error:]: 199: Failed to validate metadata at URL [file:///private/var/mobile/Containers/Data/Application/0984009B-81D1-4F7F-BDBD-261E22059155/.com.apple.mobile_container_manager.metadata.plist]: {\n MCMMetadataActiveDPClass = 0;\n MCMMetadataContentClass = 2;\n MCMMetadataIdentifier = "com.apple.VoiceMemos";\n MCMMetadataInfo = {\n "com.apple.MobileInstallation.ContentProtectionClass" = 0;\n };\n MCMMetadataSchemaVersion = 1;\n MCMMetadataUUID = "12036663-1F3A-45B3-A34C-402D5BB7D4FB";\n MCMMetadataUserIdentity = {\n personaUniqueString = "83CB8039-725D-4462-84C2-7F79F0A6EFB3";\n posixGID = 501;\n posixUID = 501;\n type = 0;\n version = 2;\n };\n MCMMetadataVersion = 6;\n} (Error Domain=MCMErrorDomain Code=29 "Invalid metadata-URLs should match: /private/var/mobile/Containers/Data/Application/0984009B-81D1-4F7F-BDBD-261E22059155 : /private/var/mobile/Containers/Data/VPNPlugin/0984009B-81D1-4F7F-BDBD-261E22059155" UserInfo={SourceFileLine=370, NSLocalizedDescription=Invalid metadata-URLs should match: /private/var/mobile/Containers/Data/Application/0984009B-81D1-4F7F-BDBD-261E22059155 : /private/var/mobile/Containers/Data/VPNPlugin/0984009B-81D1-4F7F-BDBD-261E22059155, FunctionName=+[MCMMetadata _readAndValidateMetadataInDictionary:containerURL:forUserIdentity:containerClass:checkClassPath:fsNode:transient:error:]})'}
@@ -74,7 +74,7 @@ def test_multilinelog_simple_1(self):
         s = '''Wed May 24 12:55:37 2023 [72] (0x16afb3000) -[MCMClientConnection _regenerateAllSystemContainerPaths]: Rolling system container directory UUIDs on disk'''
         expected_result = {
             'timestamp': 1684932937.0,
-            'datetime': '2023-05-24T12:55:37+00:00',
+            'datetime': '2023-05-24T12:55:37.000000+00:00',
             'loglevel': 'notice',
             'hexID': '0x16afb3000',
             'event_type': 'MCMClientConnection _regenerateAllSystemContainerPaths',
@@ -86,7 +86,7 @@ def test_mutlinelog_simple_2(self):
         s = '''Wed May 24 13:05:30 2023 [72] (0x16be43000) _containermanagerd_init_block_invoke: containermanagerd first boot cleanup complete'''
         expected_result = {
             'timestamp': 1684933530.0,
-            'datetime': '2023-05-24T13:05:30+00:00',
+            'datetime': '2023-05-24T13:05:30.000000+00:00',
             'loglevel': 'notice',
             'hexID': '0x16be43000',
             'msg': '_containermanagerd_init_block_invoke: containermanagerd first boot cleanup complete'}
@@ -98,13 +98,13 @@ def test_multilinelog_simple_multiplelines(self):
             Wed May 24 12:55:37 2023 [72] (0x16afb3000) -[MCMClientConnection _regenerateAllSystemContainerPaths]: Rolling system container directory UUIDs on disk'''
         expected_result_0 = {
             'timestamp': 1684933530.0,
-            'datetime': '2023-05-24T13:05:30+00:00',
+            'datetime': '2023-05-24T13:05:30.000000+00:00',
             'loglevel': 'notice',
             'hexID': '0x16be43000',
             'msg': '_containermanagerd_init_block_invoke: containermanagerd first boot cleanup complete'}
         expected_result_1 = {
             'timestamp': 1684932937.0,
-            'datetime': '2023-05-24T12:55:37+00:00',
+            'datetime': '2023-05-24T12:55:37.000000+00:00',
             'loglevel': 'notice',
             'hexID': '0x16afb3000',
             'event_type': 'MCMClientConnection _regenerateAllSystemContainerPaths',
@@ -139,14 +139,14 @@ def test_multilinelog_keyvalue(self):
             Wed May 24 13:08:13 2023 [135] (0x16f1db000) MA: main: ____________________________________________________________________'''
         expected_result_0 = {
             'timestamp': 1684932937.0,
-            'datetime': '2023-05-24T12:55:37+00:00',
+            'datetime': '2023-05-24T12:55:37.000000+00:00',
             'loglevel': 'notice',
             'hexID': '0x16afb3000',
             'event_type': 'MCMClientConnection _regenerateAllSystemContainerPaths',
             'msg': 'Rolling system container directory UUIDs on disk'}
         expected_result_1 = {
             'timestamp': 1684933693.0,
-            'datetime': '2023-05-24T13:08:13+00:00',
+            'datetime': '2023-05-24T13:08:13.000000+00:00',
             'loglevel': 'debug',
             'hexID': '0x16f1db000',
             'msg': 'MA: main: ____________________ Mobile Activation Startup _____________________',
@@ -174,13 +174,13 @@ def test_multilinelog_keyvalue_onlyend(self):
             Sat Feb 18 09:48:39 2023 [2695] (0x16dc37000) MA: dealwith_activation: Activation State: Activated'''
         expected_result_0 = {
             'timestamp': 1676713718.0,
-            'datetime': '2023-02-18T09:48:38+00:00',
+            'datetime': '2023-02-18T09:48:38.000000+00:00',
             'loglevel': 'debug',
             'hexID': '0x16dc37000',
             'msg': 'MA: main: ____________________________________________________________________'}
         expected_result_1 = {
             'timestamp': 1676713719.0,
-            'datetime': '2023-02-18T09:48:39+00:00',
+            'datetime': '2023-02-18T09:48:39.000000+00:00',
             'loglevel': 'debug',
             'hexID': '0x16dc37000',
             'msg': 'MA: dealwith_activation: Activation State: Activated'}
@@ -192,7 +192,7 @@ def test_multilinelog_keyvalue_onlystart(self):
         s = '''Fri Dec 2 11:32:19 2022 [84816] (0x16afff000) MA: main: ____________________ Mobile Activation Startup _____________________'''
         expected_result = {
             'timestamp': 1669980739.0,
-            'datetime': '2022-12-02T11:32:19+00:00',
+            'datetime': '2022-12-02T11:32:19.000000+00:00',
             'loglevel': 'debug',
             'hexID': '0x16afff000',
             'msg': 'MA: main: ____________________ Mobile Activation Startup _____________________'}
diff --git a/tests/test_parsers_logarchive.py b/tests/test_parsers_logarchive.py
index 40cfffc..50fb51b 100644
--- a/tests/test_parsers_logarchive.py
+++ b/tests/test_parsers_logarchive.py
@@ -40,7 +40,7 @@ def test_convert_native_time_to_unifiedlog(self):
     def test_convert_unifiedlog_time_to_datetime(self):
         input = 1684958608908085200
         expected_output = '2023-05-24T20:03:28.908085+00:00'
-        result = LogarchiveParser.convert_unifiedlog_time_to_datetime(input).isoformat()
+        result = LogarchiveParser.convert_unifiedlog_time_to_datetime(input).isoformat(timespec='microseconds')
         self.assertEqual(result, expected_output)

     def test_convert_entry_to_unified(self):
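
Background for the change, as a minimal sketch independent of the patched code: Python's datetime.isoformat() drops the fractional-seconds part entirely when the microsecond field is zero, so serialized datetimes come out with varying widths; passing timespec='microseconds' always emits six fractional digits, which matches the '.000000' values the updated tests above now expect. The epoch value below is taken from test_multilinelog_plist.

from datetime import datetime, timezone

ts = datetime.fromtimestamp(1684933084.0, tz=timezone.utc)
print(ts.isoformat())                         # '2023-05-24T12:58:04+00:00' (fraction omitted when zero)
print(ts.isoformat(timespec='microseconds'))  # '2023-05-24T12:58:04.000000+00:00' (fixed width)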