From 00cea6d8e835aff55bfe04b09495d0cb28ab0a6e Mon Sep 17 00:00:00 2001
From: Frances Hartwell <frances@datacebo.com>
Date: Tue, 14 May 2024 14:58:51 -0400
Subject: [PATCH] Update logs to write to CSV

---
 sdv/logging/__init__.py              |   4 +-
 sdv/logging/sdv_logger_config.yml    |   8 +-
 sdv/logging/utils.py                 |  15 ++++
 sdv/multi_table/base.py              | 125 +++++++++++----------------
 sdv/single_table/base.py             | 118 ++++++++++---------------
 tests/unit/logging/test_utils.py     |  36 +++++++-
 tests/unit/multi_table/test_base.py  |  92 ++++++++++----------
 tests/unit/single_table/test_base.py |  92 ++++++++++----------
 8 files changed, 243 insertions(+), 247 deletions(-)

diff --git a/sdv/logging/__init__.py b/sdv/logging/__init__.py
index c15348231..2c5d10e88 100644
--- a/sdv/logging/__init__.py
+++ b/sdv/logging/__init__.py
@@ -1,10 +1,12 @@
 """Module for configuring loggers within the SDV library."""
 
 from sdv.logging.logger import get_sdv_logger
-from sdv.logging.utils import disable_single_table_logger, get_sdv_logger_config
+from sdv.logging.utils import (
+    disable_single_table_logger, get_sdv_logger_config, load_logfile_dataframe)
 
 __all__ = (
     'disable_single_table_logger',
     'get_sdv_logger',
     'get_sdv_logger_config',
+    'load_logfile_dataframe',
 )
diff --git a/sdv/logging/sdv_logger_config.yml b/sdv/logging/sdv_logger_config.yml
index 4b01b0c65..cf3b51710 100644
--- a/sdv/logging/sdv_logger_config.yml
+++ b/sdv/logging/sdv_logger_config.yml
@@ -6,22 +6,22 @@ loggers:
     propagate: false
     handlers:
       class: logging.FileHandler
-      filename: sdv_logs.log
+      filename: sdv_logs.csv
   MultiTableSynthesizer:
     level: INFO
     propagate: false
     handlers:
       class: logging.FileHandler
-      filename: sdv_logs.log
+      filename: sdv_logs.csv
     MultiTableMetadata:
       level: INFO
       propagate: false
       handlers:
         class: logging.FileHandler
-        filename: sdv_logs.log
+        filename: sdv_logs.csv
     SingleTableMetadata:
       level: INFO
       propagate: false
       handlers:
         class: logging.FileHandler
-        filename: sdv_logs.log
+        filename: sdv_logs.csv
diff --git a/sdv/logging/utils.py b/sdv/logging/utils.py
index 471870649..acea37bd7 100644
--- a/sdv/logging/utils.py
+++ b/sdv/logging/utils.py
@@ -5,6 +5,7 @@
 import shutil
 from pathlib import Path
 
+import pandas as pd
 import platformdirs
 import yaml
 
@@ -49,3 +50,24 @@ def disable_single_table_logger():
     finally:
         for handler in handlers:
             single_table_logger.addHandler(handler)
+
+
+def load_logfile_dataframe(logfile):
+    """Load the SDV logfile as a pandas DataFrame with correct column headers.
+
+    The file is read as header-less CSV: every line is treated as a log record and
+    the column labels are assigned from the fixed list below.
+
+    Args:
+        logfile (str):
+            Path to the SDV log CSV file.
+
+    Returns:
+        pandas.DataFrame:
+            One row per log record, with the columns named as in ``column_names``.
+    """
+    column_names = [
+        'LEVEL', 'EVENT', 'TIMESTAMP', 'SYNTHESIZER CLASS NAME', 'SYNTHESIZER ID',
+        'TOTAL NUMBER OF TABLES', 'TOTAL NUMBER OF ROWS', 'TOTAL NUMBER OF COLUMNS'
+    ]
+    return pd.read_csv(logfile, names=column_names)
diff --git a/sdv/multi_table/base.py b/sdv/multi_table/base.py
index 779366cb1..ed08891fb 100644
--- a/sdv/multi_table/base.py
+++ b/sdv/multi_table/base.py
@@ -119,15 +119,12 @@ def __init__(self, metadata, locales=['en_US'], synthesizer_kwargs=None):
         self._fitted_sdv_version = None
         self._fitted_sdv_enterprise_version = None
         self._synthesizer_id = generate_synthesizer_id(self)
-        SYNTHESIZER_LOGGER.info(
-            '\nInstance:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Synthesizer id: %s',
-            datetime.datetime.now(),
-            self.__class__.__name__,
-            self._synthesizer_id
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Instance',
+            'TIMESTAMP': datetime.datetime.now(),
+            'SYNTHESIZER CLASS NAME': self.__class__.__name__,
+            'SYNTHESIZER ID': self._synthesizer_id
+        })
 
     def set_address_columns(self, table_name, column_names, anonymization_level='full'):
         """Set the address multi-column transformer.
@@ -403,22 +400,16 @@ def fit_processed_data(self, processed_data):
             total_rows += len(table)
             total_columns += len(table.columns)
 
-        SYNTHESIZER_LOGGER.info(
-            '\nFit processed data:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Statistics of the fit processed data:\n'
-            '    Total number of tables: %s\n'
-            '    Total number of rows: %s\n'
-            '    Total number of columns: %s\n'
-            '  Synthesizer id: %s',
-            datetime.datetime.now(),
-            self.__class__.__name__,
-            len(processed_data),
-            total_rows,
-            total_columns,
-            self._synthesizer_id,
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Fit processed data',
+            'TIMESTAMP': datetime.datetime.now(),
+            'SYNTHESIZER CLASS NAME': self.__class__.__name__,
+            'SYNTHESIZER ID': self._synthesizer_id,
+            'TOTAL NUMBER OF TABLES': len(processed_data),
+            'TOTAL NUMBER OF ROWS': total_rows,
+            'TOTAL NUMBER OF COLUMNS': total_columns
+        })
+
         check_synthesizer_version(self, is_fit_method=True, compare_operator=operator.lt)
         with disable_single_table_logger():
             augmented_data = self._augment_tables(processed_data)
@@ -443,22 +434,16 @@ def fit(self, data):
             total_rows += len(table)
             total_columns += len(table.columns)
 
-        SYNTHESIZER_LOGGER.info(
-            '\nFit:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Statistics of the fit data:\n'
-            '    Total number of tables: %s\n'
-            '    Total number of rows: %s\n'
-            '    Total number of columns: %s\n'
-            '  Synthesizer id: %s',
-            datetime.datetime.now(),
-            self.__class__.__name__,
-            len(data),
-            total_rows,
-            total_columns,
-            self._synthesizer_id,
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Fit',
+            'TIMESTAMP': datetime.datetime.now(),
+            'SYNTHESIZER CLASS NAME': self.__class__.__name__,
+            'SYNTHESIZER ID': self._synthesizer_id,
+            'TOTAL NUMBER OF TABLES': len(data),
+            'TOTAL NUMBER OF ROWS': total_rows,
+            'TOTAL NUMBER OF COLUMNS': total_columns
+        })
+
         check_synthesizer_version(self, is_fit_method=True, compare_operator=operator.lt)
         _validate_foreign_keys_not_null(self.metadata, data)
         self._check_metadata_updated()
@@ -511,22 +496,16 @@ def sample(self, scale=1.0):
             if table in table_columns:
                 sampled_data[table].columns = table_columns[table]
 
-        SYNTHESIZER_LOGGER.info(
-            '\nSample:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Statistics of the sample size:\n'
-            '    Total number of tables: %s\n'
-            '    Total number of rows: %s\n'
-            '    Total number of columns: %s\n'
-            '  Synthesizer id: %s',
-            datetime.datetime.now(),
-            self.__class__.__name__,
-            len(sampled_data),
-            total_rows,
-            total_columns,
-            self._synthesizer_id,
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Sample',
+            'TIMESTAMP': datetime.datetime.now(),
+            'SYNTHESIZER CLASS NAME': self.__class__.__name__,
+            'SYNTHESIZER ID': self._synthesizer_id,
+            'TOTAL NUMBER OF TABLES': len(sampled_data),
+            'TOTAL NUMBER OF ROWS': total_rows,
+            'TOTAL NUMBER OF COLUMNS': total_columns
+        })
+
         return sampled_data
 
     def get_learned_distributions(self, table_name):
@@ -692,15 +671,13 @@ def save(self, filepath):
                 Path where the instance will be serialized.
         """
         synthesizer_id = getattr(self, '_synthesizer_id', None)
-        SYNTHESIZER_LOGGER.info(
-            '\nSave:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Synthesizer id: %s',
-            datetime.datetime.now(),
-            self.__class__.__name__,
-            synthesizer_id
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Save',
+            'TIMESTAMP': datetime.datetime.now(),
+            'SYNTHESIZER CLASS NAME': self.__class__.__name__,
+            'SYNTHESIZER ID': synthesizer_id,
+        })
+
         with open(filepath, 'wb') as output:
             cloudpickle.dump(self, output)
 
@@ -724,13 +701,11 @@ def load(cls, filepath):
         if getattr(synthesizer, '_synthesizer_id', None) is None:
             synthesizer._synthesizer_id = generate_synthesizer_id(synthesizer)
 
-        SYNTHESIZER_LOGGER.info(
-            '\nLoad:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Synthesizer id: %s',
-            datetime.datetime.now(),
-            synthesizer.__class__.__name__,
-            synthesizer._synthesizer_id,
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Load',
+            'TIMESTAMP': datetime.datetime.now(),
+            'SYNTHESIZER CLASS NAME': synthesizer.__class__.__name__,
+            'SYNTHESIZER ID': synthesizer._synthesizer_id,
+        })
+
         return synthesizer
diff --git a/sdv/single_table/base.py b/sdv/single_table/base.py
index 3b3b93122..de474d1c7 100644
--- a/sdv/single_table/base.py
+++ b/sdv/single_table/base.py
@@ -112,15 +112,12 @@ def __init__(self, metadata, enforce_min_max_values=True, enforce_rounding=True,
         self._fitted_sdv_version = None
         self._fitted_sdv_enterprise_version = None
         self._synthesizer_id = generate_synthesizer_id(self)
-        SYNTHESIZER_LOGGER.info(
-            '\nInstance:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Synthesizer id: %s',
-            datetime.datetime.now(),
-            self.__class__.__name__,
-            self._synthesizer_id
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Instance',
+            'TIMESTAMP': datetime.datetime.now(),
+            'SYNTHESIZER CLASS NAME': self.__class__.__name__,
+            'SYNTHESIZER ID': self._synthesizer_id,
+        })
 
     def set_address_columns(self, column_names, anonymization_level='full'):
         """Set the address multi-column transformer."""
@@ -420,21 +417,15 @@ def fit_processed_data(self, processed_data):
             processed_data (pandas.DataFrame):
                 The transformed data used to fit the model to.
         """
-        SYNTHESIZER_LOGGER.info(
-            '\nFit processed data:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Statistics of the fit processed data:\n'
-            '    Total number of tables: 1\n'
-            '    Total number of rows: %s\n'
-            '    Total number of columns: %s\n'
-            '  Synthesizer id: %s',
-            datetime.datetime.now(),
-            self.__class__.__name__,
-            len(processed_data),
-            len(processed_data.columns),
-            self._synthesizer_id,
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Fit processed data',
+            'TIMESTAMP': datetime.datetime.now(),
+            'SYNTHESIZER CLASS NAME': self.__class__.__name__,
+            'SYNTHESIZER ID': self._synthesizer_id,
+            'TOTAL NUMBER OF TABLES': 1,
+            'TOTAL NUMBER OF ROWS': len(processed_data),
+            'TOTAL NUMBER OF COLUMNS': len(processed_data.columns)
+        })
 
         check_synthesizer_version(self, is_fit_method=True, compare_operator=operator.lt)
         if not processed_data.empty:
@@ -452,21 +443,15 @@ def fit(self, data):
             data (pandas.DataFrame):
                 The raw data (before any transformations) to fit the model to.
         """
-        SYNTHESIZER_LOGGER.info(
-            '\nFit:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Statistics of the fit data:\n'
-            '    Total number of tables: 1\n'
-            '    Total number of rows: %s\n'
-            '    Total number of columns: %s\n'
-            '  Synthesizer id: %s',
-            datetime.datetime.now(),
-            self.__class__.__name__,
-            len(data),
-            len(data.columns),
-            self._synthesizer_id,
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Fit',
+            'TIMESTAMP': datetime.datetime.now(),
+            'SYNTHESIZER CLASS NAME': self.__class__.__name__,
+            'SYNTHESIZER ID': self._synthesizer_id,
+            'TOTAL NUMBER OF TABLES': 1,
+            'TOTAL NUMBER OF ROWS': len(data),
+            'TOTAL NUMBER OF COLUMNS': len(data.columns)
+        })
 
         check_synthesizer_version(self, is_fit_method=True, compare_operator=operator.lt)
         self._check_metadata_updated()
@@ -484,15 +469,12 @@ def save(self, filepath):
                 Path where the synthesizer instance will be serialized.
         """
         synthesizer_id = getattr(self, '_synthesizer_id', None)
-        SYNTHESIZER_LOGGER.info(
-            '\nSave:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Synthesizer id: %s',
-            datetime.datetime.now(),
-            self.__class__.__name__,
-            synthesizer_id
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Save',
+            'TIMESTAMP': datetime.datetime.now(),
+            'SYNTHESIZER CLASS NAME': self.__class__.__name__,
+            'SYNTHESIZER ID': synthesizer_id,
+        })
 
         with open(filepath, 'wb') as output:
             cloudpickle.dump(self, output)
@@ -517,15 +499,12 @@ def load(cls, filepath):
         if getattr(synthesizer, '_synthesizer_id', None) is None:
             synthesizer._synthesizer_id = generate_synthesizer_id(synthesizer)
 
-        SYNTHESIZER_LOGGER.info(
-            '\nLoad:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Synthesizer id: %s',
-            datetime.datetime.now(),
-            synthesizer.__class__.__name__,
-            synthesizer._synthesizer_id,
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Load',
+            'TIMESTAMP': datetime.datetime.now(),
+            'SYNTHESIZER CLASS NAME': synthesizer.__class__.__name__,
+            'SYNTHESIZER ID': synthesizer._synthesizer_id,
+        })
 
         return synthesizer
 
@@ -913,21 +892,15 @@ def sample(self, num_rows, max_tries_per_batch=100, batch_size=None, output_file
         if not original_columns.empty:
             sampled_data.columns = self._original_columns
 
-        SYNTHESIZER_LOGGER.info(
-            '\nSample:\n'
-            '  Timestamp: %s\n'
-            '  Synthesizer class name: %s\n'
-            '  Statistics of the sample size:\n'
-            '    Total number of tables: 1\n'
-            '    Total number of rows: %s\n'
-            '    Total number of columns: %s\n'
-            '  Synthesizer id: %s',
-            sample_timestamp,
-            self.__class__.__name__,
-            len(sampled_data),
-            len(sampled_data.columns),
-            self._synthesizer_id,
-        )
+        SYNTHESIZER_LOGGER.info({
+            'EVENT': 'Sample',
+            'TIMESTAMP': sample_timestamp,
+            'SYNTHESIZER CLASS NAME': self.__class__.__name__,
+            'SYNTHESIZER ID': self._synthesizer_id,
+            'TOTAL NUMBER OF TABLES': 1,
+            'TOTAL NUMBER OF ROWS': len(sampled_data),
+            'TOTAL NUMBER OF COLUMNS': len(sampled_data.columns)
+        })
 
         return sampled_data
 
diff --git a/tests/unit/logging/test_utils.py b/tests/unit/logging/test_utils.py
index b585cb880..1bcf74788 100644
--- a/tests/unit/logging/test_utils.py
+++ b/tests/unit/logging/test_utils.py
@@ -1,7 +1,12 @@
 """Test ``SDV`` logging utilities."""
+from io import StringIO
 from unittest.mock import Mock, mock_open, patch
 
-from sdv.logging.utils import disable_single_table_logger, get_sdv_logger_config
+import numpy as np
+import pandas as pd
+
+from sdv.logging.utils import (
+    disable_single_table_logger, get_sdv_logger_config, load_logfile_dataframe)
 
 
 def test_get_sdv_logger_config():
@@ -54,3 +59,32 @@ def test_disable_single_table_logger(mock_getlogger):
 
     # Assert
     assert len(mock_logger.handlers) == 1
+
+
+def test_load_logfile_dataframe():
+    """Test loading a header-less CSV logfile into a DataFrame with named columns."""
+    # Setup
+    logfile = StringIO(
+        'INFO,Instance,2024-05-14 11:29:00.649735,GaussianCopulaSynthesizer,'
+        'GaussianCopulaSynthesizer_1.12.1_5387a6e9f4d,,,\n'
+        'INFO,Fit,2024-05-14 11:29:00.649735,GaussianCopulaSynthesizer,'
+        'GaussianCopulaSynthesizer_1.12.1_5387a6e9f4d,1,500,9\n'
+        'INFO,Sample,2024-05-14 11:29:00.649735,GaussianCopulaSynthesizer,'
+        'GaussianCopulaSynthesizer_1.12.1_5387a6e9f4d,1,500,6\n'
+    )
+
+    # Run
+    log_dataframe = load_logfile_dataframe(logfile)
+
+    # Assert
+    expected_log = pd.DataFrame({
+        'LEVEL': ['INFO'] * 3,
+        'EVENT': ['Instance', 'Fit', 'Sample'],
+        'TIMESTAMP': ['2024-05-14 11:29:00.649735'] * 3,
+        'SYNTHESIZER CLASS NAME': ['GaussianCopulaSynthesizer'] * 3,
+        'SYNTHESIZER ID': ['GaussianCopulaSynthesizer_1.12.1_5387a6e9f4d'] * 3,
+        'TOTAL NUMBER OF TABLES': [np.nan, 1, 1],
+        'TOTAL NUMBER OF ROWS': [np.nan, 500, 500],
+        'TOTAL NUMBER OF COLUMNS': [np.nan, 9, 6]
+    })
+    pd.testing.assert_frame_equal(log_dataframe, expected_log)
diff --git a/tests/unit/multi_table/test_base.py b/tests/unit/multi_table/test_base.py
index ffcd63148..c4fe83d6e 100644
--- a/tests/unit/multi_table/test_base.py
+++ b/tests/unit/multi_table/test_base.py
@@ -133,11 +133,12 @@ def test___init__(self, mock_check_metadata_updated, mock_generate_synthesizer_i
         mock_check_metadata_updated.assert_called_once()
         mock_generate_synthesizer_id.assert_called_once_with(instance)
         assert instance._synthesizer_id == synthesizer_id
-        assert caplog.messages[0] == (
-            '\nInstance:\n  Timestamp: 2024-04-19 16:20:10.037183\n  Synthesizer class name: '
-            'BaseMultiTableSynthesizer\n  Synthesizer id: '
-            'BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Instance',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'BaseMultiTableSynthesizer',
+            'SYNTHESIZER ID': 'BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
+        })
 
     def test__init__column_relationship_warning(self):
         """Test that a warning is raised only once when the metadata has column relationships."""
@@ -927,16 +928,15 @@ def test_fit_processed_data(self, mock_datetime, caplog):
         instance._augment_tables.assert_called_once_with(processed_data)
         instance._model_tables.assert_called_once_with(instance._augment_tables.return_value)
         assert instance._fitted
-        assert caplog.messages[0] == (
-            '\nFit processed data:\n'
-            '  Timestamp: 2024-04-19 16:20:10.037183\n'
-            '  Synthesizer class name: Mock\n'
-            '  Statistics of the fit processed data:\n'
-            '    Total number of tables: 2\n'
-            '    Total number of rows: 6\n'
-            '    Total number of columns: 4\n'
-            '  Synthesizer id: BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Fit processed data',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'Mock',
+            'SYNTHESIZER ID': 'BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5',
+            'TOTAL NUMBER OF TABLES': 2,
+            'TOTAL NUMBER OF ROWS': 6,
+            'TOTAL NUMBER OF COLUMNS': 4
+        })
 
     def test_fit_processed_data_empty_table(self):
         """Test attributes are properly set when data is empty and that _fit is not called."""
@@ -1012,16 +1012,15 @@ def test_fit(self, mock_validate_foreign_keys_not_null, mock_datetime, caplog):
         instance.preprocess.assert_called_once_with(data)
         instance.fit_processed_data.assert_called_once_with(instance.preprocess.return_value)
         instance._check_metadata_updated.assert_called_once()
-        assert caplog.messages[0] == (
-            '\nFit:\n'
-            '  Timestamp: 2024-04-19 16:20:10.037183\n'
-            '  Synthesizer class name: Mock\n'
-            '  Statistics of the fit data:\n'
-            '    Total number of tables: 2\n'
-            '    Total number of rows: 6\n'
-            '    Total number of columns: 4\n'
-            '  Synthesizer id: BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Fit',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'Mock',
+            'SYNTHESIZER ID': 'BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5',
+            'TOTAL NUMBER OF TABLES': 2,
+            'TOTAL NUMBER OF ROWS': 6,
+            'TOTAL NUMBER OF COLUMNS': 4
+        })
 
     def test_fit_raises_version_error(self):
         """Test that fit will raise a ``VersionError`` if the current version is bigger."""
@@ -1148,16 +1147,15 @@ def test_sample(self, mock_datetime, caplog):
 
         # Assert
         instance._sample.assert_called_once_with(scale=1.5)
-        assert caplog.messages[0] == (
-            '\nSample:\n'
-            '  Timestamp: 2024-04-19 16:20:10.037183\n'
-            '  Synthesizer class name: BaseMultiTableSynthesizer\n'
-            '  Statistics of the sample size:\n'
-            '    Total number of tables: 2\n'
-            '    Total number of rows: 6\n'
-            '    Total number of columns: 4\n'
-            '  Synthesizer id: BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Sample',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'BaseMultiTableSynthesizer',
+            'SYNTHESIZER ID': 'BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5',
+            'TOTAL NUMBER OF TABLES': 2,
+            'TOTAL NUMBER OF ROWS': 6,
+            'TOTAL NUMBER OF COLUMNS': 4
+        })
 
     def test_get_learned_distributions_raises_an_unfitted_error(self):
         """Test that ``get_learned_distributions`` raises an error when model is not fitted."""
@@ -1563,12 +1561,12 @@ def test_save(self, cloudpickle_mock, mock_datetime, tmp_path, caplog):
 
         # Assert
         cloudpickle_mock.dump.assert_called_once_with(synthesizer, ANY)
-        assert caplog.messages[0] == (
-            '\nSave:\n'
-            '  Timestamp: 2024-04-19 16:20:10.037183\n'
-            '  Synthesizer class name: Mock\n'
-            '  Synthesizer id: BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Save',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'Mock',
+            'SYNTHESIZER ID': 'BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5',
+        })
 
     @patch('sdv.multi_table.base.datetime')
     @patch('sdv.multi_table.base.generate_synthesizer_id')
@@ -1599,9 +1597,9 @@ def test_load(self, mock_file, cloudpickle_mock,
         mock_check_synthesizer_version.assert_called_once_with(synthesizer_mock)
         assert loaded_instance._synthesizer_id == synthesizer_id
         mock_generate_synthesizer_id.assert_called_once_with(synthesizer_mock)
-        assert caplog.messages[0] == (
-            '\nLoad:\n'
-            '  Timestamp: 2024-04-19 16:20:10.037183\n'
-            '  Synthesizer class name: Mock\n'
-            '  Synthesizer id: BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Load',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'Mock',
+            'SYNTHESIZER ID': 'BaseMultiTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5',
+        })
diff --git a/tests/unit/single_table/test_base.py b/tests/unit/single_table/test_base.py
index dbaf35018..3074d8506 100644
--- a/tests/unit/single_table/test_base.py
+++ b/tests/unit/single_table/test_base.py
@@ -94,11 +94,12 @@ def test___init__(self, mock_check_metadata_updated, mock_data_processor,
         metadata.validate.assert_called_once_with()
         mock_check_metadata_updated.assert_called_once()
         mock_generate_synthesizer_id.assert_called_once_with(instance)
-        assert caplog.messages[0] == (
-            '\nInstance:\n  Timestamp: 2024-04-19 16:20:10.037183\n  Synthesizer class name: '
-            'BaseSingleTableSynthesizer\n  Synthesizer id: '
-            'BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Instance',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'BaseSingleTableSynthesizer',
+            'SYNTHESIZER ID': 'BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
+        })
 
     @patch('sdv.single_table.base.DataProcessor')
     def test___init__custom(self, mock_data_processor):
@@ -398,16 +399,15 @@ def test_fit_processed_data(self, mock_datetime, caplog):
 
         # Assert
         instance._fit.assert_called_once_with(processed_data)
-        assert caplog.messages[0] == (
-            '\nFit processed data:\n'
-            '  Timestamp: 2024-04-19 16:20:10.037183\n'
-            '  Synthesizer class name: Mock\n'
-            '  Statistics of the fit processed data:\n'
-            '    Total number of tables: 1\n'
-            '    Total number of rows: 3\n'
-            '    Total number of columns: 1\n'
-            '  Synthesizer id: BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Fit processed data',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'Mock',
+            'SYNTHESIZER ID': 'BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5',
+            'TOTAL NUMBER OF TABLES': 1,
+            'TOTAL NUMBER OF ROWS': 3,
+            'TOTAL NUMBER OF COLUMNS': 1
+        })
 
     def test_fit_processed_data_raises_version_error(self):
         """Test that ``fit`` raises ``VersionError``
@@ -461,16 +461,15 @@ def test_fit(self, mock_datetime, caplog):
         instance.preprocess.assert_called_once_with(data)
         instance.fit_processed_data.assert_called_once_with(instance.preprocess.return_value)
         instance._check_metadata_updated.assert_called_once()
-        assert caplog.messages[0] == (
-            '\nFit:\n'
-            '  Timestamp: 2024-04-19 16:20:10.037183\n'
-            '  Synthesizer class name: Mock\n'
-            '  Statistics of the fit data:\n'
-            '    Total number of tables: 1\n'
-            '    Total number of rows: 3\n'
-            '    Total number of columns: 2\n'
-            '  Synthesizer id: BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Fit',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'Mock',
+            'SYNTHESIZER ID': 'BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5',
+            'TOTAL NUMBER OF TABLES': 1,
+            'TOTAL NUMBER OF ROWS': 3,
+            'TOTAL NUMBER OF COLUMNS': 2
+        })
 
     def test_fit_raises_version_error(self):
         """Test that ``fit`` raises ``VersionError``
@@ -1476,16 +1475,15 @@ def test_sample(self, mock_datetime, caplog):
             show_progress_bar=True
         )
         pd.testing.assert_frame_equal(result, pd.DataFrame({'col': [1, 2, 3]}))
-        assert caplog.messages[0] == (
-            '\nSample:\n'
-            '  Timestamp: 2024-04-19 16:20:10.037183\n'
-            '  Synthesizer class name: Mock\n'
-            '  Statistics of the sample size:\n'
-            '    Total number of tables: 1\n'
-            '    Total number of rows: 3\n'
-            '    Total number of columns: 1\n'
-            '  Synthesizer id: BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Sample',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'Mock',
+            'SYNTHESIZER ID': 'BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5',
+            'TOTAL NUMBER OF TABLES': 1,
+            'TOTAL NUMBER OF ROWS': 3,
+            'TOTAL NUMBER OF COLUMNS': 1
+        })
 
     def test__validate_conditions_unseen_columns(self):
         """Test that conditions are within the ``data_processor`` fields."""
@@ -1855,12 +1853,12 @@ def test_save(self, cloudpickle_mock, mock_datetime, tmp_path, caplog):
 
         # Assert
         cloudpickle_mock.dump.assert_called_once_with(synthesizer, ANY)
-        assert caplog.messages[0] == (
-            '\nSave:\n'
-            '  Timestamp: 2024-04-19 16:20:10.037183\n'
-            '  Synthesizer class name: Mock\n'
-            '  Synthesizer id: BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Save',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'Mock',
+            'SYNTHESIZER ID': 'BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5',
+        })
 
     @patch('sdv.single_table.base.datetime')
     @patch('sdv.single_table.base.generate_synthesizer_id')
@@ -1891,12 +1889,12 @@ def test_load(self, mock_file, cloudpickle_mock, mock_check_sdv_versions_and_war
         assert loaded_instance._synthesizer_id == synthesizer_id
         mock_check_synthesizer_version.assert_called_once_with(synthesizer_mock)
         mock_generate_synthesizer_id.assert_called_once_with(synthesizer_mock)
-        assert caplog.messages[0] == (
-            '\nLoad:\n'
-            '  Timestamp: 2024-04-19 16:20:10.037183\n'
-            '  Synthesizer class name: Mock\n'
-            '  Synthesizer id: BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5'
-        )
+        assert caplog.messages[0] == str({
+            'EVENT': 'Load',
+            'TIMESTAMP': '2024-04-19 16:20:10.037183',
+            'SYNTHESIZER CLASS NAME': 'Mock',
+            'SYNTHESIZER ID': 'BaseSingleTableSynthesizer_1.0.0_92aff11e9a5649d1a280990d1231a5f5',
+        })
 
     def test_load_custom_constraint_classes(self):
         """Test that ``load_custom_constraint_classes`` calls the ``DataProcessor``'s method."""