Skip to content

Commit

Permalink
Add usage logging (#1920)
Browse files Browse the repository at this point in the history
  • Loading branch information
pvk-developer authored Apr 25, 2024
1 parent 17c0d09 commit 275955d
Show file tree
Hide file tree
Showing 28 changed files with 951 additions and 109 deletions.
8 changes: 2 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,8 @@ test-integration: ## run tests quickly with the default Python
test-readme: ## run the readme snippets
invoke readme

.PHONY: test-tutorials
test-tutorials: ## run the tutorial notebooks
invoke tutorials

.PHONY: test
test: test-unit test-integration test-readme test-tutorials ## test everything that needs test dependencies
test: test-unit test-integration test-readme ## test everything that needs test dependencies

.PHONY: test-all
test-all: ## run tests on every Python version with tox
Expand Down Expand Up @@ -265,5 +261,5 @@ release-major: check-release bumpversion-major release

.PHONY: check-deps
check-deps:
$(eval allow_list='cloudpickle=|graphviz=|numpy=|pandas=|tqdm=|copulas=|ctgan=|deepecho=|rdt=|sdmetrics=')
$(eval allow_list='cloudpickle=|graphviz=|numpy=|pandas=|tqdm=|copulas=|ctgan=|deepecho=|rdt=|sdmetrics=|platformdirs=')
pip freeze | grep -v "SDV.git" | grep -E $(allow_list) | sort > $(OUTPUT_FILEPATH)
1 change: 1 addition & 0 deletions latest_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ pandas==2.2.2
rdt==1.12.0
sdmetrics==0.14.0
tqdm==4.66.2
platformdirs==4.2.0
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ dependencies = [
'deepecho>=0.6.0',
'rdt>=1.12.0',
'sdmetrics>=0.14.0',
'platformdirs>=4.0'
]

[project.urls]
Expand Down
5 changes: 3 additions & 2 deletions sdv/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
from types import ModuleType

from sdv import (
constraints, data_processing, datasets, evaluation, io, lite, metadata, metrics, multi_table,
sampling, sequential, single_table, version)
constraints, data_processing, datasets, evaluation, io, lite, logging, metadata, metrics,
multi_table, sampling, sequential, single_table, version)

__all__ = [
'constraints',
Expand All @@ -26,6 +26,7 @@
'evaluation',
'io',
'lite',
'logging',
'metadata',
'metrics',
'multi_table',
Expand Down
9 changes: 9 additions & 0 deletions sdv/logging/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""Module for configuring loggers within the SDV library."""

from sdv.logging.utils import disable_single_table_logger, get_sdv_logger, get_sdv_logger_config

__all__ = (
'disable_single_table_logger',
'get_sdv_logger',
'get_sdv_logger_config',
)
27 changes: 27 additions & 0 deletions sdv/logging/sdv_logger_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Logger configuration consumed by ``sdv.logging.utils.get_sdv_logger``.
# ``log_registry: 'local'`` enables the per-logger settings below; any other
# value leaves loggers unconfigured.
log_registry: 'local'
version: 1
loggers:
  # Each logger writes INFO-level usage records to ``sdv_logs.log``.
  # NOTE: the filename is relocated at load time (see get_sdv_logger_config)
  # into the per-user SDV data directory.
  SingleTableSynthesizer:
    level: INFO
    propagate: false
    handlers:
      class: logging.FileHandler
      filename: sdv_logs.log
  MultiTableSynthesizer:
    level: INFO
    propagate: false
    handlers:
      class: logging.FileHandler
      filename: sdv_logs.log
  MultiTableMetadata:
    level: INFO
    propagate: false
    handlers:
      class: logging.FileHandler
      filename: sdv_logs.log
  SingleTableMetadata:
    level: INFO
    propagate: false
    handlers:
      class: logging.FileHandler
      filename: sdv_logs.log
98 changes: 98 additions & 0 deletions sdv/logging/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""Utilities for configuring logging within the SDV library."""

import contextlib
import logging
from functools import lru_cache
from pathlib import Path

import platformdirs
import yaml


def get_sdv_logger_config():
    """Load and return the SDV logging configuration as a dictionary.

    The configuration is read from ``sdv_logger_config.yml`` shipped next to
    this module. Any logger handler whose filename is the default
    ``sdv_logs.log`` is redirected into the per-user SDV data directory
    (created if necessary) so that logs land somewhere writable.

    Returns:
        dict:
            Parsed logging configuration with resolved logfile paths.
    """
    config_file = Path(__file__).parent / 'sdv_logger_config.yml'
    with open(config_file, 'r') as config:
        logger_conf = yaml.safe_load(config)

    # Point the default logfile at the user's SDV data directory.
    store_path = Path(platformdirs.user_data_dir('sdv', 'sdv-dev'))
    store_path.mkdir(parents=True, exist_ok=True)
    for logger_settings in logger_conf.get('loggers', {}).values():
        handler_settings = logger_settings.get('handlers', {})
        if handler_settings.get('filename') == 'sdv_logs.log':
            handler_settings['filename'] = store_path / handler_settings['filename']

    return logger_conf


@contextlib.contextmanager
def disable_single_table_logger():
    """Temporarily silence the ``SingleTableSynthesizer`` logger.

    Every handler attached to the ``SingleTableSynthesizer`` logger is
    detached for the duration of the ``with`` block, so nothing is emitted
    for that logger inside the context. The handlers are re-attached when
    the block exits, even if an exception was raised inside it.
    """
    logger = logging.getLogger('SingleTableSynthesizer')
    saved_handlers = list(logger.handlers)
    logger.handlers = []
    try:
        yield
    finally:
        # Restore whatever was attached before entering the context.
        for saved_handler in saved_handlers:
            logger.addHandler(saved_handler)


@lru_cache()
def get_sdv_logger(logger_name):
    """Get a logger instance with the specified name and configuration.

    This function retrieves or creates a logger instance with the specified
    name and applies configuration settings based on the logger's name and
    the logging configuration. Results are cached per name, so handlers are
    attached at most once per process.

    Args:
        logger_name (str):
            The name of the logger to retrieve or create.

    Returns:
        logging.Logger:
            A logger instance configured according to the logging
            configuration and the specific settings for the given
            logger name.
    """
    logger_conf = get_sdv_logger_config()
    logger = logging.getLogger(logger_name)
    if logger_conf.get('log_registry') != 'local':
        # Covers both a missing/None registry and any unrecognized value:
        # return a plain logger and avoid writing to files or other streams.
        # (Previously an unknown registry value fell through and returned None.)
        return logger

    # Guard against a config with no ``loggers`` section at all.
    configured_loggers = logger_conf.get('loggers') or {}
    if logger_name in configured_loggers:
        config = configured_loggers[logger_name]
        log_level = getattr(logging, config.get('level', 'INFO'))
        formatter = logging.Formatter(config['format']) if config.get('format') else None
        logger.setLevel(log_level)
        logger.propagate = config.get('propagate', False)
        handler = config.get('handlers') or {}
        handler_classes = handler.get('class')
        if isinstance(handler_classes, str):
            handler_classes = [handler_classes]

        # ``class`` may be absent; iterate nothing rather than crash.
        for handler_class in handler_classes or []:
            if handler_class == 'logging.FileHandler':
                file_handler = logging.FileHandler(handler.get('filename'))
                file_handler.setLevel(log_level)
                file_handler.setFormatter(formatter)
                logger.addHandler(file_handler)
            elif handler_class in ('logging.consoleHandler', 'logging.StreamHandler'):
                stream_handler = logging.StreamHandler()
                stream_handler.setLevel(log_level)
                stream_handler.setFormatter(formatter)
                logger.addHandler(stream_handler)

    return logger
19 changes: 19 additions & 0 deletions sdv/metadata/multi_table.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Multi Table Metadata."""

import datetime
import json
import logging
import warnings
Expand All @@ -11,6 +12,7 @@

from sdv._utils import _cast_to_iterable, _load_data_from_csv
from sdv.errors import InvalidDataError
from sdv.logging import get_sdv_logger
from sdv.metadata.errors import InvalidMetadataError
from sdv.metadata.metadata_upgrader import convert_metadata
from sdv.metadata.single_table import SingleTableMetadata
Expand All @@ -19,6 +21,7 @@
create_columns_node, create_summarized_columns_node, visualize_graph)

LOGGER = logging.getLogger(__name__)
MULTITABLEMETADATA_LOGGER = get_sdv_logger('MultiTableMetadata')
WARNINGS_COLUMN_ORDER = ['Table Name', 'Column Name', 'sdtype', 'datetime_format']


Expand Down Expand Up @@ -1054,6 +1057,22 @@ def save_to_json(self, filepath):
"""
validate_file_does_not_exist(filepath)
metadata = self.to_dict()
total_columns = 0
for table in self.tables.values():
total_columns += len(table.columns)

MULTITABLEMETADATA_LOGGER.info(
'\nMetadata Save:\n'
' Timestamp: %s\n'
' Statistics about the metadata:\n'
' Total number of tables: %s\n'
' Total number of columns: %s\n'
' Total number of relationships: %s',
datetime.datetime.now(),
len(self.tables),
total_columns,
len(self.relationships)
)
with open(filepath, 'w', encoding='utf-8') as metadata_file:
json.dump(metadata, metadata_file, indent=4)

Expand Down
12 changes: 12 additions & 0 deletions sdv/metadata/single_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@
_cast_to_iterable, _format_invalid_values_string, _get_datetime_format, _is_boolean_type,
_is_datetime_type, _is_numerical_type, _load_data_from_csv, _validate_datetime_format)
from sdv.errors import InvalidDataError
from sdv.logging import get_sdv_logger
from sdv.metadata.errors import InvalidMetadataError
from sdv.metadata.metadata_upgrader import convert_metadata
from sdv.metadata.utils import read_json, validate_file_does_not_exist
from sdv.metadata.visualization import (
create_columns_node, create_summarized_columns_node, visualize_graph)

LOGGER = logging.getLogger(__name__)
SINGLETABLEMETADATA_LOGGER = get_sdv_logger('SingleTableMetadata')


class SingleTableMetadata:
Expand Down Expand Up @@ -1206,6 +1208,16 @@ def save_to_json(self, filepath):
validate_file_does_not_exist(filepath)
metadata = self.to_dict()
metadata['METADATA_SPEC_VERSION'] = self.METADATA_SPEC_VERSION
SINGLETABLEMETADATA_LOGGER.info(
'\nMetadata Save:\n'
' Timestamp: %s\n'
' Statistics about the metadata:\n'
' Total number of tables: 1'
' Total number of columns: %s'
' Total number of relationships: 0',
datetime.now(),
len(self.columns)
)
with open(filepath, 'w', encoding='utf-8') as metadata_file:
json.dump(metadata, metadata_file, indent=4)

Expand Down
Loading

0 comments on commit 275955d

Please sign in to comment.