Add usage logging #1920

Merged · 19 commits · Apr 25, 2024
8 changes: 2 additions & 6 deletions Makefile
@@ -123,12 +123,8 @@ test-integration: ## run tests quickly with the default Python
test-readme: ## run the readme snippets
invoke readme

-.PHONY: test-tutorials
-test-tutorials: ## run the tutorial notebooks
-	invoke tutorials
-
.PHONY: test
-test: test-unit test-integration test-readme test-tutorials ## test everything that needs test dependencies
+test: test-unit test-integration test-readme ## test everything that needs test dependencies

.PHONY: test-all
test-all: ## run tests on every Python version with tox
@@ -265,5 +261,5 @@ release-major: check-release bumpversion-major release

.PHONY: check-deps
check-deps:
-	$(eval allow_list='cloudpickle=|graphviz=|numpy=|pandas=|tqdm=|copulas=|ctgan=|deepecho=|rdt=|sdmetrics=')
+	$(eval allow_list='cloudpickle=|graphviz=|numpy=|pandas=|tqdm=|copulas=|ctgan=|deepecho=|rdt=|sdmetrics=|platformdirs=')
pip freeze | grep -v "SDV.git" | grep -E $(allow_list) | sort > $(OUTPUT_FILEPATH)
1 change: 1 addition & 0 deletions latest_requirements.txt
@@ -8,3 +8,4 @@ pandas==2.2.2
rdt==1.11.1
sdmetrics==0.14.0
tqdm==4.66.2
+platformdirs==4.2.0
1 change: 1 addition & 0 deletions pyproject.toml
@@ -38,6 +38,7 @@ dependencies = [
'deepecho>=0.6.0',
'rdt>=1.12.0',
'sdmetrics>=0.14.0',
+    'platformdirs>=4.0'
]

[project.urls]
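The new platformdirs requirement is what gives the logger a writable, per-user location for sdv_logs.log instead of the package install directory. A minimal sketch of the lookup, assuming the same 'sdv' / 'sdv-dev' app name and author that sdv/logging/utils.py passes further down (the resulting path is OS-specific):

import platformdirs

# Per-user data directory; created by get_sdv_logger_config() if it does not exist.
log_dir = platformdirs.user_data_dir('sdv', 'sdv-dev')
print(log_dir)  # e.g. ~/.local/share/sdv on Linux, platform-specific elsewhere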
5 changes: 3 additions & 2 deletions sdv/__init__.py
@@ -16,8 +16,8 @@
from types import ModuleType

from sdv import (
-    constraints, data_processing, datasets, evaluation, io, lite, metadata, metrics, multi_table,
-    sampling, sequential, single_table, version)
+    constraints, data_processing, datasets, evaluation, io, lite, logging, metadata, metrics,
+    multi_table, sampling, sequential, single_table, version)

__all__ = [
'constraints',
@@ -26,6 +26,7 @@
'evaluation',
'io',
'lite',
+    'logging',
'metadata',
'metrics',
'multi_table',
9 changes: 9 additions & 0 deletions sdv/logging/__init__.py
@@ -0,0 +1,9 @@
"""Module for configuring loggers within the SDV library."""

from sdv.logging.utils import disable_single_table_logger, get_sdv_logger, get_sdv_logger_config

__all__ = (
'disable_single_table_logger',
'get_sdv_logger',
'get_sdv_logger_config',
)
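For orientation, a short sketch of how this public surface is meant to be used; the logger name comes from the config file in the next diff, and the call itself is only illustrative:

from sdv.logging import get_sdv_logger, get_sdv_logger_config

config = get_sdv_logger_config()                  # parsed sdv_logger_config.yml as a dict
logger = get_sdv_logger('MultiTableSynthesizer')  # configured according to that dict
logger.info('usage event')                        # appended to sdv_logs.log in the user data dir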
27 changes: 27 additions & 0 deletions sdv/logging/sdv_logger_config.yml
@@ -0,0 +1,27 @@
log_registry: 'local'
version: 1
loggers:
SingleTableSynthesizer:
level: INFO
propagate: false
handlers:
class: logging.FileHandler
filename: sdv_logs.log
MultiTableSynthesizer:
level: INFO
propagate: false
handlers:
class: logging.FileHandler
filename: sdv_logs.log
MultiTableMetadata:
level: INFO
propagate: false
handlers:
class: logging.FileHandler
filename: sdv_logs.log
SingleTableMetadata:
level: INFO
propagate: false
handlers:
class: logging.FileHandler
filename: sdv_logs.log
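Once read with yaml.safe_load (see utils.py below), this file is just a nested dict. A rough sketch of the shape the code relies on, with values copied from the entries above (the literal path is only for illustration):

import yaml

with open('sdv/logging/sdv_logger_config.yml') as f:
    conf = yaml.safe_load(f)

print(conf['log_registry'])   # 'local'
handler = conf['loggers']['SingleTableSynthesizer']['handlers']
print(handler['class'])       # 'logging.FileHandler'
print(handler['filename'])    # 'sdv_logs.log' (rewritten to the user data dir at runtime)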
98 changes: 98 additions & 0 deletions sdv/logging/utils.py
@@ -0,0 +1,98 @@
"""Utilities for configuring logging within the SDV library."""

import contextlib
import logging
from functools import lru_cache
from pathlib import Path

import platformdirs
import yaml


def get_sdv_logger_config():
"""Return a dictionary with the logging configuration."""
logging_path = Path(__file__).parent
with open(logging_path / 'sdv_logger_config.yml', 'r') as f:
logger_conf = yaml.safe_load(f)

    # Store the log file in the platformdirs per-user data directory
store_path = Path(platformdirs.user_data_dir('sdv', 'sdv-dev'))
store_path.mkdir(parents=True, exist_ok=True)
for logger in logger_conf.get('loggers', {}).values():
handler = logger.get('handlers', {})
if handler.get('filename') == 'sdv_logs.log':
handler['filename'] = store_path / handler['filename']

return logger_conf


@contextlib.contextmanager
def disable_single_table_logger():
"""Temporarily disables logging for the single table synthesizers.

This context manager temporarily removes all handlers associated with
the ``SingleTableSynthesizer`` logger, disabling logging for that module
within the current context. After the context exits, the
removed handlers are restored to the logger.
"""
# Logging without ``SingleTableSynthesizer``
single_table_logger = logging.getLogger('SingleTableSynthesizer')
handlers = single_table_logger.handlers
single_table_logger.handlers = []
try:
yield
finally:
for handler in handlers:
single_table_logger.addHandler(handler)


@lru_cache()
def get_sdv_logger(logger_name):
"""Get a logger instance with the specified name and configuration.

This function retrieves or creates a logger instance with the specified name
and applies configuration settings based on the logger's name and the logging
configuration.

Args:
logger_name (str):
The name of the logger to retrieve or create.

Returns:
logging.Logger:
A logger instance configured according to the logging configuration
and the specific settings for the given logger name.
"""
logger_conf = get_sdv_logger_config()
if logger_conf.get('log_registry') is None:
# Return a logger without any extra settings and avoid writing into files or other streams
return logging.getLogger(logger_name)

if logger_conf.get('log_registry') == 'local':
logger = logging.getLogger(logger_name)
if logger_name in logger_conf.get('loggers'):
formatter = None
config = logger_conf.get('loggers').get(logger_name)
log_level = getattr(logging, config.get('level', 'INFO'))
if config.get('format'):
formatter = logging.Formatter(config.get('format'))

logger.setLevel(log_level)
logger.propagate = config.get('propagate', False)
handler = config.get('handlers')
handlers = handler.get('class')
handlers = [handlers] if isinstance(handlers, str) else handlers
for handler_class in handlers:
if handler_class == 'logging.FileHandler':
logfile = handler.get('filename')
file_handler = logging.FileHandler(logfile)
file_handler.setLevel(log_level)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
elif handler_class in ('logging.consoleHandler', 'logging.StreamHandler'):
ch = logging.StreamHandler()
ch.setLevel(log_level)
ch.setFormatter(formatter)
logger.addHandler(ch)

return logger
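A brief usage sketch of the two entry points above, assuming the default 'local' log_registry shipped in the YAML config; the messages are placeholders:

from sdv.logging import disable_single_table_logger, get_sdv_logger

logger = get_sdv_logger('SingleTableSynthesizer')
logger.info('written to sdv_logs.log')

with disable_single_table_logger():
    # Handlers are detached inside the context, so nothing reaches the log file.
    logger.info('silently dropped')

logger.info('handlers restored, written to the log file again')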
19 changes: 19 additions & 0 deletions sdv/metadata/multi_table.py
@@ -1,5 +1,6 @@
"""Multi Table Metadata."""

+import datetime
import json
import logging
import warnings
@@ -11,6 +12,7 @@

from sdv._utils import _cast_to_iterable, _load_data_from_csv
from sdv.errors import InvalidDataError
+from sdv.logging import get_sdv_logger
from sdv.metadata.errors import InvalidMetadataError
from sdv.metadata.metadata_upgrader import convert_metadata
from sdv.metadata.single_table import SingleTableMetadata
@@ -19,6 +21,7 @@
create_columns_node, create_summarized_columns_node, visualize_graph)

LOGGER = logging.getLogger(__name__)
+MULTITABLEMETADATA_LOGGER = get_sdv_logger('MultiTableMetadata')
WARNINGS_COLUMN_ORDER = ['Table Name', 'Column Name', 'sdtype', 'datetime_format']


@@ -1040,6 +1043,22 @@ def save_to_json(self, filepath):
"""
validate_file_does_not_exist(filepath)
metadata = self.to_dict()
+        total_columns = 0
+        for table in self.tables.values():
+            total_columns += len(table.columns)
+
+        MULTITABLEMETADATA_LOGGER.info(
+            '\nMetadata Save:\n'
+            '  Timestamp: %s\n'
+            '  Statistics about the metadata:\n'
+            '    Total number of tables: %s\n'
+            '    Total number of columns: %s\n'
+            '    Total number of relationships: %s',
+            datetime.datetime.now(),
+            len(self.tables),
+            total_columns,
+            len(self.relationships)
+        )
with open(filepath, 'w', encoding='utf-8') as metadata_file:
json.dump(metadata, metadata_file, indent=4)

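With the logger wired in, saving multi-table metadata now leaves a record in sdv_logs.log. A hedged sketch of a call and the rough shape of the resulting entry, with illustrative file names and counts (the layout follows the format string above):

from sdv.metadata import MultiTableMetadata

metadata = MultiTableMetadata.load_from_json('my_metadata.json')  # hypothetical file
metadata.save_to_json('my_metadata_copy.json')
# Appends something along these lines to sdv_logs.log:
#
# Metadata Save:
#   Timestamp: 2024-04-25 10:00:00.000000
#   Statistics about the metadata:
#     Total number of tables: 2
#     Total number of columns: 15
#     Total number of relationships: 1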
12 changes: 12 additions & 0 deletions sdv/metadata/single_table.py
@@ -16,13 +16,15 @@
_cast_to_iterable, _format_invalid_values_string, _get_datetime_format, _is_boolean_type,
_is_datetime_type, _is_numerical_type, _load_data_from_csv, _validate_datetime_format)
from sdv.errors import InvalidDataError
+from sdv.logging import get_sdv_logger
from sdv.metadata.errors import InvalidMetadataError
from sdv.metadata.metadata_upgrader import convert_metadata
from sdv.metadata.utils import read_json, validate_file_does_not_exist
from sdv.metadata.visualization import (
create_columns_node, create_summarized_columns_node, visualize_graph)

LOGGER = logging.getLogger(__name__)
+SINGLETABLEMETADATA_LOGGER = get_sdv_logger('SingleTableMetadata')


class SingleTableMetadata:
@@ -1206,6 +1208,16 @@ def save_to_json(self, filepath):
validate_file_does_not_exist(filepath)
metadata = self.to_dict()
metadata['METADATA_SPEC_VERSION'] = self.METADATA_SPEC_VERSION
+        SINGLETABLEMETADATA_LOGGER.info(
+            '\nMetadata Save:\n'
+            '  Timestamp: %s\n'
+            '  Statistics about the metadata:\n'
+            '    Total number of tables: 1\n'
+            '    Total number of columns: %s\n'
+            '    Total number of relationships: 0',
+            datetime.now(),
+            len(self.columns)
+        )
with open(filepath, 'w', encoding='utf-8') as metadata_file:
json.dump(metadata, metadata_file, indent=4)

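The single-table path mirrors the block above, with the table and relationship counts fixed at 1 and 0. A sketch under the same assumptions:

from sdv.metadata import SingleTableMetadata

metadata = SingleTableMetadata.load_from_json('table_metadata.json')  # hypothetical file
metadata.save_to_json('table_metadata_copy.json')  # logs one 'Metadata Save' entry with the column count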