Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
David-Araripe committed Mar 21, 2024
2 parents 88da224 + 17f4142 commit 0dd63d8
Show file tree
Hide file tree
Showing 17 changed files with 1,467 additions and 1,036 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
name: CI

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  ubuntu:
    runs-on: ubuntu-latest
    steps:
      # Fetch the repository contents so the package can be installed from source.
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      # Editable install pulls in the package together with its runtime dependencies.
      - name: Install dependencies
        run: |
          pip install -e .
      - name: Run tests
        run: python -m unittest discover

# macos:
# runs-on: macos-latest
# steps:
# - uses: actions/checkout@v2
# - name: Set up Python
# uses: actions/setup-python@v2
# with:
# python-version: '3.9'
# - name: Install dependencies
# run: |
# pip install -e .
# pip install -e ".[dev]"
# - name: Run tests
# run: python -m unittest discover

# windows:
# runs-on: windows-latest
# steps:
# - uses: actions/checkout@v2
# - name: Set up Python
# uses: actions/setup-python@v2
# with:
# python-version: '3.9'
# - name: Install dependencies
# run: |
# pip install -e .
# pip install -e ".[dev]"
# - name: Run tests
# run: python -m unittest discover
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
gbmtsplits/_version.py

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down
11 changes: 11 additions & 0 deletions gbmtsplits/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import os

from .split import GloballyBalancedSplit
from .clustering import *

# Fallback version, used when no generated version module can be imported.
__version__ = '0.0.5'
try:
    # `_version.py` is git-ignored, so it only exists in builds/installs that
    # generated it (presumably via setuptools_scm -- confirm in the build config).
    # Importing directly avoids probing the filesystem through `__file__`.
    from ._version import version
except ImportError:
    pass
else:
    __version__ = version

# Alias kept alongside the dunder name for callers that read `VERSION`.
VERSION = __version__
18 changes: 16 additions & 2 deletions src/gbmtsplits/cli.py → gbmtsplits/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import argparse
import pandas as pd
from timeit import default_timer as timer
# `main()` calls `enable_file_logger`; that is the function the logging
# configuration module actually defines.
from .logs.config import enable_file_logger
from .split import GloballyBalancedSplit
from .clustering import RandomClustering, LeaderPickerClustering, MaxMinClustering, MurckoScaffoldClustering

Expand Down Expand Up @@ -44,8 +46,8 @@ def main():
# Start the timer
start_time = timer()

# Parse arguments
args = parser.parse_args()

# Read input data from csv/tsv file ##########################
if '.csv' in args.input:
df = pd.read_csv(args.input)
Expand All @@ -62,6 +64,18 @@ def main():
if not args.output:
args.output = args.input.split('.')[0]

# Enable logging #############################################
logSettings = enable_file_logger(
os.path.dirname(args.output),
"gbmtsplits.log",
False,
__name__,
vars(args),
disable_existing_loggers=False,
)
log = logSettings.log


# Setup splitter #############################################
if args.clustering == 'random':
clustering = RandomClustering(n_clusters=args.n_clusters, seed=args.random_seed)
Expand Down Expand Up @@ -101,7 +115,7 @@ def main():

# Print elapsed time #########################################
elapsed_time = timer() - start_time
print('Elapsed time: {:.2f} seconds'.format(elapsed_time))
log.info('Elapsed time: {:.2f} seconds'.format(elapsed_time))

if __name__ == '__main__':

Expand Down
2 changes: 1 addition & 1 deletion src/gbmtsplits/clustering.py → gbmtsplits/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_name(self) -> str:
return self.__class__.__name__

def _set_n_clusters(self, N : int) -> None:
self.n_clusters = self.n_clusters if self.n_clusters is not None else N // 100
self.n_clusters = self.n_clusters if self.n_clusters is not None else N // 10



Expand Down
12 changes: 12 additions & 0 deletions gbmtsplits/logs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import logging
import sys

# Package-wide default logger; it handles INFO and above until reconfigured.
logger = logging.getLogger("gbmtsplits")
logger.setLevel(logging.INFO)


def setLogger(log):
    """Register ``log`` as the package logger exposed by this module.

    Args:
        log (logging.Logger): logger instance to publish on this module.
    """
    module = sys.modules[__name__]
    # Replace the module-level default so that attribute lookups of ``logger``
    # on this module resolve to the newly configured instance.
    module.logger = log
    # Kept for backward compatibility: the logger was previously published
    # only under this attribute name.
    module.gbmtsplits = log
228 changes: 228 additions & 0 deletions gbmtsplits/logs/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
import os
import git
import json
import logging
from bisect import bisect
from datetime import datetime
from logging import config


from . import setLogger

class LogFileConfig:
    """Bundle of the settings that describe an enabled log file.

    Attributes:
        path: location of the log file.
        log: logger instance associated with the log file.
        debug: flag telling whether debug logging was requested.
    """

    def __init__(self, path, logger, debug):
        self.path, self.log, self.debug = path, logger, debug

class LevelFilter(logging.Filter):
    """Logging filter that only lets through records of specific levels.

    Args:
        level: a single numeric log level (e.g. ``logging.DEBUG``) or a
            collection of numeric levels. Records whose ``levelno`` is not
            among them are rejected.
    """

    def __init__(self, level):
        # Initialise the base class so the standard logging.Filter attributes
        # (such as ``name``) exist on the instance.
        super().__init__()
        # Accept a bare level as well as a collection of levels.
        self.__level = [level] if isinstance(level, int) else level

    def filter(self, record):
        """Return True when the record's level is one of the accepted levels."""
        return record.levelno in self.__level


# Adapted from https://stackoverflow.com/a/68154386
class LevelFormatter(logging.Formatter):
    """Formatter that picks its format string based on the record's level.

    ``formats`` maps numeric log levels to format strings. A record is
    rendered with the entry of the lowest configured level that is greater
    than or equal to the record's level; records above the highest configured
    level fall back to the highest entry. Extra keyword arguments are
    forwarded to every per-level ``logging.Formatter``.

    Raises:
        ValueError: if ``fmt`` is supplied as a keyword argument, since the
            format strings have to come from ``formats``.
    """

    def __init__(self, formats: dict[int, str], **kwargs):
        super().__init__()

        if "fmt" in kwargs:
            raise ValueError(
                "Format string must be passed to level-surrogate formatters, "
                "not this one"
            )

        # One dedicated formatter per level, kept in ascending level order so
        # that ``format`` can binary-search them.
        per_level = [
            (level, logging.Formatter(fmt, **kwargs))
            for level, fmt in formats.items()
        ]
        per_level.sort()
        self.formats = per_level

    def format(self, record: logging.LogRecord) -> str:
        """Render ``record`` with the formatter matching its level."""
        # Capping ``hi`` keeps the index valid for levels above the highest key.
        last = len(self.formats) - 1
        position = bisect(self.formats, (record.levelno,), hi=last)
        return self.formats[position][1].format(record)


def config_logger(log_file_path, debug=None, disable_existing_loggers=True):
    """Configure logging through ``logging.config.dictConfig``.

    Three handlers are declared:

    * a stream handler for WARNING and above, using the simple format;
    * a file handler writing INFO and above to ``log_file_path``;
    * a file handler that only accepts DEBUG records and writes them to
      ``debug.log`` in the same folder as ``log_file_path``. It is only
      attached when ``debug`` is truthy.

    Both file handlers use the per-level formatter: INFO records are written
    in the simple format, DEBUG and WARNING-and-above in the verbose format.

    Args:
        log_file_path (str): path of the main log file
        debug (bool): if true, debug messages are saved to ``debug.log``
        disable_existing_loggers (bool): forwarded to ``dictConfig`` under the
            key of the same name
    """
    simple_format = "%(message)s"
    verbose_format = "[%(asctime)s] %(levelname)s [%(filename)s %(name)s %(funcName)s (%(lineno)d)]: %(message)s"  # noqa: E501
    debug_path = os.path.join(os.path.dirname(log_file_path), "debug.log")

    formatters = {
        "simple_formatter": {"format": simple_format},
        "verbose_formatter": {"format": verbose_format},
        "bylevel_formatter": {
            "()": LevelFormatter,
            "formats": {
                logging.DEBUG: verbose_format,
                logging.INFO: simple_format,
                logging.WARNING: verbose_format,
            },
        },
    }

    filters = {
        "only_debug": {"()": LevelFilter, "level": [logging.DEBUG]},
    }

    handlers = {
        "stream_handler": {
            "class": "logging.StreamHandler",
            "formatter": "simple_formatter",
            "level": "WARNING",
        },
        "file_handler": {
            "class": "logging.FileHandler",
            "formatter": "bylevel_formatter",
            "filename": log_file_path,
            "level": "INFO",
        },
        "file_handler_debug": {
            "class": "logging.FileHandler",
            "formatter": "bylevel_formatter",
            "filename": debug_path,
            "mode": "w",
            "delay": True,
            "filters": ["only_debug"],
        },
    }

    # The debug file handler is only attached on request.
    active_handlers = ["stream_handler", "file_handler"]
    if debug:
        active_handlers.append("file_handler_debug")

    logging_config = {
        "version": 1,
        "disable_existing_loggers": disable_existing_loggers,
        "formatters": formatters,
        "filters": filters,
        "handlers": handlers,
        # NOTE(review): the ``None`` key is presumably relied on to address the
        # root logger; the documented spelling is a top-level "root" entry --
        # confirm before changing.
        "loggers": {
            None: {"handlers": active_handlers, "level": "DEBUG"},
        },
    }

    config.dictConfig(logging_config)


def get_git_info():
    """Log version information about the running gbmtsplits code.

    If the package is located under ``site-packages`` (installed with pip),
    the version exposed by the package itself is logged. Otherwise GitPython
    is used to inspect the git repository that contains the package, and the
    branch, last tag, number of commits since that tag, short commit hash and
    dirty state are logged. When no git repository is found, a message saying
    so is logged instead of raising.
    """
    # Locate the package from this file (<package>/logs/config.py) rather than
    # importing an unrelated package to find the path.
    path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    logging.debug(f"Package path: {path}")
    is_pip_package = "site-packages" in path

    if is_pip_package:
        # The package resolves its own version (generated `_version.py` or the
        # hard-coded fallback), so read it from there.
        from .. import __version__

        info = __version__
        logging.info(f"Version info [from pip]: {info}")
    else:
        # Inspect the repository that contains the package, not whichever
        # repository the current working directory happens to be in.
        try:
            repo = git.Repo(path, search_parent_directories=True)
        except (git.InvalidGitRepositoryError, git.NoSuchPathError):
            logging.info("Version info: not available (no git repository found)")
            return
        # Get git hash
        git_hash = repo.head.object.hexsha[:8]
        # Get git branch
        try:
            branch = repo.active_branch.name
        except TypeError:
            branch = "detached HEAD"
        # Get git tag; a repository without tags must not crash the logging
        tags = repo.tags
        if tags:
            tag = tags[-1].name
            # Get number of commits between current commit and last tag
            ncommits = len(list(repo.iter_commits(f"{tag}..HEAD")))
        else:
            tag = "untagged"
            # Without a tag, count every commit reachable from HEAD
            ncommits = len(list(repo.iter_commits("HEAD")))
        # Check if repo is dirty
        dirty = repo.is_dirty()
        info = f"({branch}) {tag}+{ncommits}[{git_hash}]+{'dirty' if dirty else ''} "
        logging.info(f"Version info [from git repo]: {info}")


def init_logfile(log, args=None):
    """Write the initial entries of a run to the log.

    Logs the path of the log file with a timestamp, the version information
    gathered by ``get_git_info`` and, when given, every command line argument
    as a ``key: value`` line.

    Args:
        log : Logging instance
        args (dict): Dictionary with all command line arguments
    """
    # Look the file handler up by type instead of by position, so a different
    # handler layout on the root logger cannot raise or report a wrong handler.
    log_file = next(
        (
            handler.baseFilename
            for handler in log.root.handlers
            if isinstance(handler, logging.FileHandler)
        ),
        "n/a",
    )
    logging.info(f"Initialize GBMT log file: {log_file} at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    get_git_info()
    if args:
        logging.info("Command line arguments:")
        for key, value in args.items():
            logging.info(f"{key}: {value}")
    # Empty entry separating the header from the rest of the log
    logging.info("")

def enable_file_logger(
    log_folder: str,
    filename: str,
    debug: bool = False,
    log_name: str | None = None,
    init_data: dict | None = None,
    disable_existing_loggers: bool = False,
):
    """Enable file logging.

    Args:
        log_folder (str): path to the folder where the log file should be stored;
            an empty string means the current working directory
        filename (str): name of the log file
        debug (bool): whether to enable debug logging. Defaults to False.
        log_name (str, optional): name of the logger. Defaults to None, in
            which case ``filename`` is used as the logger name.
        init_data (dict, optional): initial data to be logged. Defaults to None.
        disable_existing_loggers (bool): whether to disable existing loggers.

    Returns:
        LogFileConfig: the log file path, the logger and the debug flag.
    """
    path = os.path.join(log_folder, filename)
    # create log folder if it does not exist; an empty folder name refers to
    # the current directory and must not be passed to os.makedirs
    if log_folder:
        os.makedirs(log_folder, exist_ok=True)

    # configure logging; `config` in this module is `logging.config`, so the
    # helpers defined in this file are called directly
    config_logger(path, debug, disable_existing_loggers=disable_existing_loggers)

    # get logger and init configuration
    log = logging.getLogger(log_name if log_name else filename)
    log.setLevel(logging.INFO)
    setLogger(log)
    settings = LogFileConfig(path, log, debug)

    # Begin log file; the mapping is passed as-is because `init_logfile`
    # iterates over its items
    init_logfile(log, init_data)

    return settings
File renamed without changes.
Loading

0 comments on commit 0dd63d8

Please sign in to comment.