
Added logging and utils.get_logger()
fixed linter regex errors
SamRWest committed Feb 18, 2024
1 parent d4d8eb3 commit be174b6
Showing 8 changed files with 147 additions and 119 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -59,5 +59,5 @@ addopts = '-s --durations=0 --durations-min=5.0 --tb=native --cov-report term --
# See https://github.com/nat-n/poethepoet for details.
benchmark = { cmd = "python utils/run_benchmarks.py benchmarks.yml --verbose --run", help = "Run a single benchmark. Usage: poe benchmark <benchmark_name>" }
benchmark_all = { shell = "python utils/run_benchmarks.py benchmarks.yml --verbose | tee out.txt", help = "Run the project", interpreter = "posix" }
-lint = { cmd = "poetry run git add .pre-commit-config.yaml & pre-commit run", help = "Run pre-commit hooks" }
-test = { cmd = "poetry run pytest", help = "Run unit tests with pytest" }
+lint = { shell = "git add .pre-commit-config.yaml & pre-commit run", help = "Run pre-commit hooks", interpreter = "posix"}
+test = { cmd = "pytest", help = "Run unit tests with pytest" }
3 changes: 2 additions & 1 deletion utils/dd_to_csv.py
@@ -8,6 +8,7 @@

import numpy as np
import pandas as pd
+from loguru import logger


def parse_parameter_values_from_file(
@@ -179,7 +180,7 @@ def convert_dd_to_tabular(
all_sets = defaultdict(list)
all_parameters = defaultdict(list)
for path in dd_files:
print(f"Processing path: {path}")
logger.info(f"Processing path: {path}")
local_param_values, local_sets = parse_parameter_values_from_file(path)

# merge params from file into global collection
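As background for the print-to-logger swaps in this commit: loguru ships a ready-configured logger object, so call sites only need the import. A minimal sketch of the pattern (the messages are illustrative, not from the diff):

```python
from loguru import logger  # third-party: pip install loguru

# drop-in replacements for the removed print() calls
logger.info("Processing path: input/base.dd")  # hypothetical path
logger.error("could not parse output of run")

# caveat: loguru methods accept str.format-style kwargs, not print()'s
# end=/flush= parameters, so each call always emits a full log line
logger.info("parsed {n} tables", n=42)
```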
129 changes: 47 additions & 82 deletions utils/run_benchmarks.py
@@ -20,59 +20,23 @@
import yaml
from tabulate import tabulate

+from xl2times import utils
from xl2times.utils import max_workers

-# configure logger
-# logging.basicConfig(
-#     level=logging.DEBUG,
-#     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s - %(filename)s:%(lineno)d",
-#     handlers=[StreamHandler(), RotatingFileHandler("xl2times.log", maxBytes=1000000, backupCount=5)],
-#     force=True,
-#     datefmt="%Y-%m-%d %H:%M:%S",
-# )
-# logger = logging.getLogger("xl2times")
-# logger.info("Logger!")
-
-from loguru import logger
-
-# set global log level via env var. Set to INFO if not already set.
-if os.getenv("LOGURU_LEVEL") is None:
-    os.environ["LOGURU_LEVEL"] = "INFO"
-
-log_conf = {
-    "handlers": [
-        {
-            "sink": sys.stdout,
-            "diagnose": False,
-            "format": "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | <level>{level: <8}</level> : <level>{message}</level> (<cyan>{name}:{"
-            'thread.name}:pid-{process}</cyan> "<cyan>{'
-            'file.path}</cyan>:<cyan>{line}</cyan>")',
-        },
-        {
-            "sink": "./xl2times.log",
-            "enqueue": True,
-            "mode": "a+",
-            "level": "DEBUG",
-            "colorize": False,
-            "serialize": False,
-            "diagnose": False,
-            "rotation": "20 MB",
-            "compression": "zip",
-        },
-    ],
-}
-logger.configure(**log_conf)
+logger = utils.get_logger()
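The new `utils.get_logger()` helper lives in one of the files not expanded in this view, so the following is only a plausible reconstruction from the handler configuration deleted above (a stdout sink plus a rotating, zip-compressed `xl2times.log` file sink); the actual implementation in `xl2times/utils.py` may differ:

```python
# hypothetical sketch of xl2times.utils.get_logger(), not the committed code
import os
import sys

from loguru import logger


def get_logger():
    # default the global level to INFO, as the deleted block above did
    if os.getenv("LOGURU_LEVEL") is None:
        os.environ["LOGURU_LEVEL"] = "INFO"
    logger.configure(
        handlers=[
            {"sink": sys.stdout, "diagnose": False},
            {
                "sink": "./xl2times.log",
                "level": "DEBUG",
                "enqueue": True,  # queue writes so multiprocess logging is safe
                "rotation": "20 MB",
                "compression": "zip",
                "diagnose": False,
            },
        ]
    )
    return logger
```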


def parse_result(output: str) -> Tuple[float, int, int]:
# find pattern in multiline string
m = re.findall(
r"(\d+\.\d)\% of ground truth rows present in output \((\d+)/(\d+)\), (\d+) additional rows",
r"(\d+\.\d)% of ground truth rows present in output \((\d+)/(\d+)\), (\d+) additional rows",
output,
flags=re.MULTILINE,
)
if not m or len(m) == 0:
print(f"ERROR: could not parse output of run:\n{output}")
logger.error(f"could not parse output of run:\n{output}")
sys.exit(2)
# return (accuracy, num_correct_rows, num_additional_rows)
m = m[0]
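The one-character regex change above is part of the "linter regex errors" from the commit message: `%` has no special meaning in a regular expression, so `\%` is an unnecessary escape that linters such as ruff flag. A quick sanity check against a made-up benchmark summary line:

```python
import re

# hypothetical line in the format parse_result() expects
output = "97.1% of ground truth rows present in output (4099/4222), 123 additional rows"

m = re.findall(
    r"(\d+\.\d)% of ground truth rows present in output \((\d+)/(\d+)\), (\d+) additional rows",
    output,
)
print(m)  # [('97.1', '4099', '4222', '123')]
```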
@@ -107,13 +71,13 @@ def run_gams_gdxdiff(
text=True,
)
if res.returncode != 0:
-print(res.stdout)
-print(res.stderr if res.stderr is not None else "")
-print(f"ERROR: GAMS failed on {benchmark['name']}")
+logger.info(res.stdout)
+logger.info(res.stderr if res.stderr is not None else "")
+logger.error(f"GAMS failed on {benchmark['name']}")
sys.exit(3)
if "error" in res.stdout.lower():
-print(res.stdout)
-print(f"ERROR: GAMS errored on {benchmark['name']}")
+logger.info(res.stdout)
+logger.error(f"GAMS errored on {benchmark['name']}")
return "Error running GAMS"

# Run GAMS on ground truth:
@@ -138,13 +102,13 @@
text=True,
)
if res.returncode != 0:
-print(res.stdout)
-print(res.stderr if res.stderr is not None else "")
-print(f"ERROR: GAMS failed on {benchmark['name']} ground truth")
+logger.info(res.stdout)
+logger.info(res.stderr if res.stderr is not None else "")
+logger.error(f"GAMS failed on {benchmark['name']} ground truth")
sys.exit(4)
if "error" in res.stdout.lower():
-print(res.stdout)
-print(f"ERROR: GAMS errored on {benchmark['name']}")
+logger.info(res.stdout)
+logger.error(f"GAMS errored on {benchmark['name']}")
return "Error running GAMS on ground truth"

# Run gdxdiff to compare
@@ -161,8 +125,8 @@
text=True,
)
if verbose:
-print(res.stdout)
-print(res.stderr if res.stderr is not None else "")
+logger.info(res.stdout)
+logger.info(res.stderr if res.stderr is not None else "")
if res.returncode != 0:
return f"Diff ({len(res.stdout.splitlines())})"

@@ -202,8 +166,8 @@ def run_benchmark(
if res.returncode != 0:
# Remove partial outputs
shutil.rmtree(csv_folder, ignore_errors=True)
-print(res.stdout)
-print(f"ERROR: dd_to_csv failed on {benchmark['name']}")
+logger.info(res.stdout)
+logger.error(f"dd_to_csv failed on {benchmark['name']}")
sys.exit(5)
else:
# If debug option is set, run as a function call to allow stepping with a debugger.
@@ -212,7 +176,7 @@
main([dd_folder, csv_folder])

elif not path.exists(csv_folder):
print(f"ERROR: --skip_csv is true but {csv_folder} does not exist")
logger.error(f"--skip_csv is true but {csv_folder} does not exist")
sys.exit(6)

# Then run the tool
@@ -251,14 +215,14 @@ def run_benchmark(

if verbose:
line = "-" * 80
print(f"\n{line}\n{benchmark['name']}\n{line}\n\n{res.stdout}")
print(res.stderr if res.stderr is not None else "")
logger.info(f"\n{line}\n{benchmark['name']}\n{line}\n\n{res.stdout}")
logger.info(res.stderr if res.stderr is not None else "")
else:
print(".", end="", flush=True)
logger.info(".", end="", flush=True)

if res.returncode != 0:
-print(res.stdout)
-print(f"ERROR: tool failed on {benchmark['name']}")
+logger.info(res.stdout)
+logger.error(f"tool failed on {benchmark['name']}")
sys.exit(7)
with open(path.join(out_folder, "stdout"), "w") as f:
f.write(res.stdout)
@@ -284,7 +248,7 @@ def run_all_benchmarks(
verbose=False,
debug: bool = False,
):
print("Running benchmarks", end="", flush=True)
logger.info("Running benchmarks", end="", flush=True)
headers = ["Benchmark", "Time (s)", "GDX Diff", "Accuracy", "Correct", "Additional"]
run_a_benchmark = partial(
run_benchmark,
@@ -298,10 +262,10 @@

with ProcessPoolExecutor(max_workers=max_workers) as executor:
results = list(executor.map(run_a_benchmark, benchmarks))
print("\n\n" + tabulate(results, headers, floatfmt=".1f") + "\n")
logger.info("\n\n" + tabulate(results, headers, floatfmt=".1f") + "\n")

if skip_regression:
print("Skipping regression tests.")
logger.info("Skipping regression tests.")
sys.exit(0)
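For readers unfamiliar with tabulate (used throughout this script): it renders the per-benchmark result tuples as an aligned plain-text table. A tiny illustration with a made-up row:

```python
from tabulate import tabulate  # third-party: pip install tabulate

headers = ["Benchmark", "Time (s)", "GDX Diff", "Accuracy", "Correct", "Additional"]
results = [("DemoS_001", 12.3, "OK", 97.1, 4099, 123)]  # hypothetical benchmark result

print(tabulate(results, headers, floatfmt=".1f"))
```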

# The rest of this script checks regressions against main
@@ -319,7 +283,7 @@
mybranch = repo.create_head("mybranch")

if mybranch.name == "main":
print("Skipping regression tests as we're on main branch. Goodbye!")
logger.info("Skipping regression tests as we're on main branch. Goodbye!")
sys.exit(0)

if skip_main:
@@ -332,18 +296,20 @@
result = parse_result(f.readlines()[-1])
# Use a fake runtime and GAMS result
results_main.append((benchmark["name"], 999, "--", *result))
-print(
+logger.info(
f"Skipped running on main. Using results from {path.join(benchmarks_folder, 'out-main')}"
)

else:
if repo.is_dirty():
print("Your working directory is not clean. Skipping regression tests.")
logger.info(
"Your working directory is not clean. Skipping regression tests."
)
sys.exit(8)

# Re-run benchmarks on main
repo.heads.main.checkout()
print("Running benchmarks on main", end="", flush=True)
logger.info("Running benchmarks on main", end="", flush=True)
run_a_benchmark = partial(
run_benchmark,
benchmarks_folder=benchmarks_folder,
@@ -371,7 +337,7 @@
)
for ((b, t, f, a, c, d), (_, t0, f0, a0, c0, d0)) in zip(results, results_main)
]
print("\n\n" + tabulate(combined_results, headers, stralign="right") + "\n")
logger.info("\n\n" + tabulate(combined_results, headers, stralign="right") + "\n")

# Checkout back to branch
mybranch.checkout()
@@ -385,7 +351,7 @@
how="outer",
)
if df.isna().values.any():
print(f"ERROR: number of benchmarks changed:\n{df}")
logger.error(f"number of benchmarks changed:\n{df}")
sys.exit(9)
accu_regressions = df[df["Correct"] < df["M Correct"]]["Benchmark"]
addi_regressions = df[df["Additional"] > df["M Additional"]]["Benchmark"]
@@ -395,37 +361,36 @@
main_time = df["M Time (s)"].sum()
runtime_change = our_time - main_time

print(f"Total runtime: {our_time:.2f}s (main: {main_time:.2f}s)")
print(
logger.info(f"Total runtime: {our_time:.2f}s (main: {main_time:.2f}s)")
logger.info(
f"Change in runtime (negative == faster): {runtime_change:+.2f}s ({100 * runtime_change / main_time:+.1f}%)"
)

our_correct = df["Correct"].sum()
main_correct = df["M Correct"].sum()
correct_change = our_correct - main_correct
-print(
+logger.info(
f"Change in correct rows (higher == better): {correct_change:+d} ({100 * correct_change / main_correct:+.1f}%)"
)

our_additional_rows = df["Additional"].sum()
main_additional_rows = df["M Additional"].sum()
additional_change = our_additional_rows - main_additional_rows
-print(
+logger.info(
f"Change in additional rows: {additional_change:+d} ({100 * additional_change / main_additional_rows:+.1f}%)"
)

if len(accu_regressions) + len(addi_regressions) + len(time_regressions) > 0:
-print()
if not accu_regressions.empty:
print(f"ERROR: correct rows regressed on: {', '.join(accu_regressions)}")
logger.error(f"correct rows regressed on: {', '.join(accu_regressions)}")
if not addi_regressions.empty:
print(f"ERROR: additional rows regressed on: {', '.join(addi_regressions)}")
logger.error(f"additional rows regressed on: {', '.join(addi_regressions)}")
if not time_regressions.empty:
print(f"ERROR: runtime regressed on: {', '.join(time_regressions)}")
logger.error(f"runtime regressed on: {', '.join(time_regressions)}")
sys.exit(10)
# TODO also check if any new tables are missing?

print("No regressions. You're awesome!")
logger.info("No regressions. You're awesome!")


if __name__ == "__main__":
@@ -489,17 +454,17 @@ def run_all_benchmarks(
benchmarks_folder = spec["benchmarks_folder"]
benchmark_names = [b["name"] for b in spec["benchmarks"]]
if len(set(benchmark_names)) != len(benchmark_names):
print("ERROR: Found duplicate name in benchmarks YAML file")
logger.error(f"Found duplicate name in benchmarks YAML file")
sys.exit(11)

if args.dd and args.times_dir is None:
print("ERROR: --times_dir is required when using --dd")
logger.error(f"--times_dir is required when using --dd")
sys.exit(12)

if args.run is not None:
benchmark = next((b for b in spec["benchmarks"] if b["name"] == args.run), None)
if benchmark is None:
print(f"ERROR: could not find {args.run} in {args.benchmarks_yaml}")
logger.error(f"could not find {args.run} in {args.benchmarks_yaml}")
sys.exit(13)

_, runtime, gms, acc, cor, add = run_benchmark(
Expand All @@ -511,7 +476,7 @@ def run_all_benchmarks(
verbose=args.verbose,
debug=args.debug,
)
-print(
+logger.info(
f"Ran {args.run} in {runtime:.2f}s. {acc}% ({cor} correct, {add} additional).\n"
f"GAMS: {gms}"
)
4 changes: 3 additions & 1 deletion xl2times/__main__.py
@@ -10,10 +10,12 @@
from typing import Dict, List

from xl2times.utils import max_workers
-from . import datatypes
+from . import datatypes, utils
from . import excel
from . import transforms

+logger = utils.get_logger()


def convert_xl_to_times(
input_files: List[str],
16 changes: 9 additions & 7 deletions xl2times/datatypes.py
@@ -6,6 +6,8 @@
import re
from typing import Dict, Iterable, List, Set, Tuple
from enum import Enum

+from loguru import logger
from pandas.core.frame import DataFrame

# ============================================================================
@@ -235,7 +237,7 @@ def _process_times_info(
cat_to_tables[item["gams-cat"]].append(item["name"])
unknown_cats = {item["gams-cat"] for item in table_info} - set(categories)
if unknown_cats:
print(f"WARNING: Unknown categories in times-info.json: {unknown_cats}")
logger.warning(f"Unknown categories in times-info.json: {unknown_cats}")
dd_table_order = chain.from_iterable(
(sorted(cat_to_tables[c]) for c in categories)
)
@@ -307,9 +309,9 @@ def _read_mappings(filename: str) -> List[TimesXlMap]:
break
(times, xl) = line.split(" = ")
(times_name, times_cols_str) = list(
filter(None, re.split("\[|\]", times))
filter(None, re.split(r"\[|\]", times))
)
-(xl_name, xl_cols_str) = list(filter(None, re.split("\(|\)", xl)))
+(xl_name, xl_cols_str) = list(filter(None, re.split(r"\(|\)", xl)))
times_cols = times_cols_str.split(",")
xl_cols = xl_cols_str.split(",")
filter_rows = {}
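These two raw-string fixes are the other half of the "linter regex errors": in a plain string literal, "\[" is an invalid escape sequence that CPython merely passes through today (with a DeprecationWarning, upgraded to SyntaxWarning in Python 3.12), and linters flag it. The split behaviour is unchanged; the mapping fragment below is made up:

```python
import re

line = "FLO_EMIS[region,year]"  # hypothetical TIMES-style mapping fragment

# old form: "\[|\]" relies on the invalid string escape being passed through
# new form: the raw string hands \[ and \] to the regex engine as written
print(re.split(r"\[|\]", line))  # ['FLO_EMIS', 'region,year', '']
```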
@@ -343,8 +345,8 @@
dropped.append(line)

if len(dropped) > 0:
-print(
-f"WARNING: Dropping {len(dropped)} mappings that are not yet complete"
+logger.warning(
+f"Dropping {len(dropped)} mappings that are not yet complete"
)
return mappings

@@ -370,8 +372,8 @@ def to_tag(s: str) -> Tag:
tags = {to_tag(tag_info["tag_name"]) for tag_info in veda_tags_info}
for tag in Tag:
if tag not in tags:
-print(
-f"WARNING: datatypes.Tag has an unknown Tag {tag} not in {veda_tags_file}"
+logger.warning(
+f"datatypes.Tag has an unknown Tag {tag} not in {veda_tags_file}"
)

valid_column_names = {}
(Diff for the remaining 3 changed files is not shown here.)
