
Added logging and utils.get_logger()
fixed linter regex errors
SamRWest committed Feb 18, 2024
1 parent d4d8eb3 commit be174b6
Showing 8 changed files with 147 additions and 119 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -59,5 +59,5 @@ addopts = '-s --durations=0 --durations-min=5.0 --tb=native --cov-report term --
# See https://github.com/nat-n/poethepoet for details.
benchmark = { cmd = "python utils/run_benchmarks.py benchmarks.yml --verbose --run", help = "Run a single benchmark. Usage: poe benchmark <benchmark_name>" }
benchmark_all = { shell = "python utils/run_benchmarks.py benchmarks.yml --verbose | tee out.txt", help = "Run the project", interpreter = "posix" }
-lint = { cmd = "poetry run git add .pre-commit-config.yaml & pre-commit run", help = "Run pre-commit hooks" }
-test = { cmd = "poetry run pytest", help = "Run unit tests with pytest" }
+lint = { shell = "git add .pre-commit-config.yaml & pre-commit run", help = "Run pre-commit hooks", interpreter = "posix"}
+test = { cmd = "pytest", help = "Run unit tests with pytest" }
3 changes: 2 additions & 1 deletion utils/dd_to_csv.py
@@ -8,6 +8,7 @@

import numpy as np
import pandas as pd
+from loguru import logger


def parse_parameter_values_from_file(
@@ -179,7 +180,7 @@ def convert_dd_to_tabular(
all_sets = defaultdict(list)
all_parameters = defaultdict(list)
for path in dd_files:
print(f"Processing path: {path}")
logger.info(f"Processing path: {path}")
local_param_values, local_sets = parse_parameter_values_from_file(path)

# merge params from file into global collection
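As background for the print-to-logger swaps in this commit: loguru ships a ready-configured logger object, so call sites only need the import. A minimal sketch of the pattern (the messages are illustrative, not from the diff):

```python
from loguru import logger  # third-party: pip install loguru

# drop-in replacements for the removed print() calls
logger.info("Processing path: input/base.dd")  # hypothetical path
logger.error("could not parse output of run")

# caveat: loguru methods accept str.format-style kwargs, not print()'s
# end=/flush= parameters, so each call always emits a full log line
logger.info("parsed {n} tables", n=42)
```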
129 changes: 47 additions & 82 deletions utils/run_benchmarks.py
@@ -20,59 +20,23 @@
import yaml
from tabulate import tabulate

+from xl2times import utils
from xl2times.utils import max_workers

-# configure logger
-# logging.basicConfig(
-#     level=logging.DEBUG,
-#     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s - %(filename)s:%(lineno)d",
-#     handlers=[StreamHandler(), RotatingFileHandler("xl2times.log", maxBytes=1000000, backupCount=5)],
-#     force=True,
-#     datefmt="%Y-%m-%d %H:%M:%S",
-# )
-# logger = logging.getLogger("xl2times")
-# logger.info("Logger!")
-
-from loguru import logger
-
-# set global log level via env var. Set to INFO if not already set.
-if os.getenv("LOGURU_LEVEL") is None:
-    os.environ["LOGURU_LEVEL"] = "INFO"
-
-log_conf = {
-    "handlers": [
-        {
-            "sink": sys.stdout,
-            "diagnose": False,
-            "format": "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | <level>{level: <8}</level> : <level>{message}</level> (<cyan>{name}:{"
-            'thread.name}:pid-{process}</cyan> "<cyan>{'
-            'file.path}</cyan>:<cyan>{line}</cyan>")',
-        },
-        {
-            "sink": "./xl2times.log",
-            "enqueue": True,
-            "mode": "a+",
-            "level": "DEBUG",
-            "colorize": False,
-            "serialize": False,
-            "diagnose": False,
-            "rotation": "20 MB",
-            "compression": "zip",
-        },
-    ],
-}
-logger.configure(**log_conf)
+logger = utils.get_logger()
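The new `utils.get_logger()` helper lives in one of the files not expanded in this view, so the following is only a plausible reconstruction from the handler configuration deleted above (a stdout sink plus a rotating, zip-compressed `xl2times.log` file sink); the actual implementation in `xl2times/utils.py` may differ:

```python
# hypothetical sketch of xl2times.utils.get_logger(), not the committed code
import os
import sys

from loguru import logger


def get_logger():
    # default the global level to INFO, as the deleted block above did
    if os.getenv("LOGURU_LEVEL") is None:
        os.environ["LOGURU_LEVEL"] = "INFO"
    logger.configure(
        handlers=[
            {"sink": sys.stdout, "diagnose": False},
            {
                "sink": "./xl2times.log",
                "level": "DEBUG",
                "enqueue": True,  # queue writes so multiprocess logging is safe
                "rotation": "20 MB",
                "compression": "zip",
                "diagnose": False,
            },
        ]
    )
    return logger
```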


def parse_result(output: str) -> Tuple[float, int, int]:
# find pattern in multiline string
m = re.findall(
r"(\d+\.\d)\% of ground truth rows present in output \((\d+)/(\d+)\), (\d+) additional rows",
r"(\d+\.\d)% of ground truth rows present in output \((\d+)/(\d+)\), (\d+) additional rows",
output,
flags=re.MULTILINE,
)
if not m or len(m) == 0:
print(f"ERROR: could not parse output of run:\n{output}")
logger.error(f"could not parse output of run:\n{output}")
sys.exit(2)
# return (accuracy, num_correct_rows, num_additional_rows)
m = m[0]
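The one-character regex change above is part of the "linter regex errors" from the commit message: `%` has no special meaning in a regular expression, so `\%` is an unnecessary escape that linters such as ruff flag. A quick sanity check against a made-up benchmark summary line:

```python
import re

# hypothetical line in the format parse_result() expects
output = "97.1% of ground truth rows present in output (4099/4222), 123 additional rows"

m = re.findall(
    r"(\d+\.\d)% of ground truth rows present in output \((\d+)/(\d+)\), (\d+) additional rows",
    output,
)
print(m)  # [('97.1', '4099', '4222', '123')]
```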
@@ -107,13 +71,13 @@ def run_gams_gdxdiff(
text=True,
)
if res.returncode != 0:
-print(res.stdout)
-print(res.stderr if res.stderr is not None else "")
-print(f"ERROR: GAMS failed on {benchmark['name']}")
+logger.info(res.stdout)
+logger.info(res.stderr if res.stderr is not None else "")
+logger.error(f"GAMS failed on {benchmark['name']}")
sys.exit(3)
if "error" in res.stdout.lower():
-print(res.stdout)
-print(f"ERROR: GAMS errored on {benchmark['name']}")
+logger.info(res.stdout)
+logger.error(f"GAMS errored on {benchmark['name']}")
return "Error running GAMS"

# Run GAMS on ground truth:
@@ -138,13 +102,13 @@
text=True,
)
if res.returncode != 0:
-print(res.stdout)
-print(res.stderr if res.stderr is not None else "")
-print(f"ERROR: GAMS failed on {benchmark['name']} ground truth")
+logger.info(res.stdout)
+logger.info(res.stderr if res.stderr is not None else "")
+logger.error(f"GAMS failed on {benchmark['name']} ground truth")
sys.exit(4)
if "error" in res.stdout.lower():
-print(res.stdout)
-print(f"ERROR: GAMS errored on {benchmark['name']}")
+logger.info(res.stdout)
+logger.error(f"GAMS errored on {benchmark['name']}")
return "Error running GAMS on ground truth"

# Run gdxdiff to compare
@@ -161,8 +125,8 @@
text=True,
)
if verbose:
-print(res.stdout)
-print(res.stderr if res.stderr is not None else "")
+logger.info(res.stdout)
+logger.info(res.stderr if res.stderr is not None else "")
if res.returncode != 0:
return f"Diff ({len(res.stdout.splitlines())})"

@@ -202,8 +166,8 @@ def run_benchmark(
if res.returncode != 0:
# Remove partial outputs
shutil.rmtree(csv_folder, ignore_errors=True)
-print(res.stdout)
-print(f"ERROR: dd_to_csv failed on {benchmark['name']}")
+logger.info(res.stdout)
+logger.error(f"dd_to_csv failed on {benchmark['name']}")
sys.exit(5)
else:
# If debug option is set, run as a function call to allow stepping with a debugger.
@@ -212,7 +176,7 @@
main([dd_folder, csv_folder])

elif not path.exists(csv_folder):
print(f"ERROR: --skip_csv is true but {csv_folder} does not exist")
logger.error(f"--skip_csv is true but {csv_folder} does not exist")
sys.exit(6)

# Then run the tool
@@ -251,14 +215,14 @@ def run_benchmark(

if verbose:
line = "-" * 80
print(f"\n{line}\n{benchmark['name']}\n{line}\n\n{res.stdout}")
print(res.stderr if res.stderr is not None else "")
logger.info(f"\n{line}\n{benchmark['name']}\n{line}\n\n{res.stdout}")
logger.info(res.stderr if res.stderr is not None else "")
else:
print(".", end="", flush=True)
logger.info(".", end="", flush=True)

if res.returncode != 0:
-print(res.stdout)
-print(f"ERROR: tool failed on {benchmark['name']}")
+logger.info(res.stdout)
+logger.error(f"tool failed on {benchmark['name']}")
sys.exit(7)
with open(path.join(out_folder, "stdout"), "w") as f:
f.write(res.stdout)
@@ -284,7 +248,7 @@ def run_all_benchmarks(
verbose=False,
debug: bool = False,
):
print("Running benchmarks", end="", flush=True)
logger.info("Running benchmarks", end="", flush=True)
headers = ["Benchmark", "Time (s)", "GDX Diff", "Accuracy", "Correct", "Additional"]
run_a_benchmark = partial(
run_benchmark,
@@ -298,10 +262,10 @@

with ProcessPoolExecutor(max_workers=max_workers) as executor:
results = list(executor.map(run_a_benchmark, benchmarks))
print("\n\n" + tabulate(results, headers, floatfmt=".1f") + "\n")
logger.info("\n\n" + tabulate(results, headers, floatfmt=".1f") + "\n")

if skip_regression:
print("Skipping regression tests.")
logger.info("Skipping regression tests.")
sys.exit(0)
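For readers unfamiliar with tabulate (used throughout this script): it renders the per-benchmark result tuples as an aligned plain-text table. A tiny illustration with a made-up row:

```python
from tabulate import tabulate  # third-party: pip install tabulate

headers = ["Benchmark", "Time (s)", "GDX Diff", "Accuracy", "Correct", "Additional"]
results = [("DemoS_001", 12.3, "OK", 97.1, 4099, 123)]  # hypothetical benchmark result

print(tabulate(results, headers, floatfmt=".1f"))
```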

# The rest of this script checks regressions against main
@@ -319,7 +283,7 @@
mybranch = repo.create_head("mybranch")

if mybranch.name == "main":
print("Skipping regression tests as we're on main branch. Goodbye!")
logger.info("Skipping regression tests as we're on main branch. Goodbye!")
sys.exit(0)

if skip_main:
@@ -332,18 +296,20 @@
result = parse_result(f.readlines()[-1])
# Use a fake runtime and GAMS result
results_main.append((benchmark["name"], 999, "--", *result))
-print(
+logger.info(
f"Skipped running on main. Using results from {path.join(benchmarks_folder, 'out-main')}"
)

else:
if repo.is_dirty():
print("Your working directory is not clean. Skipping regression tests.")
logger.info(
"Your working directory is not clean. Skipping regression tests."
)
sys.exit(8)

# Re-run benchmarks on main
repo.heads.main.checkout()
print("Running benchmarks on main", end="", flush=True)
logger.info("Running benchmarks on main", end="", flush=True)
run_a_benchmark = partial(
run_benchmark,
benchmarks_folder=benchmarks_folder,
@@ -371,7 +337,7 @@
)
for ((b, t, f, a, c, d), (_, t0, f0, a0, c0, d0)) in zip(results, results_main)
]
print("\n\n" + tabulate(combined_results, headers, stralign="right") + "\n")
logger.info("\n\n" + tabulate(combined_results, headers, stralign="right") + "\n")

# Checkout back to branch
mybranch.checkout()
@@ -385,7 +351,7 @@
how="outer",
)
if df.isna().values.any():
print(f"ERROR: number of benchmarks changed:\n{df}")
logger.error(f"number of benchmarks changed:\n{df}")
sys.exit(9)
accu_regressions = df[df["Correct"] < df["M Correct"]]["Benchmark"]
addi_regressions = df[df["Additional"] > df["M Additional"]]["Benchmark"]
@@ -395,37 +361,36 @@
main_time = df["M Time (s)"].sum()
runtime_change = our_time - main_time

print(f"Total runtime: {our_time:.2f}s (main: {main_time:.2f}s)")
print(
logger.info(f"Total runtime: {our_time:.2f}s (main: {main_time:.2f}s)")
logger.info(
f"Change in runtime (negative == faster): {runtime_change:+.2f}s ({100 * runtime_change / main_time:+.1f}%)"
)

our_correct = df["Correct"].sum()
main_correct = df["M Correct"].sum()
correct_change = our_correct - main_correct
-print(
+logger.info(
f"Change in correct rows (higher == better): {correct_change:+d} ({100 * correct_change / main_correct:+.1f}%)"
)

our_additional_rows = df["Additional"].sum()
main_additional_rows = df["M Additional"].sum()
additional_change = our_additional_rows - main_additional_rows
-print(
+logger.info(
f"Change in additional rows: {additional_change:+d} ({100 * additional_change / main_additional_rows:+.1f}%)"
)

if len(accu_regressions) + len(addi_regressions) + len(time_regressions) > 0:
-print()
if not accu_regressions.empty:
print(f"ERROR: correct rows regressed on: {', '.join(accu_regressions)}")
logger.error(f"correct rows regressed on: {', '.join(accu_regressions)}")
if not addi_regressions.empty:
print(f"ERROR: additional rows regressed on: {', '.join(addi_regressions)}")
logger.error(f"additional rows regressed on: {', '.join(addi_regressions)}")
if not time_regressions.empty:
print(f"ERROR: runtime regressed on: {', '.join(time_regressions)}")
logger.error(f"runtime regressed on: {', '.join(time_regressions)}")
sys.exit(10)
# TODO also check if any new tables are missing?

print("No regressions. You're awesome!")
logger.info("No regressions. You're awesome!")


if __name__ == "__main__":
@@ -489,17 +454,17 @@ def run_all_benchmarks(
benchmarks_folder = spec["benchmarks_folder"]
benchmark_names = [b["name"] for b in spec["benchmarks"]]
if len(set(benchmark_names)) != len(benchmark_names):
print("ERROR: Found duplicate name in benchmarks YAML file")
logger.error(f"Found duplicate name in benchmarks YAML file")
sys.exit(11)

if args.dd and args.times_dir is None:
print("ERROR: --times_dir is required when using --dd")
logger.error(f"--times_dir is required when using --dd")
sys.exit(12)

if args.run is not None:
benchmark = next((b for b in spec["benchmarks"] if b["name"] == args.run), None)
if benchmark is None:
print(f"ERROR: could not find {args.run} in {args.benchmarks_yaml}")
logger.error(f"could not find {args.run} in {args.benchmarks_yaml}")
sys.exit(13)

_, runtime, gms, acc, cor, add = run_benchmark(
Expand All @@ -511,7 +476,7 @@ def run_all_benchmarks(
verbose=args.verbose,
debug=args.debug,
)
-print(
+logger.info(
f"Ran {args.run} in {runtime:.2f}s. {acc}% ({cor} correct, {add} additional).\n"
f"GAMS: {gms}"
)
4 changes: 3 additions & 1 deletion xl2times/__main__.py
@@ -10,10 +10,12 @@
from typing import Dict, List

from xl2times.utils import max_workers
-from . import datatypes
+from . import datatypes, utils
from . import excel
from . import transforms

+logger = utils.get_logger()


def convert_xl_to_times(
input_files: List[str],
16 changes: 9 additions & 7 deletions xl2times/datatypes.py
@@ -6,6 +6,8 @@
import re
from typing import Dict, Iterable, List, Set, Tuple
from enum import Enum

+from loguru import logger
from pandas.core.frame import DataFrame

# ============================================================================
@@ -235,7 +237,7 @@ def _process_times_info(
cat_to_tables[item["gams-cat"]].append(item["name"])
unknown_cats = {item["gams-cat"] for item in table_info} - set(categories)
if unknown_cats:
print(f"WARNING: Unknown categories in times-info.json: {unknown_cats}")
logger.warning(f"Unknown categories in times-info.json: {unknown_cats}")
dd_table_order = chain.from_iterable(
(sorted(cat_to_tables[c]) for c in categories)
)
@@ -307,9 +309,9 @@ def _read_mappings(filename: str) -> List[TimesXlMap]:
break
(times, xl) = line.split(" = ")
(times_name, times_cols_str) = list(
filter(None, re.split("\[|\]", times))
filter(None, re.split(r"\[|\]", times))
)
-(xl_name, xl_cols_str) = list(filter(None, re.split("\(|\)", xl)))
+(xl_name, xl_cols_str) = list(filter(None, re.split(r"\(|\)", xl)))
times_cols = times_cols_str.split(",")
xl_cols = xl_cols_str.split(",")
filter_rows = {}
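These two raw-string fixes are the other half of the "linter regex errors": in a plain string literal, "\[" is an invalid escape sequence that CPython merely passes through today (with a DeprecationWarning, upgraded to SyntaxWarning in Python 3.12), and linters flag it. The split behaviour is unchanged; the mapping fragment below is made up:

```python
import re

line = "FLO_EMIS[region,year]"  # hypothetical TIMES-style mapping fragment

# old form: "\[|\]" relies on the invalid string escape being passed through
# new form: the raw string hands \[ and \] to the regex engine as written
print(re.split(r"\[|\]", line))  # ['FLO_EMIS', 'region,year', '']
```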
@@ -343,8 +345,8 @@
dropped.append(line)

if len(dropped) > 0:
-print(
-f"WARNING: Dropping {len(dropped)} mappings that are not yet complete"
+logger.warning(
+f"Dropping {len(dropped)} mappings that are not yet complete"
)
return mappings

@@ -370,8 +372,8 @@ def to_tag(s: str) -> Tag:
tags = {to_tag(tag_info["tag_name"]) for tag_info in veda_tags_info}
for tag in Tag:
if tag not in tags:
-print(
-f"WARNING: datatypes.Tag has an unknown Tag {tag} not in {veda_tags_file}"
+logger.warning(
+f"datatypes.Tag has an unknown Tag {tag} not in {veda_tags_file}"
)

valid_column_names = {}
(Diff for the remaining 3 changed files is not shown here.)
