From 9c36c37ec010962ff618f1a0c73a7dbf77ef70b2 Mon Sep 17 00:00:00 2001
From: Sam West
Date: Thu, 15 Feb 2024 15:39:32 +1100
Subject: [PATCH] Feature/benchmarks on windows (#184)

This PR contains the changes necessary to run the benchmarks locally on a Windows machine. Main changes:

- Introduced the `shell=True` param to subprocess calls. Without it, the virtualenv was not accessible to the subprocesses, which would fail with errors such as `ModuleNotFoundError: No module named 'pandas'`.
- Python subprocesses on Windows are heavyweight and use far more RAM than on Linux, so I've added a variable in `utils` that limits the `max_workers` param of the `ProcessPoolExecutor` constructors to a small number (previously it defaulted to the number of CPUs). Without this limit, the pool-inside-a-pool structure of the benchmarks was spawning 32*32 python.exe processes and using all of my 64 GB of RAM within a few seconds. (A minimal standalone sketch of this cap is appended after the diff.)
- Added a `--debug` switch to `run_benchmarks` to run each benchmark as a function call, allowing the use of debuggers/breakpoints.
- CI now runs CSV-only regression tests if the GAMS license isn't in the repository secrets.
---
 .github/workflows/ci.yml |  30 ++++++++--
 .gitignore               |   4 +-
 README.md                |  40 +++++++++++++
 pyproject.toml           |   3 +-
 utils/__init__.py        |   0
 utils/dd_to_csv.py       |   8 ++-
 utils/run_benchmarks.py  | 108 ++++++++++++++++++++++------------
 xl2times/__main__.py     | 123 +++++++++++++++++++++++++--------------
 xl2times/transforms.py   |   4 +-
 xl2times/utils.py        |   5 ++
 10 files changed, 234 insertions(+), 91 deletions(-)
 create mode 100644 utils/__init__.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c7a3b09..7f42d5f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -3,9 +3,9 @@ name: CI
 on:
   # Triggers the workflow on push or pull request events but only for the main branch
   push:
-    branches: [main]
+    branches: [ main ]
   pull_request:
-    branches: [main]
+    branches: [ main ]
 
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
@@ -69,6 +69,9 @@ jobs:
 
       # ---------- Install GAMS
       - name: Install GAMS
+        env:
+          GAMS_LICENSE: ${{ secrets.GAMS_LICENSE }}
+        if: ${{ env.GAMS_LICENSE != '' }}
        run: |
           curl https://d37drm4t2jghv5.cloudfront.net/distributions/44.1.0/linux/linux_x64_64_sfx.exe -o linux_x64_64_sfx.exe
           chmod +x linux_x64_64_sfx.exe
@@ -81,17 +84,18 @@
           mkdir -p $HOME/.local/share/GAMS
           echo "$GAMS_LICENSE" > $HOME/.local/share/GAMS/gamslice.txt
           ls -l $HOME/.local/share/GAMS/
-        env:
-          GAMS_LICENSE: ${{ secrets.GAMS_LICENSE }}
+
 
       # ---------- Run tool, check for regressions
       - name: Run tool on all benchmarks
+        env:
+          GAMS_LICENSE: ${{ secrets.GAMS_LICENSE }}
+        if: ${{ env.GAMS_LICENSE != '' }}
         working-directory: xl2times
         # Use tee to also save the output to out.txt so that the summary table can be
         # printed again in the next step.
         # Save the return code to retcode.txt so that the next step can fail the action
-        # if run_benchmarks.py failed.
         run: |
           source .venv/bin/activate
           export PATH=$PATH:$GITHUB_WORKSPACE/GAMS/gams44.1_linux_x64_64_sfx
           (python utils/run_benchmarks.py benchmarks.yml \
               --times_dir $GITHUB_WORKSPACE/TIMES_model \
               --dd --verbose \
               | tee out.txt; \
               echo ${PIPESTATUS[0]} > retcode.txt)
 
@@ -101,6 +105,22 @@
+      - name: Run CSV-only regression tests (no GAMS license)
+        env:
+          GAMS_LICENSE: ${{ secrets.GAMS_LICENSE }}
+        if: ${{ env.GAMS_LICENSE == '' }}
+        working-directory: xl2times
+        # Run without the --dd flag if the GAMS license secret doesn't exist.
+        # Useful for testing for (CSV) regressions in forks before creating PRs.
+        run: |
+          source .venv/bin/activate
+          export PATH=$PATH:$GITHUB_WORKSPACE/GAMS/gams44.1_linux_x64_64_sfx
+          (python utils/run_benchmarks.py benchmarks.yml \
+              --times_dir $GITHUB_WORKSPACE/TIMES_model \
+              --verbose \
+              | tee out.txt; \
+              echo ${PIPESTATUS[0]} > retcode.txt)
+
       - name: Print summary
         working-directory: xl2times
         run: |
diff --git a/.gitignore b/.gitignore
index a707f93..5b52706 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,7 +13,9 @@ ground_truth/*
 *.pyproj.*
 speedscope.json
 *.pkl
-.venv/
+.venv*/
 benchmarks/
+.idea/
+.python-version
 docs/_build/
 docs/api/
diff --git a/README.md b/README.md
index ee2c1e7..df36800 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,46 @@ git commit --no-verify
 See our GitHub Actions CI `.github/workflows/ci.yml` and the utility script
 `utils/run_benchmarks.py` to see how to run the tool on the DemoS models.
 
+In short, use the commands below to clone the benchmarks data into your local `benchmarks` dir.
+Note that this assumes you have access to all of these repositories (some are private and
+you'll have to request access); if not, comment out the inaccessible benchmarks from `benchmarks.yml` before running.
+
+```bash
+mkdir benchmarks
+# Get VEDA example models and reference DD files
+# XLSX files are in a private repo for licensing reasons; please request access or replace them with your own licensed VEDA example files.
+git clone git@github.com:olejandro/demos-xlsx.git benchmarks/xlsx/
+git clone git@github.com:olejandro/demos-dd.git benchmarks/dd/
+
+# Get Ireland model and reference DD files
+git clone git@github.com:esma-cgep/tim.git benchmarks/xlsx/Ireland
+git clone git@github.com:esma-cgep/tim-gams.git benchmarks/dd/Ireland
+```
+Then to run the benchmarks:
+```bash
+# Run only a single benchmark by name (see benchmarks.yml for the list of names)
+python utils/run_benchmarks.py benchmarks.yml --verbose --run DemoS_001-all | tee out.txt
+
+# Run all benchmarks (without running GAMS, just comparing CSV data)
+python utils/run_benchmarks.py benchmarks.yml --verbose | tee out.txt
+
+
+# Run benchmarks with regression tests vs the main branch
+git checkout -b feature/your_new_changes
+# ... make your code changes here ...
+git commit -a -m "your commit message" # code must be committed for the comparison to the `main` branch to run.
+python utils/run_benchmarks.py benchmarks.yml --verbose | tee out.txt
+```
+At this point, if you haven't broken anything, you should see something like:
+```
+Change in runtime: +2.97s
+Change in correct rows: +0
+Change in additional rows: +0
+No regressions. You're awesome!
+```
+If you see a large increase in runtime, a decrease in correct rows, or an increase in additional rows, then you've broken something and will need to figure out how to fix it.
+
+
 ### Debugging Regressions
 
 If your change is causing regressions on one of the benchmarks, a useful way to debug
 and find the difference is to run the tool in verbose mode and compare the intermediate tables.
For example, if your branch has regressions on Demo 1: diff --git a/pyproject.toml b/pyproject.toml index 3b678e1..484e835 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0.0", "wheel"] build-backend = "setuptools.build_meta" [tool.setuptools] -packages = ["xl2times"] +packages = ["xl2times", "utils"] [project] name = "xl2times" @@ -24,6 +24,7 @@ dependencies = [ "more-itertools", "openpyxl >= 3.0, < 3.1", "pandas >= 2.1", + "pyarrow" ] [project.optional-dependencies] diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/dd_to_csv.py b/utils/dd_to_csv.py index df444e7..5de10fd 100644 --- a/utils/dd_to_csv.py +++ b/utils/dd_to_csv.py @@ -216,7 +216,7 @@ def convert_dd_to_tabular( return -if __name__ == "__main__": +def main(arg_list: None | list[str] = None): args_parser = argparse.ArgumentParser() args_parser.add_argument( "input_dir", type=str, help="Input directory containing .dd files." @@ -224,5 +224,9 @@ def convert_dd_to_tabular( args_parser.add_argument( "output_dir", type=str, help="Output directory to save the .csv files in." ) - args = args_parser.parse_args() + args = args_parser.parse_args(arg_list) convert_dd_to_tabular(args.input_dir, args.output_dir, generate_headers_by_attr()) + + +if __name__ == "__main__": + main() diff --git a/utils/run_benchmarks.py b/utils/run_benchmarks.py index f2a9a91..5ae8876 100644 --- a/utils/run_benchmarks.py +++ b/utils/run_benchmarks.py @@ -1,4 +1,6 @@ import argparse +import os +from collections import namedtuple from concurrent.futures import ProcessPoolExecutor from functools import partial import git @@ -13,6 +15,8 @@ from typing import Any, Tuple import yaml +from xl2times.utils import max_workers + def parse_result(lastline): m = match( @@ -22,7 +26,7 @@ def parse_result(lastline): ) if not m: print(f"ERROR: could not parse output of run:\n{lastline}") - sys.exit(1) + sys.exit(2) # return (accuracy, num_correct_rows, num_additional_rows) return (float(m.groups()[0]), int(m.groups()[1]), int(m.groups()[3])) @@ -58,7 +62,7 @@ def run_gams_gdxdiff( print(res.stdout) print(res.stderr if res.stderr is not None else "") print(f"ERROR: GAMS failed on {benchmark['name']}") - sys.exit(1) + sys.exit(3) if "error" in res.stdout.lower(): print(res.stdout) print(f"ERROR: GAMS errored on {benchmark['name']}") @@ -89,7 +93,7 @@ def run_gams_gdxdiff( print(res.stdout) print(res.stderr if res.stderr is not None else "") print(f"ERROR: GAMS failed on {benchmark['name']} ground truth") - sys.exit(1) + sys.exit(4) if "error" in res.stdout.lower(): print(res.stdout) print(f"ERROR: GAMS errored on {benchmark['name']}") @@ -125,6 +129,7 @@ def run_benchmark( skip_csv: bool = False, out_folder: str = "out", verbose: bool = False, + debug: bool = False, ) -> Tuple[str, float, str, float, int, int]: xl_folder = path.join(benchmarks_folder, "xlsx", benchmark["input_folder"]) dd_folder = path.join(benchmarks_folder, "dd", benchmark["dd_folder"]) @@ -134,26 +139,33 @@ def run_benchmark( # First convert ground truth DD to csv if not skip_csv: shutil.rmtree(csv_folder, ignore_errors=True) - res = subprocess.run( - [ - "python", - "utils/dd_to_csv.py", - dd_folder, - csv_folder, - ], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) - if res.returncode != 0: - # Remove partial outputs - shutil.rmtree(csv_folder, ignore_errors=True) - print(res.stdout) - print(f"ERROR: dd_to_csv failed on {benchmark['name']}") - sys.exit(1) + if os.name != "nt": 
+ res = subprocess.run( + [ + "python", + "utils/dd_to_csv.py", + dd_folder, + csv_folder, + ], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + if res.returncode != 0: + # Remove partial outputs + shutil.rmtree(csv_folder, ignore_errors=True) + print(res.stdout) + print(f"ERROR: dd_to_csv failed on {benchmark['name']}") + sys.exit(5) + else: + # If debug option is set, run as a function call to allow stepping with a debugger. + from utils.dd_to_csv import main + + main([dd_folder, csv_folder]) + elif not path.exists(csv_folder): print(f"ERROR: --skip_csv is true but {csv_folder} does not exist") - sys.exit(1) + sys.exit(6) # Then run the tool args = [ @@ -170,12 +182,23 @@ def run_benchmark( else: args.append(xl_folder) start = time.time() - res = subprocess.run( - ["xl2times"] + args, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) + res = None + if not debug: + res = subprocess.run( + ["xl2times"] + args, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + else: + # If debug option is set, run as a function call to allow stepping with a debugger. + from xl2times.__main__ import run, parse_args + + summary = run(parse_args(args)) + + # pack the results into a namedtuple pretending to be a return value from a subprocess call (as above). + res = namedtuple("stdout", ["stdout", "stderr", "returncode"])(summary, "", 0) + runtime = time.time() - start if verbose: @@ -188,7 +211,7 @@ def run_benchmark( if res.returncode != 0: print(res.stdout) print(f"ERROR: tool failed on {benchmark['name']}") - sys.exit(1) + sys.exit(7) with open(path.join(out_folder, "stdout"), "w") as f: f.write(res.stdout) @@ -211,6 +234,7 @@ def run_all_benchmarks( skip_main=False, skip_regression=False, verbose=False, + debug: bool = False, ): print("Running benchmarks", end="", flush=True) headers = ["Benchmark", "Time (s)", "GDX Diff", "Accuracy", "Correct", "Additional"] @@ -221,9 +245,10 @@ def run_all_benchmarks( skip_csv=skip_csv, run_gams=run_gams, verbose=verbose, + debug=debug, ) - with ProcessPoolExecutor() as executor: + with ProcessPoolExecutor(max_workers=max_workers) as executor: results = list(executor.map(run_a_benchmark, benchmarks)) print("\n\n" + tabulate(results, headers, floatfmt=".1f") + "\n") @@ -264,7 +289,7 @@ def run_all_benchmarks( else: if repo.is_dirty(): print("Your working directory is not clean. 
Skipping regression tests.") - sys.exit(1) + sys.exit(8) # Re-run benchmarks on main repo.heads.main.checkout() @@ -277,9 +302,10 @@ def run_all_benchmarks( run_gams=run_gams, out_folder="out-main", verbose=verbose, + debug=debug, ) - with ProcessPoolExecutor() as executor: + with ProcessPoolExecutor(max_workers) as executor: results_main = list(executor.map(run_a_benchmark, benchmarks)) # Print table with combined results to make comparison easier @@ -310,13 +336,13 @@ def run_all_benchmarks( ) if df.isna().values.any(): print(f"ERROR: number of benchmarks changed:\n{df}") - sys.exit(1) + sys.exit(9) accu_regressions = df[df["Correct"] < df["M Correct"]]["Benchmark"] addi_regressions = df[df["Additional"] > df["M Additional"]]["Benchmark"] time_regressions = df[df["Time (s)"] > 2 * df["M Time (s)"]]["Benchmark"] runtime_change = df["Time (s)"].sum() - df["M Time (s)"].sum() - print(f"Change in runtime: {runtime_change:+.2f}") + print(f"Change in runtime: {runtime_change:+.2f} s") correct_change = df["Correct"].sum() - df["M Correct"].sum() print(f"Change in correct rows: {correct_change:+d}") additional_change = df["Additional"].sum() - df["M Additional"].sum() @@ -330,7 +356,7 @@ def run_all_benchmarks( print(f"ERROR: additional rows regressed on: {', '.join(addi_regressions)}") if not time_regressions.empty: print(f"ERROR: runtime regressed on: {', '.join(time_regressions)}") - sys.exit(1) + sys.exit(10) # TODO also check if any new tables are missing? print("No regressions. You're awesome!") @@ -385,6 +411,12 @@ def run_all_benchmarks( default=False, help="Print output of run on each benchmark", ) + args_parser.add_argument( + "--debug", + action="store_true", + default=False, + help="Run each benchmark as a function call to allow a debugger to stop at breakpoints in benchmark runs.", + ) args = args_parser.parse_args() spec = yaml.safe_load(open(args.benchmarks_yaml)) @@ -392,17 +424,17 @@ def run_all_benchmarks( benchmark_names = [b["name"] for b in spec["benchmarks"]] if len(set(benchmark_names)) != len(benchmark_names): print("ERROR: Found duplicate name in benchmarks YAML file") - sys.exit(1) + sys.exit(11) if args.dd and args.times_dir is None: print("ERROR: --times_dir is required when using --dd") - sys.exit(1) + sys.exit(12) if args.run is not None: benchmark = next((b for b in spec["benchmarks"] if b["name"] == args.run), None) if benchmark is None: print(f"ERROR: could not find {args.run} in {args.benchmarks_yaml}") - sys.exit(1) + sys.exit(13) _, runtime, gms, acc, cor, add = run_benchmark( benchmark, @@ -411,6 +443,7 @@ def run_all_benchmarks( run_gams=args.dd, skip_csv=args.skip_csv, verbose=args.verbose, + debug=args.debug, ) print( f"Ran {args.run} in {runtime:.2f}s. {acc}% ({cor} correct, {add} additional).\n" @@ -426,4 +459,5 @@ def run_all_benchmarks( skip_main=args.skip_main, skip_regression=args.skip_regression, verbose=args.verbose, + debug=args.debug, ) diff --git a/xl2times/__main__.py b/xl2times/__main__.py index d5598f1..5f1f3f7 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -8,6 +8,8 @@ import sys import time from typing import Dict, List + +from xl2times.utils import max_workers from . import datatypes from . import excel from . 
import transforms @@ -31,7 +33,7 @@ def convert_xl_to_times( use_pool = True if use_pool: - with ProcessPoolExecutor() as executor: + with ProcessPoolExecutor(max_workers) as executor: for result in executor.map(excel.extract_tables, input_files): raw_tables.extend(result) else: @@ -117,7 +119,7 @@ def convert_xl_to_times( end_time = time.time() sep = "\n\n" + "=" * 80 + "\n" if verbose else "" print( - f"{sep}transform {transform.__code__.co_name} took {end_time-start_time:.2f} seconds" + f"{sep}transform {transform.__code__.co_name} took {end_time - start_time:.2f} seconds" ) if verbose: if isinstance(output, list): @@ -160,7 +162,7 @@ def read_csv_tables(input_dir: str) -> Dict[str, DataFrame]: def compare( data: Dict[str, DataFrame], ground_truth: Dict[str, DataFrame], output_dir: str -): +) -> str: print( f"Ground truth contains {len(ground_truth)} tables," f" {sum(df.shape[0] for _, df in ground_truth.items())} rows" @@ -223,13 +225,15 @@ def compare( os.path.join(output_dir, table_name + "_missing.csv"), index=False, ) - - print( + result = ( f"{total_correct_rows / total_gt_rows :.1%} of ground truth rows present" f" in output ({total_correct_rows}/{total_gt_rows})" f", {total_additional_rows} additional rows" ) + print(result) + return result + def produce_times_tables( config: datatypes.Config, input: Dict[str, DataFrame] @@ -385,42 +389,14 @@ def dump_tables(tables: List, filename: str) -> List: return tables -def main(): - args_parser = argparse.ArgumentParser() - args_parser.add_argument( - "input", - nargs="*", - help="Either an input directory, or a list of input xlsx/xlsm files to process", - ) - args_parser.add_argument( - "--regions", - type=str, - default="", - help="Comma-separated list of regions to include in the model", - ) - args_parser.add_argument( - "--output_dir", type=str, default="output", help="Output directory" - ) - args_parser.add_argument( - "--ground_truth_dir", - type=str, - help="Ground truth directory to compare with output", - ) - args_parser.add_argument("--dd", action="store_true", help="Output DD files") - args_parser.add_argument( - "--only_read", - action="store_true", - help="Read xlsx/xlsm files and stop after outputting raw_tables.txt", - ) - args_parser.add_argument("--use_pkl", action="store_true") - args_parser.add_argument( - "-v", - "--verbose", - action="store_true", - help="Verbose mode: print tables after every transform", - ) - args = args_parser.parse_args() - +def run(args) -> str | None: + """ + Runs the xl2times conversion. + Args: + args: pre-parsed command line arguments + Returns: + comparison with ground-truth string if `ground_truth_dir` is provided, else None. + """ config = datatypes.Config( "times_mapping.txt", "times-info.json", @@ -433,7 +409,7 @@ def main(): if not isinstance(args.input, list) or len(args.input) < 1: print(f"ERROR: expected at least 1 input. Got {args.input}") - sys.exit(1) + sys.exit(-1) elif len(args.input) == 1: assert os.path.isdir(args.input[0]) input_files = [ @@ -468,8 +444,67 @@ def main(): if args.ground_truth_dir: ground_truth = read_csv_tables(args.ground_truth_dir) - compare(tables, ground_truth, args.output_dir) + comparison = compare(tables, ground_truth, args.output_dir) + return comparison + else: + return None + + +def parse_args(arg_list: None | list[str]) -> argparse.Namespace: + """Parses command line arguments. + + Args: + arg_list: List of command line arguments. Uses sys.argv (default argparse behaviour) if `None`. + + Returns: + argparse.Namespace: Parsed arguments. 
+ """ + args_parser = argparse.ArgumentParser() + args_parser.add_argument( + "input", + nargs="*", + help="Either an input directory, or a list of input xlsx/xlsm files to process", + ) + args_parser.add_argument( + "--regions", + type=str, + default="", + help="Comma-separated list of regions to include in the model", + ) + args_parser.add_argument( + "--output_dir", type=str, default="output", help="Output directory" + ) + args_parser.add_argument( + "--ground_truth_dir", + type=str, + help="Ground truth directory to compare with output", + ) + args_parser.add_argument("--dd", action="store_true", help="Output DD files") + args_parser.add_argument( + "--only_read", + action="store_true", + help="Read xlsx/xlsm files and stop after outputting raw_tables.txt", + ) + args_parser.add_argument("--use_pkl", action="store_true") + args_parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Verbose mode: print tables after every transform", + ) + args = args_parser.parse_args(arg_list) + return args + + +def main(arg_list: None | list[str] = None) -> None: + """Main entry point for the xl2times package + Returns: + None. + """ + args = parse_args(arg_list) + run(args) if __name__ == "__main__": - main() + main(sys.argv[1:]) + sys.exit(0) diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 339bd07..984701f 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -10,6 +10,8 @@ from concurrent.futures import ProcessPoolExecutor import time from functools import reduce + +from .utils import max_workers from . import datatypes from . import utils @@ -2488,5 +2490,5 @@ def expand_rows_parallel( tables: List[datatypes.EmbeddedXlTable], model: datatypes.TimesModel, ) -> List[datatypes.EmbeddedXlTable]: - with ProcessPoolExecutor() as executor: + with ProcessPoolExecutor(max_workers) as executor: return list(executor.map(expand_rows, tables)) diff --git a/xl2times/utils.py b/xl2times/utils.py index 033ec9b..bec4cb9 100644 --- a/xl2times/utils.py +++ b/xl2times/utils.py @@ -1,3 +1,4 @@ +import os import re from dataclasses import replace from math import log10, floor @@ -10,6 +11,10 @@ from . import datatypes +# prevent excessive number of processes in Windows and high cpu-count machines +# TODO make this a cli param or global setting? +max_workers: int = 4 if os.name == "nt" else min(16, os.cpu_count() or 16) + def apply_composite_tag(table: datatypes.EmbeddedXlTable) -> datatypes.EmbeddedXlTable: """