From 072acf796e03b6a1d03b0c02e85ee075c8ca2775 Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Fri, 23 Feb 2024 12:22:39 +0200 Subject: [PATCH 01/13] CI: fix bug in parsing additional lines --- utils/run_benchmarks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/run_benchmarks.py b/utils/run_benchmarks.py index 33cc790..7695d67 100644 --- a/utils/run_benchmarks.py +++ b/utils/run_benchmarks.py @@ -36,7 +36,7 @@ def parse_result(output: str) -> Tuple[float, int, int]: sys.exit(2) # return (accuracy, num_correct_rows, num_additional_rows) m = m[0] - return (float(m[0]), int(m[1]), int(m[2])) + return (float(m[0]), int(m[1]), int(m[3])) def run_gams_gdxdiff( From 422450a220167c2dc3a71cdfab1cb6f96bd57079 Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Fri, 23 Feb 2024 12:40:23 +0200 Subject: [PATCH 02/13] Cache extracted XlTables to speedup repetitive runs --- xl2times/__main__.py | 59 ++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/xl2times/__main__.py b/xl2times/__main__.py index e502768..634fd72 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -1,7 +1,7 @@ import argparse from concurrent.futures import ProcessPoolExecutor from datetime import datetime - +import hashlib from pandas.core.frame import DataFrame import pandas as pd import pickle @@ -11,6 +11,7 @@ import time from typing import Dict, List +from xl2times import __file__ as xl2times_file_path from xl2times.utils import max_workers from . import datatypes, utils from . import excel @@ -19,6 +20,34 @@ logger = utils.get_logger() +cache_dir = os.path.abspath(os.path.dirname(xl2times_file_path)) + "/.cache/" +os.makedirs(cache_dir, exist_ok=True) + + +def _read_xlsx_cached(filename: str) -> List[datatypes.EmbeddedXlTable]: + """Extract EmbeddedXlTables from xlsx file (cached). + + Since excel.extract_tables is quite slow, we cache its results in `cache_dir`. + Each file is named by the hash of the contents of an xlsx file, and contains + a tuple (filename, modified timestamp, [EmbeddedXlTable]). + """ + with open(filename, "rb") as f: + digest = hashlib.file_digest(f, "sha256") # pyright: ignore + hsh = digest.hexdigest() + if os.path.isfile(cache_dir + hsh): + fname1, _timestamp, tables = pickle.load(open(cache_dir + hsh, "rb")) + # In the extremely unlikely event that we have a hash collision, also check that + # the filename is the same: + # TODO check modified timestamp also matches + if filename == fname1: + logger.info(f"Using cached data for {filename}") + return tables + # Write extracted data to cache: + tables = excel.extract_tables(filename) + pickle.dump((filename, "TODO ModifiedTime", tables), open(cache_dir + hsh, "wb")) + return excel.extract_tables(filename) + + def convert_xl_to_times( input_files: List[str], output_dir: str, @@ -28,27 +57,15 @@ def convert_xl_to_times( verbose: bool = False, stop_after_read: bool = False, ) -> Dict[str, DataFrame]: - pickle_file = "raw_tables.pkl" - t0 = datetime.now() - if use_pkl and os.path.isfile(pickle_file): - raw_tables = pickle.load(open(pickle_file, "rb")) - logger.warning("Using pickled data not xlsx") - else: - raw_tables = [] - - use_pool = True - if use_pool: - with ProcessPoolExecutor(max_workers) as executor: - for result in executor.map(excel.extract_tables, input_files): - raw_tables.extend(result) - else: - for f in input_files: - result = excel.extract_tables(str(Path(f).absolute())) - raw_tables.extend(result) - pickle.dump(raw_tables, open(pickle_file, "wb")) + start_time = datetime.now() + with ProcessPoolExecutor(max_workers) as executor: + raw_tables = executor.map(_read_xlsx_cached, input_files) + # raw_tables is a list of lists, so flatten it: + raw_tables = [t for ts in raw_tables for t in ts] logger.info( - f"Extracted {len(raw_tables)} tables," - f" {sum(table.dataframe.shape[0] for table in raw_tables)} rows in {datetime.now() - t0}" + f"Extracted (potentially cached) {len(raw_tables)} tables," + f" {sum(table.dataframe.shape[0] for table in raw_tables)} rows" + f" in {datetime.now() - start_time}" ) if stop_after_read: From 128de34219f687ac058727a3f9b54cb0c95aa778 Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Fri, 23 Feb 2024 15:44:05 +0200 Subject: [PATCH 03/13] CI: cache the xlsx cache directory to benefit from the speedup --- .github/workflows/ci.yml | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 323aefc..2cdbf55 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,9 +3,9 @@ name: CI on: # Triggers the workflow on push or pull request events but only for the main branch push: - branches: [ main ] + branches: [main] pull_request: - branches: [ main ] + branches: [main] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -14,11 +14,19 @@ jobs: CI: runs-on: ubuntu-latest + env: + PY_VERSION: 3.10 + steps: - uses: actions/checkout@v3 with: path: xl2times + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PY_VERSION }} + - name: Install tool and dependencies working-directory: xl2times run: | @@ -40,6 +48,14 @@ jobs: source .venv/bin/activate pytest + - name: Cache XLSX cache directory + id: cache + uses: actions/cache@v4 + with: + path: xl2times/.cache + key: ${{ runner.os }}-py-${{ env.PY_VERSION }} + save-always: true + # ---------- Prepare ETSAP Demo models - uses: actions/checkout@v3 @@ -91,7 +107,6 @@ jobs: echo "$GAMS_LICENSE" > $HOME/.local/share/GAMS/gamslice.txt ls -l $HOME/.local/share/GAMS/ - # ---------- Run tool, check for regressions - name: Run tool on all benchmarks From d642ef5c087b3a93f342725d857b7cf47461eb6b Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Fri, 23 Feb 2024 15:54:05 +0200 Subject: [PATCH 04/13] Remove --use_pkl and add --no_cache --- .github/workflows/ci.yml | 2 +- xl2times/__main__.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2cdbf55..9b21785 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest env: - PY_VERSION: 3.10 + PY_VERSION: "3.10" steps: - uses: actions/checkout@v3 diff --git a/xl2times/__main__.py b/xl2times/__main__.py index 634fd72..315df62 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -53,13 +53,15 @@ def convert_xl_to_times( output_dir: str, config: datatypes.Config, model: datatypes.TimesModel, - use_pkl: bool, + no_cache: bool, verbose: bool = False, stop_after_read: bool = False, ) -> Dict[str, DataFrame]: start_time = datetime.now() with ProcessPoolExecutor(max_workers) as executor: - raw_tables = executor.map(_read_xlsx_cached, input_files) + raw_tables = executor.map( + excel.extract_tables if no_cache else _read_xlsx_cached, input_files + ) # raw_tables is a list of lists, so flatten it: raw_tables = [t for ts in raw_tables for t in ts] logger.info( @@ -452,14 +454,14 @@ def run(args: argparse.Namespace) -> str | None: args.output_dir, config, model, - args.use_pkl, + args.no_cache, verbose=args.verbose, stop_after_read=True, ) sys.exit(0) tables = convert_xl_to_times( - input_files, args.output_dir, config, model, args.use_pkl, verbose=args.verbose + input_files, args.output_dir, config, model, args.no_cache, verbose=args.verbose ) if args.dd: @@ -510,7 +512,11 @@ def parse_args(arg_list: None | list[str]) -> argparse.Namespace: action="store_true", help="Read xlsx/xlsm files and stop after outputting raw_tables.txt", ) - args_parser.add_argument("--use_pkl", action="store_true") + args_parser.add_argument( + "--no_cache", + action="store_true", + help="Ignore cache and re-extract tables from XLSX files", + ) args_parser.add_argument( "-v", "--verbose", From f53b52ec67fe4d785a92a59a0f757c6726f6aabc Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Fri, 23 Feb 2024 16:00:09 +0200 Subject: [PATCH 05/13] Upgrade Python version to 3.11 in order to use hashlib.file_digest --- .github/workflows/ci.yml | 2 +- .readthedocs.yaml | 2 +- pyproject.toml | 2 +- pyrightconfig.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9b21785..9ce86f2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest env: - PY_VERSION: "3.10" + PY_VERSION: "3.11" steps: - uses: actions/checkout@v3 diff --git a/.readthedocs.yaml b/.readthedocs.yaml index b7ca4ba..15c53be 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,7 +8,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.10" + python: "3.11" # Build documentation in the "docs/" directory with Sphinx sphinx: diff --git a/pyproject.toml b/pyproject.toml index 2995c0c..01429a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ name = "xl2times" version = "0.1.0" description = 'An open source tool to convert Excel input files for TIMES models to the DD format accepted by GAMS' readme = "README.md" -requires-python = ">=3.10" +requires-python = ">=3.11" license = { file = "LICENSE" } keywords = [] classifiers = [ diff --git a/pyrightconfig.json b/pyrightconfig.json index d6bcb2c..9cc76bf 100644 --- a/pyrightconfig.json +++ b/pyrightconfig.json @@ -9,7 +9,7 @@ ], "reportMissingImports": true, "reportMissingTypeStubs": false, - "pythonVersion": "3.10", + "pythonVersion": "3.11", "pythonPlatform": "All", "venv": ".venv", "venvPath": "." From 03d4d4a7c8348aa9a6c0b35ed4a23534f5b1d539 Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Fri, 23 Feb 2024 16:54:04 +0200 Subject: [PATCH 06/13] Add commit refs to all CI checkouts and use refs as cache key --- .github/workflows/ci.yml | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ce86f2..f5b80a2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,6 +16,11 @@ jobs: env: PY_VERSION: "3.11" + REF_TIMES_model: "b488fb07f0899ee8b7e710c230b1a9414fa06f7d" + REF_demos-xlsx: "f956db07a253d4f5c60e108791ab7bb2b8136690" + REF_demos-dd: "2848a8a8e2fdcf0cdf7f83eefbdd563b0bb74e86" + REF_tim: "e820d8002adc6b1526a3bffcc439219b28d0eed5" + REF_tim-gams: "703f6a4e1d0bedd95c3ebdae534496f3a7e1b7cc" steps: - uses: actions/checkout@v3 @@ -48,30 +53,25 @@ jobs: source .venv/bin/activate pytest - - name: Cache XLSX cache directory - id: cache - uses: actions/cache@v4 - with: - path: xl2times/.cache - key: ${{ runner.os }}-py-${{ env.PY_VERSION }} - save-always: true - # ---------- Prepare ETSAP Demo models - uses: actions/checkout@v3 with: repository: etsap-TIMES/TIMES_model path: TIMES_model + ref: ${{ env.REF_TIMES_model }} - uses: actions/checkout@v3 with: repository: olejandro/demos-dd path: xl2times/benchmarks/dd + ref: ${{ env.REF_demos-dd }} - uses: actions/checkout@v3 with: repository: olejandro/demos-xlsx path: xl2times/benchmarks/xlsx + ref: ${{ env.REF_demos-xlsx }} token: ${{ secrets.GH_PAT_DEMOS_XLSX }} # ---------- Prepare TIMES Ireland Model @@ -82,11 +82,13 @@ jobs: with: repository: esma-cgep/tim path: xl2times/benchmarks/xlsx/Ireland + ref: ${{ env.REF_tim }} - uses: actions/checkout@v3 with: repository: esma-cgep/tim-gams path: xl2times/benchmarks/dd/Ireland + ref: ${{ env.REF_tim-gams }} # ---------- Install GAMS @@ -109,6 +111,18 @@ jobs: # ---------- Run tool, check for regressions + - name: Restore XLSX cache directory from cache + id: cache + uses: actions/cache/restore@v4 + with: + path: xl2times/.cache + # Cache key is refs of the input xlsx repos, since that's what is cached + key: ${{ runner.os }}-py-${{ env.PY_VERSION }}-${{ env.REF_demos-xlsx }}-${{ env.REF_tim }} + # If we can't find the exact key for the TIM repo, still use the cache if the demos repo ref matches + restore-keys: | + ${{ runner.os }}-py-${{ env.PY_VERSION }}-${{ env.REF_demos-xlsx }}- + ${{ runner.os }}-py-${{ env.PY_VERSION }}- + - name: Run tool on all benchmarks env: GAMS_LICENSE: ${{ secrets.GAMS_LICENSE }} @@ -147,3 +161,10 @@ jobs: run: | sed -n '/Benchmark *Time.*Accuracy/h;//!H;$!d;x;//p' out.txt exit $(cat retcode.txt) + + - uses: actions/cache/save@v4 + # Save the cache even if the regression tests fail + if: always() && !steps.cache-restore.outputs.cache-hit + with: + path: xl2times/.cache + key: ${{ runner.os }}-py-${{ env.PY_VERSION }}-${{ env.REF_demos-xlsx }}-${{ env.REF_tim }} From 8f35540c9aa5b4a3df72dd87757144d923298780 Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Mon, 26 Feb 2024 14:55:08 +0000 Subject: [PATCH 07/13] Fix GitHub Actions cache directory --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f5b80a2..1f6d324 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -115,7 +115,7 @@ jobs: id: cache uses: actions/cache/restore@v4 with: - path: xl2times/.cache + path: ${{ github.workspace }}/xl2times/.cache # Cache key is refs of the input xlsx repos, since that's what is cached key: ${{ runner.os }}-py-${{ env.PY_VERSION }}-${{ env.REF_demos-xlsx }}-${{ env.REF_tim }} # If we can't find the exact key for the TIM repo, still use the cache if the demos repo ref matches @@ -166,5 +166,5 @@ jobs: # Save the cache even if the regression tests fail if: always() && !steps.cache-restore.outputs.cache-hit with: - path: xl2times/.cache + path: ${{ github.workspace }}xl2times/.cache key: ${{ runner.os }}-py-${{ env.PY_VERSION }}-${{ env.REF_demos-xlsx }}-${{ env.REF_tim }} From d16843cca9297cde4cd19192ba23adfdb031452a Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Mon, 26 Feb 2024 15:08:47 +0000 Subject: [PATCH 08/13] Fix the fix --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1f6d324..85f9728 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -166,5 +166,5 @@ jobs: # Save the cache even if the regression tests fail if: always() && !steps.cache-restore.outputs.cache-hit with: - path: ${{ github.workspace }}xl2times/.cache + path: ${{ github.workspace }}/xl2times/.cache key: ${{ runner.os }}-py-${{ env.PY_VERSION }}-${{ env.REF_demos-xlsx }}-${{ env.REF_tim }} From fef73f01c36558464e907744ada6f020a874053a Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Mon, 26 Feb 2024 17:27:32 +0200 Subject: [PATCH 09/13] Debug why cache save is failing --- .github/workflows/ci.yml | 13 -- benchmarks.yml | 335 --------------------------------------- xl2times/__main__.py | 2 +- 3 files changed, 1 insertion(+), 349 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 85f9728..af2f519 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,19 +40,6 @@ jobs: python -m pip install --upgrade pip python -m pip install -e .[dev] - - name: Check code formatting - working-directory: xl2times - run: | - source .venv/bin/activate - pre-commit install - pre-commit run --all-files - - - name: Run unit tests - working-directory: xl2times - run: | - source .venv/bin/activate - pytest - # ---------- Prepare ETSAP Demo models - uses: actions/checkout@v3 diff --git a/benchmarks.yml b/benchmarks.yml index dcbb989..f943de2 100644 --- a/benchmarks.yml +++ b/benchmarks.yml @@ -13,338 +13,3 @@ benchmarks: dd_files: - "base" - "syssettings" - - name: DemoS_002-all - input_folder: DemoS_002 - dd_folder: DemoS_002-all - dd_files: - - "base" - - "syssettings" - - name: DemoS_003-all - input_folder: DemoS_003 - dd_folder: DemoS_003-all - dd_files: - - "base" - - "syssettings" - - name: DemoS_004 - input_folder: DemoS_004 - inputs: - - "BY_Trans.xlsx" - - "Sets-DemoModels.xlsx" - - "SysSettings.xlsx" - - "VT_REG_PRI_V04.xlsx" - dd_folder: DemoS_004 - dd_files: - - "base" - - "syssettings" - - name: DemoS_004a - input_folder: DemoS_004 - inputs: - - "BY_Trans.xlsx" - - "Sets-DemoModels.xlsx" - - "SysSettings.xlsx" - - "SuppXLS/Scen_Peak_RSV.xlsx" - - "VT_REG_PRI_V04.xlsx" - dd_folder: DemoS_004a - dd_files: - - "base" - - "syssettings" - - "peak_rsv" - - name: DemoS_004b - input_folder: DemoS_004 - inputs: - - "BY_Trans.xlsx" - - "Sets-DemoModels.xlsx" - - "SysSettings.xlsx" - - "SuppXLS/Scen_Peak_RSV-FLX.xlsx" - - "VT_REG_PRI_V04.xlsx" - dd_folder: DemoS_004b - dd_files: - - "base" - - "syssettings" - - "peak_rsv-flx" - - name: DemoS_004-all - input_folder: DemoS_004 - dd_folder: DemoS_004-all - dd_files: - - "base" - - "syssettings" - - "peak_rsv" - - "peak_rsv-flx" - - name: DemoS_005-all - input_folder: DemoS_005 - dd_folder: DemoS_005-all - dd_files: - - "base" - - "syssettings" - - "trade_param" - - "co2_tax" - - "elc_co2_bound" - - "peak_rsv" - - "uc_co2bnd" - - name: DemoS_006-all - input_folder: DemoS_006 - dd_folder: DemoS_006-all - dd_files: - - "base" - - "newtechs" - - "syssettings" - - "trade_param" - - "elc_co2_bound" - - "peak_rsv" - - "uc_co2bnd" - - name: DemoS_007-all - input_folder: DemoS_007 - dd_folder: DemoS_007-all - dd_files: - - "base" - - "newtechs" - - "syssettings" - - "trade_param" - - "demproj_dtcar" - - "elasticdem" - - "elc_co2_bound" - - "peak_rsv" - - "refinery" - - "tra_co2_bound" - - "uc_co2bnd" - - "uc_growth" - - name: DemoS_007-all-1r - input_folder: DemoS_007 - inputs: - - "BY_Trans.xlsx" - - "Sets-DemoModels.xlsx" - - "SysSettings.xlsx" - - "SubRES_TMPL/SubRES_NewTechs.xlsx" - - "SubRES_TMPL/SubRES_NewTechs_Trans.xlsx" - - "SuppXLS/Scen_DemProj_DTCAR.xlsx" - - "SuppXLS/Scen_ElasticDem.xlsx" - - "SuppXLS/Scen_ELC_CO2_BOUND.xlsx" - - "SuppXLS/Scen_Peak_RSV.xlsx" - - "SuppXLS/Scen_Refinery.xlsx" - - "SuppXLS/Scen_TRA_CO2_BOUND.xlsx" - - "SuppXLS/Scen_UC_CO2BND.xlsx" - - "SuppXLS/Scen_UC_Growth.xlsx" - - "SuppXLS/Trades/ScenTrade__Trade_Links.xlsx" - - "SuppXLS/Trades/ScenTrade_TRADE_PARAM.xlsx" - - "VT_REG1_PRI_V07.xlsx" - dd_folder: DemoS_007-all-1r - dd_files: - - "base" - - "newtechs" - - "syssettings" - - "trade_param" - - "demproj_dtcar" - - "elasticdem" - - "elc_co2_bound" - - "peak_rsv" - - "refinery" - - "tra_co2_bound" - - "uc_co2bnd" - - "uc_growth" - - name: DemoS_008-all - input_folder: DemoS_008 - dd_folder: DemoS_008-all - dd_files: - - "base" - - "newtechs" - - "syssettings" - - "trade_param" - - "demproj_dtcar" - - "elasticdem" - - "elc_co2_bound" - - "peak_rsv" - - "refinery" - - "tra_co2_bound" - - "uc_co2bnd" - - "uc_growth" - - "uc_nuc_maxcap" - - name: DemoS_009-all - input_folder: DemoS_009 - dd_folder: DemoS_009-all - dd_files: - - "base" - - "new-chp-dh" - - "new-ind" - - "newtechs" - - "syssettings" - - "trade_param" - - "demproj_dtcar" - - "elasticdem" - - "elc_co2_bound" - - "ind_newres" - - "peak_rsv" - - "refinery" - - "tra_co2_bound" - - "uc_co2bnd" - - "uc_dh_minprod" - - "uc_growth" - - "uc_nuc_maxcap" - - name: DemoS_010-all - input_folder: DemoS_010 - dd_folder: DemoS_010-all - dd_files: - - "base" - - "new-chp-dh" - - "new-ind" - - "newtechs" - - "syssettings" - - "dem_ref" - - "trade_param" - - "demproj_dtcar" - - "elasticdem" - - "elc_co2_bound" - - "ind_newres" - - "peak_rsv" - - "refinery" - - "tra_co2_bound" - - "uc_co2bnd" - - "uc_dh_minprod" - - "uc_growth" - - "uc_nuc_maxcap" - - name: DemoS_011-all - input_folder: DemoS_011 - dd_folder: DemoS_011-all - dd_files: - - "base" - - "new-chp-dh" - - "new-ind" - - "newtechs" - - "syssettings" - - "dem_ref" - - "trade_param" - - "bounds-uc_wsets" - - "demproj_dtcar" - - "elasticdem" - - "elc_co2_bound" - - "ind_newres" - - "peak_rsv" - - "refinery" - - "tra_co2_bound" - - "uc_co2bnd" - - "uc_dh_minprod" - - "uc_growth" - - "uc_nuc_maxcap" - - name: DemoS_012-all - input_folder: DemoS_012 - dd_folder: DemoS_012-all - dd_files: - - "base" - - "new-chp-dh" - - "new-ind" - - "newtechs" - - "syssettings" - - "dem_ref" - - "trade_param" - - "bnd_ppfossil" - - "bounds-uc_wsets" - - "co2_tax" - - "demproj_dtcar" - - "elasticdem" - - "elc_co2_bound" - - "ind_newres" - - "nuc_dscinv" - - "peak_rsv" - - "refinery" - - "solar_subsidies" - - "tra_co2_bound" - - "tradsl_tax" - - "uc_co2_regions" - - "uc_co2bnd" - - "uc_dh_minprod" - - "uc_growth" - - "uc_nuc_maxcap" - - name: Ireland - input_folder: Ireland - regions: "IE" - inputs: - - "VT_IE_AGR.xlsx" - - "VT_IE_IND.xlsx" - - "VT_IE_PWR.xlsx" - - "VT_IE_RSD.xlsx" - - "VT_IE_SRV.xlsx" - - "VT_IE_SUP.xlsx" - - "VT_IE_TRA.xlsx" - - "BY_Trans.xlsx" - - "SetRules.xlsx" - - "SuppXLS/Trades/ScenTrade__Trade_Links.xlsx" - - "SubRES_TMPL/SubRES_PWR_DH.xlsx" - - "SubRES_TMPL/SubRES_PWR_DH_Trans.xlsx" - - "SubRES_TMPL/SubRES_PWR_NewTechs.xlsx" - - "SubRES_TMPL/SubRES_PWR_NewTechs_Trans.xlsx" - - "SubRES_TMPL/SubRES_RSD_NewTechs.xlsx" - - "SubRES_TMPL/SubRES_RSD_NewTechs_Trans.xlsx" - - "SubRES_TMPL/SubRES_RSD-Retrofit.xlsx" - - "SubRES_TMPL/SubRES_RSD-Retrofit_Trans.xlsx" - - "SubRES_TMPL/SubRES_SRV_DC_ExcessHeat.xlsx" - - "SubRES_TMPL/SubRES_SRV_DC_ExcessHeat_Trans.xlsx" - - "SubRES_TMPL/SubRES_SRV_NewTechs.xlsx" - - "SubRES_TMPL/SubRES_SRV_NewTechs_Trans.xlsx" - - "SubRES_TMPL/SubRES_SUP_BioRefineries.xlsx" - - "SubRES_TMPL/SubRES_SUP_BioRefineries_Trans.xlsx" - - "SubRES_TMPL/SubRES_SUP_H2NewTechs.xlsx" - - "SubRES_TMPL/SubRES_SUP_H2NewTechs_Trans.xlsx" - - "SubRES_TMPL/SubRES_SYS_OtherNewTechs.xlsx" - - "SubRES_TMPL/SubRES_SYS_OtherNewTechs_Trans.xlsx" - - "SubRES_TMPL/SubRES_TRA_NewVehicles.xlsx" - - "SubRES_TMPL/SubRES_TRA_NewVehicles_Trans.xlsx" - - "SysSettings.xlsx" - - "SuppXLS/Scen_A_SYS_SAD_40TS.xlsx" - - "SuppXLS/Scen_B_SYS_Additional_Assumptions.xlsx" - - "SuppXLS/Scen_B_SYS_Demands.xlsx" - - "SuppXLS/Scen_B_SUP_DomBioPot_Baseline.xlsx" - - "SuppXLS/Scen_B_IND_Emi_Proc.xlsx" - - "SuppXLS/Scen_B_PWR_CCS.xlsx" - - "SuppXLS/Scen_B_SRV_DC_EH.xlsx" - - "SuppXLS/Scen_B_PWR_RNW_Potentials.xlsx" - - "SuppXLS/Scen_B_IND_Emissions.xlsx" - - "SuppXLS/Scen_B_RSD_Retrofit-Ctrl.xlsx" - - "SuppXLS/Scen_B_RSD_UC.xlsx" - - "SuppXLS/Scen_B_SRV_UC.xlsx" - - "SuppXLS/Scen_B_PWR_SNSP_Limit.xlsx" - - "SuppXLS/Scen_B_SYS_Bio_DelivCost.xlsx" - - "SuppXLS/Scen_B_SYS_Historic_Bounds.xlsx" - - "SuppXLS/Scen_B_SYS_MaxGrowthRates.xlsx" - - "SuppXLS/Scen_B_RSD_UnitBoilers.xlsx" - - "SuppXLS/Scen_B_TRA_P_ModalShares.xlsx" - - "SuppXLS/Scen_B_TRA_NewCars_Retirement.xlsx" - - "SuppXLS/Scen_B_TRA_Stock_Retirement.xlsx" - - "SuppXLS/Scen_B_TRA_Emissions.xlsx" - - "SuppXLS/Scen_B_TRA_EV_Parity.xlsx" - - "SuppXLS/Scen_B_TRA_F_ModalShares.xlsx" - dd_folder: Ireland - dd_files: - - "base" - - "pwr_dh" - - "pwr_newtechs" - - "rsd_newtechs" - - "rsd-retrofit" - - "srv_dc_excessheat" - - "srv_newtechs" - - "sup_biorefineries" - - "sup_h2newtechs" - - "sys_othernewtechs" - - "tra_newvehicles" - - "syssettings" - - "a_sys_sad_40ts" - - "b_sys_additional_assumptions" - - "b_sys_demands" - - "b_sup_dombiopot_baseline" - - "b_ind_emi_proc" - - "b_pwr_ccs" - - "b_srv_dc_eh" - - "b_pwr_rnw_potentials" - - "b_ind_emissions" - - "b_rsd_retrofit-ctrl" - - "b_rsd_uc" - - "b_srv_uc" - - "b_pwr_snsp_limit" - - "b_sys_bio_delivcost" - - "b_sys_historic_bounds" - - "b_sys_maxgrowthrates" - - "b_rsd_unitboilers" - - "b_tra_p_modalshares" - - "b_tra_newcars_retirement" - - "b_tra_stock_retirement" - - "b_tra_emissions" - - "b_tra_ev_parity" - - "b_tra_f_modalshares" diff --git a/xl2times/__main__.py b/xl2times/__main__.py index 63e0ede..c0d2c70 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -40,7 +40,7 @@ def _read_xlsx_cached(filename: str) -> List[datatypes.EmbeddedXlTable]: # the filename is the same: # TODO check modified timestamp also matches if filename == fname1: - logger.info(f"Using cached data for {filename}") + logger.info(f"Using cached data for {filename}: {cache_dir + hsh}") return tables # Write extracted data to cache: tables = excel.extract_tables(filename) From d7523a848928ba458394a8d80a82243957312c1a Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Mon, 26 Feb 2024 17:37:08 +0200 Subject: [PATCH 10/13] Debug --- xl2times/__main__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xl2times/__main__.py b/xl2times/__main__.py index c0d2c70..8a9988b 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -45,6 +45,7 @@ def _read_xlsx_cached(filename: str) -> List[datatypes.EmbeddedXlTable]: # Write extracted data to cache: tables = excel.extract_tables(filename) pickle.dump((filename, "TODO ModifiedTime", tables), open(cache_dir + hsh, "wb")) + logger.info(f"Wrote cache for {filename}: {cache_dir + hsh}") return excel.extract_tables(filename) From baeb69968e6a92844f328f81c0f7be772d3e10ed Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Mon, 26 Feb 2024 17:41:21 +0200 Subject: [PATCH 11/13] Revert "Debug why cache save is failing" This reverts commit fef73f01c36558464e907744ada6f020a874053a. --- .github/workflows/ci.yml | 13 ++ benchmarks.yml | 335 +++++++++++++++++++++++++++++++++++++++ xl2times/__main__.py | 2 +- 3 files changed, 349 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index af2f519..85f9728 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,6 +40,19 @@ jobs: python -m pip install --upgrade pip python -m pip install -e .[dev] + - name: Check code formatting + working-directory: xl2times + run: | + source .venv/bin/activate + pre-commit install + pre-commit run --all-files + + - name: Run unit tests + working-directory: xl2times + run: | + source .venv/bin/activate + pytest + # ---------- Prepare ETSAP Demo models - uses: actions/checkout@v3 diff --git a/benchmarks.yml b/benchmarks.yml index f943de2..dcbb989 100644 --- a/benchmarks.yml +++ b/benchmarks.yml @@ -13,3 +13,338 @@ benchmarks: dd_files: - "base" - "syssettings" + - name: DemoS_002-all + input_folder: DemoS_002 + dd_folder: DemoS_002-all + dd_files: + - "base" + - "syssettings" + - name: DemoS_003-all + input_folder: DemoS_003 + dd_folder: DemoS_003-all + dd_files: + - "base" + - "syssettings" + - name: DemoS_004 + input_folder: DemoS_004 + inputs: + - "BY_Trans.xlsx" + - "Sets-DemoModels.xlsx" + - "SysSettings.xlsx" + - "VT_REG_PRI_V04.xlsx" + dd_folder: DemoS_004 + dd_files: + - "base" + - "syssettings" + - name: DemoS_004a + input_folder: DemoS_004 + inputs: + - "BY_Trans.xlsx" + - "Sets-DemoModels.xlsx" + - "SysSettings.xlsx" + - "SuppXLS/Scen_Peak_RSV.xlsx" + - "VT_REG_PRI_V04.xlsx" + dd_folder: DemoS_004a + dd_files: + - "base" + - "syssettings" + - "peak_rsv" + - name: DemoS_004b + input_folder: DemoS_004 + inputs: + - "BY_Trans.xlsx" + - "Sets-DemoModels.xlsx" + - "SysSettings.xlsx" + - "SuppXLS/Scen_Peak_RSV-FLX.xlsx" + - "VT_REG_PRI_V04.xlsx" + dd_folder: DemoS_004b + dd_files: + - "base" + - "syssettings" + - "peak_rsv-flx" + - name: DemoS_004-all + input_folder: DemoS_004 + dd_folder: DemoS_004-all + dd_files: + - "base" + - "syssettings" + - "peak_rsv" + - "peak_rsv-flx" + - name: DemoS_005-all + input_folder: DemoS_005 + dd_folder: DemoS_005-all + dd_files: + - "base" + - "syssettings" + - "trade_param" + - "co2_tax" + - "elc_co2_bound" + - "peak_rsv" + - "uc_co2bnd" + - name: DemoS_006-all + input_folder: DemoS_006 + dd_folder: DemoS_006-all + dd_files: + - "base" + - "newtechs" + - "syssettings" + - "trade_param" + - "elc_co2_bound" + - "peak_rsv" + - "uc_co2bnd" + - name: DemoS_007-all + input_folder: DemoS_007 + dd_folder: DemoS_007-all + dd_files: + - "base" + - "newtechs" + - "syssettings" + - "trade_param" + - "demproj_dtcar" + - "elasticdem" + - "elc_co2_bound" + - "peak_rsv" + - "refinery" + - "tra_co2_bound" + - "uc_co2bnd" + - "uc_growth" + - name: DemoS_007-all-1r + input_folder: DemoS_007 + inputs: + - "BY_Trans.xlsx" + - "Sets-DemoModels.xlsx" + - "SysSettings.xlsx" + - "SubRES_TMPL/SubRES_NewTechs.xlsx" + - "SubRES_TMPL/SubRES_NewTechs_Trans.xlsx" + - "SuppXLS/Scen_DemProj_DTCAR.xlsx" + - "SuppXLS/Scen_ElasticDem.xlsx" + - "SuppXLS/Scen_ELC_CO2_BOUND.xlsx" + - "SuppXLS/Scen_Peak_RSV.xlsx" + - "SuppXLS/Scen_Refinery.xlsx" + - "SuppXLS/Scen_TRA_CO2_BOUND.xlsx" + - "SuppXLS/Scen_UC_CO2BND.xlsx" + - "SuppXLS/Scen_UC_Growth.xlsx" + - "SuppXLS/Trades/ScenTrade__Trade_Links.xlsx" + - "SuppXLS/Trades/ScenTrade_TRADE_PARAM.xlsx" + - "VT_REG1_PRI_V07.xlsx" + dd_folder: DemoS_007-all-1r + dd_files: + - "base" + - "newtechs" + - "syssettings" + - "trade_param" + - "demproj_dtcar" + - "elasticdem" + - "elc_co2_bound" + - "peak_rsv" + - "refinery" + - "tra_co2_bound" + - "uc_co2bnd" + - "uc_growth" + - name: DemoS_008-all + input_folder: DemoS_008 + dd_folder: DemoS_008-all + dd_files: + - "base" + - "newtechs" + - "syssettings" + - "trade_param" + - "demproj_dtcar" + - "elasticdem" + - "elc_co2_bound" + - "peak_rsv" + - "refinery" + - "tra_co2_bound" + - "uc_co2bnd" + - "uc_growth" + - "uc_nuc_maxcap" + - name: DemoS_009-all + input_folder: DemoS_009 + dd_folder: DemoS_009-all + dd_files: + - "base" + - "new-chp-dh" + - "new-ind" + - "newtechs" + - "syssettings" + - "trade_param" + - "demproj_dtcar" + - "elasticdem" + - "elc_co2_bound" + - "ind_newres" + - "peak_rsv" + - "refinery" + - "tra_co2_bound" + - "uc_co2bnd" + - "uc_dh_minprod" + - "uc_growth" + - "uc_nuc_maxcap" + - name: DemoS_010-all + input_folder: DemoS_010 + dd_folder: DemoS_010-all + dd_files: + - "base" + - "new-chp-dh" + - "new-ind" + - "newtechs" + - "syssettings" + - "dem_ref" + - "trade_param" + - "demproj_dtcar" + - "elasticdem" + - "elc_co2_bound" + - "ind_newres" + - "peak_rsv" + - "refinery" + - "tra_co2_bound" + - "uc_co2bnd" + - "uc_dh_minprod" + - "uc_growth" + - "uc_nuc_maxcap" + - name: DemoS_011-all + input_folder: DemoS_011 + dd_folder: DemoS_011-all + dd_files: + - "base" + - "new-chp-dh" + - "new-ind" + - "newtechs" + - "syssettings" + - "dem_ref" + - "trade_param" + - "bounds-uc_wsets" + - "demproj_dtcar" + - "elasticdem" + - "elc_co2_bound" + - "ind_newres" + - "peak_rsv" + - "refinery" + - "tra_co2_bound" + - "uc_co2bnd" + - "uc_dh_minprod" + - "uc_growth" + - "uc_nuc_maxcap" + - name: DemoS_012-all + input_folder: DemoS_012 + dd_folder: DemoS_012-all + dd_files: + - "base" + - "new-chp-dh" + - "new-ind" + - "newtechs" + - "syssettings" + - "dem_ref" + - "trade_param" + - "bnd_ppfossil" + - "bounds-uc_wsets" + - "co2_tax" + - "demproj_dtcar" + - "elasticdem" + - "elc_co2_bound" + - "ind_newres" + - "nuc_dscinv" + - "peak_rsv" + - "refinery" + - "solar_subsidies" + - "tra_co2_bound" + - "tradsl_tax" + - "uc_co2_regions" + - "uc_co2bnd" + - "uc_dh_minprod" + - "uc_growth" + - "uc_nuc_maxcap" + - name: Ireland + input_folder: Ireland + regions: "IE" + inputs: + - "VT_IE_AGR.xlsx" + - "VT_IE_IND.xlsx" + - "VT_IE_PWR.xlsx" + - "VT_IE_RSD.xlsx" + - "VT_IE_SRV.xlsx" + - "VT_IE_SUP.xlsx" + - "VT_IE_TRA.xlsx" + - "BY_Trans.xlsx" + - "SetRules.xlsx" + - "SuppXLS/Trades/ScenTrade__Trade_Links.xlsx" + - "SubRES_TMPL/SubRES_PWR_DH.xlsx" + - "SubRES_TMPL/SubRES_PWR_DH_Trans.xlsx" + - "SubRES_TMPL/SubRES_PWR_NewTechs.xlsx" + - "SubRES_TMPL/SubRES_PWR_NewTechs_Trans.xlsx" + - "SubRES_TMPL/SubRES_RSD_NewTechs.xlsx" + - "SubRES_TMPL/SubRES_RSD_NewTechs_Trans.xlsx" + - "SubRES_TMPL/SubRES_RSD-Retrofit.xlsx" + - "SubRES_TMPL/SubRES_RSD-Retrofit_Trans.xlsx" + - "SubRES_TMPL/SubRES_SRV_DC_ExcessHeat.xlsx" + - "SubRES_TMPL/SubRES_SRV_DC_ExcessHeat_Trans.xlsx" + - "SubRES_TMPL/SubRES_SRV_NewTechs.xlsx" + - "SubRES_TMPL/SubRES_SRV_NewTechs_Trans.xlsx" + - "SubRES_TMPL/SubRES_SUP_BioRefineries.xlsx" + - "SubRES_TMPL/SubRES_SUP_BioRefineries_Trans.xlsx" + - "SubRES_TMPL/SubRES_SUP_H2NewTechs.xlsx" + - "SubRES_TMPL/SubRES_SUP_H2NewTechs_Trans.xlsx" + - "SubRES_TMPL/SubRES_SYS_OtherNewTechs.xlsx" + - "SubRES_TMPL/SubRES_SYS_OtherNewTechs_Trans.xlsx" + - "SubRES_TMPL/SubRES_TRA_NewVehicles.xlsx" + - "SubRES_TMPL/SubRES_TRA_NewVehicles_Trans.xlsx" + - "SysSettings.xlsx" + - "SuppXLS/Scen_A_SYS_SAD_40TS.xlsx" + - "SuppXLS/Scen_B_SYS_Additional_Assumptions.xlsx" + - "SuppXLS/Scen_B_SYS_Demands.xlsx" + - "SuppXLS/Scen_B_SUP_DomBioPot_Baseline.xlsx" + - "SuppXLS/Scen_B_IND_Emi_Proc.xlsx" + - "SuppXLS/Scen_B_PWR_CCS.xlsx" + - "SuppXLS/Scen_B_SRV_DC_EH.xlsx" + - "SuppXLS/Scen_B_PWR_RNW_Potentials.xlsx" + - "SuppXLS/Scen_B_IND_Emissions.xlsx" + - "SuppXLS/Scen_B_RSD_Retrofit-Ctrl.xlsx" + - "SuppXLS/Scen_B_RSD_UC.xlsx" + - "SuppXLS/Scen_B_SRV_UC.xlsx" + - "SuppXLS/Scen_B_PWR_SNSP_Limit.xlsx" + - "SuppXLS/Scen_B_SYS_Bio_DelivCost.xlsx" + - "SuppXLS/Scen_B_SYS_Historic_Bounds.xlsx" + - "SuppXLS/Scen_B_SYS_MaxGrowthRates.xlsx" + - "SuppXLS/Scen_B_RSD_UnitBoilers.xlsx" + - "SuppXLS/Scen_B_TRA_P_ModalShares.xlsx" + - "SuppXLS/Scen_B_TRA_NewCars_Retirement.xlsx" + - "SuppXLS/Scen_B_TRA_Stock_Retirement.xlsx" + - "SuppXLS/Scen_B_TRA_Emissions.xlsx" + - "SuppXLS/Scen_B_TRA_EV_Parity.xlsx" + - "SuppXLS/Scen_B_TRA_F_ModalShares.xlsx" + dd_folder: Ireland + dd_files: + - "base" + - "pwr_dh" + - "pwr_newtechs" + - "rsd_newtechs" + - "rsd-retrofit" + - "srv_dc_excessheat" + - "srv_newtechs" + - "sup_biorefineries" + - "sup_h2newtechs" + - "sys_othernewtechs" + - "tra_newvehicles" + - "syssettings" + - "a_sys_sad_40ts" + - "b_sys_additional_assumptions" + - "b_sys_demands" + - "b_sup_dombiopot_baseline" + - "b_ind_emi_proc" + - "b_pwr_ccs" + - "b_srv_dc_eh" + - "b_pwr_rnw_potentials" + - "b_ind_emissions" + - "b_rsd_retrofit-ctrl" + - "b_rsd_uc" + - "b_srv_uc" + - "b_pwr_snsp_limit" + - "b_sys_bio_delivcost" + - "b_sys_historic_bounds" + - "b_sys_maxgrowthrates" + - "b_rsd_unitboilers" + - "b_tra_p_modalshares" + - "b_tra_newcars_retirement" + - "b_tra_stock_retirement" + - "b_tra_emissions" + - "b_tra_ev_parity" + - "b_tra_f_modalshares" diff --git a/xl2times/__main__.py b/xl2times/__main__.py index 8a9988b..0e4230b 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -40,7 +40,7 @@ def _read_xlsx_cached(filename: str) -> List[datatypes.EmbeddedXlTable]: # the filename is the same: # TODO check modified timestamp also matches if filename == fname1: - logger.info(f"Using cached data for {filename}: {cache_dir + hsh}") + logger.info(f"Using cached data for {filename}") return tables # Write extracted data to cache: tables = excel.extract_tables(filename) From 0479a1de1f56ec5ef67dba8be67345e0b7764342 Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Mon, 26 Feb 2024 17:42:31 +0200 Subject: [PATCH 12/13] Better logs --- xl2times/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xl2times/__main__.py b/xl2times/__main__.py index 0e4230b..ae2062b 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -40,12 +40,12 @@ def _read_xlsx_cached(filename: str) -> List[datatypes.EmbeddedXlTable]: # the filename is the same: # TODO check modified timestamp also matches if filename == fname1: - logger.info(f"Using cached data for {filename}") + logger.info(f"Using cached data for {filename} from {cache_dir + hsh}") return tables # Write extracted data to cache: tables = excel.extract_tables(filename) pickle.dump((filename, "TODO ModifiedTime", tables), open(cache_dir + hsh, "wb")) - logger.info(f"Wrote cache for {filename}: {cache_dir + hsh}") + logger.info(f"Saved cache for {filename} to {cache_dir + hsh}") return excel.extract_tables(filename) From 6300ffde8e1d04035f90c8dd9cad62f490a54839 Mon Sep 17 00:00:00 2001 From: Siddharth Krishna Date: Mon, 26 Feb 2024 17:43:18 +0200 Subject: [PATCH 13/13] Fix cache dir (this time I'm serious) --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 85f9728..1c858a0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -115,7 +115,7 @@ jobs: id: cache uses: actions/cache/restore@v4 with: - path: ${{ github.workspace }}/xl2times/.cache + path: ${{ github.workspace }}/xl2times/xl2times/.cache # Cache key is refs of the input xlsx repos, since that's what is cached key: ${{ runner.os }}-py-${{ env.PY_VERSION }}-${{ env.REF_demos-xlsx }}-${{ env.REF_tim }} # If we can't find the exact key for the TIM repo, still use the cache if the demos repo ref matches @@ -166,5 +166,5 @@ jobs: # Save the cache even if the regression tests fail if: always() && !steps.cache-restore.outputs.cache-hit with: - path: ${{ github.workspace }}/xl2times/.cache + path: ${{ github.workspace }}/xl2times/xl2times/.cache key: ${{ runner.os }}-py-${{ env.PY_VERSION }}-${{ env.REF_demos-xlsx }}-${{ env.REF_tim }}