Add the ruff linter to the pre-commit check #214

Merged: 5 commits, Mar 13, 2024
1 change: 1 addition & 0 deletions .gitignore
@@ -24,3 +24,4 @@ docs/api/
*.log
/profile.*
xl2times/.cache/
*.log.zip
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
@@ -5,11 +5,20 @@ repos:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace

- repo: https://github.com/psf/black
rev: 22.8.0
hooks:
- id: black

- repo: https://github.com/RobertCraigie/pyright-python
rev: v1.1.304
hooks:
- id: pyright

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.2
hooks:
- id: ruff
types_or: [ python, pyi, jupyter ]
args: [ --fix, --exit-non-zero-on-fix ]
37 changes: 36 additions & 1 deletion pyproject.toml
@@ -36,7 +36,8 @@ dev = [
"tabulate",
"pytest",
"pytest-cov",
"poethepoet"
"poethepoet",
"ruff"
]

[project.urls]
@@ -61,3 +62,37 @@ benchmark = { cmd = "python utils/run_benchmarks.py benchmarks.yml --run", help =
benchmark_all = { shell = "python utils/run_benchmarks.py benchmarks.yml --verbose | tee out.txt", help = "Run the project", interpreter = "posix" }
lint = { shell = "git add .pre-commit-config.yaml & pre-commit run", help = "Run pre-commit hooks", interpreter = "posix" }
test = { cmd = "pytest --cov-report term --cov-report html --cov=xl2times --cov=utils", help = "Run unit tests with pytest" }


# Config for various pre-commit checks are below
# Ruff linting rules - see https://github.com/charliermarsh/ruff and https://beta.ruff.rs/docs/rules/
[tool.ruff]
target-version = "py311"
line-length = 88

# Option 1: use basic rules only.
lint.select = [
"E", # pycodestyle errors
"W", # pycodestyle warnings
"F", # pyflakes
"UP", # pyupgrade
"N", # pep8 naming
"I", # isort
"TID", # tidy imports
"UP", # pyupgrade
"NPY", # numpy style
"PL", # pylint
# "PD", # pandas style # TODO enable later
# "C90", # code complexity # TODO enable later
Collaborator (Author): These last two will require a reasonable number of (simple) changes, so I've left them off for now so this diff isn't too polluted.

Collaborator: Nice idea, thanks. Looking forward to the next linting PR. :)
]

# Add specific rule codes/groups here to ignore them, or add a '#noqa' comment to the line of code to skip all checks.
lint.ignore = [
"PLR", # complexity rules
"PD901", "PD011", # pandas 'df''
"E501", # line too long, handled by black
]

# Ruff rule-specific options:
[tool.ruff.mccabe]
max-complexity = 12 # increase max function 'complexity'
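
As a rough, hypothetical illustration (none of this code is from the repository), the rule groups selected above flag patterns like the following, and the '# noqa' escape hatch mentioned in the comment suppresses checks on a single line:

import os, sys  # E (pycodestyle): E401, multiple imports on one line; I (isort) would also regroup these imports
from typing import Dict, List  # UP (pyupgrade): prefer builtin dict/list generics on a py311 target

import numpy as np


def Build_Index(paths: List[str]) -> Dict[str, int]:  # N (pep8-naming): function names should be lowercase
    unused = os.sep  # F (pyflakes): F841, local variable assigned but never used
    return {p: len(p) for p in paths}


np.random.seed(0)  # NPY: legacy numpy random API, a numpy Generator is preferred

# A targeted '# noqa: E731' suppresses only the "do not assign a lambda" rule on this line:
shout = lambda s: s.upper()  # noqa: E731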
8 changes: 4 additions & 4 deletions tests/test_transforms.py
@@ -2,16 +2,16 @@

import pandas as pd

from xl2times import transforms, utils, datatypes
from xl2times import datatypes, transforms, utils
from xl2times.transforms import (
_process_comm_groups_vectorised,
_count_comm_group_vectorised,
_match_wildcards,
_process_comm_groups_vectorised,
commodity_map,
Collaborator (Author): ^ This was the original motivation - standard import sorting/formatting.
expand_rows,
get_matching_commodities,
get_matching_processes,
_match_wildcards,
process_map,
commodity_map,
)

logger = utils.get_logger()
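For context, the isort-style "I" rules behind the reordering in this file (and the next) group imports into standard-library, third-party, and first-party blocks, alphabetised within each block. A minimal hypothetical sketch of the convention:

# Standard library
import os
from collections import defaultdict

# Third-party
import pandas as pd

# First-party
from xl2times import transforms, utils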
3 changes: 2 additions & 1 deletion tests/test_utils.py
@@ -1,6 +1,7 @@
from xl2times import utils
import pandas as pd

from xl2times import utils


class TestUtils:
def test_explode(self):
Expand Down
31 changes: 15 additions & 16 deletions utils/dd_to_csv.py
@@ -1,10 +1,9 @@
import argparse
import sys
from collections import defaultdict
import json
import os
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple, Union

import numpy as np
import pandas as pd
@@ -13,7 +12,7 @@

def parse_parameter_values_from_file(
path: Path,
) -> Tuple[Dict[str, List], Dict[str, set]]:
) -> tuple[dict[str, list], dict[str, set]]:
"""
Parse *.dd to turn it into CSV format
There are parameters and sets, and each has a slightly different format
@@ -35,11 +34,11 @@ def parse_parameter_values_from_file(

"""

data = list(open(path, "r"))
data = list(open(path))
data = [line.rstrip() for line in data]

param_value_dict: Dict[str, List] = dict()
set_data_dict: Dict[str, set] = dict()
param_value_dict: dict[str, list] = dict()
set_data_dict: dict[str, set] = dict()
index = 0
while index < len(data):
if data[index].startswith("PARAMETER"):
@@ -124,8 +123,8 @@ def parse_parameter_values_from_file(


def save_data_with_headers(
param_data_dict: Dict[str, Union[pd.DataFrame, List[str]]],
headers_data: Dict[str, List[str]],
param_data_dict: dict[str, pd.DataFrame | list[str]],
Collaborator (Author): What Python versions are you planning to support? This was done with the py311 setting (to match CI), but I'll revert and re-run for py39 if that's the minimum target. (I think the | type hint only works from 3.10 onwards.)

Member: We haven't had that discussion actually... How about py312?

Collaborator (Author): Works for me, type hints are a lot cleaner in 3.12 for instance. I just thought someone mentioned a use-case for running with an older version is all.

Collaborator: I think we use the match-case statement somewhere in transforms.py, which is >= 3.11. I feel perhaps we should try to keep the min version as low as possible, just to make it easier for users? Not sure how much of a pain it is to install a specific Python version on Windows (on Linux I use pyenv).

Member: I'd say it is easy! :-) But I am also fine with sticking to 3.11.

Collaborator (Author): Yeah, I use pyenv on Windows also, works very well. Less savvy users can just download and run the official installers etc. OK, so we'll target >=3.11, cool.

headers_data: dict[str, list[str]],
save_dir: str,
) -> None:
"""
@@ -157,7 +156,7 @@ def save_data_with_headers(
return


def generate_headers_by_attr() -> Dict[str, List[str]]:
def generate_headers_by_attr() -> dict[str, list[str]]:
with open("xl2times/config/times-info.json") as f:
attributes = json.load(f)

@@ -173,7 +172,7 @@ def generate_headers_by_attr():


def convert_dd_to_tabular(
basedir: str, output_dir: str, headers_by_attr: Dict[str, List[str]]
basedir: str, output_dir: str, headers_by_attr: dict[str, list[str]]
) -> None:
dd_files = [p for p in Path(basedir).rglob("*.dd")]

@@ -201,15 +200,15 @@ def convert_dd_to_tabular(
os.makedirs(set_path, exist_ok=True)

# Extract headers with key=param_name and value=List[attributes]
lines = list(open("xl2times/config/times_mapping.txt", "r"))
lines = list(open("xl2times/config/times_mapping.txt"))
headers_data = headers_by_attr
# The following will overwrite data obtained from headers_by_attr
# TODO: Remove once migration is done?
for line in lines:
line = line.strip()
if line != "":
param_name = line.split("[")[0]
attributes = line.split("[")[1].split("]")[0].split(",")
ln = line.strip()
if ln != "":
param_name = ln.split("[")[0]
attributes = ln.split("[")[1].split("]")[0].split(",")
headers_data[param_name] = [*attributes]

save_data_with_headers(all_parameters, headers_data, param_path)
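The annotation changes in this file line up with the version discussion above: on a 3.9+ target the typing.Dict/List/Tuple aliases can become builtin generics (PEP 585), and from 3.10 onwards Union/Optional can be written with the | operator (PEP 604), which is what pyupgrade rewrites them to. A hypothetical before/after sketch (not a function from this file):

# Before: def load(path: str) -> Tuple[Dict[str, List[str]], Optional[int]]: ...
# After, valid on the agreed >=3.11 target:
def load(path: str) -> tuple[dict[str, list[str]], int | None]:
    return ({path: []}, None)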
19 changes: 12 additions & 7 deletions utils/run_benchmarks.py
@@ -9,22 +9,22 @@
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from os import path, symlink
from typing import Any, Tuple
from typing import Any

import git
import pandas as pd
import yaml
from dd_to_csv import main
from tabulate import tabulate

from dd_to_csv import main
from xl2times import utils
from xl2times.__main__ import parse_args, run
from xl2times.utils import max_workers

logger = utils.get_logger()


def parse_result(output: str) -> Tuple[float, int, int]:
def parse_result(output: str) -> tuple[float, int, int]:
# find pattern in multiline string
m = re.findall(
r"(\d+\.\d)% of ground truth rows present in output \((\d+)/(\d+)\), (\d+) additional rows",
@@ -65,6 +65,7 @@ def run_gams_gdxdiff(
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
check=False,
)
if res.returncode != 0:
logger.info(res.stdout)
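The explicit check=False added throughout this file is presumably there to satisfy the pylint-derived rule that flags subprocess.run calls without an explicit check argument; the behavioural difference is sketched below with a hypothetical command (not one the benchmark script runs):

import subprocess

# check=False: a non-zero exit status is reported via returncode and the caller decides,
# which matches the manual res.returncode handling above.
res = subprocess.run(["git", "--version"], capture_output=True, text=True, check=False)
if res.returncode != 0:
    print(res.stdout)

# check=True: a non-zero exit status raises subprocess.CalledProcessError instead.
try:
    subprocess.run(["git", "no-such-subcommand"], capture_output=True, text=True, check=True)
except subprocess.CalledProcessError as err:
    print(f"command failed with exit code {err.returncode}")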
@@ -96,6 +97,7 @@ def run_gams_gdxdiff(
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
check=False,
)
if res.returncode != 0:
logger.info(res.stdout)
@@ -119,6 +121,7 @@
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
check=False,
)
if verbose:
logger.info(res.stdout)
@@ -138,7 +141,7 @@ def run_benchmark(
out_folder: str = "out",
verbose: bool = False,
debug: bool = False,
) -> Tuple[str, float, str, float, int, int]:
) -> tuple[str, float, str, float, int, int]:
xl_folder = path.join(benchmarks_folder, "xlsx", benchmark["input_folder"])
dd_folder = path.join(benchmarks_folder, "dd", benchmark["dd_folder"])
csv_folder = path.join(benchmarks_folder, "csv", benchmark["name"])
@@ -160,6 +163,7 @@
stderr=subprocess.STDOUT,
text=True,
shell=True if os.name == "nt" else False,
check=False,
)
if res.returncode != 0:
# Remove partial outputs
@@ -191,7 +195,7 @@
if "regions" in benchmark:
args.extend(["--regions", benchmark["regions"]])
if "inputs" in benchmark:
args.extend((path.join(xl_folder, b) for b in benchmark["inputs"]))
args.extend(path.join(xl_folder, b) for b in benchmark["inputs"])
else:
args.append(xl_folder)
start = time.time()
@@ -203,6 +207,7 @@
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
check=False,
)
else:
# If debug option is set, run as a function call to allow stepping with a debugger.
@@ -295,7 +300,6 @@ def run_all_benchmarks(
for benchmark in benchmarks:
with open(
path.join(benchmarks_folder, "out-main", benchmark["name"], "stdout"),
"r",
) as f:
result = parse_result(f.readlines()[-1])
# Use a fake runtime and GAMS result
@@ -330,7 +334,8 @@ def run_all_benchmarks(
results_main = list(executor.map(run_a_benchmark, benchmarks))

# Print table with combined results to make comparison easier
trunc = lambda s: s[:10] + "\u2026" if len(s) > 10 else s
trunc = lambda s: s[:10] + "\u2026" if len(s) > 10 else s # noqa

combined_results = [
(
f"{b:<20}",
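The bare '# noqa' added to the trunc lambda above suppresses every rule on that line; most likely it is silencing E731 ("do not assign a lambda expression, use a def") from the selected pycodestyle group. A def-based equivalent, shown only for comparison:

def trunc(s: str) -> str:
    # Truncate to 10 characters and append an ellipsis if the string was longer.
    return s[:10] + "\u2026" if len(s) > 10 else s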