From ff268dc5689435cba14f7289ec0335e546f5bc56 Mon Sep 17 00:00:00 2001
From: Sam West
Date: Thu, 22 Feb 2024 13:22:35 +1100
Subject: [PATCH] addressed review comments from @olejandro

organised imports, appeased linters
---
 tests/test_transforms.py |  6 +-----
 utils/run_benchmarks.py  |  5 ++---
 xl2times/__main__.py     |  6 +++---
 xl2times/excel.py        |  1 -
 xl2times/transforms.py   | 21 ++++-----------------
 5 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index 2810f1b..694c31f 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -1,19 +1,14 @@
 from datetime import datetime
-from typing import Callable
 
 import pandas as pd
-from loguru import logger
 
 from xl2times import transforms, utils, datatypes
 from xl2times.transforms import (
     _process_comm_groups_vectorised,
     _count_comm_group_vectorised,
-    intersect,
     expand_rows,
     get_matching_commodities,
-    filter_by_pattern,
     get_matching_processes,
-    query_columns,
     _match_uc_wildcards,
     process_map,
     commodity_map,
@@ -56,6 +51,7 @@ def make_str(df):
         lambda row: make_str(get_matching_commodities(row, dictionary)), axis=1
     )
 
+    query_columns = transforms.process_map.keys() | transforms.commodity_map.keys()
     cols_to_drop = [col for col in df.columns if col in query_columns]
 
     df = expand_rows(
diff --git a/utils/run_benchmarks.py b/utils/run_benchmarks.py
index 6548bf1..d6ae4e6 100644
--- a/utils/run_benchmarks.py
+++ b/utils/run_benchmarks.py
@@ -14,7 +14,6 @@
 import git
 import pandas as pd
 import yaml
-from loguru import logger
 from tabulate import tabulate
 
 from dd_to_csv import main
@@ -452,11 +451,11 @@ def run_all_benchmarks(
     benchmarks_folder = spec["benchmarks_folder"]
     benchmark_names = [b["name"] for b in spec["benchmarks"]]
     if len(set(benchmark_names)) != len(benchmark_names):
-        logger.error(f"Found duplicate name in benchmarks YAML file")
+        logger.error("Found duplicate name in benchmarks YAML file")
         sys.exit(11)
 
     if args.dd and args.times_dir is None:
-        logger.error(f"--times_dir is required when using --dd")
+        logger.error("--times_dir is required when using --dd")
         sys.exit(12)
 
     if args.run is not None:
diff --git a/xl2times/__main__.py b/xl2times/__main__.py
index 97b6d1a..056d425 100644
--- a/xl2times/__main__.py
+++ b/xl2times/__main__.py
@@ -29,7 +29,7 @@ def convert_xl_to_times(
     pickle_file = "raw_tables.pkl"
     if use_pkl and os.path.isfile(pickle_file):
         raw_tables = pickle.load(open(pickle_file, "rb"))
-        logger.warning(f"Using pickled data not xlsx")
+        logger.warning("Using pickled data not xlsx")
     else:
         raw_tables = []
 
@@ -248,7 +248,7 @@ def produce_times_tables(
     result = {}
     used_tables = set()
     for mapping in config.times_xl_maps:
-        if not mapping.xl_name in input:
+        if mapping.xl_name not in input:
             logger.warning(
                 f"Cannot produce table {mapping.times_name} because"
                 f" {mapping.xl_name} does not exist"
@@ -281,7 +281,7 @@
             # Excel columns can be duplicated into multiple Times columns
             for times_col, xl_col in mapping.col_map.items():
                 df[times_col] = df[xl_col]
-            cols_to_drop = [x for x in df.columns if not x in mapping.times_cols]
+            cols_to_drop = [x for x in df.columns if x not in mapping.times_cols]
             df.drop(columns=cols_to_drop, inplace=True)
             df.drop_duplicates(inplace=True)
             df.reset_index(drop=True, inplace=True)
diff --git a/xl2times/excel.py b/xl2times/excel.py
index b9c4597..102d88a 100644
--- a/xl2times/excel.py
+++ b/xl2times/excel.py
@@ -4,7 +4,6 @@
 from typing import Dict, List
 import time
 from pandas.core.frame import DataFrame
-import pandas as pd
 import numpy
 import re
 from . import datatypes
diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index 474221f..d0892ba 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -19,17 +19,6 @@
 from . import utils
 from .utils import max_workers
-query_columns = {
-    "pset_set",
-    "pset_pn",
-    "pset_pd",
-    "pset_ci",
-    "pset_co",
-    "cset_set",
-    "cset_cn",
-    "cset_cd",
-}
-
 csets_ordered_for_pcg = ["DEM", "MAT", "NRG", "ENV", "FIN"]
 default_pcg_suffixes = [
     cset + io for cset in csets_ordered_for_pcg for io in ["I", "O"]
 ]
@@ -584,8 +573,7 @@ def process_user_constraint_table(
     # TODO: apply table.uc_sets
 
     # Fill in UC_N blank cells with value from above
-    if "uc_n" in df.columns:
-        df["uc_n"] = df["uc_n"].ffill()
+    df["uc_n"] = df["uc_n"].ffill()
 
     data_columns = [
         x for x in df.columns if x not in config.known_columns[datatypes.Tag.uc_t]
@@ -2144,10 +2132,9 @@ def process_uc_wildcards(
 ) -> Dict[str, DataFrame]:
     tag = datatypes.Tag.uc_t
 
-    if tag in tqdm(tables, desc=f"Processing uc_wildcards on tables"):
+    if tag in tqdm(tables, desc="Processing uc_wildcards on tables"):
         start_time = time.time()
         df = tables[tag]
-
         dictionary = generate_topology_dictionary(tables, model)
 
         df = _match_uc_wildcards(
@@ -2241,7 +2228,7 @@ def match_wildcards(
         matching_commodities is None or len(matching_commodities) == 0
     ):  # TODO is this necessary? Try without?
         # TODO debug these
-        logger.warning(f"a row matched no processes or commodities")
+        logger.warning("a row matched no processes or commodities")
         return None
 
     return matching_processes, matching_commodities
@@ -2343,7 +2330,7 @@ def eval_and_update(
     ):
         match = match_wildcards(row)
         if match is None:
-            logger.warning(f"TFM_INS-TXT row matched neither commodity nor process")
+            logger.warning("TFM_INS-TXT row matched neither commodity nor process")
             continue
         processes, commodities = match
         if commodities is not None: