From ff268dc5689435cba14f7289ec0335e546f5bc56 Mon Sep 17 00:00:00 2001
From: Sam West
Date: Thu, 22 Feb 2024 13:22:35 +1100
Subject: [PATCH] addressed review comments from @olejandro

organised imports, appeased linters
---
 tests/test_transforms.py |  6 +-----
 utils/run_benchmarks.py  |  5 ++---
 xl2times/__main__.py     |  6 +++---
 xl2times/excel.py        |  1 -
 xl2times/transforms.py   | 21 ++++-----------------
 5 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index 2810f1b..694c31f 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -1,19 +1,14 @@
 from datetime import datetime
-from typing import Callable
 
 import pandas as pd
-from loguru import logger
 
 from xl2times import transforms, utils, datatypes
 from xl2times.transforms import (
     _process_comm_groups_vectorised,
     _count_comm_group_vectorised,
-    intersect,
     expand_rows,
     get_matching_commodities,
-    filter_by_pattern,
     get_matching_processes,
-    query_columns,
     _match_uc_wildcards,
     process_map,
     commodity_map,
@@ -56,6 +51,7 @@ def make_str(df):
         lambda row: make_str(get_matching_commodities(row, dictionary)), axis=1
     )
 
+    query_columns = transforms.process_map.keys() | transforms.commodity_map.keys()
     cols_to_drop = [col for col in df.columns if col in query_columns]
 
     df = expand_rows(
diff --git a/utils/run_benchmarks.py b/utils/run_benchmarks.py
index 6548bf1..d6ae4e6 100644
--- a/utils/run_benchmarks.py
+++ b/utils/run_benchmarks.py
@@ -14,7 +14,6 @@
 import git
 import pandas as pd
 import yaml
-from loguru import logger
 from tabulate import tabulate
 
 from dd_to_csv import main
@@ -452,11 +451,11 @@ def run_all_benchmarks(
     benchmarks_folder = spec["benchmarks_folder"]
     benchmark_names = [b["name"] for b in spec["benchmarks"]]
     if len(set(benchmark_names)) != len(benchmark_names):
-        logger.error(f"Found duplicate name in benchmarks YAML file")
+        logger.error("Found duplicate name in benchmarks YAML file")
         sys.exit(11)
 
     if args.dd and args.times_dir is None:
-        logger.error(f"--times_dir is required when using --dd")
+        logger.error("--times_dir is required when using --dd")
         sys.exit(12)
 
     if args.run is not None:
diff --git a/xl2times/__main__.py b/xl2times/__main__.py
index 97b6d1a..056d425 100644
--- a/xl2times/__main__.py
+++ b/xl2times/__main__.py
@@ -29,7 +29,7 @@ def convert_xl_to_times(
     pickle_file = "raw_tables.pkl"
     if use_pkl and os.path.isfile(pickle_file):
         raw_tables = pickle.load(open(pickle_file, "rb"))
-        logger.warning(f"Using pickled data not xlsx")
+        logger.warning("Using pickled data not xlsx")
     else:
         raw_tables = []
 
@@ -248,7 +248,7 @@ def produce_times_tables(
     result = {}
     used_tables = set()
     for mapping in config.times_xl_maps:
-        if not mapping.xl_name in input:
+        if mapping.xl_name not in input:
             logger.warning(
                 f"Cannot produce table {mapping.times_name} because"
                 f" {mapping.xl_name} does not exist"
@@ -281,7 +281,7 @@
             # Excel columns can be duplicated into multiple Times columns
             for times_col, xl_col in mapping.col_map.items():
                 df[times_col] = df[xl_col]
-            cols_to_drop = [x for x in df.columns if not x in mapping.times_cols]
+            cols_to_drop = [x for x in df.columns if x not in mapping.times_cols]
             df.drop(columns=cols_to_drop, inplace=True)
             df.drop_duplicates(inplace=True)
             df.reset_index(drop=True, inplace=True)
diff --git a/xl2times/excel.py b/xl2times/excel.py
index b9c4597..102d88a 100644
--- a/xl2times/excel.py
+++ b/xl2times/excel.py
@@ -4,7 +4,6 @@
 from typing import Dict, List
 import time
 from pandas.core.frame import DataFrame
-import pandas as pd
 import numpy
 import re
 from . import datatypes
diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index 474221f..d0892ba 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -19,17 +19,6 @@
 from . import utils
 from .utils import max_workers
-query_columns = {
-    "pset_set",
-    "pset_pn",
-    "pset_pd",
-    "pset_ci",
-    "pset_co",
-    "cset_set",
-    "cset_cn",
-    "cset_cd",
-}
-
 csets_ordered_for_pcg = ["DEM", "MAT", "NRG", "ENV", "FIN"]
 default_pcg_suffixes = [
     cset + io for cset in csets_ordered_for_pcg for io in ["I", "O"]
 ]
@@ -584,8 +573,7 @@ def process_user_constraint_table(
     # TODO: apply table.uc_sets
 
     # Fill in UC_N blank cells with value from above
-    if "uc_n" in df.columns:
-        df["uc_n"] = df["uc_n"].ffill()
+    df["uc_n"] = df["uc_n"].ffill()
 
     data_columns = [
         x for x in df.columns if x not in config.known_columns[datatypes.Tag.uc_t]
@@ -2144,10 +2132,9 @@ def process_uc_wildcards(
 ) -> Dict[str, DataFrame]:
     tag = datatypes.Tag.uc_t
 
-    if tag in tqdm(tables, desc=f"Processing uc_wildcards on tables"):
+    if tag in tqdm(tables, desc="Processing uc_wildcards on tables"):
         start_time = time.time()
         df = tables[tag]
-
         dictionary = generate_topology_dictionary(tables, model)
 
         df = _match_uc_wildcards(
@@ -2241,7 +2228,7 @@ def match_wildcards(
         matching_commodities is None or len(matching_commodities) == 0
     ):  # TODO is this necessary? Try without?
         # TODO debug these
-        logger.warning(f"a row matched no processes or commodities")
+        logger.warning("a row matched no processes or commodities")
         return None
 
     return matching_processes, matching_commodities
@@ -2343,7 +2330,7 @@ def eval_and_update(
     ):
         match = match_wildcards(row)
         if match is None:
-            logger.warning(f"TFM_INS-TXT row matched neither commodity nor process")
+            logger.warning("TFM_INS-TXT row matched neither commodity nor process")
             continue
         processes, commodities = match
         if commodities is not None: