Skip to content

Commit

Permalink
addressed review comments from @olejandro
Browse files Browse the repository at this point in the history
organised imports, appeased linters
  • Loading branch information
SamRWest committed Feb 22, 2024
1 parent fcd11cd commit ff268dc
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 29 deletions.
6 changes: 1 addition & 5 deletions tests/test_transforms.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
from datetime import datetime
from typing import Callable

import pandas as pd
from loguru import logger

from xl2times import transforms, utils, datatypes
from xl2times.transforms import (
_process_comm_groups_vectorised,
_count_comm_group_vectorised,
intersect,
expand_rows,
get_matching_commodities,
filter_by_pattern,
get_matching_processes,
query_columns,
_match_uc_wildcards,
process_map,
commodity_map,
Expand Down Expand Up @@ -56,6 +51,7 @@ def make_str(df):
lambda row: make_str(get_matching_commodities(row, dictionary)), axis=1
)

query_columns = transforms.process_map.keys() | transforms.commodity_map.keys()
cols_to_drop = [col for col in df.columns if col in query_columns]

df = expand_rows(
Expand Down
5 changes: 2 additions & 3 deletions utils/run_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import git
import pandas as pd
import yaml
from loguru import logger
from tabulate import tabulate

from dd_to_csv import main
Expand Down Expand Up @@ -452,11 +451,11 @@ def run_all_benchmarks(
benchmarks_folder = spec["benchmarks_folder"]
benchmark_names = [b["name"] for b in spec["benchmarks"]]
if len(set(benchmark_names)) != len(benchmark_names):
- logger.error(f"Found duplicate name in benchmarks YAML file")
+ logger.error("Found duplicate name in benchmarks YAML file")
sys.exit(11)

if args.dd and args.times_dir is None:
- logger.error(f"--times_dir is required when using --dd")
+ logger.error("--times_dir is required when using --dd")
sys.exit(12)

if args.run is not None:
Expand Down
6 changes: 3 additions & 3 deletions xl2times/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def convert_xl_to_times(
pickle_file = "raw_tables.pkl"
if use_pkl and os.path.isfile(pickle_file):
raw_tables = pickle.load(open(pickle_file, "rb"))
- logger.warning(f"Using pickled data not xlsx")
+ logger.warning("Using pickled data not xlsx")
else:
raw_tables = []

Expand Down Expand Up @@ -248,7 +248,7 @@ def produce_times_tables(
result = {}
used_tables = set()
for mapping in config.times_xl_maps:
- if not mapping.xl_name in input:
+ if mapping.xl_name not in input:
logger.warning(
f"Cannot produce table {mapping.times_name} because"
f" {mapping.xl_name} does not exist"
Expand Down Expand Up @@ -281,7 +281,7 @@ def produce_times_tables(
# Excel columns can be duplicated into multiple Times columns
for times_col, xl_col in mapping.col_map.items():
df[times_col] = df[xl_col]
- cols_to_drop = [x for x in df.columns if not x in mapping.times_cols]
+ cols_to_drop = [x for x in df.columns if x not in mapping.times_cols]
df.drop(columns=cols_to_drop, inplace=True)
df.drop_duplicates(inplace=True)
df.reset_index(drop=True, inplace=True)
Expand Down
1 change: 0 additions & 1 deletion xl2times/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from typing import Dict, List
import time
from pandas.core.frame import DataFrame
import pandas as pd
import numpy
import re
from . import datatypes
Expand Down
21 changes: 4 additions & 17 deletions xl2times/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,6 @@
from . import utils
from .utils import max_workers

query_columns = {
"pset_set",
"pset_pn",
"pset_pd",
"pset_ci",
"pset_co",
"cset_set",
"cset_cn",
"cset_cd",
}

csets_ordered_for_pcg = ["DEM", "MAT", "NRG", "ENV", "FIN"]
default_pcg_suffixes = [
cset + io for cset in csets_ordered_for_pcg for io in ["I", "O"]
Expand Down Expand Up @@ -584,8 +573,7 @@ def process_user_constraint_table(
# TODO: apply table.uc_sets

# Fill in UC_N blank cells with value from above
- if "uc_n" in df.columns:
- df["uc_n"] = df["uc_n"].ffill()
+ df["uc_n"] = df["uc_n"].ffill()

data_columns = [
x for x in df.columns if x not in config.known_columns[datatypes.Tag.uc_t]
Expand Down Expand Up @@ -2144,10 +2132,9 @@ def process_uc_wildcards(
) -> Dict[str, DataFrame]:
tag = datatypes.Tag.uc_t

- if tag in tqdm(tables, desc=f"Processing uc_wildcards on tables"):
+ if tag in tqdm(tables, desc="Processing uc_wildcards on tables"):
start_time = time.time()
df = tables[tag]

dictionary = generate_topology_dictionary(tables, model)

df = _match_uc_wildcards(
Expand Down Expand Up @@ -2241,7 +2228,7 @@ def match_wildcards(
matching_commodities is None or len(matching_commodities) == 0
): # TODO is this necessary? Try without?
# TODO debug these
- logger.warning(f"a row matched no processes or commodities")
+ logger.warning("a row matched no processes or commodities")
return None
return matching_processes, matching_commodities

Expand Down Expand Up @@ -2343,7 +2330,7 @@ def eval_and_update(
):
match = match_wildcards(row)
if match is None:
- logger.warning(f"TFM_INS-TXT row matched neither commodity nor process")
+ logger.warning("TFM_INS-TXT row matched neither commodity nor process")
continue
processes, commodities = match
if commodities is not None:
Expand Down

0 comments on commit ff268dc

Please sign in to comment.