Update Demos version used in benchmarks #204

Merged
10 commits merged on Mar 4, 2024
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -17,7 +17,7 @@ jobs:
env:
PY_VERSION: "3.11"
REF_TIMES_model: "b488fb07f0899ee8b7e710c230b1a9414fa06f7d"
REF_demos-xlsx: "f956db07a253d4f5c60e108791ab7bb2b8136690"
REF_demos-xlsx: "34a2a5c044cc0bbea1357de50db2f5f02d575181"
REF_demos-dd: "2848a8a8e2fdcf0cdf7f83eefbdd563b0bb74e86"
REF_tim: "e820d8002adc6b1526a3bffcc439219b28d0eed5"
REF_tim-gams: "703f6a4e1d0bedd95c3ebdae534496f3a7e1b7cc"
80 changes: 80 additions & 0 deletions tests/test_utils.py
@@ -0,0 +1,80 @@
from xl2times import utils
import pandas as pd


class TestUtils:
    def test_explode(self):
        """
        Test that the explode logic functions correctly.
        """

        input_df1 = pd.DataFrame(
            [
                ("PRC1", 100, None, 200),
                ("PRC2", 100, 150, None),
                ("PRC3", None, 150, 200),
            ],
            columns=["process", "act_bnd", "actbnd", "act_bnd"],
        )

        input_df2 = pd.DataFrame(
            [
                ("PRC1", 100, None),
                ("PRC2", 100, 150),
                ("PRC3", None, 150),
            ],
            columns=["process", "act_bnd", "actbnd"],
        )

        data_cols1 = ["act_bnd", "actbnd", "act_bnd"]
        data_cols2 = ["act_bnd", "actbnd"]

        correct_index1 = [0, 2, 3, 4, 7, 8]
        correct_index2 = [0, 2, 3, 5]

        correct_result1 = (
            pd.DataFrame(
                [
                    ("PRC1", 100),
                    ("PRC1", 200),
                    ("PRC2", 100),
                    ("PRC2", 150),
                    ("PRC3", 150),
                    ("PRC3", 200),
                ],
                columns=["process", "value"],
                index=correct_index1,
                dtype=object,
            ),
            pd.Series(
                ["act_bnd", "act_bnd", "act_bnd", "actbnd", "actbnd", "act_bnd"],
                index=correct_index1,
            ),
        )

        correct_result2 = (
            pd.DataFrame(
                [
                    ("PRC1", 100),
                    ("PRC2", 100),
                    ("PRC2", 150),
                    ("PRC3", 150),
                ],
                columns=["process", "value"],
                index=correct_index2,
                dtype=object,
            ),
            pd.Series(["act_bnd", "act_bnd", "actbnd", "actbnd"], index=correct_index2),
        )

        output1 = utils.explode(input_df1, data_cols1)
        output2 = utils.explode(input_df2, data_cols2)

        assert output1[0].equals(correct_result1[0]), "Dataframes should be equal"
        assert output1[1].equals(correct_result1[1]), "Series should be equal"
        assert output2[0].equals(correct_result2[0]), "Dataframes should be equal"
        assert output2[1].equals(correct_result2[1]), "Series should be equal"


if __name__ == "__main__":
    TestUtils().test_explode()
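For reference, here is a minimal sketch of what utils.explode is assumed to do, inferred purely from the expected outputs in the test above (it is not the actual xl2times implementation): the listed data columns are flattened row-major into a single "value" column, missing entries are dropped, and the name of the originating column is returned for each surviving row.

import pandas as pd


def explode_sketch(df: pd.DataFrame, data_cols: list[str]) -> tuple[pd.DataFrame, pd.Series]:
    # Work with column positions so duplicate names (e.g. two "act_bnd" columns) stay unambiguous.
    data_pos = [i for i, c in enumerate(df.columns) if c in data_cols]
    other_cols = [c for c in df.columns if c not in data_cols]
    out = df[other_cols].loc[df.index.repeat(len(data_pos))].copy()
    out["value"] = df.iloc[:, data_pos].to_numpy().ravel()  # row-major flatten
    out.index = range(len(out))
    names = pd.Series(list(df.columns[data_pos]) * len(df), index=out.index)
    keep = out["value"].notna()
    return out[keep], names[keep]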
4 changes: 2 additions & 2 deletions xl2times/__main__.py
@@ -93,6 +93,7 @@ def convert_xl_to_times(
transforms.remove_comment_rows,
transforms.revalidate_input_tables,
transforms.process_regions,
transforms.process_time_periods,
transforms.remove_exreg_cols,
transforms.generate_dummy_processes,
transforms.process_time_slices,
@@ -104,13 +105,12 @@
transforms.process_flexible_import_tables, # slow
transforms.process_user_constraint_tables,
transforms.process_commodity_emissions,
transforms.generate_uc_properties,
transforms.process_commodities,
transforms.process_transform_availability,
transforms.fill_in_missing_values,
transforms.generate_uc_properties,
transforms.expand_rows_parallel, # slow
transforms.remove_invalid_values,
transforms.process_time_periods,
transforms.capitalise_some_values,
transforms.apply_fixups,
transforms.generate_commodity_groups,
75 changes: 53 additions & 22 deletions xl2times/transforms.py
@@ -665,7 +665,7 @@ def process_user_constraint_table(
set(regions.upper().split(","))
)
regions = ",".join(regions)
df["region"] = df["region"].fillna(regions)
df.loc[df["region"].isna(), ["region"]] = regions

# TODO: detect RHS correctly
i = df["side"].isna()
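Side note on the pattern applied throughout this diff: writing through a single .loc indexer replaces the earlier fillna / chained-index assignments, which can silently operate on a copy of the column. A minimal illustration with made-up data:

import pandas as pd

df = pd.DataFrame({"region": [None, "REG1"], "side": ["LHS", None]})
# df["region"][df["region"].isna()] = "ALL" is chained assignment and may not modify df at all;
# a single .loc write is unambiguous and warning-free.
df.loc[df["region"].isna(), "region"] = "ALL"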
@@ -741,18 +741,20 @@ def generate_uc_properties(
# Use name to populate description if it is missing
index = user_constraints["description"].isna()
if any(index):
user_constraints["description"][index] = user_constraints["uc_n"][index]
user_constraints.loc[index, ["description"]] = user_constraints["uc_n"][
index
]

# TODO: Can this (until user_constraints.explode) become a utility function?
# Handle allregions by substituting it with a list of internal regions
index = user_constraints["region"].str.lower() == "allregions"
if any(index):
user_constraints["region"][index] = [model.internal_regions]
user_constraints.loc[index, ["region"]] = ",".join(model.internal_regions)

# Handle comma-separated regions
index = user_constraints["region"].str.contains(",").fillna(value=False)
index = user_constraints["region"].str.contains(",")
if any(index):
user_constraints["region"][index] = user_constraints.apply(
user_constraints.loc[index, ["region"]] = user_constraints.apply(
lambda row: [
region
for region in str(row["region"]).split(",")
@@ -782,7 +784,6 @@ def fill_in_missing_values(
:return: List of tables in EmbeddedXlTable format with empty values filled in.
"""
result = []
start_year = one(utils.single_column(tables, datatypes.Tag.start_year, "value"))
# TODO there are multiple currencies
currency = utils.single_column(tables, datatypes.Tag.currencies, "currency")[0]
# The default regions for VT_* files is given by ~BookRegions_Map:
@@ -841,18 +842,19 @@ def fill_in_missing_values_table(table):
elif colname == "region":
# Use BookRegions_Map to fill VT_* files, and all regions for other files
matches = re.search(r"VT_([A-Za-z0-9]+)_", Path(table.filename).stem)
isna = df[colname].isna()
if matches is not None:
book = matches.group(1)
if book in vt_regions:
df = df.fillna({colname: ",".join(vt_regions[book])})
df.loc[isna, [colname]] = ",".join(vt_regions[book])
else:
logger.warning(f"book name {book} not in BookRegions_Map")
else:
df = df.fillna({colname: ",".join(model.internal_regions)})
df.loc[isna, [colname]] = ",".join(model.internal_regions)
elif colname == "year":
df = df.fillna({colname: start_year})
df.loc[df[colname].isna(), [colname]] = model.start_year
elif colname == "currency":
df = df.fillna({colname: currency})
df.loc[df[colname].isna(), [colname]] = currency

return replace(table, dataframe=df)
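For context, the book name used to look up BookRegions_Map comes from the VT_ file-naming convention; a small illustration with a hypothetical file name:

import re
from pathlib import Path

filename = "VT_REG1_PRI_V2.xlsx"  # hypothetical
match = re.search(r"VT_([A-Za-z0-9]+)_", Path(filename).stem)
print(match.group(1) if match else None)  # -> "REG1"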

@@ -1436,7 +1438,7 @@ def generate_trade(
i = top_ire["origin"].isin(model.internal_regions) | top_ire["destination"].isin(
model.internal_regions
)
model.trade = top_ire[i].reset_index()
model.trade = top_ire[i].reset_index(drop=True)

return tables
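Brief aside on the reset_index(drop=True) change above: without drop=True the old index is re-inserted as an extra "index" column, which would pollute model.trade. A tiny illustration:

import pandas as pd

top_ire = pd.DataFrame({"origin": ["REG1", "REG2"]}, index=[3, 7])
print(top_ire.reset_index().columns.tolist())           # ['index', 'origin']
print(top_ire.reset_index(drop=True).columns.tolist())  # ['origin']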

@@ -1595,14 +1597,17 @@ def process_years(
model.past_years = datayears.where(lambda x: x < model.start_year).dropna()

# Modelyears is the union of pastyears and the representative years of the model (middleyears)
model.model_years = (
pd.concat(
[model.past_years, model.time_periods["m"]],
ignore_index=True,
if not model.past_years.empty:
model.model_years = (
pd.concat(
[model.past_years, model.time_periods["m"]],
ignore_index=True,
)
.drop_duplicates()
.sort_values()
)
.drop_duplicates()
.sort_values()
)
else:
model.model_years = model.time_periods["m"]

return tables
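The new emptiness guard presumably sidesteps concatenating an empty Series (recent pandas versions warn that empty or all-NA inputs to concat will stop contributing to dtype inference); a sketch of the pattern with hypothetical years:

import pandas as pd

past_years = pd.Series([], dtype="int64")  # hypothetical: no past years in the data
middle_years = pd.Series([2020, 2030, 2040])
if past_years.empty:
    model_years = middle_years
else:
    model_years = (
        pd.concat([past_years, middle_years], ignore_index=True)
        .drop_duplicates()
        .sort_values()
    )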

@@ -2300,6 +2305,7 @@ def query(
commodities: DataFrame | None,
attribute: str | None,
region: str | None,
year: int | None,
) -> pd.Index:
qs = []
if processes is not None and not processes.empty:
@@ -2310,6 +2316,8 @@
qs.append(f"attribute == '{attribute}'")
if region is not None:
qs.append(f"region == '{region}'")
if year is not None:
qs.append(f"year == {year}")
return table.query(" and ".join(qs)).index
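For illustration, the added year filter simply extends the DataFrame.query string that the other criteria already build; a minimal sketch with hypothetical rows:

import pandas as pd

table = pd.DataFrame(
    {
        "process": ["PRC1", "PRC1"],
        "region": ["REG1", "REG1"],
        "attribute": ["ACT_BND", "ACT_BND"],
        "year": [2020, 2030],
    }
)
qs = ["attribute == 'ACT_BND'", "region == 'REG1'", "year == 2030"]
print(table.query(" and ".join(qs)).index)  # Index([1], dtype='int64')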

def eval_and_update(
Expand All @@ -2329,7 +2337,7 @@ def eval_and_update(
table = tables[datatypes.Tag.fi_t]
new_tables = [table]
# Reset FI_T index so that queries can determine unique rows to update
tables[datatypes.Tag.fi_t].reset_index(inplace=True)
tables[datatypes.Tag.fi_t].reset_index(inplace=True, drop=True)

# TFM_UPD: expand wildcards in each row, query FI_T to find matching rows,
# evaluate the update formula, and add new rows to FI_T
@@ -2346,9 +2354,15 @@ def eval_and_update(
continue
processes, commodities = match
rows_to_update = query(
table, processes, commodities, row["attribute"], row["region"]
table,
processes,
commodities,
row["attribute"],
row["region"],
row["year"],
)
new_rows = table.loc[rows_to_update].copy()
new_rows["source_filename"] = row["source_filename"]
eval_and_update(new_rows, rows_to_update, row["value"])
new_tables.append(new_rows)

@@ -2375,6 +2389,7 @@ def eval_and_update(
new_rows = processes.merge(new_rows, how="cross")
if commodities is not None:
new_rows = commodities.merge(new_rows, how="cross")
new_rows["source_filename"] = row["source_filename"]
new_tables.append(new_rows)

new_tables.append(tables[datatypes.Tag.fi_t])
@@ -2403,7 +2418,9 @@ def eval_and_update(
assert False # All rows match either a commodity or a process

# Query for rows with matching process/commodity and region
rows_to_update = query(table, processes, commodities, None, row["region"])
rows_to_update = query(
table, processes, commodities, None, row["region"], None
)
# Overwrite (inplace) the column given by the attribute (translated by attr_prop)
# with the value from row
# E.g. if row['attribute'] == 'PRC_TSL' then we overwrite 'tslvl'
@@ -2423,7 +2440,12 @@ def eval_and_update(
processes, commodities = match if match is not None else (None, None)
# TODO should we also query on limtype?
rows_to_update = query(
table, processes, commodities, row["attribute"], row["region"]
table,
processes,
commodities,
row["attribute"],
row["region"],
row["year"],
)
new_rows = table.loc[rows_to_update].copy()
# Modify values in all '*2' columns
@@ -2432,6 +2454,7 @@ def eval_and_update(
new_rows.loc[:, c[:-1]] = v
# Evaluate 'value' column based on existing values
eval_and_update(new_rows, rows_to_update, row["value"])
new_rows["source_filename"] = row["source_filename"]
new_tables.append(new_rows)

# Add new rows to table
@@ -2619,6 +2642,14 @@ def convert_aliases(
df.replace({"attribute": replacement_dict}, inplace=True)
tables[table_type] = df

# Drop duplicates generated due to renaming
# TODO: Clear values in irrelevant columns before doing this
# TODO: Do this comprehensively for all relevant tables
df = tables[datatypes.Tag.fi_t]
df = df.dropna(subset="value").drop_duplicates(
subset=[col for col in df.columns if col != "value"], keep="last"
)
tables[datatypes.Tag.fi_t] = df.reset_index(drop=True)
# TODO: do this earlier
model.attributes = tables[datatypes.Tag.fi_t]
if datatypes.Tag.uc_t in tables.keys():
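A small illustration (made-up rows) of the keep="last" de-duplication added above: after alias renaming, rows that differ only in "value" collapse to the most recently written one:

import pandas as pd

df = pd.DataFrame(
    {"attribute": ["ACT_BND", "ACT_BND"], "process": ["PRC1", "PRC1"], "value": [100, 150]}
)
df = df.dropna(subset="value").drop_duplicates(
    subset=[col for col in df.columns if col != "value"], keep="last"
)
print(df.reset_index(drop=True))  # a single row, with value 150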
2 changes: 1 addition & 1 deletion xl2times/utils.py
@@ -181,7 +181,7 @@ def missing_value_inherit(df: DataFrame, colname: str):


def get_scalar(table_tag: str, tables: List[datatypes.EmbeddedXlTable]):
table = next(filter(lambda t: t.tag == table_tag, tables))
table = one(filter(lambda t: t.tag == table_tag, tables))
if table.dataframe.shape[0] != 1 or table.dataframe.shape[1] != 1:
raise ValueError("Not scalar table")
return table.dataframe["value"].values[0]
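For context, swapping next(...) for one(...) (presumably more_itertools.one) makes the scalar-table lookup strict: next quietly returns the first match and ignores duplicates, while one raises if the filter yields zero or more than one table. A tiny sketch with a hypothetical tag:

from more_itertools import one

tags = ["~SOMETAG", "~SOMETAG"]  # hypothetical tag appearing twice
next(t for t in tags if t == "~SOMETAG")  # returns the first match, hides the duplicate
one(t for t in tags if t == "~SOMETAG")   # raises ValueError: expected exactly one item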