Refine application of trans files (#263)
Refine the application of the tfm tables in trans files by removing from
the generated tables any row that:
- for an attribute with process in its indices, includes a process that
  is not declared in the corresponding non-trans file;
- for an attribute with commodity in its indices, includes a commodity
  that is not declared in the corresponding non-trans file, base, or
  syssettings.

This PR should enable #256
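For illustration, here is a minimal sketch of the rule described above, using toy data. All names (`declared`, `generated`, `MODULE_A`, `PRC1`, etc.) are invented for the example and are not part of the codebase:

```python
import pandas as pd

# Hypothetical per-module sets of declared processes (a stand-in for the
# declarations found in the corresponding non-trans file).
declared = {"MODULE_A": {"PRC1", "PRC2"}}

# Rows generated from a trans file for MODULE_A; ACT_COST has process in
# its indices, so each row must reference a declared process.
generated = pd.DataFrame(
    {
        "attribute": ["ACT_COST", "ACT_COST"],
        "process": ["PRC1", "PRC9"],  # PRC9 is not declared in MODULE_A
        "value": [1.0, 2.0],
    }
)

# Keep only rows whose process is declared; the PRC9 row is dropped.
generated = generated[generated["process"].isin(declared["MODULE_A"])]
```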
olejandro authored Dec 26, 2024
1 parent d2bc4da commit 1d853ca
Showing 2 changed files with 53 additions and 14 deletions.
2 changes: 1 addition & 1 deletion xl2times/__main__.py
@@ -136,12 +136,12 @@ def convert_xl_to_times(
transforms.generate_uc_properties,
transforms.expand_rows_parallel, # slow
transforms.remove_invalid_values,
- transforms.include_tables_source,
transforms.internalise_commodities,
transforms.generate_commodity_groups,
transforms.apply_fixups,
transforms.fill_in_missing_pcgs,
transforms.generate_trade,
+ transforms.include_tables_source,
transforms.merge_tables,
transforms.complete_processes,
transforms.process_units,
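The list above is an ordered pipeline: each transform consumes the tables produced by the one before it, so moving `include_tables_source` after `generate_trade` changes which data it sees. A minimal sketch of that pattern follows; the `run_pipeline` helper and the `Tables` alias are hypothetical, not part of xl2times (whose transforms also take additional arguments):

```python
from typing import Callable

# Hypothetical stand-in for the table container passed between transforms.
Tables = dict

def run_pipeline(
    tables: Tables, transforms: list[Callable[[Tables], Tables]]
) -> Tables:
    # Each transform sees the output of its predecessors, so list order
    # determines what data a later step can rely on.
    for transform in transforms:
        tables = transform(tables)
    return tables
```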
65 changes: 52 additions & 13 deletions xl2times/transforms.py
@@ -2493,11 +2493,11 @@ def apply_transform_tables(
# Determine the rows that won't be updated
tables[Tag.fi_t] = _remove_invalid_rows_(
tables[Tag.fi_t], updates, modules_with_ava
- )
+ ).reset_index(drop=True)
# TODO: This should happen much earlier in the process
model.processes = _remove_invalid_rows_(
model.processes, updates, modules_with_ava
- )
+ ).reset_index(drop=True)
# TODO: should be unnecessary if model.processes is updated early enough
# Remove topology rows that are not in the processes
model.topology = pd.merge(
@@ -2507,6 +2507,28 @@
how="inner",
)

+ # Create a dictionary of processes/commodities indexed by module name
+ obj_by_module = dict()
+ obj_by_module["process"] = (
+     model.processes.groupby("module_name")["process"].agg(set).to_dict()
+ )
+ obj_by_module["commodity"] = (
+     model.commodities.groupby("module_name")["commodity"].agg(set).to_dict()
+ )
+ # Create a dictionary of processes/commodities available in addition to those declared in a module
+ obj_suppl = dict()
+ obj_suppl["process"] = set()
+ obj_suppl["commodity"] = (
+     obj_by_module["commodity"]
+     .get("BASE", set())
+     .union(obj_by_module["commodity"].get("SYSSETTINGS", set()))
+ )
+ # Create sets of attributes that require a process/commodity index
+ attr_with_obj = {
+     obj: {attr.times_name for attr in config.times_xl_maps if obj in attr.xl_cols}
+     for obj in ["process", "commodity"]
+ }

if Tag.tfm_comgrp in tables:
table = model.commodity_groups
updates = tables[Tag.tfm_comgrp].filter(table.columns, axis=1)
@@ -2517,23 +2539,23 @@
model.commodity_groups = commodity_groups.dropna()

for data_module in model.data_modules:
+ generated_records = []
if (
Tag.tfm_dins in tables
and data_module in tables[Tag.tfm_dins]["module_name"].unique()
):
table = tables[Tag.fi_t]
index = tables[Tag.tfm_dins]["module_name"] == data_module
updates = tables[Tag.tfm_dins][index].filter(table.columns, axis=1)
- tables[Tag.fi_t] = pd.concat([table, updates], ignore_index=True)

+ generated_records.append(updates)
if (
Tag.tfm_ins in tables
and data_module in tables[Tag.tfm_ins]["module_name"].unique()
):
table = tables[Tag.fi_t]
index = tables[Tag.tfm_ins]["module_name"] == data_module
updates = tables[Tag.tfm_ins][index].filter(table.columns, axis=1)
- tables[Tag.fi_t] = pd.concat([table, updates], ignore_index=True)
+ generated_records.append(updates)

if (
Tag.tfm_ins_txt in tables
@@ -2587,9 +2609,7 @@
index = tables[Tag.tfm_upd]["module_name"] == data_module
updates = tables[Tag.tfm_upd][index]
table = tables[Tag.fi_t]
- new_tables = [table]
- # Reset FI_T index so that queries can determine unique rows to update
- tables[Tag.fi_t].reset_index(inplace=True, drop=True)
+ new_tables = []

# TFM_UPD: expand wildcards in each row, query FI_T to find matching rows,
# evaluate the update formula, and add new rows to FI_T
@@ -2640,8 +2660,7 @@
new_rows["submodule"] = row["submodule"]
new_tables.append(new_rows)

- # Add new rows to table
- tables[Tag.fi_t] = pd.concat(new_tables, ignore_index=True)
+ generated_records.append(pd.concat(new_tables, ignore_index=True))

if (
Tag.tfm_mig in tables
@@ -2703,9 +2722,29 @@
new_rows["submodule"] = row["submodule"]
new_tables.append(new_rows)

- # Add new rows to table
- new_tables.append(tables[Tag.fi_t])
- tables[Tag.fi_t] = pd.concat(new_tables, ignore_index=True)
+ generated_records.append(pd.concat(new_tables, ignore_index=True))

+ if generated_records:
+     module_data = pd.concat(generated_records, ignore_index=True)
+     module_type = module_data["module_type"].iloc[0]
+     # Explode process and commodity columns and remove invalid rows
+     for obj in ["process", "commodity"]:
+         if obj in module_data.columns:
+             module_data = module_data.explode(obj, ignore_index=True)
+             if module_type in {"base", "subres"}:
+                 valid_objs = (
+                     obj_by_module[obj]
+                     .get(data_module, set())
+                     .union(obj_suppl[obj])
+                 )
+                 drop = ~module_data[obj].isin(valid_objs) & module_data[
+                     "attribute"
+                 ].isin(attr_with_obj[obj])
+                 module_data = module_data[~drop]
+     if not module_data.empty:
+         tables[Tag.fi_t] = pd.concat(
+             [tables[Tag.fi_t], module_data], ignore_index=True
+         )

return tables

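The new filtering block relies on two pandas idioms: `explode`, which turns a cell holding a set of objects into one row per object, and `isin`, which builds the boolean mask of rows to drop. Below is a small self-contained sketch under toy data; the frame contents and the `valid_objs`/`attrs_with_obj` values are invented for illustration:

```python
import pandas as pd

# Toy generated records: process cells hold sets after wildcard expansion;
# G_DRATE stands in for an attribute without a process index.
module_data = pd.DataFrame(
    {
        "attribute": ["FLO_SHAR", "FLO_SHAR", "G_DRATE"],
        "process": [{"PRC1", "PRC9"}, {"PRC2"}, None],
    }
)
valid_objs = {"PRC1", "PRC2"}   # objects declared in the module (plus any supplement)
attrs_with_obj = {"FLO_SHAR"}   # attributes whose indices include a process

# One object per row, then drop rows that name an undeclared object for
# an attribute that requires one.
module_data = module_data.explode("process", ignore_index=True)
drop = ~module_data["process"].isin(valid_objs) & module_data["attribute"].isin(
    attrs_with_obj
)
module_data = module_data[~drop]  # only the PRC9 row is removed
```

The G_DRATE row survives even though its process cell is empty, because the mask also requires the attribute to be one that takes a process index; this mirrors why the commit combines the `isin` check on objects with the `attr_with_obj` check on attributes.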
