Commit

Support external regions (#181)

Close #162

olejandro authored Feb 16, 2024
1 parent 51d2540 commit 7e11a40
Showing 4 changed files with 164 additions and 26 deletions.
33 changes: 33 additions & 0 deletions benchmarks.yml
@@ -108,6 +108,39 @@ benchmarks:
- "tra_co2_bound"
- "uc_co2bnd"
- "uc_growth"
- name: DemoS_007-all-1r
input_folder: DemoS_007
inputs:
- "BY_Trans.xlsx"
- "Sets-DemoModels.xlsx"
- "SysSettings.xlsx"
- "SubRES_TMPL/SubRES_NewTechs.xlsx"
- "SubRES_TMPL/SubRES_NewTechs_Trans.xlsx"
- "SuppXLS/Scen_DemProj_DTCAR.xlsx"
- "SuppXLS/Scen_ElasticDem.xlsx"
- "SuppXLS/Scen_ELC_CO2_BOUND.xlsx"
- "SuppXLS/Scen_Peak_RSV.xlsx"
- "SuppXLS/Scen_Refinery.xlsx"
- "SuppXLS/Scen_TRA_CO2_BOUND.xlsx"
- "SuppXLS/Scen_UC_CO2BND.xlsx"
- "SuppXLS/Scen_UC_Growth.xlsx"
- "SuppXLS/Trades/ScenTrade__Trade_Links.xlsx"
- "SuppXLS/Trades/ScenTrade_TRADE_PARAM.xlsx"
- "VT_REG1_PRI_V07.xlsx"
dd_folder: DemoS_007-all-1r
dd_files:
- "base"
- "newtechs"
- "syssettings"
- "trade_param"
- "demproj_dtcar"
- "elasticdem"
- "elc_co2_bound"
- "peak_rsv"
- "refinery"
- "tra_co2_bound"
- "uc_co2bnd"
- "uc_growth"
- name: DemoS_008-all
input_folder: DemoS_008
dd_folder: DemoS_008-all
5 changes: 4 additions & 1 deletion xl2times/__main__.py
@@ -59,16 +59,17 @@ def convert_xl_to_times(
transform_list = [
transforms.normalize_tags_columns,
transforms.remove_fill_tables,
transforms.validate_input_tables,
lambda config, tables, model: [
transforms.remove_comment_cols(t) for t in tables
],
transforms.validate_input_tables,
transforms.remove_tables_with_formulas, # slow
transforms.normalize_column_aliases,
lambda config, tables, model: [
transforms.remove_comment_rows(config, t, model) for t in tables
],
transforms.process_regions,
transforms.remove_exreg_cols,
transforms.generate_dummy_processes,
transforms.process_time_slices,
transforms.process_transform_insert_variants,
@@ -421,6 +422,8 @@ def run(args) -> str | None:
else:
input_files = args.input

model.files.update([Path(path).stem for path in input_files])

if args.only_read:
tables = convert_xl_to_times(
input_files,
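The run entry point now records the stem of every input file (the file name without its extension) on the model; process_regions later matches these stems against the VT_<bookname>_ pattern. A minimal sketch of the bookkeeping, using file names from the benchmark above:

    from pathlib import Path

    input_files = ["SysSettings.xlsx", "VT_REG1_PRI_V07.xlsx"]
    files = {Path(path).stem for path in input_files}
    assert files == {"SysSettings", "VT_REG1_PRI_V07"}
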
8 changes: 5 additions & 3 deletions xl2times/datatypes.py
@@ -163,6 +163,7 @@ class TimesModel:
data_years: Tuple[int] = field(default_factory=tuple)
model_years: Tuple[int] = field(default_factory=tuple)
past_years: Tuple[int] = field(default_factory=tuple)
files: Set[str] = field(default_factory=set)

def external_regions(self) -> Set[str]:
return self.all_regions.difference(self.internal_regions)
@@ -404,9 +405,10 @@ def to_tag(s: str) -> Tag:

for valid_field_name in valid_field_names:
valid_column_names[tag_name][valid_field_name] = field_name
row_comment_chars[tag_name][field_name] = valid_field[
"row_ignore_symbol"
]

row_comment_chars[tag_name][field_name] = valid_field[
"row_ignore_symbol"
]

# TODO: Account for differences in valid field names with base_tag
if "base_tag" in tag_info:
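The new files field feeds the region bookkeeping above; external_regions() is simply the set difference between all_regions and internal_regions. A minimal sketch with illustrative region names:

    all_regions = {"REG1", "REG2", "IMPEXP", "MINRNW"}
    internal_regions = {"REG1"}
    # Mirrors TimesModel.external_regions()
    assert all_regions.difference(internal_regions) == {"REG2", "IMPEXP", "MINRNW"}
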
144 changes: 122 additions & 22 deletions xl2times/transforms.py
@@ -100,6 +100,49 @@ def remove_comment_cols(table: datatypes.EmbeddedXlTable) -> datatypes.EmbeddedX
return replace(table, dataframe=df)


def remove_exreg_cols(
config: datatypes.Config,
tables: List[datatypes.EmbeddedXlTable],
model: datatypes.TimesModel,
) -> List[datatypes.EmbeddedXlTable]:
"""
Remove external region columns from all the tables except tradelinks.
"""

external_regions = model.external_regions()

def remove_table_exreg_cols(
table: datatypes.EmbeddedXlTable,
) -> datatypes.EmbeddedXlTable:
"""
Return a modified copy of 'table' where columns that are external regions
have been removed.
"""

exreg_cols = [
colname
for colname in table.dataframe.columns
if colname.upper() in external_regions
]

if exreg_cols:
df = table.dataframe.drop(exreg_cols, axis=1)
return replace(table, dataframe=df)

else:
return table

# Do nothing if external_regions is empty
if not external_regions:
return tables
# Otherwise remove external region columns from the relevant tables
else:
return [
remove_table_exreg_cols(t) if t.tag != datatypes.Tag.tradelinks else t
for t in tables
]

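A minimal sketch of what remove_table_exreg_cols does to a single table, using an illustrative dataframe and a hypothetical external region name:

    import pandas as pd

    external_regions = {"EXTREG"}
    df = pd.DataFrame(columns=["techname", "reg1", "extreg"])
    exreg_cols = [c for c in df.columns if c.upper() in external_regions]
    df = df.drop(exreg_cols, axis=1)
    assert list(df.columns) == ["techname", "reg1"]  # only "extreg" was dropped
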

def remove_tables_with_formulas(
config: datatypes.Config,
tables: List[datatypes.EmbeddedXlTable],
@@ -521,10 +564,15 @@ def process_user_constraint_table(
table = replace(table, dataframe=df)

# Fill missing regions using defaults (if specified)
# TODO: This assumes several region lists may be present. Handle overwriting?
regions_lists = [x for x in table.uc_sets.keys() if x.upper().startswith("R")]
if regions_lists and table.uc_sets[regions_lists[-1]] != "":
regions = table.uc_sets[regions_lists[-1]]
if regions.lower() != "allregions":
regions = model.internal_regions.intersection(
set(regions.upper().split(","))
)
regions = ",".join(regions)
df["region"] = df["region"].fillna(regions)

# TODO: detect RHS correctly
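A sketch of the new region handling with illustrative values: only internal regions survive the intersection, while an "AllRegions" setting passes through unchanged:

    internal_regions = {"REG1", "REG2"}
    regions = "reg1,reg3"
    if regions.lower() != "allregions":
        regions = ",".join(internal_regions.intersection(set(regions.upper().split(","))))
    assert regions == "REG1"  # "reg3" is dropped as non-internal
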
@@ -651,7 +699,6 @@ def fill_in_missing_values(
# The default regions for VT_* files is given by ~BookRegions_Map:
vt_regions = defaultdict(list)
brm = utils.single_table(tables, datatypes.Tag.book_regions_map).dataframe
utils.missing_value_inherit(brm, "bookname")
for _, row in brm.iterrows():
if row["region"] in model.internal_regions:
vt_regions[row["bookname"]].append(row["region"])
@@ -665,7 +712,7 @@ def fill_in_missing_values_table(table):
for colname in df.columns:
# TODO make this more declarative
if colname in ["sets", "csets", "process"]:
utils.missing_value_inherit(df, colname)
df[colname] = df[colname].ffill()
elif colname == "limtype" and table.tag == datatypes.Tag.fi_comm and False:
isna = df[colname].isna()
ismat = df["csets"] == "MAT"
@@ -704,7 +751,7 @@ def fill_in_missing_values_table(table):
df.loc[isna & ~isele, colname] = "ANNUAL"
elif colname == "region":
# Use BookRegions_Map to fill VT_* files, and all regions for other files
matches = re.search(r"VT_([A-Za-z0-9]+)_", Path(table.filename).name)
matches = re.search(r"VT_([A-Za-z0-9]+)_", Path(table.filename).stem)
if matches is not None:
book = matches.group(1)
if book in vt_regions:
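The book lookup for VT_* files now matches on Path(...).stem rather than Path(...).name, so the book name is extracted consistently whether table.filename carries an extension or not. A minimal illustration:

    import re
    from pathlib import Path

    stem = Path("VT_REG1_PRI_V07.xlsx").stem  # "VT_REG1_PRI_V07"
    match = re.search(r"VT_([A-Za-z0-9]+)_", stem)
    assert match is not None and match.group(1) == "REG1"
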
@@ -780,16 +827,22 @@ def remove_invalid_values(
"""
# TODO: This should be table type specific
# TODO pull this out
# TODO: This should take into account whether a specific dimension is required
# Rules for allowing entries. Each entry of the dictionary designates a rule
# for a given column, and the values that are allowed for that column.
constraints = {
"csets": csets_ordered_for_pcg,
"region": model.all_regions,
"region": model.internal_regions,
}

result = []
for table in tables:
# TODO: FI_T and UC_T should take into account whether a specific dimension is required
skip_tags = {datatypes.Tag.uc_t}

def remove_table_invalid_values(
table: datatypes.EmbeddedXlTable,
) -> datatypes.EmbeddedXlTable:
"""
Remove invalid entries in a table dataframe.
"""
df = table.dataframe.copy()
is_valid_list = [
df[colname].isin(values)
@@ -800,8 +853,12 @@ def remove_invalid_values(
is_valid = reduce(lambda a, b: a & b, is_valid_list)
df = df[is_valid]
df.reset_index(drop=True, inplace=True)
result.append(replace(table, dataframe=df))
return result
table = replace(table, dataframe=df)
return table

return [
remove_table_invalid_values(t) if t.tag not in skip_tags else t for t in tables
]
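Two behavioural changes are visible here: the region constraint now checks internal_regions rather than all_regions, and ~UC_T tables are exempted via skip_tags. A minimal sketch of the row filtering with illustrative data:

    import pandas as pd

    internal_regions = {"REG1"}
    df = pd.DataFrame({"region": ["REG1", "EXTREG"], "value": [1, 2]})
    is_valid = df["region"].isin(internal_regions)
    df = df[is_valid].reset_index(drop=True)  # only the REG1 row survives
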


def process_units(
@@ -856,15 +913,39 @@ def process_regions(
model: datatypes.TimesModel,
) -> List[datatypes.EmbeddedXlTable]:
"""
Include IMPEXP and MINRNW together with the user-defined regions in the AllRegions set.
IMPEXP and MINRNW are external regions that are defined by default by Veda.
Read model regions and update model.internal_regions and model.all_regions.
Include IMPEXP and MINRNW in model.all_regions (defined by default by Veda).
"""

model.all_regions.update((["IMPEXP", "MINRNW"]))
# Read region settings
region_def = utils.single_table(tables, datatypes.Tag.book_regions_map).dataframe
# Harmonise the dataframe
region_def["bookname"] = region_def[["bookname"]].ffill()
region_def = (
region_def.dropna(how="any")
.apply(lambda x: x.str.upper())
.drop_duplicates(ignore_index=True)
)
# Update model.all_regions
model.all_regions.update(region_def["region"])
# Determine model.internal_regions
booknames = set(region_def["bookname"])
valid_booknames = {
b
for b in booknames
if any(re.match(rf"^VT_{b}_", file, re.IGNORECASE) for file in model.files)
}
model.internal_regions.update(
utils.single_column(tables, datatypes.Tag.book_regions_map, "region")
region_def["region"][region_def["bookname"].isin(valid_booknames)]
)
model.all_regions.update(model.internal_regions)

# Print a warning for any region treated as external
for bookname in booknames.difference(valid_booknames):
external = region_def["region"][region_def["bookname"] == bookname].to_list()
print(
f"WARNING: VT_{bookname}_* is not in model files. Treated {external} as external regions."
)

# Apply regions filter
if config.filter_regions:
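A worked sketch of the new internal/external split in process_regions, assuming model.files contains a VT_REG1_* stem but no VT_REG2_* workbook:

    import re

    files = {"VT_REG1_PRI_V07", "SysSettings"}
    booknames = {"REG1", "REG2"}
    valid_booknames = {
        b
        for b in booknames
        if any(re.match(rf"^VT_{b}_", file, re.IGNORECASE) for file in files)
    }
    assert valid_booknames == {"REG1"}
    # Regions mapped to bookname REG2 would trigger the external-region warning.
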
@@ -1260,12 +1341,10 @@ def generate_trade(
.copy()
)
top_ire = pd.concat([top_ire, b_links[cols_list]])

filter_regions = model.internal_regions.union({"IMPEXP", "MINRNW"})
i = top_ire["origin"].isin(filter_regions) & top_ire["destination"].isin(
filter_regions
# Discard tradelinks if none of the regions is internal
i = top_ire["origin"].isin(model.internal_regions) | top_ire["destination"].isin(
model.internal_regions
)

model.trade = top_ire[i].reset_index()

return tables
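The tradelink filter is relaxed from requiring both endpoints to be known regions to keeping any link with at least one internal endpoint; a minimal sketch with hypothetical links:

    import pandas as pd

    internal_regions = {"REG1"}
    top_ire = pd.DataFrame({"origin": ["REG1", "EXT1"], "destination": ["EXT2", "EXT3"]})
    keep = top_ire["origin"].isin(internal_regions) | top_ire["destination"].isin(
        internal_regions
    )
    trade = top_ire[keep].reset_index(drop=True)  # REG1 -> EXT2 kept, EXT1 -> EXT3 dropped
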
@@ -1391,6 +1470,10 @@ def process_commodities(
tables: List[datatypes.EmbeddedXlTable],
model: datatypes.TimesModel,
) -> List[datatypes.EmbeddedXlTable]:
"""
Process commodities.
"""

regions = ",".join(model.internal_regions)

result = []
@@ -1446,6 +1529,10 @@ def process_processes(
tables: List[datatypes.EmbeddedXlTable],
model: datatypes.TimesModel,
) -> List[datatypes.EmbeddedXlTable]:
"""
Process processes.
"""

result = []
veda_sets_to_times = {"IMP": "IRE", "EXP": "IRE", "MIN": "IRE"}

@@ -1773,6 +1860,9 @@ def process_transform_tables(
tables: List[datatypes.EmbeddedXlTable],
model: datatypes.TimesModel,
) -> List[datatypes.EmbeddedXlTable]:
"""
Process transform tables.
"""
regions = model.internal_regions
tfm_tags = [
datatypes.Tag.tfm_ins,
@@ -1805,6 +1895,7 @@ def process_transform_tables(
)

# Handle Regions:
# Check whether allregions or any of the model regions are among the columns
if set(df.columns).isdisjoint(
{x.lower() for x in regions} | {"allregions"}
):
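A sketch of the disjointness test, assuming model regions REG1 and REG2: the branch fires when a table declares neither a region column nor an allregions column:

    regions = {"REG1", "REG2"}
    columns = {"techname", "year", "value"}
    no_region_cols = columns.isdisjoint({x.lower() for x in regions} | {"allregions"})
    assert no_region_cols  # this table declares no region columns
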
@@ -2403,8 +2494,11 @@ def complete_processes(
tables: Dict[str, DataFrame],
model: datatypes.TimesModel,
) -> Dict[str, DataFrame]:
# Generate processes based on trade links
"""
Generate processes based on trade links if not defined elsewhere.
"""

# Dataframe with region, process and commodity columns (no trade direction)
trade_processes = pd.concat(
[
model.trade.loc[:, ["origin", "process", "in"]].rename(
@@ -2418,29 +2512,35 @@
sort=False,
)

# Determine undeclared trade processes
undeclared_td = trade_processes.merge(
model.processes.loc[:, ["region", "process"]], how="left", indicator=True
)
# Keep only those undeclared processes that are in internal regions
undeclared_td = undeclared_td.loc[
(
undeclared_td["region"].isin(model.internal_regions)
& (undeclared_td["_merge"] == "left_only")
),
["region", "process", "commodity"],
]

# Include additional info from model.commodities
undeclared_td = undeclared_td.merge(
model.commodities.loc[:, ["region", "commodity", "csets", "ctslvl", "unit"]],
how="left",
)
# Remove unnecessary columns
undeclared_td.drop(columns=["commodity"], inplace=True)
# Rename to match columns in model.processes
undeclared_td.rename(
columns={"csets": "primarycg", "ctslvl": "tslvl", "unit": "tact"}, inplace=True
)
# Specify expected set
undeclared_td["sets"] = "IRE"
# Remove full duplicates, in case any were generated
undeclared_td.drop_duplicates(keep="last", inplace=True)

# TODO: Handle possible duplicates
# TODO: Handle possible conflicting input
# Print warnings in case of conflicting input data
for i in ["primarycg", "tslvl", "tact"]:
duplicates = undeclared_td.loc[:, ["region", "process", i]].duplicated(
keep=False
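A minimal sketch of the undeclared-process detection above, with a hypothetical trade link: TB_COA trades out of REG1, but REG1 never declares the process, so a row is synthesized for it:

    import pandas as pd

    trade_processes = pd.DataFrame(
        {"region": ["REG1"], "process": ["TB_COA"], "commodity": ["COA"]}
    )
    # Only REG2 declares TB_COA; REG1 is missing it.
    processes = pd.DataFrame({"region": ["REG2"], "process": ["TB_COA"]})

    undeclared = trade_processes.merge(processes, how="left", indicator=True)
    undeclared = undeclared[undeclared["_merge"] == "left_only"]
    # The REG1/TB_COA row survives and later receives sets="IRE".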
