Commit

Create trade links (#167)
olejandro authored Feb 9, 2024
1 parent 871fcc5 commit 8d066d0
Showing 3 changed files with 250 additions and 14 deletions.
4 changes: 3 additions & 1 deletion xl2times/__main__.py
@@ -71,6 +71,7 @@ def convert_xl_to_times(
transforms.process_time_slices,
transforms.process_transform_insert_variants,
transforms.process_transform_tables,
transforms.process_tradelinks,
transforms.process_processes,
transforms.process_topology,
transforms.process_flexible_import_tables, # slow
@@ -86,9 +87,10 @@ def convert_xl_to_times(
transforms.apply_fixups,
transforms.generate_commodity_groups,
transforms.fill_in_missing_pcgs,
transforms.generate_top_ire,
transforms.generate_trade,
transforms.include_tables_source,
transforms.merge_tables,
transforms.complete_processes,
transforms.apply_more_fixups,
transforms.process_units,
transforms.process_years,
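For orientation: each entry in the list above is a transform with the signature `(config, tables, model)`. The commit inserts `process_tradelinks` before `process_processes`, renames `generate_top_ire` to `generate_trade`, and adds `complete_processes` after `merge_tables`. A minimal sketch of how such a transform list is typically driven — the loop below is an assumption for illustration, not the actual code in `xl2times/__main__.py`:

```python
from typing import Callable, List

def run_transforms(transforms: List[Callable], config, tables, model):
    # Apply each transform in order; each one may replace or extend the
    # table collection that later transforms consume, so ordering matters.
    for transform in transforms:
        tables = transform(config, tables, model)
    return tables
```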
66 changes: 66 additions & 0 deletions xl2times/config/veda-tags.json
@@ -2845,6 +2845,72 @@
"SysSettings"
]
},
{
"tag_name": "tradelinks_dins",
"tag_allowed_in": [
"TradeScen"
],
"valid_fields": [
{
"name": "reg1",
"aliases": [],
"use_name": "reg1",
"row_ignore_symbol": [
"\\I:",
"*"
]
},
{
"name": "reg2",
"aliases": [],
"use_name": "reg2",
"row_ignore_symbol": [
"\\I:",
"*"
]
},
{
"name": "comm",
"aliases": [],
"use_name": "comm",
"row_ignore_symbol": [
"\\I:"
]
},
{
"name": "comm1",
"aliases": [],
"use_name": "comm1",
"row_ignore_symbol": [
"\\I:"
]
},
{
"name": "comm2",
"aliases": [],
"use_name": "comm2",
"row_ignore_symbol": [
"\\I:"
]
},
{
"name": "tech",
"aliases": [],
"use_name": "process",
"row_ignore_symbol": [
"\\I:"
]
},
{
"name": "tradelink",
"aliases": [],
"use_name": "tradelink",
"row_ignore_symbol": [
"\\I:"
]
}
]
},
{
"tag_name": "uc_t",
"tag_allowed_in": [
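For reference, a parsed ~TradeLinks_DINS table matching the `tradelinks_dins` fields declared above might look as follows. The values are hypothetical and not taken from the commit; note that the `tech` column is read into `process` per the `use_name` mapping:

```python
import pandas as pd

# Hypothetical ~TradeLinks_DINS content: one bidirectional ("b")
# electricity trade link between two regions.
tradelinks_dins = pd.DataFrame(
    {
        "reg1": ["REG1"],
        "reg2": ["REG2"],
        "comm": ["ELC"],
        "comm1": ["ELC"],
        "comm2": ["ELC"],
        "process": ["T_B_ELC_REG1_REG2_01"],
        "tradelink": ["b"],
    }
)
```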
194 changes: 181 additions & 13 deletions xl2times/transforms.py
@@ -716,6 +716,7 @@ def remove_invalid_values(
"""
# TODO: This should be table type specific
# TODO pull this out
# TODO: This should take into account whether a specific dimension is required
# Rules for allowing entries. Each entry of the dictionary designates a rule for
# a given column, and the values that are allowed for that column.
constraints = {
@@ -1081,7 +1082,7 @@ def complete_commodity_groups(
return tables


def generate_top_ire(
def generate_trade(
config: datatypes.Config,
tables: List[datatypes.EmbeddedXlTable],
model: datatypes.TimesModel,
@@ -1134,7 +1135,42 @@ def generate_top_ire(
top_ire.drop(columns=["region", "region2", "sets", "io"], inplace=True)
top_ire.drop_duplicates(keep="first", inplace=True, ignore_index=True)

model.trade = top_ire
cols_list = ["origin", "in", "destination", "out", "process"]
# Include trade between internal regions
for table in tables:
if table.tag == datatypes.Tag.tradelinks_dins:
df = table.dataframe
f_links = df.rename(
columns={
"reg1": "origin",
"comm1": "in",
"reg2": "destination",
"comm2": "out",
}
).copy()
top_ire = pd.concat([top_ire, f_links[cols_list]])
# Check if any of the links are bi-directional
if "b" in df["tradelink"].str.lower().unique():
b_links = (
df[df["tradelink"].str.lower() == "b"]
.rename(
columns={
"reg1": "destination",
"comm1": "out",
"reg2": "origin",
"comm2": "in",
}
)
.copy()
)
top_ire = pd.concat([top_ire, b_links[cols_list]])

filter_regions = model.internal_regions.union({"IMPEXP", "MINRNW"})
i = top_ire["origin"].isin(filter_regions) & top_ire["destination"].isin(
filter_regions
)

model.trade = top_ire[i].reset_index()

return tables
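
A toy illustration of the expansion above: a bidirectional ("b") link contributes one `top_ire` row per direction, a unidirectional ("u") link just one, and the final filter keeps only links whose endpoints are internal regions or the special IMPEXP/MINRNW regions. The data below is made up:

```python
import pandas as pd

# Made-up tradelinks_dins rows; column names follow the diff.
df = pd.DataFrame(
    {
        "reg1": ["R1", "R1"],
        "comm1": ["ELC", "GAS"],
        "reg2": ["R2", "R2"],
        "comm2": ["ELC", "GAS"],
        "process": ["T_B_ELC_R1_R2_01", "T_U_GAS_R1_R2_01"],
        "tradelink": ["b", "u"],
    }
)
cols = ["origin", "in", "destination", "out", "process"]
# Forward direction for every link:
forward = df.rename(
    columns={"reg1": "origin", "comm1": "in", "reg2": "destination", "comm2": "out"}
)
# Reverse direction only for bidirectional links:
backward = df[df["tradelink"].str.lower() == "b"].rename(
    columns={"reg1": "destination", "comm1": "out", "reg2": "origin", "comm2": "in"}
)
links = pd.concat([forward[cols], backward[cols]], ignore_index=True)
# -> three rows: R1->R2 (ELC), R1->R2 (GAS), and R2->R1 (ELC).
```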

@@ -1475,6 +1511,84 @@ def generate_dummy_processes(
return tables


def process_tradelinks(
config: datatypes.Config,
tables: List[datatypes.EmbeddedXlTable],
model: datatypes.TimesModel,
) -> List[datatypes.EmbeddedXlTable]:
"""
Transform tradelinks to tradelinks_dins
"""

result = []
for table in tables:
if table.tag == datatypes.Tag.tradelinks:
df = table.dataframe
sheetname = table.sheetname.lower()
comm = df.columns[0]
destinations = [c for c in df.columns if c != comm]
df.rename(columns={comm: "origin"}, inplace=True)
df = pd.melt(
df, id_vars=["origin"], value_vars=destinations, var_name="destination"
)
df = df[df["value"] == 1].drop(columns=["value"])
df["destination"] = df["destination"].str.upper()
df.drop_duplicates(keep="first", inplace=True)

if sheetname == "uni":
df["tradelink"] = "u"
elif sheetname == "bi":
df["tradelink"] = "b"
else:
df["tradelink"] = 1
# Determine whether a trade link is bi- or unidirectional
td_type = (
df.groupby(["regions"])["tradelink"].agg("count").reset_index()
)
td_type.replace({"tradelink": {1: "u", 2: "b"}}, inplace=True)
df.drop(columns=["tradelink"], inplace=True)
df = df.merge(td_type, how="inner", on="regions")

# Add a column containing linked regions (directionless for bidirectional links)
df["regions"] = df.apply(
lambda row: tuple(sorted([row["origin"], row["destination"]]))
if row["tradelink"] == "b"
else tuple([row["origin"], row["destination"]]),
axis=1,
)

# Drop tradelink (bidirectional) duplicates
df.drop_duplicates(
subset=["regions", "tradelink"], keep="last", inplace=True
)
df.drop(columns=["regions"], inplace=True)
df["comm"] = comm.upper()
df["comm1"] = df["comm"]
df["comm2"] = df["comm"]
df.rename(columns={"origin": "reg1", "destination": "reg2"}, inplace=True)
# Use Veda approach to naming of trade processes
df["process"] = df.apply(
lambda row: "T"
+ "_".join(
[
row["tradelink"].upper(),
row["comm"],
row["reg1"],
row["reg2"],
"01",
]
),
axis=1,
)
result.append(
replace(table, dataframe=df, tag=datatypes.Tag.tradelinks_dins)
)
else:
result.append(table)

return result
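
A toy run of the melt step above, assuming a ~TradeLinks matrix on a sheet named "bi" whose first column header is the commodity and whose remaining columns are destination regions (all values illustrative, not from the commit):

```python
import pandas as pd

# First column header is the commodity ("elc"); its values are origin regions.
matrix = pd.DataFrame({"elc": ["REG1", "REG2"], "reg1": [0, 1], "reg2": [1, 0]})
comm = matrix.columns[0]
destinations = [c for c in matrix.columns if c != comm]
links = matrix.rename(columns={comm: "origin"}).melt(
    id_vars=["origin"], value_vars=destinations, var_name="destination"
)
links = links[links["value"] == 1].drop(columns=["value"])
links["destination"] = links["destination"].str.upper()
links["tradelink"] = "b"  # sheet named "bi" -> bidirectional
# Veda-style process names, one per direction as in the diff:
links["process"] = (
    "T_" + links["tradelink"].str.upper() + "_" + comm.upper()
    + "_" + links["origin"] + "_" + links["destination"] + "_01"
)
# -> T_B_ELC_REG1_REG2_01 and T_B_ELC_REG2_REG1_01 (the diff then drops one
#    of each bidirectional pair via the directionless "regions" tuple).
```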


def process_transform_insert_variants(
config: datatypes.Config,
tables: List[datatypes.EmbeddedXlTable],
@@ -1503,20 +1617,16 @@ def is_year(col_name):
# ~TFM_INS-TS: Gather columns whose names are years into a single "Year" column:
df = table.dataframe
if "year" in df.columns:
raise ValueError(f"TFM_INS-AT table already has Year column: {table}")
# TODO can we remove this hacky shortcut?
if (
table.tag == datatypes.Tag.tfm_ins_ts
and set(df.columns) & query_columns == {"cset_cn"}
and has_no_wildcards(df["cset_cn"])
raise ValueError(f"TFM_INS-TS table already has Year column: {table}")
# TODO: can we remove this hacky shortcut? Or should it also be applied to the AT variant?
if set(df.columns) & query_columns == {"cset_cn"} and has_no_wildcards(
df["cset_cn"]
):
df.rename(columns={"cset_cn": "commodity"}, inplace=True)
result.append(replace(table, dataframe=df, tag=datatypes.Tag.fi_t))
continue
elif (
table.tag == datatypes.Tag.tfm_ins_ts
and set(df.columns) & query_columns == {"pset_pn"}
and has_no_wildcards(df["pset_pn"])
elif set(df.columns) & query_columns == {"pset_pn"} and has_no_wildcards(
df["pset_pn"]
):
df.rename(columns={"pset_pn": "process"}, inplace=True)
result.append(replace(table, dataframe=df, tag=datatypes.Tag.fi_t))
Expand All @@ -1536,7 +1646,7 @@ def is_year(col_name):
df["year"] = df["year"].astype("int")
result.append(replace(table, dataframe=df, tag=datatypes.Tag.tfm_ins))
elif table.tag == datatypes.Tag.tfm_ins_at:
# ~TFM_INS-AT: Gather columns with attribute names into a single "Attribue" column
# ~TFM_INS-AT: Gather columns with attribute names into a single "Attribute" column
df = table.dataframe
if "attribute" in df.columns:
raise ValueError(
@@ -2190,6 +2300,64 @@ def fix_topology(
return tables


def complete_processes(
config: datatypes.Config,
tables: Dict[str, DataFrame],
model: datatypes.TimesModel,
) -> Dict[str, DataFrame]:
# Generate processes based on trade links

trade_processes = pd.concat(
[
model.trade.loc[:, ["origin", "process", "in"]].rename(
columns={"origin": "region", "in": "commodity"}
),
model.trade.loc[:, ["destination", "process", "out"]].rename(
columns={"destination": "region", "out": "commodity"}
),
],
ignore_index=True,
sort=False,
)

undeclared_td = trade_processes.merge(
model.processes.loc[:, ["region", "process"]], how="left", indicator=True
)
undeclared_td = undeclared_td.loc[
(
undeclared_td["region"].isin(model.internal_regions)
& (undeclared_td["_merge"] == "left_only")
),
["region", "process", "commodity"],
]

undeclared_td = undeclared_td.merge(
model.commodities.loc[:, ["region", "commodity", "csets", "ctslvl", "unit"]],
how="left",
)
undeclared_td.drop(columns=["commodity"], inplace=True)
undeclared_td.rename(
columns={"csets": "primarycg", "ctslvl": "tslvl", "unit": "tact"}, inplace=True
)
undeclared_td["sets"] = "IRE"
undeclared_td.drop_duplicates(keep="last", inplace=True)

# TODO: Handle possible duplicates
for i in ["primarycg", "tslvl", "tact"]:
duplicates = undeclared_td.loc[:, ["region", "process", i]].duplicated(
keep=False
)
if any(duplicates):
duplicates = undeclared_td.loc[duplicates, ["region", "process", i]]
processes = duplicates["process"].unique()
regions = duplicates["region"].unique()
print(f"WARNING: Multiple possible {i} for {processes} in {regions}")

model.processes = pd.concat([model.processes, undeclared_td], ignore_index=True)

return tables
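
The detection pattern above in isolation: a left merge with `indicator=True` flags `(region, process)` pairs that occur in the trade table but were never declared, so `complete_processes` can generate them as IRE processes. Toy data, illustrative only:

```python
import pandas as pd

# Both ends of one trade link, but only one end has a declared process.
trade_side = pd.DataFrame(
    {"region": ["R1", "R2"], "process": ["T_B_ELC_R1_R2_01"] * 2}
)
declared = pd.DataFrame({"region": ["R1"], "process": ["T_B_ELC_R1_R2_01"]})

merged = trade_side.merge(declared, how="left", indicator=True)
undeclared = merged[merged["_merge"] == "left_only"].drop(columns="_merge")
# -> the R2 half of the link is undeclared and would be auto-generated.
```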


def apply_more_fixups(
config: datatypes.Config,
tables: Dict[str, DataFrame],
