From 8d066d011322368fdbbee6f481b1274fefc33e41 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Fri, 9 Feb 2024 17:00:51 -0500 Subject: [PATCH] Create trade links (#167) --- xl2times/__main__.py | 4 +- xl2times/config/veda-tags.json | 66 +++++++++++ xl2times/transforms.py | 194 ++++++++++++++++++++++++++++++--- 3 files changed, 250 insertions(+), 14 deletions(-) diff --git a/xl2times/__main__.py b/xl2times/__main__.py index f9545256..ceb2d67e 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -71,6 +71,7 @@ def convert_xl_to_times( transforms.process_time_slices, transforms.process_transform_insert_variants, transforms.process_transform_tables, + transforms.process_tradelinks, transforms.process_processes, transforms.process_topology, transforms.process_flexible_import_tables, # slow @@ -86,9 +87,10 @@ def convert_xl_to_times( transforms.apply_fixups, transforms.generate_commodity_groups, transforms.fill_in_missing_pcgs, - transforms.generate_top_ire, + transforms.generate_trade, transforms.include_tables_source, transforms.merge_tables, + transforms.complete_processes, transforms.apply_more_fixups, transforms.process_units, transforms.process_years, diff --git a/xl2times/config/veda-tags.json b/xl2times/config/veda-tags.json index d94ab8b4..f57d0025 100644 --- a/xl2times/config/veda-tags.json +++ b/xl2times/config/veda-tags.json @@ -2845,6 +2845,72 @@ "SysSettings" ] }, + { + "tag_name": "tradelinks_dins", + "tag_allowed_in": [ + "TradeScen" + ], + "valid_fields": [ + { + "name": "reg1", + "aliases": [], + "use_name": "reg1", + "row_ignore_symbol": [ + "\\I:", + "*" + ] + }, + { + "name": "reg2", + "aliases": [], + "use_name": "reg2", + "row_ignore_symbol": [ + "\\I:", + "*" + ] + }, + { + "name": "comm", + "aliases": [], + "use_name": "comm", + "row_ignore_symbol": [ + "\\I:" + ] + }, + { + "name": "comm1", + "aliases": [], + "use_name": "comm1", + "row_ignore_symbol": [ + "\\I:" + ] + }, + { + "name": "comm2", + "aliases": [], + "use_name": 
"comm2", + "row_ignore_symbol": [ + "\\I:" + ] + }, + { + "name": "tech", + "aliases": [], + "use_name": "process", + "row_ignore_symbol": [ + "\\I:" + ] + }, + { + "name": "tradelink", + "aliases": [], + "use_name": "tradelink", + "row_ignore_symbol": [ + "\\I:" + ] + } + ] + }, { "tag_name": "uc_t", "tag_allowed_in": [ diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 3a269d23..521a9f7f 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -716,6 +716,7 @@ def remove_invalid_values( """ # TODO: This should be table type specific # TODO pull this out + # TODO: This should take into account whether a specific dimension is required # Rules for allowing entries. Each entry of the dictionary designates a rule for a # a given column, and the values that are allowed for that column. constraints = { @@ -1081,7 +1082,7 @@ def complete_commodity_groups( return tables -def generate_top_ire( +def generate_trade( config: datatypes.Config, tables: List[datatypes.EmbeddedXlTable], model: datatypes.TimesModel, @@ -1134,7 +1135,42 @@ def generate_top_ire( top_ire.drop(columns=["region", "region2", "sets", "io"], inplace=True) top_ire.drop_duplicates(keep="first", inplace=True, ignore_index=True) - model.trade = top_ire + cols_list = ["origin", "in", "destination", "out", "process"] + # Include trade between internal regions + for table in tables: + if table.tag == datatypes.Tag.tradelinks_dins: + df = table.dataframe + f_links = df.rename( + columns={ + "reg1": "origin", + "comm1": "in", + "reg2": "destination", + "comm2": "out", + } + ).copy() + top_ire = pd.concat([top_ire, f_links[cols_list]]) + # Check if any of the links are bi-directional + if "b" in df["tradelink"].str.lower().unique(): + b_links = ( + df[df["tradelink"].str.lower() == "b"] + .rename( + columns={ + "reg1": "destination", + "comm1": "out", + "reg2": "origin", + "comm2": "in", + } + ) + .copy() + ) + top_ire = pd.concat([top_ire, b_links[cols_list]]) + + filter_regions = 
def process_tradelinks(
    config: datatypes.Config,
    tables: List[datatypes.EmbeddedXlTable],
    model: datatypes.TimesModel,
) -> List[datatypes.EmbeddedXlTable]:
    """
    Transform ~TradeLinks tables into the ~TradeLinks_DINS format.

    Each ~TradeLinks table is a region-by-region matrix for a single commodity:
    the first column holds the origin region, the remaining columns are
    destination regions, and a cell value of 1 marks a trade link.  The sheet
    name fixes the link direction: "uni" -> unidirectional, "bi" ->
    bidirectional; for any other sheet name the direction is inferred per
    region pair (a pair declared in both directions is bidirectional).

    :param config: Global configuration (unused here; kept for the pipeline
                   transform signature).
    :param tables: List of tables in EmbeddedXlTable format.
    :param model:  TimesModel being built (unused here).
    :return:       List of tables with each ~TradeLinks table replaced by an
                   equivalent ~TradeLinks_DINS table.
    """
    result = []
    for table in tables:
        if table.tag != datatypes.Tag.tradelinks:
            result.append(table)
            continue

        df = table.dataframe
        sheetname = table.sheetname.lower()
        comm = df.columns[0]
        destinations = [c for c in df.columns if c != comm]
        df.rename(columns={comm: "origin"}, inplace=True)
        # Matrix -> long format: one row per (origin, destination) link.
        df = pd.melt(
            df, id_vars=["origin"], value_vars=destinations, var_name="destination"
        )
        df = df[df["value"] == 1].drop(columns=["value"])
        df["destination"] = df["destination"].str.upper()
        df.drop_duplicates(keep="first", inplace=True)

        if sheetname == "uni":
            df["tradelink"] = "u"
        elif sheetname == "bi":
            df["tradelink"] = "b"
        else:
            df["tradelink"] = 1
            # BUGFIX: the directionless pair key must exist *before* grouping
            # on it; previously this branch grouped/merged on a "regions"
            # column that was only created further below, raising a KeyError.
            df["regions"] = df.apply(
                lambda row: tuple(sorted([row["origin"], row["destination"]])),
                axis=1,
            )
            # Determine whether a trade link is bi- or unidirectional: a pair
            # occurring twice was declared in both directions.
            td_type = df.groupby(["regions"])["tradelink"].agg("count").reset_index()
            td_type.replace({"tradelink": {1: "u", 2: "b"}}, inplace=True)
            df.drop(columns=["tradelink"], inplace=True)
            df = df.merge(td_type, how="inner", on="regions")
            # Recreated below with direction-aware semantics.
            df.drop(columns=["regions"], inplace=True)

        # Add a column containing linked regions (directionless for
        # bidirectional links, so the reverse duplicate can be dropped).
        df["regions"] = df.apply(
            lambda row: tuple(sorted([row["origin"], row["destination"]]))
            if row["tradelink"] == "b"
            else tuple([row["origin"], row["destination"]]),
            axis=1,
        )

        # Drop tradelink (bidirectional) duplicates
        df.drop_duplicates(subset=["regions", "tradelink"], keep="last", inplace=True)
        df.drop(columns=["regions"], inplace=True)
        df["comm"] = comm.upper()
        df["comm1"] = df["comm"]
        df["comm2"] = df["comm"]
        df.rename(columns={"origin": "reg1", "destination": "reg2"}, inplace=True)
        # Use Veda approach to naming of trade processes:
        # T<U|B>_<comm>_<reg1>_<reg2>_01
        df["process"] = df.apply(
            lambda row: "T"
            + "_".join(
                [
                    row["tradelink"].upper(),
                    row["comm"],
                    row["reg1"],
                    row["reg2"],
                    "01",
                ]
            ),
            axis=1,
        )
        result.append(
            replace(table, dataframe=df, tag=datatypes.Tag.tradelinks_dins)
        )

    return result
def complete_processes(
    config: datatypes.Config,
    tables: Dict[str, DataFrame],
    model: datatypes.TimesModel,
) -> Dict[str, DataFrame]:
    """Generate process declarations implied by trade links.

    Every trade link requires an IRE process in its origin region (consuming
    the "in" commodity) and in its destination region (producing the "out"
    commodity).  Any such process not already declared in ``model.processes``
    is appended, inheriting its primary commodity group, timeslice level and
    activity unit from the traded commodity.  Only internal regions are
    considered.  ``tables`` is returned unchanged.
    """
    # One row per (region, process, commodity) implied by a trade link:
    # the origin side consumes "in", the destination side produces "out".
    origin_side = model.trade.loc[:, ["origin", "process", "in"]].rename(
        columns={"origin": "region", "in": "commodity"}
    )
    destination_side = model.trade.loc[:, ["destination", "process", "out"]].rename(
        columns={"destination": "region", "out": "commodity"}
    )
    implied = pd.concat(
        [origin_side, destination_side], ignore_index=True, sort=False
    )

    # Keep only internal-region entries whose process is not yet declared.
    flagged = implied.merge(
        model.processes.loc[:, ["region", "process"]], how="left", indicator=True
    )
    keep = flagged["region"].isin(model.internal_regions) & (
        flagged["_merge"] == "left_only"
    )
    missing = flagged.loc[keep, ["region", "process", "commodity"]]

    # Derive process attributes from the traded commodity's attributes.
    missing = missing.merge(
        model.commodities.loc[:, ["region", "commodity", "csets", "ctslvl", "unit"]],
        how="left",
    )
    missing = missing.drop(columns=["commodity"]).rename(
        columns={"csets": "primarycg", "ctslvl": "tslvl", "unit": "tact"}
    )
    missing["sets"] = "IRE"
    missing = missing.drop_duplicates(keep="last")

    # TODO: Handle possible duplicates
    for i in ("primarycg", "tslvl", "tact"):
        dup_mask = missing.loc[:, ["region", "process", i]].duplicated(keep=False)
        if dup_mask.any():
            dups = missing.loc[dup_mask, ["region", "process", i]]
            processes = dups["process"].unique()
            regions = dups["region"].unique()
            print(f"WARNING: Multiple possible {i} for {processes} in {regions}")

    model.processes = pd.concat([model.processes, missing], ignore_index=True)

    return tables