diff --git a/xl2times/__main__.py b/xl2times/__main__.py index da3f3d5..df38ff3 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -136,8 +136,9 @@ def convert_xl_to_times( transforms.expand_rows_parallel, # slow transforms.remove_invalid_values, transforms.capitalise_some_values, - transforms.apply_fixups, + transforms.internalise_commodities, transforms.generate_commodity_groups, + transforms.apply_fixups, transforms.fill_in_missing_pcgs, transforms.generate_trade, transforms.include_tables_source, diff --git a/xl2times/config/veda-attr-defaults.json b/xl2times/config/veda-attr-defaults.json index d318e23..1bceee3 100644 --- a/xl2times/config/veda-attr-defaults.json +++ b/xl2times/config/veda-attr-defaults.json @@ -897,6 +897,7 @@ "commodity-in", "commodity-out" ], + "other_indexes": ["veda_cg"], "limtype": "FX", "ts-level": "ANNUAL" }, @@ -905,8 +906,10 @@ "SHARE-I": { "defaults": { "commodity": [ - "commodity-in" + "commodity-in", + "commodity-out" ], + "other_indexes": ["veda_cg"], "limtype": "FX", "ts-level": "ANNUAL" }, @@ -915,8 +918,10 @@ "SHARE-O": { "defaults": { "commodity": [ - "commodity-out" + "commodity-out", + "commodity-in" ], + "other_indexes": ["veda_cg"], "limtype": "FX", "ts-level": "ANNUAL" }, diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 2a42788..0d5f324 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -417,8 +417,6 @@ def merge_tables( logger.warning(err) match key: - case Tag.fi_comm: - model.commodities = df case Tag.fi_process: # TODO: Find a better place for this (both info and processing) times_prc_sets = set(config.times_sets["PRC_GRP"]) @@ -1213,6 +1211,18 @@ def apply_fixups( tables: list[EmbeddedXlTable], model: TimesModel, ) -> list[EmbeddedXlTable]: + + # Generate Veda CG info + cols = ["region", "process", "commodity", "csets"] + # Exclude auxiliary flows + index = model.topology["io"].isin({"IN", "OUT"}) + veda_cgs = model.topology[cols + ["io"]][index].copy() + 
veda_cgs.drop_duplicates(subset=cols, keep="last", inplace=True) + veda_cgs["veda_cg"] = veda_cgs["csets"] + veda_cgs["io"].str[:1] + veda_cgs = veda_cgs.set_index(["region", "process", "commodity"])[ + "veda_cg" + ].to_dict() + def apply_fixups_table(table: EmbeddedXlTable): tag = Tag.fi_t if not table.tag == tag: @@ -1252,12 +1262,13 @@ def _populate_defaults(dataframe: DataFrame, col_name: str): for col in ("commodity", "other_indexes"): _populate_defaults(df, col) - # Fill other indexes for some attributes - # FLO_SHAR - i = df["attribute"] == "SHARE-I" - df.loc[i, "other_indexes"] = "NRGI" - i = df["attribute"] == "SHARE-O" - df.loc[i, "other_indexes"] = "NRGO" + # Determine values of and fill in some indexes + if any(df["other_indexes"] == "veda_cg"): + i = df["other_indexes"] == "veda_cg" + df.loc[i, "other_indexes"] = df[i].apply( + lambda x: veda_cgs.get((x["region"], x["process"], x["commodity"])), + axis=1, + ) return replace(table, dataframe=df) @@ -1271,7 +1282,6 @@ def generate_commodity_groups( ) -> list[EmbeddedXlTable]: """Generate commodity groups.""" process_tables = [t for t in tables if t.tag == Tag.fi_process] - commodity_tables = [t for t in tables if t.tag == Tag.fi_comm] # Veda determines default PCG based on predetermined order and presence of OUT/IN commodity columns = ["region", "process", "primarycg"] @@ -1288,10 +1298,7 @@ def generate_commodity_groups( # Extract commodities and their sets by region columns = ["region", "csets", "commodity"] - comm_set = pd.DataFrame(columns=columns) - for commodity_table in commodity_tables: - df = commodity_table.dataframe[columns] - comm_set = pd.concat([comm_set, df]) + comm_set = model.commodities[columns].copy() comm_set.drop_duplicates(keep="first", inplace=True) prc_top = utils.single_table(tables, "ProcessTopology").dataframe @@ -1360,7 +1367,7 @@ def _count_comm_group_vectorised(comm_groups: pd.DataFrame) -> None: comm_groups["commoditygroup"] = ( comm_groups.groupby(["region", "process", 
"csets", "io"]).transform("count") )["commoditygroup"] - # set comoditygroup to 0 for io rows that aren't IN or OUT + # set commodity group to 0 for io rows that aren't IN or OUT comm_groups.loc[~comm_groups["io"].isin(["IN", "OUT"]), "commoditygroup"] = 0 @@ -1651,6 +1658,25 @@ def process_commodities( return result +def internalise_commodities( + config: Config, + tables: list[EmbeddedXlTable], + model: TimesModel, +) -> list[EmbeddedXlTable]: + """Populate model.commodities.""" + result = [] + comm_dfs = [] + for table in tables: + if table.tag != Tag.fi_comm: + result.append(table) + else: + comm_dfs.append(table.dataframe) + + model.commodities = pd.concat(comm_dfs, ignore_index=True) + + return result + + def process_processes( config: Config, tables: list[EmbeddedXlTable],