Improve handling of VEDA variants of FLO_SHAR (#245)

etsap-TIMES · Nov 15, 2024 · cfdd05f · cfdd05f
1 parent a57b045
commit cfdd05f
Show file tree

Hide file tree

Showing 3 changed files with 49 additions and 17 deletions.
diff --git a/xl2times/__main__.py b/xl2times/__main__.py
@@ -136,8 +136,9 @@ def convert_xl_to_times(
         transforms.expand_rows_parallel,  # slow
         transforms.remove_invalid_values,
         transforms.capitalise_some_values,
-        transforms.apply_fixups,
+        transforms.internalise_commodities,
         transforms.generate_commodity_groups,
+        transforms.apply_fixups,
         transforms.fill_in_missing_pcgs,
         transforms.generate_trade,
         transforms.include_tables_source,

diff --git a/xl2times/config/veda-attr-defaults.json b/xl2times/config/veda-attr-defaults.json
@@ -897,6 +897,7 @@
         "commodity-in",
         "commodity-out"
       ],
+      "other_indexes": ["veda_cg"],
       "limtype": "FX",
       "ts-level": "ANNUAL"
     },
@@ -905,8 +906,10 @@
   "SHARE-I": {
     "defaults": {
       "commodity": [
-        "commodity-in"
+        "commodity-in",
+        "commodity-out"
       ],
+      "other_indexes": ["veda_cg"],
       "limtype": "FX",
       "ts-level": "ANNUAL"
     },
@@ -915,8 +918,10 @@
   "SHARE-O": {
     "defaults": {
       "commodity": [
-        "commodity-out"
+        "commodity-out",
+        "commodity-in"
       ],
+      "other_indexes": ["veda_cg"],
       "limtype": "FX",
       "ts-level": "ANNUAL"
     },

diff --git a/xl2times/transforms.py b/xl2times/transforms.py
@@ -417,8 +417,6 @@ def merge_tables(
             logger.warning(err)
 
         match key:
-            case Tag.fi_comm:
-                model.commodities = df
             case Tag.fi_process:
                 # TODO: Find a better place for this (both info and processing)
                 times_prc_sets = set(config.times_sets["PRC_GRP"])
@@ -1213,6 +1211,18 @@ def apply_fixups(
     tables: list[EmbeddedXlTable],
     model: TimesModel,
 ) -> list[EmbeddedXlTable]:
+
+    # Generate Veda CG info
+    cols = ["region", "process", "commodity", "csets"]
+    # Exclude auxiliary flows
+    index = model.topology["io"].isin({"IN", "OUT"})
+    veda_cgs = model.topology[cols + ["io"]][index].copy()
+    veda_cgs.drop_duplicates(subset=cols, keep="last", inplace=True)
+    veda_cgs["veda_cg"] = veda_cgs["csets"] + veda_cgs["io"].str[:1]
+    veda_cgs = veda_cgs.set_index(["region", "process", "commodity"])[
+        "veda_cg"
+    ].to_dict()
+
     def apply_fixups_table(table: EmbeddedXlTable):
         tag = Tag.fi_t
         if not table.tag == tag:
@@ -1252,12 +1262,13 @@ def _populate_defaults(dataframe: DataFrame, col_name: str):
         for col in ("commodity", "other_indexes"):
             _populate_defaults(df, col)
 
-        # Fill other indexes for some attributes
-        # FLO_SHAR
-        i = df["attribute"] == "SHARE-I"
-        df.loc[i, "other_indexes"] = "NRGI"
-        i = df["attribute"] == "SHARE-O"
-        df.loc[i, "other_indexes"] = "NRGO"
+        # Determine values of and fill in some indexes
+        if any(df["other_indexes"] == "veda_cg"):
+            i = df["other_indexes"] == "veda_cg"
+            df.loc[i, "other_indexes"] = df[i].apply(
+                lambda x: veda_cgs.get((x["region"], x["process"], x["commodity"])),
+                axis=1,
+            )
 
         return replace(table, dataframe=df)
 
@@ -1271,7 +1282,6 @@ def generate_commodity_groups(
 ) -> list[EmbeddedXlTable]:
     """Generate commodity groups."""
     process_tables = [t for t in tables if t.tag == Tag.fi_process]
-    commodity_tables = [t for t in tables if t.tag == Tag.fi_comm]
 
     # Veda determines default PCG based on predetermined order and presence of OUT/IN commodity
     columns = ["region", "process", "primarycg"]
@@ -1288,10 +1298,7 @@ def generate_commodity_groups(
 
     # Extract commodities and their sets by region
     columns = ["region", "csets", "commodity"]
-    comm_set = pd.DataFrame(columns=columns)
-    for commodity_table in commodity_tables:
-        df = commodity_table.dataframe[columns]
-        comm_set = pd.concat([comm_set, df])
+    comm_set = model.commodities[columns].copy()
     comm_set.drop_duplicates(keep="first", inplace=True)
 
     prc_top = utils.single_table(tables, "ProcessTopology").dataframe
@@ -1360,7 +1367,7 @@ def _count_comm_group_vectorised(comm_groups: pd.DataFrame) -> None:
     comm_groups["commoditygroup"] = (
         comm_groups.groupby(["region", "process", "csets", "io"]).transform("count")
     )["commoditygroup"]
-    # set comoditygroup to 0 for io rows that aren't IN or OUT
+    # set commodity group to 0 for io rows that aren't IN or OUT
     comm_groups.loc[~comm_groups["io"].isin(["IN", "OUT"]), "commoditygroup"] = 0
 
 
@@ -1651,6 +1658,25 @@ def process_commodities(
     return result
 
 
+def internalise_commodities(
+    config: Config,  # not referenced in this function's body
+    tables: list[EmbeddedXlTable],
+    model: TimesModel,  # mutated: its `commodities` attribute is assigned below
+) -> list[EmbeddedXlTable]:
+    """Populate model.commodities."""
+    result = []  # tables whose tag is not Tag.fi_comm, kept in input order
+    comm_dfs = []  # dataframes taken from the Tag.fi_comm tables
+    for table in tables:
+        if table.tag != Tag.fi_comm:
+            result.append(table)
+        else:
+            comm_dfs.append(table.dataframe)
+
+    model.commodities = pd.concat(comm_dfs, ignore_index=True)  # NOTE(review): pd.concat raises ValueError on an empty list; confirm at least one fi_comm table is always present
+
+    return result  # the returned list excludes the fi_comm tables
+
+
 def process_processes(
     config: Config,
     tables: list[EmbeddedXlTable],