From 7a6014fd803b6b7d768cd466bd56e56ff44026ef Mon Sep 17 00:00:00 2001
From: Olexandr Balyk
Date: Sun, 17 Mar 2024 21:38:33 -0500
Subject: [PATCH 1/5] Allow FI_T only specifying topology

---
 xl2times/transforms.py | 51 ++++++++++++++++++++++++---------------------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index 2ab7df1..d48ae14 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -511,11 +511,8 @@ def process_flexible_import_table(
             return table

         # Rename, add and remove specific columns if the circumstances are right
-        # TODO: We should do a full scale normalisation here, incl. renaming of aliases
         df = table.dataframe

-        nrows = df.shape[0]
-
         # Tag column no longer used to identify data columns
         # https://veda-documentation.readthedocs.io/en/latest/pages/introduction.html#veda2-0-enhanced-features
         # TODO: Include other valid column headers
@@ -541,51 +538,53 @@ def process_flexible_import_table(
         ]
         for colname in index_columns:
             if colname not in df.columns:
-                df[colname] = [None] * nrows
+                df[colname] = None
         table = replace(table, dataframe=df)
         df = table.dataframe

-        attribute = "attribute"
-        df, attribute_suffix = utils.explode(df, data_columns)
-
-        # Append the data column name to the Attribute column values
-        if nrows > 0:
-            i = df[attribute].notna()
-            df.loc[i, attribute] = df.loc[i, attribute] + "~" + attribute_suffix[i]
-            i = df[attribute].isna()
-            df.loc[i, attribute] = attribute_suffix[i]
+        if data_columns:
+            df, attribute_suffix = utils.explode(df, data_columns)
+            # Append the data column name to the Attribute column values
+            i = df["attribute"].notna()
+            df.loc[i, "attribute"] = df.loc[i, "attribute"] + "~" + attribute_suffix[i]
+            i = df["attribute"].isna()
+            df.loc[i, "attribute"] = attribute_suffix[i]

         # Capitalise all attributes, unless column type float
-        if df[attribute].dtype != float:
-            df[attribute] = df[attribute].str.upper()
+        if df["attribute"].dtype != float:
+            df["attribute"] = df["attribute"].str.upper()

         # Handle Attribute containing tilde, such as 'STOCK~2030'
-        for attr in df[attribute].unique():
-            if "~" in attr:
-                i = df[attribute] == attr
+        index = df["attribute"].str.contains("~")
+        if any(index):
+            for attr in df["attribute"][index].unique():
+                i = index & (df["attribute"] == attr)
                 parts = attr.split("~")
                 for value in parts:
                     colname, typed_value = get_colname(value)
                     if colname is None:
-                        df.loc[i, attribute] = typed_value
+                        df.loc[i, "attribute"] = typed_value
                     else:
                         df.loc[i, colname] = typed_value

         # Handle Other_Indexes
         other = "other_indexes"
-        for attr in df[attribute].unique():
+        for attr in df["attribute"].unique():
             if attr == "END":
-                i = df[attribute] == attr
+                i = df["attribute"] == attr
                 df.loc[i, "year"] = df.loc[i, "value"].astype("int") + 1
                 df.loc[i, other] = "EOH"
-                df.loc[i, attribute] = "PRC_NOFF"
+                df.loc[i, "attribute"] = "PRC_NOFF"

         df = df.reset_index(drop=True)

         # Should have all index_columns and VALUE
         if len(df.columns) != (len(index_columns) + 1):
-            raise ValueError(f"len(df.columns) = {len(df.columns)}")
+            if len(df.columns) == len(index_columns) and "value" not in df.columns:
+                df["value"] = None
+            else:
+                raise ValueError(f"len(df.columns) = {len(df.columns)}")

         df["year2"] = df.apply(
             lambda row: (
@@ -1973,10 +1972,8 @@ def process_transform_tables(
             )

             # Handle Regions:
-            # Check whether allregions or any of model regions are among columns
-            if set(df.columns).isdisjoint(
-                {x.lower() for x in regions} | {"allregions"}
-            ):
+            # Check whether any of model regions are among columns
+            if set(df.columns).isdisjoint({x.lower() for x in regions}):
                 if "region" not in df.columns:
                     # If there's no region information at all, this table is for all regions:
                     df["region"] = ["allregions"] * len(df)
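
The change above means a ~FI_T table that only lists topology (process and commodity
columns, with no attribute/data columns) is now carried through instead of tripping the
column-count check. A minimal standalone sketch of that case, with illustrative column
names and only the relevant steps (not part of the patch):

    import pandas as pd

    # Hypothetical ~FI_T table that declares topology only: no data columns at all.
    df = pd.DataFrame(
        {
            "process": ["PRC1", "PRC2"],
            "commodity-in": ["COAL", "GAS"],
            "commodity-out": ["ELC", "ELC"],
        }
    )

    # Illustrative subset of the index columns the table should end up with.
    index_columns = ["region", "process", "commodity-in", "commodity-out", "attribute", "year"]
    data_columns = [c for c in df.columns if c not in index_columns]  # empty here

    # Simplified echo of the patched logic: fill missing index columns with None and
    # only treat data columns when there are any; otherwise add an empty value column.
    for colname in index_columns:
        if colname not in df.columns:
            df[colname] = None
    if not data_columns and "value" not in df.columns:
        df["value"] = None

    print(df)
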
From b121d1d70b2e9bb801bd31c0e9b76cf8c2d2180e Mon Sep 17 00:00:00 2001
From: Olexandr Balyk
Date: Tue, 19 Mar 2024 13:43:26 -0500
Subject: [PATCH 2/5] Check for presence of value column in update tables before updating

---
 xl2times/transforms.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index 3297dbb..bd62481 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -571,6 +571,7 @@ def process_flexible_import_table(
         # Should have all index_columns and VALUE
         if len(df.columns) != (len(index_columns) + 1):
+            # TODO: Should be ok to drop as long as the topology info is stored.
             if len(df.columns) == len(index_columns) and "value" not in df.columns:
                 df["value"] = None
             else:
                 raise ValueError(f"len(df.columns) = {len(df.columns)}")
@@ -2439,6 +2440,11 @@ def apply_transform_tables(
             total=len(updates),
             desc=f"Applying transformations from {Tag.tfm_upd.value}",
         ):
+            # No need to proceed with this row if there is no value to update with.
+            # TODO: remove rows with NAs in value column earlier?
+            if "value" not in row.index:
+                continue
+
             rows_to_update = query(
                 table,
                 row["process"],
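
The guard added in apply_transform_tables simply skips ~TFM_UPD rows that carry no value.
A small sketch of how that check behaves (hypothetical table, not the project's real data):

    import pandas as pd

    # Hypothetical update table that has no "value" column at all.
    updates = pd.DataFrame({"process": ["PRC1", "PRC2"], "attribute": ["STOCK", "STOCK"]})

    applied = []
    for _, row in updates.iterrows():
        # Same idea as the commit: a row without a value column has nothing to apply.
        if "value" not in row.index:
            continue
        applied.append(row)

    print(len(applied))  # 0 - every row is skipped
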
"value", "row_ignore_symbol": [ "\\I:" ], @@ -3519,7 +3519,7 @@ "allregions", "io" ], - "use_name": "allregions", + "use_name": "value", "row_ignore_symbol": [ "\\I:" ], @@ -3990,7 +3990,7 @@ "val_field", "allregions" ], - "use_name": "allregions", + "use_name": "value", "row_ignore_symbol": [ "\\I:" ], @@ -4624,7 +4624,7 @@ "val_field", "allregions" ], - "use_name": "allregions", + "use_name": "value", "row_ignore_symbol": [ "\\I:" ], diff --git a/xl2times/transforms.py b/xl2times/transforms.py index bd62481..b5d5981 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -483,7 +483,6 @@ def process_flexible_import_tables( # TODO: update this dictionary legal_values = { "limtype": set(config.times_sets["LIM"]), - # TODO: check what the values for the below should be "timeslice": set(model.ts_tslvl["tslvl"]), "commodity": set(utils.merge_columns(tables, Tag.fi_comm, "commodity")), "region": model.internal_regions, @@ -491,8 +490,6 @@ def process_flexible_import_tables( "other_indexes": {"IN", "OUT", "DEMO", "DEMI"}, } - # TODO decide whether VedaProcessSets should become a new Enum type or part of TimesModelData type - def process_flexible_import_table( table: EmbeddedXlTable, ) -> EmbeddedXlTable: @@ -505,27 +502,13 @@ def process_flexible_import_table( # Tag column no longer used to identify data columns # https://veda-documentation.readthedocs.io/en/latest/pages/introduction.html#veda2-0-enhanced-features - # TODO: Include other valid column headers + known_columns = config.known_columns[Tag.fi_t] + # TODO: Verify this list against other lists data_columns = [x for x in df.columns if x not in known_columns] - # TODO: Replace this with something similar to know columns from config - # Populate index columns - index_columns = [ - "region", - "process", - "commodity", - "commodity-in", - "commodity-in-aux", - "commodity-out", - "commodity-out-aux", - "attribute", - "year", - "timeslice", - "limtype", - "currency", - "other_indexes", - ] + # Populate index columns (same as known columns for this table type) + index_columns = known_columns for colname in index_columns: if colname not in df.columns: df[colname] = None @@ -1988,6 +1971,7 @@ def process_transform_tables( ) -> list[EmbeddedXlTable]: """Process transform tables.""" regions = model.internal_regions + # TODO: Add other tfm tags? 
From 516637f0bdd75a9b7cbdf89758625dcc31bf24c7 Mon Sep 17 00:00:00 2001
From: Olexandr Balyk
Date: Tue, 19 Mar 2024 16:07:37 -0500
Subject: [PATCH 4/5] Remove unnecessary changes

---
 xl2times/transforms.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index b5d5981..c4caf78 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -2002,7 +2002,7 @@ def process_transform_tables(
                 if "region" not in df.columns:
                     # If there's no region information at all, this table is for all regions:
                     df["region"] = ["allregions"] * len(df)
-                # Else, we only have a "region" column so handle it below
+                # Else, we only have a "region" column so handle it below
             else:
                 if "region" in df.columns:
                     raise ValueError(
@@ -2420,11 +2420,6 @@ def apply_transform_tables(
             total=len(updates),
             desc=f"Applying transformations from {Tag.tfm_upd.value}",
         ):
-            # No need to proceed with this row if there is no value to update with.
-            # TODO: remove rows with NAs in value column earlier?
-            if "value" not in row.index:
-                continue
-
             rows_to_update = query(
                 table,
                 row["process"],

From a1fcba3f0ec2153e26ba39689bc6f23c2cad774e Mon Sep 17 00:00:00 2001
From: Olexandr Balyk
Date: Wed, 20 Mar 2024 00:46:57 -0500
Subject: [PATCH 5/5] Address review comments

---
 xl2times/transforms.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index c4caf78..b2deb26 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -543,12 +543,11 @@ def process_flexible_import_table(

         # Handle Other_Indexes
         other = "other_indexes"
-        for attr in df["attribute"].unique():
-            if attr == "END":
-                i = df["attribute"] == attr
-                df.loc[i, "year"] = df.loc[i, "value"].astype("int") + 1
-                df.loc[i, other] = "EOH"
-                df.loc[i, "attribute"] = "PRC_NOFF"
+        if "END" in df["attribute"]:
+            i = df["attribute"] == "END"
+            df.loc[i, "year"] = df.loc[i, "value"].astype("int") + 1
+            df.loc[i, other] = "EOH"
+            df.loc[i, "attribute"] = "PRC_NOFF"

         df = df.reset_index(drop=True)
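
One detail worth flagging in that last hunk: membership tests on a pandas Series look at
the index labels, not the values, so the check '"END" in df["attribute"]' is not equivalent
to asking whether any attribute equals "END". A tiny standalone illustration (not part of
the patch):

    import pandas as pd

    s = pd.Series(["END", "STOCK"])

    print("END" in s)          # False - `in` checks the index labels (0 and 1)
    print("END" in s.values)   # True  - membership over the underlying values
    print((s == "END").any())  # True  - explicit elementwise comparison
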