etsap-TIMES · siddharth-krishna · Jan 31, 2024 · Jan 31, 2024 · Jan 31, 2024 · Jan 31, 2024
diff --git a/xl2times/__main__.py b/xl2times/__main__.py
@@ -385,7 +385,7 @@ def main():
     args_parser.add_argument(
         "input",
         nargs="*",
-        help="Either an input directory, or a list of input xlsx files to process",
+        help="Either an input directory, or a list of input xlsx/xlsm files to process",
     )
     args_parser.add_argument(
         "--regions",
@@ -405,7 +405,7 @@ def main():
     args_parser.add_argument(
         "--only_read",
         action="store_true",
-        help="Read xlsx files and stop after outputting raw_tables.txt",
+        help="Read xlsx/xlsm files and stop after outputting raw_tables.txt",
     )
     args_parser.add_argument("--use_pkl", action="store_true")
     args_parser.add_argument(
@@ -433,8 +433,8 @@ def main():
         assert os.path.isdir(args.input[0])
         input_files = [
             str(path)
-            for path in Path(args.input[0]).rglob("*.xlsx")
-            if not path.name.startswith("~")
+            for path in Path(args.input[0]).rglob("*")
+            if path.suffix in [".xlsx", ".xlsm"] and not path.name.startswith("~")
         ]
         print(f"Loading {len(input_files)} files from {args.input[0]}")
     else:

diff --git a/xl2times/transforms.py b/xl2times/transforms.py
@@ -565,9 +565,6 @@ def process_user_constraint_table(
                     else:
                         df.loc[i, colname] = typed_value
 
-        # TODO: should we have a global list of column name -> type?
-        df["year"] = df["year"].astype("Int64")
-
         return replace(table, dataframe=df)
 
     return [process_user_constraint_table(t) for t in tables]
@@ -898,6 +895,10 @@ def apply_fixups_table(table: datatypes.EmbeddedXlTable):
 
         df = table.dataframe.copy()
 
+        # TODO: should we have a global list of column name -> type?
+        if "year" in df.columns:
+            df["year"] = pd.to_numeric(df["year"], errors="coerce")
+
         # Populate CommName based on defaults
         i = (
             df["attribute"]
@@ -1284,9 +1285,11 @@ def process_years(
     tables: Dict[str, DataFrame],
     model: datatypes.TimesModel,
 ) -> Dict[str, DataFrame]:
+
     # Datayears is the set of all years in ~FI_T's Year column
     # We ignore values < 1000 because those signify interpolation/extrapolation rules
     # (see Table 8 of Part IV of the Times Documentation)
+
     datayears = (
         tables[datatypes.Tag.fi_t]["year"]
         .apply(lambda x: x if (x is not str) and x >= 1000 else None)
@@ -1330,12 +1333,16 @@ def process_processes(
             )
             df.replace({"sets": veda_sets_to_times}, inplace=True)
             nrows = df.shape[0]
-            if "vintage" not in table.dataframe.columns:
-                df["vintage"] = [None] * nrows
-            if "region" not in table.dataframe.columns:
-                df.insert(1, "region", [None] * nrows)
-            if "tslvl" not in table.dataframe.columns:
-                df.insert(6, "tslvl", ["ANNUAL"] * nrows)
+            # TODO: Use info from config instead. Introduce required columns in the meta file?
+            add_columns = [
+                (1, "region"),
+                (6, "tslvl"),
+                (7, "primarycg"),
+                (8, "vintage"),
+            ]
+            for column in add_columns:
+                if column[1] not in table.dataframe.columns:
+                    df.insert(column[0], column[1], [None] * nrows)
             result.append(replace(table, dataframe=df))
 
     veda_process_sets = datatypes.EmbeddedXlTable(