From 140736b44719ee92c4b6217d888056466b8417a1 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Fri, 20 Dec 2024 08:46:24 -0500 Subject: [PATCH 1/6] Capitalise all string values in tables --- xl2times/__main__.py | 4 ++-- xl2times/transforms.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/xl2times/__main__.py b/xl2times/__main__.py index 28b8993..dcd1f90 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -115,6 +115,7 @@ def convert_xl_to_times( transforms.normalize_column_aliases, transforms.remove_comment_rows, transforms.revalidate_input_tables, + transforms.capitalise_table_values, transforms.process_regions, transforms.process_commodities, transforms.process_time_periods, @@ -135,7 +136,6 @@ def convert_xl_to_times( transforms.generate_uc_properties, transforms.expand_rows_parallel, # slow transforms.remove_invalid_values, - transforms.capitalise_some_values, transforms.internalise_commodities, transforms.generate_commodity_groups, transforms.apply_fixups, @@ -422,7 +422,7 @@ def strip_filename_prefix(table, prefix): def dump_tables(tables: list, filename: str) -> list: os.makedirs(os.path.dirname(filename), exist_ok=True) - with open(filename, "w") as text_file: + with open(filename, "w", encoding="utf-8") as text_file: for t in tables if isinstance(tables, list) else tables.items(): if isinstance(t, EmbeddedXlTable): tag = t.tag diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 6f1aaef..b8fa357 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -1186,28 +1186,28 @@ def complete_dictionary( return tables -def capitalise_some_values( +def capitalise_table_values( config: Config, tables: list[EmbeddedXlTable], model: TimesModel, ) -> list[EmbeddedXlTable]: - """Ensure that all attributes and units are uppercase.""" - # TODO: This should include other dimensions - # TODO: This should be part of normalisation + """Ensure that all table entries are uppercase.""" - colnames = ["attribute", "tact", "tcap", "unit", "sourcescen"] - - def capitalise_attributes_table(table: EmbeddedXlTable): + def capitalise_table_entries(table: EmbeddedXlTable): df = table.dataframe.copy() + # Capitalise all entries if column type string + colnames = df.select_dtypes(include="object").columns seen_cols = [colname for colname in colnames if colname in df.columns] if len(df) > 0: for seen_col in seen_cols: - df[seen_col] = df[seen_col].str.upper() + # Index of rows with string entries + i = df[seen_col].apply(lambda x: isinstance(x, str)) + df.loc[i, seen_col] = df[seen_col][i].str.upper() return replace(table, dataframe=df) else: return table - return [capitalise_attributes_table(table) for table in tables] + return [capitalise_table_entries(table) for table in tables] def _populate_defaults( From 418d8379ea220a71015e25bf3de64675df47d39b Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Fri, 20 Dec 2024 10:51:49 -0500 Subject: [PATCH 2/6] Capitalise default units for dummy import processes --- xl2times/transforms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xl2times/transforms.py b/xl2times/transforms.py index b8fa357..8525281 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -1804,7 +1804,7 @@ def generate_dummy_processes( # TODO: Activity units below are arbitrary. Suggest Veda devs not to have any. dummy_processes = [ ["IMP", "IMPNRGZ", "Dummy Import of NRG", "PJ", "", "NRG"], - ["IMP", "IMPMATZ", "Dummy Import of MAT", "Mt", "", "MAT"], + ["IMP", "IMPMATZ", "Dummy Import of MAT", "MT", "", "MAT"], ["IMP", "IMPDEMZ", "Dummy Import of DEM", "PJ", "", "DEM"], ] From e1a55a3419aa5d38cbfa941a8b72b6a8b100bdd8 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Fri, 20 Dec 2024 11:02:31 -0500 Subject: [PATCH 3/6] Check for True in the index --- xl2times/transforms.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 8525281..169ddf8 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -1202,7 +1202,8 @@ def capitalise_table_entries(table: EmbeddedXlTable): for seen_col in seen_cols: # Index of rows with string entries i = df[seen_col].apply(lambda x: isinstance(x, str)) - df.loc[i, seen_col] = df[seen_col][i].str.upper() + if any(i): + df.loc[i, seen_col] = df[seen_col][i].str.upper() return replace(table, dataframe=df) else: return table From bf1bae50560dbbabadd86785f01f4af59da34359 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Fri, 20 Dec 2024 11:22:25 -0500 Subject: [PATCH 4/6] Don't compare set description with GDX diff --- utils/run_benchmarks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/run_benchmarks.py b/utils/run_benchmarks.py index ad4e3da..65dee6e 100644 --- a/utils/run_benchmarks.py +++ b/utils/run_benchmarks.py @@ -118,6 +118,7 @@ def run_gams_gdxdiff( path.join(out_folder, "diffile.gdx"), "Eps=0.000001", "RelEps=0.000001", + "setDesc=N", ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, From 8ec7b7ee7e7fec508d88d2e6367edf6524793467 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Fri, 20 Dec 2024 12:25:12 -0500 Subject: [PATCH 5/6] Strip leading and trailing whitespace --- xl2times/transforms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 169ddf8..50eb176 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -1191,7 +1191,7 @@ def capitalise_table_values( tables: list[EmbeddedXlTable], model: TimesModel, ) -> list[EmbeddedXlTable]: - """Ensure that all table entries are uppercase.""" + """Ensure that all table entries are uppercase. Strip leading and trailing whitespace.""" def capitalise_table_entries(table: EmbeddedXlTable): df = table.dataframe.copy() @@ -1203,7 +1203,7 @@ def capitalise_table_entries(table: EmbeddedXlTable): # Index of rows with string entries i = df[seen_col].apply(lambda x: isinstance(x, str)) if any(i): - df.loc[i, seen_col] = df[seen_col][i].str.upper() + df.loc[i, seen_col] = df[seen_col][i].str.strip().upper() return replace(table, dataframe=df) else: return table From 5eadbfc2529a473293e74bc597cac8d6a5f36ae8 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Fri, 20 Dec 2024 12:29:05 -0500 Subject: [PATCH 6/6] Make the whitespace removal work --- xl2times/transforms.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 50eb176..b8a9938 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -1203,7 +1203,8 @@ def capitalise_table_entries(table: EmbeddedXlTable): # Index of rows with string entries i = df[seen_col].apply(lambda x: isinstance(x, str)) if any(i): - df.loc[i, seen_col] = df[seen_col][i].str.strip().upper() + df.loc[i, seen_col] = df[seen_col][i].str.upper() + df.loc[i, seen_col] = df[seen_col][i].str.strip() return replace(table, dataframe=df) else: return table