From 7ce993b207904895af84b53f701966799fb36394 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Tue, 5 Mar 2024 12:17:33 -0500 Subject: [PATCH 1/2] Allow processing of the dins table and its variants --- xl2times/__main__.py | 2 +- xl2times/transforms.py | 32 ++++++++++++++++++++++++++------ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/xl2times/__main__.py b/xl2times/__main__.py index 0feeb3c..2259a7d 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -97,7 +97,7 @@ def convert_xl_to_times( transforms.remove_exreg_cols, transforms.generate_dummy_processes, transforms.process_time_slices, - transforms.process_transform_insert_variants, + transforms.process_transform_table_variants, transforms.process_transform_tables, transforms.process_tradelinks, transforms.process_processes, diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 7a8b05f..380d56e 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -1862,7 +1862,7 @@ def process_tradelinks( return result -def process_transform_insert_variants( +def process_transform_table_variants( config: datatypes.Config, tables: List[datatypes.EmbeddedXlTable], model: datatypes.TimesModel, @@ -1886,7 +1886,12 @@ def is_year(col_name): result = [] for table in tables: - if table.tag == datatypes.Tag.tfm_ins_ts: + tag = datatypes.Tag(table.tag.split(":")[0]) + if tag in [ + datatypes.Tag.tfm_dins_ts, + datatypes.Tag.tfm_ins_ts, + datatypes.Tag.tfm_upd_ts, + ]: # ~TFM_INS-TS: Gather columns whose names are years into a single "Year" column: df = table.dataframe query_columns = config.query_columns[datatypes.Tag(table.tag)] @@ -1918,8 +1923,14 @@ def is_year(col_name): ) # Convert the year column to integer df["year"] = df["year"].astype("int") - result.append(replace(table, dataframe=df, tag=datatypes.Tag.tfm_ins)) - elif table.tag == datatypes.Tag.tfm_ins_at: + result.append( + replace(table, dataframe=df, tag=datatypes.Tag(tag.value.split("-")[0])) + ) + elif tag in [ + datatypes.Tag.tfm_dins_at, + datatypes.Tag.tfm_ins_at, + datatypes.Tag.tfm_upd_at, + ]: # ~TFM_INS-AT: Gather columns with attribute names into a single "Attribute" column df = table.dataframe if "attribute" in df.columns: @@ -1938,7 +1949,9 @@ def is_year(col_name): value_name="value", ignore_index=False, ) - result.append(replace(table, dataframe=df, tag=datatypes.Tag.tfm_ins)) + result.append( + replace(table, dataframe=df, tag=datatypes.Tag(tag.value.split("-")[0])) + ) else: result.append(table) @@ -1955,9 +1968,9 @@ def process_transform_tables( """ regions = model.internal_regions tfm_tags = [ + datatypes.Tag.tfm_dins, datatypes.Tag.tfm_ins, datatypes.Tag.tfm_ins_txt, - datatypes.Tag.tfm_dins, datatypes.Tag.tfm_topins, datatypes.Tag.tfm_upd, datatypes.Tag.tfm_mig, @@ -1971,6 +1984,7 @@ def process_transform_tables( result.append(table) elif table.tag in [ + datatypes.Tag.tfm_dins, datatypes.Tag.tfm_ins, datatypes.Tag.tfm_ins_txt, datatypes.Tag.tfm_upd, @@ -2395,6 +2409,12 @@ def eval_and_update( new_tables.append(tables[datatypes.Tag.fi_t]) tables[datatypes.Tag.fi_t] = pd.concat(new_tables, ignore_index=True) + # TODO: Move this somewhere else (i.e. no wildcard processing)? + if datatypes.Tag.tfm_dins in tables: + updates = tables[datatypes.Tag.tfm_dins] + table = tables[datatypes.Tag.fi_t] + tables[datatypes.Tag.fi_t] = pd.concat([table, updates], ignore_index=True) + if datatypes.Tag.tfm_ins_txt in tables: updates = tables[datatypes.Tag.tfm_ins_txt] From 3357bd474ff4422632eedb3c7c281ff64bc462aa Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Tue, 5 Mar 2024 16:19:41 -0500 Subject: [PATCH 2/2] Ensure only relevant columns (and with correct names) are merged --- xl2times/config/veda-tags.json | 4 ++-- xl2times/transforms.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/xl2times/config/veda-tags.json b/xl2times/config/veda-tags.json index 09903ba..83e0474 100644 --- a/xl2times/config/veda-tags.json +++ b/xl2times/config/veda-tags.json @@ -1008,7 +1008,7 @@ "cset:cn", "cset: cn" ], - "use_name": "cset_cn", + "use_name": "commodity", "row_ignore_symbol": [ "\\I:" ], @@ -1072,7 +1072,7 @@ "pset:pn", "pset: pn" ], - "use_name": "pset_pn", + "use_name": "process", "row_ignore_symbol": [ "\\I:" ], diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 380d56e..61cd340 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -2359,7 +2359,7 @@ def eval_and_update( for _, row in tqdm( updates.iterrows(), total=len(updates), - desc=f"Processing wildcard for {datatypes.Tag.tfm_upd}", + desc=f"Processing wildcard for {datatypes.Tag.tfm_upd.value}", ): if row["value"] is None: # TODO is this really needed? continue @@ -2392,7 +2392,7 @@ def eval_and_update( for _, row in tqdm( updates.iterrows(), total=len(updates), - desc=f"Processing wildcard for {datatypes.Tag.tfm_ins}", + desc=f"Processing wildcard for {datatypes.Tag.tfm_ins.value}", ): match = match_wildcards(row) # TODO perf: add matched procs/comms into column and use explode? @@ -2411,8 +2411,8 @@ def eval_and_update( # TODO: Move this somewhere else (i.e. no wildcard processing)? if datatypes.Tag.tfm_dins in tables: - updates = tables[datatypes.Tag.tfm_dins] table = tables[datatypes.Tag.fi_t] + updates = tables[datatypes.Tag.tfm_dins].filter(table.columns, axis=1) tables[datatypes.Tag.fi_t] = pd.concat([table, updates], ignore_index=True) if datatypes.Tag.tfm_ins_txt in tables: @@ -2423,7 +2423,7 @@ def eval_and_update( for _, row in tqdm( updates.iterrows(), total=len(updates), - desc=f"Processing wildcard for {datatypes.Tag.tfm_ins_txt}", + desc=f"Processing wildcard for {datatypes.Tag.tfm_ins_txt.value}", ): match = match_wildcards(row) if match is None: @@ -2454,7 +2454,7 @@ def eval_and_update( for _, row in tqdm( updates.iterrows(), total=len(updates), - desc=f"Processing wildcard for {datatypes.Tag.tfm_mig}", + desc=f"Processing wildcard for {datatypes.Tag.tfm_mig.value}", ): match = match_wildcards(row) processes, commodities = match if match is not None else (None, None)