From 7a6014fd803b6b7d768cd466bd56e56ff44026ef Mon Sep 17 00:00:00 2001
From: Olexandr Balyk
Date: Sun, 17 Mar 2024 21:38:33 -0500
Subject: [PATCH 1/5] Allow FI_T only specifying topology

---
 xl2times/transforms.py | 51 ++++++++++++++++++++++++---------------------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index 2ab7df1..d48ae14 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -511,11 +511,8 @@ def process_flexible_import_table(
             return table

         # Rename, add and remove specific columns if the circumstances are right
-        # TODO: We should do a full scale normalisation here, incl. renaming of aliases
         df = table.dataframe

-        nrows = df.shape[0]
-
         # Tag column no longer used to identify data columns
         # https://veda-documentation.readthedocs.io/en/latest/pages/introduction.html#veda2-0-enhanced-features
         # TODO: Include other valid column headers
@@ -541,51 +538,53 @@ def process_flexible_import_table(
         ]
         for colname in index_columns:
             if colname not in df.columns:
-                df[colname] = [None] * nrows
+                df[colname] = None
         table = replace(table, dataframe=df)
         df = table.dataframe

-        attribute = "attribute"
-        df, attribute_suffix = utils.explode(df, data_columns)
-
-        # Append the data column name to the Attribute column values
-        if nrows > 0:
-            i = df[attribute].notna()
-            df.loc[i, attribute] = df.loc[i, attribute] + "~" + attribute_suffix[i]
-            i = df[attribute].isna()
-            df.loc[i, attribute] = attribute_suffix[i]
+        if data_columns:
+            df, attribute_suffix = utils.explode(df, data_columns)
+            # Append the data column name to the Attribute column values
+            i = df["attribute"].notna()
+            df.loc[i, "attribute"] = df.loc[i, "attribute"] + "~" + attribute_suffix[i]
+            i = df["attribute"].isna()
+            df.loc[i, "attribute"] = attribute_suffix[i]

         # Capitalise all attributes, unless column type float
-        if df[attribute].dtype != float:
-            df[attribute] = df[attribute].str.upper()
+        if df["attribute"].dtype != float:
+            df["attribute"] = df["attribute"].str.upper()

         # Handle Attribute containing tilde, such as 'STOCK~2030'
-        for attr in df[attribute].unique():
-            if "~" in attr:
-                i = df[attribute] == attr
+        index = df["attribute"].str.contains("~")
+        if any(index):
+            for attr in df["attribute"][index].unique():
+                i = index & (df["attribute"] == attr)
                 parts = attr.split("~")
                 for value in parts:
                     colname, typed_value = get_colname(value)
                     if colname is None:
-                        df.loc[i, attribute] = typed_value
+                        df.loc[i, "attribute"] = typed_value
                     else:
                         df.loc[i, colname] = typed_value

         # Handle Other_Indexes
         other = "other_indexes"
-        for attr in df[attribute].unique():
+        for attr in df["attribute"].unique():
             if attr == "END":
-                i = df[attribute] == attr
+                i = df["attribute"] == attr
                 df.loc[i, "year"] = df.loc[i, "value"].astype("int") + 1
                 df.loc[i, other] = "EOH"
-                df.loc[i, attribute] = "PRC_NOFF"
+                df.loc[i, "attribute"] = "PRC_NOFF"

         df = df.reset_index(drop=True)

         # Should have all index_columns and VALUE
         if len(df.columns) != (len(index_columns) + 1):
-            raise ValueError(f"len(df.columns) = {len(df.columns)}")
+            if len(df.columns) == len(index_columns) and "value" not in df.columns:
+                df["value"] = None
+            else:
+                raise ValueError(f"len(df.columns) = {len(df.columns)}")

         df["year2"] = df.apply(
             lambda row: (
@@ -1973,10 +1972,8 @@ def process_transform_tables(
             )

             # Handle Regions:
-            # Check whether allregions or any of model regions are among columns
-            if set(df.columns).isdisjoint(
-                {x.lower() for x in regions} | {"allregions"}
-            ):
+            # Check whether any of model regions are among columns
+            if set(df.columns).isdisjoint({x.lower() for x in regions}):
                 if "region" not in df.columns:
                     # If there's no region information at all, this table is for all regions:
                     df["region"] = ["allregions"] * len(df)
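
The change above means a ~FI_T table that only lists topology (process and commodity
columns, with no attribute/data columns) is now carried through instead of tripping the
column-count check. A minimal standalone sketch of that case, with illustrative column
names and only the relevant steps (not part of the patch):

    import pandas as pd

    # Hypothetical ~FI_T table that declares topology only: no data columns at all.
    df = pd.DataFrame(
        {
            "process": ["PRC1", "PRC2"],
            "commodity-in": ["COAL", "GAS"],
            "commodity-out": ["ELC", "ELC"],
        }
    )

    # Illustrative subset of the index columns the table should end up with.
    index_columns = ["region", "process", "commodity-in", "commodity-out", "attribute", "year"]
    data_columns = [c for c in df.columns if c not in index_columns]  # empty here

    # Simplified echo of the patched logic: fill missing index columns with None and
    # only treat data columns when there are any; otherwise add an empty value column.
    for colname in index_columns:
        if colname not in df.columns:
            df[colname] = None
    if not data_columns and "value" not in df.columns:
        df["value"] = None

    print(df)
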
From b121d1d70b2e9bb801bd31c0e9b76cf8c2d2180e Mon Sep 17 00:00:00 2001
From: Olexandr Balyk
Date: Tue, 19 Mar 2024 13:43:26 -0500
Subject: [PATCH 2/5] Check for presence of value column in update tables before updating

---
 xl2times/transforms.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index 3297dbb..bd62481 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -571,6 +571,7 @@ def process_flexible_import_table(
         # Should have all index_columns and VALUE
         if len(df.columns) != (len(index_columns) + 1):
+            # TODO: Should be ok to drop as long as the topology info is stored.
             if len(df.columns) == len(index_columns) and "value" not in df.columns:
                 df["value"] = None
             else:
                 raise ValueError(f"len(df.columns) = {len(df.columns)}")
@@ -2439,6 +2440,11 @@ def apply_transform_tables(
             total=len(updates),
             desc=f"Applying transformations from {Tag.tfm_upd.value}",
         ):
+            # No need to proceed with this row if there is no value to update with.
+            # TODO: remove rows with NAs in value column earlier?
+            if "value" not in row.index:
+                continue
+
             rows_to_update = query(
                 table,
                 row["process"],
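
The guard added in apply_transform_tables simply skips ~TFM_UPD rows that carry no value.
A small sketch of how that check behaves (hypothetical table, not the project's real data):

    import pandas as pd

    # Hypothetical update table that has no "value" column at all.
    updates = pd.DataFrame({"process": ["PRC1", "PRC2"], "attribute": ["STOCK", "STOCK"]})

    applied = []
    for _, row in updates.iterrows():
        # Same idea as the commit: a row without a value column has nothing to apply.
        if "value" not in row.index:
            continue
        applied.append(row)

    print(len(applied))  # 0 - every row is skipped
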
"value", "row_ignore_symbol": [ "\\I:" ], @@ -3519,7 +3519,7 @@ "allregions", "io" ], - "use_name": "allregions", + "use_name": "value", "row_ignore_symbol": [ "\\I:" ], @@ -3990,7 +3990,7 @@ "val_field", "allregions" ], - "use_name": "allregions", + "use_name": "value", "row_ignore_symbol": [ "\\I:" ], @@ -4624,7 +4624,7 @@ "val_field", "allregions" ], - "use_name": "allregions", + "use_name": "value", "row_ignore_symbol": [ "\\I:" ], diff --git a/xl2times/transforms.py b/xl2times/transforms.py index bd62481..b5d5981 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -483,7 +483,6 @@ def process_flexible_import_tables( # TODO: update this dictionary legal_values = { "limtype": set(config.times_sets["LIM"]), - # TODO: check what the values for the below should be "timeslice": set(model.ts_tslvl["tslvl"]), "commodity": set(utils.merge_columns(tables, Tag.fi_comm, "commodity")), "region": model.internal_regions, @@ -491,8 +490,6 @@ def process_flexible_import_tables( "other_indexes": {"IN", "OUT", "DEMO", "DEMI"}, } - # TODO decide whether VedaProcessSets should become a new Enum type or part of TimesModelData type - def process_flexible_import_table( table: EmbeddedXlTable, ) -> EmbeddedXlTable: @@ -505,27 +502,13 @@ def process_flexible_import_table( # Tag column no longer used to identify data columns # https://veda-documentation.readthedocs.io/en/latest/pages/introduction.html#veda2-0-enhanced-features - # TODO: Include other valid column headers + known_columns = config.known_columns[Tag.fi_t] + # TODO: Verify this list against other lists data_columns = [x for x in df.columns if x not in known_columns] - # TODO: Replace this with something similar to know columns from config - # Populate index columns - index_columns = [ - "region", - "process", - "commodity", - "commodity-in", - "commodity-in-aux", - "commodity-out", - "commodity-out-aux", - "attribute", - "year", - "timeslice", - "limtype", - "currency", - "other_indexes", - ] + # Populate index columns (same as known columns for this table type) + index_columns = known_columns for colname in index_columns: if colname not in df.columns: df[colname] = None @@ -1988,6 +1971,7 @@ def process_transform_tables( ) -> list[EmbeddedXlTable]: """Process transform tables.""" regions = model.internal_regions + # TODO: Add other tfm tags? 
From 516637f0bdd75a9b7cbdf89758625dcc31bf24c7 Mon Sep 17 00:00:00 2001
From: Olexandr Balyk
Date: Tue, 19 Mar 2024 16:07:37 -0500
Subject: [PATCH 4/5] Remove unnecessary changes

---
 xl2times/transforms.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index b5d5981..c4caf78 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -2002,7 +2002,7 @@ def process_transform_tables(
                 if "region" not in df.columns:
                     # If there's no region information at all, this table is for all regions:
                     df["region"] = ["allregions"] * len(df)
-                # Else, we only have a "region" column so handle it below
+                # Else, we only have a "region" column so handle it below
             else:
                 if "region" in df.columns:
                     raise ValueError(
@@ -2420,11 +2420,6 @@ def apply_transform_tables(
             total=len(updates),
             desc=f"Applying transformations from {Tag.tfm_upd.value}",
         ):
-            # No need to proceed with this row if there is no value to update with.
-            # TODO: remove rows with NAs in value column earlier?
-            if "value" not in row.index:
-                continue
-
             rows_to_update = query(
                 table,
                 row["process"],

From a1fcba3f0ec2153e26ba39689bc6f23c2cad774e Mon Sep 17 00:00:00 2001
From: Olexandr Balyk
Date: Wed, 20 Mar 2024 00:46:57 -0500
Subject: [PATCH 5/5] Address review comments

---
 xl2times/transforms.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index c4caf78..b2deb26 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -543,12 +543,11 @@ def process_flexible_import_table(

         # Handle Other_Indexes
         other = "other_indexes"
-        for attr in df["attribute"].unique():
-            if attr == "END":
-                i = df["attribute"] == attr
-                df.loc[i, "year"] = df.loc[i, "value"].astype("int") + 1
-                df.loc[i, other] = "EOH"
-                df.loc[i, "attribute"] = "PRC_NOFF"
+        if "END" in df["attribute"]:
+            i = df["attribute"] == "END"
+            df.loc[i, "year"] = df.loc[i, "value"].astype("int") + 1
+            df.loc[i, other] = "EOH"
+            df.loc[i, "attribute"] = "PRC_NOFF"

         df = df.reset_index(drop=True)
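
One detail worth flagging in that last hunk: membership tests on a pandas Series look at
the index labels, not the values, so the check '"END" in df["attribute"]' is not equivalent
to asking whether any attribute equals "END". A tiny standalone illustration (not part of
the patch):

    import pandas as pd

    s = pd.Series(["END", "STOCK"])

    print("END" in s)          # False - `in` checks the index labels (0 and 1)
    print("END" in s.values)   # True  - membership over the underlying values
    print((s == "END").any())  # True  - explicit elementwise comparison
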