From 022cee27a4f450941ffce5c3e1402f54090f8276 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Fri, 16 Feb 2024 19:26:43 -0500 Subject: [PATCH 1/8] Update veda-tags.json --- xl2times/config/veda-tags.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xl2times/config/veda-tags.json b/xl2times/config/veda-tags.json index e84b265..1f60652 100644 --- a/xl2times/config/veda-tags.json +++ b/xl2times/config/veda-tags.json @@ -788,8 +788,8 @@ "*" ], "query_field": false, - "inherit_above": false, - "remove_first_row_if_absent": false, + "inherit_above": true, + "remove_first_row_if_absent": true, "remove_any_row_if_absent": false }, { From d4c2a112dabe5fc6a2e18fef2027e4c88abaa303 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Mon, 19 Feb 2024 00:03:11 -0500 Subject: [PATCH 2/8] Clean-up commodity group processing and storing --- xl2times/config/times_mapping.txt | 2 +- xl2times/datatypes.py | 1 - xl2times/transforms.py | 27 +++++++++++++++++---------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/xl2times/config/times_mapping.txt b/xl2times/config/times_mapping.txt index 8aba747..49b6982 100644 --- a/xl2times/config/times_mapping.txt +++ b/xl2times/config/times_mapping.txt @@ -3,7 +3,7 @@ ALL_TS[ALL_TS] = TimeSlices(TS) B[DATAYEAR,VALUE] = TimePeriods(Year,B) COM[COM] = Commodities(Commodity) COM_DESC[REG,COM,TEXT] = Commodities(Region,Commodity,Description) -COM_GMAP[REG,COM_GRP,COM] = CommodityGroupMap(Region,CommodityGroup,Commodity) +COM_GMAP[REG,COM_GRP,COM] = CommodityGroups(Region,CommodityGroup,Commodity) COM_GRP[COM_GRP] = CommodityGroups(CommodityGroup) COM_LIM[REG,COM,BD] = Commodities(Region,Commodity,LimType) COM_PEAK[REG,COM_GRP] = Attributes(Region,Commodity,Attribute:COM_PEAK,VALUE:1) diff --git a/xl2times/datatypes.py b/xl2times/datatypes.py index adda530..a72d63b 100644 --- a/xl2times/datatypes.py +++ b/xl2times/datatypes.py @@ -148,7 +148,6 @@ class TimesModel: all_regions: Set[str] = field(default_factory=set) processes: DataFrame = field(default_factory=DataFrame) commodities: DataFrame = field(default_factory=DataFrame) - com_gmap: DataFrame = field(default_factory=DataFrame) commodity_groups: DataFrame = field(default_factory=DataFrame) topology: DataFrame = field(default_factory=DataFrame) trade: DataFrame = field(default_factory=DataFrame) diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 01d6ec3..21994a2 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -982,7 +982,6 @@ def complete_dictionary( "Attributes": model.attributes, "Commodities": model.commodities, "CommodityGroups": model.commodity_groups, - "CommodityGroupMap": model.com_gmap, "Processes": model.processes, "Topology": model.topology, "Trade": model.trade, @@ -1177,7 +1176,6 @@ def name_comm_group(df): i = comm_groups["commoditygroup"] != comm_groups["commodity"] model.topology = comm_groups - model.com_gmap = comm_groups.loc[i, ["region", "commoditygroup", "commodity"]] return tables @@ -1247,14 +1245,23 @@ def complete_commodity_groups( Complete the list of commodity groups """ - commodities = generate_topology_dictionary(tables, model)[ - "commodities_by_name" - ].rename(columns={"commodity": "commoditygroup"}) - cgs_in_top = model.topology["commoditygroup"].to_frame() - commodity_groups = pd.concat([commodities, cgs_in_top]) - model.commodity_groups = commodity_groups.drop_duplicates( - keep="first" - ).reset_index() + # Single member CGs i.e., CG and commodity are the same + single_cgs = ( + model.commodities[["region", "commodity"]] + .drop_duplicates(ignore_index=True) + .copy() + ) + single_cgs["commoditygroup"] = single_cgs["commodity"] + # Commodity groups from topology + top_cgs = ( + model.topology[["region", "commodity", "commoditygroup"]] + .drop_duplicates(ignore_index=True) + .copy() + ) + commodity_groups = pd.concat([single_cgs, top_cgs], ignore_index=True) + model.commodity_groups = commodity_groups.dropna().drop_duplicates( + ignore_index=True + ) return tables From 7537b07fda34ab8d04368c87ebd3d25211102aa2 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Mon, 19 Feb 2024 00:23:42 -0500 Subject: [PATCH 3/8] Do not generate extra rows for COM_GMAP --- xl2times/config/times_mapping.txt | 2 +- xl2times/transforms.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/xl2times/config/times_mapping.txt b/xl2times/config/times_mapping.txt index 49b6982..aaf5457 100644 --- a/xl2times/config/times_mapping.txt +++ b/xl2times/config/times_mapping.txt @@ -3,7 +3,7 @@ ALL_TS[ALL_TS] = TimeSlices(TS) B[DATAYEAR,VALUE] = TimePeriods(Year,B) COM[COM] = Commodities(Commodity) COM_DESC[REG,COM,TEXT] = Commodities(Region,Commodity,Description) -COM_GMAP[REG,COM_GRP,COM] = CommodityGroups(Region,CommodityGroup,Commodity) +COM_GMAP[REG,COM_GRP,COM] = CommodityGroups(Region,CommodityGroup,Commodity,Gmap:True) COM_GRP[COM_GRP] = CommodityGroups(CommodityGroup) COM_LIM[REG,COM,BD] = Commodities(Region,Commodity,LimType) COM_PEAK[REG,COM_GRP] = Attributes(Region,Commodity,Attribute:COM_PEAK,VALUE:1) diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 21994a2..c24e60d 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -1258,10 +1258,9 @@ def complete_commodity_groups( .drop_duplicates(ignore_index=True) .copy() ) - commodity_groups = pd.concat([single_cgs, top_cgs], ignore_index=True) - model.commodity_groups = commodity_groups.dropna().drop_duplicates( - ignore_index=True - ) + cgs = pd.concat([single_cgs, top_cgs], ignore_index=True) + cgs["gmap"] = cgs["commoditygroup"] != cgs["commodity"] + model.commodity_groups = cgs.dropna().drop_duplicates(ignore_index=True) return tables From 11a901ccc91700b661f667ab6d8bbd2959373736 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Tue, 20 Feb 2024 00:44:10 -0500 Subject: [PATCH 4/8] Do a bit of clean-up --- xl2times/transforms.py | 49 ++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/xl2times/transforms.py b/xl2times/transforms.py index c24e60d..9b387e9 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -1098,11 +1098,13 @@ def generate_commodity_groups( tables: List[datatypes.EmbeddedXlTable], model: datatypes.TimesModel, ) -> List[datatypes.EmbeddedXlTable]: + """ + Generate commodity groups. + """ process_tables = [t for t in tables if t.tag == datatypes.Tag.fi_process] commodity_tables = [t for t in tables if t.tag == datatypes.Tag.fi_comm] # Veda determines default PCG based on predetermined order and presence of OUT/IN commodity - columns = ["region", "process", "primarycg"] reg_prc_pcg = pd.DataFrame(columns=columns) for process_table in process_tables: @@ -1133,7 +1135,7 @@ def generate_commodity_groups( def name_comm_group(df): """ - Return the name of a commodity group based on the member count + Return the name of a commodity group based on the member count. """ if df["commoditygroup"] > 1: @@ -1173,8 +1175,6 @@ def name_comm_group(df): # TODO: Include info from ~TFM_TOPINS e.g. include RSDAHT2 in addition to RSDAHT - i = comm_groups["commoditygroup"] != comm_groups["commodity"] - model.topology = comm_groups return tables @@ -1183,7 +1183,7 @@ def name_comm_group(df): def _count_comm_group_vectorised(comm_groups: pd.DataFrame) -> None: """ Store the number of IN/OUT commodities of the same type per Region and Process in CommodityGroup. - `comm_groups` is modified in-place + `comm_groups` is modified in-place. Args: comm_groups: 'Process' DataFrame with additional columns "commoditygroup" """ @@ -1199,8 +1199,8 @@ def _count_comm_group_vectorised(comm_groups: pd.DataFrame) -> None: def _process_comm_groups_vectorised( comm_groups: pd.DataFrame, csets_ordered_for_pcg: list[str] ) -> pd.DataFrame: - """Sets the first commodity group in the list of csets_ordered_for_pcg as the default pcg for each region/process/io combination, - but setting the io="OUT" subset as default before "IN". + """Sets the first commodity group in the list of csets_ordered_for_pcg as the default + pcg for each region/process/io combination, but setting the io="OUT" subset as default before "IN". See: Section 3.7.2.2, pg 80. of `TIMES Documentation PART IV` for details. @@ -1208,12 +1208,12 @@ def _process_comm_groups_vectorised( comm_groups: 'Process' DataFrame with columns ["region", "process", "io", "csets", "commoditygroup"] csets_ordered_for_pcg: List of csets in the order they should be considered for default pcg Returns: - Processed DataFrame with a new column "DefaultVedaPCG" set to True for the default pcg in each region/process/io combination. + Processed DataFrame with a new column "DefaultVedaPCG" set to True for the default pcg in eachregion/process/io combination. """ def _set_default_veda_pcg(group): - """For a given [region, process] group, default group is set as the first cset in the `csets_ordered_for_pcg` list, which is an output, if - one exists, otherwise the first input.""" + """For a given [region, process] group, default group is set as the first cset in the `csets_ordered_for_pcg` + list, which is an output, if one exists, otherwise the first input.""" if not group["csets"].isin(csets_ordered_for_pcg).all(): return group @@ -1242,7 +1242,7 @@ def complete_commodity_groups( model: datatypes.TimesModel, ) -> Dict[str, DataFrame]: """ - Complete the list of commodity groups + Complete the list of commodity groups. """ # Single member CGs i.e., CG and commodity are the same @@ -1396,17 +1396,9 @@ def expand_pcg_from_suffix(df): how="right", ) df = pd.concat([df, default_pcgs]) + # Keep last if a row appears more than once (disregard primarycg) df.drop_duplicates( - subset=[ - "sets", - "region", - "process", - "description", - "tact", - "tcap", - "tslvl", - "vintage", - ], + subset=[c for c in df.columns if c != "primarycg"], keep="last", inplace=True, ) @@ -1588,7 +1580,7 @@ def process_topology( model: datatypes.TimesModel, ) -> List[datatypes.EmbeddedXlTable]: """ - Create topology + Create topology. """ fit_tables = [t for t in tables if t.tag.startswith(datatypes.Tag.fi_t)] @@ -2472,13 +2464,14 @@ def rename_cgs( tables: Dict[str, DataFrame], model: datatypes.TimesModel, ) -> Dict[str, DataFrame]: - df = tables.get(datatypes.Tag.fi_t) - if df is not None: - i = df["other_indexes"].isin(default_pcg_suffixes) - df.loc[i, "other_indexes"] = ( - df["process"].astype(str) + "_" + df["other_indexes"].astype(str) + + if not model.attributes.empty: + i = model.attributes["other_indexes"].isin(default_pcg_suffixes) + model.attributes.loc[i, "other_indexes"] = ( + model.attributes["process"].astype(str) + + "_" + + model.attributes["other_indexes"].astype(str) ) - tables[datatypes.Tag.fi_t] = df return tables From 91dfc5b63861533d42bfe3129be8c5cf5b882194 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Wed, 21 Feb 2024 01:20:41 -0500 Subject: [PATCH 5/8] Resolve CG names specified for attributes --- xl2times/__main__.py | 2 +- xl2times/transforms.py | 49 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/xl2times/__main__.py b/xl2times/__main__.py index 97b6d1a..0653773 100644 --- a/xl2times/__main__.py +++ b/xl2times/__main__.py @@ -104,8 +104,8 @@ def convert_xl_to_times( transforms.process_uc_wildcards, transforms.process_wildcards, transforms.convert_aliases, - transforms.rename_cgs, transforms.fix_topology, + transforms.resolve_remaining_cgs, transforms.complete_dictionary, transforms.convert_to_string, lambda config, tables, model: dump_tables( diff --git a/xl2times/transforms.py b/xl2times/transforms.py index a4b5810..71eb8a5 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -2460,19 +2460,56 @@ def convert_aliases( return tables -def rename_cgs( +def resolve_remaining_cgs( config: datatypes.Config, tables: Dict[str, DataFrame], model: datatypes.TimesModel, ) -> Dict[str, DataFrame]: + """ + Resolve commodity group names in model.attributes specified as commodity type. + Supplement model.commodity_groups with resolved commodity groups. + """ if not model.attributes.empty: i = model.attributes["other_indexes"].isin(default_pcg_suffixes) - model.attributes.loc[i, "other_indexes"] = ( - model.attributes["process"].astype(str) - + "_" - + model.attributes["other_indexes"].astype(str) - ) + if any(i): + # Store processes with unresolved commodity groups + check_cgs = ( + model.attributes.loc[i, ["region", "process", "other_indexes"]] + .drop_duplicates(ignore_index=True) + .copy() + ) + # Resolve commodity group names in model.attribues + model.attributes.loc[i, "other_indexes"] = ( + model.attributes["process"].astype(str) + + "_" + + model.attributes["other_indexes"].astype(str) + ) + # TODO: Combine with above to avoid repetition + check_cgs["commoditygroup"] = ( + check_cgs["process"].astype(str) + + "_" + + check_cgs["other_indexes"].astype(str) + ) + check_cgs["csets"] = check_cgs["other_indexes"].str[:3] + check_cgs["io"] = check_cgs["other_indexes"].str[3:] + check_cgs["io"] = check_cgs["io"].replace({"I": "IN", "O": "OUT"}) + check_cgs.drop(columns="other_indexes", inplace=True) + check_cgs = check_cgs.merge( + model.topology[ + ["region", "process", "commodity", "csets", "io"] + ].drop_duplicates(), + how="left", + ) + check_cgs["gmap"] = True + check_cgs = pd.concat( + [ + model.commodity_groups, + check_cgs[["region", "commodity", "commoditygroup", "gmap"]], + ], + ignore_index=True, + ) + model.commodity_groups = check_cgs.drop_duplicates().dropna() return tables From 32458c7601e1134e89c2f335d669c1f5044148e0 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Wed, 21 Feb 2024 16:38:29 -0500 Subject: [PATCH 6/8] Add user defined CGs to model.commodity_groups --- xl2times/config/veda-tags.json | 2 +- xl2times/transforms.py | 39 ++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/xl2times/config/veda-tags.json b/xl2times/config/veda-tags.json index 1f60652..09903ba 100644 --- a/xl2times/config/veda-tags.json +++ b/xl2times/config/veda-tags.json @@ -782,7 +782,7 @@ { "name": "name", "aliases": [], - "use_name": "name", + "use_name": "commoditygroup", "row_ignore_symbol": [ "\\I:", "*" diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 71eb8a5..e27b726 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -2017,7 +2017,7 @@ def get_matching_processes(row, dictionary): ("pset_ci", "processes_by_comm_in"), ("pset_co", "processes_by_comm_out"), ]: - if row[col] is not None: + if col in row.index and row[col] is not None: matching_processes = intersect( matching_processes, filter_by_pattern(dictionary[key], row[col].upper()) ) @@ -2033,7 +2033,7 @@ def get_matching_commodities(row, dictionary): ("cset_cd", "commodities_by_desc"), ("cset_set", "commodities_by_sets"), ]: - if row[col] is not None: + if col in row.index and row[col] is not None: matching_commodities = intersect( matching_commodities, filter_by_pattern(dictionary[key], row[col].upper()), @@ -2155,13 +2155,21 @@ def process_wildcards( tables: Dict[str, DataFrame], model: datatypes.TimesModel, ) -> Dict[str, DataFrame]: + """ + Process wildcards specified int TFM tables. + """ + topology = generate_topology_dictionary(tables, model) def match_wildcards( row: pd.Series, ) -> tuple[DataFrame | None, DataFrame | None] | None: + """ + Return matching processes and commodities + """ matching_processes = get_matching_processes(row, topology) matching_commodities = get_matching_commodities(row, topology) + if (matching_processes is None or len(matching_processes) == 0) and ( matching_commodities is None or len(matching_commodities) == 0 ): # TODO is this necessary? Try without? @@ -2298,6 +2306,33 @@ def eval_and_update( new_tables.append(tables[datatypes.Tag.fi_t]) tables[datatypes.Tag.fi_t] = pd.concat(new_tables, ignore_index=True) + if datatypes.Tag.tfm_comgrp in tables: + updates = tables[datatypes.Tag.tfm_comgrp] + table = model.commodity_groups + new_tables = [] + + # Expand each row by wildcards, then add to model.commodity_groups + for _, row in updates.iterrows(): + match = match_wildcards(row) + # Convert serie to dataframe; keep only relevant columns + new_rows = pd.DataFrame([row.filter(table.columns)]) + # Match returns both processes and commodities, but only latter is relevant here + processes, commodities = match if match is not None else (None, None) + if commodities is None: + logger.warning(f"TFM_COMGRP row did not match any commodity") + else: + new_rows = commodities.merge(new_rows, how="cross") + new_tables.append(new_rows) + + # Expand model.commodity_groups with user-defined commodity groups + if new_tables: + new_tables.append(model.commodity_groups) + commodity_groups = pd.concat( + new_tables, ignore_index=True + ).drop_duplicates() + commodity_groups.loc[commodity_groups["gmap"].isna(), ["gmap"]] = True + model.commodity_groups = commodity_groups.dropna() + return tables From 07b53a18bebfa641ce84a117ff708e2ae6027509 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Thu, 22 Feb 2024 06:18:38 -0500 Subject: [PATCH 7/8] Apply suggestions from code review Co-authored-by: Siddharth Krishna --- xl2times/transforms.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/xl2times/transforms.py b/xl2times/transforms.py index e27b726..9ea440c 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -1251,14 +1251,12 @@ def complete_commodity_groups( single_cgs = ( model.commodities[["region", "commodity"]] .drop_duplicates(ignore_index=True) - .copy() ) single_cgs["commoditygroup"] = single_cgs["commodity"] # Commodity groups from topology top_cgs = ( model.topology[["region", "commodity", "commoditygroup"]] .drop_duplicates(ignore_index=True) - .copy() ) cgs = pd.concat([single_cgs, top_cgs], ignore_index=True) cgs["gmap"] = cgs["commoditygroup"] != cgs["commodity"] @@ -2156,7 +2154,7 @@ def process_wildcards( model: datatypes.TimesModel, ) -> Dict[str, DataFrame]: """ - Process wildcards specified int TFM tables. + Process wildcards specified in TFM tables. """ topology = generate_topology_dictionary(tables, model) @@ -2314,7 +2312,7 @@ def eval_and_update( # Expand each row by wildcards, then add to model.commodity_groups for _, row in updates.iterrows(): match = match_wildcards(row) - # Convert serie to dataframe; keep only relevant columns + # Convert series to dataframe; keep only relevant columns new_rows = pd.DataFrame([row.filter(table.columns)]) # Match returns both processes and commodities, but only latter is relevant here processes, commodities = match if match is not None else (None, None) @@ -2512,7 +2510,6 @@ def resolve_remaining_cgs( check_cgs = ( model.attributes.loc[i, ["region", "process", "other_indexes"]] .drop_duplicates(ignore_index=True) - .copy() ) # Resolve commodity group names in model.attribues model.attributes.loc[i, "other_indexes"] = ( @@ -2529,7 +2526,7 @@ def resolve_remaining_cgs( check_cgs["csets"] = check_cgs["other_indexes"].str[:3] check_cgs["io"] = check_cgs["other_indexes"].str[3:] check_cgs["io"] = check_cgs["io"].replace({"I": "IN", "O": "OUT"}) - check_cgs.drop(columns="other_indexes", inplace=True) + check_cgs = check_cgs.drop(columns="other_indexes") check_cgs = check_cgs.merge( model.topology[ ["region", "process", "commodity", "csets", "io"] From f7308e7e1a9e1ce8fcc41e8defd79a9bec7428ff Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Thu, 22 Feb 2024 06:27:22 -0500 Subject: [PATCH 8/8] Reformat transforms.py --- xl2times/transforms.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 9ea440c..67ed257 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -1248,15 +1248,13 @@ def complete_commodity_groups( """ # Single member CGs i.e., CG and commodity are the same - single_cgs = ( - model.commodities[["region", "commodity"]] - .drop_duplicates(ignore_index=True) + single_cgs = model.commodities[["region", "commodity"]].drop_duplicates( + ignore_index=True ) single_cgs["commoditygroup"] = single_cgs["commodity"] # Commodity groups from topology - top_cgs = ( - model.topology[["region", "commodity", "commoditygroup"]] - .drop_duplicates(ignore_index=True) + top_cgs = model.topology[["region", "commodity", "commoditygroup"]].drop_duplicates( + ignore_index=True ) cgs = pd.concat([single_cgs, top_cgs], ignore_index=True) cgs["gmap"] = cgs["commoditygroup"] != cgs["commodity"] @@ -2507,10 +2505,9 @@ def resolve_remaining_cgs( i = model.attributes["other_indexes"].isin(default_pcg_suffixes) if any(i): # Store processes with unresolved commodity groups - check_cgs = ( - model.attributes.loc[i, ["region", "process", "other_indexes"]] - .drop_duplicates(ignore_index=True) - ) + check_cgs = model.attributes.loc[ + i, ["region", "process", "other_indexes"] + ].drop_duplicates(ignore_index=True) # Resolve commodity group names in model.attribues model.attributes.loc[i, "other_indexes"] = ( model.attributes["process"].astype(str)