From d4c2a112dabe5fc6a2e18fef2027e4c88abaa303 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Mon, 19 Feb 2024 00:03:11 -0500 Subject: [PATCH] Clean-up commodity group processing and storing --- xl2times/config/times_mapping.txt | 2 +- xl2times/datatypes.py | 1 - xl2times/transforms.py | 27 +++++++++++++++++---------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/xl2times/config/times_mapping.txt b/xl2times/config/times_mapping.txt index 8aba7479..49b6982d 100644 --- a/xl2times/config/times_mapping.txt +++ b/xl2times/config/times_mapping.txt @@ -3,7 +3,7 @@ ALL_TS[ALL_TS] = TimeSlices(TS) B[DATAYEAR,VALUE] = TimePeriods(Year,B) COM[COM] = Commodities(Commodity) COM_DESC[REG,COM,TEXT] = Commodities(Region,Commodity,Description) -COM_GMAP[REG,COM_GRP,COM] = CommodityGroupMap(Region,CommodityGroup,Commodity) +COM_GMAP[REG,COM_GRP,COM] = CommodityGroups(Region,CommodityGroup,Commodity) COM_GRP[COM_GRP] = CommodityGroups(CommodityGroup) COM_LIM[REG,COM,BD] = Commodities(Region,Commodity,LimType) COM_PEAK[REG,COM_GRP] = Attributes(Region,Commodity,Attribute:COM_PEAK,VALUE:1) diff --git a/xl2times/datatypes.py b/xl2times/datatypes.py index adda5300..a72d63b7 100644 --- a/xl2times/datatypes.py +++ b/xl2times/datatypes.py @@ -148,7 +148,6 @@ class TimesModel: all_regions: Set[str] = field(default_factory=set) processes: DataFrame = field(default_factory=DataFrame) commodities: DataFrame = field(default_factory=DataFrame) - com_gmap: DataFrame = field(default_factory=DataFrame) commodity_groups: DataFrame = field(default_factory=DataFrame) topology: DataFrame = field(default_factory=DataFrame) trade: DataFrame = field(default_factory=DataFrame) diff --git a/xl2times/transforms.py b/xl2times/transforms.py index 01d6ec34..21994a28 100644 --- a/xl2times/transforms.py +++ b/xl2times/transforms.py @@ -982,7 +982,6 @@ def complete_dictionary( "Attributes": model.attributes, "Commodities": model.commodities, "CommodityGroups": model.commodity_groups, - "CommodityGroupMap": model.com_gmap, "Processes": model.processes, "Topology": model.topology, "Trade": model.trade, @@ -1177,7 +1176,6 @@ def name_comm_group(df): i = comm_groups["commoditygroup"] != comm_groups["commodity"] model.topology = comm_groups - model.com_gmap = comm_groups.loc[i, ["region", "commoditygroup", "commodity"]] return tables @@ -1247,14 +1245,23 @@ def complete_commodity_groups( Complete the list of commodity groups """ - commodities = generate_topology_dictionary(tables, model)[ - "commodities_by_name" - ].rename(columns={"commodity": "commoditygroup"}) - cgs_in_top = model.topology["commoditygroup"].to_frame() - commodity_groups = pd.concat([commodities, cgs_in_top]) - model.commodity_groups = commodity_groups.drop_duplicates( - keep="first" - ).reset_index() + # Single member CGs i.e., CG and commodity are the same + single_cgs = ( + model.commodities[["region", "commodity"]] + .drop_duplicates(ignore_index=True) + .copy() + ) + single_cgs["commoditygroup"] = single_cgs["commodity"] + # Commodity groups from topology + top_cgs = ( + model.topology[["region", "commodity", "commoditygroup"]] + .drop_duplicates(ignore_index=True) + .copy() + ) + commodity_groups = pd.concat([single_cgs, top_cgs], ignore_index=True) + model.commodity_groups = commodity_groups.dropna().drop_duplicates( + ignore_index=True + ) return tables