Skip to content

Commit

Permalink
Don't drop duplicates in fi_process table when generating topology (#146)
Browse files Browse the repository at this point in the history
  • Loading branch information
olejandro authored Dec 2, 2023
1 parent 073dd9f commit 4dade4b
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 13 deletions.
2 changes: 1 addition & 1 deletion times_reader/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def convert_xl_to_times(
transforms.remove_invalid_values,
transforms.process_time_periods,
transforms.generate_all_regions,
transforms.capitalise_attributes,
transforms.capitalise_some_values,
transforms.apply_fixups,
transforms.generate_commodity_groups,
transforms.fill_in_missing_pcgs,
Expand Down
23 changes: 11 additions & 12 deletions times_reader/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,7 @@ def fill_in_missing_values_table(table):
df[colname].fillna(start_year, inplace=True)
elif colname == "currency":
df[colname].fillna(currency, inplace=True)

return replace(table, dataframe=df)

for table in tables:
Expand Down Expand Up @@ -907,20 +908,25 @@ def generate_all_regions(
return tables


def capitalise_attributes(
def capitalise_some_values(
config: datatypes.Config,
tables: List[datatypes.EmbeddedXlTable],
) -> List[datatypes.EmbeddedXlTable]:
"""
Ensure that all attributes are uppercase
Ensure that all attributes and units are uppercase
"""

# TODO: This should include other dimensions
# TODO: This should be part of normalisation

colnames = ["attribute", "tact", "tcap", "unit"]

def capitalise_attributes_table(table: datatypes.EmbeddedXlTable):
df = table.dataframe.copy()
if "attribute" in df.columns and len(df) > 0:
df["attribute"] = df["attribute"].str.upper()
seen_cols = [colname for colname in colnames if colname in df.columns]
if len(df) > 0:
for seen_col in seen_cols:
df[seen_col] = df[seen_col].str.upper()
return replace(table, dataframe=df)
else:
return table
Expand Down Expand Up @@ -1818,17 +1824,10 @@ def generate_topology_dictionary(tables: Dict[str, DataFrame]) -> Dict[str, Data
processes = tables[datatypes.Tag.fi_process]
commodities = tables[datatypes.Tag.fi_comm]

duplicated_processes = processes[["process"]].duplicated()
if any(duplicated_processes):
duplicated_process_names = processes["process"][duplicated_processes]
print(
f"WARNING: {len(duplicated_process_names)} duplicated processes: {duplicated_process_names.values[1:3]}"
)
processes.drop_duplicates(subset="process", inplace=True)

dictionary["processes_by_name"] = (
processes[["process"]]
.dropna()
.drop_duplicates()
.set_index("process", drop=False)
.rename_axis("index")
)
Expand Down

0 comments on commit 4dade4b

Please sign in to comment.