Allow processing of the dins table and its variants (#208)
olejandro authored Mar 6, 2024
1 parent 9ea9983 commit 36940b9
Showing 3 changed files with 33 additions and 13 deletions.
2 changes: 1 addition & 1 deletion xl2times/__main__.py
@@ -97,7 +97,7 @@ def convert_xl_to_times(
         transforms.remove_exreg_cols,
         transforms.generate_dummy_processes,
         transforms.process_time_slices,
-        transforms.process_transform_insert_variants,
+        transforms.process_transform_table_variants,
         transforms.process_transform_tables,
         transforms.process_tradelinks,
         transforms.process_processes,
4 changes: 2 additions & 2 deletions xl2times/config/veda-tags.json
@@ -1008,7 +1008,7 @@
     "cset:cn",
     "cset: cn"
   ],
-  "use_name": "cset_cn",
+  "use_name": "commodity",
   "row_ignore_symbol": [
     "\\I:"
   ],
@@ -1072,7 +1072,7 @@
     "pset:pn",
     "pset: pn"
   ],
-  "use_name": "pset_pn",
+  "use_name": "process",
   "row_ignore_symbol": [
     "\\I:"
   ],
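For context, a "use_name" entry in veda-tags.json gives the canonical column name for any spreadsheet column whose header matches one of the listed aliases; after this change the CSet/PSet columns land on "commodity" and "process" rather than "cset_cn" and "pset_pn". A minimal sketch of that renaming idea, with a hand-written alias map and made-up DataFrame (the real logic lives in xl2times's table normalisation, not in this snippet):

import pandas as pd

# Hypothetical alias map built from the two aliases shown in the diff above.
aliases_to_use_name = {"cset:cn": "commodity", "cset: cn": "commodity"}

df = pd.DataFrame({"cset:cn": ["COAL", "ELCDMD"], "2030": [1.0, 2.0]})
df = df.rename(columns=lambda c: aliases_to_use_name.get(c, c))
print(df.columns.tolist())  # ['commodity', '2030']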
40 changes: 30 additions & 10 deletions xl2times/transforms.py
@@ -1862,7 +1862,7 @@ def process_tradelinks(
     return result


-def process_transform_insert_variants(
+def process_transform_table_variants(
     config: datatypes.Config,
     tables: List[datatypes.EmbeddedXlTable],
     model: datatypes.TimesModel,
@@ -1886,7 +1886,12 @@ def is_year(col_name):

     result = []
     for table in tables:
-        if table.tag == datatypes.Tag.tfm_ins_ts:
+        tag = datatypes.Tag(table.tag.split(":")[0])
+        if tag in [
+            datatypes.Tag.tfm_dins_ts,
+            datatypes.Tag.tfm_ins_ts,
+            datatypes.Tag.tfm_upd_ts,
+        ]:
             # ~TFM_INS-TS: Gather columns whose names are years into a single "Year" column:
             df = table.dataframe
             query_columns = config.query_columns[datatypes.Tag(table.tag)]
@@ -1918,8 +1923,14 @@ def is_year(col_name):
             )
             # Convert the year column to integer
             df["year"] = df["year"].astype("int")
-            result.append(replace(table, dataframe=df, tag=datatypes.Tag.tfm_ins))
-        elif table.tag == datatypes.Tag.tfm_ins_at:
+            result.append(
+                replace(table, dataframe=df, tag=datatypes.Tag(tag.value.split("-")[0]))
+            )
+        elif tag in [
+            datatypes.Tag.tfm_dins_at,
+            datatypes.Tag.tfm_ins_at,
+            datatypes.Tag.tfm_upd_at,
+        ]:
             # ~TFM_INS-AT: Gather columns with attribute names into a single "Attribute" column
             df = table.dataframe
             if "attribute" in df.columns:
@@ -1938,7 +1949,9 @@ def is_year(col_name):
                 value_name="value",
                 ignore_index=False,
             )
-            result.append(replace(table, dataframe=df, tag=datatypes.Tag.tfm_ins))
+            result.append(
+                replace(table, dataframe=df, tag=datatypes.Tag(tag.value.split("-")[0]))
+            )
         else:
             result.append(table)

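For the "-AT" variants, the branch above melts the attribute-named columns into "attribute"/"value" pairs before the table is re-tagged. A rough illustration with a made-up frame (the column names are assumptions, not the project's real schema):

import pandas as pd

df = pd.DataFrame(
    {"process": ["P1", "P2"], "ncap_cost": [100.0, 120.0], "ncap_afa": [0.9, 0.8]}
)
long = pd.melt(
    df,
    id_vars=["process"],          # query columns stay as identifiers
    var_name="attribute",         # former column names become the attribute
    value_name="value",
    ignore_index=False,
)
print(long)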
@@ -1955,9 +1968,9 @@ def process_transform_tables(
     """
     regions = model.internal_regions
     tfm_tags = [
+        datatypes.Tag.tfm_dins,
         datatypes.Tag.tfm_ins,
         datatypes.Tag.tfm_ins_txt,
-        datatypes.Tag.tfm_dins,
         datatypes.Tag.tfm_topins,
         datatypes.Tag.tfm_upd,
         datatypes.Tag.tfm_mig,
@@ -1971,6 +1984,7 @@
             result.append(table)

         elif table.tag in [
+            datatypes.Tag.tfm_dins,
             datatypes.Tag.tfm_ins,
             datatypes.Tag.tfm_ins_txt,
             datatypes.Tag.tfm_upd,
@@ -2345,7 +2359,7 @@ def eval_and_update(
         for _, row in tqdm(
             updates.iterrows(),
             total=len(updates),
-            desc=f"Processing wildcard for {datatypes.Tag.tfm_upd}",
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_upd.value}",
         ):
             if row["value"] is None:  # TODO is this really needed?
                 continue
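The ".value" added to this and the following progress-bar descriptions makes the label show the tag text itself; interpolating the enum member directly can render as "Tag.tfm_upd" depending on the Python version and enum style. A small sketch with an illustrative stand-in enum:

from enum import Enum

class Tag(str, Enum):            # illustrative stand-in for datatypes.Tag
    tfm_upd = "~TFM_UPD"

print(f"Processing wildcard for {Tag.tfm_upd}")        # may show "Tag.tfm_upd"
print(f"Processing wildcard for {Tag.tfm_upd.value}")  # always "~TFM_UPD"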
@@ -2378,7 +2392,7 @@ def eval_and_update(
         for _, row in tqdm(
             updates.iterrows(),
             total=len(updates),
-            desc=f"Processing wildcard for {datatypes.Tag.tfm_ins}",
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_ins.value}",
         ):
             match = match_wildcards(row)
             # TODO perf: add matched procs/comms into column and use explode?
@@ -2395,6 +2409,12 @@ def eval_and_update(
         new_tables.append(tables[datatypes.Tag.fi_t])
         tables[datatypes.Tag.fi_t] = pd.concat(new_tables, ignore_index=True)

+    # TODO: Move this somewhere else (i.e. no wildcard processing)?
+    if datatypes.Tag.tfm_dins in tables:
+        table = tables[datatypes.Tag.fi_t]
+        updates = tables[datatypes.Tag.tfm_dins].filter(table.columns, axis=1)
+        tables[datatypes.Tag.fi_t] = pd.concat([table, updates], ignore_index=True)
+
     if datatypes.Tag.tfm_ins_txt in tables:
         updates = tables[datatypes.Tag.tfm_ins_txt]

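The new block above appends ~TFM_DINS rows straight onto the FI_T table: .filter(table.columns, axis=1) keeps only the columns FI_T already has (any extra DINS columns are silently dropped), and pd.concat stacks the rows. A toy example with invented column names:

import pandas as pd

fi_t = pd.DataFrame({"process": ["P1"], "attribute": ["NCAP_COST"], "value": [100.0]})
dins = pd.DataFrame(
    {"process": ["P2"], "attribute": ["NCAP_COST"], "value": [120.0], "note": ["x"]}
)

updates = dins.filter(fi_t.columns, axis=1)              # drops the extra "note" column
fi_t = pd.concat([fi_t, updates], ignore_index=True)
print(fi_t)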
@@ -2403,7 +2423,7 @@ def eval_and_update(
         for _, row in tqdm(
             updates.iterrows(),
             total=len(updates),
-            desc=f"Processing wildcard for {datatypes.Tag.tfm_ins_txt}",
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_ins_txt.value}",
         ):
             match = match_wildcards(row)
             if match is None:
@@ -2434,7 +2454,7 @@ def eval_and_update(
         for _, row in tqdm(
             updates.iterrows(),
             total=len(updates),
-            desc=f"Processing wildcard for {datatypes.Tag.tfm_mig}",
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_mig.value}",
         ):
             match = match_wildcards(row)
             processes, commodities = match if match is not None else (None, None)