Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure the tool can run on TUSM #170

Merged
merged 3 commits
on Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions xl2times/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def main():
args_parser.add_argument(
"input",
nargs="*",
help="Either an input directory, or a list of input xlsx files to process",
help="Either an input directory, or a list of input xlsx/xlsm files to process",
)
args_parser.add_argument(
"--regions",
Expand All @@ -405,7 +405,7 @@ def main():
args_parser.add_argument(
"--only_read",
action="store_true",
help="Read xlsx files and stop after outputting raw_tables.txt",
help="Read xlsx/xlsm files and stop after outputting raw_tables.txt",
)
args_parser.add_argument("--use_pkl", action="store_true")
args_parser.add_argument(
Expand Down Expand Up @@ -433,8 +433,8 @@ def main():
assert os.path.isdir(args.input[0])
input_files = [
str(path)
for path in Path(args.input[0]).rglob("*.xlsx")
if not path.name.startswith("~")
for path in Path(args.input[0]).rglob("*")
if path.suffix in [".xlsx", ".xlsm"] and not path.name.startswith("~")
]
print(f"Loading {len(input_files)} files from {args.input[0]}")
else:
Expand Down
25 changes: 16 additions & 9 deletions xl2times/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,9 +565,6 @@ def process_user_constraint_table(
else:
df.loc[i, colname] = typed_value

# TODO: should we have a global list of column name -> type?
df["year"] = df["year"].astype("Int64")

return replace(table, dataframe=df)

return [process_user_constraint_table(t) for t in tables]
Expand Down Expand Up @@ -898,6 +895,10 @@ def apply_fixups_table(table: datatypes.EmbeddedXlTable):

df = table.dataframe.copy()

# TODO: should we have a global list of column name -> type?
if "year" in df.columns:
df["year"] = pd.to_numeric(df["year"], errors="coerce")

# Populate CommName based on defaults
i = (
df["attribute"]
Expand Down Expand Up @@ -1284,9 +1285,11 @@ def process_years(
tables: Dict[str, DataFrame],
model: datatypes.TimesModel,
) -> Dict[str, DataFrame]:

# Datayears is the set of all years in ~FI_T's Year column
# We ignore values < 1000 because those signify interpolation/extrapolation rules
# (see Table 8 of Part IV of the TIMES Documentation)

datayears = (
tables[datatypes.Tag.fi_t]["year"]
.apply(lambda x: x if (x is not str) and x >= 1000 else None)
Expand Down Expand Up @@ -1330,12 +1333,16 @@ def process_processes(
)
df.replace({"sets": veda_sets_to_times}, inplace=True)
nrows = df.shape[0]
if "vintage" not in table.dataframe.columns:
df["vintage"] = [None] * nrows
if "region" not in table.dataframe.columns:
df.insert(1, "region", [None] * nrows)
if "tslvl" not in table.dataframe.columns:
df.insert(6, "tslvl", ["ANNUAL"] * nrows)
# TODO: Use info from config instead. Introduce required columns in the meta file?
add_columns = [
(1, "region"),
(6, "tslvl"),
(7, "primarycg"),
(8, "vintage"),
]
for column in add_columns:
if column[1] not in table.dataframe.columns:
df.insert(column[0], column[1], [None] * nrows)
result.append(replace(table, dataframe=df))

veda_process_sets = datatypes.EmbeddedXlTable(
Expand Down
Loading