Merge pull request #110 from REAM-lab/feature/post_process

Refactoring and simplification of postprocess

pesap authored and staadecker committed Jan 28, 2023
2 parents 3fb8498 + 6933120 commit 0e8d986
Showing 10 changed files with 181 additions and 78 deletions.
47 changes: 40 additions & 7 deletions switch_model/wecc/get_inputs/cli.py
@@ -1,13 +1,15 @@
""" Script to retrieve the input data from the switch-wecc database and apply post-processing steps.
"""
import argparse
import importlib
import os

from switch_model.utilities import query_yes_no, StepTimer
from switch_model.wecc.get_inputs.get_inputs import query_db
from switch_model.wecc.get_inputs.register_post_process import run_post_process
from switch_model.wecc.utilities import load_config
from switch_model.wecc.get_inputs.post_process_steps import *

# from switch_model.wecc.get_inputs.post_process_steps import *
# from switch_model.wecc.get_inputs.register_post_process import run_post_process, _registered_steps


def main():
@@ -29,7 +31,13 @@ def main():
"want to wait for the command.",
)
parser.add_argument(
"--post-process", default=None, help="Run only this post process step."
"--post-process-only",
default=False,
action="store_true",
help="Run only post process steps.",
)
parser.add_argument(
"--post-process-step", default=None, help="Run only this post process step."
)
parser.add_argument(
"--overwrite",
@@ -43,11 +51,36 @@
full_config = load_config()
switch_to_input_dir(full_config, overwrite=args.overwrite)

if args.post_process is None:
if not args.post_process_only and args.post_process_step is None:
query_db(full_config, skip_cf=args.skip_cf)
print("Post-processing...")
run_post_process(full_config, step_name=args.post_process)
print(f"\nScript took {timer.step_time_as_str()} seconds to build input tables.")

print("\nRunning post processing...")

# Get location of post process scripts
post_process_path = ".".join(__name__.split(".")[:-1]) + ".post_process_steps"

def run_post_process(module):
"""Run a function from a given module"""

# Import the step module with Python dotted-module syntax, relative to the post_process_steps package.
mod = importlib.import_module(f".{module}", post_process_path)

post_process = getattr(mod, "post_process")

# Get specific configuration for the post process if specified
post_config = full_config.get(module, None)

# Run post process
post_process(full_config, post_config)

# Run all post-process steps listed in the config, otherwise run only the requested step
if args.post_process_step is None:
for module in full_config["post_process"]:
run_post_process(module)
else:
run_post_process(getattr(args, "post_process_step"))

print(f"\nScript took {timer.step_time_as_str()} seconds.")


def switch_to_input_dir(config, overwrite):
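For reference, the refactored cli.py resolves each entry under the config's post_process list to a module inside post_process_steps and calls that module's post_process() function with the full config plus any step-specific options stored under the step's name. A minimal standalone sketch of that dispatch pattern follows; the step names and the "percentage" option are taken from elsewhere in this diff, but the config dict itself is hypothetical.

import importlib


def run_step(step_name, full_config):
    """Import post_process_steps.<step_name> and run its post_process() function."""
    mod = importlib.import_module(
        f".{step_name}", "switch_model.wecc.get_inputs.post_process_steps"
    )
    # Step-specific options live under a config key matching the module name.
    step_config = full_config.get(step_name, None)
    mod.post_process(full_config, step_config)


# Hypothetical config: run two steps, passing options only to the second one.
config = {
    "post_process": ["create_graph_files", "energy_cost"],
    "energy_cost": {"percentage": 50},
}
for step in config["post_process"]:
    run_step(step, config)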
28 changes: 28 additions & 0 deletions switch_model/wecc/get_inputs/get_inputs.py
@@ -932,6 +932,34 @@ def write_wind_to_solar_ratio(wind_to_solar_ratio):
df.to_csv("wind_to_solar_ratio.csv", index=False)


def write_wind_to_solar_ratio(wind_to_solar_ratio):
# TODO ideally we'd have a table where we can specify the wind_to_solar_ratios per period.
# At the moment only a single wind_to_solar_ratio is specified, which doesn't allow different values per period
if wind_to_solar_ratio is None:
return

print("wind_to_solar_ratio.csv...")
df = pd.read_csv("periods.csv")[["INVESTMENT_PERIOD"]]
df["wind_to_solar_ratio"] = wind_to_solar_ratio

# wind_to_solar_ratio.csv requires a column called wind_to_solar_ratio_const_gt that is True (1) or False (0)
# This column specifies whether the constraint is a greater than constraint or a less than constraint.
# In our case we want it to be a greater than constraint if we're trying to force the wind-to-solar ratio above its default
# and we want it to be a less than constraint if we're trying to force the ratio below its default.
# Here the default is the ratio if we didn't have the constraint.
cutoff_ratio = 0.28
warnings.warn(
"To determine the sign of the wind-to-solar ratio constraint we have "
f"assumed that without the constraint, the wind-to-solar ratio is {cutoff_ratio}. "
f"This value was accurate for Martin's LDES runs however it may not be accurate for you. "
f"You should update this value in get_inputs or manually specify whether you want a greater than "
f"or a less than constraint."
)
df["wind_to_solar_ratio_const_gt"] = 1 if wind_to_solar_ratio > cutoff_ratio else 0

df.to_csv("wind_to_solar_ratio.csv", index=False)


def ca_policies(db_cursor, ca_policies_scenario_id, study_timeframe_id):
if ca_policies_scenario_id is None:
return
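As a quick numeric illustration of the sign logic above (the requested ratios are invented): a requested wind_to_solar_ratio of 0.5 exceeds the assumed unconstrained ratio of 0.28, so the constraint is written as greater-than (flag 1), while a requested ratio of 0.1 is written as less-than (flag 0).

# Illustration only: the requested ratios below are invented.
cutoff_ratio = 0.28  # assumed wind-to-solar ratio without the constraint

for requested in (0.5, 0.1):
    const_gt = 1 if requested > cutoff_ratio else 0
    kind = "greater-than" if const_gt else "less-than"
    print(f"wind_to_solar_ratio={requested} -> wind_to_solar_ratio_const_gt={const_gt} ({kind})")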
81 changes: 43 additions & 38 deletions switch_model/wecc/get_inputs/post_process_steps/add_storage.py
@@ -5,7 +5,6 @@
the csvs in the inputs folder.
"""
import pandas as pd

from switch_model.wecc.get_inputs.register_post_process import register_post_process


@@ -17,16 +16,16 @@ def fetch_df(tab_name, key, config):
"constants": 0,
"plants": 889129113,
"costs": 1401952285,
"minimums": 1049456965
"minimums": 1049456965,
}
SHEET_ID = "1SJrj039T1T95NLTs964VQnsfZgo2QWCo29x2ireVYcU"

gid = TAB_NAME_GID[tab_name]
url = f"https://docs.google.com/spreadsheet/ccc?key={SHEET_ID}&output=csv&gid={gid}"

df: pd.DataFrame = pd.read_csv(url, index_col=False) \
.replace("FALSE", False) \
.replace("TRUE", True)
df: pd.DataFrame = (
pd.read_csv(url, index_col=False).replace("FALSE", False).replace("TRUE", True)
)

if "description" in df.columns:
df = df.drop("description", axis=1)
@@ -43,17 +42,16 @@ def filer_by_scenario(df, scenario_column, config):
if scenario_column in config:
scenario = config[scenario_column]
else:
scenario = input(f"Which scenario do you want for '{scenario_column}' (default 0) : ")
scenario = input(
f"Which scenario do you want for '{scenario_column}' (default 0) : "
)
scenario = int(scenario) if scenario != "" else 0
df = df[df[scenario_column] == scenario]
return df.drop(scenario_column, axis=1)


def cross_join(df1, df2):
return df1.assign(key=1).merge(
df2.assign(key=1),
on="key"
).drop("key", axis=1)
return df1.assign(key=1).merge(df2.assign(key=1), on="key").drop("key", axis=1)


def add_to_csv(filename, to_add, primary_key=None, append=True):
@@ -83,8 +81,12 @@ def drop_previous_candidate_storage():

gen = pd.read_csv("generation_projects_info.csv", index_col=False)
# Find generation projects that are both storage and not predetermined (i.e. candidate)
predetermined_gen = pd.read_csv("gen_build_predetermined.csv", index_col=False)["GENERATION_PROJECT"]
should_drop = (gen["gen_tech"] == STORAGE_TECH) & ~gen["GENERATION_PROJECT"].isin(predetermined_gen)
predetermined_gen = pd.read_csv("gen_build_predetermined.csv", index_col=False)[
"GENERATION_PROJECT"
]
should_drop = (gen["gen_tech"] == STORAGE_TECH) & ~gen["GENERATION_PROJECT"].isin(
predetermined_gen
)
# Find projects that we should drop (candidate storage)
gen_to_drop = gen[should_drop]["GENERATION_PROJECT"]

@@ -99,42 +101,46 @@


@register_post_process(
name="add_storage",
msg="Adding storage from Google Sheets",
only_with_config=True,
priority=1,
)
def add_storage(config):
from switch_model.tools.add_storage import main

main(
run_post_solve=False, # We will run post solve automatically right afterwards
scenario_config=config,
change_dir=False,
)
priority=1 # Increased priority (default is 2) so that it always runs before replace_plants_in_zone_all.py
)
def main(config):
def post_process(config):
# Drop previous candidate storage from inputs
drop_previous_candidate_storage()

# Get the generation storage plants from Google Sheet
gen_projects = fetch_df("constants", "constant_scenario", config).set_index("param_name").transpose()
gen_projects = cross_join(gen_projects, fetch_df("plants", "plants_scenario", config))
gen_projects = (
fetch_df("constants", "constant_scenario", config)
.set_index("param_name")
.transpose()
)
gen_projects = cross_join(
gen_projects, fetch_df("plants", "plants_scenario", config)
)

# Append the storage plants to the inputs
add_to_csv("generation_projects_info.csv", gen_projects, primary_key="GENERATION_PROJECT")
add_to_csv(
"generation_projects_info.csv", gen_projects, primary_key="GENERATION_PROJECT"
)

# Create min_per_tech.csv
min_projects = fetch_df("minimums", "minimums_scenario", config)
add_to_csv("min_per_tech.csv", min_projects, primary_key=["gen_tech", "period"], append=False)
add_to_csv(
"min_per_tech.csv",
min_projects,
primary_key=["gen_tech", "period"],
append=False,
)

# Get the plant costs from GSheets and append to costs
storage_costs = fetch_df("costs", "costs_scenario", config)
storage_costs = storage_costs[storage_costs["GENERATION_PROJECT"].isin(gen_projects["GENERATION_PROJECT"])]
add_to_csv("gen_build_costs.csv", storage_costs, primary_key=["GENERATION_PROJECT", "build_year"])
storage_costs = storage_costs[
storage_costs["GENERATION_PROJECT"].isin(gen_projects["GENERATION_PROJECT"])
]
add_to_csv(
"gen_build_costs.csv",
storage_costs,
primary_key=["GENERATION_PROJECT", "build_year"],
)

# Create add_storage_info.csv
pd.DataFrame([config]).transpose().to_csv("add_storage_info.csv", header=False)
@@ -144,11 +150,10 @@ def main(config):
gen_type.columns = ["gen_tech", "energy_source"]
gen_type["map_name"] = "default"
gen_type["gen_type"] = "Storage"
pd.concat([
pd.read_csv("graph_tech_types.csv", index_col=False), gen_type
]).to_csv("graph_tech_types.csv", index=False)
pd.concat([pd.read_csv("graph_tech_types.csv", index_col=False), gen_type]).to_csv(
"graph_tech_types.csv", index=False
)


if __name__ == "__main__":
main({})
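The cross_join() helper above builds the Cartesian product of the shared storage parameters (one row) with the candidate plant rows by merging on a dummy key. A self-contained sketch with made-up data shows the effect; on pandas 1.2+ the same result is also available via df1.merge(df2, how="cross").

import pandas as pd


def cross_join(df1, df2):
    return df1.assign(key=1).merge(df2.assign(key=1), on="key").drop("key", axis=1)


# Toy inputs, not taken from the project's Google Sheet.
constants = pd.DataFrame({"gen_storage_efficiency": [0.85]})
plants = pd.DataFrame(
    {"GENERATION_PROJECT": ["S1", "S2"], "gen_load_zone": ["ZONE_A", "ZONE_B"]}
)

# Two rows result, one per plant, each carrying the shared efficiency value.
print(cross_join(constants, plants))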
@@ -21,14 +21,11 @@


@register_post_process(
name="aggregate_projects_by_zone",
msg="Aggregating candidate projects by load zone for specified technologies",
only_with_config=True,
priority=4,
)
def post_process(config):
agg_techs = config["agg_techs"]
cf_method = config["cf_method"]
def post_process(config, func_config):
agg_techs = func_config["agg_techs"]
cf_method = func_config["cf_method"]
assert type(agg_techs) == list
# Don't allow hydro to be aggregated since we haven't implemented how to handle
# hydro_timeseries.csv
@@ -3,8 +3,8 @@
from switch_model.wecc.get_inputs.register_post_process import register_post_process


@register_post_process(name="create_graph_files", msg="Creating graph files")
def create_graph_files(_):
@register_post_process(msg="Creating graph files")
def post_process(config, *args, **kwargs):
timepoints = pd.read_csv("timepoints.csv", index_col=False)
timeseries = pd.read_csv("timeseries.csv", index_col=False)
timepoints = timepoints.merge(
55 changes: 55 additions & 0 deletions switch_model/wecc/get_inputs/post_process_steps/energy_cost.py
@@ -0,0 +1,55 @@
# Standard packages
import os
import shutil

# Third-party packages
import pandas as pd

from switch_model.wecc.get_inputs.register_post_process import register_post_process


@register_post_process(
msg="Change energy cost for storage candidate",
)
def post_process(config, func_config):

percentage = int(func_config["percentage"]) / 100
dtype = {"GENERATION_PROJECT": str}
df = pd.read_csv("generation_projects_info.csv", dtype=dtype)
costs = pd.read_csv("gen_build_costs.csv", dtype=dtype)
predetermined = pd.read_csv("gen_build_predetermined.csv", dtype=dtype)

gen_projects = df.merge(
costs,
on="GENERATION_PROJECT",
)

gen_projects = gen_projects.merge(
predetermined,
on=["GENERATION_PROJECT", "build_year"],
how="left", # Makes a left join
)

# Get candidate technology only
candidate = gen_projects.query("build_year == 2050").query(
"gen_tech =='Battery_Storage'"
)

# Get candidate generation project ids
candidate_ids = candidate["GENERATION_PROJECT"].values

gen_cost_mwh = costs.loc[
costs["GENERATION_PROJECT"].isin(candidate_ids),
"gen_storage_energy_overnight_cost",
].astype(float)

# Scale the candidates' energy overnight cost by the given percentage
costs.loc[
costs["GENERATION_PROJECT"].isin(candidate_ids),
"gen_storage_energy_overnight_cost",
] = (
gen_cost_mwh * percentage
)

# Save file again
costs.to_csv("gen_build_costs.csv", index=False)
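To make the arithmetic of this step concrete: with a func_config of {"percentage": 50}, candidate battery rows have their gen_storage_energy_overnight_cost halved. A toy sketch of the same masking and scaling, with invented project names and costs:

import pandas as pd

# Toy gen_build_costs-style frame; project names and costs are invented.
costs = pd.DataFrame(
    {
        "GENERATION_PROJECT": ["battery_candidate", "gas_existing"],
        "build_year": [2050, 2020],
        "gen_storage_energy_overnight_cost": [120.0, float("nan")],
    }
)
candidate_ids = ["battery_candidate"]

func_config = {"percentage": 50}  # hypothetical step options
multiplier = int(func_config["percentage"]) / 100  # -> 0.5

mask = costs["GENERATION_PROJECT"].isin(candidate_ids)
costs.loc[mask, "gen_storage_energy_overnight_cost"] *= multiplier
print(costs)  # the candidate's 120.0 becomes 60.0; the other row is unchanged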
@@ -3,10 +3,8 @@
from switch_model.wecc.get_inputs.register_post_process import register_post_process


@register_post_process(
name="fix_prebuild_conflict_bug", msg="Shifting 2020 pre-build years to 2019"
)
def fix_prebuild_conflict_bug(_):
@register_post_process(msg="Shifting 2020 pre-build years to 2019")
def post_process(config, *args, **kwargs):
"""
This post-processing step is necessary to pass the no_predetermined_bld_yr_vs_period_conflict BuildCheck.
Basically we are moving all the 2020 predetermined build years to 2019 to avoid a conflict with the 2020 period.
@@ -7,7 +7,6 @@
@register_post_process(
name="only_california",
msg="Dropping all the zones outside of California",
only_with_config=True,
priority=3,
)
def main(_):
@@ -3,11 +3,8 @@
from switch_model.wecc.get_inputs.register_post_process import register_post_process


@register_post_process(
name="replace_all_zones_plants",
msg="Replacing _ALL_ZONES plants with a plant in each zone",
)
def replace_plants_in_zone_all(_):
@register_post_process(msg="Replacing _ALL_ZONES plants with a plant in each zone")
def post_process(config, *args, **kwargs):
"""
This post-process step replaces all the generation projects that have a load called
_ALL_ZONES with a generation project for each load zone.
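Taken together, the step files in this diff now follow one convention: register_post_process() is called without a name argument, and each module exposes a function named post_process that receives the full config plus the step's own options. A hypothetical new-style step under that convention might look like the sketch below; the loads.csv body is invented for illustration and is not part of this commit.

# Hypothetical new-style step; the loads.csv body is invented for illustration.
import pandas as pd

from switch_model.wecc.get_inputs.register_post_process import register_post_process


@register_post_process(msg="Example: rounding zone demand to whole MW")
def post_process(config, func_config=None, *args, **kwargs):
    loads = pd.read_csv("loads.csv", index_col=False)
    loads["zone_demand_mw"] = loads["zone_demand_mw"].round()
    loads.to_csv("loads.csv", index=False)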