From 3f3eece0ee7e09aa1098dc478ab05aece626e282 Mon Sep 17 00:00:00 2001
From: pesap <pesapsanchez@gmail.com>
Date: Thu, 12 Aug 2021 14:03:46 -0700
Subject: [PATCH 1/6] Refactoring and simplification of postprocess

---
 .../2021-07-29_create_low_hydro_scenario.py   | 80 ++++++++++++-------
 .../2021-07-30_create_no_hydro_scenario.py    | 28 +++++--
 .../2021-08-02_create_half_hydro_scenario.py  | 22 +++--
 switch_model/wecc/get_inputs/cli.py           | 47 ++++++++++-
 .../post_process_steps/add_storage.py         | 78 ++++++++++--------
 .../aggregate_candidate_projects.py           |  9 +--
 .../post_process_steps/create_graph_files.py  |  4 +-
 .../fix_prebuild_conflict.py                  |  6 +-
 .../post_process_steps/only_california.py     |  1 -
 .../replace_plants_in_zone_all.py             |  7 +-
 .../reserve_technologies.py                   |  9 +--
 .../wecc/get_inputs/register_post_process.py  | 21 ++---
 12 files changed, 195 insertions(+), 117 deletions(-)

diff --git a/database/2021-07-29_create_low_hydro_scenario.py b/database/2021-07-29_create_low_hydro_scenario.py
index e64fc894c..9a116840d 100644
--- a/database/2021-07-29_create_low_hydro_scenario.py
+++ b/database/2021-07-29_create_low_hydro_scenario.py
@@ -22,7 +22,9 @@
 new_start_year = 2020
 new_end_year = 2050
 new_scenario_id = 24
-new_scenario_name = "Lowest year (2015) repeated. Using EIA and AMPL Canada and Mex data."
+new_scenario_name = (
+    "Lowest year (2015) repeated. Using EIA and AMPL Canada and Mex data."
+)
 new_scenario_description = "Lowest year (2015) repeated from 2020 to 2050, based on data from id 21 (EIA + AMPL Canada & Mex)."
 
 
@@ -35,44 +37,66 @@ def main():
         f"""
         SELECT DISTINCT generation_plant_id FROM hydro_historical_monthly_capacity_factors
         WHERE hydro_simple_scenario_id={all_plants_scenario};
-    """)
-    hydro_plants = pd.DataFrame(db_cursor.fetchall(), columns=["generation_plant_id"])["generation_plant_id"]
+    """
+    )
+    hydro_plants = pd.DataFrame(db_cursor.fetchall(), columns=["generation_plant_id"])[
+        "generation_plant_id"
+    ]
 
     # 2. Get all the hydro flow data for the worst year
     db_cursor.execute(
         f"""
         SELECT generation_plant_id, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors
         WHERE hydro_simple_scenario_id={raw_data_scenario} and year={worst_year};
-    """)
-    worst_year_data = pd.DataFrame(db_cursor.fetchall(),
-                                   columns=["generation_plant_id", "month", "hydro_min_flow_mw", "hydro_avg_flow_mw"])
+    """
+    )
+    worst_year_data = pd.DataFrame(
+        db_cursor.fetchall(),
+        columns=[
+            "generation_plant_id",
+            "month",
+            "hydro_min_flow_mw",
+            "hydro_avg_flow_mw",
+        ],
+    )
 
     # 3. Identify plants where data is missing
-    missing_hydro_plants = hydro_plants[~hydro_plants.isin(worst_year_data["generation_plant_id"])].values
+    missing_hydro_plants = hydro_plants[
+        ~hydro_plants.isin(worst_year_data["generation_plant_id"])
+    ].values
 
     # 4. For each missing plant get the data for all the years
     db_cursor.execute(
         f"""
         SELECT generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors
         WHERE hydro_simple_scenario_id={raw_data_scenario} and generation_plant_id in ({",".join(missing_hydro_plants.astype(str))});
-    """)
-    missing_plants_data = pd.DataFrame(db_cursor.fetchall(),
-                                       columns=["generation_plant_id", "year", "month", "hydro_min_flow_mw",
-                                                "hydro_avg_flow_mw"])
+    """
+    )
+    missing_plants_data = pd.DataFrame(
+        db_cursor.fetchall(),
+        columns=[
+            "generation_plant_id",
+            "year",
+            "month",
+            "hydro_min_flow_mw",
+            "hydro_avg_flow_mw",
+        ],
+    )
 
     # 5. Pick the year with the least flow
     # Aggregate by year
-    missing_data_by_year = missing_plants_data.groupby(["generation_plant_id", "year"], as_index=False)[
-        "hydro_avg_flow_mw"].mean()
+    missing_data_by_year = missing_plants_data.groupby(
+        ["generation_plant_id", "year"], as_index=False
+    )["hydro_avg_flow_mw"].mean()
     # Select years where the flow is at its lowest
-    year_to_use = \
-    missing_data_by_year.loc[missing_data_by_year.groupby("generation_plant_id")["hydro_avg_flow_mw"].idxmin()][
-        ["generation_plant_id", "year"]]
+    year_to_use = missing_data_by_year.loc[
+        missing_data_by_year.groupby("generation_plant_id")[
+            "hydro_avg_flow_mw"
+        ].idxmin()
+    ][["generation_plant_id", "year"]]
     # Essentially filter missing_plants_data to only include keys from the right table, aka plants and years that are lowest
     missing_plants_data = missing_plants_data.merge(
-        year_to_use,
-        on=["generation_plant_id", "year"],
-        how="right"
+        year_to_use, on=["generation_plant_id", "year"], how="right"
     ).drop("year", axis=1)
 
     # 6. Add the missing data to our worst year data and verify we have data for all the plants
@@ -81,14 +105,13 @@ def main():
 
     # 7. Cross join the series with all the years from 2020 to 2050
     years = pd.Series(range(new_start_year, new_end_year + 1), name="year")
-    worst_year_data = worst_year_data.merge(
-        years,
-        how="cross"
-    )
+    worst_year_data = worst_year_data.merge(years, how="cross")
     worst_year_data["hydro_simple_scenario_id"] = new_scenario_id
 
     # 8. Complete some data checks
-    assert len(worst_year_data) == 12 * (new_end_year - new_start_year + 1) * len(hydro_plants)
+    assert len(worst_year_data) == 12 * (new_end_year - new_start_year + 1) * len(
+        hydro_plants
+    )
 
     # 9. Add data to database
     print(f"hydro_simple_scenario: {new_scenario_id}")
@@ -99,7 +122,9 @@ def main():
     print(f"To year: {new_end_year}")
     print(f"Example data:\n{worst_year_data.head()}")
 
-    if not query_yes_no("\nAre you sure you want to add this data to the database?", default="no"):
+    if not query_yes_no(
+        "\nAre you sure you want to add this data to the database?", default="no"
+    ):
         raise SystemExit
 
     db_cursor.execute(
@@ -110,9 +135,10 @@ def main():
     n = len(worst_year_data)
     start_time = time.time()
     for i, r in enumerate(worst_year_data.itertuples(index=False)):
-        if i !=0 and i % 1000 == 0:
+        if i != 0 and i % 1000 == 0:
             print(
-                f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}")
+                f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}"
+            )
         db_cursor.execute(
             f"INSERT INTO hydro_historical_monthly_capacity_factors(hydro_simple_scenario_id, generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw) "
             f"VALUES ({r.hydro_simple_scenario_id},{r.generation_plant_id},{r.year},{r.month},{r.hydro_min_flow_mw},{r.hydro_avg_flow_mw})"
diff --git a/database/2021-07-30_create_no_hydro_scenario.py b/database/2021-07-30_create_no_hydro_scenario.py
index f4f658363..ff9c1f576 100644
--- a/database/2021-07-30_create_no_hydro_scenario.py
+++ b/database/2021-07-30_create_no_hydro_scenario.py
@@ -17,8 +17,10 @@
 
 new_scenario_id = 25
 new_scenario_name = "No Hydro"
-new_scenario_description = "All average flows are zero effectively removing all hydro generation from the model." \
-                           " Represents as an extreme edge case of no hydro generation."
+new_scenario_description = (
+    "All average flows are zero effectively removing all hydro generation from the model."
+    " Represents as an extreme edge case of no hydro generation."
+)
 
 
 def main():
@@ -30,9 +32,18 @@ def main():
         f"""
         SELECT DISTINCT generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors
         WHERE hydro_simple_scenario_id={all_plants_scenario};
-    """)
-    df = pd.DataFrame(db_cursor.fetchall(),
-                      columns=["generation_plant_id", "year", "month", "hydro_min_flow_mw", "hydro_avg_flow_mw"])
+    """
+    )
+    df = pd.DataFrame(
+        db_cursor.fetchall(),
+        columns=[
+            "generation_plant_id",
+            "year",
+            "month",
+            "hydro_min_flow_mw",
+            "hydro_avg_flow_mw",
+        ],
+    )
 
     # 2. Set all the flows to zero and set the scenario id
     df["hydro_min_flow_mw"] = 0
@@ -46,7 +57,9 @@ def main():
     print(f"Num hydro plants: {df.generation_plant_id.nunique()}")
     print(f"Example data:\n{df.head()}")
 
-    if not query_yes_no("\nAre you sure you want to add this data to the database?", default="no"):
+    if not query_yes_no(
+        "\nAre you sure you want to add this data to the database?", default="no"
+    ):
         raise SystemExit
 
     db_cursor.execute(
@@ -59,7 +72,8 @@ def main():
     for i, r in enumerate(df.itertuples(index=False)):
         if i != 0 and i % 1000 == 0:
             print(
-                f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}")
+                f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}"
+            )
         db_cursor.execute(
             f"INSERT INTO hydro_historical_monthly_capacity_factors(hydro_simple_scenario_id, generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw) "
             f"VALUES ({r.hydro_simple_scenario_id},{r.generation_plant_id},{r.year},{r.month},{r.hydro_min_flow_mw},{r.hydro_avg_flow_mw})"
diff --git a/database/2021-08-02_create_half_hydro_scenario.py b/database/2021-08-02_create_half_hydro_scenario.py
index 7ee93d4b0..6bbef5e2b 100644
--- a/database/2021-08-02_create_half_hydro_scenario.py
+++ b/database/2021-08-02_create_half_hydro_scenario.py
@@ -29,9 +29,18 @@ def main():
         f"""
         SELECT DISTINCT generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors
         WHERE hydro_simple_scenario_id={all_plants_scenario};
-    """)
-    df = pd.DataFrame(db_cursor.fetchall(),
-                      columns=["generation_plant_id", "year", "month", "hydro_min_flow_mw", "hydro_avg_flow_mw"])
+    """
+    )
+    df = pd.DataFrame(
+        db_cursor.fetchall(),
+        columns=[
+            "generation_plant_id",
+            "year",
+            "month",
+            "hydro_min_flow_mw",
+            "hydro_avg_flow_mw",
+        ],
+    )
 
     # 2. Set all the flows to zero and set the scenario id
     df["hydro_avg_flow_mw"] /= 2
@@ -45,7 +54,9 @@ def main():
     print(f"Num hydro plants: {df.generation_plant_id.nunique()}")
     print(f"Example data:\n{df.head()}")
 
-    if not query_yes_no("\nAre you sure you want to add this data to the database?", default="no"):
+    if not query_yes_no(
+        "\nAre you sure you want to add this data to the database?", default="no"
+    ):
         raise SystemExit
 
     db_cursor.execute(
@@ -58,7 +69,8 @@ def main():
     for i, r in enumerate(df.itertuples(index=False)):
         if i != 0 and i % 1000 == 0:
             print(
-                f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}")
+                f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}"
+            )
         db_cursor.execute(
             f"INSERT INTO hydro_historical_monthly_capacity_factors(hydro_simple_scenario_id, generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw) "
             f"VALUES ({r.hydro_simple_scenario_id},{r.generation_plant_id},{r.year},{r.month},{r.hydro_min_flow_mw},{r.hydro_avg_flow_mw})"
diff --git a/switch_model/wecc/get_inputs/cli.py b/switch_model/wecc/get_inputs/cli.py
index a5a344506..017aa49bc 100644
--- a/switch_model/wecc/get_inputs/cli.py
+++ b/switch_model/wecc/get_inputs/cli.py
@@ -1,13 +1,15 @@
 """ Script to retrieve the input data from the switch-wecc database and apply post-processing steps.
 """
 import argparse
+import importlib
 import os
 
 from switch_model.utilities import query_yes_no, StepTimer
 from switch_model.wecc.get_inputs.get_inputs import query_db
-from switch_model.wecc.get_inputs.register_post_process import run_post_process
 from switch_model.wecc.utilities import load_config
-from switch_model.wecc.get_inputs.post_process_steps import *
+
+# from switch_model.wecc.get_inputs.post_process_steps import *
+# from switch_model.wecc.get_inputs.register_post_process import run_post_process, _registered_steps
 
 
 def main():
@@ -29,7 +31,13 @@ def main():
         "want to wait for the command.",
     )
     parser.add_argument(
-        "--post-process", default=None, help="Run only this post process step."
+        "--post-process",
+        default=False,
+        action="store_true",
+        help="Run only post process steps.",
+    )
+    parser.add_argument(
+        "--post-process-step", default=None, help="Run only this post process step."
     )
     parser.add_argument(
         "--overwrite",
@@ -45,8 +53,39 @@ def main():
 
     if args.post_process is None:
         query_db(full_config, skip_cf=args.skip_cf)
+
     print("Post-processing...")
-    run_post_process(full_config, step_name=args.post_process)
+
+    post_process_path = ".".join(__name__.split(".")[:-1]) + ".post_process_steps"
+
+    def run_post_process(module):
+        mod = importlib.import_module(f".{module}", post_process_path)
+        post_process = getattr(mod, "post_process")
+
+        # Configuration specific for the post_process
+        post_config = full_config.get(module, None)
+
+        # Run post process
+        post_process(full_config, post_config)
+
+    # Get additional post-process from the config file
+    if args.post_process_step is None:
+        for module in full_config["post_process"]:
+            run_post_process(module)
+        # run_post_process(full_config, step_name=args.post_process)
+    else:
+        run_post_process(getattr(args, "post_process_step"))
+        # breakpoint()
+        # post_process_path = '.'.join(__name__.split('.')[:-1]) + ".post_process_steps"
+        # mod = importlib.import_module(f".{module}", post_process_path)
+        # post_process = getattr(mod, "post_process")
+
+        # # Configuration specific for the post_process
+        # post_config = full_config.get(module, None)
+
+        # # Run post process
+        # post_process(full_config, post_config)
+
     print(f"\nScript took {timer.step_time_as_str()} seconds to build input tables.")
 
 
diff --git a/switch_model/wecc/get_inputs/post_process_steps/add_storage.py b/switch_model/wecc/get_inputs/post_process_steps/add_storage.py
index 5130c5237..1369647a6 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/add_storage.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/add_storage.py
@@ -17,16 +17,16 @@ def fetch_df(tab_name, key, config):
         "constants": 0,
         "plants": 889129113,
         "costs": 1401952285,
-        "minimums": 1049456965
+        "minimums": 1049456965,
     }
     SHEET_ID = "1SJrj039T1T95NLTs964VQnsfZgo2QWCo29x2ireVYcU"
 
     gid = TAB_NAME_GID[tab_name]
     url = f"https://docs.google.com/spreadsheet/ccc?key={SHEET_ID}&output=csv&gid={gid}"
 
-    df: pd.DataFrame = pd.read_csv(url, index_col=False) \
-        .replace("FALSE", False) \
-        .replace("TRUE", True)
+    df: pd.DataFrame = (
+        pd.read_csv(url, index_col=False).replace("FALSE", False).replace("TRUE", True)
+    )
 
     if "description" in df.columns:
         df = df.drop("description", axis=1)
@@ -43,17 +43,16 @@ def filer_by_scenario(df, scenario_column, config):
     if scenario_column in config:
         scenario = config[scenario_column]
     else:
-        scenario = input(f"Which scenario do you want for '{scenario_column}' (default 0) : ")
+        scenario = input(
+            f"Which scenario do you want for '{scenario_column}' (default 0) : "
+        )
         scenario = int(scenario) if scenario != "" else 0
     df = df[df[scenario_column] == scenario]
     return df.drop(scenario_column, axis=1)
 
 
 def cross_join(df1, df2):
-    return df1.assign(key=1).merge(
-        df2.assign(key=1),
-        on="key"
-    ).drop("key", axis=1)
+    return df1.assign(key=1).merge(df2.assign(key=1), on="key").drop("key", axis=1)
 
 
 def add_to_csv(filename, to_add, primary_key=None, append=True):
@@ -83,8 +82,12 @@ def drop_previous_candidate_storage():
 
     gen = pd.read_csv("generation_projects_info.csv", index_col=False)
     # Find generation projects that are both storage and not predetermined (i.e. candidate)
-    predetermined_gen = pd.read_csv("gen_build_predetermined.csv", index_col=False)["GENERATION_PROJECT"]
-    should_drop = (gen["gen_tech"] == STORAGE_TECH) & ~gen["GENERATION_PROJECT"].isin(predetermined_gen)
+    predetermined_gen = pd.read_csv("gen_build_predetermined.csv", index_col=False)[
+        "GENERATION_PROJECT"
+    ]
+    should_drop = (gen["gen_tech"] == STORAGE_TECH) & ~gen["GENERATION_PROJECT"].isin(
+        predetermined_gen
+    )
     # Find projects that we should drop (candidate storage)
     gen_to_drop = gen[should_drop]["GENERATION_PROJECT"]
 
@@ -101,40 +104,46 @@ def drop_previous_candidate_storage():
 @register_post_process(
     name="add_storage",
     msg="Adding storage from Google Sheets",
-    only_with_config=True,
-<<<<<<< HEAD
-    priority=1,
-)
-def add_storage(config):
-    from switch_model.tools.add_storage import main
-
-    main(
-        run_post_solve=False,  # We will run post solve automatically right afterwards
-        scenario_config=config,
-        change_dir=False,
-    )
-=======
-    priority=1  # Increased priority (default is 2) so that it always runs before replace_plants_in_zone_all.py
+    priority=1,  # Increased priority (default is 2) so that it always runs before replace_plants_in_zone_all.py
 )
 def main(config):
     # Drop previous candidate storage from inputs
     drop_previous_candidate_storage()
 
     # Get the generation storage plants from Google Sheet
-    gen_projects = fetch_df("constants", "constant_scenario", config).set_index("param_name").transpose()
-    gen_projects = cross_join(gen_projects, fetch_df("plants", "plants_scenario", config))
+    gen_projects = (
+        fetch_df("constants", "constant_scenario", config)
+        .set_index("param_name")
+        .transpose()
+    )
+    gen_projects = cross_join(
+        gen_projects, fetch_df("plants", "plants_scenario", config)
+    )
 
     # Append the storage plants to the inputs
-    add_to_csv("generation_projects_info.csv", gen_projects, primary_key="GENERATION_PROJECT")
+    add_to_csv(
+        "generation_projects_info.csv", gen_projects, primary_key="GENERATION_PROJECT"
+    )
 
     # Create min_per_tech.csv
     min_projects = fetch_df("minimums", "minimums_scenario", config)
-    add_to_csv("min_per_tech.csv", min_projects, primary_key=["gen_tech", "period"], append=False)
+    add_to_csv(
+        "min_per_tech.csv",
+        min_projects,
+        primary_key=["gen_tech", "period"],
+        append=False,
+    )
 
     # Get the plant costs from GSheets and append to costs
     storage_costs = fetch_df("costs", "costs_scenario", config)
-    storage_costs = storage_costs[storage_costs["GENERATION_PROJECT"].isin(gen_projects["GENERATION_PROJECT"])]
-    add_to_csv("gen_build_costs.csv", storage_costs, primary_key=["GENERATION_PROJECT", "build_year"])
+    storage_costs = storage_costs[
+        storage_costs["GENERATION_PROJECT"].isin(gen_projects["GENERATION_PROJECT"])
+    ]
+    add_to_csv(
+        "gen_build_costs.csv",
+        storage_costs,
+        primary_key=["GENERATION_PROJECT", "build_year"],
+    )
 
     # Create add_storage_info.csv
     pd.DataFrame([config]).transpose().to_csv("add_storage_info.csv", header=False)
@@ -144,11 +153,10 @@ def main(config):
     gen_type.columns = ["gen_tech", "energy_source"]
     gen_type["map_name"] = "default"
     gen_type["gen_type"] = "Storage"
-    pd.concat([
-        pd.read_csv("graph_tech_types.csv", index_col=False), gen_type
-    ]).to_csv("graph_tech_types.csv", index=False)
+    pd.concat([pd.read_csv("graph_tech_types.csv", index_col=False), gen_type]).to_csv(
+        "graph_tech_types.csv", index=False
+    )
 
 
 if __name__ == "__main__":
     main({})
->>>>>>> 4c70d285 (Refactor add_storage package into post process step)
diff --git a/switch_model/wecc/get_inputs/post_process_steps/aggregate_candidate_projects.py b/switch_model/wecc/get_inputs/post_process_steps/aggregate_candidate_projects.py
index 6f92517b1..3a3ed14ad 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/aggregate_candidate_projects.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/aggregate_candidate_projects.py
@@ -21,14 +21,11 @@
 
 
 @register_post_process(
-    name="aggregate_projects_by_zone",
     msg="Aggregating candidate projects by load zone for specified technologies",
-    only_with_config=True,
-    priority=4,
 )
-def post_process(config):
-    agg_techs = config["agg_techs"]
-    cf_method = config["cf_method"]
+def post_process(config, func_config):
+    agg_techs = func_config["agg_techs"]
+    cf_method = func_config["cf_method"]
     assert type(agg_techs) == list
     # Don't allow hydro to be aggregated since we haven't implemented how to handle
     # hydro_timeseries.csv
diff --git a/switch_model/wecc/get_inputs/post_process_steps/create_graph_files.py b/switch_model/wecc/get_inputs/post_process_steps/create_graph_files.py
index ed5c347cc..fa3c065af 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/create_graph_files.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/create_graph_files.py
@@ -3,8 +3,8 @@
 from switch_model.wecc.get_inputs.register_post_process import register_post_process
 
 
-@register_post_process(name="create_graph_files", msg="Creating graph files")
-def create_graph_files(_):
+@register_post_process(msg="Creating graph files")
+def post_process(config, *args, **kwargs):
     timepoints = pd.read_csv("timepoints.csv", index_col=False)
     timeseries = pd.read_csv("timeseries.csv", index_col=False)
     timepoints = timepoints.merge(
diff --git a/switch_model/wecc/get_inputs/post_process_steps/fix_prebuild_conflict.py b/switch_model/wecc/get_inputs/post_process_steps/fix_prebuild_conflict.py
index 5dcb9736a..70e8ca855 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/fix_prebuild_conflict.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/fix_prebuild_conflict.py
@@ -3,10 +3,8 @@
 from switch_model.wecc.get_inputs.register_post_process import register_post_process
 
 
-@register_post_process(
-    name="fix_prebuild_conflict_bug", msg="Shifting 2020 pre-build years to 2019"
-)
-def fix_prebuild_conflict_bug(_):
+@register_post_process(msg="Shifting 2020 pre-build years to 2019")
+def post_process(config, *args, **kwargs):
     """
     This post-processing step is necessary to pass the no_predetermined_bld_yr_vs_period_conflict BuildCheck.
     Basically we are moving all the 2020 predetermined build years to 2019 to avoid a conflict with the 2020 period.
diff --git a/switch_model/wecc/get_inputs/post_process_steps/only_california.py b/switch_model/wecc/get_inputs/post_process_steps/only_california.py
index 8924bf9d4..c73ce2ee7 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/only_california.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/only_california.py
@@ -7,7 +7,6 @@
 @register_post_process(
     name="only_california",
     msg="Dropping all the zones outside of California",
-    only_with_config=True,
     priority=3,
 )
 def main(_):
diff --git a/switch_model/wecc/get_inputs/post_process_steps/replace_plants_in_zone_all.py b/switch_model/wecc/get_inputs/post_process_steps/replace_plants_in_zone_all.py
index 6963ed0b4..57e7522b5 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/replace_plants_in_zone_all.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/replace_plants_in_zone_all.py
@@ -3,11 +3,8 @@
 from switch_model.wecc.get_inputs.register_post_process import register_post_process
 
 
-@register_post_process(
-    name="replace_all_zones_plants",
-    msg="Replacing _ALL_ZONES plants with a plant in each zone",
-)
-def replace_plants_in_zone_all(_):
+@register_post_process(msg="Replacing _ALL_ZONES plants with a plant in each zone")
+def post_process(config, *args, **kwargs):
     """
     This post-process step replaces all the generation projects that have a load called
     _ALL_ZONES with a generation project for each load zone.
diff --git a/switch_model/wecc/get_inputs/post_process_steps/reserve_technologies.py b/switch_model/wecc/get_inputs/post_process_steps/reserve_technologies.py
index 163988f6e..e3d7cd09b 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/reserve_technologies.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/reserve_technologies.py
@@ -10,13 +10,10 @@
 
 
 @register_post_process(
-    name="no_fosill_reserve",
-    msg="Aggregating candidate projects by load zone for specified technologies",
-    only_with_config=True,
-    priority=4
+    msg="Removing fossil fuels from reserves.",
 )
-def post_process(config):
-    """ This function sets to zero the column that allows each candidate technology to
+def post_process(config, func_config):
+    """This function sets to zero the column that allows each candidate technology to
     proividee"""
 
     fname = "generation_projects_info.csv"
diff --git a/switch_model/wecc/get_inputs/register_post_process.py b/switch_model/wecc/get_inputs/register_post_process.py
index c258116a4..2a75c92d0 100644
--- a/switch_model/wecc/get_inputs/register_post_process.py
+++ b/switch_model/wecc/get_inputs/register_post_process.py
@@ -8,13 +8,15 @@
 
 These 2 functions are kept in a separate file to avoid cyclical dependencies.
 """
+
+from functools import wraps
 import functools
 
 _registered_steps = {}
 
 
 def register_post_process(
-    name, msg=None, enabled=True, only_with_config=False, priority=2
+    msg=None,
 ):
     """
     Decorator that should be used to register a post-processing step.
@@ -27,25 +29,14 @@ def register_post_process(
     """
 
     def decorator(func):
-        @functools.wraps(func)
-        def wrapper(config=None):
-            if only_with_config and config is None:
-                return
-
+        @wraps(func)
+        def wrapper(*args, **kwargs):
             message = msg
             if message is None:
                 message = f"Running {func.__name__}"
-
             print(f"\t{message}...")
-            func(config)
-
-        wrapper.priority = priority
-
-        if name in _registered_steps:
-            raise Exception(f"Post-process step {name} already exists.")
+            func(*args, **kwargs)
 
-        if enabled:
-            _registered_steps[name] = wrapper
         return wrapper
 
     return decorator

From 4ae6fdd553d14edbafef0bece3effcc079f7e3d3 Mon Sep 17 00:00:00 2001
From: pesap <pesapsanchez@gmail.com>
Date: Thu, 12 Aug 2021 14:29:22 -0700
Subject: [PATCH 2/6] Change --post-process to --post-process-only

---
 switch_model/wecc/get_inputs/cli.py | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/switch_model/wecc/get_inputs/cli.py b/switch_model/wecc/get_inputs/cli.py
index 017aa49bc..1865f998e 100644
--- a/switch_model/wecc/get_inputs/cli.py
+++ b/switch_model/wecc/get_inputs/cli.py
@@ -31,7 +31,7 @@ def main():
         "want to wait for the command.",
     )
     parser.add_argument(
-        "--post-process",
+        "--post-process-only",
         default=False,
         action="store_true",
         help="Run only post process steps.",
@@ -51,42 +51,36 @@ def main():
     full_config = load_config()
     switch_to_input_dir(full_config, overwrite=args.overwrite)
 
-    if args.post_process is None:
+    if not args.post_process_only and args.post_process_step is None:  # full run only
         query_db(full_config, skip_cf=args.skip_cf)
 
-    print("Post-processing...")
+    print("\nRunning post processing...")
 
+    # Get location of post process scripts
     post_process_path = ".".join(__name__.split(".")[:-1]) + ".post_process_steps"
 
     def run_post_process(module):
+        """Run a function from a given module"""
+
+        # This uses python module syntax with a dot. Example: import foo.bar.test
         mod = importlib.import_module(f".{module}", post_process_path)
+
         post_process = getattr(mod, "post_process")
 
-        # Configuration specific for the post_process
+        # Get specific configuration for the post process if specified
         post_config = full_config.get(module, None)
 
         # Run post process
         post_process(full_config, post_config)
 
-    # Get additional post-process from the config file
+    # Run all post process specified, otherwise run single one
     if args.post_process_step is None:
         for module in full_config["post_process"]:
             run_post_process(module)
-        # run_post_process(full_config, step_name=args.post_process)
     else:
         run_post_process(getattr(args, "post_process_step"))
-        # breakpoint()
-        # post_process_path = '.'.join(__name__.split('.')[:-1]) + ".post_process_steps"
-        # mod = importlib.import_module(f".{module}", post_process_path)
-        # post_process = getattr(mod, "post_process")
-
-        # # Configuration specific for the post_process
-        # post_config = full_config.get(module, None)
-
-        # # Run post process
-        # post_process(full_config, post_config)
 
-    print(f"\nScript took {timer.step_time_as_str()} seconds to build input tables.")
+    print(f"\nScript took {timer.step_time_as_str()} seconds.")
 
 
 def switch_to_input_dir(config, overwrite):

From d9e53655c12c2195d471b867f3f06a71f039c560 Mon Sep 17 00:00:00 2001
From: pesap <pesapsanchez@gmail.com>
Date: Thu, 12 Aug 2021 14:31:44 -0700
Subject: [PATCH 3/6] Apply black format to code

---
 switch_model/wecc/get_inputs/get_inputs.py | 35 +++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/switch_model/wecc/get_inputs/get_inputs.py b/switch_model/wecc/get_inputs/get_inputs.py
index 17f3d008c..232aa860a 100755
--- a/switch_model/wecc/get_inputs/get_inputs.py
+++ b/switch_model/wecc/get_inputs/get_inputs.py
@@ -69,9 +69,10 @@ def write_csv(data: Iterable[List], fname, headers: List[str], log=True):
     "switch_model.transmission.transport.build",
     "switch_model.transmission.transport.dispatch",
     "switch_model.policies.carbon_policies",
-    "switch_model.policies.rps_unbundled",
     "switch_model.policies.min_per_tech",  # Always include since it provides useful outputs even when unused
     # "switch_model.reporting.basic_exports_wecc",
+    # Always include since by default it does nothing except output useful data
+    "switch_model.policies.wind_to_solar_ratio",
 ]
 
 
@@ -107,6 +108,7 @@ def query_db(full_config, skip_cf):
         "enable_planning_reserves",
         "generation_plant_technologies_scenario_id",
         "variable_o_m_cost_scenario_id",
+        "wind_to_solar_ratio",
     ]
 
     db_cursor.execute(
@@ -143,6 +145,7 @@ def query_db(full_config, skip_cf):
     enable_planning_reserves = s_details[18]
     generation_plant_technologies_scenario_id = s_details[19]
     variable_o_m_cost_scenario_id = s_details[20]
+    wind_to_solar_ratio = s_details[21]
 
     print(f"Scenario: {scenario_id}: {name}.")
 
@@ -749,6 +752,7 @@ def query_db(full_config, skip_cf):
             order by 1, 2;
             """,
         )
+        modules.append("switch_model.policies.rps_unbundled")
 
     ########################################################
     # BIO_SOLID SUPPLY CURVE
@@ -889,6 +893,7 @@ def query_db(full_config, skip_cf):
         )
 
     ca_policies(db_cursor, ca_policies_scenario_id, study_timeframe_id)
+    write_wind_to_solar_ratio(wind_to_solar_ratio)
     if enable_planning_reserves:
         planning_reserves(db_cursor, time_sample_id, hydro_simple_scenario_id)
     create_modules_txt()
@@ -905,6 +910,34 @@ def query_db(full_config, skip_cf):
     )
 
 
+def write_wind_to_solar_ratio(wind_to_solar_ratio):
+    # TODO: ideally we'd have a table where we can specify the wind_to_solar_ratio per period.
+    #   At the moment only a single wind_to_solar_ratio is specified, which doesn't allow different values per period.
+    if wind_to_solar_ratio is None:
+        return
+
+    print("wind_to_solar_ratio.csv...")
+    df = pd.read_csv("periods.csv")[["INVESTMENT_PERIOD"]]
+    df["wind_to_solar_ratio"] = wind_to_solar_ratio
+
+    # wind_to_solar_ratio.csv requires a column called wind_to_solar_ratio_const_gt that is True (1) or False (0)
+    # This column specifies whether the constraint is a greater than constraint or a less than constraint.
+    # In our case we want it to be a greater than constraint if we're trying to force wind-to-solar ratio above its default
+    # and we want it to be a less than constraint if we're trying to force the ratio below its default.
+    # Here the default is the ratio if we didn't have the constraint.
+    cutoff_ratio = 0.28
+    warnings.warn(
+        "To determine the sign of the wind-to-solar ratio constraint we have "
+        f"assumed that without the constraint, the wind-to-solar ratio is {cutoff_ratio}. "
+        f"This value was accurate for Martin's LDES runs however it may not be accurate for you. "
+        f"You should update this value in get_inputs or manually specify whether you want a greater than "
+        f"or a less than constraint."
+    )
+    df["wind_to_solar_ratio_const_gt"] = 1 if wind_to_solar_ratio > cutoff_ratio else 0
+
+    df.to_csv("wind_to_solar_ratio.csv", index=False)
+
+
 def ca_policies(db_cursor, ca_policies_scenario_id, study_timeframe_id):
     if ca_policies_scenario_id is None:
         return

From 0fcedd7f525e32c4cc00f001942dedcc2f2a6540 Mon Sep 17 00:00:00 2001
From: pesap <pesapsanchez@gmail.com>
Date: Thu, 12 Aug 2021 14:37:23 -0700
Subject: [PATCH 4/6] If post-step is passed don't query the database

---
 switch_model/wecc/get_inputs/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/switch_model/wecc/get_inputs/cli.py b/switch_model/wecc/get_inputs/cli.py
index 1865f998e..638b19ac9 100644
--- a/switch_model/wecc/get_inputs/cli.py
+++ b/switch_model/wecc/get_inputs/cli.py
@@ -51,7 +51,7 @@ def main():
     full_config = load_config()
     switch_to_input_dir(full_config, overwrite=args.overwrite)
 
-    if not args.post_process_only is None:
+    if not args.post_process_only and args.post_process_step is None:
         query_db(full_config, skip_cf=args.skip_cf)
 
     print("\nRunning post processing...")

From 9ff8168af0685d72e15e6e3ca17f0ffdeba9c3ee Mon Sep 17 00:00:00 2001
From: pesap <pesapsanchez@gmail.com>
Date: Thu, 12 Aug 2021 16:12:35 -0700
Subject: [PATCH 5/6] Include energy_cost post_process

---
 .../post_process_steps/energy_cost.py         | 49 +++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 switch_model/wecc/get_inputs/post_process_steps/energy_cost.py

diff --git a/switch_model/wecc/get_inputs/post_process_steps/energy_cost.py b/switch_model/wecc/get_inputs/post_process_steps/energy_cost.py
new file mode 100644
index 000000000..09b54b267
--- /dev/null
+++ b/switch_model/wecc/get_inputs/post_process_steps/energy_cost.py
@@ -0,0 +1,49 @@
+# Standard packages
+import os
+import shutil
+
+# Third-party packages
+import pandas as pd
+
+from switch_model.wecc.get_inputs.register_post_process import register_post_process
+
+
+@register_post_process(
+    msg="Change energy cost for storage candidate",
+)
+def post_process(config, func_config):
+
+    percentage = int(func_config["percentage"])/100
+    dtype = {"GENERATION_PROJECT": str}
+    df = pd.read_csv("generation_projects_info.csv", dtype=dtype)
+    costs = pd.read_csv("gen_build_costs.csv", dtype=dtype)
+    predetermined = pd.read_csv("gen_build_predetermined.csv",dtype=dtype)
+
+    gen_projects = df.merge(
+      costs,
+      on="GENERATION_PROJECT",
+    )
+
+    gen_projects = gen_projects.merge(
+      predetermined,
+      on=["GENERATION_PROJECT", "build_year"],
+      how="left"  # Makes a left join
+    )
+
+    # Keep only candidate Battery_Storage projects (those with build_year 2050)
+    candidate = gen_projects.query("build_year == 2050").query("gen_tech =='Battery_Storage'")
+
+    # Get candidate generation project ids
+    candidate_ids = candidate["GENERATION_PROJECT"].values
+
+
+    gen_cost_mwh = costs.loc[costs["GENERATION_PROJECT"].isin(candidate_ids),
+                             "gen_storage_energy_overnight_cost"].astype(float)
+
+    # Scale the storage energy overnight cost of the candidate projects by the given percentage
+    costs.loc[
+        costs["GENERATION_PROJECT"].isin(candidate_ids), "gen_storage_energy_overnight_cost"
+    ] = gen_cost_mwh * percentage
+
+    # Save file again
+    costs.to_csv("gen_build_costs.csv", index=False)

From 6933120454d461621da5eff0e9f05ae80daac7dd Mon Sep 17 00:00:00 2001
From: pesap <pesapsanchez@gmail.com>
Date: Mon, 27 Sep 2021 11:26:21 -0700
Subject: [PATCH 6/6] Propagate change to other post process files

---
 .../wecc/get_inputs/post_process_steps/add_storage.py        | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/switch_model/wecc/get_inputs/post_process_steps/add_storage.py b/switch_model/wecc/get_inputs/post_process_steps/add_storage.py
index 1369647a6..8c28b8320 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/add_storage.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/add_storage.py
@@ -5,7 +5,6 @@
 the csvs in the inputs folder.
 """
 import pandas as pd
-
 from switch_model.wecc.get_inputs.register_post_process import register_post_process
 
 
@@ -102,11 +101,9 @@ def drop_previous_candidate_storage():
 
 
 @register_post_process(
-    name="add_storage",
     msg="Adding storage from Google Sheets",
-    priority=1,  # Increased priority (default is 2) so that it always runs before replace_plants_in_zone_all.py
 )
-def main(config):
+def post_process(config):
     # Drop previous candidate storage from inputs
     drop_previous_candidate_storage()