Merge pull request #118 from staadecker/postprocess-fix

Cleanup and minor fixes to get_inputs post process
switch-model · Jan 28, 2023 · 6283a45 · 6283a45
2 parents 4d98cd8 + 10b7cf5
commit 6283a45
Show file tree

Hide file tree

Showing 11 changed files with 74 additions and 93 deletions.
diff --git a/switch_model/tools/templates/config.yaml b/switch_model/tools/templates/config.yaml
@@ -7,6 +7,7 @@
 # ------------------
 # General config
 # ------------------
+version: 3.0
 inputs_dir: inputs
 
 # --------------------------------
@@ -17,7 +18,7 @@ get_inputs:
   schema: switch
 
   # Scenario configuration
-  scenario_id: <FILL_IN>
+  scenario_id: 176
   #  the following parameters are optional and will override the defaults for that scenario
   #  this should only be used for preliminary exploration of scenarios or for testing
   #  if a scenario is part of research, it should be included in the database as a row in the scenarios table.
@@ -42,20 +43,28 @@ get_inputs:
   #  enable_planning_reserves:
   #  generation_plant_technologies_scenario_id:
   #  variable_o_m_cost_scenario_id:
-# add_storage was used by Martin when studying LDES
-# you likely don't need to use these parameters
-# they won't impact your runs
-# add_storage:
-  # costs_scenario: 0
-  # plants_scenario: 0
-  # constant_scenario: 0
-  # minimums_scenario: 0
-# When the following line is uncommented (regardless of its value) then only California load zones are kept
-# only_california: 0
-# When the following lines are uncommented all the Central_PV and Wind projects within the same load zone gets
-# aggregated into a single project. This helps reduce the model complexity.
-# cf_quantile is the percentile for the capacity factor to use. 1 will use the largest capacity factor
-# of all the available candidate plants, 0.5 will use the median plant and 0 will use the worst plant.
-# aggregate_projects_by_zone:
-#  agg_techs: ["Central_PV"]
-#  cf_method: "file" # Other options are "weighted_mean" and "95_quantile"
+  #  wind_to_solar_ratio:
+post_process_steps: # The following post process steps will be run in order
+#  - add_storage # Used by Martin when studying LDES, likely shouldn't uncomment this line
+#  - aggregate_candidate_projects
+  - replace_plants_in_zone_all
+  - create_graph_files
+#  - energy_cost
+  - fix_prebuild_conflict
+#  - only_california
+#  - reserve_technologies
+post_process_config:
+  # add_storage was used by Martin when studying LDES; you likely don't need to uncomment these parameters
+  # add_storage:
+    # costs_scenario: 0
+    # plants_scenario: 0
+    # constant_scenario: 0
+    # minimums_scenario: 0
+
+  # When the following lines are uncommented, all the Central_PV and Wind projects within the same load zone get
+  # aggregated into a single project. This helps reduce the model complexity.
+  # cf_quantile is the percentile for the capacity factor to use. 1 will use the largest capacity factor
+  # of all the available candidate plants, 0.5 will use the median plant and 0 will use the worst plant.
+#   aggregate_candidate_projects:
+#    agg_techs: ["Central_PV"]
+#    cf_method: "file" # Other options are "weighted_mean" and "95_quantile"
diff --git a/switch_model/wecc/get_inputs/cli.py b/switch_model/wecc/get_inputs/cli.py
@@ -8,9 +8,6 @@
 from switch_model.wecc.get_inputs.get_inputs import query_db
 from switch_model.wecc.utilities import load_config
 
-# from switch_model.wecc.get_inputs.post_process_steps import *
-# from switch_model.wecc.get_inputs.register_post_process import run_post_process, _registered_steps
-
 
 def main():
     timer = StepTimer()
@@ -68,14 +65,16 @@ def run_post_process(module):
         post_process = getattr(mod, "post_process")
 
         # Get specific configuration for the post process if specified
-        post_config = full_config.get(module, None)
+        post_config = None
+        if "post_process_config" in full_config and full_config["post_process_config"] is not None:
+            post_config = full_config["post_process_config"].get(module, None)
 
         # Run post process
-        post_process(full_config, post_config)
+        post_process(post_config)
 
     # Run all post process specified, otherwise run single one
     if args.post_process_step is None:
-        for module in full_config["post_process"]:
+        for module in full_config["post_process_steps"]:
             run_post_process(module)
     else:
         run_post_process(getattr(args, "post_process_step"))

diff --git a/switch_model/wecc/get_inputs/post_process_steps/add_storage.py b/switch_model/wecc/get_inputs/post_process_steps/add_storage.py
@@ -5,7 +5,7 @@
 the csvs in the inputs folder.
 """
 import pandas as pd
-from switch_model.wecc.get_inputs.register_post_process import register_post_process
+from switch_model.wecc.get_inputs.register_post_process import post_process_step
 
 
 def fetch_df(tab_name, key, config):
@@ -100,9 +100,7 @@ def drop_previous_candidate_storage():
     costs.to_csv("gen_build_costs.csv", index=False)
 
 
-@register_post_process(
-    msg="Adding storage from Google Sheets",
-)
+@post_process_step(msg="Adding storage from Google Sheets")
 def post_process(config):
     # Drop previous candidate storage from inputs
     drop_previous_candidate_storage()

diff --git a/switch_model/wecc/get_inputs/post_process_steps/aggregate_candidate_projects.py b/switch_model/wecc/get_inputs/post_process_steps/aggregate_candidate_projects.py
@@ -17,13 +17,13 @@
 import numpy as np
 import pandas as pd
 
-from switch_model.wecc.get_inputs.register_post_process import register_post_process
+from switch_model.wecc.get_inputs.register_post_process import post_process_step
 
 
-@register_post_process(
-    msg="Aggregating candidate projects by load zone for specified technologies",
+@post_process_step(
+    msg="Aggregating candidate projects by load zone for specified technologies"
 )
-def post_process(config, func_config):
+def post_process(func_config):
     agg_techs = func_config["agg_techs"]
     cf_method = func_config["cf_method"]
     assert type(agg_techs) == list
@@ -51,7 +51,14 @@ def post_process(config, func_config):
     should_agg = df["gen_tech"].isin(agg_techs) & (~df[key].isin(predetermined))
     if cf_method == "file":
         # Filter out projects where we don't have a capacity factor
-        zonal_cf = pd.read_csv("zonal_capacity_factors.csv", index_col=False)
+        try:
+            zonal_cf = pd.read_csv("zonal_capacity_factors.csv", index_col=False)
+        except FileNotFoundError:
+            raise Exception(
+                "Post process step 'aggregate_candidate_projects' with method 'file'"
+                " requires an external zonal_capacity_factors.csv to exist. This file can be generated"
+                " using the scripts in zonal_capacity_factors.csv."
+            )
         valid_proj = df.merge(
             zonal_cf[["gen_load_zone", "gen_tech"]].drop_duplicates(),
             on=["gen_load_zone", "gen_tech"],

diff --git a/switch_model/wecc/get_inputs/post_process_steps/create_graph_files.py b/switch_model/wecc/get_inputs/post_process_steps/create_graph_files.py
@@ -1,10 +1,10 @@
 import pandas as pd
 
-from switch_model.wecc.get_inputs.register_post_process import register_post_process
+from switch_model.wecc.get_inputs.register_post_process import post_process_step
 
 
-@register_post_process(msg="Creating graph files")
-def post_process(config, *args, **kwargs):
+@post_process_step(msg="Creating graph files")
+def post_process(_):
     timepoints = pd.read_csv("timepoints.csv", index_col=False)
     timeseries = pd.read_csv("timeseries.csv", index_col=False)
     timepoints = timepoints.merge(

diff --git a/switch_model/wecc/get_inputs/post_process_steps/energy_cost.py b/switch_model/wecc/get_inputs/post_process_steps/energy_cost.py
@@ -5,13 +5,13 @@
 # Third-party packages
 import pandas as pd
 
-from switch_model.wecc.get_inputs.register_post_process import register_post_process
+from switch_model.wecc.get_inputs.register_post_process import post_process_step
 
 
-@register_post_process(
+@post_process_step(
     msg="Change energy cost for storage candidate",
 )
-def post_process(config, func_config):
+def post_process(func_config):
 
     percentage = int(func_config["percentage"]) / 100
     dtype = {"GENERATION_PROJECT": str}

diff --git a/switch_model/wecc/get_inputs/post_process_steps/fix_prebuild_conflict.py b/switch_model/wecc/get_inputs/post_process_steps/fix_prebuild_conflict.py
@@ -1,10 +1,10 @@
 import pandas as pd
 
-from switch_model.wecc.get_inputs.register_post_process import register_post_process
+from switch_model.wecc.get_inputs.register_post_process import post_process_step
 
 
-@register_post_process(msg="Shifting 2020 pre-build years to 2019")
-def post_process(config, *args, **kwargs):
+@post_process_step(msg="Shifting 2020 pre-build years to 2019")
+def post_process(_):
     """
     This post-processing step is necessary to pass the no_predetermined_bld_yr_vs_period_conflict BuildCheck.
     Basically we are moving all the 2020 predetermined build years to 2019 to avoid a conflict with the 2020 period.
@@ -15,9 +15,13 @@ def post_process(config, *args, **kwargs):
         return
 
     # Read two files that need modification
-    gen_build_costs = pd.read_csv("gen_build_costs.csv", index_col=False)
+    gen_build_costs = pd.read_csv(
+        "gen_build_costs.csv", index_col=False, dtype={"GENERATION_PROJECT": object}
+    )
     gen_build_predetermined = pd.read_csv(
-        "gen_build_predetermined.csv", index_col=False
+        "gen_build_predetermined.csv",
+        index_col=False,
+        dtype={"GENERATION_PROJECT": object},
     )
     # Save their size
     rows_prior = gen_build_costs.size, gen_build_predetermined.size

diff --git a/switch_model/wecc/get_inputs/post_process_steps/only_california.py b/switch_model/wecc/get_inputs/post_process_steps/only_california.py
@@ -1,14 +1,10 @@
 import pandas as pd
 
-from switch_model.wecc.get_inputs.register_post_process import register_post_process
+from switch_model.wecc.get_inputs.register_post_process import post_process_step
 from switch_model.tools.drop import main as drop
 
 
-@register_post_process(
-    name="only_california",
-    msg="Dropping all the zones outside of California",
-    priority=3,
-)
+@post_process_step(msg="Dropping all the zones outside of California")
 def main(_):
     df = pd.read_csv("load_zones.csv", index_col=False)
     df = df[df["LOAD_ZONE"].str.startswith("CA_")]

diff --git a/switch_model/wecc/get_inputs/post_process_steps/replace_plants_in_zone_all.py b/switch_model/wecc/get_inputs/post_process_steps/replace_plants_in_zone_all.py
@@ -1,10 +1,10 @@
 import pandas as pd
 
-from switch_model.wecc.get_inputs.register_post_process import register_post_process
+from switch_model.wecc.get_inputs.register_post_process import post_process_step
 
 
-@register_post_process(msg="Replacing _ALL_ZONES plants with a plant in each zone")
-def post_process(config, *args, **kwargs):
+@post_process_step(msg="Replacing _ALL_ZONES plants with a plant in each zone")
+def post_process(_):
     """
     This post-process step replaces all the generation projects that have a load called
     _ALL_ZONES with a generation project for each load zone.

diff --git a/switch_model/wecc/get_inputs/post_process_steps/reserve_technologies.py b/switch_model/wecc/get_inputs/post_process_steps/reserve_technologies.py
@@ -6,18 +6,15 @@
 # Third-party packages
 import pandas as pd
 
-from switch_model.wecc.get_inputs.register_post_process import register_post_process
+from switch_model.wecc.get_inputs.register_post_process import post_process_step
 
 
-@register_post_process(
-    name="no_fosill_reserve",
-    msg="Aggregating candidate projects by load zone for specified technologies",
-    only_with_config=True,
-    priority=4,
+@post_process_step(
+    msg="Removing fossil fuels from reserves.",
 )
-def post_process(config):
+def post_process(_):
     """This function sets to zero the column that allows each candidate technology to
-    proividee"""
+    provide"""
 
     fname = "generation_projects_info.csv"
     df = pd.read_csv(fname)

diff --git a/switch_model/wecc/get_inputs/register_post_process.py b/switch_model/wecc/get_inputs/register_post_process.py
@@ -1,31 +1,18 @@
 """
-This file provides two functions
-
-1. register_post_process(msg, enabled=True) which is a function decorator that allows registering a function
-as a post-process step.
-
-2. run_post_process() which runs the registered post process steps.
-
-These 2 functions are kept in a separate file to avoid cyclical dependencies.
+This file provides the decorator post_process_step(msg)
+which is a function decorator that ensures the post processing step is printed.
 """
 
 from functools import wraps
-import functools
 
-_registered_steps = {}
 
-
-def register_post_process(
+def post_process_step(
     msg=None,
 ):
     """
     Decorator that should be used to register a post-processing step.
 
     @param msg The message to display while running this step.
-    @param enabled Whether we should be using this step.
-    @param name Name of the post processing step and of the config section
-    @param only_with_config if True the step will only run if 'name' exists in the config file
-    @param priority 0 is highest priority (runs first) and larger numbers are lower priority.
     """
 
     def decorator(func):
@@ -40,19 +27,3 @@ def wrapper(*args, **kwargs):
         return wrapper
 
     return decorator
-
-
-def run_post_process(config, step_name=None):
-    """
-    Run the post processing steps.
-
-    @param config The values from config.yaml (already parsed)
-    @param step_name if step_name is None we run all the steps. If it's specified we only run that step.
-    """
-    if step_name is None:
-        for name, func in sorted(
-            _registered_steps.items(), key=lambda s: s[1].priority
-        ):
-            func(config.get(name, None))
-    else:
-        _registered_steps[step_name](config.get(step_name, None))