From 3f3eece0ee7e09aa1098dc478ab05aece626e282 Mon Sep 17 00:00:00 2001 From: pesap Date: Thu, 12 Aug 2021 14:03:46 -0700 Subject: [PATCH 1/6] Refactoring and simplification of postprocess --- .../2021-07-29_create_low_hydro_scenario.py | 80 ++++++++++++------- .../2021-07-30_create_no_hydro_scenario.py | 28 +++++-- .../2021-08-02_create_half_hydro_scenario.py | 22 +++-- switch_model/wecc/get_inputs/cli.py | 47 ++++++++++- .../post_process_steps/add_storage.py | 78 ++++++++++-------- .../aggregate_candidate_projects.py | 9 +-- .../post_process_steps/create_graph_files.py | 4 +- .../fix_prebuild_conflict.py | 6 +- .../post_process_steps/only_california.py | 1 - .../replace_plants_in_zone_all.py | 7 +- .../reserve_technologies.py | 9 +-- .../wecc/get_inputs/register_post_process.py | 21 ++--- 12 files changed, 195 insertions(+), 117 deletions(-) diff --git a/database/2021-07-29_create_low_hydro_scenario.py b/database/2021-07-29_create_low_hydro_scenario.py index e64fc894c..9a116840d 100644 --- a/database/2021-07-29_create_low_hydro_scenario.py +++ b/database/2021-07-29_create_low_hydro_scenario.py @@ -22,7 +22,9 @@ new_start_year = 2020 new_end_year = 2050 new_scenario_id = 24 -new_scenario_name = "Lowest year (2015) repeated. Using EIA and AMPL Canada and Mex data." +new_scenario_name = ( + "Lowest year (2015) repeated. Using EIA and AMPL Canada and Mex data." +) new_scenario_description = "Lowest year (2015) repeated from 2020 to 2050, based on data from id 21 (EIA + AMPL Canada & Mex)." @@ -35,44 +37,66 @@ def main(): f""" SELECT DISTINCT generation_plant_id FROM hydro_historical_monthly_capacity_factors WHERE hydro_simple_scenario_id={all_plants_scenario}; - """) - hydro_plants = pd.DataFrame(db_cursor.fetchall(), columns=["generation_plant_id"])["generation_plant_id"] + """ + ) + hydro_plants = pd.DataFrame(db_cursor.fetchall(), columns=["generation_plant_id"])[ + "generation_plant_id" + ] # 2. Get all the hydro flow data for the worst year db_cursor.execute( f""" SELECT generation_plant_id, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors WHERE hydro_simple_scenario_id={raw_data_scenario} and year={worst_year}; - """) - worst_year_data = pd.DataFrame(db_cursor.fetchall(), - columns=["generation_plant_id", "month", "hydro_min_flow_mw", "hydro_avg_flow_mw"]) + """ + ) + worst_year_data = pd.DataFrame( + db_cursor.fetchall(), + columns=[ + "generation_plant_id", + "month", + "hydro_min_flow_mw", + "hydro_avg_flow_mw", + ], + ) # 3. Identify plants where data is missing - missing_hydro_plants = hydro_plants[~hydro_plants.isin(worst_year_data["generation_plant_id"])].values + missing_hydro_plants = hydro_plants[ + ~hydro_plants.isin(worst_year_data["generation_plant_id"]) + ].values # 4. For each missing plant get the data for all the years db_cursor.execute( f""" SELECT generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors WHERE hydro_simple_scenario_id={raw_data_scenario} and generation_plant_id in ({",".join(missing_hydro_plants.astype(str))}); - """) - missing_plants_data = pd.DataFrame(db_cursor.fetchall(), - columns=["generation_plant_id", "year", "month", "hydro_min_flow_mw", - "hydro_avg_flow_mw"]) + """ + ) + missing_plants_data = pd.DataFrame( + db_cursor.fetchall(), + columns=[ + "generation_plant_id", + "year", + "month", + "hydro_min_flow_mw", + "hydro_avg_flow_mw", + ], + ) # 5. Pick the year with the least flow # Aggregate by year - missing_data_by_year = missing_plants_data.groupby(["generation_plant_id", "year"], as_index=False)[ - "hydro_avg_flow_mw"].mean() + missing_data_by_year = missing_plants_data.groupby( + ["generation_plant_id", "year"], as_index=False + )["hydro_avg_flow_mw"].mean() # Select years where the flow is at its lowest - year_to_use = \ - missing_data_by_year.loc[missing_data_by_year.groupby("generation_plant_id")["hydro_avg_flow_mw"].idxmin()][ - ["generation_plant_id", "year"]] + year_to_use = missing_data_by_year.loc[ + missing_data_by_year.groupby("generation_plant_id")[ + "hydro_avg_flow_mw" + ].idxmin() + ][["generation_plant_id", "year"]] # Essentially filter missing_plants_data to only include keys from the right table, aka plants and years that are lowest missing_plants_data = missing_plants_data.merge( - year_to_use, - on=["generation_plant_id", "year"], - how="right" + year_to_use, on=["generation_plant_id", "year"], how="right" ).drop("year", axis=1) # 6. Add the missing data to our worst year data and verify we have data for all the plants @@ -81,14 +105,13 @@ def main(): # 7. Cross join the series with all the years from 2020 to 2050 years = pd.Series(range(new_start_year, new_end_year + 1), name="year") - worst_year_data = worst_year_data.merge( - years, - how="cross" - ) + worst_year_data = worst_year_data.merge(years, how="cross") worst_year_data["hydro_simple_scenario_id"] = new_scenario_id # 8. Complete some data checks - assert len(worst_year_data) == 12 * (new_end_year - new_start_year + 1) * len(hydro_plants) + assert len(worst_year_data) == 12 * (new_end_year - new_start_year + 1) * len( + hydro_plants + ) # 9. Add data to database print(f"hydro_simple_scenario: {new_scenario_id}") @@ -99,7 +122,9 @@ def main(): print(f"To year: {new_end_year}") print(f"Example data:\n{worst_year_data.head()}") - if not query_yes_no("\nAre you sure you want to add this data to the database?", default="no"): + if not query_yes_no( + "\nAre you sure you want to add this data to the database?", default="no" + ): raise SystemExit db_cursor.execute( @@ -110,9 +135,10 @@ def main(): n = len(worst_year_data) start_time = time.time() for i, r in enumerate(worst_year_data.itertuples(index=False)): - if i !=0 and i % 1000 == 0: + if i != 0 and i % 1000 == 0: print( - f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}") + f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}" + ) db_cursor.execute( f"INSERT INTO hydro_historical_monthly_capacity_factors(hydro_simple_scenario_id, generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw) " f"VALUES ({r.hydro_simple_scenario_id},{r.generation_plant_id},{r.year},{r.month},{r.hydro_min_flow_mw},{r.hydro_avg_flow_mw})" diff --git a/database/2021-07-30_create_no_hydro_scenario.py b/database/2021-07-30_create_no_hydro_scenario.py index f4f658363..ff9c1f576 100644 --- a/database/2021-07-30_create_no_hydro_scenario.py +++ b/database/2021-07-30_create_no_hydro_scenario.py @@ -17,8 +17,10 @@ new_scenario_id = 25 new_scenario_name = "No Hydro" -new_scenario_description = "All average flows are zero effectively removing all hydro generation from the model." \ - " Represents as an extreme edge case of no hydro generation." +new_scenario_description = ( + "All average flows are zero effectively removing all hydro generation from the model." + " Represents as an extreme edge case of no hydro generation." +) def main(): @@ -30,9 +32,18 @@ def main(): f""" SELECT DISTINCT generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors WHERE hydro_simple_scenario_id={all_plants_scenario}; - """) - df = pd.DataFrame(db_cursor.fetchall(), - columns=["generation_plant_id", "year", "month", "hydro_min_flow_mw", "hydro_avg_flow_mw"]) + """ + ) + df = pd.DataFrame( + db_cursor.fetchall(), + columns=[ + "generation_plant_id", + "year", + "month", + "hydro_min_flow_mw", + "hydro_avg_flow_mw", + ], + ) # 2. Set all the flows to zero and set the scenario id df["hydro_min_flow_mw"] = 0 @@ -46,7 +57,9 @@ def main(): print(f"Num hydro plants: {df.generation_plant_id.nunique()}") print(f"Example data:\n{df.head()}") - if not query_yes_no("\nAre you sure you want to add this data to the database?", default="no"): + if not query_yes_no( + "\nAre you sure you want to add this data to the database?", default="no" + ): raise SystemExit db_cursor.execute( @@ -59,7 +72,8 @@ def main(): for i, r in enumerate(df.itertuples(index=False)): if i != 0 and i % 1000 == 0: print( - f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}") + f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}" + ) db_cursor.execute( f"INSERT INTO hydro_historical_monthly_capacity_factors(hydro_simple_scenario_id, generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw) " f"VALUES ({r.hydro_simple_scenario_id},{r.generation_plant_id},{r.year},{r.month},{r.hydro_min_flow_mw},{r.hydro_avg_flow_mw})" diff --git a/database/2021-08-02_create_half_hydro_scenario.py b/database/2021-08-02_create_half_hydro_scenario.py index 7ee93d4b0..6bbef5e2b 100644 --- a/database/2021-08-02_create_half_hydro_scenario.py +++ b/database/2021-08-02_create_half_hydro_scenario.py @@ -29,9 +29,18 @@ def main(): f""" SELECT DISTINCT generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors WHERE hydro_simple_scenario_id={all_plants_scenario}; - """) - df = pd.DataFrame(db_cursor.fetchall(), - columns=["generation_plant_id", "year", "month", "hydro_min_flow_mw", "hydro_avg_flow_mw"]) + """ + ) + df = pd.DataFrame( + db_cursor.fetchall(), + columns=[ + "generation_plant_id", + "year", + "month", + "hydro_min_flow_mw", + "hydro_avg_flow_mw", + ], + ) # 2. Set all the flows to zero and set the scenario id df["hydro_avg_flow_mw"] /= 2 @@ -45,7 +54,9 @@ def main(): print(f"Num hydro plants: {df.generation_plant_id.nunique()}") print(f"Example data:\n{df.head()}") - if not query_yes_no("\nAre you sure you want to add this data to the database?", default="no"): + if not query_yes_no( + "\nAre you sure you want to add this data to the database?", default="no" + ): raise SystemExit db_cursor.execute( @@ -58,7 +69,8 @@ def main(): for i, r in enumerate(df.itertuples(index=False)): if i != 0 and i % 1000 == 0: print( - f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}") + f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}" + ) db_cursor.execute( f"INSERT INTO hydro_historical_monthly_capacity_factors(hydro_simple_scenario_id, generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw) " f"VALUES ({r.hydro_simple_scenario_id},{r.generation_plant_id},{r.year},{r.month},{r.hydro_min_flow_mw},{r.hydro_avg_flow_mw})" diff --git a/switch_model/wecc/get_inputs/cli.py b/switch_model/wecc/get_inputs/cli.py index a5a344506..017aa49bc 100644 --- a/switch_model/wecc/get_inputs/cli.py +++ b/switch_model/wecc/get_inputs/cli.py @@ -1,13 +1,15 @@ """ Script to retrieve the input data from the switch-wecc database and apply post-processing steps. """ import argparse +import importlib import os from switch_model.utilities import query_yes_no, StepTimer from switch_model.wecc.get_inputs.get_inputs import query_db -from switch_model.wecc.get_inputs.register_post_process import run_post_process from switch_model.wecc.utilities import load_config -from switch_model.wecc.get_inputs.post_process_steps import * + +# from switch_model.wecc.get_inputs.post_process_steps import * +# from switch_model.wecc.get_inputs.register_post_process import run_post_process, _registered_steps def main(): @@ -29,7 +31,13 @@ def main(): "want to wait for the command.", ) parser.add_argument( - "--post-process", default=None, help="Run only this post process step." + "--post-process", + default=False, + action="store_true", + help="Run only post process steps.", + ) + parser.add_argument( + "--post-process-step", default=None, help="Run only this post process step." ) parser.add_argument( "--overwrite", @@ -45,8 +53,39 @@ def main(): if args.post_process is None: query_db(full_config, skip_cf=args.skip_cf) + print("Post-processing...") - run_post_process(full_config, step_name=args.post_process) + + post_process_path = ".".join(__name__.split(".")[:-1]) + ".post_process_steps" + + def run_post_process(module): + mod = importlib.import_module(f".{module}", post_process_path) + post_process = getattr(mod, "post_process") + + # Configuration specific for the post_process + post_config = full_config.get(module, None) + + # Run post process + post_process(full_config, post_config) + + # Get additional post-process from the config file + if args.post_process_step is None: + for module in full_config["post_process"]: + run_post_process(module) + # run_post_process(full_config, step_name=args.post_process) + else: + run_post_process(getattr(args, "post_process_step")) + # breakpoint() + # post_process_path = '.'.join(__name__.split('.')[:-1]) + ".post_process_steps" + # mod = importlib.import_module(f".{module}", post_process_path) + # post_process = getattr(mod, "post_process") + + # # Configuration specific for the post_process + # post_config = full_config.get(module, None) + + # # Run post process + # post_process(full_config, post_config) + print(f"\nScript took {timer.step_time_as_str()} seconds to build input tables.") diff --git a/switch_model/wecc/get_inputs/post_process_steps/add_storage.py b/switch_model/wecc/get_inputs/post_process_steps/add_storage.py index 5130c5237..1369647a6 100644 --- a/switch_model/wecc/get_inputs/post_process_steps/add_storage.py +++ b/switch_model/wecc/get_inputs/post_process_steps/add_storage.py @@ -17,16 +17,16 @@ def fetch_df(tab_name, key, config): "constants": 0, "plants": 889129113, "costs": 1401952285, - "minimums": 1049456965 + "minimums": 1049456965, } SHEET_ID = "1SJrj039T1T95NLTs964VQnsfZgo2QWCo29x2ireVYcU" gid = TAB_NAME_GID[tab_name] url = f"https://docs.google.com/spreadsheet/ccc?key={SHEET_ID}&output=csv&gid={gid}" - df: pd.DataFrame = pd.read_csv(url, index_col=False) \ - .replace("FALSE", False) \ - .replace("TRUE", True) + df: pd.DataFrame = ( + pd.read_csv(url, index_col=False).replace("FALSE", False).replace("TRUE", True) + ) if "description" in df.columns: df = df.drop("description", axis=1) @@ -43,17 +43,16 @@ def filer_by_scenario(df, scenario_column, config): if scenario_column in config: scenario = config[scenario_column] else: - scenario = input(f"Which scenario do you want for '{scenario_column}' (default 0) : ") + scenario = input( + f"Which scenario do you want for '{scenario_column}' (default 0) : " + ) scenario = int(scenario) if scenario != "" else 0 df = df[df[scenario_column] == scenario] return df.drop(scenario_column, axis=1) def cross_join(df1, df2): - return df1.assign(key=1).merge( - df2.assign(key=1), - on="key" - ).drop("key", axis=1) + return df1.assign(key=1).merge(df2.assign(key=1), on="key").drop("key", axis=1) def add_to_csv(filename, to_add, primary_key=None, append=True): @@ -83,8 +82,12 @@ def drop_previous_candidate_storage(): gen = pd.read_csv("generation_projects_info.csv", index_col=False) # Find generation projects that are both storage and not predetermined (i.e. candidate) - predetermined_gen = pd.read_csv("gen_build_predetermined.csv", index_col=False)["GENERATION_PROJECT"] - should_drop = (gen["gen_tech"] == STORAGE_TECH) & ~gen["GENERATION_PROJECT"].isin(predetermined_gen) + predetermined_gen = pd.read_csv("gen_build_predetermined.csv", index_col=False)[ + "GENERATION_PROJECT" + ] + should_drop = (gen["gen_tech"] == STORAGE_TECH) & ~gen["GENERATION_PROJECT"].isin( + predetermined_gen + ) # Find projects that we should drop (candidate storage) gen_to_drop = gen[should_drop]["GENERATION_PROJECT"] @@ -101,40 +104,46 @@ def drop_previous_candidate_storage(): @register_post_process( name="add_storage", msg="Adding storage from Google Sheets", - only_with_config=True, -<<<<<<< HEAD - priority=1, -) -def add_storage(config): - from switch_model.tools.add_storage import main - - main( - run_post_solve=False, # We will run post solve automatically right afterwards - scenario_config=config, - change_dir=False, - ) -======= - priority=1 # Increased priority (default is 2) so that it always runs before replace_plants_in_zone_all.py + priority=1, # Increased priority (default is 2) so that it always runs before replace_plants_in_zone_all.py ) def main(config): # Drop previous candidate storage from inputs drop_previous_candidate_storage() # Get the generation storage plants from Google Sheet - gen_projects = fetch_df("constants", "constant_scenario", config).set_index("param_name").transpose() - gen_projects = cross_join(gen_projects, fetch_df("plants", "plants_scenario", config)) + gen_projects = ( + fetch_df("constants", "constant_scenario", config) + .set_index("param_name") + .transpose() + ) + gen_projects = cross_join( + gen_projects, fetch_df("plants", "plants_scenario", config) + ) # Append the storage plants to the inputs - add_to_csv("generation_projects_info.csv", gen_projects, primary_key="GENERATION_PROJECT") + add_to_csv( + "generation_projects_info.csv", gen_projects, primary_key="GENERATION_PROJECT" + ) # Create min_per_tech.csv min_projects = fetch_df("minimums", "minimums_scenario", config) - add_to_csv("min_per_tech.csv", min_projects, primary_key=["gen_tech", "period"], append=False) + add_to_csv( + "min_per_tech.csv", + min_projects, + primary_key=["gen_tech", "period"], + append=False, + ) # Get the plant costs from GSheets and append to costs storage_costs = fetch_df("costs", "costs_scenario", config) - storage_costs = storage_costs[storage_costs["GENERATION_PROJECT"].isin(gen_projects["GENERATION_PROJECT"])] - add_to_csv("gen_build_costs.csv", storage_costs, primary_key=["GENERATION_PROJECT", "build_year"]) + storage_costs = storage_costs[ + storage_costs["GENERATION_PROJECT"].isin(gen_projects["GENERATION_PROJECT"]) + ] + add_to_csv( + "gen_build_costs.csv", + storage_costs, + primary_key=["GENERATION_PROJECT", "build_year"], + ) # Create add_storage_info.csv pd.DataFrame([config]).transpose().to_csv("add_storage_info.csv", header=False) @@ -144,11 +153,10 @@ def main(config): gen_type.columns = ["gen_tech", "energy_source"] gen_type["map_name"] = "default" gen_type["gen_type"] = "Storage" - pd.concat([ - pd.read_csv("graph_tech_types.csv", index_col=False), gen_type - ]).to_csv("graph_tech_types.csv", index=False) + pd.concat([pd.read_csv("graph_tech_types.csv", index_col=False), gen_type]).to_csv( + "graph_tech_types.csv", index=False + ) if __name__ == "__main__": main({}) ->>>>>>> 4c70d285 (Refactor add_storage package into post process step) diff --git a/switch_model/wecc/get_inputs/post_process_steps/aggregate_candidate_projects.py b/switch_model/wecc/get_inputs/post_process_steps/aggregate_candidate_projects.py index 6f92517b1..3a3ed14ad 100644 --- a/switch_model/wecc/get_inputs/post_process_steps/aggregate_candidate_projects.py +++ b/switch_model/wecc/get_inputs/post_process_steps/aggregate_candidate_projects.py @@ -21,14 +21,11 @@ @register_post_process( - name="aggregate_projects_by_zone", msg="Aggregating candidate projects by load zone for specified technologies", - only_with_config=True, - priority=4, ) -def post_process(config): - agg_techs = config["agg_techs"] - cf_method = config["cf_method"] +def post_process(config, func_config): + agg_techs = func_config["agg_techs"] + cf_method = func_config["cf_method"] assert type(agg_techs) == list # Don't allow hydro to be aggregated since we haven't implemented how to handle # hydro_timeseries.csv diff --git a/switch_model/wecc/get_inputs/post_process_steps/create_graph_files.py b/switch_model/wecc/get_inputs/post_process_steps/create_graph_files.py index ed5c347cc..fa3c065af 100644 --- a/switch_model/wecc/get_inputs/post_process_steps/create_graph_files.py +++ b/switch_model/wecc/get_inputs/post_process_steps/create_graph_files.py @@ -3,8 +3,8 @@ from switch_model.wecc.get_inputs.register_post_process import register_post_process -@register_post_process(name="create_graph_files", msg="Creating graph files") -def create_graph_files(_): +@register_post_process(msg="Creating graph files") +def post_process(config, *args, **kwargs): timepoints = pd.read_csv("timepoints.csv", index_col=False) timeseries = pd.read_csv("timeseries.csv", index_col=False) timepoints = timepoints.merge( diff --git a/switch_model/wecc/get_inputs/post_process_steps/fix_prebuild_conflict.py b/switch_model/wecc/get_inputs/post_process_steps/fix_prebuild_conflict.py index 5dcb9736a..70e8ca855 100644 --- a/switch_model/wecc/get_inputs/post_process_steps/fix_prebuild_conflict.py +++ b/switch_model/wecc/get_inputs/post_process_steps/fix_prebuild_conflict.py @@ -3,10 +3,8 @@ from switch_model.wecc.get_inputs.register_post_process import register_post_process -@register_post_process( - name="fix_prebuild_conflict_bug", msg="Shifting 2020 pre-build years to 2019" -) -def fix_prebuild_conflict_bug(_): +@register_post_process(msg="Shifting 2020 pre-build years to 2019") +def post_process(config, *args, **kwargs): """ This post-processing step is necessary to pass the no_predetermined_bld_yr_vs_period_conflict BuildCheck. Basically we are moving all the 2020 predetermined build years to 2019 to avoid a conflict with the 2020 period. diff --git a/switch_model/wecc/get_inputs/post_process_steps/only_california.py b/switch_model/wecc/get_inputs/post_process_steps/only_california.py index 8924bf9d4..c73ce2ee7 100644 --- a/switch_model/wecc/get_inputs/post_process_steps/only_california.py +++ b/switch_model/wecc/get_inputs/post_process_steps/only_california.py @@ -7,7 +7,6 @@ @register_post_process( name="only_california", msg="Dropping all the zones outside of California", - only_with_config=True, priority=3, ) def main(_): diff --git a/switch_model/wecc/get_inputs/post_process_steps/replace_plants_in_zone_all.py b/switch_model/wecc/get_inputs/post_process_steps/replace_plants_in_zone_all.py index 6963ed0b4..57e7522b5 100644 --- a/switch_model/wecc/get_inputs/post_process_steps/replace_plants_in_zone_all.py +++ b/switch_model/wecc/get_inputs/post_process_steps/replace_plants_in_zone_all.py @@ -3,11 +3,8 @@ from switch_model.wecc.get_inputs.register_post_process import register_post_process -@register_post_process( - name="replace_all_zones_plants", - msg="Replacing _ALL_ZONES plants with a plant in each zone", -) -def replace_plants_in_zone_all(_): +@register_post_process(msg="Replacing _ALL_ZONES plants with a plant in each zone") +def post_process(config, *args, **kwargs): """ This post-process step replaces all the generation projects that have a load called _ALL_ZONES with a generation project for each load zone. diff --git a/switch_model/wecc/get_inputs/post_process_steps/reserve_technologies.py b/switch_model/wecc/get_inputs/post_process_steps/reserve_technologies.py index 163988f6e..e3d7cd09b 100644 --- a/switch_model/wecc/get_inputs/post_process_steps/reserve_technologies.py +++ b/switch_model/wecc/get_inputs/post_process_steps/reserve_technologies.py @@ -10,13 +10,10 @@ @register_post_process( - name="no_fosill_reserve", - msg="Aggregating candidate projects by load zone for specified technologies", - only_with_config=True, - priority=4 + msg="Removing fossil fuels from reserves.", ) -def post_process(config): - """ This function sets to zero the column that allows each candidate technology to +def post_process(config, func_config): + """This function sets to zero the column that allows each candidate technology to proividee""" fname = "generation_projects_info.csv" diff --git a/switch_model/wecc/get_inputs/register_post_process.py b/switch_model/wecc/get_inputs/register_post_process.py index c258116a4..2a75c92d0 100644 --- a/switch_model/wecc/get_inputs/register_post_process.py +++ b/switch_model/wecc/get_inputs/register_post_process.py @@ -8,13 +8,15 @@ These 2 functions are kept in a separate file to avoid cyclical dependencies. """ + +from functools import wraps import functools _registered_steps = {} def register_post_process( - name, msg=None, enabled=True, only_with_config=False, priority=2 + msg=None, ): """ Decorator that should be used to register a post-processing step. @@ -27,25 +29,14 @@ def register_post_process( """ def decorator(func): - @functools.wraps(func) - def wrapper(config=None): - if only_with_config and config is None: - return - + @wraps(func) + def wrapper(*args, **kwargs): message = msg if message is None: message = f"Running {func.__name__}" - print(f"\t{message}...") - func(config) - - wrapper.priority = priority - - if name in _registered_steps: - raise Exception(f"Post-process step {name} already exists.") + func(*args, **kwargs) - if enabled: - _registered_steps[name] = wrapper return wrapper return decorator From 4ae6fdd553d14edbafef0bece3effcc079f7e3d3 Mon Sep 17 00:00:00 2001 From: pesap Date: Thu, 12 Aug 2021 14:29:22 -0700 Subject: [PATCH 2/6] Change --post-process to post-process-only --- switch_model/wecc/get_inputs/cli.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/switch_model/wecc/get_inputs/cli.py b/switch_model/wecc/get_inputs/cli.py index 017aa49bc..1865f998e 100644 --- a/switch_model/wecc/get_inputs/cli.py +++ b/switch_model/wecc/get_inputs/cli.py @@ -31,7 +31,7 @@ def main(): "want to wait for the command.", ) parser.add_argument( - "--post-process", + "--post-process-only", default=False, action="store_true", help="Run only post process steps.", @@ -51,42 +51,36 @@ def main(): full_config = load_config() switch_to_input_dir(full_config, overwrite=args.overwrite) - if args.post_process is None: + if not args.post_process_only is None: query_db(full_config, skip_cf=args.skip_cf) - print("Post-processing...") + print("\nRunning post processing...") + # Get location of post process scripts post_process_path = ".".join(__name__.split(".")[:-1]) + ".post_process_steps" def run_post_process(module): + """Run a function from a given module""" + + # This uses python module syntax with a dot. Example: import foo.bar.test mod = importlib.import_module(f".{module}", post_process_path) + post_process = getattr(mod, "post_process") - # Configuration specific for the post_process + # Get specific configuration for the post process if specified post_config = full_config.get(module, None) # Run post process post_process(full_config, post_config) - # Get additional post-process from the config file + # Run all post process specified, otherwise run single one if args.post_process_step is None: for module in full_config["post_process"]: run_post_process(module) - # run_post_process(full_config, step_name=args.post_process) else: run_post_process(getattr(args, "post_process_step")) - # breakpoint() - # post_process_path = '.'.join(__name__.split('.')[:-1]) + ".post_process_steps" - # mod = importlib.import_module(f".{module}", post_process_path) - # post_process = getattr(mod, "post_process") - - # # Configuration specific for the post_process - # post_config = full_config.get(module, None) - - # # Run post process - # post_process(full_config, post_config) - print(f"\nScript took {timer.step_time_as_str()} seconds to build input tables.") + print(f"\nScript took {timer.step_time_as_str()} seconds.") def switch_to_input_dir(config, overwrite): From d9e53655c12c2195d471b867f3f06a71f039c560 Mon Sep 17 00:00:00 2001 From: pesap Date: Thu, 12 Aug 2021 14:31:44 -0700 Subject: [PATCH 3/6] Apply black format to code --- switch_model/wecc/get_inputs/get_inputs.py | 35 +++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/switch_model/wecc/get_inputs/get_inputs.py b/switch_model/wecc/get_inputs/get_inputs.py index 17f3d008c..232aa860a 100755 --- a/switch_model/wecc/get_inputs/get_inputs.py +++ b/switch_model/wecc/get_inputs/get_inputs.py @@ -69,9 +69,10 @@ def write_csv(data: Iterable[List], fname, headers: List[str], log=True): "switch_model.transmission.transport.build", "switch_model.transmission.transport.dispatch", "switch_model.policies.carbon_policies", - "switch_model.policies.rps_unbundled", "switch_model.policies.min_per_tech", # Always include since it provides useful outputs even when unused # "switch_model.reporting.basic_exports_wecc", + # Always include since by default it does nothing except output useful data + "switch_model.policies.wind_to_solar_ratio", ] @@ -107,6 +108,7 @@ def query_db(full_config, skip_cf): "enable_planning_reserves", "generation_plant_technologies_scenario_id", "variable_o_m_cost_scenario_id", + "wind_to_solar_ratio", ] db_cursor.execute( @@ -143,6 +145,7 @@ def query_db(full_config, skip_cf): enable_planning_reserves = s_details[18] generation_plant_technologies_scenario_id = s_details[19] variable_o_m_cost_scenario_id = s_details[20] + wind_to_solar_ratio = s_details[21] print(f"Scenario: {scenario_id}: {name}.") @@ -749,6 +752,7 @@ def query_db(full_config, skip_cf): order by 1, 2; """, ) + modules.append("switch_model.policies.rps_unbundled") ######################################################## # BIO_SOLID SUPPLY CURVE @@ -889,6 +893,7 @@ def query_db(full_config, skip_cf): ) ca_policies(db_cursor, ca_policies_scenario_id, study_timeframe_id) + write_wind_to_solar_ratio(wind_to_solar_ratio) if enable_planning_reserves: planning_reserves(db_cursor, time_sample_id, hydro_simple_scenario_id) create_modules_txt() @@ -905,6 +910,34 @@ def query_db(full_config, skip_cf): ) +def write_wind_to_solar_ratio(wind_to_solar_ratio): + # TODO ideally we'd have a table where we can specify the wind_to_solar_ratios per period. + # At the moment only the wind_to_solar_ratio is specified and which doesn't allow different values per period + if wind_to_solar_ratio is None: + return + + print("wind_to_solar_ratio.csv...") + df = pd.read_csv("periods.csv")[["INVESTMENT_PERIOD"]] + df["wind_to_solar_ratio"] = wind_to_solar_ratio + + # wind_to_solar_ratio.csv requires a column called wind_to_solar_ratio_const_gt that is True (1) or False (0) + # This column specifies whether the constraint is a greater than constraint or a less than constraint. + # In our case we want it to be a greater than constraint if we're trying to force wind-to-solar ratio above its default + # and we want it to be a less than constraint if we're trying to force the ratio below its default. + # Here the default is the ratio if we didn't have the constraint. + cutoff_ratio = 0.28 + warnings.warn( + "To determine the sign of the wind-to-solar ratio constraint we have " + f"assumed that without the constraint, the wind-to-solar ratio is {cutoff_ratio}. " + f"This value was accurate for Martin's LDES runs however it may not be accurate for you. " + f"You should update this value in get_inputs or manually specify whether you want a greater than " + f"or a less than constraint." + ) + df["wind_to_solar_ratio_const_gt"] = 1 if wind_to_solar_ratio > cutoff_ratio else 0 + + df.to_csv("wind_to_solar_ratio.csv", index=False) + + def ca_policies(db_cursor, ca_policies_scenario_id, study_timeframe_id): if ca_policies_scenario_id is None: return From 0fcedd7f525e32c4cc00f001942dedcc2f2a6540 Mon Sep 17 00:00:00 2001 From: pesap Date: Thu, 12 Aug 2021 14:37:23 -0700 Subject: [PATCH 4/6] If post-step is passed dont query the dabase --- switch_model/wecc/get_inputs/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/switch_model/wecc/get_inputs/cli.py b/switch_model/wecc/get_inputs/cli.py index 1865f998e..638b19ac9 100644 --- a/switch_model/wecc/get_inputs/cli.py +++ b/switch_model/wecc/get_inputs/cli.py @@ -51,7 +51,7 @@ def main(): full_config = load_config() switch_to_input_dir(full_config, overwrite=args.overwrite) - if not args.post_process_only is None: + if not args.post_process_only and args.post_process_step is None: query_db(full_config, skip_cf=args.skip_cf) print("\nRunning post processing...") From 9ff8168af0685d72e15e6e3ca17f0ffdeba9c3ee Mon Sep 17 00:00:00 2001 From: pesap Date: Thu, 12 Aug 2021 16:12:35 -0700 Subject: [PATCH 5/6] Include energy_cost post_process --- .../post_process_steps/energy_cost.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 switch_model/wecc/get_inputs/post_process_steps/energy_cost.py diff --git a/switch_model/wecc/get_inputs/post_process_steps/energy_cost.py b/switch_model/wecc/get_inputs/post_process_steps/energy_cost.py new file mode 100644 index 000000000..09b54b267 --- /dev/null +++ b/switch_model/wecc/get_inputs/post_process_steps/energy_cost.py @@ -0,0 +1,49 @@ +# Standard packages +import os +import shutil + +# Third-party packages +import pandas as pd + +from switch_model.wecc.get_inputs.register_post_process import register_post_process + + +@register_post_process( + msg="Change energy cost for storage candidate", +) +def post_process(config, func_config): + + percentage = int(func_config["percentage"])/100 + dtype = {"GENERATION_PROJECT": str} + df = pd.read_csv("generation_projects_info.csv", dtype=dtype) + costs = pd.read_csv("gen_build_costs.csv", dtype=dtype) + predetermined = pd.read_csv("gen_build_predetermined.csv",dtype=dtype) + + gen_projects = df.merge( + costs, + on="GENERATION_PROJECT", + ) + + gen_projects = gen_projects.merge( + predetermined, + on=["GENERATION_PROJECT", "build_year"], + how="left" # Makes a left join + ) + + # Get candiate technology only + candidate = gen_projects.query("build_year == 2050").query("gen_tech =='Battery_Storage'") + + # Get canidate generation project id + candidate_ids = candidate["GENERATION_PROJECT"].values + + + gen_cost_mwh = costs.loc[costs["GENERATION_PROJECT"].isin(candidate_ids), + "gen_storage_energy_overnight_cost"].astype(float) + + # Set to zero column that allows technology to provide reserves + costs.loc[ + costs["GENERATION_PROJECT"].isin(candidate_ids), "gen_storage_energy_overnight_cost" + ] = gen_cost_mwh * percentage + + # Save file again + costs.to_csv("gen_build_costs.csv", index=False) From 6933120454d461621da5eff0e9f05ae80daac7dd Mon Sep 17 00:00:00 2001 From: pesap Date: Mon, 27 Sep 2021 11:26:21 -0700 Subject: [PATCH 6/6] Propagate change to other post process files --- .../wecc/get_inputs/post_process_steps/add_storage.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/switch_model/wecc/get_inputs/post_process_steps/add_storage.py b/switch_model/wecc/get_inputs/post_process_steps/add_storage.py index 1369647a6..8c28b8320 100644 --- a/switch_model/wecc/get_inputs/post_process_steps/add_storage.py +++ b/switch_model/wecc/get_inputs/post_process_steps/add_storage.py @@ -5,7 +5,6 @@ the csvs in the inputs folder. """ import pandas as pd - from switch_model.wecc.get_inputs.register_post_process import register_post_process @@ -102,11 +101,9 @@ def drop_previous_candidate_storage(): @register_post_process( - name="add_storage", msg="Adding storage from Google Sheets", - priority=1, # Increased priority (default is 2) so that it always runs before replace_plants_in_zone_all.py ) -def main(config): +def post_process(config): # Drop previous candidate storage from inputs drop_previous_candidate_storage()