From cf1c98a4d6d74c3ce75dc36ca2223ba9d16ac224 Mon Sep 17 00:00:00 2001 From: Gerrit De Moor Date: Mon, 5 Aug 2019 23:25:51 -0400 Subject: [PATCH 1/2] Add unittests for input validations - fuels - auxiliary - operations/init - project/init - must_run - dispatchable_no_commit - always_on (uses validations from auxiliary) - dispatchable_capacity_commit (uses validations from auxiliary) --- gridpath/auxiliary/auxiliary.py | 87 ++++++- gridpath/project/__init__.py | 179 +++++++++----- gridpath/project/fuels.py | 77 ++++-- gridpath/project/operations/__init__.py | 208 +++++++++------- .../operations/operational_types/always_on.py | 57 ++--- .../dispatchable_capacity_commit.py | 58 ++--- .../dispatchable_no_commit.py | 41 ++-- .../operations/operational_types/must_run.py | 39 ++- tests/auxiliary/test_auxiliary.py | 224 ++++++++++++++++++ tests/project/operations/test_init.py | 111 ++++++++- tests/project/test_fuels.py | 93 ++++++++ tests/project/test_init.py | 64 +++++ 12 files changed, 961 insertions(+), 277 deletions(-) diff --git a/gridpath/auxiliary/auxiliary.py b/gridpath/auxiliary/auxiliary.py index a7307697c..f5fa133d9 100644 --- a/gridpath/auxiliary/auxiliary.py +++ b/gridpath/auxiliary/auxiliary.py @@ -335,8 +335,6 @@ def check_dtypes(df, expected_dtypes): :return: List of error messages for each column with invalid datatypes. Error message specifies the column and the expected data type. List of columns with erroneous data types. - - TODO: add example """ result = [] @@ -371,3 +369,88 @@ def check_dtypes(df, expected_dtypes): # bad_columns = numeric_columns[np.invert(numeric_bool)] return result, columns + + +def check_column_sign_positive(df, columns): + """ + Checks whether the selected columns of a DataFrame are non-negative. + Helper function for input validation. + :param df: DataFrame for which to check signs. 
Must have a "project" + column, and columns param must be a subset of the columns in df + :param columns: list with columns that are expected to be non-negative + :return: List of error messages for each column with invalid signs. + Error message specifies the column. + """ + result = [] + for column in columns: + is_negative = (df[column] < 0) + if is_negative.any(): + bad_projects = df["project"][is_negative].values + print_bad_projects = ", ".join(bad_projects) + result.append( + "Project(s) '{}': Expected '{}' >= 0" + .format(print_bad_projects, column) + ) + + return result + + +def check_prj_columns(df, columns, required, category): + """ + Checks whether the required columns of a DataFrame are not None/NA or + whether the incompatible columns are None/NA. If required columns are + None/NA, or if incompatible columns are not None/NA, an error message + is returned. + Helper function for input validation. + :param df: DataFrame for which to check columns. Must have a "project" + column, and columns param must be a subset of the columns in df + :param columns: list of columns to check + :param required: Boolean, whether the listed columns are required or + incompatible + :param category: project category (operational_type, capacity_type, ...) + for which we're doing the input validation + :return: List of error messages for each column with invalid inputs. + Error message specifies the column. 
+ """ + result = [] + for column in columns: + if required: + invalids = pd.isna(df[column]) + error_str = "should have inputs for" + else: + invalids = pd.notna(df[column]) + error_str = "should not have inputs for" + if invalids.any(): + bad_projects = df["project"][invalids].values + print_bad_projects = ", ".join(bad_projects) + result.append( + "Project(s) '{}'; {} {} '{}'" + .format(print_bad_projects, category, error_str, column) + ) + + return result + + +def check_constant_heat_rate(df, op_type): + """ + Check whether the projects in the DataFrame have a constant heat rate + based on the number of load points per project in the DAtaFrame + :param df: DataFrame for which to check constant heat rate. Must have + "project", "load_point_mw" columns + :param op_type: Operational type (used in error message) + :return: + """ + + results = [] + + n_load_points = df.groupby(["project"]).size() + invalids = (n_load_points > 1) + if invalids.any(): + bad_projects = invalids.index[invalids] + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': {} should have only 1 load point" + .format(print_bad_projects, op_type) + ) + + return results diff --git a/gridpath/project/__init__.py b/gridpath/project/__init__.py index 791190c4e..ee5298378 100644 --- a/gridpath/project/__init__.py +++ b/gridpath/project/__init__.py @@ -15,7 +15,7 @@ from gridpath.auxiliary.dynamic_components import required_capacity_modules, \ required_operational_modules, headroom_variables, footroom_variables from gridpath.auxiliary.auxiliary import check_dtypes, \ - write_validation_to_database + check_column_sign_positive, write_validation_to_database def determine_dynamic_components(d, scenario_directory, subproblem, stage): @@ -283,46 +283,38 @@ def validate_inputs(subscenarios, subproblem, stage, conn): ) ) - # Check valid numeric columns are positive + # Check valid numeric columns are non-negative numeric_columns = [k for k, v in expected_dtypes.items() if v == 
"numeric"] valid_numeric_columns = set(numeric_columns) - set(error_columns) - for column in valid_numeric_columns: - is_negative = (df[column] < 0) - if is_negative.any(): - bad_projects = df["project"][is_negative].values - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Invalid numeric sign", - "Project(s) '{}'; Expected '{}' >= 0" - .format(print_bad_projects, column) - ) - ) + sign_errors = check_column_sign_positive(df, valid_numeric_columns) + for error in sign_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Invalid numeric sign", + error + ) + ) - # check 0 < min stable fraction <= 1 + # Check 0 < min stable fraction <= 1 if "min_stable_level" not in error_columns: - invalids = ((df["min_stable_level"] <= 0) | - (df["min_stable_level"] > 1)) - if invalids.any(): - bad_projects = df["project"][invalids].values - print_bad_projects = ", ".join(bad_projects) + validation_errors = validate_min_stable_level(df) + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_OPERATIONAL_CHARS", "inputs_project_operational_chars", "Invalid min_stable_level", - "Project(s) '{}': expected 0 < min_stable_level <= 1" - .format(print_bad_projects) + error ) ) # TODO: move into database table (don't hard code) # Check that we're not combining incompatible capacity and operational types - incompatible_combinations = [ + invalid_combos = [ ("new_build_generator", "dispatchable_binary_commit"), ("new_build_generator", "dispatchable_continuous_commit"), ("new_build_generator", "hydro_curtailable"), @@ -340,61 +332,52 @@ def validate_inputs(subscenarios, subproblem, stage, conn): ("existing_gen_binary_economic_retirement", "hydro_noncurtailable"), ] - for combo in incompatible_combinations: - 
bad_combos = ((df["capacity_type"] == combo[0]) & - (df["operational_type"] == combo[1])) - if bad_combos.any(): - bad_projects = df['project'][bad_combos].values - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS, PROJECT_PORTFOLIO", - "inputs_project_operational_chars, inputs_project_portfolios", - "Invalid combination of capacity type and operational type", - "Project(s) '{}': '{}' and '{}'" - .format(print_bad_projects, combo[0], combo[1])) - ) - # check that capacity type is valid + validation_errors = validate_op_cap_combos(df, invalid_combos) + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS, PROJECT_PORTFOLIO", + "inputs_project_operational_chars, inputs_project_portfolios", + "Invalid combination of capacity type and operational type", + error + ) + ) + + # Check that capacity type is valid # Note: foreign key already ensures this! valid_cap_types = c.execute( """SELECT capacity_type from mod_capacity_types""" ).fetchall() valid_cap_types = [v[0] for v in valid_cap_types] - invalids = ~df["capacity_type"].isin(valid_cap_types) - if invalids.any(): - bad_projects = df["project"][invalids].values - print_bad_projects = ", ".join(bad_projects) + validation_errors = validate_cap_types(df, valid_cap_types) + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_PORTFOLIO", "inputs_project_portfolios", "Invalid capacity type", - "Project(s) '{}': Invalid capacity type" - .format(print_bad_projects) + error ) ) - # check that operational type is valid + # Check that operational type is valid # Note: foreign key already ensures this! 
valid_op_types = c.execute( """SELECT operational_type from mod_operational_types""" ).fetchall() valid_op_types = [v[0] for v in valid_op_types] - invalids = ~df["operational_type"].isin(valid_op_types) - if invalids.any(): - bad_projects = df["project"][invalids].values - print_bad_projects = ", ".join(bad_projects) + validation_errors = validate_op_types(df, valid_op_types) + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_OPERATIONAL_CHARS", "inputs_project_operational_chars", "Invalid operational type", - "Project(s) '{}': Invalid operational type" - .format(print_bad_projects) + error ) ) @@ -402,6 +385,92 @@ def validate_inputs(subscenarios, subproblem, stage, conn): write_validation_to_database(validation_results, conn) +def validate_min_stable_level(df): + """ + Check 0 < min stable fraction <= 1 + :param df: + :return: + """ + results = [] + + invalids = ((df["min_stable_level"] <= 0) | + (df["min_stable_level"] > 1)) + if invalids.any(): + bad_projects = df["project"][invalids].values + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': expected 0 < min_stable_level <= 1" + .format(print_bad_projects) + ) + + return results + + +def validate_op_cap_combos(df, invalid_combos): + """ + Check that there's no mixing of incompatible capacity and operational types + :param df: + :param invalid_combos: + :return: + """ + results = [] + for combo in invalid_combos: + bad_combos = ((df["capacity_type"] == combo[0]) & + (df["operational_type"] == combo[1])) + if bad_combos.any(): + bad_projects = df['project'][bad_combos].values + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': '{}' and '{}'" + .format(print_bad_projects, combo[0], combo[1]) + ) + + return results + + +def validate_cap_types(df, valid_cap_types): + """ + Check that the specified capacity types are one of the valid capacity types + :param df: + :param valid_cap_types: + 
:return: + """ + results = [] + + invalids = ~df["capacity_type"].isin(valid_cap_types) + if invalids.any(): + bad_projects = df["project"][invalids].values + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': Invalid capacity type" + .format(print_bad_projects) + ) + + return results + + +def validate_op_types(df, valid_op_types): + """ + Check that the specified operational types are one of the valid operational + types + :param df: + :param valid_op_types: + :return: + """ + results = [] + + invalids = ~df["operational_type"].isin(valid_op_types) + if invalids.any(): + bad_projects = df["project"][invalids].values + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': Invalid operational type" + .format(print_bad_projects) + ) + + return results + + def write_model_inputs(inputs_directory, subscenarios, subproblem, stage, conn): """ Get inputs from database and write out the model input diff --git a/gridpath/project/fuels.py b/gridpath/project/fuels.py index 2e3250393..9ccae7c64 100644 --- a/gridpath/project/fuels.py +++ b/gridpath/project/fuels.py @@ -190,6 +190,43 @@ def validate_inputs(subscenarios, subproblem, stage, conn): ) # Check that fuels specified for projects exist in fuels table + validation_errors = validate_fuel_projects(prj_df, fuels_df) + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Non existent fuel", + error) + ) + + # Check that fuel prices exist for the period and month + validation_errors = validate_fuel_prices(fuels_df, fuel_prices_df, + periods_months) + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_FUEL_PRICES", + "inputs_project_fuel_prices", + "Missing fuel price", + error + ) + ) + + # Write all input validation errors to database + write_validation_to_database(validation_results, 
conn) + + +def validate_fuel_projects(prj_df, fuels_df): + """ + Check that fuels specified for projects exist in fuels table + :param prj_df: + :param fuels_df: + :return: + """ + results = [] fuel_mask = pd.notna(prj_df["fuel"]) existing_fuel_mask = prj_df["fuel"].isin(fuels_df["fuel"]) invalids = fuel_mask & ~existing_fuel_mask @@ -198,35 +235,33 @@ def validate_inputs(subscenarios, subproblem, stage, conn): bad_fuels = prj_df["fuel"][invalids].values print_bad_projects = ", ".join(bad_projects) print_bad_fuels = ", ".join(bad_fuels) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Non existent fuel", - "Project(s) '{}': Specified fuel(s) '{}' do(es) not exist" - .format(print_bad_projects, print_bad_fuels) - ) + results.append( + "Project(s) '{}': Specified fuel(s) '{}' do(es) not exist" + .format(print_bad_projects, print_bad_fuels) ) - # Check that fuel prices exist for the period and month + return results + + +def validate_fuel_prices(fuels_df, fuel_prices_df, periods_months): + """ + Check that fuel prices exist for the period and month + :param fuels_df: + :param fuel_prices_df: + :param periods_months: + :return: + """ + results = [] for f in fuels_df["fuel"].values: df = fuel_prices_df[fuel_prices_df["fuel"] == f] for period, month in periods_months: if not ((df.period == period) & (df.month == month)).any(): - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_FUEL_PRICES", - "inputs_project_fuel_prices", - "Missing fuel price", - "Fuel '{}': Missing price for period '{}', month '{}')" - .format(f, str(period), str(month)) - ) + results.append( + "Fuel '{}': Missing price for period '{}', month '{}'" + .format(f, str(period), str(month)) ) - # Write all input validation errors to database - write_validation_to_database(validation_results, conn) + return results def write_model_inputs(inputs_directory, subscenarios, subproblem, stage, 
conn): diff --git a/gridpath/project/operations/__init__.py b/gridpath/project/operations/__init__.py index 2488370a8..dbd394340 100644 --- a/gridpath/project/operations/__init__.py +++ b/gridpath/project/operations/__init__.py @@ -17,7 +17,7 @@ from pyomo.environ import Set, Param, PositiveReals, PercentFraction, Reals from gridpath.auxiliary.auxiliary import is_number, check_dtypes, \ - write_validation_to_database + check_column_sign_positive, write_validation_to_database # TODO: should we take this out of __init__.py @@ -340,11 +340,11 @@ def get_inputs_from_database(subscenarios, subproblem, stage, conn): c2 = conn.cursor() heat_rates = c2.execute( """ - SELECT project, operational_type, fuel, heat_rate_curves_scenario_id, + SELECT project, fuel, heat_rate_curves_scenario_id, load_point_mw, average_heat_rate_mmbtu_per_mwh FROM inputs_project_portfolios INNER JOIN - (SELECT project, operational_type, fuel, heat_rate_curves_scenario_id + (SELECT project, fuel, heat_rate_curves_scenario_id FROM inputs_project_operational_chars WHERE project_operational_chars_scenario_id = {}) AS op_char USING(project) @@ -385,11 +385,6 @@ def validate_inputs(subscenarios, subproblem, stage, conn): columns=[s[0] for s in heat_rates.description] ) - # Define masks (list of true/false dependent on conditions checked) - hr_curve_mask = pd.notna(hr_df["heat_rate_curves_scenario_id"]) - fuel_mask = pd.notna(hr_df["fuel"]) - load_point_mask = pd.notna(hr_df["load_point_mw"]) - # Check data types availability: expected_dtypes = { "project": "string", @@ -408,25 +403,21 @@ def validate_inputs(subscenarios, subproblem, stage, conn): ) ) - # check 0 < availability <= 1 if "availability" not in error_columns: - invalids = ((av_df["availability"] <= 0) | - (av_df["availability"] > 1)) - if invalids.any(): - bad_projects = av_df["project"][invalids].values - print_bad_projects = ", ".join(bad_projects) + validation_errors = validate_availability(av_df) + for error in validation_errors: 
validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_AVAILABILITY", "inputs_project_availability", "Invalid availability", - "Project(s) '{}': expected 0 < availability <= 1" - .format(print_bad_projects) + error ) ) # Check data types heat_rates: + hr_curve_mask = pd.notna(hr_df["heat_rate_curves_scenario_id"]) sub_hr_df = hr_df[hr_curve_mask][ ["project", "load_point_mw", "average_heat_rate_mmbtu_per_mwh"] ] @@ -447,71 +438,135 @@ def validate_inputs(subscenarios, subproblem, stage, conn): ) ) - # Check valid numeric columns in heat rates are positive + # Check valid numeric columns in heat rates are non-negative numeric_columns = [k for k, v in expected_dtypes.items() if v == "numeric"] valid_numeric_columns = set(numeric_columns) - set(error_columns) - for column in valid_numeric_columns: - is_negative = (sub_hr_df[column] <= 0) - if is_negative.any(): - bad_projects = sub_hr_df["project"][is_negative].values - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_HEAT_RATE_CURVES", - "inputs_project_heat_rate_curves", - "Invalid numeric sign", - "Project(s) '{}'; Expected '{}' > 0" - .format(print_bad_projects, column) - ) - ) + sign_errors = check_column_sign_positive(sub_hr_df, + valid_numeric_columns) + for error in sign_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_HEAT_RATE_CURVES", + "inputs_project_heat_rate_curves", + "Invalid numeric sign", + error + ) + ) # Check for consistency between fuel and heat rate curve inputs # 1. Make sure projects with fuel have a heat rate scenario specified - invalids = fuel_mask & ~hr_curve_mask - if invalids.any(): - bad_projects = hr_df["project"][invalids] - print_bad_projects = ", ".join(bad_projects) + # 2. 
Make sure projects without fuel have no heat rate scenario specified + validation_errors = validate_fuel_vs_heat_rates(hr_df) + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_OPERATIONAL_CHARS", "inputs_project_operational_chars", - "Missing heat rate scenario inputs", - "Project(s) '{}': Missing heat_rate_curves_scenario_id" - .format(print_bad_projects) + "Missing/Unnecessary heat rate scenario inputs", + error ) ) - # 2. Make sure projects without fuel have no heat rate scenario specified + + # Check that specified hr scenarios actually have inputs in the hr table + # and check that specified heat rate curves inputs are valid: + validation_errors = validate_heat_rate_curves(hr_df) + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_HEAT_RATE_CURVES", + "inputs_project_heat_rate_curves", + "Invalid/Missing heat rate curves inputs", + error + ) + ) + + # Write all input validation errors to database + write_validation_to_database(validation_results, conn) + + +def validate_availability(av_df): + """ + Check 0 <= availability <= 1 + :param av_df: + :return: + """ + results = [] + + invalids = ((av_df["availability"] < 0) | + (av_df["availability"] > 1)) + if invalids.any(): + bad_projects = av_df["project"][invalids].values + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': expected 0 <= availability <= 1" + .format(print_bad_projects) + ) + + return results + + +def validate_fuel_vs_heat_rates(hr_df): + """ + Make sure projects with fuel have a heat rate scenario specified. + Conversely, if no fuel is specified, make sure there is no heat rate + scenario specified. 
+ :param hr_df: + :return: + """ + results = [] + + hr_curve_mask = pd.notna(hr_df["heat_rate_curves_scenario_id"]) + fuel_mask = pd.notna(hr_df["fuel"]) + + invalids = fuel_mask & ~hr_curve_mask + if invalids.any(): + bad_projects = hr_df["project"][invalids] + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': Missing heat_rate_curves_scenario_id" + .format(print_bad_projects) + ) + invalids = ~fuel_mask & hr_curve_mask if invalids.any(): bad_projects = pd.unique(hr_df["project"][invalids]) print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Unnecessary heat rate scenario inputs", + results.append( "Project(s) '{}': No fuel specified so no heat rate expected" .format(print_bad_projects) - ) ) - # Check that specified hr scenarios actually have inputs in the hr table + return results + + +def validate_heat_rate_curves(hr_df): + """ + 1. Check that specified heat rate scenarios actually have inputs in the heat + rate curves table + 2. 
Check that specified heat rate curves inputs are valid: + - strictly increasing load points + - increasing total fuel burn + - convex fuel burn curve + :param hr_df: + :return: + """ + results = [] + + fuel_mask = pd.notna(hr_df["fuel"]) + hr_curve_mask = pd.notna(hr_df["heat_rate_curves_scenario_id"]) + load_point_mask = pd.notna(hr_df["load_point_mw"]) + + # Check for missing inputs in heat rates curves table invalids = hr_curve_mask & ~load_point_mask if invalids.any(): bad_projects = hr_df["project"][invalids] print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_HEAT_RATE_CURVES", - "inputs_project_heat_rate_curves", - "Missing heat rate inputs", - "Project(s) '{}': Expected at least one load point" - .format(print_bad_projects) - ) + results.append( + "Project(s) '{}': Expected at least one load point" + .format(print_bad_projects) ) # Check that each project has convex heat rates etc. @@ -531,41 +586,22 @@ def validate_inputs(subscenarios, subproblem, stage, conn): if np.any(incr_loads == 0): # note: primary key should already prohibit this - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_HEAT_RATE_CURVES", - "inputs_project_heat_rate_curves", - "Identical load points", - "Project(s) '{}': load points can not be identical" - .format(project) - ) + results.append( + "Project(s) '{}': load points can not be identical" + .format(project) ) if np.any(incr_fuel_burn <= 0): - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_HEAT_RATE_CURVES", - "inputs_project_heat_rate_curves", - "Decreasing fuel burn", - "Project(s) '{}': Total fuel burn should increase with increasing load" - .format(project) - ) + results.append( + "Project(s) '{}': Total fuel burn should increase with increasing load" + .format(project) ) if np.any(np.diff(slopes) <= 0): - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - 
"PROJECT_HEAT_RATE_CURVES", - "inputs_project_heat_rate_curves", - "Non convex fuel burn function", - "Project(s) '{}': Marginal heat rate should increase with increading load" - .format(project) - ) + results.append( + "Project(s) '{}': Fuel burn should be convex, i.e. marginal heat rate should increase with increading load" + .format(project) ) - # Write all input validation errors to database - write_validation_to_database(validation_results, conn) + return results def write_model_inputs(inputs_directory, subscenarios, subproblem, stage, conn): diff --git a/gridpath/project/operations/operational_types/always_on.py b/gridpath/project/operations/operational_types/always_on.py index c999f50a0..19cdf2cda 100644 --- a/gridpath/project/operations/operational_types/always_on.py +++ b/gridpath/project/operations/operational_types/always_on.py @@ -16,7 +16,7 @@ PercentFraction, Constraint, Expression from gridpath.auxiliary.auxiliary import generator_subset_init, \ - write_validation_to_database + write_validation_to_database, check_prj_columns from gridpath.auxiliary.dynamic_components import headroom_variables, \ footroom_variables @@ -535,25 +535,21 @@ def validate_module_specific_inputs(subscenarios, subproblem, stage, conn): # Check that unit size and min stable level are specified # (not all operational types require this input) - required_columns = [ + req_columns = [ "min_stable_level", "unit_size_mw" ] - for column in required_columns: - isna = pd.isna(df[column]) - if isna.any(): - bad_projects = df["project"][isna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Missing inputs", - "Project(s) '{}'; Always_on should have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, req_columns, True, "Always_on") + for error in validation_errors: + validation_results.append( + 
(subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Missing inputs", + error + ) + ) # Check that there are no unexpected operational inputs expected_na_columns = [ @@ -565,21 +561,18 @@ def validate_module_specific_inputs(subscenarios, subproblem, stage, conn): "charging_efficiency", "discharging_efficiency", "minimum_duration_hours" ] - for column in expected_na_columns: - notna = pd.notna(df[column]) - if notna.any(): - bad_projects = df["project"][notna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Unexpected inputs", - "Project(s) '{}'; Always_on should not have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, expected_na_columns, False, + "Always_on") + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Unexpected inputs", + error + ) + ) # Write all input validation errors to database write_validation_to_database(validation_results, conn) diff --git a/gridpath/project/operations/operational_types/dispatchable_capacity_commit.py b/gridpath/project/operations/operational_types/dispatchable_capacity_commit.py index 6bae9d0ab..6e612bfc4 100644 --- a/gridpath/project/operations/operational_types/dispatchable_capacity_commit.py +++ b/gridpath/project/operations/operational_types/dispatchable_capacity_commit.py @@ -23,7 +23,7 @@ NonPositiveReals, PercentFraction, Reals, value, Expression from gridpath.auxiliary.auxiliary import generator_subset_init, \ - write_validation_to_database + write_validation_to_database, check_prj_columns from gridpath.auxiliary.dynamic_components import headroom_variables, \ footroom_variables from gridpath.project.operations.operational_types.common_functions 
import \ @@ -1200,46 +1200,40 @@ def validate_module_specific_inputs(subscenarios, subproblem, stage, conn): # Check that unit size and min stable level are specified # (not all operational types require this input) - required_columns = [ + req_columns = [ "min_stable_level", "unit_size_mw" ] - for column in required_columns: - isna = pd.isna(df[column]) - if isna.any(): - bad_projects = df["project"][isna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Missing inputs", - "Project(s) '{}'; Dispatchable_capacity_commit should have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, req_columns, True, + "Dispatchable_capacity_commit") + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Missing inputs", + error + ) + ) # Check that there are no unexpected operational inputs expected_na_columns = [ "charging_efficiency", "discharging_efficiency", "minimum_duration_hours" ] - for column in expected_na_columns: - notna = pd.notna(df[column]) - if notna.any(): - bad_projects = df["project"][notna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Unexpected inputs", - "Project(s) '{}'; Dispatchable_capacity_commit should not have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, expected_na_columns, False, + "Dispatchable_capacity_commit") + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Unexpected inputs", + error + ) + ) # Write all input 
validation errors to database write_validation_to_database(validation_results, conn) diff --git a/gridpath/project/operations/operational_types/dispatchable_no_commit.py b/gridpath/project/operations/operational_types/dispatchable_no_commit.py index 1a95917ff..70e85bd48 100644 --- a/gridpath/project/operations/operational_types/dispatchable_no_commit.py +++ b/gridpath/project/operations/operational_types/dispatchable_no_commit.py @@ -10,10 +10,11 @@ from pyomo.environ import Set, Var, Constraint, NonNegativeReals from gridpath.auxiliary.auxiliary import generator_subset_init,\ - write_validation_to_database + write_validation_to_database, check_prj_columns, check_constant_heat_rate from gridpath.auxiliary.dynamic_components import headroom_variables, \ footroom_variables + def add_module_specific_components(m, d): """ @@ -299,36 +300,30 @@ def validate_module_specific_inputs(subscenarios, subproblem, stage, conn): "charging_efficiency", "discharging_efficiency", "minimum_duration_hours" ] - for column in expected_na_columns: - notna = pd.notna(df[column]) - if notna.any(): - bad_projects = df["project"][notna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Unexpected inputs", - "Project(s) '{}'; Dispatchable_no_commit should not have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, expected_na_columns, False, + "Dispatchable_no_commit") + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Unexpected inputs", + error + ) + ) # Check that there is only one load point (constant heat rate) - n_load_points = hr_df.groupby(["project"]).size() - invalids = (n_load_points > 1) - if invalids.any(): - bad_projects = invalids.index[invalids] - 
print_bad_projects = ", ".join(bad_projects) + validation_errors = check_constant_heat_rate(hr_df, + "Dispatchable_no_commit") + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_HEAT_RATE_CURVES", "inputs_project_heat_rate_curves", "Too many load points", - "Project(s) '{}': Dispatchable_no_commit should have only 1 load point" - .format(print_bad_projects) + error ) ) diff --git a/gridpath/project/operations/operational_types/must_run.py b/gridpath/project/operations/operational_types/must_run.py index 2995abbae..c64586412 100644 --- a/gridpath/project/operations/operational_types/must_run.py +++ b/gridpath/project/operations/operational_types/must_run.py @@ -11,7 +11,7 @@ from pyomo.environ import Constraint, Set from gridpath.auxiliary.auxiliary import generator_subset_init, \ - write_validation_to_database + write_validation_to_database, check_prj_columns, check_constant_heat_rate from gridpath.auxiliary.dynamic_components import headroom_variables, \ footroom_variables @@ -289,36 +289,29 @@ def validate_module_specific_inputs(subscenarios, subproblem, stage, conn): "charging_efficiency", "discharging_efficiency", "minimum_duration_hours" ] - for column in expected_na_columns: - notna = pd.notna(df[column]) - if notna.any(): - bad_projects = df["project"][notna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Unexpected inputs", - "Project(s) '{}'; Must_run should not have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, expected_na_columns, False, + "Must_run") + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Unexpected inputs", + error + ) + ) # Check that there is only one load point 
(constant heat rate) - n_load_points = hr_df.groupby(["project"]).size() - invalids = (n_load_points > 1) - if invalids.any(): - bad_projects = invalids.index[invalids] - print_bad_projects = ", ".join(bad_projects) + validation_errors = check_constant_heat_rate(hr_df, "Must_run") + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_HEAT_RATE_CURVES", "inputs_project_heat_rate_curves", "Too many load points", - "Project(s) '{}': Must_run should have only 1 load point" - .format(print_bad_projects) + error ) ) diff --git a/tests/auxiliary/test_auxiliary.py b/tests/auxiliary/test_auxiliary.py index 624e52e9b..eaa5ec71d 100644 --- a/tests/auxiliary/test_auxiliary.py +++ b/tests/auxiliary/test_auxiliary.py @@ -3,6 +3,8 @@ from pyomo.environ import AbstractModel import unittest +import pandas as pd +import numpy as np import gridpath.auxiliary.auxiliary as auxiliary_module_to_test @@ -85,6 +87,228 @@ def test_is_number(self): self.assertEqual(True, auxiliary_module_to_test.is_number(100.5)) self.assertEqual(False, auxiliary_module_to_test.is_number("string")) + def test_check_dtypes(self): + """ + + :return: + """ + df_columns = ["project", "capacity"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"df": pd.DataFrame( + columns=df_columns, + data=[["gas_ct", 10], ["coal_plant", 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ([], []) + }, + # Test invalid string column + 2: {"df": pd.DataFrame( + columns=df_columns, + data=[["gas_ct", 10], ["coal_plant", "string"]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ( + ["Invalid data type for column 'capacity'; expected numeric"], + ["capacity"] + )}, + # Test invalid numeric column + 3: {"df": pd.DataFrame( + columns=df_columns, + data=[[1, 10], [1, 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ( + ["Invalid data type for 
column 'project'; expected string"], + ["project"] + )}, + # If at least one string in the column, pandas will convert + # all column data to string so there will be no error + 4: {"df": pd.DataFrame( + columns=df_columns, + data=[["gas_ct", 10], [1, 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ([], []) + }, + # Columns with all None are ignored + 5: {"df": pd.DataFrame( + columns=df_columns, + data=[[None, 10], [None, 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ([], []) + }, + # Columns with all NaN are ignored + 6: {"df": pd.DataFrame( + columns=df_columns, + data=[[np.nan, 10], [np.nan, 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ([], []) + }, + # Columns with some None are not ignored + 7: {"df": pd.DataFrame( + columns=df_columns, + data=[[10, 10], [None, 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ( + ["Invalid data type for column 'project'; expected string"], + ["project"] + )}, + # Test multiple error columns + 8: {"df": pd.DataFrame( + columns=df_columns, + data=[[10, "string"], [10, "string"]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ( + ["Invalid data type for column 'project'; expected string", + "Invalid data type for column 'capacity'; expected numeric"], + ["project", "capacity"] + )} + } + + for test_case in test_cases.keys(): + expected_tuple = test_cases[test_case]["result"] + actual_tuple = auxiliary_module_to_test.check_dtypes( + df=test_cases[test_case]["df"], + expected_dtypes=test_cases[test_case]["expected_dtypes"] + ) + self.assertTupleEqual(expected_tuple, actual_tuple) + + def test_check_column_sign_positive(self): + """ + + :return: + """ + df_columns = ["project", "load_point_mw", + "average_heat_rate_mmbtu_per_mwh"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"df": pd.DataFrame( + 
columns=df_columns, + data=[["gas_ct", 10, 10.5], + ["gas_ct", 20, 9], + ["coal_plant", 100, 10] + ]), + "columns": ["load_point_mw", "average_heat_rate_mmbtu_per_mwh"], + "result": [] + }, + # Sign errors are flagged; Errors are grouped by column. If >1 error + # in different columns, a separate error msgs will be created + 2: {"df": pd.DataFrame( + columns=df_columns, + data=[["gas_ct", 10, -10.5], + ["gas_ct", -20, 9], + ["coal_plant", -100, 10] + ]), + "columns": ["load_point_mw", "average_heat_rate_mmbtu_per_mwh"], + "result": ["Project(s) 'gas_ct, coal_plant': Expected 'load_point_mw' >= 0", + "Project(s) 'gas_ct': Expected 'average_heat_rate_mmbtu_per_mwh' >= 0"] + } + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["result"] + actual_list = auxiliary_module_to_test.check_column_sign_positive( + df=test_cases[test_case]["df"], + columns=test_cases[test_case]["columns"] + ) + self.assertListEqual(expected_list, actual_list) + + def test_check_prj_columns(self): + """ + + :return: + """ + + df_columns = ["project", "min_stable_level", "unit_size_mw", + "startup_cost_per_mw", "shutdown_cost_per_mw"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"df": pd.DataFrame( + columns=df_columns, + data=[["nuclear", 0.5, 100, None, None]]), + "columns": ["min_stable_level", "unit_size_mw"], + "required": True, + "category": "Always_on", + "result": [] + }, + # Make sure missing required inputs are flagged + 2: {"df": pd.DataFrame( + columns=df_columns, + data=[["nuclear", None, 100, None, None]]), + "columns": ["min_stable_level", "unit_size_mw"], + "required": True, + "category": "Always_on", + "result": ["Project(s) 'nuclear'; Always_on should have inputs for 'min_stable_level'"] + }, + # Make sure incompatible inputs are flagged + 3: {"df": pd.DataFrame( + columns=df_columns, + data=[["nuclear", 0.5, 100, 1000, None]]), + "columns": ["startup_cost_per_mw", "shutdown_cost_per_mw"], + "required": False, + 
"category": "Always_on", + "result": ["Project(s) 'nuclear'; Always_on should not have inputs for 'startup_cost_per_mw'"] + } + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["result"] + actual_list = auxiliary_module_to_test.check_prj_columns( + df=test_cases[test_case]["df"], + columns=test_cases[test_case]["columns"], + required=test_cases[test_case]["required"], + category=test_cases[test_case]["category"] + ) + self.assertListEqual(expected_list, actual_list) + + def test_check_constant_heat_rate(self): + """ + + :return: + """ + + df_columns = ["project", "load_point_mw"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"df": pd.DataFrame( + columns=df_columns, + data=[["nuclear", 100]]), + "op_type": "Always_on", + "result": [] + }, + # Make sure varying heat rates (>1 load point) is flagged + 2: {"df": pd.DataFrame( + columns=df_columns, + data=[["nuclear", 100], + ["nuclear", 200], + ["gas_ct", 10] + ]), + "op_type": "Always_on", + "result": ["Project(s) 'nuclear': Always_on should have only 1 load point"] + } + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["result"] + actual_list = auxiliary_module_to_test.check_constant_heat_rate( + df=test_cases[test_case]["df"], + op_type=test_cases[test_case]["op_type"] + ) + self.assertListEqual(expected_list, actual_list) + if __name__ == "__main__": unittest.main() diff --git a/tests/project/operations/test_init.py b/tests/project/operations/test_init.py index 0b4daa382..9c89ce151 100644 --- a/tests/project/operations/test_init.py +++ b/tests/project/operations/test_init.py @@ -10,13 +10,12 @@ import sys import unittest import numpy as np +import pandas as pd from tests.common_functions import create_abstract_model, \ add_components_and_load_data from tests.project.operations.common_functions import \ get_project_operational_timepoints -from gridpath.project.operations.__init__ import \ - calculate_heat_rate_slope_intercept 
TEST_DATA_DIRECTORY = \ @@ -486,7 +485,7 @@ def test_calculate_heat_rate_slope_intercept(self): expected_slopes = test_cases[test_case]["slopes"] expected_intercepts = test_cases[test_case]["intercepts"] actual_slopes, actual_intercepts = \ - calculate_heat_rate_slope_intercept( + MODULE_BEING_TESTED.calculate_heat_rate_slope_intercept( project=test_cases[test_case]["project"], load_points=test_cases[test_case]["load_points"], heat_rates=test_cases[test_case]["heat_rates"] @@ -495,6 +494,112 @@ self.assertDictEqual(expected_slopes, actual_slopes) self.assertDictEqual(expected_intercepts, actual_intercepts) + def test_availability_validations(self): + av_df_columns = ["project", "horizon", "availability"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"av_df": pd.DataFrame( + columns=av_df_columns, + data=[["gas_ct", 201801, 1], + ["gas_ct", 201802, 0.9], + ["coal_plant", 201801, 0] + ]), + "error": [] + }, + # Negative availabilities are flagged + 2: {"av_df": pd.DataFrame( + columns=av_df_columns, + data=[["gas_ct", 201801, -1], + ["gas_ct", 201802, 0.9], + ["coal_plant", 201801, 0] + ]), + "error": ["Project(s) 'gas_ct': expected 0 <= availability <= 1"] + }, + # Negative availabilities for a different project are flagged + # (the > 1 case is covered by test case 4 below) + 3: {"av_df": pd.DataFrame( + columns=av_df_columns, + data=[["gas_ct", 201801, 1], + ["gas_ct", 201802, 0.9], + ["coal_plant", 201801, -0.5] + ]), + "error": ["Project(s) 'coal_plant': expected 0 <= availability <= 1"] + }, + # Make sure multiple errors are flagged correctly + 4: {"av_df": pd.DataFrame( + columns=av_df_columns, + data=[["gas_ct", 201801, 1.5], + ["gas_ct", 201802, 0.9], + ["coal_plant", 201801, -0.5] + ]), + "error": ["Project(s) 'gas_ct, coal_plant': expected 0 <= availability <= 1"] + }, + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["error"] + actual_list = MODULE_BEING_TESTED.validate_availability( + av_df=test_cases[test_case]["av_df"], + ) 
+ self.assertListEqual(expected_list, actual_list) + + def test_heat_rate_validations(self): + hr_columns = ["project", "fuel", "heat_rate_curves_scenario_id", + "load_point_mw", "average_heat_rate_mmbtu_per_mwh"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"hr_df": pd.DataFrame( + columns=hr_columns, + data=[["gas_ct", "gas", 1, 10, 10.5], + ["gas_ct", "gas", 1, 20, 9], + ["coal_plant", "coal", 1, 100, 10] + ]), + "fuel_vs_hr_error": [], + "hr_curves_error": [] + }, + # Check fuel vs heat rate curve errors + 3: {"hr_df": pd.DataFrame( + columns=hr_columns, + data=[["gas_ct", "gas", None, None, None], + ["coal_plant", None, 1, 100, 10] + ]), + "fuel_vs_hr_error": ["Project(s) 'gas_ct': Missing heat_rate_curves_scenario_id", + "Project(s) 'coal_plant': No fuel specified so no heat rate expected"], + "hr_curves_error": [] + }, + # Check heat rate curves validations + 4: {"hr_df": pd.DataFrame( + columns=hr_columns, + data=[["gas_ct1", "gas", 1, None, None], + ["gas_ct2", "gas", 1, 10, 11], + ["gas_ct2", "gas", 1, 10, 12], + ["gas_ct3", "gas", 1, 10, 11], + ["gas_ct3", "gas", 1, 20, 5], + ["gas_ct4", "gas", 1, 10, 11], + ["gas_ct4", "gas", 1, 20, 10], + ["gas_ct4", "gas", 1, 30, 9] + ]), + "fuel_vs_hr_error": [], + "hr_curves_error": ["Project(s) 'gas_ct1': Expected at least one load point", + "Project(s) 'gas_ct2': load points can not be identical", + "Project(s) 'gas_ct3': Total fuel burn should increase with increasing load", + "Project(s) 'gas_ct4': Fuel burn should be convex, i.e. 
marginal heat rate should increase with increading load"] + }, + + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["fuel_vs_hr_error"] + actual_list = MODULE_BEING_TESTED.validate_fuel_vs_heat_rates( + hr_df=test_cases[test_case]["hr_df"] + ) + self.assertListEqual(expected_list, actual_list) + + expected_list = test_cases[test_case]["hr_curves_error"] + actual_list = MODULE_BEING_TESTED.validate_heat_rate_curves( + hr_df=test_cases[test_case]["hr_df"] + ) + self.assertListEqual(expected_list, actual_list) + if __name__ == "__main__": unittest.main() diff --git a/tests/project/test_fuels.py b/tests/project/test_fuels.py index 17dbc8bd4..66b575ad4 100644 --- a/tests/project/test_fuels.py +++ b/tests/project/test_fuels.py @@ -14,6 +14,7 @@ from tests.common_functions import create_abstract_model, \ add_components_and_load_data + TEST_DATA_DIRECTORY = \ os.path.join(os.path.dirname(__file__), "..", "test_data") @@ -133,6 +134,98 @@ def test_initialized_components(self): ) self.assertDictEqual(expected_price, actual_price) + def test_fuel_validations(self): + prj_df_columns = ["project", "fuel"] + fuels_df_columns = ["fuel", "co2_intensity_tons_per_mmbtu"] + fuel_prices_df_columns = ["fuel", "period", "month", + "fuel_price_per_mmbtu"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"prj_df": pd.DataFrame( + columns=prj_df_columns, + data=[["gas_ct", "gas"], ["coal_plant", "coal"]]), + "fuels_df": pd.DataFrame( + columns=fuels_df_columns, + data=[["gas", 0.4], ["coal", 0.8]]), + "fuel_prices_df": pd.DataFrame( + columns=fuel_prices_df_columns, + data=[["gas", 2018, 1, 3], ["gas", 2018, 2, 4], + ["coal", 2018, 1, 2], ["coal", 2018, 2, 2]]), + "periods_months": [(2018, 1), (2018, 2)], + "fuel_project_error": [], + "fuel_prices_error": [] + }, + # If a project's fuel in prj_df does not exist in the fuels_df, + # there should be an error. 
Similarly, if a fuel price is missing + # for a certain month/period, there should be an error. + 2: {"prj_df": pd.DataFrame( + columns=prj_df_columns, + data=[["gas_ct", "invalid_fuel"], ["coal_plant", "coal"]]), + "fuels_df": pd.DataFrame( + columns=fuels_df_columns, + data=[["gas", 0.4], ["coal", 0.8]]), + "fuel_prices_df": pd.DataFrame( + columns=fuel_prices_df_columns, + data=[["gas", 2018, 1, 3], + ["coal", 2018, 1, 2], ["coal", 2018, 2, 2]]), + "periods_months": [(2018, 1), (2018, 2)], + "fuel_project_error": [ + "Project(s) 'gas_ct': Specified fuel(s) 'invalid_fuel' do(es) not exist"], + "fuel_prices_error": [ + "Fuel 'gas': Missing price for period '2018', month '2'"] + }, + # It's okay if there are more fuels and fuels prices specified than + # needed for the active projects + 3: {"prj_df": pd.DataFrame( + columns=prj_df_columns, + data=[["gas_ct", "gas"]]), + "fuels_df": pd.DataFrame( + columns=fuels_df_columns, + data=[["gas", 0.4], ["coal", 0.8]]), + "fuel_prices_df": pd.DataFrame( + columns=fuel_prices_df_columns, + data=[["gas", 2018, 1, 3], ["gas", 2018, 2, 4], + ["coal", 2018, 1, 2], ["coal", 2018, 2, 2]]), + "periods_months": [(2018, 1), (2018, 2)], + "fuel_project_error": [], + "fuel_prices_error": [] + }, + # Test for multiple errors in a column + 4: {"prj_df": pd.DataFrame( + columns=prj_df_columns, + data=[["gas_ct", "invalid_fuel1"], ["coal_plant", "invalid_fuel2"]]), + "fuels_df": pd.DataFrame( + columns=fuels_df_columns, + data=[["gas", 0.4], ["coal", 0.8]]), + "fuel_prices_df": pd.DataFrame( + columns=fuel_prices_df_columns, + data=[["gas", 2018, 1, 3], + ["coal", 2018, 1, 2]]), + "periods_months": [(2018, 1), (2018, 2)], + "fuel_project_error": + ["Project(s) 'gas_ct, coal_plant': Specified fuel(s) 'invalid_fuel1, invalid_fuel2' do(es) not exist"], + "fuel_prices_error": + ["Fuel 'gas': Missing price for period '2018', month '2'", + "Fuel 'coal': Missing price for period '2018', month '2'"] + } + } + + for test_case in test_cases.keys(): 
+ expected_list = test_cases[test_case]["fuel_project_error"] + actual_list = MODULE_BEING_TESTED.validate_fuel_projects( + prj_df=test_cases[test_case]["prj_df"], + fuels_df=test_cases[test_case]["fuels_df"] + ) + self.assertListEqual(expected_list, actual_list) + + expected_list = test_cases[test_case]["fuel_prices_error"] + actual_list = MODULE_BEING_TESTED.validate_fuel_prices( + fuels_df=test_cases[test_case]["fuels_df"], + fuel_prices_df=test_cases[test_case]["fuel_prices_df"], + periods_months=test_cases[test_case]["periods_months"] + ) + self.assertListEqual(expected_list, actual_list) + if __name__ == "__main__": unittest.main() diff --git a/tests/project/test_init.py b/tests/project/test_init.py index 229e33fb3..68322a4b1 100644 --- a/tests/project/test_init.py +++ b/tests/project/test_init.py @@ -227,6 +227,70 @@ def test_initialized_components(self): ) self.assertDictEqual(expected_var_om_cost, actual_var_om_cost) + def test_project_validations(self): + cols = ["project", "capacity_type", "operational_type", + "min_stable_level"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"df": pd.DataFrame( + columns=cols, + data=[["gas_ct", "new_build_generator", + "dispatchable_capacity_commit", 0.5] + ]), + "invalid_combos": [("invalid1", "invalid2")], + "valid_cap_types": ["new_build_generator"], + "valid_op_types": ["dispatchable_capacity_commit"], + "min_stable_level_error": [], + "combo_error": [], + "cap_type_error": [], + "op_type_error": [] + }, + # Make sure invalid min_stable_level, invalid combo, and invalid + # cap/op types are properly flagged + 2: {"df": pd.DataFrame( + columns=cols, + data=[["gas_ct1", "cap1", "op2", 1.5], + ["gas_ct2", "cap3", "op3", 0] + ]), + "invalid_combos": [("cap1", "op2")], + "valid_cap_types": ["cap1", "cap2"], + "valid_op_types": ["op1", "op2"], + "min_stable_level_error": ["Project(s) 'gas_ct1, gas_ct2': expected 0 < min_stable_level <= 1"], + "combo_error": ["Project(s) 'gas_ct1': 'cap1' and 
'op2'"], + "cap_type_error": ["Project(s) 'gas_ct2': Invalid capacity type"], + "op_type_error": ["Project(s) 'gas_ct2': Invalid operational type"] + } + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["min_stable_level_error"] + actual_list = MODULE_BEING_TESTED.validate_min_stable_level( + df=test_cases[test_case]["df"] + ) + self.assertListEqual(expected_list, actual_list) + + expected_list = test_cases[test_case]["combo_error"] + actual_list = MODULE_BEING_TESTED.validate_op_cap_combos( + df=test_cases[test_case]["df"], + invalid_combos=test_cases[test_case]["invalid_combos"] + ) + self.assertListEqual(expected_list, actual_list) + + expected_list = test_cases[test_case]["cap_type_error"] + actual_list = MODULE_BEING_TESTED.validate_cap_types( + df=test_cases[test_case]["df"], + valid_cap_types=test_cases[test_case]["valid_cap_types"] + + ) + self.assertListEqual(expected_list, actual_list) + + expected_list = test_cases[test_case]["op_type_error"] + actual_list = MODULE_BEING_TESTED.validate_op_types( + df=test_cases[test_case]["df"], + valid_op_types=test_cases[test_case]["valid_op_types"] + ) + self.assertListEqual(expected_list, actual_list) + if __name__ == "__main__": unittest.main() From a5e0e5782e34595a6f6bd1faa38844ad23404617 Mon Sep 17 00:00:00 2001 From: Gerrit De Moor Date: Thu, 15 Aug 2019 11:22:55 -0400 Subject: [PATCH 2/2] Fix zero division error and typo --- gridpath/project/operations/__init__.py | 29 ++++++++++++++----------- tests/project/operations/test_init.py | 4 ++-- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/gridpath/project/operations/__init__.py b/gridpath/project/operations/__init__.py index dbd394340..1145d9b36 100644 --- a/gridpath/project/operations/__init__.py +++ b/gridpath/project/operations/__init__.py @@ -579,10 +579,7 @@ def validate_heat_rate_curves(hr_df): heat_rates = hr_slice["average_heat_rate_mmbtu_per_mwh"].values if len(load_points) > 1: - fuel_burn = load_points 
* heat_rates incr_loads = np.diff(load_points) - incr_fuel_burn = np.diff(fuel_burn) - slopes = incr_fuel_burn / incr_loads if np.any(incr_loads == 0): # note: primary key should already prohibit this @@ -590,16 +587,22 @@ def validate_heat_rate_curves(hr_df): "Project(s) '{}': load points can not be identical" .format(project) ) - if np.any(incr_fuel_burn <= 0): - results.append( - "Project(s) '{}': Total fuel burn should increase with increasing load" - .format(project) - ) - if np.any(np.diff(slopes) <= 0): - results.append( - "Project(s) '{}': Fuel burn should be convex, i.e. marginal heat rate should increase with increading load" - .format(project) - ) + + else: + fuel_burn = load_points * heat_rates + incr_fuel_burn = np.diff(fuel_burn) + slopes = incr_fuel_burn / incr_loads + + if np.any(incr_fuel_burn <= 0): + results.append( + "Project(s) '{}': Total fuel burn should increase with increasing load" + .format(project) + ) + if np.any(np.diff(slopes) <= 0): + results.append( + "Project(s) '{}': Fuel burn should be convex, i.e. marginal heat rate should increase with increading load" + .format(project) + ) return results diff --git a/tests/project/operations/test_init.py b/tests/project/operations/test_init.py index 9c89ce151..858266dc8 100644 --- a/tests/project/operations/test_init.py +++ b/tests/project/operations/test_init.py @@ -557,7 +557,7 @@ def test_heat_rate_validations(self): "hr_curves_error": [] }, # Check fuel vs heat rate curve errors - 3: {"hr_df": pd.DataFrame( + 2: {"hr_df": pd.DataFrame( columns=hr_columns, data=[["gas_ct", "gas", None, None, None], ["coal_plant", None, 1, 100, 10] @@ -567,7 +567,7 @@ def test_heat_rate_validations(self): "hr_curves_error": [] }, # Check heat rate curves validations - 4: {"hr_df": pd.DataFrame( + 3: {"hr_df": pd.DataFrame( columns=hr_columns, data=[["gas_ct1", "gas", 1, None, None], ["gas_ct2", "gas", 1, 10, 11],