From cf1c98a4d6d74c3ce75dc36ca2223ba9d16ac224 Mon Sep 17 00:00:00 2001 From: Gerrit De Moor Date: Mon, 5 Aug 2019 23:25:51 -0400 Subject: [PATCH 1/2] Add unittests for input validations - fuels - auxiliary - operations/init - project/init - must_run - dispatchable_no_commit - always_on (uses validations from auxiliary) - dispatchable_capacity_commit (uses validations from auxiliary) --- gridpath/auxiliary/auxiliary.py | 87 ++++++- gridpath/project/__init__.py | 179 +++++++++----- gridpath/project/fuels.py | 77 ++++-- gridpath/project/operations/__init__.py | 208 +++++++++------- .../operations/operational_types/always_on.py | 57 ++--- .../dispatchable_capacity_commit.py | 58 ++--- .../dispatchable_no_commit.py | 41 ++-- .../operations/operational_types/must_run.py | 39 ++- tests/auxiliary/test_auxiliary.py | 224 ++++++++++++++++++ tests/project/operations/test_init.py | 111 ++++++++- tests/project/test_fuels.py | 93 ++++++++ tests/project/test_init.py | 64 +++++ 12 files changed, 961 insertions(+), 277 deletions(-) diff --git a/gridpath/auxiliary/auxiliary.py b/gridpath/auxiliary/auxiliary.py index a7307697c..f5fa133d9 100644 --- a/gridpath/auxiliary/auxiliary.py +++ b/gridpath/auxiliary/auxiliary.py @@ -335,8 +335,6 @@ def check_dtypes(df, expected_dtypes): :return: List of error messages for each column with invalid datatypes. Error message specifies the column and the expected data type. List of columns with erroneous data types. - - TODO: add example """ result = [] @@ -371,3 +369,88 @@ def check_dtypes(df, expected_dtypes): # bad_columns = numeric_columns[np.invert(numeric_bool)] return result, columns + + +def check_column_sign_positive(df, columns): + """ + Checks whether the selected columns of a DataFrame are non-negative. + Helper function for input validation. + :param df: DataFrame for which to check signs. 
Must have a "project" + column, and columns param must be a subset of the columns in df + :param columns: list with columns that are expected to be non-negative + :return: List of error messages for each column with invalid signs. + Error message specifies the column. + """ + result = [] + for column in columns: + is_negative = (df[column] < 0) + if is_negative.any(): + bad_projects = df["project"][is_negative].values + print_bad_projects = ", ".join(bad_projects) + result.append( + "Project(s) '{}': Expected '{}' >= 0" + .format(print_bad_projects, column) + ) + + return result + + +def check_prj_columns(df, columns, required, category): + """ + Checks whether the required columns of a DataFrame are not None/NA or + whether the incompatible columns are None/NA. If required columns are + None/NA, or if incompatible columns are not None/NA, an error message + is returned. + Helper function for input validation. + :param df: DataFrame for which to check columns. Must have a "project" + column, and columns param must be a subset of the columns in df + :param columns: list of columns to check + :param required: Boolean, whether the listed columns are required or + incompatible + :param category: project category (operational_type, capacity_type, ...) + for which we're doing the input validation + :return: List of error messages for each column with invalid inputs. + Error message specifies the column. 
+ """ + result = [] + for column in columns: + if required: + invalids = pd.isna(df[column]) + error_str = "should have inputs for" + else: + invalids = pd.notna(df[column]) + error_str = "should not have inputs for" + if invalids.any(): + bad_projects = df["project"][invalids].values + print_bad_projects = ", ".join(bad_projects) + result.append( + "Project(s) '{}'; {} {} '{}'" + .format(print_bad_projects, category, error_str, column) + ) + + return result + + +def check_constant_heat_rate(df, op_type): + """ + Check whether the projects in the DataFrame have a constant heat rate + based on the number of load points per project in the DAtaFrame + :param df: DataFrame for which to check constant heat rate. Must have + "project", "load_point_mw" columns + :param op_type: Operational type (used in error message) + :return: + """ + + results = [] + + n_load_points = df.groupby(["project"]).size() + invalids = (n_load_points > 1) + if invalids.any(): + bad_projects = invalids.index[invalids] + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': {} should have only 1 load point" + .format(print_bad_projects, op_type) + ) + + return results diff --git a/gridpath/project/__init__.py b/gridpath/project/__init__.py index 791190c4e..ee5298378 100644 --- a/gridpath/project/__init__.py +++ b/gridpath/project/__init__.py @@ -15,7 +15,7 @@ from gridpath.auxiliary.dynamic_components import required_capacity_modules, \ required_operational_modules, headroom_variables, footroom_variables from gridpath.auxiliary.auxiliary import check_dtypes, \ - write_validation_to_database + check_column_sign_positive, write_validation_to_database def determine_dynamic_components(d, scenario_directory, subproblem, stage): @@ -283,46 +283,38 @@ def validate_inputs(subscenarios, subproblem, stage, conn): ) ) - # Check valid numeric columns are positive + # Check valid numeric columns are non-negative numeric_columns = [k for k, v in expected_dtypes.items() if v == 
"numeric"] valid_numeric_columns = set(numeric_columns) - set(error_columns) - for column in valid_numeric_columns: - is_negative = (df[column] < 0) - if is_negative.any(): - bad_projects = df["project"][is_negative].values - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Invalid numeric sign", - "Project(s) '{}'; Expected '{}' >= 0" - .format(print_bad_projects, column) - ) - ) + sign_errors = check_column_sign_positive(df, valid_numeric_columns) + for error in sign_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Invalid numeric sign", + error + ) + ) - # check 0 < min stable fraction <= 1 + # Check 0 < min stable fraction <= 1 if "min_stable_level" not in error_columns: - invalids = ((df["min_stable_level"] <= 0) | - (df["min_stable_level"] > 1)) - if invalids.any(): - bad_projects = df["project"][invalids].values - print_bad_projects = ", ".join(bad_projects) + validation_errors = validate_min_stable_level(df) + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_OPERATIONAL_CHARS", "inputs_project_operational_chars", "Invalid min_stable_level", - "Project(s) '{}': expected 0 < min_stable_level <= 1" - .format(print_bad_projects) + error ) ) # TODO: move into database table (don't hard code) # Check that we're not combining incompatible capacity and operational types - incompatible_combinations = [ + invalid_combos = [ ("new_build_generator", "dispatchable_binary_commit"), ("new_build_generator", "dispatchable_continuous_commit"), ("new_build_generator", "hydro_curtailable"), @@ -340,61 +332,52 @@ def validate_inputs(subscenarios, subproblem, stage, conn): ("existing_gen_binary_economic_retirement", "hydro_noncurtailable"), ] - for combo in incompatible_combinations: - 
bad_combos = ((df["capacity_type"] == combo[0]) & - (df["operational_type"] == combo[1])) - if bad_combos.any(): - bad_projects = df['project'][bad_combos].values - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS, PROJECT_PORTFOLIO", - "inputs_project_operational_chars, inputs_project_portfolios", - "Invalid combination of capacity type and operational type", - "Project(s) '{}': '{}' and '{}'" - .format(print_bad_projects, combo[0], combo[1])) - ) - # check that capacity type is valid + validation_errors = validate_op_cap_combos(df, invalid_combos) + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS, PROJECT_PORTFOLIO", + "inputs_project_operational_chars, inputs_project_portfolios", + "Invalid combination of capacity type and operational type", + error + ) + ) + + # Check that capacity type is valid # Note: foreign key already ensures this! valid_cap_types = c.execute( """SELECT capacity_type from mod_capacity_types""" ).fetchall() valid_cap_types = [v[0] for v in valid_cap_types] - invalids = ~df["capacity_type"].isin(valid_cap_types) - if invalids.any(): - bad_projects = df["project"][invalids].values - print_bad_projects = ", ".join(bad_projects) + validation_errors = validate_cap_types(df, valid_cap_types) + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_PORTFOLIO", "inputs_project_portfolios", "Invalid capacity type", - "Project(s) '{}': Invalid capacity type" - .format(print_bad_projects) + error ) ) - # check that operational type is valid + # Check that operational type is valid # Note: foreign key already ensures this! 
valid_op_types = c.execute( """SELECT operational_type from mod_operational_types""" ).fetchall() valid_op_types = [v[0] for v in valid_op_types] - invalids = ~df["operational_type"].isin(valid_op_types) - if invalids.any(): - bad_projects = df["project"][invalids].values - print_bad_projects = ", ".join(bad_projects) + validation_errors = validate_op_types(df, valid_op_types) + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_OPERATIONAL_CHARS", "inputs_project_operational_chars", "Invalid operational type", - "Project(s) '{}': Invalid operational type" - .format(print_bad_projects) + error ) ) @@ -402,6 +385,92 @@ def validate_inputs(subscenarios, subproblem, stage, conn): write_validation_to_database(validation_results, conn) +def validate_min_stable_level(df): + """ + Check 0 < min stable fraction <= 1 + :param df: + :return: + """ + results = [] + + invalids = ((df["min_stable_level"] <= 0) | + (df["min_stable_level"] > 1)) + if invalids.any(): + bad_projects = df["project"][invalids].values + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': expected 0 < min_stable_level <= 1" + .format(print_bad_projects) + ) + + return results + + +def validate_op_cap_combos(df, invalid_combos): + """ + Check that there's no mixing of incompatible capacity and operational types + :param df: + :param invalid_combos: + :return: + """ + results = [] + for combo in invalid_combos: + bad_combos = ((df["capacity_type"] == combo[0]) & + (df["operational_type"] == combo[1])) + if bad_combos.any(): + bad_projects = df['project'][bad_combos].values + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': '{}' and '{}'" + .format(print_bad_projects, combo[0], combo[1]) + ) + + return results + + +def validate_cap_types(df, valid_cap_types): + """ + Check that the specified capacity types are one of the valid capacity types + :param df: + :param valid_cap_types: + 
:return: + """ + results = [] + + invalids = ~df["capacity_type"].isin(valid_cap_types) + if invalids.any(): + bad_projects = df["project"][invalids].values + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': Invalid capacity type" + .format(print_bad_projects) + ) + + return results + + +def validate_op_types(df, valid_op_types): + """ + Check that the specified operational types are one of the valid operational + types + :param df: + :param valid_op_types: + :return: + """ + results = [] + + invalids = ~df["operational_type"].isin(valid_op_types) + if invalids.any(): + bad_projects = df["project"][invalids].values + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': Invalid operational type" + .format(print_bad_projects) + ) + + return results + + def write_model_inputs(inputs_directory, subscenarios, subproblem, stage, conn): """ Get inputs from database and write out the model input diff --git a/gridpath/project/fuels.py b/gridpath/project/fuels.py index 2e3250393..9ccae7c64 100644 --- a/gridpath/project/fuels.py +++ b/gridpath/project/fuels.py @@ -190,6 +190,43 @@ def validate_inputs(subscenarios, subproblem, stage, conn): ) # Check that fuels specified for projects exist in fuels table + validation_errors = validate_fuel_projects(prj_df, fuels_df) + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Non existent fuel", + error) + ) + + # Check that fuel prices exist for the period and month + validation_errors = validate_fuel_prices(fuels_df, fuel_prices_df, + periods_months) + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_FUEL_PRICES", + "inputs_project_fuel_prices", + "Missing fuel price", + error + ) + ) + + # Write all input validation errors to database + write_validation_to_database(validation_results, 
conn) + + +def validate_fuel_projects(prj_df, fuels_df): + """ + Check that fuels specified for projects exist in fuels table + :param prj_df: + :param fuels_df: + :return: + """ + results = [] fuel_mask = pd.notna(prj_df["fuel"]) existing_fuel_mask = prj_df["fuel"].isin(fuels_df["fuel"]) invalids = fuel_mask & ~existing_fuel_mask @@ -198,35 +235,33 @@ def validate_inputs(subscenarios, subproblem, stage, conn): bad_fuels = prj_df["fuel"][invalids].values print_bad_projects = ", ".join(bad_projects) print_bad_fuels = ", ".join(bad_fuels) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Non existent fuel", - "Project(s) '{}': Specified fuel(s) '{}' do(es) not exist" - .format(print_bad_projects, print_bad_fuels) - ) + results.append( + "Project(s) '{}': Specified fuel(s) '{}' do(es) not exist" + .format(print_bad_projects, print_bad_fuels) ) - # Check that fuel prices exist for the period and month + return results + + +def validate_fuel_prices(fuels_df, fuel_prices_df, periods_months): + """ + Check that fuel prices exist for the period and month + :param fuels_df: + :param fuel_prices_df: + :param periods_months: + :return: + """ + results = [] for f in fuels_df["fuel"].values: df = fuel_prices_df[fuel_prices_df["fuel"] == f] for period, month in periods_months: if not ((df.period == period) & (df.month == month)).any(): - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_FUEL_PRICES", - "inputs_project_fuel_prices", - "Missing fuel price", - "Fuel '{}': Missing price for period '{}', month '{}')" - .format(f, str(period), str(month)) - ) + results.append( + "Fuel '{}': Missing price for period '{}', month '{}'" + .format(f, str(period), str(month)) ) - # Write all input validation errors to database - write_validation_to_database(validation_results, conn) + return results def write_model_inputs(inputs_directory, subscenarios, subproblem, stage, 
conn): diff --git a/gridpath/project/operations/__init__.py b/gridpath/project/operations/__init__.py index 2488370a8..dbd394340 100644 --- a/gridpath/project/operations/__init__.py +++ b/gridpath/project/operations/__init__.py @@ -17,7 +17,7 @@ from pyomo.environ import Set, Param, PositiveReals, PercentFraction, Reals from gridpath.auxiliary.auxiliary import is_number, check_dtypes, \ - write_validation_to_database + check_column_sign_positive, write_validation_to_database # TODO: should we take this out of __init__.py @@ -340,11 +340,11 @@ def get_inputs_from_database(subscenarios, subproblem, stage, conn): c2 = conn.cursor() heat_rates = c2.execute( """ - SELECT project, operational_type, fuel, heat_rate_curves_scenario_id, + SELECT project, fuel, heat_rate_curves_scenario_id, load_point_mw, average_heat_rate_mmbtu_per_mwh FROM inputs_project_portfolios INNER JOIN - (SELECT project, operational_type, fuel, heat_rate_curves_scenario_id + (SELECT project, fuel, heat_rate_curves_scenario_id FROM inputs_project_operational_chars WHERE project_operational_chars_scenario_id = {}) AS op_char USING(project) @@ -385,11 +385,6 @@ def validate_inputs(subscenarios, subproblem, stage, conn): columns=[s[0] for s in heat_rates.description] ) - # Define masks (list of true/false dependent on conditions checked) - hr_curve_mask = pd.notna(hr_df["heat_rate_curves_scenario_id"]) - fuel_mask = pd.notna(hr_df["fuel"]) - load_point_mask = pd.notna(hr_df["load_point_mw"]) - # Check data types availability: expected_dtypes = { "project": "string", @@ -408,25 +403,21 @@ def validate_inputs(subscenarios, subproblem, stage, conn): ) ) - # check 0 < availability <= 1 if "availability" not in error_columns: - invalids = ((av_df["availability"] <= 0) | - (av_df["availability"] > 1)) - if invalids.any(): - bad_projects = av_df["project"][invalids].values - print_bad_projects = ", ".join(bad_projects) + validation_errors = validate_availability(av_df) + for error in validation_errors: 
validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_AVAILABILITY", "inputs_project_availability", "Invalid availability", - "Project(s) '{}': expected 0 < availability <= 1" - .format(print_bad_projects) + error ) ) # Check data types heat_rates: + hr_curve_mask = pd.notna(hr_df["heat_rate_curves_scenario_id"]) sub_hr_df = hr_df[hr_curve_mask][ ["project", "load_point_mw", "average_heat_rate_mmbtu_per_mwh"] ] @@ -447,71 +438,135 @@ def validate_inputs(subscenarios, subproblem, stage, conn): ) ) - # Check valid numeric columns in heat rates are positive + # Check valid numeric columns in heat rates are non-negative numeric_columns = [k for k, v in expected_dtypes.items() if v == "numeric"] valid_numeric_columns = set(numeric_columns) - set(error_columns) - for column in valid_numeric_columns: - is_negative = (sub_hr_df[column] <= 0) - if is_negative.any(): - bad_projects = sub_hr_df["project"][is_negative].values - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_HEAT_RATE_CURVES", - "inputs_project_heat_rate_curves", - "Invalid numeric sign", - "Project(s) '{}'; Expected '{}' > 0" - .format(print_bad_projects, column) - ) - ) + sign_errors = check_column_sign_positive(sub_hr_df, + valid_numeric_columns) + for error in sign_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_HEAT_RATE_CURVES", + "inputs_project_heat_rate_curves", + "Invalid numeric sign", + error + ) + ) # Check for consistency between fuel and heat rate curve inputs # 1. Make sure projects with fuel have a heat rate scenario specified - invalids = fuel_mask & ~hr_curve_mask - if invalids.any(): - bad_projects = hr_df["project"][invalids] - print_bad_projects = ", ".join(bad_projects) + # 2. 
Make sure projects without fuel have no heat rate scenario specified + validation_errors = validate_fuel_vs_heat_rates(hr_df) + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_OPERATIONAL_CHARS", "inputs_project_operational_chars", - "Missing heat rate scenario inputs", - "Project(s) '{}': Missing heat_rate_curves_scenario_id" - .format(print_bad_projects) + "Missing/Unnecessary heat rate scenario inputs", + error ) ) - # 2. Make sure projects without fuel have no heat rate scenario specified + + # Check that specified hr scenarios actually have inputs in the hr table + # and check that specified heat rate curves inputs are valid: + validation_errors = validate_heat_rate_curves(hr_df) + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_HEAT_RATE_CURVES", + "inputs_project_heat_rate_curves", + "Invalid/Missing heat rate curves inputs", + error + ) + ) + + # Write all input validation errors to database + write_validation_to_database(validation_results, conn) + + +def validate_availability(av_df): + """ + Check 0 <= availability <= 1 + :param av_df: + :return: + """ + results = [] + + invalids = ((av_df["availability"] < 0) | + (av_df["availability"] > 1)) + if invalids.any(): + bad_projects = av_df["project"][invalids].values + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': expected 0 <= availability <= 1" + .format(print_bad_projects) + ) + + return results + + +def validate_fuel_vs_heat_rates(hr_df): + """ + Make sure projects with fuel have a heat rate scenario specified. + Conversely, if no fuel is specified, make sure there is no heat rate + scenario specified. 
+ :param hr_df: + :return: + """ + results = [] + + hr_curve_mask = pd.notna(hr_df["heat_rate_curves_scenario_id"]) + fuel_mask = pd.notna(hr_df["fuel"]) + + invalids = fuel_mask & ~hr_curve_mask + if invalids.any(): + bad_projects = hr_df["project"][invalids] + print_bad_projects = ", ".join(bad_projects) + results.append( + "Project(s) '{}': Missing heat_rate_curves_scenario_id" + .format(print_bad_projects) + ) + invalids = ~fuel_mask & hr_curve_mask if invalids.any(): bad_projects = pd.unique(hr_df["project"][invalids]) print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Unnecessary heat rate scenario inputs", + results.append( "Project(s) '{}': No fuel specified so no heat rate expected" .format(print_bad_projects) - ) ) - # Check that specified hr scenarios actually have inputs in the hr table + return results + + +def validate_heat_rate_curves(hr_df): + """ + 1. Check that specified heat rate scenarios actually have inputs in the heat + rate curves table + 2. 
Check that specified heat rate curves inputs are valid: + - strictly increasing load points + - increasing total fuel burn + - convex fuel burn curve + :param hr_df: + :return: + """ + results = [] + + fuel_mask = pd.notna(hr_df["fuel"]) + hr_curve_mask = pd.notna(hr_df["heat_rate_curves_scenario_id"]) + load_point_mask = pd.notna(hr_df["load_point_mw"]) + + # Check for missing inputs in heat rates curves table invalids = hr_curve_mask & ~load_point_mask if invalids.any(): bad_projects = hr_df["project"][invalids] print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_HEAT_RATE_CURVES", - "inputs_project_heat_rate_curves", - "Missing heat rate inputs", - "Project(s) '{}': Expected at least one load point" - .format(print_bad_projects) - ) + results.append( + "Project(s) '{}': Expected at least one load point" + .format(print_bad_projects) ) # Check that each project has convex heat rates etc. @@ -531,41 +586,22 @@ def validate_inputs(subscenarios, subproblem, stage, conn): if np.any(incr_loads == 0): # note: primary key should already prohibit this - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_HEAT_RATE_CURVES", - "inputs_project_heat_rate_curves", - "Identical load points", - "Project(s) '{}': load points can not be identical" - .format(project) - ) + results.append( + "Project(s) '{}': load points can not be identical" + .format(project) ) if np.any(incr_fuel_burn <= 0): - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_HEAT_RATE_CURVES", - "inputs_project_heat_rate_curves", - "Decreasing fuel burn", - "Project(s) '{}': Total fuel burn should increase with increasing load" - .format(project) - ) + results.append( + "Project(s) '{}': Total fuel burn should increase with increasing load" + .format(project) ) if np.any(np.diff(slopes) <= 0): - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - 
"PROJECT_HEAT_RATE_CURVES", - "inputs_project_heat_rate_curves", - "Non convex fuel burn function", - "Project(s) '{}': Marginal heat rate should increase with increading load" - .format(project) - ) + results.append( + "Project(s) '{}': Fuel burn should be convex, i.e. marginal heat rate should increase with increading load" + .format(project) ) - # Write all input validation errors to database - write_validation_to_database(validation_results, conn) + return results def write_model_inputs(inputs_directory, subscenarios, subproblem, stage, conn): diff --git a/gridpath/project/operations/operational_types/always_on.py b/gridpath/project/operations/operational_types/always_on.py index c999f50a0..19cdf2cda 100644 --- a/gridpath/project/operations/operational_types/always_on.py +++ b/gridpath/project/operations/operational_types/always_on.py @@ -16,7 +16,7 @@ PercentFraction, Constraint, Expression from gridpath.auxiliary.auxiliary import generator_subset_init, \ - write_validation_to_database + write_validation_to_database, check_prj_columns from gridpath.auxiliary.dynamic_components import headroom_variables, \ footroom_variables @@ -535,25 +535,21 @@ def validate_module_specific_inputs(subscenarios, subproblem, stage, conn): # Check that unit size and min stable level are specified # (not all operational types require this input) - required_columns = [ + req_columns = [ "min_stable_level", "unit_size_mw" ] - for column in required_columns: - isna = pd.isna(df[column]) - if isna.any(): - bad_projects = df["project"][isna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Missing inputs", - "Project(s) '{}'; Always_on should have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, req_columns, True, "Always_on") + for error in validation_errors: + validation_results.append( + 
(subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Missing inputs", + error + ) + ) # Check that there are no unexpected operational inputs expected_na_columns = [ @@ -565,21 +561,18 @@ def validate_module_specific_inputs(subscenarios, subproblem, stage, conn): "charging_efficiency", "discharging_efficiency", "minimum_duration_hours" ] - for column in expected_na_columns: - notna = pd.notna(df[column]) - if notna.any(): - bad_projects = df["project"][notna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Unexpected inputs", - "Project(s) '{}'; Always_on should not have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, expected_na_columns, False, + "Always_on") + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Unexpected inputs", + error + ) + ) # Write all input validation errors to database write_validation_to_database(validation_results, conn) diff --git a/gridpath/project/operations/operational_types/dispatchable_capacity_commit.py b/gridpath/project/operations/operational_types/dispatchable_capacity_commit.py index 6bae9d0ab..6e612bfc4 100644 --- a/gridpath/project/operations/operational_types/dispatchable_capacity_commit.py +++ b/gridpath/project/operations/operational_types/dispatchable_capacity_commit.py @@ -23,7 +23,7 @@ NonPositiveReals, PercentFraction, Reals, value, Expression from gridpath.auxiliary.auxiliary import generator_subset_init, \ - write_validation_to_database + write_validation_to_database, check_prj_columns from gridpath.auxiliary.dynamic_components import headroom_variables, \ footroom_variables from gridpath.project.operations.operational_types.common_functions 
import \ @@ -1200,46 +1200,40 @@ def validate_module_specific_inputs(subscenarios, subproblem, stage, conn): # Check that unit size and min stable level are specified # (not all operational types require this input) - required_columns = [ + req_columns = [ "min_stable_level", "unit_size_mw" ] - for column in required_columns: - isna = pd.isna(df[column]) - if isna.any(): - bad_projects = df["project"][isna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Missing inputs", - "Project(s) '{}'; Dispatchable_capacity_commit should have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, req_columns, True, + "Dispatchable_capacity_commit") + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Missing inputs", + error + ) + ) # Check that there are no unexpected operational inputs expected_na_columns = [ "charging_efficiency", "discharging_efficiency", "minimum_duration_hours" ] - for column in expected_na_columns: - notna = pd.notna(df[column]) - if notna.any(): - bad_projects = df["project"][notna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Unexpected inputs", - "Project(s) '{}'; Dispatchable_capacity_commit should not have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, expected_na_columns, False, + "Dispatchable_capacity_commit") + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Unexpected inputs", + error + ) + ) # Write all input 
validation errors to database write_validation_to_database(validation_results, conn) diff --git a/gridpath/project/operations/operational_types/dispatchable_no_commit.py b/gridpath/project/operations/operational_types/dispatchable_no_commit.py index 1a95917ff..70e85bd48 100644 --- a/gridpath/project/operations/operational_types/dispatchable_no_commit.py +++ b/gridpath/project/operations/operational_types/dispatchable_no_commit.py @@ -10,10 +10,11 @@ from pyomo.environ import Set, Var, Constraint, NonNegativeReals from gridpath.auxiliary.auxiliary import generator_subset_init,\ - write_validation_to_database + write_validation_to_database, check_prj_columns, check_constant_heat_rate from gridpath.auxiliary.dynamic_components import headroom_variables, \ footroom_variables + def add_module_specific_components(m, d): """ @@ -299,36 +300,30 @@ def validate_module_specific_inputs(subscenarios, subproblem, stage, conn): "charging_efficiency", "discharging_efficiency", "minimum_duration_hours" ] - for column in expected_na_columns: - notna = pd.notna(df[column]) - if notna.any(): - bad_projects = df["project"][notna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Unexpected inputs", - "Project(s) '{}'; Dispatchable_no_commit should not have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, expected_na_columns, False, + "Dispatchable_no_commit") + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Unexpected inputs", + error + ) + ) # Check that there is only one load point (constant heat rate) - n_load_points = hr_df.groupby(["project"]).size() - invalids = (n_load_points > 1) - if invalids.any(): - bad_projects = invalids.index[invalids] - 
print_bad_projects = ", ".join(bad_projects) + validation_errors = check_constant_heat_rate(hr_df, + "Dispatchable_no_commit") + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_HEAT_RATE_CURVES", "inputs_project_heat_rate_curves", "Too many load points", - "Project(s) '{}': Dispatchable_no_commit should have only 1 load point" - .format(print_bad_projects) + error ) ) diff --git a/gridpath/project/operations/operational_types/must_run.py b/gridpath/project/operations/operational_types/must_run.py index 2995abbae..c64586412 100644 --- a/gridpath/project/operations/operational_types/must_run.py +++ b/gridpath/project/operations/operational_types/must_run.py @@ -11,7 +11,7 @@ from pyomo.environ import Constraint, Set from gridpath.auxiliary.auxiliary import generator_subset_init, \ - write_validation_to_database + write_validation_to_database, check_prj_columns, check_constant_heat_rate from gridpath.auxiliary.dynamic_components import headroom_variables, \ footroom_variables @@ -289,36 +289,29 @@ def validate_module_specific_inputs(subscenarios, subproblem, stage, conn): "charging_efficiency", "discharging_efficiency", "minimum_duration_hours" ] - for column in expected_na_columns: - notna = pd.notna(df[column]) - if notna.any(): - bad_projects = df["project"][notna] - print_bad_projects = ", ".join(bad_projects) - validation_results.append( - (subscenarios.SCENARIO_ID, - __name__, - "PROJECT_OPERATIONAL_CHARS", - "inputs_project_operational_chars", - "Unexpected inputs", - "Project(s) '{}'; Must_run should not have inputs for '{}'" - .format(print_bad_projects, column) - ) - ) + validation_errors = check_prj_columns(df, expected_na_columns, False, + "Must_run") + for error in validation_errors: + validation_results.append( + (subscenarios.SCENARIO_ID, + __name__, + "PROJECT_OPERATIONAL_CHARS", + "inputs_project_operational_chars", + "Unexpected inputs", + error + ) + ) # Check that there is only one load point 
(constant heat rate) - n_load_points = hr_df.groupby(["project"]).size() - invalids = (n_load_points > 1) - if invalids.any(): - bad_projects = invalids.index[invalids] - print_bad_projects = ", ".join(bad_projects) + validation_errors = check_constant_heat_rate(hr_df, "Must_run") + for error in validation_errors: validation_results.append( (subscenarios.SCENARIO_ID, __name__, "PROJECT_HEAT_RATE_CURVES", "inputs_project_heat_rate_curves", "Too many load points", - "Project(s) '{}': Must_run should have only 1 load point" - .format(print_bad_projects) + error ) ) diff --git a/tests/auxiliary/test_auxiliary.py b/tests/auxiliary/test_auxiliary.py index 624e52e9b..eaa5ec71d 100644 --- a/tests/auxiliary/test_auxiliary.py +++ b/tests/auxiliary/test_auxiliary.py @@ -3,6 +3,8 @@ from pyomo.environ import AbstractModel import unittest +import pandas as pd +import numpy as np import gridpath.auxiliary.auxiliary as auxiliary_module_to_test @@ -85,6 +87,228 @@ def test_is_number(self): self.assertEqual(True, auxiliary_module_to_test.is_number(100.5)) self.assertEqual(False, auxiliary_module_to_test.is_number("string")) + def test_check_dtypes(self): + """ + + :return: + """ + df_columns = ["project", "capacity"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"df": pd.DataFrame( + columns=df_columns, + data=[["gas_ct", 10], ["coal_plant", 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ([], []) + }, + # Test invalid string column + 2: {"df": pd.DataFrame( + columns=df_columns, + data=[["gas_ct", 10], ["coal_plant", "string"]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ( + ["Invalid data type for column 'capacity'; expected numeric"], + ["capacity"] + )}, + # Test invalid numeric column + 3: {"df": pd.DataFrame( + columns=df_columns, + data=[[1, 10], [1, 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ( + ["Invalid data type for 
column 'project'; expected string"], + ["project"] + )}, + # If at least one string in the column, pandas will convert + # all column data to string so there will be no error + 4: {"df": pd.DataFrame( + columns=df_columns, + data=[["gas_ct", 10], [1, 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ([], []) + }, + # Columns with all None are ignored + 5: {"df": pd.DataFrame( + columns=df_columns, + data=[[None, 10], [None, 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ([], []) + }, + # Columns with all NaN are ignored + 6: {"df": pd.DataFrame( + columns=df_columns, + data=[[np.nan, 10], [np.nan, 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ([], []) + }, + # Columns with some None are not ignored + 7: {"df": pd.DataFrame( + columns=df_columns, + data=[[10, 10], [None, 20]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ( + ["Invalid data type for column 'project'; expected string"], + ["project"] + )}, + # Test multiple error columns + 8: {"df": pd.DataFrame( + columns=df_columns, + data=[[10, "string"], [10, "string"]]), + "expected_dtypes": { + "project": "string", + "capacity": "numeric"}, + "result": ( + ["Invalid data type for column 'project'; expected string", + "Invalid data type for column 'capacity'; expected numeric"], + ["project", "capacity"] + )} + } + + for test_case in test_cases.keys(): + expected_tuple = test_cases[test_case]["result"] + actual_tuple = auxiliary_module_to_test.check_dtypes( + df=test_cases[test_case]["df"], + expected_dtypes=test_cases[test_case]["expected_dtypes"] + ) + self.assertTupleEqual(expected_tuple, actual_tuple) + + def test_check_column_sign_positive(self): + """ + + :return: + """ + df_columns = ["project", "load_point_mw", + "average_heat_rate_mmbtu_per_mwh"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"df": pd.DataFrame( + 
columns=df_columns, + data=[["gas_ct", 10, 10.5], + ["gas_ct", 20, 9], + ["coal_plant", 100, 10] + ]), + "columns": ["load_point_mw", "average_heat_rate_mmbtu_per_mwh"], + "result": [] + }, + # Sign errors are flagged; Errors are grouped by column. If >1 error + # in different columns, a separate error msgs will be created + 2: {"df": pd.DataFrame( + columns=df_columns, + data=[["gas_ct", 10, -10.5], + ["gas_ct", -20, 9], + ["coal_plant", -100, 10] + ]), + "columns": ["load_point_mw", "average_heat_rate_mmbtu_per_mwh"], + "result": ["Project(s) 'gas_ct, coal_plant': Expected 'load_point_mw' >= 0", + "Project(s) 'gas_ct': Expected 'average_heat_rate_mmbtu_per_mwh' >= 0"] + } + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["result"] + actual_list = auxiliary_module_to_test.check_column_sign_positive( + df=test_cases[test_case]["df"], + columns=test_cases[test_case]["columns"] + ) + self.assertListEqual(expected_list, actual_list) + + def test_check_prj_columns(self): + """ + + :return: + """ + + df_columns = ["project", "min_stable_level", "unit_size_mw", + "startup_cost_per_mw", "shutdown_cost_per_mw"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"df": pd.DataFrame( + columns=df_columns, + data=[["nuclear", 0.5, 100, None, None]]), + "columns": ["min_stable_level", "unit_size_mw"], + "required": True, + "category": "Always_on", + "result": [] + }, + # Make sure missing required inputs are flagged + 2: {"df": pd.DataFrame( + columns=df_columns, + data=[["nuclear", None, 100, None, None]]), + "columns": ["min_stable_level", "unit_size_mw"], + "required": True, + "category": "Always_on", + "result": ["Project(s) 'nuclear'; Always_on should have inputs for 'min_stable_level'"] + }, + # Make sure incompatible inputs are flagged + 3: {"df": pd.DataFrame( + columns=df_columns, + data=[["nuclear", 0.5, 100, 1000, None]]), + "columns": ["startup_cost_per_mw", "shutdown_cost_per_mw"], + "required": False, + 
"category": "Always_on", + "result": ["Project(s) 'nuclear'; Always_on should not have inputs for 'startup_cost_per_mw'"] + } + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["result"] + actual_list = auxiliary_module_to_test.check_prj_columns( + df=test_cases[test_case]["df"], + columns=test_cases[test_case]["columns"], + required=test_cases[test_case]["required"], + category=test_cases[test_case]["category"] + ) + self.assertListEqual(expected_list, actual_list) + + def test_check_constant_heat_rate(self): + """ + + :return: + """ + + df_columns = ["project", "load_point_mw"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"df": pd.DataFrame( + columns=df_columns, + data=[["nuclear", 100]]), + "op_type": "Always_on", + "result": [] + }, + # Make sure varying heat rates (>1 load point) is flagged + 2: {"df": pd.DataFrame( + columns=df_columns, + data=[["nuclear", 100], + ["nuclear", 200], + ["gas_ct", 10] + ]), + "op_type": "Always_on", + "result": ["Project(s) 'nuclear': Always_on should have only 1 load point"] + } + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["result"] + actual_list = auxiliary_module_to_test.check_constant_heat_rate( + df=test_cases[test_case]["df"], + op_type=test_cases[test_case]["op_type"] + ) + self.assertListEqual(expected_list, actual_list) + if __name__ == "__main__": unittest.main() diff --git a/tests/project/operations/test_init.py b/tests/project/operations/test_init.py index 0b4daa382..9c89ce151 100644 --- a/tests/project/operations/test_init.py +++ b/tests/project/operations/test_init.py @@ -10,13 +10,12 @@ import sys import unittest import numpy as np +import pandas as pd from tests.common_functions import create_abstract_model, \ add_components_and_load_data from tests.project.operations.common_functions import \ get_project_operational_timepoints -from gridpath.project.operations.__init__ import \ - calculate_heat_rate_slope_intercept 
TEST_DATA_DIRECTORY = \ @@ -486,7 +485,7 @@ def test_calculate_heat_rate_slope_intercept(self): expected_slopes = test_cases[test_case]["slopes"] expected_intercepts = test_cases[test_case]["intercepts"] actual_slopes, actual_intercepts = \ - calculate_heat_rate_slope_intercept( + MODULE_BEING_TESTED.calculate_heat_rate_slope_intercept( project=test_cases[test_case]["project"], load_points=test_cases[test_case]["load_points"], heat_rates=test_cases[test_case]["heat_rates"] @@ -495,6 +494,112 @@ self.assertDictEqual(expected_slopes, actual_slopes) self.assertDictEqual(expected_intercepts, actual_intercepts) + def test_availability_validations(self): + av_df_columns = ["project", "horizon", "availability"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"av_df": pd.DataFrame( + columns=av_df_columns, + data=[["gas_ct", 201801, 1], + ["gas_ct", 201802, 0.9], + ["coal_plant", 201801, 0] + ]), + "error": [] + }, + # Negative availabilities are flagged + 2: {"av_df": pd.DataFrame( + columns=av_df_columns, + data=[["gas_ct", 201801, -1], + ["gas_ct", 201802, 0.9], + ["coal_plant", 201801, 0] + ]), + "error": ["Project(s) 'gas_ct': expected 0 <= availability <= 1"] + }, + # Negative availabilities for a different project are flagged + # (the > 1 case is covered by test case 4 below) + 3: {"av_df": pd.DataFrame( + columns=av_df_columns, + data=[["gas_ct", 201801, 1], + ["gas_ct", 201802, 0.9], + ["coal_plant", 201801, -0.5] + ]), + "error": ["Project(s) 'coal_plant': expected 0 <= availability <= 1"] + }, + # Make sure multiple errors are flagged correctly + 4: {"av_df": pd.DataFrame( + columns=av_df_columns, + data=[["gas_ct", 201801, 1.5], + ["gas_ct", 201802, 0.9], + ["coal_plant", 201801, -0.5] + ]), + "error": ["Project(s) 'gas_ct, coal_plant': expected 0 <= availability <= 1"] + }, + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["error"] + actual_list = MODULE_BEING_TESTED.validate_availability( + av_df=test_cases[test_case]["av_df"], + ) 
+ self.assertListEqual(expected_list, actual_list) + + def test_heat_rate_validations(self): + hr_columns = ["project", "fuel", "heat_rate_curves_scenario_id", + "load_point_mw", "average_heat_rate_mmbtu_per_mwh"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"hr_df": pd.DataFrame( + columns=hr_columns, + data=[["gas_ct", "gas", 1, 10, 10.5], + ["gas_ct", "gas", 1, 20, 9], + ["coal_plant", "coal", 1, 100, 10] + ]), + "fuel_vs_hr_error": [], + "hr_curves_error": [] + }, + # Check fuel vs heat rate curve errors + 3: {"hr_df": pd.DataFrame( + columns=hr_columns, + data=[["gas_ct", "gas", None, None, None], + ["coal_plant", None, 1, 100, 10] + ]), + "fuel_vs_hr_error": ["Project(s) 'gas_ct': Missing heat_rate_curves_scenario_id", + "Project(s) 'coal_plant': No fuel specified so no heat rate expected"], + "hr_curves_error": [] + }, + # Check heat rate curves validations + 4: {"hr_df": pd.DataFrame( + columns=hr_columns, + data=[["gas_ct1", "gas", 1, None, None], + ["gas_ct2", "gas", 1, 10, 11], + ["gas_ct2", "gas", 1, 10, 12], + ["gas_ct3", "gas", 1, 10, 11], + ["gas_ct3", "gas", 1, 20, 5], + ["gas_ct4", "gas", 1, 10, 11], + ["gas_ct4", "gas", 1, 20, 10], + ["gas_ct4", "gas", 1, 30, 9] + ]), + "fuel_vs_hr_error": [], + "hr_curves_error": ["Project(s) 'gas_ct1': Expected at least one load point", + "Project(s) 'gas_ct2': load points can not be identical", + "Project(s) 'gas_ct3': Total fuel burn should increase with increasing load", + "Project(s) 'gas_ct4': Fuel burn should be convex, i.e. 
marginal heat rate should increase with increading load"] + }, + + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["fuel_vs_hr_error"] + actual_list = MODULE_BEING_TESTED.validate_fuel_vs_heat_rates( + hr_df=test_cases[test_case]["hr_df"] + ) + self.assertListEqual(expected_list, actual_list) + + expected_list = test_cases[test_case]["hr_curves_error"] + actual_list = MODULE_BEING_TESTED.validate_heat_rate_curves( + hr_df=test_cases[test_case]["hr_df"] + ) + self.assertListEqual(expected_list, actual_list) + if __name__ == "__main__": unittest.main() diff --git a/tests/project/test_fuels.py b/tests/project/test_fuels.py index 17dbc8bd4..66b575ad4 100644 --- a/tests/project/test_fuels.py +++ b/tests/project/test_fuels.py @@ -14,6 +14,7 @@ from tests.common_functions import create_abstract_model, \ add_components_and_load_data + TEST_DATA_DIRECTORY = \ os.path.join(os.path.dirname(__file__), "..", "test_data") @@ -133,6 +134,98 @@ def test_initialized_components(self): ) self.assertDictEqual(expected_price, actual_price) + def test_fuel_validations(self): + prj_df_columns = ["project", "fuel"] + fuels_df_columns = ["fuel", "co2_intensity_tons_per_mmbtu"] + fuel_prices_df_columns = ["fuel", "period", "month", + "fuel_price_per_mmbtu"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"prj_df": pd.DataFrame( + columns=prj_df_columns, + data=[["gas_ct", "gas"], ["coal_plant", "coal"]]), + "fuels_df": pd.DataFrame( + columns=fuels_df_columns, + data=[["gas", 0.4], ["coal", 0.8]]), + "fuel_prices_df": pd.DataFrame( + columns=fuel_prices_df_columns, + data=[["gas", 2018, 1, 3], ["gas", 2018, 2, 4], + ["coal", 2018, 1, 2], ["coal", 2018, 2, 2]]), + "periods_months": [(2018, 1), (2018, 2)], + "fuel_project_error": [], + "fuel_prices_error": [] + }, + # If a project's fuel in prj_df does not exist in the fuels_df, + # there should be an error. 
Similarly, if a fuel price is missing + # for a certain month/period, there should be an error. + 2: {"prj_df": pd.DataFrame( + columns=prj_df_columns, + data=[["gas_ct", "invalid_fuel"], ["coal_plant", "coal"]]), + "fuels_df": pd.DataFrame( + columns=fuels_df_columns, + data=[["gas", 0.4], ["coal", 0.8]]), + "fuel_prices_df": pd.DataFrame( + columns=fuel_prices_df_columns, + data=[["gas", 2018, 1, 3], + ["coal", 2018, 1, 2], ["coal", 2018, 2, 2]]), + "periods_months": [(2018, 1), (2018, 2)], + "fuel_project_error": [ + "Project(s) 'gas_ct': Specified fuel(s) 'invalid_fuel' do(es) not exist"], + "fuel_prices_error": [ + "Fuel 'gas': Missing price for period '2018', month '2'"] + }, + # It's okay if there are more fuels and fuels prices specified than + # needed for the active projects + 3: {"prj_df": pd.DataFrame( + columns=prj_df_columns, + data=[["gas_ct", "gas"]]), + "fuels_df": pd.DataFrame( + columns=fuels_df_columns, + data=[["gas", 0.4], ["coal", 0.8]]), + "fuel_prices_df": pd.DataFrame( + columns=fuel_prices_df_columns, + data=[["gas", 2018, 1, 3], ["gas", 2018, 2, 4], + ["coal", 2018, 1, 2], ["coal", 2018, 2, 2]]), + "periods_months": [(2018, 1), (2018, 2)], + "fuel_project_error": [], + "fuel_prices_error": [] + }, + # Test for multiple errors in a column + 4: {"prj_df": pd.DataFrame( + columns=prj_df_columns, + data=[["gas_ct", "invalid_fuel1"], ["coal_plant", "invalid_fuel2"]]), + "fuels_df": pd.DataFrame( + columns=fuels_df_columns, + data=[["gas", 0.4], ["coal", 0.8]]), + "fuel_prices_df": pd.DataFrame( + columns=fuel_prices_df_columns, + data=[["gas", 2018, 1, 3], + ["coal", 2018, 1, 2]]), + "periods_months": [(2018, 1), (2018, 2)], + "fuel_project_error": + ["Project(s) 'gas_ct, coal_plant': Specified fuel(s) 'invalid_fuel1, invalid_fuel2' do(es) not exist"], + "fuel_prices_error": + ["Fuel 'gas': Missing price for period '2018', month '2'", + "Fuel 'coal': Missing price for period '2018', month '2'"] + } + } + + for test_case in test_cases.keys(): 
+ expected_list = test_cases[test_case]["fuel_project_error"] + actual_list = MODULE_BEING_TESTED.validate_fuel_projects( + prj_df=test_cases[test_case]["prj_df"], + fuels_df=test_cases[test_case]["fuels_df"] + ) + self.assertListEqual(expected_list, actual_list) + + expected_list = test_cases[test_case]["fuel_prices_error"] + actual_list = MODULE_BEING_TESTED.validate_fuel_prices( + fuels_df=test_cases[test_case]["fuels_df"], + fuel_prices_df=test_cases[test_case]["fuel_prices_df"], + periods_months=test_cases[test_case]["periods_months"] + ) + self.assertListEqual(expected_list, actual_list) + if __name__ == "__main__": unittest.main() diff --git a/tests/project/test_init.py b/tests/project/test_init.py index 229e33fb3..68322a4b1 100644 --- a/tests/project/test_init.py +++ b/tests/project/test_init.py @@ -227,6 +227,70 @@ def test_initialized_components(self): ) self.assertDictEqual(expected_var_om_cost, actual_var_om_cost) + def test_project_validations(self): + cols = ["project", "capacity_type", "operational_type", + "min_stable_level"] + test_cases = { + # Make sure correct inputs don't throw error + 1: {"df": pd.DataFrame( + columns=cols, + data=[["gas_ct", "new_build_generator", + "dispatchable_capacity_commit", 0.5] + ]), + "invalid_combos": [("invalid1", "invalid2")], + "valid_cap_types": ["new_build_generator"], + "valid_op_types": ["dispatchable_capacity_commit"], + "min_stable_level_error": [], + "combo_error": [], + "cap_type_error": [], + "op_type_error": [] + }, + # Make sure invalid min_stable_level, invalid combo, and invalid + # cap/op types are properly flagged + 2: {"df": pd.DataFrame( + columns=cols, + data=[["gas_ct1", "cap1", "op2", 1.5], + ["gas_ct2", "cap3", "op3", 0] + ]), + "invalid_combos": [("cap1", "op2")], + "valid_cap_types": ["cap1", "cap2"], + "valid_op_types": ["op1", "op2"], + "min_stable_level_error": ["Project(s) 'gas_ct1, gas_ct2': expected 0 < min_stable_level <= 1"], + "combo_error": ["Project(s) 'gas_ct1': 'cap1' and 
'op2'"], + "cap_type_error": ["Project(s) 'gas_ct2': Invalid capacity type"], + "op_type_error": ["Project(s) 'gas_ct2': Invalid operational type"] + } + } + + for test_case in test_cases.keys(): + expected_list = test_cases[test_case]["min_stable_level_error"] + actual_list = MODULE_BEING_TESTED.validate_min_stable_level( + df=test_cases[test_case]["df"] + ) + self.assertListEqual(expected_list, actual_list) + + expected_list = test_cases[test_case]["combo_error"] + actual_list = MODULE_BEING_TESTED.validate_op_cap_combos( + df=test_cases[test_case]["df"], + invalid_combos=test_cases[test_case]["invalid_combos"] + ) + self.assertListEqual(expected_list, actual_list) + + expected_list = test_cases[test_case]["cap_type_error"] + actual_list = MODULE_BEING_TESTED.validate_cap_types( + df=test_cases[test_case]["df"], + valid_cap_types=test_cases[test_case]["valid_cap_types"] + + ) + self.assertListEqual(expected_list, actual_list) + + expected_list = test_cases[test_case]["op_type_error"] + actual_list = MODULE_BEING_TESTED.validate_op_types( + df=test_cases[test_case]["df"], + valid_op_types=test_cases[test_case]["valid_op_types"] + ) + self.assertListEqual(expected_list, actual_list) + if __name__ == "__main__": unittest.main() From a5e0e5782e34595a6f6bd1faa38844ad23404617 Mon Sep 17 00:00:00 2001 From: Gerrit De Moor Date: Thu, 15 Aug 2019 11:22:55 -0400 Subject: [PATCH 2/2] Fix zero division error and typo --- gridpath/project/operations/__init__.py | 29 ++++++++++++++----------- tests/project/operations/test_init.py | 4 ++-- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/gridpath/project/operations/__init__.py b/gridpath/project/operations/__init__.py index dbd394340..1145d9b36 100644 --- a/gridpath/project/operations/__init__.py +++ b/gridpath/project/operations/__init__.py @@ -579,10 +579,7 @@ def validate_heat_rate_curves(hr_df): heat_rates = hr_slice["average_heat_rate_mmbtu_per_mwh"].values if len(load_points) > 1: - fuel_burn = load_points 
* heat_rates incr_loads = np.diff(load_points) - incr_fuel_burn = np.diff(fuel_burn) - slopes = incr_fuel_burn / incr_loads if np.any(incr_loads == 0): # note: primary key should already prohibit this @@ -590,16 +587,22 @@ def validate_heat_rate_curves(hr_df): "Project(s) '{}': load points can not be identical" .format(project) ) - if np.any(incr_fuel_burn <= 0): - results.append( - "Project(s) '{}': Total fuel burn should increase with increasing load" - .format(project) - ) - if np.any(np.diff(slopes) <= 0): - results.append( - "Project(s) '{}': Fuel burn should be convex, i.e. marginal heat rate should increase with increading load" - .format(project) - ) + + else: + fuel_burn = load_points * heat_rates + incr_fuel_burn = np.diff(fuel_burn) + slopes = incr_fuel_burn / incr_loads + + if np.any(incr_fuel_burn <= 0): + results.append( + "Project(s) '{}': Total fuel burn should increase with increasing load" + .format(project) + ) + if np.any(np.diff(slopes) <= 0): + results.append( + "Project(s) '{}': Fuel burn should be convex, i.e. marginal heat rate should increase with increading load" + .format(project) + ) return results diff --git a/tests/project/operations/test_init.py b/tests/project/operations/test_init.py index 9c89ce151..858266dc8 100644 --- a/tests/project/operations/test_init.py +++ b/tests/project/operations/test_init.py @@ -557,7 +557,7 @@ def test_heat_rate_validations(self): "hr_curves_error": [] }, # Check fuel vs heat rate curve errors - 3: {"hr_df": pd.DataFrame( + 2: {"hr_df": pd.DataFrame( columns=hr_columns, data=[["gas_ct", "gas", None, None, None], ["coal_plant", None, 1, 100, 10] @@ -567,7 +567,7 @@ def test_heat_rate_validations(self): "hr_curves_error": [] }, # Check heat rate curves validations - 4: {"hr_df": pd.DataFrame( + 3: {"hr_df": pd.DataFrame( columns=hr_columns, data=[["gas_ct1", "gas", 1, None, None], ["gas_ct2", "gas", 1, 10, 11],