Skip to content

Commit

Permalink
Add unittests for input validations
Browse files Browse the repository at this point in the history
 - fuels
 - auxiliary/check_dtypes
  • Loading branch information
gerritdm committed Aug 7, 2019
1 parent 97d2f57 commit b70e787
Show file tree
Hide file tree
Showing 4 changed files with 246 additions and 23 deletions.
2 changes: 0 additions & 2 deletions gridpath/auxiliary/auxiliary.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,6 @@ def check_dtypes(df, expected_dtypes):
:return: List of error messages for each column with invalid datatypes.
Error message specifies the column and the expected data type.
List of columns with erroneous data types.
TODO: add example
"""

result = []
Expand Down
77 changes: 56 additions & 21 deletions gridpath/project/fuels.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,43 @@ def validate_inputs(subscenarios, subproblem, stage, conn):
)

# Check that fuels specified for projects exist in fuels table
validation_errors = validate_fuel_projects(prj_df, fuels_df)
for error in validation_errors:
validation_results.append(
(subscenarios.SCENARIO_ID,
__name__,
"PROJECT_OPERATIONAL_CHARS",
"inputs_project_operational_chars",
"Non existent fuel",
error)
)

# Check that fuel prices exist for the period and month
validation_errors = validate_fuel_prices(fuels_df, fuel_prices_df,
periods_months)
for error in validation_errors:
validation_results.append(
(subscenarios.SCENARIO_ID,
__name__,
"PROJECT_FUEL_PRICES",
"inputs_project_fuel_prices",
"Missing fuel price",
error
)
)

# Write all input validation errors to database
write_validation_to_database(validation_results, conn)


def validate_fuel_projects(prj_df, fuels_df):
"""
Check that fuels specified for projects exist in fuels table
:param prj_df:
:param fuels_df:
:return:
"""
results = []
fuel_mask = pd.notna(prj_df["fuel"])
existing_fuel_mask = prj_df["fuel"].isin(fuels_df["fuel"])
invalids = fuel_mask & ~existing_fuel_mask
Expand All @@ -198,35 +235,33 @@ def validate_inputs(subscenarios, subproblem, stage, conn):
bad_fuels = prj_df["fuel"][invalids].values
print_bad_projects = ", ".join(bad_projects)
print_bad_fuels = ", ".join(bad_fuels)
validation_results.append(
(subscenarios.SCENARIO_ID,
__name__,
"PROJECT_OPERATIONAL_CHARS",
"inputs_project_operational_chars",
"Non existent fuel",
"Project(s) '{}': Specified fuel(s) '{}' do(es) not exist"
.format(print_bad_projects, print_bad_fuels)
)
results.append(
"Project(s) '{}': Specified fuel(s) '{}' do(es) not exist"
.format(print_bad_projects, print_bad_fuels)
)

# Check that fuel prices exist for the period and month
return results


def validate_fuel_prices(fuels_df, fuel_prices_df, periods_months):
    """
    Check that fuel prices exist for the period and month
    :param fuels_df: DataFrame with a "fuel" column; every fuel listed in
        that column is checked.
    :param fuel_prices_df: DataFrame with "fuel", "period", and "month"
        columns; a row counts as an available price for that fuel, period,
        and month.
    :param periods_months: iterable of (period, month) pairs for which each
        fuel must have a price.
    :return: List of error messages, one per fuel/period/month combination
        that has no row in fuel_prices_df (empty list if nothing is
        missing). Messages are ordered by fuel, then by period/month pair.
    """
    # Materialize once: the pairs are re-read for every fuel, so a one-shot
    # iterator would otherwise be exhausted after the first fuel.
    periods_months = list(periods_months)

    results = []
    for f in fuels_df["fuel"].values:
        # Restrict to this fuel's price rows before checking each pair
        df = fuel_prices_df[fuel_prices_df["fuel"] == f]
        for period, month in periods_months:
            has_price = (
                (df["period"] == period) & (df["month"] == month)
            ).any()
            if not has_price:
                results.append(
                    "Fuel '{}': Missing price for period '{}', month '{}'"
                    .format(f, str(period), str(month))
                )

    return results


def write_model_inputs(inputs_directory, subscenarios, subproblem, stage, conn):
Expand Down
100 changes: 100 additions & 0 deletions tests/auxiliary/test_auxiliary.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

from pyomo.environ import AbstractModel
import unittest
import pandas as pd
import numpy as np

import gridpath.auxiliary.auxiliary as auxiliary_module_to_test

Expand Down Expand Up @@ -85,6 +87,104 @@ def test_is_number(self):
self.assertEqual(True, auxiliary_module_to_test.is_number(100.5))
self.assertEqual(False, auxiliary_module_to_test.is_number("string"))

def test_check_dtypes(self):
    """
    Check the return value of check_dtypes for a table of input cases.

    Each case holds the input DataFrame ("df"), the expected data type per
    column ("expected_dtypes"), and the expected return value ("result"):
    a tuple of (list of error messages, list of offending column names).
    :return:
    """
    test_cases = {
        # Make sure correct inputs don't throw error
        1: {"df": pd.DataFrame(
                columns=["project", "capacity"],
                data=[["gas_ct", 10], ["coal_plant", 20]]),
            "expected_dtypes": {
                "project": "string",
                "capacity": "numeric"},
            "result": ([], [])
            },
        # Test invalid numeric column (a string in 'capacity')
        2: {"df": pd.DataFrame(
                columns=["project", "capacity"],
                data=[["gas_ct", 10], ["coal_plant", "string"]]),
            "expected_dtypes": {
                "project": "string",
                "capacity": "numeric"},
            "result": (
                ["Invalid data type for column 'capacity'; expected numeric"],
                ["capacity"]
            )},
        # Test invalid string column (only numbers in 'project')
        3: {"df": pd.DataFrame(
                columns=["project", "capacity"],
                data=[[1, 10], [1, 20]]),
            "expected_dtypes": {
                "project": "string",
                "capacity": "numeric"},
            "result": (
                ["Invalid data type for column 'project'; expected string"],
                ["project"]
            )},
        # A string column that mixes strings and numbers is expected to
        # pass (presumably because pandas stores such a column with the
        # generic object dtype -- confirm against check_dtypes)
        4: {"df": pd.DataFrame(
                columns=["project", "capacity"],
                data=[["gas_ct", 10], [1, 20]]),
            "expected_dtypes": {
                "project": "string",
                "capacity": "numeric"},
            "result": ([], [])
            },
        # Columns with all None are ignored
        5: {"df": pd.DataFrame(
                columns=["project", "capacity"],
                data=[[None, 10], [None, 20]]),
            "expected_dtypes": {
                "project": "string",
                "capacity": "numeric"},
            "result": ([], [])
            },
        # Columns with all NaN are ignored
        6: {"df": pd.DataFrame(
                columns=["project", "capacity"],
                data=[[np.nan, 10], [np.nan, 20]]),
            "expected_dtypes": {
                "project": "string",
                "capacity": "numeric"},
            "result": ([], [])
            },
        # Columns with some None are not ignored
        7: {"df": pd.DataFrame(
                columns=["project", "capacity"],
                data=[[10, 10], [None, 20]]),
            "expected_dtypes": {
                "project": "string",
                "capacity": "numeric"},
            "result": (
                ["Invalid data type for column 'project'; expected string"],
                ["project"]
            )},
        # Test multiple error columns
        8: {"df": pd.DataFrame(
                columns=["project", "capacity"],
                data=[[10, "string"], [10, "string"]]),
            "expected_dtypes": {
                "project": "string",
                "capacity": "numeric"},
            "result": (
                ["Invalid data type for column 'project'; expected string",
                 "Invalid data type for column 'capacity'; expected numeric"],
                ["project", "capacity"]
            )}
    }

    for case_id, case in test_cases.items():
        # subTest labels a failure with its case number and lets the
        # remaining cases run instead of stopping at the first failure
        with self.subTest(test_case=case_id):
            actual_tuple = auxiliary_module_to_test.check_dtypes(
                df=case["df"],
                expected_dtypes=case["expected_dtypes"]
            )
            self.assertTupleEqual(case["result"], actual_tuple)


if __name__ == "__main__":
unittest.main()
90 changes: 90 additions & 0 deletions tests/project/test_fuels.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

from tests.common_functions import create_abstract_model, \
add_components_and_load_data
from gridpath.project.fuels import \
validate_fuel_projects, validate_fuel_prices

TEST_DATA_DIRECTORY = \
os.path.join(os.path.dirname(__file__), "..", "test_data")
Expand Down Expand Up @@ -133,6 +135,94 @@ def test_initialized_components(self):
)
self.assertDictEqual(expected_price, actual_price)

def test_fuel_validations(self):
    """
    Check the error lists returned by validate_fuel_projects and
    validate_fuel_prices for a table of input cases.

    Each case holds the input DataFrames ("prj_df", "fuels_df",
    "fuel_prices_df"), the (period, month) pairs to check
    ("periods_months"), and the expected error messages from each
    validation ("fuel_project_error", "fuel_prices_error").
    :return:
    """
    test_cases = {
        # Make sure correct inputs don't throw error
        1: {"prj_df": pd.DataFrame(
                columns=["project", "fuel"],
                data=[["gas_ct", "gas"], ["coal_plant", "coal"]]),
            "fuels_df": pd.DataFrame(
                columns=["fuel", "co2_intensity_tons_per_mmbtu"],
                data=[["gas", 0.4], ["coal", 0.8]]),
            "fuel_prices_df": pd.DataFrame(
                columns=["fuel", "period", "month",
                         "fuel_price_per_mmbtu"],
                data=[["gas", 2018, 1, 3], ["gas", 2018, 2, 4],
                      ["coal", 2018, 1, 2], ["coal", 2018, 2, 2]]),
            "periods_months": [(2018, 1), (2018, 2)],
            "fuel_project_error": [],
            "fuel_prices_error": []
            },
        # If a project's fuel in prj_df does not exist in the fuels_df,
        # there should be an error. Similarly, if a fuel price is missing
        # for a certain month/period, there should be an error.
        2: {"prj_df": pd.DataFrame(
                columns=["project", "fuel"],
                data=[["gas_ct", "invalid_fuel"], ["coal_plant", "coal"]]),
            "fuels_df": pd.DataFrame(
                columns=["fuel", "co2_intensity_tons_per_mmbtu"],
                data=[["gas", 0.4], ["coal", 0.8]]),
            "fuel_prices_df": pd.DataFrame(
                columns=["fuel", "period", "month",
                         "fuel_price_per_mmbtu"],
                data=[["gas", 2018, 1, 3],
                      ["coal", 2018, 1, 2], ["coal", 2018, 2, 2]]),
            "periods_months": [(2018, 1), (2018, 2)],
            "fuel_project_error": [
                "Project(s) 'gas_ct': Specified fuel(s) 'invalid_fuel' "
                "do(es) not exist"],
            "fuel_prices_error": [
                "Fuel 'gas': Missing price for period '2018', month '2'"]
            },
        # It's okay if there are more fuels and fuels prices specified than
        # needed for the active projects
        3: {"prj_df": pd.DataFrame(
                columns=["project", "fuel"],
                data=[["gas_ct", "gas"]]),
            "fuels_df": pd.DataFrame(
                columns=["fuel", "co2_intensity_tons_per_mmbtu"],
                data=[["gas", 0.4], ["coal", 0.8]]),
            "fuel_prices_df": pd.DataFrame(
                columns=["fuel", "period", "month",
                         "fuel_price_per_mmbtu"],
                data=[["gas", 2018, 1, 3], ["gas", 2018, 2, 4],
                      ["coal", 2018, 1, 2], ["coal", 2018, 2, 2]]),
            "periods_months": [(2018, 1), (2018, 2)],
            "fuel_project_error": [],
            "fuel_prices_error": []
            },
        # Test for multiple errors in a column
        4: {"prj_df": pd.DataFrame(
                columns=["project", "fuel"],
                data=[["gas_ct", "invalid_fuel1"],
                      ["coal_plant", "invalid_fuel2"]]),
            "fuels_df": pd.DataFrame(
                columns=["fuel", "co2_intensity_tons_per_mmbtu"],
                data=[["gas", 0.4], ["coal", 0.8]]),
            "fuel_prices_df": pd.DataFrame(
                columns=["fuel", "period", "month",
                         "fuel_price_per_mmbtu"],
                data=[["gas", 2018, 1, 3],
                      ["coal", 2018, 1, 2]]),
            "periods_months": [(2018, 1), (2018, 2)],
            "fuel_project_error": [
                "Project(s) 'gas_ct, coal_plant': Specified fuel(s) "
                "'invalid_fuel1, invalid_fuel2' do(es) not exist"],
            "fuel_prices_error": [
                "Fuel 'gas': Missing price for period '2018', month '2'",
                "Fuel 'coal': Missing price for period '2018', month '2'"]
            }
    }

    for case_id, case in test_cases.items():
        # Separate subTests so a failure names both the case and the
        # validation, and does not hide failures in later cases
        with self.subTest(test_case=case_id, check="fuel_projects"):
            actual_list = validate_fuel_projects(
                prj_df=case["prj_df"],
                fuels_df=case["fuels_df"]
            )
            self.assertListEqual(case["fuel_project_error"], actual_list)

        with self.subTest(test_case=case_id, check="fuel_prices"):
            actual_list = validate_fuel_prices(
                fuels_df=case["fuels_df"],
                fuel_prices_df=case["fuel_prices_df"],
                periods_months=case["periods_months"]
            )
            self.assertListEqual(case["fuel_prices_error"], actual_list)


if __name__ == "__main__":
unittest.main()

0 comments on commit b70e787

Please sign in to comment.