Add unittests for input validations #88

Status: Merged (2 commits, Aug 15, 2019)
87 changes: 85 additions & 2 deletions gridpath/auxiliary/auxiliary.py
@@ -335,8 +335,6 @@ def check_dtypes(df, expected_dtypes):
:return: Tuple with (1) a list of error messages for each column with an
invalid data type, where each message specifies the column and the
expected data type, and (2) a list of the columns with erroneous data types.

TODO: add example
"""

result = []
@@ -371,3 +369,88 @@ def check_dtypes(df, expected_dtypes):
# bad_columns = numeric_columns[np.invert(numeric_bool)]

return result, columns
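
The docstring above carries a TODO asking for an example; the sketch below is not part of the diff and is hedged, since most of the check_dtypes body is collapsed in this hunk (the "numeric" label for expected_dtypes is taken from validate_inputs further down, and the column values are hypothetical):

import pandas as pd

df = pd.DataFrame({"project": ["gas_ct"], "min_stable_level": ["not_a_number"]})
dtype_errors, error_columns = check_dtypes(
    df, expected_dtypes={"min_stable_level": "numeric"}
)
# dtype_errors should flag 'min_stable_level' as not matching the expected
# numeric type, and error_columns should list it among the erroneous columns.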


def check_column_sign_positive(df, columns):
"""
Checks whether the selected columns of a DataFrame are non-negative.
Helper function for input validation.
:param df: DataFrame for which to check signs. Must have a "project"
column, and columns param must be a subset of the columns in df
:param columns: list with columns that are expected to be non-negative
:return: List of error messages for each column with invalid signs.
Error message specifies the column.
"""
result = []
for column in columns:
is_negative = (df[column] < 0)
if is_negative.any():
bad_projects = df["project"][is_negative].values
print_bad_projects = ", ".join(bad_projects)
result.append(
"Project(s) '{}': Expected '{}' >= 0"
.format(print_bad_projects, column)
)

return result
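
A brief usage sketch (not part of the diff; the cost column name below is hypothetical, chosen only to illustrate the error message format):

import pandas as pd

df = pd.DataFrame({"project": ["gas_ct", "coal"],
                   "variable_om_cost_per_mwh": [2.0, -1.0]})
errors = check_column_sign_positive(df, columns=["variable_om_cost_per_mwh"])
# errors == ["Project(s) 'coal': Expected 'variable_om_cost_per_mwh' >= 0"]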


def check_prj_columns(df, columns, required, category):
"""
Checks whether the required columns of a DataFrame have inputs (are not
None/NA) or whether incompatible columns are empty (None/NA). An error
message is returned for each required column that is None/NA and for each
incompatible column that is not None/NA.
Helper function for input validation.
:param df: DataFrame for which to check columns. Must have a "project"
column, and columns param must be a subset of the columns in df
:param columns: list of columns to check
:param required: Boolean, whether the listed columns are required or
incompatible
:param category: project category (operational_type, capacity_type, ...)
for which we're doing the input validation
:return: List of error messages for each column with invalid inputs.
Error message specifies the column.
"""
result = []
for column in columns:
if required:
invalids = pd.isna(df[column])
error_str = "should have inputs for"
else:
invalids = pd.notna(df[column])
error_str = "should not have inputs for"
if invalids.any():
bad_projects = df["project"][invalids].values
print_bad_projects = ", ".join(bad_projects)
result.append(
"Project(s) '{}'; {} {} '{}'"
.format(print_bad_projects, category, error_str, column)
)

return result
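
An illustrative call (not part of the diff; the category string is just an example of an operational type passed through to the error message):

import numpy as np
import pandas as pd

df = pd.DataFrame({"project": ["gas_ct"], "min_stable_level": [np.nan]})
errors = check_prj_columns(df, columns=["min_stable_level"], required=True,
                           category="dispatchable_binary_commit")
# One error: "Project(s) 'gas_ct'; dispatchable_binary_commit should have inputs for 'min_stable_level'"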


def check_constant_heat_rate(df, op_type):
"""
Check whether the projects in the DataFrame have a constant heat rate
based on the number of load points per project in the DataFrame.
:param df: DataFrame for which to check constant heat rate. Must have
"project", "load_point_mw" columns
:param op_type: Operational type (used in error message)
:return: List of error messages for projects with more than one load point
"""

results = []

n_load_points = df.groupby(["project"]).size()
invalids = (n_load_points > 1)
if invalids.any():
bad_projects = invalids.index[invalids]
print_bad_projects = ", ".join(bad_projects)
results.append(
"Project(s) '{}': {} should have only 1 load point"
.format(print_bad_projects, op_type)
)

return results
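
Usage sketch (not part of the diff; op_type is only passed through to the error message, so the value here is illustrative):

import pandas as pd

df = pd.DataFrame({"project": ["gas_ct", "gas_ct", "nuclear"],
                   "load_point_mw": [50, 100, 500]})
errors = check_constant_heat_rate(df, op_type="dispatchable_no_commit")
# errors == ["Project(s) 'gas_ct': dispatchable_no_commit should have only 1 load point"]
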
179 changes: 124 additions & 55 deletions gridpath/project/__init__.py
@@ -15,7 +15,7 @@
from gridpath.auxiliary.dynamic_components import required_capacity_modules, \
required_operational_modules, headroom_variables, footroom_variables
from gridpath.auxiliary.auxiliary import check_dtypes, \
write_validation_to_database
check_column_sign_positive, write_validation_to_database


def determine_dynamic_components(d, scenario_directory, subproblem, stage):
@@ -283,46 +283,38 @@ def validate_inputs(subscenarios, subproblem, stage, conn):
)
)

# Check valid numeric columns are positive
# Check valid numeric columns are non-negative
numeric_columns = [k for k, v in expected_dtypes.items() if v == "numeric"]
valid_numeric_columns = set(numeric_columns) - set(error_columns)
for column in valid_numeric_columns:
is_negative = (df[column] < 0)
if is_negative.any():
bad_projects = df["project"][is_negative].values
print_bad_projects = ", ".join(bad_projects)
validation_results.append(
(subscenarios.SCENARIO_ID,
__name__,
"PROJECT_OPERATIONAL_CHARS",
"inputs_project_operational_chars",
"Invalid numeric sign",
"Project(s) '{}'; Expected '{}' >= 0"
.format(print_bad_projects, column)
)
)
sign_errors = check_column_sign_positive(df, valid_numeric_columns)
for error in sign_errors:
validation_results.append(
(subscenarios.SCENARIO_ID,
__name__,
"PROJECT_OPERATIONAL_CHARS",
"inputs_project_operational_chars",
"Invalid numeric sign",
error
)
)

# check 0 < min stable fraction <= 1
# Check 0 < min stable fraction <= 1
if "min_stable_level" not in error_columns:
invalids = ((df["min_stable_level"] <= 0) |
(df["min_stable_level"] > 1))
if invalids.any():
bad_projects = df["project"][invalids].values
print_bad_projects = ", ".join(bad_projects)
validation_errors = validate_min_stable_level(df)
for error in validation_errors:
validation_results.append(
(subscenarios.SCENARIO_ID,
__name__,
"PROJECT_OPERATIONAL_CHARS",
"inputs_project_operational_chars",
"Invalid min_stable_level",
"Project(s) '{}': expected 0 < min_stable_level <= 1"
.format(print_bad_projects)
error
)
)

# TODO: move into database table (don't hard code)
# Check that we're not combining incompatible capacity and operational types
incompatible_combinations = [
invalid_combos = [
("new_build_generator", "dispatchable_binary_commit"),
("new_build_generator", "dispatchable_continuous_commit"),
("new_build_generator", "hydro_curtailable"),
@@ -340,68 +332,145 @@ def validate_inputs(subscenarios, subproblem, stage, conn):
("existing_gen_binary_economic_retirement",
"hydro_noncurtailable"),
]
for combo in incompatible_combinations:
bad_combos = ((df["capacity_type"] == combo[0]) &
(df["operational_type"] == combo[1]))
if bad_combos.any():
bad_projects = df['project'][bad_combos].values
print_bad_projects = ", ".join(bad_projects)
validation_results.append(
(subscenarios.SCENARIO_ID,
__name__,
"PROJECT_OPERATIONAL_CHARS, PROJECT_PORTFOLIO",
"inputs_project_operational_chars, inputs_project_portfolios",
"Invalid combination of capacity type and operational type",
"Project(s) '{}': '{}' and '{}'"
.format(print_bad_projects, combo[0], combo[1]))
)

# check that capacity type is valid
validation_errors = validate_op_cap_combos(df, invalid_combos)
for error in validation_errors:
validation_results.append(
(subscenarios.SCENARIO_ID,
__name__,
"PROJECT_OPERATIONAL_CHARS, PROJECT_PORTFOLIO",
"inputs_project_operational_chars, inputs_project_portfolios",
"Invalid combination of capacity type and operational type",
error
)
)

# Check that capacity type is valid
# Note: foreign key already ensures this!
valid_cap_types = c.execute(
"""SELECT capacity_type from mod_capacity_types"""
).fetchall()
valid_cap_types = [v[0] for v in valid_cap_types]
invalids = ~df["capacity_type"].isin(valid_cap_types)
if invalids.any():
bad_projects = df["project"][invalids].values
print_bad_projects = ", ".join(bad_projects)
validation_errors = validate_cap_types(df, valid_cap_types)
for error in validation_errors:
validation_results.append(
(subscenarios.SCENARIO_ID,
__name__,
"PROJECT_PORTFOLIO",
"inputs_project_portfolios",
"Invalid capacity type",
"Project(s) '{}': Invalid capacity type"
.format(print_bad_projects)
error
)
)

# check that operational type is valid
# Check that operational type is valid
# Note: foreign key already ensures this!
valid_op_types = c.execute(
"""SELECT operational_type from mod_operational_types"""
).fetchall()
valid_op_types = [v[0] for v in valid_op_types]
invalids = ~df["operational_type"].isin(valid_op_types)
if invalids.any():
bad_projects = df["project"][invalids].values
print_bad_projects = ", ".join(bad_projects)
validation_errors = validate_op_types(df, valid_op_types)
for error in validation_errors:
validation_results.append(
(subscenarios.SCENARIO_ID,
__name__,
"PROJECT_OPERATIONAL_CHARS",
"inputs_project_operational_chars",
"Invalid operational type",
"Project(s) '{}': Invalid operational type"
.format(print_bad_projects)
error
)
)

# Write all input validation errors to database
write_validation_to_database(validation_results, conn)


def validate_min_stable_level(df):
"""
Check 0 < min stable fraction <= 1
:param df: DataFrame with "project" and "min_stable_level" columns
:return: List of error messages for projects whose min_stable_level is not in (0, 1]
"""
results = []

invalids = ((df["min_stable_level"] <= 0) |
(df["min_stable_level"] > 1))
if invalids.any():
bad_projects = df["project"][invalids].values
print_bad_projects = ", ".join(bad_projects)
results.append(
"Project(s) '{}': expected 0 < min_stable_level <= 1"
.format(print_bad_projects)
)

return results
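
A quick usage sketch (not part of the diff):

import pandas as pd

df = pd.DataFrame({"project": ["gas_ct", "coal"], "min_stable_level": [0.4, 1.2]})
errors = validate_min_stable_level(df)
# errors == ["Project(s) 'coal': expected 0 < min_stable_level <= 1"]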


def validate_op_cap_combos(df, invalid_combos):
"""
Check that there's no mixing of incompatible capacity and operational types
:param df: DataFrame with "project", "capacity_type", and "operational_type" columns
:param invalid_combos: list of (capacity_type, operational_type) tuples that are incompatible
:return: List of error messages for projects with an invalid combination
"""
results = []
for combo in invalid_combos:
bad_combos = ((df["capacity_type"] == combo[0]) &
(df["operational_type"] == combo[1]))
if bad_combos.any():
bad_projects = df['project'][bad_combos].values
print_bad_projects = ", ".join(bad_projects)
results.append(
"Project(s) '{}': '{}' and '{}'"
.format(print_bad_projects, combo[0], combo[1])
)

return results
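
Usage sketch (not part of the diff; the combination below is taken from the invalid_combos list shown above):

import pandas as pd

df = pd.DataFrame({"project": ["hydro_new"],
                   "capacity_type": ["new_build_generator"],
                   "operational_type": ["hydro_curtailable"]})
errors = validate_op_cap_combos(
    df, invalid_combos=[("new_build_generator", "hydro_curtailable")]
)
# errors == ["Project(s) 'hydro_new': 'new_build_generator' and 'hydro_curtailable'"]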


def validate_cap_types(df, valid_cap_types):
"""
Check that the specified capacity types are among the valid capacity types
:param df: DataFrame with "project" and "capacity_type" columns
:param valid_cap_types: list of valid capacity types
:return: List of error messages for projects with an invalid capacity type
"""
results = []

invalids = ~df["capacity_type"].isin(valid_cap_types)
if invalids.any():
bad_projects = df["project"][invalids].values
print_bad_projects = ", ".join(bad_projects)
results.append(
"Project(s) '{}': Invalid capacity type"
.format(print_bad_projects)
)

return results
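
Usage sketch (not part of the diff; in validate_inputs the valid_cap_types list comes from the mod_capacity_types table, so the hard-coded list here is illustrative):

import pandas as pd

df = pd.DataFrame({"project": ["gas_ct"], "capacity_type": ["not_a_real_type"]})
errors = validate_cap_types(
    df,
    valid_cap_types=["new_build_generator", "existing_gen_binary_economic_retirement"]
)
# errors == ["Project(s) 'gas_ct': Invalid capacity type"]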


def validate_op_types(df, valid_op_types):
"""
Check that the specified operational types are among the valid operational
types
:param df: DataFrame with "project" and "operational_type" columns
:param valid_op_types: list of valid operational types
:return: List of error messages for projects with an invalid operational type
"""
results = []

invalids = ~df["operational_type"].isin(valid_op_types)
if invalids.any():
bad_projects = df["project"][invalids].values
print_bad_projects = ", ".join(bad_projects)
results.append(
"Project(s) '{}': Invalid operational type"
.format(print_bad_projects)
)

return results
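
Since the PR adds unit tests for these validators, here is a hedged sketch of the kind of test involved; the actual test module added by this PR is not among the hunks loaded above, so the class and test names are illustrative:

import unittest

import pandas as pd

from gridpath.project import validate_op_types


class TestValidateOpTypes(unittest.TestCase):
    def test_invalid_op_type_is_flagged(self):
        df = pd.DataFrame({"project": ["gas_ct"],
                           "operational_type": ["not_a_real_type"]})
        actual = validate_op_types(
            df, valid_op_types=["dispatchable_binary_commit", "hydro_curtailable"]
        )
        expected = ["Project(s) 'gas_ct': Invalid operational type"]
        self.assertListEqual(expected, actual)


if __name__ == "__main__":
    unittest.main()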


def write_model_inputs(inputs_directory, subscenarios, subproblem, stage, conn):
"""
Get inputs from database and write out the model input