
Commit

[FEATURE] Update Batch.validate() API to accept expectation parameters (#10456)
cdkini authored Oct 3, 2024
1 parent a0ce1fc commit e3e3b32
Showing 7 changed files with 122 additions and 57 deletions.
@@ -52,39 +52,37 @@ def set_up_context_for_example(context):
# </snippet>


# TODO: Post 1.0 this functionality should be enabled and these examples can be inserted into the example script above.
# Corresponding text has been included (as comments) in test_an_expectation.md
# # Alternatively, define an Expectation that uses an Expectation Parameter dictionary:
# expectation = gx.expectations.ExpectColumnMaxToBeBetween(
# column="passenger_count",
# min_value={"$PARAMETER": "expect_passenger_max_to_be_above"},
# max_value={"$PARAMETER": "expect_passenger_max_to_be_below"},
# )
#
# # Define the Expectation Parameter values and test the Expectation:
# # <snippet name="docs/docusaurus/docs/core/define_expectations/_examples/test_an_expectation.py - test expectation with expectation parameters">
# runtime_expectation_parameters = {
# "expect_passenger_max_to_be_above": 4,
# "expect_passenger_max_to_be_below": 6,
# }
# validation_results = batch.validate(
# expectation, expectation_parameters=runtime_expectation_parameters
# )
# # </snippet>
#
# # Evaluate the Validation Results:
#
# print(validation_results)
#
#
# # If needed, update the Expectation Parameter dictionary and test again:
# # <snippet name="docs/docusaurus/docs/core/define_expectations/_examples/test_an_expectation.py - modify and retest Expectation Parameters dictionary">
# runtime_expectation_parameters = {
# "expect_passenger_max_to_be_above": 1,
# "expect_passenger_max_to_be_below": 6,
# }
# validation_results = batch.validate(
# expectation, expectation_parameters=runtime_expectation_parameters
# )
# print(validation_results)
# # </snippet>
# Alternatively, define an Expectation that uses an Expectation Parameter dictionary:
expectation = gx.expectations.ExpectColumnMaxToBeBetween(
column="passenger_count",
min_value={"$PARAMETER": "expect_passenger_max_to_be_above"},
max_value={"$PARAMETER": "expect_passenger_max_to_be_below"},
)

# Define the Expectation Parameter values and test the Expectation:
# <snippet name="docs/docusaurus/docs/core/define_expectations/_examples/test_an_expectation.py - test expectation with expectation parameters">
runtime_expectation_parameters = {
"expect_passenger_max_to_be_above": 4,
"expect_passenger_max_to_be_below": 6,
}
validation_results = batch.validate(
expectation, expectation_parameters=runtime_expectation_parameters
)
# </snippet>

# Evaluate the Validation Results:

print(validation_results)


# If needed, update the Expectation Parameter dictionary and test again:
# <snippet name="docs/docusaurus/docs/core/define_expectations/_examples/test_an_expectation.py - modify and retest Expectation Parameters dictionary">
runtime_expectation_parameters = {
"expect_passenger_max_to_be_above": 1,
"expect_passenger_max_to_be_below": 6,
}
validation_results = batch.validate(
expectation, expectation_parameters=runtime_expectation_parameters
)
print(validation_results)
# </snippet>
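For readers new to the placeholder syntax used above, here is a minimal, self-contained sketch of the substitution idea; it is purely illustrative and does not reproduce GX internals.

```python
from typing import Any, Mapping


def resolve_placeholders(
    kwargs: Mapping[str, Any], parameters: Mapping[str, Any]
) -> dict[str, Any]:
    """Replace {"$PARAMETER": <name>} markers with runtime values (illustrative only)."""
    resolved: dict[str, Any] = {}
    for key, value in kwargs.items():
        if isinstance(value, dict) and "$PARAMETER" in value:
            resolved[key] = parameters[value["$PARAMETER"]]
        else:
            resolved[key] = value
    return resolved


print(
    resolve_placeholders(
        {"min_value": {"$PARAMETER": "expect_passenger_max_to_be_above"}, "max_value": 6},
        {"expect_passenger_max_to_be_above": 4},
    )
)
# -> {'min_value': 4, 'max_value': 6}
```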
@@ -41,10 +41,10 @@ Data can be validated against individual Expectations. This workflow is general
```python title="Python" name="docs/docusaurus/docs/core/define_expectations/_examples/test_an_expectation.py - test expectation with preset parameters"
```

<!-- In this example, the Expectation to test was defined to take Expectation Parameters at runtime:
In this example, the Expectation to test was defined to take Expectation Parameters at runtime:

```python title="Python" name="docs/docusaurus/docs/core/define_expectations/_examples/test_an_expectation.py - test expectation with expectation parameters"
``` -->
```python title="Python" name="docs/docusaurus/docs/core/define_expectations/_examples/test_an_expectation.py - test expectation with expectation parameters"
```

2. Evaluate the returned Validation Results.

@@ -139,10 +139,10 @@ Data can be validated against individual Expectations. This workflow is general
}
```

<!-- When an Expectation uses an Expectation Parameter dictionary you don't have to modify anything on the Expectation object. Instead, update the dictionary with new values and then test it with the updated dictionary:
When an Expectation uses an Expectation Parameter dictionary you don't have to modify anything on the Expectation object. Instead, update the dictionary with new values and then test it with the updated dictionary:

```python title="Python input" name="docs/docusaurus/docs/core/define_expectations/_examples/test_an_expectation.py - modify and retest Expectation Parameters dictionary"
``` -->
```python title="Python input" name="docs/docusaurus/docs/core/define_expectations/_examples/test_an_expectation.py - modify and retest Expectation Parameters dictionary"
```

For more information about Validation Results, what they contain, and how to adjust their verbosity see [Choose result format](../trigger_actions_based_on_results/choose_a_result_format/choose_a_result_format.md).

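A hedged sketch of the retest pattern the page describes: only the parameter dictionary changes between runs. It assumes `batch` and `expectation` exist as in the snippets referenced above.

```python
# Assumes `batch` and `expectation` were created as in the snippets above.
candidate_parameters = [
    {"expect_passenger_max_to_be_above": 4, "expect_passenger_max_to_be_below": 6},
    {"expect_passenger_max_to_be_above": 1, "expect_passenger_max_to_be_below": 6},
]
for params in candidate_parameters:
    results = batch.validate(expectation, expectation_parameters=params)
    print(params, results.success)
```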
5 changes: 3 additions & 2 deletions great_expectations/checkpoint/checkpoint.py
@@ -60,6 +60,7 @@
from great_expectations.render.renderer.renderer import Renderer

if TYPE_CHECKING:
from great_expectations.core.suite_parameters import SuiteParameterDict
from great_expectations.data_context.store.validation_definition_store import (
ValidationDefinitionStore,
)
@@ -273,7 +274,7 @@ def _deserialize_identifier_bundles_to_validation_definitions(
def run(
self,
batch_parameters: Dict[str, Any] | None = None,
expectation_parameters: Dict[str, Any] | None = None,
expectation_parameters: SuiteParameterDict | None = None,
run_id: RunIdentifier | None = None,
) -> CheckpointResult:
if not self.validation_definitions:
@@ -312,7 +313,7 @@ def _submit_analytics_event(self):
def _run_validation_definitions(
self,
batch_parameters: Dict[str, Any] | None,
expectation_parameters: Dict[str, Any] | None,
expectation_parameters: SuiteParameterDict | None,
result_format: ResultFormatUnion,
run_id: RunIdentifier,
) -> Dict[ValidationResultIdentifier, ExpectationSuiteValidationResult]:
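With the widened annotation, `Checkpoint.run` forwards `expectation_parameters` to its validation definitions. A minimal sketch of the call shape, assuming `checkpoint` is an existing Checkpoint whose Expectations use `$PARAMETER` placeholders (names and values are illustrative):

```python
# Assumes `checkpoint` already exists and its suite uses {"$PARAMETER": ...} placeholders.
checkpoint_result = checkpoint.run(
    expectation_parameters={
        "expect_passenger_max_to_be_above": 1,
        "expect_passenger_max_to_be_below": 6,
    },
)
print(checkpoint_result.success)
```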
5 changes: 3 additions & 2 deletions great_expectations/core/validation_definition.py
@@ -1,7 +1,7 @@
from __future__ import annotations

import datetime
from typing import TYPE_CHECKING, Any, Optional, Union
from typing import TYPE_CHECKING, Optional, Union

import great_expectations.exceptions as gx_exceptions
from great_expectations._docs_decorators import public_api
@@ -46,6 +46,7 @@
ExpectationSuiteValidationResult,
)
from great_expectations.core.result_format import ResultFormatUnion
from great_expectations.core.suite_parameters import SuiteParameterDict
from great_expectations.data_context.store.validation_results_store import (
ValidationResultsStore,
)
@@ -245,7 +246,7 @@ def run(
*,
checkpoint_id: Optional[str] = None,
batch_parameters: Optional[BatchParameters] = None,
expectation_parameters: Optional[dict[str, Any]] = None,
expectation_parameters: Optional[SuiteParameterDict] = None,
result_format: ResultFormatUnion = DEFAULT_RESULT_FORMAT,
run_id: RunIdentifier | None = None,
) -> ExpectationSuiteValidationResult:
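`ValidationDefinition.run` receives the same `SuiteParameterDict` annotation; a sketch of the call, assuming `validation_definition` was created elsewhere:

```python
# Assumes `validation_definition` is an existing ValidationDefinition; values are illustrative.
result = validation_definition.run(
    expectation_parameters={
        "expect_passenger_max_to_be_above": 1,
        "expect_passenger_max_to_be_below": 6,
    },
)
print(result.success)
```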
20 changes: 16 additions & 4 deletions great_expectations/datasource/fluent/interfaces.py
@@ -69,6 +69,7 @@
from typing_extensions import TypeAlias, TypeGuard

from great_expectations.core.result_format import ResultFormatUnion
from great_expectations.core.suite_parameters import SuiteParameterDict

MappingIntStrAny = Mapping[Union[int, str], Any]
AbstractSetIntStr = AbstractSet[Union[int, str]]
@@ -1122,6 +1123,7 @@ def validate(
expect: Expectation,
*,
result_format: ResultFormatUnion = DEFAULT_RESULT_FORMAT,
expectation_parameters: Optional[SuiteParameterDict] = None,
) -> ExpectationValidationResult: ...

@overload
@@ -1130,6 +1132,7 @@ def validate(
expect: ExpectationSuite,
*,
result_format: ResultFormatUnion = DEFAULT_RESULT_FORMAT,
expectation_parameters: Optional[SuiteParameterDict] = None,
) -> ExpectationSuiteValidationResult: ...

@public_api
@@ -1138,14 +1141,19 @@ def validate(
expect: Expectation | ExpectationSuite,
*,
result_format: ResultFormatUnion = DEFAULT_RESULT_FORMAT,
expectation_parameters: Optional[SuiteParameterDict] = None,
) -> ExpectationValidationResult | ExpectationSuiteValidationResult:
from great_expectations.core import ExpectationSuite
from great_expectations.expectations.expectation import Expectation

if isinstance(expect, Expectation):
return self._validate_expectation(expect, result_format=result_format)
return self._validate_expectation(
expect, result_format=result_format, expectation_parameters=expectation_parameters
)
elif isinstance(expect, ExpectationSuite):
return self._validate_expectation_suite(expect, result_format=result_format)
return self._validate_expectation_suite(
expect, result_format=result_format, expectation_parameters=expectation_parameters
)
else:
# If we are type checking, we should never fall through to this case. However, exploratory # noqa: E501
# workflows are not being type checked.
@@ -1157,19 +1165,23 @@ def _validate_expectation(
self,
expect: Expectation,
result_format: ResultFormatUnion,
expectation_parameters: Optional[SuiteParameterDict] = None,
) -> ExpectationValidationResult:
return self._create_validator(
result_format=result_format,
).validate_expectation(expect)
).validate_expectation(expectation=expect, expectation_parameters=expectation_parameters)

def _validate_expectation_suite(
self,
expect: ExpectationSuite,
result_format: ResultFormatUnion,
expectation_parameters: Optional[SuiteParameterDict] = None,
) -> ExpectationSuiteValidationResult:
return self._create_validator(
result_format=result_format,
).validate_expectation_suite(expect)
).validate_expectation_suite(
expectation_suite=expect, expectation_parameters=expectation_parameters
)

def _create_validator(self, *, result_format: ResultFormatUnion) -> V1Validator:
from great_expectations.validator.v1_validator import Validator as V1Validator
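The two overloads above correspond to two call shapes on `Batch.validate`; a brief sketch with `batch`, `expectation`, `suite`, and `params` assumed from the earlier examples:

```python
# Single Expectation -> ExpectationValidationResult
single_result = batch.validate(expectation, expectation_parameters=params)
# ExpectationSuite -> ExpectationSuiteValidationResult
suite_result = batch.validate(suite, expectation_parameters=params)
```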
18 changes: 11 additions & 7 deletions great_expectations/validator/v1_validator.py
@@ -1,7 +1,7 @@
from __future__ import annotations

from functools import cached_property
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Optional

from great_expectations import __version__ as ge_version
from great_expectations.core.expectation_validation_result import (
@@ -21,6 +21,7 @@
from great_expectations.core import ExpectationSuite
from great_expectations.core.batch_definition import BatchDefinition
from great_expectations.core.result_format import ResultFormatUnion
from great_expectations.core.suite_parameters import SuiteParameterDict
from great_expectations.datasource.fluent.batch_request import BatchParameters
from great_expectations.expectations.expectation import (
Expectation,
@@ -49,23 +50,26 @@ def __init__(
def validate_expectation(
self,
expectation: Expectation,
expectation_parameters: Optional[dict[str, Any]] = None,
expectation_parameters: Optional[SuiteParameterDict] = None,
) -> ExpectationValidationResult:
"""Run a single expectation against the batch definition"""
results = self._validate_expectation_configs([expectation.configuration])
results = self._validate_expectation_configs(
expectation_configs=[expectation.configuration],
expectation_parameters=expectation_parameters,
)

assert len(results) == 1
return results[0]

def validate_expectation_suite(
self,
expectation_suite: ExpectationSuite,
expectation_parameters: Optional[dict[str, Any]] = None,
expectation_parameters: Optional[SuiteParameterDict] = None,
) -> ExpectationSuiteValidationResult:
"""Run an expectation suite against the batch definition"""
results = self._validate_expectation_configs(
expectation_suite.expectation_configurations,
expectation_parameters,
expectation_configs=expectation_suite.expectation_configurations,
expectation_parameters=expectation_parameters,
)
statistics = calc_validation_statistics(results)

@@ -112,7 +116,7 @@ def _wrapped_validator(self) -> OldValidator:
def _validate_expectation_configs(
self,
expectation_configs: list[ExpectationConfiguration],
expectation_parameters: Optional[dict[str, Any]] = None,
expectation_parameters: Optional[SuiteParameterDict] = None,
) -> list[ExpectationValidationResult]:
"""Run a list of expectation configurations against the batch definition"""
processed_expectation_configs = self._wrapped_validator.process_expectations_for_validation(
49 changes: 49 additions & 0 deletions tests/datasource/fluent/test_batch.py
@@ -65,6 +65,55 @@ def test_batch_validate_expectation_suite(
assert result.success is True


@pytest.mark.filesystem
def test_batch_validate_expectation_with_expectation_params(
pandas_setup: Tuple[AbstractDataContext, Batch],
):
_, batch = pandas_setup

expectation = gx.expectations.ExpectColumnMaxToBeBetween(
column="passenger_count",
min_value={"$PARAMETER": "expect_passenger_max_to_be_above"},
max_value={"$PARAMETER": "expect_passenger_max_to_be_below"},
)
result = batch.validate(
expectation,
expectation_parameters={
"expect_passenger_max_to_be_above": 1,
"expect_passenger_max_to_be_below": 10,
},
)
# Asserts on result
assert result.success is True


@pytest.mark.filesystem
def test_batch_validate_expectation_suite_with_expectation_params(
pandas_setup: Tuple[AbstractDataContext, Batch],
):
context, batch = pandas_setup

# Make Expectation Suite
suite = context.suites.add(ExpectationSuite(name="my_suite"))
suite.add_expectation(
gx.expectations.ExpectColumnMaxToBeBetween(
column="passenger_count",
min_value={"$PARAMETER": "expect_passenger_max_to_be_above"},
max_value={"$PARAMETER": "expect_passenger_max_to_be_below"},
)
)
# Validate
result = batch.validate(
suite,
expectation_parameters={
"expect_passenger_max_to_be_above": 1,
"expect_passenger_max_to_be_below": 10,
},
)
# Asserts on result
assert result.success is True


@pytest.mark.filesystem
def test_batch_validate_with_updated_expectation(
pandas_setup: Tuple[AbstractDataContext, Batch],
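A complementary negative case, not part of this commit, sketched on the assumption that the sample data's maximum `passenger_count` falls below the lower bound used here:

```python
@pytest.mark.filesystem
def test_batch_validate_expectation_with_failing_expectation_params(
    pandas_setup: Tuple[AbstractDataContext, Batch],
):
    # Hypothetical test, not in the commit: bounds chosen so that the maximum
    # passenger_count in the sample data should fall outside them.
    _, batch = pandas_setup

    expectation = gx.expectations.ExpectColumnMaxToBeBetween(
        column="passenger_count",
        min_value={"$PARAMETER": "expect_passenger_max_to_be_above"},
        max_value={"$PARAMETER": "expect_passenger_max_to_be_below"},
    )
    result = batch.validate(
        expectation,
        expectation_parameters={
            "expect_passenger_max_to_be_above": 100,
            "expect_passenger_max_to_be_below": 200,
        },
    )
    assert result.success is False
```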
