[MAINTENANCE] dry up validation status calc #8962

Merged 6 commits on Nov 15, 2023
40 changes: 5 additions & 35 deletions great_expectations/data_asset/data_asset.py
@@ -12,7 +12,7 @@
from collections import Counter, defaultdict
from collections.abc import Hashable
from functools import wraps
from typing import Any, Dict, List, NamedTuple, Optional, Union
from typing import Any, Dict, List, Optional, Union

from marshmallow import ValidationError

@@ -35,6 +35,9 @@
recursively_convert_to_json_serializable,
)
from great_expectations.exceptions import GreatExpectationsError
from great_expectations.validator.validation_statistics import (
calc_validation_statistics,
)

logger = logging.getLogger(__name__)
logging.captureWarnings(True)
@@ -850,7 +853,7 @@ def validate( # noqa: C901, PLR0912, PLR0913, PLR0915

results.append(result)

statistics = _calc_validation_statistics(results)
statistics = calc_validation_statistics(results)

if only_return_failures:
abbrev_results = []
@@ -1163,36 +1166,3 @@ def test_expectation_function(self, function, *args, **kwargs):

new_function = self.expectation(argspec)(function)
return new_function(self, *args, **kwargs)


class ValidationStatistics(NamedTuple):
evaluated_expectations: int
successful_expectations: int
unsuccessful_expectations: int
success_percent: float | None
success: bool


def _calc_validation_statistics(validation_results) -> ValidationStatistics:
"""
Calculate summary statistics for the validation results and
return ``ExpectationStatistics``.
"""
# calc stats
successful_expectations = sum(exp.success for exp in validation_results)
evaluated_expectations = len(validation_results)
unsuccessful_expectations = evaluated_expectations - successful_expectations
success = successful_expectations == evaluated_expectations
try:
success_percent = successful_expectations / evaluated_expectations * 100
except ZeroDivisionError:
# success_percent = float("nan")
success_percent = None

return ValidationStatistics(
successful_expectations=successful_expectations,
evaluated_expectations=evaluated_expectations,
unsuccessful_expectations=unsuccessful_expectations,
success=success,
success_percent=success_percent,
)
42 changes: 42 additions & 0 deletions great_expectations/validator/validation_statistics.py
@@ -0,0 +1,42 @@
from __future__ import annotations

from typing import TYPE_CHECKING, NamedTuple

if TYPE_CHECKING:
from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)


class ValidationStatistics(NamedTuple):
evaluated_expectations: int
successful_expectations: int
unsuccessful_expectations: int
success_percent: float | None
success: bool


def calc_validation_statistics(
validation_results: list[ExpectationValidationResult],
) -> ValidationStatistics:
"""
Calculate summary statistics for the validation results and
return ``ValidationStatistics``.
"""
Comment on lines +19 to +25

Member: Do we envision this ever living on an object?

Contributor Author: IDK, I thought about just making it a static method on ValidationStatistics. I don't really have strong opinions though. Seems like wrapping it in another class might be YAGNI though.

# calc stats
evaluated_expectations = len(validation_results)
successful_expectations = len([exp for exp in validation_results if exp.success])
unsuccessful_expectations = evaluated_expectations - successful_expectations
success = successful_expectations == evaluated_expectations
try:
success_percent = successful_expectations / evaluated_expectations * 100
except ZeroDivisionError:
success_percent = None

return ValidationStatistics(
successful_expectations=successful_expectations,
evaluated_expectations=evaluated_expectations,
unsuccessful_expectations=unsuccessful_expectations,
success=success,
success_percent=success_percent,
)
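
On the review question above about whether this logic should ever live on an object: here is a minimal sketch of the alternative the author mentions, making the calculation a method on ValidationStatistics itself. This is purely illustrative and not part of the PR; the method name from_results is hypothetical, and the ExpectationValidationResult interface is assumed to match what the new module already imports.

from __future__ import annotations

from typing import TYPE_CHECKING, NamedTuple

if TYPE_CHECKING:
    from great_expectations.core.expectation_validation_result import (
        ExpectationValidationResult,
    )


class ValidationStatistics(NamedTuple):
    evaluated_expectations: int
    successful_expectations: int
    unsuccessful_expectations: int
    success_percent: float | None
    success: bool

    @classmethod
    def from_results(
        cls, validation_results: list[ExpectationValidationResult]
    ) -> ValidationStatistics:
        """Build summary statistics directly from a list of validation results."""
        evaluated = len(validation_results)
        successful = sum(1 for result in validation_results if result.success)
        return cls(
            evaluated_expectations=evaluated,
            successful_expectations=successful,
            unsuccessful_expectations=evaluated - successful,
            # None (rather than NaN) when nothing was evaluated
            success_percent=(successful / evaluated * 100) if evaluated else None,
            success=successful == evaluated,
        )

Call sites would then read ValidationStatistics.from_results(results) instead of calc_validation_statistics(results); the free function chosen in the PR keeps the NamedTuple a plain data container, in line with the YAGNI point above.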
42 changes: 4 additions & 38 deletions great_expectations/validator/validator.py
@@ -17,7 +17,6 @@
Callable,
Dict,
List,
NamedTuple,
Optional,
Sequence,
Set,
@@ -82,6 +81,9 @@
MetricEdge,
ValidationGraph,
)
from great_expectations.validator.validation_statistics import (
calc_validation_statistics,
)

logger = logging.getLogger(__name__)
logging.captureWarnings(True)
@@ -155,14 +157,6 @@ def get_metric_configurations(self) -> List[MetricConfiguration]:
return list(self.metric_configurations.values())


class ValidationStatistics(NamedTuple):
evaluated_expectations: int
successful_expectations: int
unsuccessful_expectations: int
success_percent: float | None
success: bool


@public_api
class Validator:
"""Validator is the key object used to create Expectations, validate Expectations, and get Metrics for Expectations.
@@ -1691,7 +1685,7 @@ def validate( # noqa: C901, PLR0912, PLR0913, PLR0915
if self._include_rendered_content:
for validation_result in results:
validation_result.render()
statistics = self._calc_validation_statistics(results)
statistics = calc_validation_statistics(results)

if only_return_failures:
abbrev_results = []
@@ -1943,34 +1937,6 @@ def _get_runtime_configuration(

return runtime_configuration

@staticmethod
def _calc_validation_statistics(
validation_results: List[ExpectationValidationResult],
) -> ValidationStatistics:
"""
Calculate summary statistics for the validation results and
return ``ExpectationStatistics``.
"""
# calc stats
evaluated_expectations = len(validation_results)
successful_expectations = len(
[exp for exp in validation_results if exp.success]
)
unsuccessful_expectations = evaluated_expectations - successful_expectations
success = successful_expectations == evaluated_expectations
try:
success_percent = successful_expectations / evaluated_expectations * 100
except ZeroDivisionError:
success_percent = None

return ValidationStatistics(
successful_expectations=successful_expectations,
evaluated_expectations=evaluated_expectations,
unsuccessful_expectations=unsuccessful_expectations,
success=success,
success_percent=success_percent,
)

def convert_to_checkpoint_validations_list(
self,
) -> list[CheckpointValidationConfig]:
68 changes: 0 additions & 68 deletions tests/test_great_expectations.py
@@ -13,13 +13,6 @@
expectationSuiteSchema,
)
from great_expectations.core.expectation_suite import ExpectationSuite
from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)
from great_expectations.data_asset.data_asset import (
ValidationStatistics,
_calc_validation_statistics,
)
from great_expectations.data_context.util import file_relative_path
from great_expectations.dataset import MetaPandasDataset, PandasDataset
from great_expectations.exceptions import InvalidCacheValueError
@@ -260,67 +253,6 @@ def test_validate_catch_invalid_parameter(empty_data_context):
)


@pytest.mark.unit
def test_stats_no_expectations():
expectation_results = []
actual = _calc_validation_statistics(expectation_results)

# pay attention to these two
assert None is actual.success_percent
assert True is actual.success
# the rest is boring
assert 0 == actual.successful_expectations
assert 0 == actual.evaluated_expectations
assert 0 == actual.unsuccessful_expectations


@pytest.mark.unit
def test_stats_no_successful_expectations():
expectation_results = [ExpectationValidationResult(success=False)]
actual = _calc_validation_statistics(expectation_results)
expected = ValidationStatistics(1, 0, 1, 0.0, False)
assert expected == actual

expectation_results = [
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=False),
]
actual = _calc_validation_statistics(expectation_results)
expected = ValidationStatistics(3, 0, 3, 0.0, False)
assert expected == actual


@pytest.mark.unit
def test_stats_all_successful_expectations():
expectation_results = [
ExpectationValidationResult(success=True),
]
actual = _calc_validation_statistics(expectation_results)
expected = ValidationStatistics(1, 1, 0, 100.0, True)
assert expected == actual

expectation_results = [
ExpectationValidationResult(success=True),
ExpectationValidationResult(success=True),
ExpectationValidationResult(success=True),
]
actual = _calc_validation_statistics(expectation_results)
expected = ValidationStatistics(3, 3, 0, 100.0, True)
assert expected == actual


@pytest.mark.unit
def test_stats_mixed_expectations():
expectation_results = [
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=True),
]
actual = _calc_validation_statistics(expectation_results)
expected = ValidationStatistics(2, 1, 1, 50.0, False)
assert expected == actual


class TestIO(unittest.TestCase):
@pytest.mark.filesystem
def test_read_csv(self):
70 changes: 70 additions & 0 deletions tests/validator/test_validation_statistics.py
@@ -0,0 +1,70 @@
import pytest

from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)
from great_expectations.validator.validation_statistics import (
ValidationStatistics,
calc_validation_statistics,
)


@pytest.mark.unit
def test_stats_no_expectations():
expectation_results = []
actual = calc_validation_statistics(expectation_results)

# pay attention to these two
assert None is actual.success_percent
assert True is actual.success
# the rest is boring
assert 0 == actual.successful_expectations
assert 0 == actual.evaluated_expectations
assert 0 == actual.unsuccessful_expectations


@pytest.mark.unit
def test_stats_no_successful_expectations():
expectation_results = [ExpectationValidationResult(success=False)]
actual = calc_validation_statistics(expectation_results)
expected = ValidationStatistics(1, 0, 1, 0.0, False)
assert expected == actual

expectation_results = [
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=False),
]
actual = calc_validation_statistics(expectation_results)
expected = ValidationStatistics(3, 0, 3, 0.0, False)
assert expected == actual


@pytest.mark.unit
def test_stats_all_successful_expectations():
expectation_results = [
ExpectationValidationResult(success=True),
]
actual = calc_validation_statistics(expectation_results)
expected = ValidationStatistics(1, 1, 0, 100.0, True)
assert expected == actual

expectation_results = [
ExpectationValidationResult(success=True),
ExpectationValidationResult(success=True),
ExpectationValidationResult(success=True),
]
actual = calc_validation_statistics(expectation_results)
expected = ValidationStatistics(3, 3, 0, 100.0, True)
assert expected == actual


@pytest.mark.unit
def test_stats_mixed_expectations():
expectation_results = [
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=True),
]
actual = calc_validation_statistics(expectation_results)
expected = ValidationStatistics(2, 1, 1, 50.0, False)
assert expected == actual
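
For reference, a minimal usage sketch of the relocated helper outside a test, mirroring the cases above. The simplified ExpectationValidationResult construction follows the tests; whether calc_validation_statistics is considered public API is not established by this PR.

from great_expectations.core.expectation_validation_result import (
    ExpectationValidationResult,
)
from great_expectations.validator.validation_statistics import (
    calc_validation_statistics,
)

# Two passing results and one failing one.
results = [
    ExpectationValidationResult(success=True),
    ExpectationValidationResult(success=True),
    ExpectationValidationResult(success=False),
]

stats = calc_validation_statistics(results)
print(stats.evaluated_expectations)     # 3
print(stats.successful_expectations)    # 2
print(stats.unsuccessful_expectations)  # 1
print(stats.success)                    # False
print(stats.success_percent)            # ~66.67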