[MAINTENANCE] dry up validation status calc (#8962)
tyler-hoffman authored Nov 15, 2023
1 parent 4ded445 commit a25fe64
Showing 5 changed files with 121 additions and 141 deletions.
40 changes: 5 additions & 35 deletions great_expectations/data_asset/data_asset.py
@@ -12,7 +12,7 @@
from collections import Counter, defaultdict
from collections.abc import Hashable
from functools import wraps
from typing import Any, Dict, List, NamedTuple, Optional, Union
from typing import Any, Dict, List, Optional, Union

from marshmallow import ValidationError

@@ -35,6 +35,9 @@
recursively_convert_to_json_serializable,
)
from great_expectations.exceptions import GreatExpectationsError
from great_expectations.validator.validation_statistics import (
calc_validation_statistics,
)

logger = logging.getLogger(__name__)
logging.captureWarnings(True)
@@ -850,7 +853,7 @@ def validate( # noqa: C901, PLR0912, PLR0913, PLR0915

results.append(result)

statistics = _calc_validation_statistics(results)
statistics = calc_validation_statistics(results)

if only_return_failures:
abbrev_results = []
@@ -1163,36 +1166,3 @@ def test_expectation_function(self, function, *args, **kwargs):

new_function = self.expectation(argspec)(function)
return new_function(self, *args, **kwargs)


class ValidationStatistics(NamedTuple):
evaluated_expectations: int
successful_expectations: int
unsuccessful_expectations: int
success_percent: float | None
success: bool


def _calc_validation_statistics(validation_results) -> ValidationStatistics:
"""
Calculate summary statistics for the validation results and
return ``ExpectationStatistics``.
"""
# calc stats
successful_expectations = sum(exp.success for exp in validation_results)
evaluated_expectations = len(validation_results)
unsuccessful_expectations = evaluated_expectations - successful_expectations
success = successful_expectations == evaluated_expectations
try:
success_percent = successful_expectations / evaluated_expectations * 100
except ZeroDivisionError:
# success_percent = float("nan")
success_percent = None

return ValidationStatistics(
successful_expectations=successful_expectations,
evaluated_expectations=evaluated_expectations,
unsuccessful_expectations=unsuccessful_expectations,
success=success,
success_percent=success_percent,
)
42 changes: 42 additions & 0 deletions great_expectations/validator/validation_statistics.py
@@ -0,0 +1,42 @@
from __future__ import annotations

from typing import TYPE_CHECKING, NamedTuple

if TYPE_CHECKING:
from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)


class ValidationStatistics(NamedTuple):
evaluated_expectations: int
successful_expectations: int
unsuccessful_expectations: int
success_percent: float | None
success: bool


def calc_validation_statistics(
validation_results: list[ExpectationValidationResult],
) -> ValidationStatistics:
"""
Calculate summary statistics for the validation results and
return ``ExpectationStatistics``.
"""
# calc stats
evaluated_expectations = len(validation_results)
successful_expectations = len([exp for exp in validation_results if exp.success])
unsuccessful_expectations = evaluated_expectations - successful_expectations
success = successful_expectations == evaluated_expectations
try:
success_percent = successful_expectations / evaluated_expectations * 100
except ZeroDivisionError:
success_percent = None

return ValidationStatistics(
successful_expectations=successful_expectations,
evaluated_expectations=evaluated_expectations,
unsuccessful_expectations=unsuccessful_expectations,
success=success,
success_percent=success_percent,
)
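
A minimal usage sketch of the new shared helper (not part of this commit): the ExpectationValidationResult construction mirrors the tests added below, and the standalone script around it is hypothetical.

# Hypothetical sketch; mirrors how Validator.validate() and DataAsset.validate()
# now aggregate results via the shared helper.
from great_expectations.core.expectation_validation_result import (
    ExpectationValidationResult,
)
from great_expectations.validator.validation_statistics import (
    calc_validation_statistics,
)

results = [
    ExpectationValidationResult(success=True),
    ExpectationValidationResult(success=False),
]
stats = calc_validation_statistics(results)
print(stats.evaluated_expectations)   # 2
print(stats.successful_expectations)  # 1
print(stats.success_percent)          # 50.0
print(stats.success)                  # False

# With no results, the ZeroDivisionError is caught: success_percent is None
# and success is True.
print(calc_validation_statistics([]).success_percent)  # None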
42 changes: 4 additions & 38 deletions great_expectations/validator/validator.py
@@ -17,7 +17,6 @@
Callable,
Dict,
List,
NamedTuple,
Optional,
Sequence,
Set,
@@ -82,6 +81,9 @@
MetricEdge,
ValidationGraph,
)
from great_expectations.validator.validation_statistics import (
calc_validation_statistics,
)

logger = logging.getLogger(__name__)
logging.captureWarnings(True)
@@ -155,14 +157,6 @@ def get_metric_configurations(self) -> List[MetricConfiguration]:
return list(self.metric_configurations.values())


class ValidationStatistics(NamedTuple):
evaluated_expectations: int
successful_expectations: int
unsuccessful_expectations: int
success_percent: float | None
success: bool


@public_api
class Validator:
"""Validator is the key object used to create Expectations, validate Expectations, and get Metrics for Expectations.
@@ -1691,7 +1685,7 @@ def validate( # noqa: C901, PLR0912, PLR0913, PLR0915
if self._include_rendered_content:
for validation_result in results:
validation_result.render()
statistics = self._calc_validation_statistics(results)
statistics = calc_validation_statistics(results)

if only_return_failures:
abbrev_results = []
@@ -1943,34 +1937,6 @@ def _get_runtime_configuration(

return runtime_configuration

@staticmethod
def _calc_validation_statistics(
validation_results: List[ExpectationValidationResult],
) -> ValidationStatistics:
"""
Calculate summary statistics for the validation results and
return ``ExpectationStatistics``.
"""
# calc stats
evaluated_expectations = len(validation_results)
successful_expectations = len(
[exp for exp in validation_results if exp.success]
)
unsuccessful_expectations = evaluated_expectations - successful_expectations
success = successful_expectations == evaluated_expectations
try:
success_percent = successful_expectations / evaluated_expectations * 100
except ZeroDivisionError:
success_percent = None

return ValidationStatistics(
successful_expectations=successful_expectations,
evaluated_expectations=evaluated_expectations,
unsuccessful_expectations=unsuccessful_expectations,
success=success,
success_percent=success_percent,
)

def convert_to_checkpoint_validations_list(
self,
) -> list[CheckpointValidationConfig]:
68 changes: 0 additions & 68 deletions tests/test_great_expectations.py
@@ -13,13 +13,6 @@
expectationSuiteSchema,
)
from great_expectations.core.expectation_suite import ExpectationSuite
from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)
from great_expectations.data_asset.data_asset import (
ValidationStatistics,
_calc_validation_statistics,
)
from great_expectations.data_context.util import file_relative_path
from great_expectations.dataset import MetaPandasDataset, PandasDataset
from great_expectations.exceptions import InvalidCacheValueError
@@ -260,67 +253,6 @@ def test_validate_catch_invalid_parameter(empty_data_context):
)


@pytest.mark.unit
def test_stats_no_expectations():
expectation_results = []
actual = _calc_validation_statistics(expectation_results)

# pay attention to these two
assert None is actual.success_percent
assert True is actual.success
# the rest is boring
assert 0 == actual.successful_expectations
assert 0 == actual.evaluated_expectations
assert 0 == actual.unsuccessful_expectations


@pytest.mark.unit
def test_stats_no_successful_expectations():
expectation_results = [ExpectationValidationResult(success=False)]
actual = _calc_validation_statistics(expectation_results)
expected = ValidationStatistics(1, 0, 1, 0.0, False)
assert expected == actual

expectation_results = [
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=False),
]
actual = _calc_validation_statistics(expectation_results)
expected = ValidationStatistics(3, 0, 3, 0.0, False)
assert expected == actual


@pytest.mark.unit
def test_stats_all_successful_expectations():
expectation_results = [
ExpectationValidationResult(success=True),
]
actual = _calc_validation_statistics(expectation_results)
expected = ValidationStatistics(1, 1, 0, 100.0, True)
assert expected == actual

expectation_results = [
ExpectationValidationResult(success=True),
ExpectationValidationResult(success=True),
ExpectationValidationResult(success=True),
]
actual = _calc_validation_statistics(expectation_results)
expected = ValidationStatistics(3, 3, 0, 100.0, True)
assert expected == actual


@pytest.mark.unit
def test_stats_mixed_expectations():
expectation_results = [
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=True),
]
actual = _calc_validation_statistics(expectation_results)
expected = ValidationStatistics(2, 1, 1, 50.0, False)
assert expected == actual


class TestIO(unittest.TestCase):
@pytest.mark.filesystem
def test_read_csv(self):
70 changes: 70 additions & 0 deletions tests/validator/test_validation_statistics.py
@@ -0,0 +1,70 @@
import pytest

from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)
from great_expectations.validator.validation_statistics import (
ValidationStatistics,
calc_validation_statistics,
)


@pytest.mark.unit
def test_stats_no_expectations():
expectation_results = []
actual = calc_validation_statistics(expectation_results)

# pay attention to these two
assert None is actual.success_percent
assert True is actual.success
# the rest is boring
assert 0 == actual.successful_expectations
assert 0 == actual.evaluated_expectations
assert 0 == actual.unsuccessful_expectations


@pytest.mark.unit
def test_stats_no_successful_expectations():
expectation_results = [ExpectationValidationResult(success=False)]
actual = calc_validation_statistics(expectation_results)
expected = ValidationStatistics(1, 0, 1, 0.0, False)
assert expected == actual

expectation_results = [
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=False),
]
actual = calc_validation_statistics(expectation_results)
expected = ValidationStatistics(3, 0, 3, 0.0, False)
assert expected == actual


@pytest.mark.unit
def test_stats_all_successful_expectations():
expectation_results = [
ExpectationValidationResult(success=True),
]
actual = calc_validation_statistics(expectation_results)
expected = ValidationStatistics(1, 1, 0, 100.0, True)
assert expected == actual

expectation_results = [
ExpectationValidationResult(success=True),
ExpectationValidationResult(success=True),
ExpectationValidationResult(success=True),
]
actual = calc_validation_statistics(expectation_results)
expected = ValidationStatistics(3, 3, 0, 100.0, True)
assert expected == actual


@pytest.mark.unit
def test_stats_mixed_expectations():
expectation_results = [
ExpectationValidationResult(success=False),
ExpectationValidationResult(success=True),
]
actual = calc_validation_statistics(expectation_results)
expected = ValidationStatistics(2, 1, 1, 50.0, False)
assert expected == actual
