diff --git a/great_expectations/checkpoint/checkpoint.py b/great_expectations/checkpoint/checkpoint.py index c331a74d48cb..8c8e46f8e1fe 100644 --- a/great_expectations/checkpoint/checkpoint.py +++ b/great_expectations/checkpoint/checkpoint.py @@ -27,10 +27,10 @@ root_validator, validator, ) -from great_expectations.core.added_diagnostics import CheckpointAddedDiagnostics from great_expectations.core.expectation_validation_result import ( ExpectationSuiteValidationResult, # noqa: TCH001 ) +from great_expectations.core.freshness_diagnostics import CheckpointFreshnessDiagnostics from great_expectations.core.result_format import DEFAULT_RESULT_FORMAT, ResultFormatUnion from great_expectations.core.run_identifier import RunIdentifier from great_expectations.core.serdes import _IdentifierBundle @@ -165,8 +165,8 @@ def run( if not self.validation_definitions: raise CheckpointRunWithoutValidationDefinitionError() - diagnostics = self.is_added() - if not diagnostics.is_added: + diagnostics = self.is_fresh() + if not diagnostics.success: # The checkpoint itself is not added but all children are - we can add it for the user if not diagnostics.parent_added and diagnostics.children_added: self._add_to_store() @@ -269,11 +269,11 @@ def _sort_actions(self) -> List[CheckpointAction]: return priority_actions + secondary_actions - def is_added(self) -> CheckpointAddedDiagnostics: - checkpoint_diagnostics = CheckpointAddedDiagnostics( + def is_fresh(self) -> CheckpointFreshnessDiagnostics: + checkpoint_diagnostics = CheckpointFreshnessDiagnostics( errors=[] if self.id else [CheckpointNotAddedError(name=self.name)] ) - validation_definition_diagnostics = [vd.is_added() for vd in self.validation_definitions] + validation_definition_diagnostics = [vd.is_fresh() for vd in self.validation_definitions] checkpoint_diagnostics.update_with_children(*validation_definition_diagnostics) return checkpoint_diagnostics diff --git a/great_expectations/core/batch_definition.py b/great_expectations/core/batch_definition.py index ea67a6d0ed3f..9232affc5962 100644 --- a/great_expectations/core/batch_definition.py +++ b/great_expectations/core/batch_definition.py @@ -7,13 +7,18 @@ # if we move this import into the TYPE_CHECKING block, we need to provide the # Partitioner class when we update forward refs, so we just import here. -from great_expectations.core.added_diagnostics import ( - BatchDefinitionAddedDiagnostics, +from great_expectations.core.freshness_diagnostics import ( + BatchDefinitionFreshnessDiagnostics, ) from great_expectations.core.partitioners import ColumnPartitioner, FileNamePartitioner from great_expectations.core.serdes import _EncodedValidationData, _IdentifierBundle +from great_expectations.data_context.data_context.context_factory import project_manager from great_expectations.exceptions.exceptions import ( BatchDefinitionNotAddedError, + BatchDefinitionNotFoundError, + BatchDefinitionNotFreshError, + DataAssetNotFoundError, + DatasourceNotFoundError, ) if TYPE_CHECKING: @@ -21,7 +26,7 @@ BatchParameters, BatchRequest, ) - from great_expectations.datasource.fluent.interfaces import Batch, DataAsset + from great_expectations.datasource.fluent.interfaces import Batch, DataAsset, Datasource # Depending on the Asset PartitionerT = TypeVar("PartitionerT", ColumnPartitioner, FileNamePartitioner, None) @@ -76,14 +81,62 @@ def get_batch(self, batch_parameters: Optional[BatchParameters] = None) -> Batch batch_request = self.build_batch_request(batch_parameters=batch_parameters) return self.data_asset.get_batch(batch_request) - def is_added(self) -> BatchDefinitionAddedDiagnostics: - return BatchDefinitionAddedDiagnostics( + def is_fresh(self) -> BatchDefinitionFreshnessDiagnostics: + diagnostics = self._is_added() + if not diagnostics.success: + return diagnostics + return self._is_fresh() + + def _is_added(self) -> BatchDefinitionFreshnessDiagnostics: + return BatchDefinitionFreshnessDiagnostics( errors=[] if self.id else [BatchDefinitionNotAddedError(name=self.name)] ) + def _is_fresh(self) -> BatchDefinitionFreshnessDiagnostics: + datasource_dict = project_manager.get_datasources() + + datasource: Datasource | None + try: + datasource = datasource_dict[self.data_asset.datasource.name] + except KeyError: + datasource = None + if not datasource: + return BatchDefinitionFreshnessDiagnostics( + errors=[ + DatasourceNotFoundError( + f"Could not find datasource '{self.data_asset.datasource.name}'" + ) + ] + ) + + try: + asset = datasource.get_asset(self.data_asset.name) + except LookupError: + asset = None + if not asset: + return BatchDefinitionFreshnessDiagnostics( + errors=[DataAssetNotFoundError(f"Could not find asset '{self.data_asset.name}'")] + ) + + batch_def: BatchDefinition | None + try: + batch_def = asset.get_batch_definition(self.name) + except KeyError: + batch_def = None + if not batch_def: + return BatchDefinitionFreshnessDiagnostics( + errors=[ + BatchDefinitionNotFoundError(f"Could not find batch definition '{self.name}'") + ] + ) + + return BatchDefinitionFreshnessDiagnostics( + errors=[] if self == batch_def else [BatchDefinitionNotFreshError(name=self.name)] + ) + def identifier_bundle(self) -> _EncodedValidationData: # Utilized as a custom json_encoder - diagnostics = self.is_added() + diagnostics = self.is_fresh() diagnostics.raise_for_error() asset = self.data_asset diff --git a/great_expectations/core/expectation_suite.py b/great_expectations/core/expectation_suite.py index 163402b0ed33..0107bad285eb 100644 --- a/great_expectations/core/expectation_suite.py +++ b/great_expectations/core/expectation_suite.py @@ -28,14 +28,19 @@ ExpectationSuiteExpectationDeletedEvent, ExpectationSuiteExpectationUpdatedEvent, ) +from great_expectations.compatibility.pydantic import ValidationError as PydanticValidationError from great_expectations.compatibility.typing_extensions import override -from great_expectations.core.added_diagnostics import ( - ExpectationSuiteAddedDiagnostics, +from great_expectations.core.freshness_diagnostics import ( + ExpectationSuiteFreshnessDiagnostics, ) from great_expectations.core.serdes import _IdentifierBundle from great_expectations.data_context.data_context.context_factory import project_manager from great_expectations.exceptions.exceptions import ( + ExpectationSuiteError, ExpectationSuiteNotAddedError, + ExpectationSuiteNotFoundError, + ExpectationSuiteNotFreshError, + StoreBackendError, ) from great_expectations.types import SerializableDictDot from great_expectations.util import ( @@ -45,6 +50,7 @@ if TYPE_CHECKING: from great_expectations.alias_types import JSONValues + from great_expectations.data_context.store.expectations_store import ExpectationsStore from great_expectations.expectations.expectation import Expectation from great_expectations.expectations.expectation_configuration import ( ExpectationConfiguration, @@ -105,7 +111,9 @@ def __init__( # noqa: PLR0913 self.meta = meta self.notes = notes - self._store = project_manager.get_expectations_store() + @property + def _store(self) -> ExpectationsStore: + return project_manager.get_expectations_store() @property def _include_rendered_content(self) -> bool: @@ -248,11 +256,43 @@ def save(self) -> None: key = self._store.get_key(name=self.name, id=self.id) self._store.update(key=key, value=self) - def is_added(self) -> ExpectationSuiteAddedDiagnostics: - return ExpectationSuiteAddedDiagnostics( + def is_fresh(self) -> ExpectationSuiteFreshnessDiagnostics: + diagnostics = self._is_added() + if not diagnostics.success: + return diagnostics + return self._is_fresh() + + def _is_added(self) -> ExpectationSuiteFreshnessDiagnostics: + return ExpectationSuiteFreshnessDiagnostics( errors=[] if self.id else [ExpectationSuiteNotAddedError(name=self.name)] ) + def _is_fresh(self) -> ExpectationSuiteFreshnessDiagnostics: + suite_dict: dict | None + try: + key = self._store.get_key(name=self.name, id=self.id) + suite_dict = self._store.get(key=key) + except StoreBackendError: + suite_dict = None + if not suite_dict: + return ExpectationSuiteFreshnessDiagnostics( + errors=[ExpectationSuiteNotFoundError(name=self.name)] + ) + + suite: ExpectationSuite | None + try: + suite = self._store.deserialize_suite_dict(suite_dict=suite_dict) + except PydanticValidationError: + suite = None + if not suite: + return ExpectationSuiteFreshnessDiagnostics( + errors=[ExpectationSuiteError(f"Could not deserialize suite '{self.name}'")] + ) + + return ExpectationSuiteFreshnessDiagnostics( + errors=[] if self == suite else [ExpectationSuiteNotFreshError(name=self.name)] + ) + def _has_been_saved(self) -> bool: """Has this ExpectationSuite been persisted to a Store?""" # todo: this should only check local keys instead of potentially querying the remote backend @@ -599,7 +639,7 @@ def render(self) -> None: def identifier_bundle(self) -> _IdentifierBundle: # Utilized as a custom json_encoder - diagnostics = self.is_added() + diagnostics = self.is_fresh() diagnostics.raise_for_error() return _IdentifierBundle(name=self.name, id=self.id) diff --git a/great_expectations/core/factory/suite_factory.py b/great_expectations/core/factory/suite_factory.py index 4bd7b10ba3f9..ea755e623c13 100644 --- a/great_expectations/core/factory/suite_factory.py +++ b/great_expectations/core/factory/suite_factory.py @@ -96,7 +96,7 @@ def get(self, name: str) -> ExpectationSuite: if not self._store.has_key(key=key): raise DataContextError(f"ExpectationSuite with name {name} was not found.") # noqa: TRY003 suite_dict = self._store.get(key=key) - return self._deserialize(suite_dict) + return self._store.deserialize_suite_dict(suite_dict) @public_api @override @@ -113,7 +113,7 @@ def all(self) -> Iterable[ExpectationSuite]: bad_dicts: list[Any] = [] for suite_dict in dicts: try: - deserializable_suites.append(self._deserialize(suite_dict)) + deserializable_suites.append(self._store.deserialize_suite_dict(suite_dict)) except PydanticValidationError as e: bad_dicts.append(suite_dict) self._store.submit_all_deserialization_event(e) @@ -121,10 +121,3 @@ def all(self) -> Iterable[ExpectationSuite]: self._store.submit_all_deserialization_event(e) raise return deserializable_suites - - def _deserialize(self, suite_dict: dict) -> ExpectationSuite: - # TODO: Move this logic to the store - suite = ExpectationSuite(**suite_dict) - if self._include_rendered_content: - suite.render() - return suite diff --git a/great_expectations/core/added_diagnostics.py b/great_expectations/core/freshness_diagnostics.py similarity index 57% rename from great_expectations/core/added_diagnostics.py rename to great_expectations/core/freshness_diagnostics.py index 97ce61f638a4..d90ea72f3caf 100644 --- a/great_expectations/core/added_diagnostics.py +++ b/great_expectations/core/freshness_diagnostics.py @@ -1,6 +1,5 @@ from __future__ import annotations -from abc import abstractmethod from dataclasses import dataclass from typing import ClassVar, Tuple, Type @@ -10,7 +9,7 @@ CheckpointNotAddedError, CheckpointRelatedResourcesNotAddedError, ExpectationSuiteNotAddedError, - ResourceNotAddedError, + GreatExpectationsError, ResourcesNotAddedError, ValidationDefinitionNotAddedError, ValidationDefinitionRelatedResourcesNotAddedError, @@ -18,59 +17,59 @@ @dataclass -class AddedDiagnostics: +class FreshnessDiagnostics: """ - Wrapper around a list of errors; used to determine if a resource has been added successfully. + Wrapper around a list of errors; used to determine if a resource has been added successfully + and is "fresh" or up-to-date with its persisted equivalent. Note that some resources may have dependencies on other resources - in order to be considered - "added", the root resource and all of its dependencies must be added successfully. + "fresh", the root resource and all of its dependencies must be "fresh". For example, a Checkpoint may have dependencies on ValidationDefinitions, which may have dependencies on ExpectationSuites and BatchDefinitions. - GX requires that all resources are added successfully before they can be used to prevent + GX requires that all resources are persisted successfully before they can be used to prevent unexpected behavior. """ - errors: list[ResourceNotAddedError] + raise_for_error_class: ClassVar[Type[ResourcesNotAddedError]] = ResourcesNotAddedError + errors: list[GreatExpectationsError] @property - def is_added(self) -> bool: + def success(self) -> bool: return len(self.errors) == 0 - @abstractmethod def raise_for_error(self) -> None: """ Conditionally raises an error if the resource has not been added successfully; should prescribe the correct action(s) to take. """ - raise NotImplementedError + if not self.success: + raise self.raise_for_error_class(errors=self.errors) @dataclass -class _ChildAddedDiagnostics(AddedDiagnostics): - @override - def raise_for_error(self) -> None: - if not self.is_added: - raise self.errors[0] # Child node so only one error +class BatchDefinitionFreshnessDiagnostics(FreshnessDiagnostics): + pass @dataclass -class BatchDefinitionAddedDiagnostics(_ChildAddedDiagnostics): +class ExpectationSuiteFreshnessDiagnostics(FreshnessDiagnostics): pass @dataclass -class ExpectationSuiteAddedDiagnostics(_ChildAddedDiagnostics): - pass +class _ParentFreshnessDiagnostics(FreshnessDiagnostics): + """ + Freshness diagnostics for a class that has a natural parent/child relationship + with other classes. + All errors throughout the hierarchy should be collected in the parent diagnostics object. + """ -@dataclass -class _ParentAddedDiagnostics(AddedDiagnostics): - parent_error_class: ClassVar[Type[ResourceNotAddedError]] - children_error_classes: ClassVar[Tuple[Type[ResourceNotAddedError], ...]] - raise_for_error_class: ClassVar[Type[ResourcesNotAddedError]] + parent_error_class: ClassVar[Type[GreatExpectationsError]] + children_error_classes: ClassVar[Tuple[Type[GreatExpectationsError], ...]] - def update_with_children(self, *children_diagnostics: AddedDiagnostics) -> None: + def update_with_children(self, *children_diagnostics: FreshnessDiagnostics) -> None: for diagnostics in children_diagnostics: # Child errors should be prepended to parent errors so diagnostics are in order self.errors = diagnostics.errors + self.errors @@ -85,14 +84,14 @@ def children_added(self) -> bool: @override def raise_for_error(self) -> None: - if not self.is_added: + if not self.success: raise self.raise_for_error_class(errors=self.errors) @dataclass -class ValidationDefinitionAddedDiagnostics(_ParentAddedDiagnostics): - parent_error_class: ClassVar[Type[ResourceNotAddedError]] = ValidationDefinitionNotAddedError - children_error_classes: ClassVar[Tuple[Type[ResourceNotAddedError], ...]] = ( +class ValidationDefinitionFreshnessDiagnostics(_ParentFreshnessDiagnostics): + parent_error_class: ClassVar[Type[GreatExpectationsError]] = ValidationDefinitionNotAddedError + children_error_classes: ClassVar[Tuple[Type[GreatExpectationsError], ...]] = ( ExpectationSuiteNotAddedError, BatchDefinitionNotAddedError, ) @@ -102,9 +101,9 @@ class ValidationDefinitionAddedDiagnostics(_ParentAddedDiagnostics): @dataclass -class CheckpointAddedDiagnostics(_ParentAddedDiagnostics): - parent_error_class: ClassVar[Type[ResourceNotAddedError]] = CheckpointNotAddedError - children_error_classes: ClassVar[Tuple[Type[ResourceNotAddedError], ...]] = ( +class CheckpointFreshnessDiagnostics(_ParentFreshnessDiagnostics): + parent_error_class: ClassVar[Type[GreatExpectationsError]] = CheckpointNotAddedError + children_error_classes: ClassVar[Tuple[Type[GreatExpectationsError], ...]] = ( ValidationDefinitionNotAddedError, ) raise_for_error_class: ClassVar[Type[ResourcesNotAddedError]] = ( diff --git a/great_expectations/core/validation_definition.py b/great_expectations/core/validation_definition.py index 2e04809601bd..6b27efbba2f4 100644 --- a/great_expectations/core/validation_definition.py +++ b/great_expectations/core/validation_definition.py @@ -12,13 +12,13 @@ validator, ) from great_expectations.constants import DATAFRAME_REPLACEMENT_STR -from great_expectations.core.added_diagnostics import ( - ValidationDefinitionAddedDiagnostics, -) from great_expectations.core.batch_definition import BatchDefinition from great_expectations.core.expectation_suite import ( ExpectationSuite, ) +from great_expectations.core.freshness_diagnostics import ( + ValidationDefinitionFreshnessDiagnostics, +) from great_expectations.core.result_format import DEFAULT_RESULT_FORMAT from great_expectations.core.run_identifier import RunIdentifier from great_expectations.core.serdes import _EncodedValidationData, _IdentifierBundle @@ -123,12 +123,12 @@ def data_source(self) -> Datasource: def _validation_results_store(self) -> ValidationResultsStore: return project_manager.get_validation_results_store() - def is_added(self) -> ValidationDefinitionAddedDiagnostics: - validation_definition_diagnostics = ValidationDefinitionAddedDiagnostics( + def is_fresh(self) -> ValidationDefinitionFreshnessDiagnostics: + validation_definition_diagnostics = ValidationDefinitionFreshnessDiagnostics( errors=[] if self.id else [ValidationDefinitionNotAddedError(name=self.name)] ) - suite_diagnostics = self.suite.is_added() - data_diagnostics = self.data.is_added() + suite_diagnostics = self.suite.is_fresh() + data_diagnostics = self.data.is_fresh() validation_definition_diagnostics.update_with_children(suite_diagnostics, data_diagnostics) return validation_definition_diagnostics @@ -243,8 +243,8 @@ def run( run_id: An identifier for this run. Typically, this should be set to None and it will be generated by this call. """ - diagnostics = self.is_added() - if not diagnostics.is_added: + diagnostics = self.is_fresh() + if not diagnostics.success: # The validation definition itself is not added but all children are - we can add it for the user # noqa: E501 if not diagnostics.parent_added and diagnostics.children_added: self._add_to_store() @@ -326,7 +326,7 @@ def _get_expectation_suite_and_validation_result_ids( def identifier_bundle(self) -> _IdentifierBundle: # Utilized as a custom json_encoder - diagnostics = self.is_added() + diagnostics = self.is_fresh() diagnostics.raise_for_error() return _IdentifierBundle(name=self.name, id=self.id) diff --git a/great_expectations/data_context/store/expectations_store.py b/great_expectations/data_context/store/expectations_store.py index 211dc8f0f00c..b6a1316837d5 100644 --- a/great_expectations/data_context/store/expectations_store.py +++ b/great_expectations/data_context/store/expectations_store.py @@ -341,6 +341,12 @@ def deserialize(self, value): # type: ignore[explicit-override] # FIXME else: raise TypeError(f"Cannot deserialize value of unknown type: {type(value)}") # noqa: TRY003 + def deserialize_suite_dict(self, suite_dict: dict) -> ExpectationSuite: + suite = ExpectationSuite(**suite_dict) + if suite._include_rendered_content: + suite.render() + return suite + def get_key( self, name: str, id: Optional[str] = None ) -> GXCloudIdentifier | ExpectationSuiteIdentifier: diff --git a/great_expectations/exceptions/exceptions.py b/great_expectations/exceptions/exceptions.py index ea35330175b9..4d6a27bfbf99 100644 --- a/great_expectations/exceptions/exceptions.py +++ b/great_expectations/exceptions/exceptions.py @@ -20,6 +20,16 @@ def __init__(self, message) -> None: super().__init__(message) +class GreatExpectationsAggregateError(ValueError): + def __init__(self, errors: list[GreatExpectationsError]) -> None: + self._errors = errors + super().__init__("\n\t" + "\n\t".join(str(e) for e in errors)) + + @property + def errors(self) -> list[GreatExpectationsError]: + return self._errors + + class GreatExpectationsValidationError(ValidationError, GreatExpectationsError): def __init__(self, message, validation_error=None) -> None: self.message = message @@ -42,14 +52,8 @@ class ResourceNotAddedError(DataContextError): pass -class ResourcesNotAddedError(ValueError): - def __init__(self, errors: list[ResourceNotAddedError]) -> None: - self._errors = errors - super().__init__("\n\t" + "\n\t".join(str(e) for e in errors)) - - @property - def errors(self) -> list[ResourceNotAddedError]: - return self._errors +class ResourcesNotAddedError(GreatExpectationsAggregateError): + pass class ExpectationSuiteError(DataContextError): @@ -65,6 +69,14 @@ def __init__(self, name: str) -> None: ) +class ExpectationSuiteNotFreshError(ResourceNotAddedError): + def __init__(self, name: str) -> None: + super().__init__( + f"ExpectationSuite '{name}' has changed since it has last been saved. " + "Please update with `.save()`, then try your action again." + ) + + class ValidationDefinitionError(DataContextError): pass @@ -360,10 +372,10 @@ def __init__(self, module_name, package_name, class_name) -> None: class ExpectationSuiteNotFoundError(GreatExpectationsError): - def __init__(self, data_asset_name) -> None: - self.data_asset_name = data_asset_name - self.message = f"No expectation suite found for data_asset_name {data_asset_name}" - super().__init__(self.message) + def __init__(self, name: str) -> None: + super().__init__( + f"ExpectationSuite '{name}' not found. Please check the name and try again." + ) class BatchDefinitionError(DataContextError): @@ -372,6 +384,13 @@ def __init__(self, message) -> None: super().__init__(self.message) +class BatchDefinitionNotFoundError(BatchDefinitionError): + def __init__(self, name: str) -> None: + super().__init__( + f"BatchDefinition '{name}' not found. Please check the name and try again." + ) + + class BatchDefinitionNotAddedError(ResourceNotAddedError): def __init__(self, name: str) -> None: super().__init__( @@ -380,6 +399,14 @@ def __init__(self, name: str) -> None: ) +class BatchDefinitionNotFreshError(ResourceNotAddedError): + def __init__(self, name: str) -> None: + super().__init__( + f"BatchDefinition '{name}' has changed since it has last been saved. " + "Please update using the parent asset or data source, then try your action again." + ) + + class BatchSpecError(DataContextError): def __init__(self, message) -> None: self.message = message @@ -404,6 +431,10 @@ class DatasourceNotFoundError(DataContextError): pass +class DataAssetNotFoundError(DataContextError): + pass + + class DataAssetInitializationError(GreatExpectationsError): def __init__(self, message: str) -> None: self.message = f"Cannot initialize data asset: {message}" diff --git a/tests/checkpoint/test_checkpoint.py b/tests/checkpoint/test_checkpoint.py index 4ebd42c6c1d0..157689253caf 100644 --- a/tests/checkpoint/test_checkpoint.py +++ b/tests/checkpoint/test_checkpoint.py @@ -26,17 +26,17 @@ ) from great_expectations.compatibility.pydantic import ValidationError from great_expectations.constants import DATAFRAME_REPLACEMENT_STR -from great_expectations.core.added_diagnostics import ( - BatchDefinitionAddedDiagnostics, - ExpectationSuiteAddedDiagnostics, - ValidationDefinitionAddedDiagnostics, -) from great_expectations.core.batch_definition import BatchDefinition from great_expectations.core.expectation_suite import ExpectationSuite from great_expectations.core.expectation_validation_result import ( ExpectationSuiteValidationResult, ExpectationValidationResult, ) +from great_expectations.core.freshness_diagnostics import ( + BatchDefinitionFreshnessDiagnostics, + ExpectationSuiteFreshnessDiagnostics, + ValidationDefinitionFreshnessDiagnostics, +) from great_expectations.core.result_format import ResultFormat from great_expectations.core.run_identifier import RunIdentifier from great_expectations.core.validation_definition import ValidationDefinition @@ -186,8 +186,8 @@ def validation_definition_1( return_value=json.dumps({"id": str(uuid.uuid4()), "name": name}), ), mock.patch.object( ValidationDefinition, - "is_added", - return_value=ValidationDefinitionAddedDiagnostics(errors=[]), + "is_fresh", + return_value=ValidationDefinitionFreshnessDiagnostics(errors=[]), ): yield in_memory_context.validation_definitions.add(vc) @@ -207,8 +207,8 @@ def validation_definition_2( return_value=json.dumps({"id": str(uuid.uuid4()), "name": name}), ), mock.patch.object( ValidationDefinition, - "is_added", - return_value=ValidationDefinitionAddedDiagnostics(errors=[]), + "is_fresh", + return_value=ValidationDefinitionFreshnessDiagnostics(errors=[]), ): yield in_memory_context.validation_definitions.add(vc) @@ -498,13 +498,15 @@ class TestCheckpointResult: def mock_suite(self, mocker: MockerFixture): suite = mocker.Mock(spec=ExpectationSuite) suite.name = self.suite_name - suite.is_added.return_value = ExpectationSuiteAddedDiagnostics(errors=[]) + suite.is_fresh.return_value = ExpectationSuiteFreshnessDiagnostics(errors=[]) return suite @pytest.fixture def mock_batch_def(self, mocker: MockerFixture): bd = mocker.Mock(spec=BatchDefinition) - bd._copy_and_set_values().is_added.return_value = BatchDefinitionAddedDiagnostics(errors=[]) + bd._copy_and_set_values().is_fresh.return_value = BatchDefinitionFreshnessDiagnostics( + errors=[] + ) return bd @pytest.fixture @@ -921,7 +923,7 @@ def test_checkpoint_run_adds_requisite_ids(self, tmp_path: pathlib.Path): @pytest.mark.parametrize( - "id,validation_def_id,suite_id,batch_def_id,is_added,error_list", + "id,validation_def_id,suite_id,batch_def_id,is_fresh,error_list", [ pytest.param( str(uuid.uuid4()), @@ -1087,14 +1089,26 @@ def test_checkpoint_run_adds_requisite_ids(self, tmp_path: pathlib.Path): ], ) @pytest.mark.unit -def test_is_added( +def test_is_fresh( + in_memory_runtime_context, id: str | None, validation_def_id: str | None, suite_id: str | None, batch_def_id: str | None, - is_added: bool, + is_fresh: bool, error_list: list[Type[ResourceNotAddedError]], ): + context = in_memory_runtime_context + batch_definition = ( + context.data_sources.add_pandas(name="my_pandas_ds") + .add_csv_asset(name="my_csv_asset", filepath_or_buffer="data.csv") + .add_batch_definition(name="my_batch_def") + ) + batch_definition.id = batch_def_id # Fluent API will add an ID but manually overriding for test + + suite = context.suites.add(ExpectationSuite(name="my_suite")) + suite.id = suite_id # Store will add an ID but manually overriding for test + checkpoint = Checkpoint( name="my_checkpoint", id=id, @@ -1102,12 +1116,12 @@ def test_is_added( ValidationDefinition( name="my_validation_definition", id=validation_def_id, - suite=ExpectationSuite(name="my_suite", id=suite_id), - data=BatchDefinition(name="my_batch_def", id=batch_def_id), + suite=suite, + data=batch_definition, ) ], ) - diagnostics = checkpoint.is_added() + diagnostics = checkpoint.is_fresh() - assert diagnostics.is_added is is_added + assert diagnostics.success is is_fresh assert [type(err) for err in diagnostics.errors] == error_list diff --git a/tests/core/factory/test_suite_factory.py b/tests/core/factory/test_suite_factory.py index 9387d4a8cc2b..0b7179eac0fa 100644 --- a/tests/core/factory/test_suite_factory.py +++ b/tests/core/factory/test_suite_factory.py @@ -36,11 +36,11 @@ def test_suite_factory_get_uses_store_get(): set_context(context) # Act - result = factory.get(name=name) + factory.get(name=name) # Assert store.get.assert_called_once_with(key=key) - assert result == ExpectationSuite(name=name) + store.deserialize_suite_dict.assert_called_once_with(suite_dict) @pytest.mark.unit diff --git a/tests/core/test_batch_definition.py b/tests/core/test_batch_definition.py index 82a60906f5ab..4fcd9bf17609 100644 --- a/tests/core/test_batch_definition.py +++ b/tests/core/test_batch_definition.py @@ -13,6 +13,8 @@ from great_expectations.datasource.fluent.interfaces import Batch, DataAsset from great_expectations.exceptions.exceptions import ( BatchDefinitionNotAddedError, + BatchDefinitionNotFreshError, + ResourcesNotAddedError, ) if TYPE_CHECKING: @@ -115,22 +117,54 @@ def test_identifier_bundle_no_id_raises_error(in_memory_runtime_context): batch_definition.id = None - with pytest.raises(BatchDefinitionNotAddedError): + with pytest.raises(ResourcesNotAddedError) as e: batch_definition.identifier_bundle() + assert len(e.value.errors) == 1 + assert isinstance(e.value.errors[0], BatchDefinitionNotAddedError) + @pytest.mark.parametrize( - "id,is_added,num_errors", + "id,is_fresh,num_errors", [ pytest.param(str(uuid.uuid4()), True, 0, id="added"), pytest.param(None, False, 1, id="not_added"), ], ) @pytest.mark.unit -def test_is_added(id: str | None, is_added: bool, num_errors: int): - batch_definition = BatchDefinition(name="my_batch_def", id=id) - diagnostics = batch_definition.is_added() +def test_is_fresh_is_added( + in_memory_runtime_context, id: str | None, is_fresh: bool, num_errors: int +): + context = in_memory_runtime_context + batch_definition = ( + context.data_sources.add_pandas(name="my_pandas_ds") + .add_csv_asset(name="my_csv_asset", filepath_or_buffer="data.csv") + .add_batch_definition(name="my_batch_def") + ) + batch_definition.id = id # Fluent API will add an ID but manually overriding for test + diagnostics = batch_definition.is_fresh() - assert diagnostics.is_added is is_added + assert diagnostics.success is is_fresh assert len(diagnostics.errors) == num_errors assert all(isinstance(err, BatchDefinitionNotAddedError) for err in diagnostics.errors) + + +@pytest.mark.cloud +def test_is_fresh_freshness(empty_cloud_context_fluent): + # Ephemeral/file use a cacheable datasource dict so freshness + # with batch definitions is a Cloud-only concern + context = empty_cloud_context_fluent + batch_definition = ( + context.data_sources.add_pandas(name="my_pandas_ds") + .add_csv_asset(name="my_csv_asset", filepath_or_buffer="data.csv") + .add_batch_definition(name="my_batch_def") + ) + + batching_regex = re.compile(r"data_(?P\d{4})-(?P\d{2}).csv") + partitioner = FileNamePartitionerYearly(regex=batching_regex) + batch_definition.partitioner = partitioner + + diagnostics = batch_definition.is_fresh() + assert diagnostics.success is False + assert len(diagnostics.errors) == 1 + assert isinstance(diagnostics.errors[0], BatchDefinitionNotFreshError) diff --git a/tests/core/test_expectation_suite.py b/tests/core/test_expectation_suite.py index a8b523bfab7e..7199c7743141 100644 --- a/tests/core/test_expectation_suite.py +++ b/tests/core/test_expectation_suite.py @@ -93,14 +93,6 @@ def suite_with_single_expectation( class TestInit: """Tests related to ExpectationSuite.__init__()""" - @pytest.mark.unit - def test_instantiate_with_no_context_raises(self): - set_context(None) - with pytest.raises(gx_exceptions.DataContextRequiredError): - ExpectationSuite( - name="i've made a huge mistake", - ) - @pytest.mark.unit def test_expectation_suite_init_defaults( self, @@ -1077,11 +1069,14 @@ def test_expectation_save_callback_emits_event( @pytest.mark.unit -def test_identifier_bundle_with_existing_id(): - suite = ExpectationSuite(name="my_suite", id="fa34fbb7-124d-42ff-9760-e410ee4584a0") +def test_identifier_bundle_with_existing_id(in_memory_runtime_context): + context = in_memory_runtime_context + suite = context.suites.add(ExpectationSuite(name="my_suite")) + suite_id = suite.id assert suite.identifier_bundle() == _IdentifierBundle( - name="my_suite", id="fa34fbb7-124d-42ff-9760-e410ee4584a0" + name="my_suite", + id=suite_id, ) @@ -1090,23 +1085,30 @@ def test_identifier_bundle_no_id_raises_error(): _ = gx.get_context(mode="ephemeral") suite = ExpectationSuite(name="my_suite", id=None) - with pytest.raises(gx_exceptions.ExpectationSuiteNotAddedError): + with pytest.raises(gx_exceptions.ResourcesNotAddedError) as e: suite.identifier_bundle() + assert len(e.value.errors) == 1 + assert isinstance(e.value.errors[0], gx_exceptions.ExpectationSuiteNotAddedError) + @pytest.mark.parametrize( - "id,is_added,num_errors", + "id,is_fresh,num_errors", [ pytest.param(str(uuid.uuid4()), True, 0, id="added"), pytest.param(None, False, 1, id="not_added"), ], ) @pytest.mark.unit -def test_is_added(id: str | None, is_added: bool, num_errors: int): - suite = ExpectationSuite(name="my_suite", id=id) - diagnostics = suite.is_added() +def test_is_fresh_is_added( + in_memory_runtime_context, id: str | None, is_fresh: bool, num_errors: int +): + context = in_memory_runtime_context + suite = context.suites.add(ExpectationSuite(name="my_suite")) + suite.id = id # Stores will add an ID but manually overriding for test + diagnostics = suite.is_fresh() - assert diagnostics.is_added is is_added + assert diagnostics.success is is_fresh assert len(diagnostics.errors) == num_errors assert all( isinstance(err, gx_exceptions.ExpectationSuiteNotAddedError) for err in diagnostics.errors @@ -1181,3 +1183,17 @@ def test_save_on_individual_expectation_updates_rendered_content( 4, 5, ] + + +@pytest.mark.unit +def test_is_fresh_freshness(in_memory_runtime_context): + context = in_memory_runtime_context + + suite = context.suites.add(ExpectationSuite(name="my_suite")) + + suite.expectations = [gxe.ExpectColumnDistinctValuesToBeInSet(column="a", value_set=[1, 2, 3])] + + diagnostics = suite.is_fresh() + assert diagnostics.success is False + assert len(diagnostics.errors) == 1 + assert isinstance(diagnostics.errors[0], gx_exceptions.ExpectationSuiteNotFreshError) diff --git a/tests/core/test_validation_definition.py b/tests/core/test_validation_definition.py index 81f89cd0ab01..1258f7ec19cb 100644 --- a/tests/core/test_validation_definition.py +++ b/tests/core/test_validation_definition.py @@ -392,6 +392,7 @@ def test_persists_validation_results_for_cloud( ): expectation = gxe.ExpectColumnMaxToBeBetween(column="foo", max_value=1) cloud_validation_definition.suite.add_expectation(expectation=expectation) + cloud_validation_definition.suite.save() mock_validator.graph_validate.return_value = [ ExpectationValidationResult(success=True, expectation_config=expectation.configuration) ] @@ -415,6 +416,7 @@ def test_cloud_validation_def_creates_rendered_content( ): expectation = gxe.ExpectColumnMaxToBeBetween(column="foo", max_value=1) cloud_validation_definition.suite.add_expectation(expectation=expectation) + cloud_validation_definition.suite.save() mock_validator.graph_validate.return_value = [ ExpectationValidationResult(success=True, expectation_config=expectation.configuration) ] @@ -790,7 +792,7 @@ def test_save_success(mocker: MockerFixture, validation_definition: ValidationDe @pytest.mark.parametrize( - "id,suite_id,batch_def_id,is_added,error_list", + "id,suite_id,batch_def_id,is_fresh,error_list", [ pytest.param( str(uuid.uuid4()), @@ -863,20 +865,33 @@ def test_save_success(mocker: MockerFixture, validation_definition: ValidationDe ], ) @pytest.mark.unit -def test_is_added( +def test_is_fresh( + in_memory_runtime_context, id: str | None, suite_id: str | None, batch_def_id: str | None, - is_added: bool, + is_fresh: bool, error_list: list[Type[ResourceNotAddedError]], ): + context = in_memory_runtime_context + + batch_definition = ( + context.data_sources.add_pandas(name="my_pandas_ds") + .add_csv_asset(name="my_csv_asset", filepath_or_buffer="data.csv") + .add_batch_definition(name="my_batch_def") + ) + batch_definition.id = batch_def_id # Fluent API will add an ID but manually overriding for test + + suite = context.suites.add(ExpectationSuite(name="my_suite")) + suite.id = suite_id # Store will add an ID but manually overriding for test + validation_definition = ValidationDefinition( name="my_validation_definition", id=id, - suite=ExpectationSuite(name="my_suite", id=suite_id), - data=BatchDefinition(name="my_batch_def", id=batch_def_id), + suite=suite, + data=batch_definition, ) - diagnostics = validation_definition.is_added() + diagnostics = validation_definition.is_fresh() - assert diagnostics.is_added is is_added + assert diagnostics.success is is_fresh assert [type(err) for err in diagnostics.errors] == error_list diff --git a/tests/datasource/fluent/integration/integration_test_utils.py b/tests/datasource/fluent/integration/integration_test_utils.py index fc03249d3e5e..00dc346a6488 100644 --- a/tests/datasource/fluent/integration/integration_test_utils.py +++ b/tests/datasource/fluent/integration/integration_test_utils.py @@ -49,6 +49,7 @@ def run_checkpoint_and_data_doc( validator.expect_column_median_to_be_between(column="passenger_count", min_value=1, max_value=4) suite = validator.expectation_suite + suite.save() batch_def = asset.add_batch_definition(name="my_batch_definition") # Configure and run a checkpoint