Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Implement suite factory add_or_update #10796

Merged
merged 7 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions great_expectations/core/factory/suite_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def _include_rendered_content(self) -> bool:
def add(self, suite: ExpectationSuite) -> ExpectationSuite:
"""Add an ExpectationSuite to the collection.

Parameters:
Args:
suite: ExpectationSuite to add

Raises:
Expand Down Expand Up @@ -62,7 +62,7 @@ def add(self, suite: ExpectationSuite) -> ExpectationSuite:
def delete(self, name: str) -> None:
"""Delete an ExpectationSuite from the collection.

Parameters:
Args:
name: The name of the ExpectationSuite to delete

Raises:
Expand All @@ -89,7 +89,7 @@ def delete(self, name: str) -> None:
def get(self, name: str) -> ExpectationSuite:
"""Get an ExpectationSuite from the collection by name.

Parameters:
Args:
name: Name of ExpectationSuite to get

Raises:
Expand Down Expand Up @@ -125,3 +125,33 @@ def all(self) -> Iterable[ExpectationSuite]:
self._store.submit_all_deserialization_event(e)
raise
return deserializable_suites

# Review note: a docstring was requested for this method during review and
# was added in commit 5acc32a.
@public_api
def add_or_update(self, suite: ExpectationSuite) -> ExpectationSuite:
    """Store an ExpectationSuite, replacing any stored suite that shares its name.

    When a suite with the same name can be fetched, it is overwritten by the
    given one; otherwise the given suite is added as a new entry. On
    overwrite, every Expectation in ``suite`` that compares equal to an
    Expectation of the stored suite takes over that Expectation's ID, so
    unchanged Expectations keep a stable ID. Expectations without a match are
    left as they are and receive a new ID.

    Args:
        suite: ExpectationSuite to add or update

    Returns:
        The result of ``add`` when no stored suite was found, otherwise the
        ``suite`` object that was passed in, after it has been saved.
    """
    try:
        stored_suite = self.get(name=suite.name)
    except DataContextError:
        # The lookup failed, so treat the suite as brand new.
        return self.add(suite=suite)

    # Carry IDs over to the expectations that have not changed.
    stored_expectations = stored_suite.expectations
    for candidate in suite.expectations:
        try:
            candidate.id = stored_expectations[stored_expectations.index(candidate)].id
        except ValueError:
            # No equal stored expectation: this one is new or was modified.
            continue

    suite.id = stored_suite.id
    suite.save()
    return suite
267 changes: 266 additions & 1 deletion tests/core/factory/test_suite_factory.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import re
from copy import copy
from typing import Dict
from unittest import mock
from unittest.mock import (
ANY,
Mock, # noqa: TID251
)
from unittest.mock import ANY as ANY_TEST_ARG
from unittest.mock import Mock # noqa: TID251

import pytest
from pytest_mock import MockerFixture
Expand All @@ -19,6 +23,10 @@
from great_expectations.data_context.data_context.context_factory import set_context
from great_expectations.data_context.store import ExpectationsStore
from great_expectations.exceptions import DataContextError
from great_expectations.expectations import (
ExpectColumnDistinctValuesToContainSet,
ExpectColumnSumToBeBetween,
)
from great_expectations.types import SerializableDictDot


Expand Down Expand Up @@ -325,6 +333,263 @@ def test_suite_factory_all_with_bad_pydantic_config(
assert re.match("pydantic.*ValidationError", analytics_submit_args.error_type)


class TestSuiteFactoryAddOrUpdate:
    """Exercise ``context.suites.add_or_update`` on filesystem, cloud and ephemeral contexts.

    Every scenario is written once as a private ``_test_*`` helper that takes a
    data context, and is invoked by three thin, marker-tagged test methods
    (``__filesystem``, ``__cloud``, ``__ephemeral``), one per context fixture.
    """

    @staticmethod
    def _build_expectations() -> list:
        """Return a fresh list with the two sample expectations used by the scenarios."""
        return [
            ExpectColumnSumToBeBetween(
                column="col A",
                min_value=0,
                max_value=10,
            ),
            ExpectColumnDistinctValuesToContainSet(
                column="col B",
                value_set=["a", "b", "c"],
            ),
        ]

    @staticmethod
    def _snapshot_ids(suite: ExpectationSuite) -> tuple:
        """Capture the suite ID and its expectation IDs as plain values."""
        return suite.id, [exp.id for exp in suite.expectations]

    @pytest.mark.filesystem
    def test_add_empty_new_suite__filesystem(self, empty_data_context):
        self._test_add_empty_new_suite(empty_data_context)

    @pytest.mark.cloud
    def test_add_empty_new_suite__cloud(self, empty_cloud_context_fluent):
        self._test_add_empty_new_suite(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_add_empty_new_suite__ephemeral(self, ephemeral_context_with_defaults):
        self._test_add_empty_new_suite(ephemeral_context_with_defaults)

    def _test_add_empty_new_suite(self, context: AbstractDataContext):
        """A suite without expectations and without a stored namesake is created."""
        # arrange
        suite_name = "suite A"
        suite = ExpectationSuite(name=suite_name)

        # act
        created_suite = context.suites.add_or_update(suite=suite)

        # assert
        assert created_suite.id
        # The lookup itself is the check: it must not raise for the new name.
        context.suites.get(suite_name)

    @pytest.mark.filesystem
    def test_add_new_suite_with_expectations__filesystem(self, empty_data_context):
        self._test_add_new_suite_with_expectations(empty_data_context)

    @pytest.mark.cloud
    def test_add_new_suite_with_expectations__cloud(self, empty_cloud_context_fluent):
        self._test_add_new_suite_with_expectations(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_add_new_suite_with_expectations__ephemeral(self, ephemeral_context_with_defaults):
        self._test_add_new_suite_with_expectations(ephemeral_context_with_defaults)

    def _test_add_new_suite_with_expectations(self, context: AbstractDataContext):
        """A new suite with expectations is created and every expectation gets an ID."""
        # arrange
        suite_name = "suite A"
        expectations = self._build_expectations()
        suite = ExpectationSuite(
            name=suite_name,
            # Copies keep the reference expectations untouched for the comparison below.
            expectations=[copy(exp) for exp in expectations],
        )

        # act
        created_suite = context.suites.add_or_update(suite=suite)

        # assert
        assert created_suite.id
        context.suites.get(suite_name)
        for exp, created_exp in zip(expectations, created_suite.expectations):
            assert created_exp.id
            # ANY makes the comparison ignore whichever ID was assigned.
            exp.id = ANY
            assert exp == created_exp

    @pytest.mark.filesystem
    def test_update_existing_suite_adds_expectations__filesystem(self, empty_data_context):
        self._test_update_existing_suite_adds_expectations(empty_data_context)

    @pytest.mark.cloud
    def test_update_existing_suite_adds_expectations__cloud(self, empty_cloud_context_fluent):
        self._test_update_existing_suite_adds_expectations(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_update_existing_suite_adds_expectations__ephemeral(
        self, ephemeral_context_with_defaults
    ):
        self._test_update_existing_suite_adds_expectations(ephemeral_context_with_defaults)

    def _test_update_existing_suite_adds_expectations(self, context: AbstractDataContext):
        """Updating an empty stored suite adds the new expectations and keeps the suite ID."""
        # arrange
        suite_name = "suite A"
        expectations = self._build_expectations()
        suite = ExpectationSuite(
            name=suite_name,
            expectations=[copy(exp) for exp in expectations],
        )
        existing_suite = context.suites.add(suite=ExpectationSuite(name=suite_name))

        # act
        updated_suite = context.suites.add_or_update(suite=suite)

        # assert
        assert updated_suite.id == existing_suite.id
        for exp, created_exp in zip(expectations, updated_suite.expectations):
            assert created_exp.id
            exp.id = ANY
            assert exp == created_exp

    @pytest.mark.filesystem
    def test_update_existing_suite_updates_expectations__filesystem(self, empty_data_context):
        self._test_update_existing_suite_updates_expectations(empty_data_context)

    @pytest.mark.cloud
    def test_update_existing_suite_updates_expectations__cloud(self, empty_cloud_context_fluent):
        self._test_update_existing_suite_updates_expectations(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_update_existing_suite_updates_expectations__ephemeral(
        self, ephemeral_context_with_defaults
    ):
        self._test_update_existing_suite_updates_expectations(ephemeral_context_with_defaults)

    def _test_update_existing_suite_updates_expectations(self, context: AbstractDataContext):
        """Changed expectations replace the stored ones and end up with different IDs."""
        # arrange
        suite_name = "suite A"
        expectations = self._build_expectations()
        existing_suite = context.suites.add(
            suite=ExpectationSuite(
                name=suite_name,
                expectations=[copy(exp) for exp in expectations],
            )
        )
        # Change every reference expectation so none of them matches a stored one.
        new_col_name = "col C"
        for exp in expectations:
            exp.column = new_col_name
        suite = ExpectationSuite(
            name=suite_name,
            expectations=[copy(exp) for exp in expectations],
        )

        # act
        updated_suite = context.suites.add_or_update(suite=suite)

        # assert
        assert updated_suite.id == existing_suite.id
        for exp, created_exp in zip(expectations, updated_suite.expectations):
            assert created_exp.id
            exp.id = ANY
            assert exp == created_exp
            assert created_exp.column == new_col_name  # type: ignore[attr-defined] # column exists

        for old_exp, new_exp in zip(existing_suite.expectations, updated_suite.expectations):
            # expectations have been deleted and re added, not updated
            assert old_exp.id != new_exp.id

    @pytest.mark.filesystem
    def test_update_existing_suite_deletes_expectations__filesystem(self, empty_data_context):
        self._test_update_existing_suite_deletes_expectations(empty_data_context)

    @pytest.mark.cloud
    def test_update_existing_suite_deletes_expectations__cloud(self, empty_cloud_context_fluent):
        self._test_update_existing_suite_deletes_expectations(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_update_existing_suite_deletes_expectations__ephemeral(
        self, ephemeral_context_with_defaults
    ):
        self._test_update_existing_suite_deletes_expectations(ephemeral_context_with_defaults)

    def _test_update_existing_suite_deletes_expectations(self, context: AbstractDataContext):
        """Updating with an empty suite removes the stored expectations and keeps the suite ID."""
        # arrange
        suite_name = "suite A"
        existing_suite = context.suites.add(
            suite=ExpectationSuite(
                name=suite_name,
                expectations=self._build_expectations(),
            )
        )
        suite = ExpectationSuite(
            name=suite_name,
            expectations=[],
        )

        # act
        updated_suite = context.suites.add_or_update(suite=suite)

        # assert
        assert updated_suite.id == existing_suite.id
        assert updated_suite.expectations == []

    @pytest.mark.filesystem
    def test_add_or_update_is_idempotent__filesystem(self, empty_data_context):
        self._test_add_or_update_is_idempotent(empty_data_context)

    @pytest.mark.cloud
    def test_add_or_update_is_idempotent__cloud(self, empty_cloud_context_fluent):
        self._test_add_or_update_is_idempotent(empty_cloud_context_fluent)

    @pytest.mark.unit
    def test_add_or_update_is_idempotent__ephemeral(self, ephemeral_context_with_defaults):
        self._test_add_or_update_is_idempotent(ephemeral_context_with_defaults)

    def _test_add_or_update_is_idempotent(self, context: AbstractDataContext):
        """Repeating add_or_update with the same suite leaves the suite and its IDs unchanged."""
        # arrange
        suite_name = "suite A"
        expectations = self._build_expectations()
        suite = ExpectationSuite(
            name=suite_name,
            expectations=[copy(exp) for exp in expectations],
        )

        # act
        # On the update path add_or_update returns the very object it was given,
        # so comparing only the returned suites at the end can compare an object
        # with itself. Snapshot the IDs after every call to observe each state.
        suite_1 = context.suites.add_or_update(suite=suite)
        ids_1 = self._snapshot_ids(suite_1)
        suite_2 = context.suites.add_or_update(suite=suite)
        ids_2 = self._snapshot_ids(suite_2)
        suite_3 = context.suites.add_or_update(suite=suite)
        ids_3 = self._snapshot_ids(suite_3)

        # assert
        assert suite_1 == suite_2 == suite_3
        assert ids_1 == ids_2 == ids_3


class TestSuiteFactoryAnalytics:
@pytest.mark.filesystem
def test_suite_factory_add_emits_event_filesystem(self, empty_data_context):
Expand Down
Loading