-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[MAINTENANCE] Add tests around expectations (#10688)
- Loading branch information
1 parent
50ea23a
commit 44382ef
Showing
72 changed files
with
5,195 additions
and
5,287 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 4 additions & 0 deletions
4
tests/integration/data_sources_and_expectations/expectations/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
"""Tests around individual expectations. | ||
Files here should have a 1:1 relationship with the expectations they test. | ||
""" |
107 changes: 107 additions & 0 deletions
107
..._sources_and_expectations/expectations/test_expect_column_distinct_values_to_be_in_set.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
from datetime import datetime | ||
|
||
import pandas as pd | ||
import pytest | ||
|
||
import great_expectations.expectations as gxe | ||
from great_expectations.core.result_format import ResultFormat | ||
from great_expectations.datasource.fluent.interfaces import Batch | ||
from tests.integration.conftest import parameterize_batch_for_data_sources | ||
from tests.integration.data_sources_and_expectations.test_canonical_expectations import ( | ||
ALL_DATA_SOURCES, | ||
DATA_SOURCES_THAT_SUPPORT_DATE_COMPARISONS, | ||
JUST_PANDAS_DATA_SOURCES, | ||
) | ||
|
||
# Column name shared by every test DataFrame and expectation in this module. | ||
COL_NAME = "my_col" | ||
|
||
# Four rows with two distinct values: 1 appears once, 2 appears three times. | ||
ONES_AND_TWOS = pd.DataFrame({COL_NAME: [1, 2, 2, 2]}) | ||
|
||
|
||
@parameterize_batch_for_data_sources(data_source_configs=ALL_DATA_SOURCES, data=ONES_AND_TWOS)
def test_success_complete_results(batch_for_datasource: Batch) -> None:
    """Validate with the COMPLETE result format and check the whole result payload."""
    expected_result = {
        "details": {
            "value_counts": [
                {"value": 1, "count": 1},
                {"value": 2, "count": 3},
            ]
        },
        "observed_value": [1, 2],
    }
    in_set = gxe.ExpectColumnDistinctValuesToBeInSet(column=COL_NAME, value_set=[1, 2])

    outcome = batch_for_datasource.validate(in_set, result_format=ResultFormat.COMPLETE)

    assert outcome.success
    assert outcome.to_json_dict()["result"] == expected_result
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=ALL_DATA_SOURCES,
    data=pd.DataFrame({COL_NAME: ["foo", "bar"]}),
)
def test_strings(batch_for_datasource: Batch) -> None:
    """String data passes when every distinct value appears in the value set."""
    allowed = ["foo", "bar", "baz"]
    in_set = gxe.ExpectColumnDistinctValuesToBeInSet(column=COL_NAME, value_set=allowed)
    outcome = batch_for_datasource.validate(in_set)
    assert outcome.success
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=DATA_SOURCES_THAT_SUPPORT_DATE_COMPARISONS,
    data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()]}),  # noqa: DTZ001
)
def test_dates(batch_for_datasource: Batch) -> None:
    """Date data passes when the value set holds the same two dates as the column."""
    first_day = datetime(2024, 11, 19).date()  # noqa: DTZ001
    second_day = datetime(2024, 11, 20).date()  # noqa: DTZ001
    in_set = gxe.ExpectColumnDistinctValuesToBeInSet(
        column=COL_NAME,
        value_set=[first_day, second_day],
    )
    outcome = batch_for_datasource.validate(in_set)
    assert outcome.success
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=JUST_PANDAS_DATA_SOURCES, data=pd.DataFrame({COL_NAME: [1, 2, None]})
)
def test_ignores_nulls(batch_for_datasource: Batch) -> None:
    """A None entry in the column does not stop the expectation from succeeding."""
    non_null_values = [1, 2]
    in_set = gxe.ExpectColumnDistinctValuesToBeInSet(column=COL_NAME, value_set=non_null_values)
    outcome = batch_for_datasource.validate(in_set)
    assert outcome.success
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=JUST_PANDAS_DATA_SOURCES, data=ONES_AND_TWOS
)
def test_data_is_subset(batch_for_datasource: Batch) -> None:
    """The value set may contain extra values (here 3) that never occur in the data."""
    wider_set = [1, 2, 3]
    in_set = gxe.ExpectColumnDistinctValuesToBeInSet(column=COL_NAME, value_set=wider_set)
    outcome = batch_for_datasource.validate(in_set)
    assert outcome.success
|
||
|
||
@pytest.mark.xfail(
    strict=True,
    reason=(
        "Validation against an empty value_set does not yet report failure "
        "(suspected minor bug)"
    ),
)
@parameterize_batch_for_data_sources(
    data_source_configs=JUST_PANDAS_DATA_SOURCES, data=ONES_AND_TWOS
)
def test_empty_value_set(batch_for_datasource: Batch) -> None:
    """Failing test that seems like a (pretty minor) bug.

    The column holds the values 1 and 2, so none of them can be in an empty
    value set and the expectation should not succeed. The test is marked as a
    strict xfail: it is expected to fail today, and an unexpected pass is
    reported as a test failure so the marker gets removed once the behavior
    is fixed.
    """
    expectation = gxe.ExpectColumnDistinctValuesToBeInSet(column=COL_NAME, value_set=[])
    result = batch_for_datasource.validate(expectation)
    assert not result.success
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=JUST_PANDAS_DATA_SOURCES, data=ONES_AND_TWOS
)
def test_value_set_is_none(batch_for_datasource: Batch) -> None:
    """Passing value_set=None is accepted and validation succeeds.

    NOTE: why do we even allow this?!?
    """
    no_value_set = None
    in_set = gxe.ExpectColumnDistinctValuesToBeInSet(column=COL_NAME, value_set=no_value_set)
    outcome = batch_for_datasource.validate(in_set)
    assert outcome.success
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=JUST_PANDAS_DATA_SOURCES, data=ONES_AND_TWOS
)
def test_failure(batch_for_datasource: Batch) -> None:
    """The expectation fails when the data holds a value (2) missing from the value set."""
    too_narrow = [1]
    in_set = gxe.ExpectColumnDistinctValuesToBeInSet(column=COL_NAME, value_set=too_narrow)
    outcome = batch_for_datasource.validate(in_set)
    assert not outcome.success
83 changes: 83 additions & 0 deletions
83
...ources_and_expectations/expectations/test_expect_column_distinct_values_to_contain_set.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
from datetime import datetime | ||
|
||
import pandas as pd | ||
|
||
import great_expectations.expectations as gxe | ||
from great_expectations.core.result_format import ResultFormat | ||
from great_expectations.datasource.fluent.interfaces import Batch | ||
from tests.integration.conftest import parameterize_batch_for_data_sources | ||
from tests.integration.data_sources_and_expectations.test_canonical_expectations import ( | ||
ALL_DATA_SOURCES, | ||
DATA_SOURCES_THAT_SUPPORT_DATE_COMPARISONS, | ||
JUST_PANDAS_DATA_SOURCES, | ||
) | ||
|
||
# Column name shared by every test DataFrame and expectation in this module. | ||
COL_NAME = "my_col" | ||
|
||
# Four rows with two distinct values: 1 appears once, 2 appears three times. | ||
ONES_AND_TWOS = pd.DataFrame({COL_NAME: [1, 2, 2, 2]}) | ||
|
||
|
||
@parameterize_batch_for_data_sources(data_source_configs=ALL_DATA_SOURCES, data=ONES_AND_TWOS)
def test_success_complete_results(batch_for_datasource: Batch) -> None:
    """Validate with the COMPLETE result format and check the whole result payload."""
    expected_result = {
        "details": {
            "value_counts": [
                {"value": 1, "count": 1},
                {"value": 2, "count": 3},
            ]
        },
        "observed_value": [1, 2],
    }
    contain_set = gxe.ExpectColumnDistinctValuesToContainSet(column=COL_NAME, value_set=[1, 2])

    outcome = batch_for_datasource.validate(contain_set, result_format=ResultFormat.COMPLETE)

    assert outcome.success
    assert outcome.to_json_dict()["result"] == expected_result
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=ALL_DATA_SOURCES,
    data=pd.DataFrame({COL_NAME: ["foo", "bar"]}),
)
def test_strings(batch_for_datasource: Batch) -> None:
    """String data passes when the single required value is present in the column."""
    required = ["foo"]
    contain_set = gxe.ExpectColumnDistinctValuesToContainSet(column=COL_NAME, value_set=required)
    outcome = batch_for_datasource.validate(contain_set)
    assert outcome.success
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=DATA_SOURCES_THAT_SUPPORT_DATE_COMPARISONS,
    data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()]}),  # noqa: DTZ001
)
def test_dates(batch_for_datasource: Batch) -> None:
    """Date data passes when the one required date is among the column's values."""
    required_day = datetime(2024, 11, 19).date()  # noqa: DTZ001
    contain_set = gxe.ExpectColumnDistinctValuesToContainSet(
        column=COL_NAME,
        value_set=[required_day],
    )
    outcome = batch_for_datasource.validate(contain_set)
    assert outcome.success
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=JUST_PANDAS_DATA_SOURCES, data=pd.DataFrame({COL_NAME: [1, 2, None]})
)
def test_ignores_nulls(batch_for_datasource: Batch) -> None:
    """A None entry in the column does not stop the expectation from succeeding."""
    non_null_values = [1, 2]
    contain_set = gxe.ExpectColumnDistinctValuesToContainSet(
        column=COL_NAME, value_set=non_null_values
    )
    outcome = batch_for_datasource.validate(contain_set)
    assert outcome.success
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=JUST_PANDAS_DATA_SOURCES, data=ONES_AND_TWOS
)
def test_data_is_superset(batch_for_datasource: Batch) -> None:
    """The expectation succeeds when the data holds values beyond the required set.

    Uses the null-free ONES_AND_TWOS fixture so this test exercises only the
    superset relationship; null handling is covered by test_ignores_nulls.
    """
    expectation = gxe.ExpectColumnDistinctValuesToContainSet(column=COL_NAME, value_set=[1])
    result = batch_for_datasource.validate(expectation)
    assert result.success
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=JUST_PANDAS_DATA_SOURCES, data=ONES_AND_TWOS
)
def test_failure(batch_for_datasource: Batch) -> None:
    """The expectation fails when a required value (3) never occurs in the data."""
    required = [1, 2, 3]
    contain_set = gxe.ExpectColumnDistinctValuesToContainSet(column=COL_NAME, value_set=required)
    outcome = batch_for_datasource.validate(contain_set)
    assert not outcome.success
81 changes: 81 additions & 0 deletions
81
..._sources_and_expectations/expectations/test_expect_column_distinct_values_to_equal_set.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
from datetime import datetime | ||
from typing import Optional | ||
|
||
import pandas as pd | ||
import pytest | ||
|
||
import great_expectations.expectations as gxe | ||
from great_expectations.core.result_format import ResultFormat | ||
from great_expectations.datasource.fluent.interfaces import Batch | ||
from tests.integration.conftest import parameterize_batch_for_data_sources | ||
from tests.integration.data_sources_and_expectations.test_canonical_expectations import ( | ||
ALL_DATA_SOURCES, | ||
DATA_SOURCES_THAT_SUPPORT_DATE_COMPARISONS, | ||
JUST_PANDAS_DATA_SOURCES, | ||
) | ||
|
||
# Column name shared by every test DataFrame and expectation in this module. | ||
COL_NAME = "my_col" | ||
|
||
# Four rows with two distinct values: 1 appears once, 2 appears three times. | ||
ONES_AND_TWOS = pd.DataFrame({COL_NAME: [1, 2, 2, 2]}) | ||
|
||
|
||
@parameterize_batch_for_data_sources(data_source_configs=ALL_DATA_SOURCES, data=ONES_AND_TWOS)
def test_success_complete_results(batch_for_datasource: Batch) -> None:
    """Validate with the COMPLETE result format and check the whole result payload."""
    expected_result = {
        "details": {
            "value_counts": [
                {"value": 1, "count": 1},
                {"value": 2, "count": 3},
            ]
        },
        "observed_value": [1, 2],
    }
    equal_set = gxe.ExpectColumnDistinctValuesToEqualSet(column=COL_NAME, value_set=[1, 2])

    outcome = batch_for_datasource.validate(equal_set, result_format=ResultFormat.COMPLETE)

    assert outcome.success
    assert outcome.to_json_dict()["result"] == expected_result
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=ALL_DATA_SOURCES,
    data=pd.DataFrame({COL_NAME: ["foo", "bar"]}),
)
def test_strings(batch_for_datasource: Batch) -> None:
    """String data passes when the value set lists exactly the column's distinct values."""
    exact_values = ["foo", "bar"]
    equal_set = gxe.ExpectColumnDistinctValuesToEqualSet(column=COL_NAME, value_set=exact_values)
    outcome = batch_for_datasource.validate(equal_set)
    assert outcome.success
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=DATA_SOURCES_THAT_SUPPORT_DATE_COMPARISONS,
    data=pd.DataFrame({COL_NAME: [datetime(2024, 11, 19).date(), datetime(2024, 11, 20).date()]}),  # noqa: DTZ001
)
def test_dates(batch_for_datasource: Batch) -> None:
    """Date data passes when the value set holds the same two dates as the column."""
    first_day = datetime(2024, 11, 19).date()  # noqa: DTZ001
    second_day = datetime(2024, 11, 20).date()  # noqa: DTZ001
    equal_set = gxe.ExpectColumnDistinctValuesToEqualSet(
        column=COL_NAME,
        value_set=[first_day, second_day],
    )
    outcome = batch_for_datasource.validate(equal_set)
    assert outcome.success
|
||
|
||
@parameterize_batch_for_data_sources(
    data_source_configs=JUST_PANDAS_DATA_SOURCES, data=pd.DataFrame({COL_NAME: [1, 2, None]})
)
def test_ignores_nulls(batch_for_datasource: Batch) -> None:
    """A None entry in the column does not stop the expectation from succeeding."""
    non_null_values = [1, 2]
    equal_set = gxe.ExpectColumnDistinctValuesToEqualSet(
        column=COL_NAME, value_set=non_null_values
    )
    outcome = batch_for_datasource.validate(equal_set)
    assert outcome.success
|
||
|
||
@pytest.mark.parametrize("value_set", [None, [], [1], [1, 4], [1, 2, 3]])
@parameterize_batch_for_data_sources(
    data_source_configs=JUST_PANDAS_DATA_SOURCES, data=ONES_AND_TWOS
)
def test_fails_if_data_is_not_equal(
    batch_for_datasource: Batch, value_set: Optional[list[int]]
) -> None:
    """Every parametrized value set differs from the data's distinct values and must fail.

    The data's distinct values are 1 and 2; the cases cover None, an empty
    list, a strict subset, a partial overlap, and a strict superset.
    """
    equal_set = gxe.ExpectColumnDistinctValuesToEqualSet(column=COL_NAME, value_set=value_set)
    outcome = batch_for_datasource.validate(equal_set)
    assert not outcome.success
Oops, something went wrong.