From c0ac013554f9e0851977aac509e43dc11ea55559 Mon Sep 17 00:00:00 2001 From: Chetan Kini Date: Mon, 16 Sep 2024 14:41:35 -0400 Subject: [PATCH 1/4] simplify id logic --- .../data_context/abstract_data_context.py | 26 +- .../test_data_context_in_code_config.py | 230 +----------------- 2 files changed, 10 insertions(+), 246 deletions(-) diff --git a/great_expectations/data_context/data_context/abstract_data_context.py b/great_expectations/data_context/data_context/abstract_data_context.py index 00aa7c6b372f..a28e49fc8d85 100644 --- a/great_expectations/data_context/data_context/abstract_data_context.py +++ b/great_expectations/data_context/data_context/abstract_data_context.py @@ -59,7 +59,7 @@ ValidationDefinitionFactory, ) from great_expectations.core.yaml_handler import YAMLHandler -from great_expectations.data_context.store import Store, TupleStoreBackend +from great_expectations.data_context.store import Store from great_expectations.data_context.templates import CONFIG_VARIABLES_TEMPLATE from great_expectations.data_context.types.base import ( DataContextConfig, @@ -228,17 +228,15 @@ def __init__(self, runtime_environment: Optional[dict] = None) -> None: self._datasource_store = self._init_datasource_store() self._init_datasources() - # Init data_context_id - self._data_context_id = self._construct_data_context_id() - - # Override the project_config data_context_id if an expectations_store was already set up - self.config.data_context_id = self._data_context_id - self._suite_parameter_dependencies: dict = {} self._init_data_source_manager() self._attach_fluent_config_datasources_and_build_data_connectors(self.fluent_config) + + # Analytics + self._data_context_id = self._construct_data_context_id() + self.config.data_context_id = self._data_context_id self._init_analytics() submit_event(event=DataContextInitializedEvent()) @@ -2030,16 +2028,10 @@ def _init_datasources(self) -> None: datasource._rebuild_asset_data_connectors() def _construct_data_context_id(self) -> uuid.UUID | None: - # Choose the id of the currently-configured expectations store, if it is a persistent store - expectations_store = self.stores[self.expectations_store_name] - if isinstance(expectations_store.store_backend, TupleStoreBackend): - # suppress_warnings since a warning will already have been issued during the store creation # noqa: E501 - # if there was an invalid store config - return expectations_store.store_backend_id_warnings_suppressed - - # Otherwise choose the id stored in the project_config - else: - return self.variables.data_context_id + if not self.variables.data_context_id: + self.variables.data_context_id = uuid.uuid4() + self.variables.save() + return self.variables.data_context_id def get_validation_result( # noqa: C901 self, diff --git a/tests/data_context/test_data_context_in_code_config.py b/tests/data_context/test_data_context_in_code_config.py index 4bfa21fbb14c..f138c241f0fe 100644 --- a/tests/data_context/test_data_context_in_code_config.py +++ b/tests/data_context/test_data_context_in_code_config.py @@ -7,7 +7,7 @@ from moto import mock_s3 from great_expectations.data_context import get_context -from great_expectations.data_context.store import StoreBackend, TupleS3StoreBackend +from great_expectations.data_context.store import StoreBackend from great_expectations.data_context.types.base import DataContextConfig @@ -119,234 +119,6 @@ def list_s3_bucket_contents(bucket: str, prefix: str) -> Set[str]: } -@pytest.mark.aws_deps -@mock_s3 -def test_DataContext_construct_data_context_id_uses_id_of_currently_configured_expectations_store( - aws_credentials, -): - """ - What does this test and why? - - A DataContext should have an id. This ID should come from either: - 1. configured expectations store store_backend_id - 2. great_expectations.yml - 3. new generated id from DataContextConfig - This test verifies that DataContext._construct_data_context_id - uses the store_backend_id from the currently configured expectations store - when instantiating the DataContext - """ - - store_backend_id_filename = StoreBackend.STORE_BACKEND_ID_KEY[0] - bucket = "leakybucket" - expectations_store_prefix = "expectations_store_prefix" - validation_results_store_prefix = "validation_results_store_prefix" - data_docs_store_prefix = "data_docs_store_prefix" - data_context_prefix = "" - - # Create a bucket in Moto's mock AWS environment - conn = boto3.resource("s3", region_name="us-east-1") - conn.create_bucket(Bucket=bucket) - - # Create a TupleS3StoreBackend - # Initialize without store_backend_id and check that the store_backend_id is generated correctly - s3_expectations_store_backend = TupleS3StoreBackend( - filepath_template="my_file_{0}", - bucket=bucket, - prefix=expectations_store_prefix, - ) - # Make sure store_backend_id is not the error string - store_error_uuid = uuid.UUID("00000000-0000-0000-0000-00000000e003") - s3_expectations_store_backend_id = s3_expectations_store_backend.store_backend_id - assert s3_expectations_store_backend_id != store_error_uuid - - # Make sure the bucket contents are as expected - bucket_contents_after_creating_expectation_store = list_s3_bucket_contents( - bucket=bucket, prefix=data_context_prefix - ) - assert bucket_contents_after_creating_expectation_store == { - f"{expectations_store_prefix}/{store_backend_id_filename}" - } - - # Make sure the store_backend_id from the file is equal to reading from the property - expectations_store_backend_id_from_s3_file = get_store_backend_id_from_s3( - bucket=bucket, - prefix=expectations_store_prefix, - key=store_backend_id_filename, - ) - assert expectations_store_backend_id_from_s3_file == s3_expectations_store_backend_id - - # Create a DataContext (note existing expectations store already set up) - in_code_data_context_project_config = build_in_code_data_context_project_config( - bucket="leakybucket", - expectations_store_prefix=expectations_store_prefix, - validation_results_store_prefix=validation_results_store_prefix, - data_docs_store_prefix=data_docs_store_prefix, - ) - in_code_data_context = get_context(project_config=in_code_data_context_project_config) - bucket_contents_after_instantiating_get_context = list_s3_bucket_contents( - bucket=bucket, prefix=data_context_prefix - ) - assert bucket_contents_after_instantiating_get_context == { - f"{expectations_store_prefix}/{store_backend_id_filename}", - f"{validation_results_store_prefix}/{store_backend_id_filename}", - } - - # Make sure ids are consistent - in_code_data_context_expectations_store_store_backend_id = in_code_data_context.stores[ - "expectations_S3_store" - ].store_backend_id - in_code_data_context_data_context_id = in_code_data_context.data_context_id - constructed_data_context_id = in_code_data_context._construct_data_context_id() - assert ( - in_code_data_context_expectations_store_store_backend_id - == in_code_data_context_data_context_id - == expectations_store_backend_id_from_s3_file - == s3_expectations_store_backend_id - == constructed_data_context_id - ) - - -@pytest.mark.aws_deps -@mock_s3 -def test_DataContext_construct_data_context_id_uses_id_stored_in_DataContextConfig_if_no_configured_expectations_store( # noqa: E501 - monkeypatch, aws_credentials -): - """ - What does this test and why? - - A DataContext should have an id. This ID should come from either: - 1. configured expectations store store_backend_id - 2. great_expectations.yml - 3. new generated id from DataContextConfig - This test verifies that DataContext._construct_data_context_id - uses the data_context_id from DataContextConfig when there is no configured expectations store - when instantiating the DataContext, - and also that this data_context_id is used to configure the expectations_store.store_backend_id - """ - bucket = "leakybucket" - expectations_store_prefix = "expectations_store_prefix" - validation_results_store_prefix = "validation_results_store_prefix" - data_docs_store_prefix = "data_docs_store_prefix" - manually_created_uuid = uuid.UUID("00000000-0000-0000-0000-000000000eee") - - # Create a bucket in Moto's mock AWS environment - conn = boto3.resource("s3", region_name="us-east-1") - conn.create_bucket(Bucket=bucket) - - # Create a DataContext (note NO existing expectations store already set up) - in_code_data_context_project_config = build_in_code_data_context_project_config( - bucket="leakybucket", - expectations_store_prefix=expectations_store_prefix, - validation_results_store_prefix=validation_results_store_prefix, - data_docs_store_prefix=data_docs_store_prefix, - ) - # Manually set the data_context_id in the project_config - in_code_data_context_project_config.data_context_id = manually_created_uuid - in_code_data_context = get_context(project_config=in_code_data_context_project_config) - - # Make sure the manually set data_context_id is propagated to all the appropriate places - assert ( - manually_created_uuid - == in_code_data_context.data_context_id - == in_code_data_context.stores[ - in_code_data_context.expectations_store_name - ].store_backend_id - ) - - -@pytest.mark.big -@mock_s3 -def test_suppress_store_backend_id_is_true_for_inactive_stores(): - """ - What does this test and why? - - Trying to read / set the store_backend_id for inactive stores should not be attempted during DataContext initialization. This test ensures that the _suppress_store_backend_id parameter is set to True for inactive stores. - - """ # noqa: E501 - - bucket = "leakybucket" - expectations_store_prefix = "expectations_store_prefix" - validation_results_store_prefix = "validation_results_store_prefix" - data_docs_store_prefix = "data_docs_store_prefix" - - # Create a bucket in Moto's mock AWS environment - conn = boto3.resource("s3", region_name="us-east-1") - conn.create_bucket(Bucket=bucket) - - # Create a DataContext - # Add inactive stores - inactive_bucket = "inactive_leakybucket" - stores = { - "expectations_S3_store": { - "class_name": "ExpectationsStore", - "store_backend": { - "class_name": "TupleS3StoreBackend", - "bucket": bucket, - "prefix": expectations_store_prefix, - }, - }, - "validation_results_S3_store": { - "class_name": "ValidationResultsStore", - "store_backend": { - "class_name": "TupleS3StoreBackend", - "bucket": bucket, - "prefix": validation_results_store_prefix, - }, - }, - "inactive_expectations_S3_store": { - "class_name": "ExpectationsStore", - "store_backend": { - "class_name": "TupleS3StoreBackend", - "bucket": inactive_bucket, - "prefix": expectations_store_prefix, - }, - }, - "inactive_validation_results_S3_store": { - "class_name": "ValidationResultsStore", - "store_backend": { - "class_name": "TupleS3StoreBackend", - "bucket": inactive_bucket, - "prefix": validation_results_store_prefix, - }, - }, - } - in_code_data_context_project_config = build_in_code_data_context_project_config( - bucket="leakybucket", - expectations_store_prefix=expectations_store_prefix, - validation_results_store_prefix=validation_results_store_prefix, - data_docs_store_prefix=data_docs_store_prefix, - stores=stores, - ) - in_code_data_context = get_context(project_config=in_code_data_context_project_config) - - # Check here that suppress_store_backend_id == True for inactive stores - # and False for active stores - assert ( - in_code_data_context.stores.get( - "inactive_expectations_S3_store" - ).store_backend._suppress_store_backend_id - is True - ) - assert ( - in_code_data_context.stores.get( - "inactive_validation_results_S3_store" - ).store_backend._suppress_store_backend_id - is True - ) - assert ( - in_code_data_context.stores.get( - "expectations_S3_store" - ).store_backend._suppress_store_backend_id - is False - ) - assert ( - in_code_data_context.stores.get( - "validation_results_S3_store" - ).store_backend._suppress_store_backend_id - is False - ) - - @pytest.mark.aws_deps @mock_s3 def test_inaccessible_active_bucket_warning_messages(caplog, aws_credentials): From f9b863aabb7b0d28fa6fa2fb89a23a80cf91b501 Mon Sep 17 00:00:00 2001 From: Chetan Kini Date: Mon, 16 Sep 2024 14:47:50 -0400 Subject: [PATCH 2/4] remove test --- .../test_data_context_in_code_config.py | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/tests/data_context/test_data_context_in_code_config.py b/tests/data_context/test_data_context_in_code_config.py index f138c241f0fe..62f3b1857a09 100644 --- a/tests/data_context/test_data_context_in_code_config.py +++ b/tests/data_context/test_data_context_in_code_config.py @@ -119,6 +119,147 @@ def list_s3_bucket_contents(bucket: str, prefix: str) -> Set[str]: } +@pytest.mark.aws_deps +@mock_s3 +def test_DataContext_construct_data_context_id_uses_id_stored_in_DataContextConfig_if_no_configured_expectations_store( # noqa: E501 + monkeypatch, aws_credentials +): + """ + What does this test and why? + + A DataContext should have an id. This ID should come from either: + 1. configured expectations store store_backend_id + 2. great_expectations.yml + 3. new generated id from DataContextConfig + This test verifies that DataContext._construct_data_context_id + uses the data_context_id from DataContextConfig when there is no configured expectations store + when instantiating the DataContext, + and also that this data_context_id is used to configure the expectations_store.store_backend_id + """ + bucket = "leakybucket" + expectations_store_prefix = "expectations_store_prefix" + validation_results_store_prefix = "validation_results_store_prefix" + data_docs_store_prefix = "data_docs_store_prefix" + manually_created_uuid = uuid.UUID("00000000-0000-0000-0000-000000000eee") + + # Create a bucket in Moto's mock AWS environment + conn = boto3.resource("s3", region_name="us-east-1") + conn.create_bucket(Bucket=bucket) + + # Create a DataContext (note NO existing expectations store already set up) + in_code_data_context_project_config = build_in_code_data_context_project_config( + bucket="leakybucket", + expectations_store_prefix=expectations_store_prefix, + validation_results_store_prefix=validation_results_store_prefix, + data_docs_store_prefix=data_docs_store_prefix, + ) + # Manually set the data_context_id in the project_config + in_code_data_context_project_config.data_context_id = manually_created_uuid + in_code_data_context = get_context(project_config=in_code_data_context_project_config) + + # Make sure the manually set data_context_id is propagated to all the appropriate places + assert ( + manually_created_uuid + == in_code_data_context.data_context_id + == in_code_data_context.stores[ + in_code_data_context.expectations_store_name + ].store_backend_id + ) + + +@pytest.mark.big +@mock_s3 +def test_suppress_store_backend_id_is_true_for_inactive_stores(): + """ + What does this test and why? + + Trying to read / set the store_backend_id for inactive stores should not be attempted during DataContext initialization. This test ensures that the _suppress_store_backend_id parameter is set to True for inactive stores. + + """ # noqa: E501 + + bucket = "leakybucket" + expectations_store_prefix = "expectations_store_prefix" + validation_results_store_prefix = "validation_results_store_prefix" + data_docs_store_prefix = "data_docs_store_prefix" + + # Create a bucket in Moto's mock AWS environment + conn = boto3.resource("s3", region_name="us-east-1") + conn.create_bucket(Bucket=bucket) + + # Create a DataContext + # Add inactive stores + inactive_bucket = "inactive_leakybucket" + stores = { + "expectations_S3_store": { + "class_name": "ExpectationsStore", + "store_backend": { + "class_name": "TupleS3StoreBackend", + "bucket": bucket, + "prefix": expectations_store_prefix, + }, + }, + "validation_results_S3_store": { + "class_name": "ValidationResultsStore", + "store_backend": { + "class_name": "TupleS3StoreBackend", + "bucket": bucket, + "prefix": validation_results_store_prefix, + }, + }, + "inactive_expectations_S3_store": { + "class_name": "ExpectationsStore", + "store_backend": { + "class_name": "TupleS3StoreBackend", + "bucket": inactive_bucket, + "prefix": expectations_store_prefix, + }, + }, + "inactive_validation_results_S3_store": { + "class_name": "ValidationResultsStore", + "store_backend": { + "class_name": "TupleS3StoreBackend", + "bucket": inactive_bucket, + "prefix": validation_results_store_prefix, + }, + }, + } + in_code_data_context_project_config = build_in_code_data_context_project_config( + bucket="leakybucket", + expectations_store_prefix=expectations_store_prefix, + validation_results_store_prefix=validation_results_store_prefix, + data_docs_store_prefix=data_docs_store_prefix, + stores=stores, + ) + in_code_data_context = get_context(project_config=in_code_data_context_project_config) + + # Check here that suppress_store_backend_id == True for inactive stores + # and False for active stores + assert ( + in_code_data_context.stores.get( + "inactive_expectations_S3_store" + ).store_backend._suppress_store_backend_id + is True + ) + assert ( + in_code_data_context.stores.get( + "inactive_validation_results_S3_store" + ).store_backend._suppress_store_backend_id + is True + ) + assert ( + in_code_data_context.stores.get( + "expectations_S3_store" + ).store_backend._suppress_store_backend_id + is False + ) + assert ( + in_code_data_context.stores.get( + "validation_results_S3_store" + ).store_backend._suppress_store_backend_id + is False + ) + + @pytest.mark.aws_deps @mock_s3 def test_inaccessible_active_bucket_warning_messages(caplog, aws_credentials): From 98fb7f06f1031ad0e89678dfc25f8bdb6aa0600d Mon Sep 17 00:00:00 2001 From: Chetan Kini Date: Mon, 16 Sep 2024 14:50:39 -0400 Subject: [PATCH 3/4] unit tests --- tests/data_context/test_data_context.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/data_context/test_data_context.py b/tests/data_context/test_data_context.py index e6d7b1ed77fa..2451dfc3d6ea 100644 --- a/tests/data_context/test_data_context.py +++ b/tests/data_context/test_data_context.py @@ -862,3 +862,19 @@ def test_set_oss_id_with_existing_config( "oss_id", ] assert oss_id == uuid.UUID(config["analytics"]["oss_id"]) + + +@pytest.mark.unit +def test_context_instantiation_sets_data_context_id(): + context = gx.get_context(mode="ephemeral") + assert context.data_context_id + + +@pytest.mark.unit +def test_context_instantiation_grabs_existing_data_context_id(tmp_path: pathlib.Path): + project_root_dir = tmp_path / "my_project_root_dir" + context = gx.get_context(mode="file", project_root_dir=project_root_dir) + data_context_id = context.data_context_id + context = gx.get_context(mode="file", project_root_dir=project_root_dir) + + assert context.data_context_id == data_context_id From 432423bd5ea80f19df15f07616802da43fa9d123 Mon Sep 17 00:00:00 2001 From: Chetan Kini Date: Mon, 16 Sep 2024 14:53:00 -0400 Subject: [PATCH 4/4] more updates to tests --- tests/data_context/test_data_context.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/data_context/test_data_context.py b/tests/data_context/test_data_context.py index 2451dfc3d6ea..a5bbf2b5bf7f 100644 --- a/tests/data_context/test_data_context.py +++ b/tests/data_context/test_data_context.py @@ -867,7 +867,7 @@ def test_set_oss_id_with_existing_config( @pytest.mark.unit def test_context_instantiation_sets_data_context_id(): context = gx.get_context(mode="ephemeral") - assert context.data_context_id + assert context.data_context_id is not None @pytest.mark.unit @@ -877,4 +877,5 @@ def test_context_instantiation_grabs_existing_data_context_id(tmp_path: pathlib. data_context_id = context.data_context_id context = gx.get_context(mode="file", project_root_dir=project_root_dir) + assert data_context_id is not None assert context.data_context_id == data_context_id