Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MAINTENANCE] Simplify logic around data context ID creation #10404

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
ValidationDefinitionFactory,
)
from great_expectations.core.yaml_handler import YAMLHandler
from great_expectations.data_context.store import Store, TupleStoreBackend
from great_expectations.data_context.store import Store
from great_expectations.data_context.templates import CONFIG_VARIABLES_TEMPLATE
from great_expectations.data_context.types.base import (
DataContextConfig,
Expand Down Expand Up @@ -228,17 +228,15 @@ def __init__(self, runtime_environment: Optional[dict] = None) -> None:
self._datasource_store = self._init_datasource_store()
self._init_datasources()

# Init data_context_id
self._data_context_id = self._construct_data_context_id()

# Override the project_config data_context_id if an expectations_store was already set up
self.config.data_context_id = self._data_context_id

self._suite_parameter_dependencies: dict = {}

self._init_data_source_manager()

self._attach_fluent_config_datasources_and_build_data_connectors(self.fluent_config)

# Analytics
self._data_context_id = self._construct_data_context_id()
self.config.data_context_id = self._data_context_id
self._init_analytics()
submit_event(event=DataContextInitializedEvent())

Expand Down Expand Up @@ -2030,16 +2028,10 @@ def _init_datasources(self) -> None:
datasource._rebuild_asset_data_connectors()

def _construct_data_context_id(self) -> uuid.UUID | None:
# Choose the id of the currently-configured expectations store, if it is a persistent store
expectations_store = self.stores[self.expectations_store_name]
if isinstance(expectations_store.store_backend, TupleStoreBackend):
# suppress_warnings since a warning will already have been issued during the store creation # noqa: E501
# if there was an invalid store config
return expectations_store.store_backend_id_warnings_suppressed

# Otherwise choose the id stored in the project_config
else:
return self.variables.data_context_id
if not self.variables.data_context_id:
self.variables.data_context_id = uuid.uuid4()
self.variables.save()
return self.variables.data_context_id

def get_validation_result( # noqa: C901
self,
Expand Down
17 changes: 17 additions & 0 deletions tests/data_context/test_data_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,3 +862,20 @@ def test_set_oss_id_with_existing_config(
"oss_id",
]
assert oss_id == uuid.UUID(config["analytics"]["oss_id"])


@pytest.mark.unit
def test_context_instantiation_sets_data_context_id():
    """An ephemeral context gets a non-None data_context_id at instantiation.

    Guards the new behavior where _construct_data_context_id generates and
    stores a fresh UUID when none exists, regardless of store backend type.
    """
    context = gx.get_context(mode="ephemeral")
    assert context.data_context_id is not None


@pytest.mark.unit
def test_context_instantiation_grabs_existing_data_context_id(tmp_path: pathlib.Path):
    """Re-instantiating a file-backed context reuses the persisted data_context_id.

    The first get_context call persists an id via variables.save(); the second
    call against the same project root must read that id back instead of
    generating a new one.
    """
    project_root_dir = tmp_path / "my_project_root_dir"
    context = gx.get_context(mode="file", project_root_dir=project_root_dir)
    data_context_id = context.data_context_id
    # Second instantiation over the same project root: should load, not regenerate.
    context = gx.get_context(mode="file", project_root_dir=project_root_dir)

    assert data_context_id is not None
    assert context.data_context_id == data_context_id
89 changes: 1 addition & 88 deletions tests/data_context/test_data_context_in_code_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from moto import mock_s3

from great_expectations.data_context import get_context
from great_expectations.data_context.store import StoreBackend, TupleS3StoreBackend
from great_expectations.data_context.store import StoreBackend
from great_expectations.data_context.types.base import DataContextConfig


Expand Down Expand Up @@ -119,93 +119,6 @@ def list_s3_bucket_contents(bucket: str, prefix: str) -> Set[str]:
}


@pytest.mark.aws_deps
@mock_s3
def test_DataContext_construct_data_context_id_uses_id_of_currently_configured_expectations_store(
aws_credentials,
Comment on lines -122 to -125
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We no longer have this conditional logic based on expectations store - we don't need these tests

):
"""
What does this test and why?

A DataContext should have an id. This ID should come from either:
1. configured expectations store store_backend_id
2. great_expectations.yml
3. new generated id from DataContextConfig
This test verifies that DataContext._construct_data_context_id
uses the store_backend_id from the currently configured expectations store
when instantiating the DataContext
"""

store_backend_id_filename = StoreBackend.STORE_BACKEND_ID_KEY[0]
bucket = "leakybucket"
expectations_store_prefix = "expectations_store_prefix"
validation_results_store_prefix = "validation_results_store_prefix"
data_docs_store_prefix = "data_docs_store_prefix"
data_context_prefix = ""

# Create a bucket in Moto's mock AWS environment
conn = boto3.resource("s3", region_name="us-east-1")
conn.create_bucket(Bucket=bucket)

# Create a TupleS3StoreBackend
# Initialize without store_backend_id and check that the store_backend_id is generated correctly
s3_expectations_store_backend = TupleS3StoreBackend(
filepath_template="my_file_{0}",
bucket=bucket,
prefix=expectations_store_prefix,
)
# Make sure store_backend_id is not the error string
store_error_uuid = uuid.UUID("00000000-0000-0000-0000-00000000e003")
s3_expectations_store_backend_id = s3_expectations_store_backend.store_backend_id
assert s3_expectations_store_backend_id != store_error_uuid

# Make sure the bucket contents are as expected
bucket_contents_after_creating_expectation_store = list_s3_bucket_contents(
bucket=bucket, prefix=data_context_prefix
)
assert bucket_contents_after_creating_expectation_store == {
f"{expectations_store_prefix}/{store_backend_id_filename}"
}

# Make sure the store_backend_id from the file is equal to reading from the property
expectations_store_backend_id_from_s3_file = get_store_backend_id_from_s3(
bucket=bucket,
prefix=expectations_store_prefix,
key=store_backend_id_filename,
)
assert expectations_store_backend_id_from_s3_file == s3_expectations_store_backend_id

# Create a DataContext (note existing expectations store already set up)
in_code_data_context_project_config = build_in_code_data_context_project_config(
bucket="leakybucket",
expectations_store_prefix=expectations_store_prefix,
validation_results_store_prefix=validation_results_store_prefix,
data_docs_store_prefix=data_docs_store_prefix,
)
in_code_data_context = get_context(project_config=in_code_data_context_project_config)
bucket_contents_after_instantiating_get_context = list_s3_bucket_contents(
bucket=bucket, prefix=data_context_prefix
)
assert bucket_contents_after_instantiating_get_context == {
f"{expectations_store_prefix}/{store_backend_id_filename}",
f"{validation_results_store_prefix}/{store_backend_id_filename}",
}

# Make sure ids are consistent
in_code_data_context_expectations_store_store_backend_id = in_code_data_context.stores[
"expectations_S3_store"
].store_backend_id
in_code_data_context_data_context_id = in_code_data_context.data_context_id
constructed_data_context_id = in_code_data_context._construct_data_context_id()
assert (
in_code_data_context_expectations_store_store_backend_id
== in_code_data_context_data_context_id
== expectations_store_backend_id_from_s3_file
== s3_expectations_store_backend_id
== constructed_data_context_id
)


@pytest.mark.aws_deps
@mock_s3
def test_DataContext_construct_data_context_id_uses_id_stored_in_DataContextConfig_if_no_configured_expectations_store( # noqa: E501
Expand Down
Loading