Skip to content

Commit

Permalink
Revert "remove all gx artifacts, and run gx scripts in CI"
Browse files Browse the repository at this point in the history
This reverts commit 3e7e9f6.
  • Loading branch information
usefulalgorithm committed Oct 30, 2024
1 parent 0de3c5d commit 76f4a61
Show file tree
Hide file tree
Showing 31 changed files with 1,203 additions and 51 deletions.
6 changes: 0 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@ jobs:
run: |
poetry run coverage run -m pytest
poetry run coverage xml
env:
SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
SNOWFLAKE_ROLE: ${{ secrets.SNOWFLAKE_ROLE }}
SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }}
- name: Codecov
uses: codecov/codecov-action@v4
Expand Down
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,6 @@ poetry.toml
# Minio system files
tests/s3/data/.minio.sys

# GX artifacts
tests/great_expectations/*/gx
# GX snowflake config in tests
tests/great_expectations/snowflake/config.yml

Expand Down
26 changes: 1 addition & 25 deletions tests/great_expectations/README.md
Original file line number Diff line number Diff line change
@@ -1,27 +1,5 @@
# GX test cases

A test case in the unit test is a directory that has a file named `run.py`. In the file, there should be a `run` function that cleans up the `gx` directory, creates the context in that directory, and uses `file` as the mode. In other words, it should look like this:

```python
import great_expectations as gx
import shutil
from pathlib import Path

def run() -> None:

# Clear gx context.
if (current_path / "gx").exists():
shutil.rmtree(current_path / "gx")

# Create Data Context.
context = gx.get_context(mode="file", project_root_dir=current_path)

# ... snipped ...

if __name__ == "__main__":
run()
```

Running `run.py` in each directory will regenerate the `gx` directory, which is where GX artifacts are stored.

## Snowflake
Expand All @@ -36,6 +14,4 @@ role: role
warehouse: wh
```
### CI
Pass environment variables so that the test can run.
When running `run.py`, remember to replace the connection string in `great_expectations.yml` (which contains sensitive information!) with a bogus value. The crawler can still parse the file if the values are fake.
Empty file.
12 changes: 12 additions & 0 deletions tests/great_expectations/basic_sql/gx/checkpoints/checkpoint.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"actions": [],
"id": "0d8680ae-ac87-4334-8bf5-76207091625a",
"name": "checkpoint",
"result_format": "SUMMARY",
"validation_definitions": [
{
"id": "10b8e369-5441-4000-be26-c4049430ab33",
"name": "validation definition"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
store_backend_id = f5c57cbe-c5dc-4917-bd68-bb3663887857
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"expectations": [
{
"id": "e41d3066-c656-4293-9bbd-97d69a68e973",
"kwargs": {
"column": "passenger_count",
"max_value": 6.0,
"min_value": 1.0
},
"meta": {},
"type": "expect_column_values_to_be_between"
},
{
"id": "49ef5837-a282-402c-9980-8516d3c4133a",
"kwargs": {
"column": "fare_amount",
"min_value": 0.0
},
"meta": {},
"type": "expect_column_values_to_be_between"
}
],
"id": "df694500-a2ab-46f8-916f-82741b003464",
"meta": {
"great_expectations_version": "1.2.0"
},
"name": "expectations",
"notes": null
}
101 changes: 101 additions & 0 deletions tests/great_expectations/basic_sql/gx/great_expectations.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
---
# Welcome to Great Expectations! Always know what to expect from your data.
#
# Here you can define datasources, batch kwargs generators, integrations and
# more. This file is intended to be committed to your repo. For help with
# configuration please:
# - Read our docs: https://docs.greatexpectations.io/docs/guides/connecting_to_your_data/connect_to_data_overview/#2-configure-your-datasource
# - Join our slack channel: http://greatexpectations.io/slack

# config_version refers to the syntactic version of this config file, and is used in maintaining backwards compatibility
# It is auto-generated and usually does not need to be changed.
config_version: 4.0

# This config file supports variable substitution which enables: 1) keeping
# secrets out of source control & 2) environment-based configuration changes
# such as staging vs prod.
#
# When GX encounters substitution syntax (like `my_key: ${my_value}` or
# `my_key: $my_value`) in the great_expectations.yml file, it will attempt
# to replace the value of `my_key` with the value from an environment
# variable `my_value` or a corresponding key read from this config file,
# which is defined through the `config_variables_file_path`.
# Environment variables take precedence over variables defined here.
#
# Substitution values defined here can be a simple (non-nested) value,
# nested value such as a dictionary, or an environment variable (i.e. ${ENV_VAR})
#
#
# https://docs.greatexpectations.io/docs/guides/setup/configuring_data_contexts/how_to_configure_credentials


config_variables_file_path: uncommitted/config_variables.yml

# The plugins_directory will be added to your python path for custom modules
# used to override and extend Great Expectations.
plugins_directory: plugins/

stores:
# Stores are configurable places to store things like Expectations, Validations
# Data Docs, and more. These are for advanced users only - most users can simply
# leave this section alone.
expectations_store:
class_name: ExpectationsStore
store_backend:
class_name: TupleFilesystemStoreBackend
base_directory: expectations/

validation_results_store:
class_name: ValidationResultsStore
store_backend:
class_name: TupleFilesystemStoreBackend
base_directory: uncommitted/validations/

checkpoint_store:
class_name: CheckpointStore
store_backend:
class_name: TupleFilesystemStoreBackend
suppress_store_backend_id: true
base_directory: checkpoints/

validation_definition_store:
class_name: ValidationDefinitionStore
store_backend:
class_name: TupleFilesystemStoreBackend
base_directory: validation_definitions/

expectations_store_name: expectations_store
validation_results_store_name: validation_results_store
checkpoint_store_name: checkpoint_store

data_docs_sites:
# Data Docs make it simple to visualize data quality in your project. These
# include Expectations, Validations & Profiles. They are built for all
# Datasources from JSON artifacts in the local repo including validations &
# profiles from the uncommitted directory. Read more at https://docs.greatexpectations.io/docs/terms/data_docs
local_site:
class_name: SiteBuilder
show_how_to_buttons: true
store_backend:
class_name: TupleFilesystemStoreBackend
base_directory: uncommitted/data_docs/local_site/
site_index_builder:
class_name: DefaultSiteIndexBuilder
fluent_datasources:
postgres db:
type: postgres
id: 07c83059-7610-43bb-af9d-4ae4193ef7b0
assets:
taxi data:
type: table
id: c98454e1-397d-4791-9cb8-2222ab88191f
batch_metadata: {}
batch_definitions:
batch definition:
id: 9c2ad11c-3090-46e1-b907-9d3ecd002541
partitioner:
table_name: nyc_taxi_data
schema_name:
connection_string: postgresql+psycopg2://try_gx:try_gx@postgres.workshops.greatexpectations.io/gx_example_db
analytics_enabled:
data_context_id: f5c57cbe-c5dc-4917-bd68-bb3663887857
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*index page*/
.ge-index-page-site-name-title {}
.ge-index-page-table-container {}
.ge-index-page-table {}
.ge-index-page-table-profiling-links-header {}
.ge-index-page-table-expectations-links-header {}
.ge-index-page-table-validations-links-header {}
.ge-index-page-table-profiling-links-list {}
.ge-index-page-table-profiling-links-item {}
.ge-index-page-table-expectation-suite-link {}
.ge-index-page-table-validation-links-list {}
.ge-index-page-table-validation-links-item {}

/*breadcrumbs*/
.ge-breadcrumbs {}
.ge-breadcrumbs-item {}

/*navigation sidebar*/
.ge-navigation-sidebar-container {}
.ge-navigation-sidebar-content {}
.ge-navigation-sidebar-title {}
.ge-navigation-sidebar-link {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
# This config file supports variable substitution which enables: 1) keeping
# secrets out of source control & 2) environment-based configuration changes
# such as staging vs prod.
#
# When GX encounters substitution syntax (like `my_key: ${my_value}` or
# `my_key: $my_value`) in the great_expectations.yml file, it will attempt
# to replace the value of `my_key` with the value from an environment
# variable `my_value` or a corresponding key read from this config file,
# which is defined through the `config_variables_file_path`.
# Environment variables take precedence over variables defined here.
#
# Substitution values defined here can be a simple (non-nested) value,
# nested value such as a dictionary, or an environment variable (i.e. ${ENV_VAR})
#
#
# https://docs.greatexpectations.io/docs/guides/setup/configuring_data_contexts/how_to_configure_credentials

instance_id: 9cb489d8-6ac4-4418-9ea9-10ccc2486881
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
store_backend_id = ebcfc9a2-5d58-42be-ae05-9aad1582e4d5
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
{
"id": null,
"meta": {
"active_batch_definition": {
"batch_identifiers": {},
"data_asset_name": "taxi data",
"data_connector_name": "fluent",
"datasource_name": "postgres db"
},
"batch_markers": {
"ge_load_time": "20241030T150056.987943Z"
},
"batch_parameters": null,
"batch_spec": {
"batch_identifiers": {},
"data_asset_name": "taxi data",
"schema_name": null,
"table_name": "nyc_taxi_data",
"type": "table"
},
"checkpoint_id": "0d8680ae-ac87-4334-8bf5-76207091625a",
"great_expectations_version": "1.2.0",
"run_id": {
"run_name": null,
"run_time": "2024-10-30T23:00:56.985672+08:00"
},
"validation_id": "10b8e369-5441-4000-be26-c4049430ab33",
"validation_time": "2024-10-30T15:00:56.985672+00:00"
},
"results": [
{
"exception_info": {
"exception_message": null,
"exception_traceback": null,
"raised_exception": false
},
"expectation_config": {
"id": "e41d3066-c656-4293-9bbd-97d69a68e973",
"kwargs": {
"batch_id": "postgres db-taxi data",
"column": "passenger_count",
"max_value": 6.0,
"min_value": 1.0
},
"meta": {},
"type": "expect_column_values_to_be_between"
},
"meta": {},
"result": {
"element_count": 20000,
"missing_count": 0,
"missing_percent": 0.0,
"partial_unexpected_counts": [],
"partial_unexpected_list": [],
"unexpected_count": 0,
"unexpected_percent": 0.0,
"unexpected_percent_nonmissing": 0.0,
"unexpected_percent_total": 0.0
},
"success": true
},
{
"exception_info": {
"exception_message": null,
"exception_traceback": null,
"raised_exception": false
},
"expectation_config": {
"id": "49ef5837-a282-402c-9980-8516d3c4133a",
"kwargs": {
"batch_id": "postgres db-taxi data",
"column": "fare_amount",
"min_value": 0.0
},
"meta": {},
"type": "expect_column_values_to_be_between"
},
"meta": {},
"result": {
"element_count": 20000,
"missing_count": 0,
"missing_percent": 0.0,
"partial_unexpected_counts": [
{
"count": 4,
"value": -52.0
},
{
"count": 2,
"value": -5.5
},
{
"count": 2,
"value": -4.0
},
{
"count": 2,
"value": -3.0
},
{
"count": 2,
"value": -0.1
},
{
"count": 2,
"value": -0.01
}
],
"partial_unexpected_list": [
-0.01,
-52.0,
-0.1,
-5.5,
-3.0,
-52.0,
-4.0,
-0.01,
-52.0,
-0.1,
-5.5,
-3.0,
-52.0,
-4.0
],
"unexpected_count": 14,
"unexpected_percent": 0.06999999999999999,
"unexpected_percent_nonmissing": 0.06999999999999999,
"unexpected_percent_total": 0.06999999999999999
},
"success": false
}
],
"statistics": {
"evaluated_expectations": 2,
"success_percent": 50.0,
"successful_expectations": 1,
"unsuccessful_expectations": 1
},
"success": false,
"suite_name": "expectations",
"suite_parameters": {}
}
Loading

0 comments on commit 76f4a61

Please sign in to comment.