Skip to content

Commit

Permalink
[DOCS] Puts 1.0 examples for how to customize Expectations under test. (
Browse files Browse the repository at this point in the history
  • Loading branch information
Rachel-Reverie authored Aug 20, 2024
1 parent 23b5257 commit 2fd5143
Show file tree
Hide file tree
Showing 8 changed files with 322 additions and 122 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"","Name","PClass","Age","Sex","Survived","SexCode"
"1","Allen, Miss Elisabeth Walton","1st",29,"female",1,1
"2","Allison, Miss Helen Loraine","1st",2,"female",0,1
"3","Allison, Mr Hudson Joshua Creighton","1st",30,"male",0,0
"4","Allison, Mrs Hudson JC (Bessie Waldo Daniels)","1st",25,"female",0,1
"5","Allison, Master Hudson Trevor","1st",0.92,"male",1,0
"6","Anderson, Mr Harry","1st",47,"male",1,0
"7","Andrews, Miss Kornelia Theodosia","1st",63,"female",1,1
"8","Andrews, Mr Thomas, jr","1st",39,"male",0,0
"358","Caldwell, Mrs Albert Francis (Sylvia Mae Harbaugh)","2nd",26,"female",1,1
"359","Caldwell, Master Alden Gates","2nd",0.83,"male",1,0
"360","Cameron, Miss Clear","2nd",31,"female",1,1
"361","Campbell, Mr William","2nd",NA,"male",0,0
"362","Carbines, Mr William","2nd",19,"male",0,0
"363","Carter, Rev Ernest Courtenay","2nd",54,"male",0,0
"364","Carter, Mrs Ernest Courtenay (Lillian Hughes)","2nd",44,"female",0,1
"365","Chapman, Mr Charles Henry","2nd",52,"male",0,0
"366","Chapman, Mr John Henry","2nd",30,"male",0,0
"367","Chapman, Mrs John Henry (Elizabeth Lawry)","2nd",30,"female",0,1
"368","Christy, Mrs Alice Frances","2nd",NA,"female",1,1
"369","Christy, Miss Julie","2nd",NA,"female",1,1

32 changes: 32 additions & 0 deletions docs/docusaurus/docs/components/examples_under_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,36 @@
),
]

# Test fixtures for the example scripts under
# docs/docusaurus/docs/core/customize_expectations/_examples/ (one fixture per script).
docs_examples_customize_expectations = [
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "docs_example_define_a_custom_expectation_class" tests/integration/test_script_runner.py
        name="docs_example_define_a_custom_expectation_class",
        user_flow_script="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py",
        data_dir="docs/docusaurus/docs/components/_testing/test_data_sets/single_test_file",
        # data_context_dir="",
        backend_dependencies=[],
    ),
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "docs_example_expectation_row_conditions" tests/integration/test_script_runner.py
        name="docs_example_expectation_row_conditions",
        user_flow_script="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py",
        # NOTE(review): "titantic" (sic) matches the "titantic.csv" path used by the
        # example script; if the spelling is ever corrected, rename the directory,
        # the CSV file, and the script's path together.
        data_dir="docs/docusaurus/docs/components/_testing/test_data_sets/titantic_test_file",
        # data_context_dir="",
        backend_dependencies=[],
    ),
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "docs_example_use_sql_to_define_a_custom_expectation" tests/integration/test_script_runner.py
        name="docs_example_use_sql_to_define_a_custom_expectation",
        user_flow_script="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py",
        data_dir="tests/test_sets/taxi_yellow_tripdata_samples/sqlite",
        # data_context_dir="",
        backend_dependencies=[],
    ),
]

# Extend the docs_tests list with the above sublists (only the docs_tests list is imported
# into `test_script_runner.py` and actually used in CI checks).

Expand All @@ -473,6 +503,8 @@

docs_tests.extend(example_scripts_for_define_expectations)

# Register the Customize Expectations example fixtures with the docs test list.
docs_tests.extend(docs_examples_customize_expectations)

docs_tests.extend(docs_examples_trigger_actions_based_on_validation_results)

docs_tests.extend(learn_data_quality_use_cases)
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
This is an example script for how to define a Custom Expectation class.
To test, run:
pytest --docs-tests -k "docs_example_define_a_custom_expectation_class" tests/integration/test_script_runner.py
"""


def set_up_context_for_example(context):
    """Register the Data Source, Data Asset, and Batch Definition used by this example.

    Adds a pandas filesystem Data Source rooted at ``./data/folder_with_data``,
    a CSV Data Asset on it, and a monthly Batch Definition whose regex matches
    ``yellow_tripdata_sample_<year>-<month>.csv`` file names. After each step
    the created object's ``name`` is asserted to equal the requested name.

    Args:
        context: The Data Context to configure. It must expose
            ``data_sources.add_pandas_filesystem``.
    """
    # Create the Data Source
    source_folder = "./data/folder_with_data"
    data_source_name = "my_data_source"
    data_source = context.data_sources.add_pandas_filesystem(
        name=data_source_name, base_directory=source_folder
    )
    assert data_source.name == data_source_name

    # Add a Data Asset
    asset_name = "my_data_asset"
    data_asset = data_source.add_csv_asset(name=asset_name)
    assert data_asset.name == asset_name

    # Add a Batch Definition
    batch_definition_name = "my_batch_definition"
    # Named groups capture the year and month from each file name.
    batch_definition_regex = (
        r"yellow_tripdata_sample_(?P<year>\d{4})-(?P<month>\d{2})\.csv"
    )
    batch_definition = data_asset.add_batch_definition_monthly(
        name=batch_definition_name, regex=batch_definition_regex
    )
    assert batch_definition.name == batch_definition_name


# EXAMPLE SCRIPT STARTS HERE:
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py - full code example">
import great_expectations as gx

# Get the Data Context that the rest of this example works with.
context = gx.get_context()
# Hide this
set_up_context_for_example(context)


# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py - define description attribute for a cusom Expectation">
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py - define default attributes for a custom Expectation class">
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py - define a custom Expectation subclass">
class ExpectValidPassengerCount(gx.expectations.ExpectColumnValuesToBeBetween):
    # </snippet>
    # Default arguments, so the Expectation can be instantiated without parameters.
    column: str = "passenger_count"
    min_value: int = 1
    max_value: int = 6
    # </snippet>
    # Human-readable description of what this Expectation checks.
    description: str = "There should be between **1** and **6** passengers."


# </snippet>

# Instantiate the custom Expectation
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py - instantiate a Custom Expectation">
expectation = ExpectValidPassengerCount()  # Uses the predefined default values
# </snippet>

# Optional. Retrieve a Batch of sample data, one step at a time,
# and validate the Expectation against it.
data_source_name = "my_data_source"
asset_name = "my_data_asset"
batch_definition_name = "my_batch_definition"
data_source = context.data_sources.get(data_source_name)
data_asset = data_source.get_asset(asset_name)
batch_definition = data_asset.get_batch_definition(batch_definition_name)
batch = batch_definition.get_batch()

validation_result = batch.validate(expectation)
print(validation_result)
# </snippet>
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""
This is an example script for how to use Expectation row conditions.
To test, run:
pytest --docs-tests -k "docs_example_expectation_row_conditions" tests/integration/test_script_runner.py
"""


def set_up_context_for_example(context):
    """Register the Data Source, Data Asset, and Batch Definition used by this example.

    Adds a pandas filesystem Data Source rooted at ``./data/folder_with_data``,
    a CSV Data Asset on it, and a path-based Batch Definition pointing at
    ``titantic.csv``. After each step the created object's ``name`` is
    asserted to equal the requested name.

    Args:
        context: The Data Context to configure. It must expose
            ``data_sources.add_pandas_filesystem``.
    """
    # Create the Data Source
    source_folder = "./data/folder_with_data"
    data_source_name = "my_data_source"
    data_source = context.data_sources.add_pandas_filesystem(
        name=data_source_name, base_directory=source_folder
    )
    assert data_source.name == data_source_name

    # Add a Data Asset
    asset_name = "my_data_asset"
    data_asset = data_source.add_csv_asset(name=asset_name)
    assert data_asset.name == asset_name

    # Add a Batch Definition
    batch_definition_name = "titantic_passengers"
    batch_definition_path = "titantic.csv"

    batch_definition = data_asset.add_batch_definition_path(
        name=batch_definition_name, path=batch_definition_path
    )
    assert batch_definition.name == batch_definition_name


# EXAMPLE SCRIPT STARTS HERE:
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py - full code example">
import great_expectations as gx

context = gx.get_context()
# Hide this
set_up_context_for_example(context)

# Retrieve the Batch that the Expectations below are validated against,
# walking from Data Source to Data Asset to Batch Definition:
data_source_name = "my_data_source"
data_asset_name = "my_data_asset"
batch_definition_name = "titantic_passengers"
data_source = context.data_sources.get(data_source_name)
data_asset = data_source.get_asset(data_asset_name)
batch_definition = data_asset.get_batch_definition(batch_definition_name)
batch = batch_definition.get_batch()

# An unconditional Expectation is defined without the `row_condition` or `condition_parser` parameters:
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py - example unconditional Expectation">
expectation = gx.expectations.ExpectColumnValuesToBeInSet(
    column="Survived", value_set=[0, 1]
)
# </snippet>

# Test the Expectation:
print(batch.validate(expectation))

# A Conditional Expectation for a pandas Data Source would be defined like this:
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py - example conditional Expectation">
conditional_expectation = gx.expectations.ExpectColumnValuesToBeInSet(
    column="Survived",
    value_set=[1],
    # <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py - pandas example row_condition">
    condition_parser="pandas",
    row_condition='PClass=="1st"',
    # </snippet>
)
# </snippet>

# Test the Conditional Expectation:
print(batch.validate(conditional_expectation))

# A Conditional Expectation for a Spark or SQL Data Source would be defined like this:
# (It is only defined here, not validated: the Batch in this script comes from
# a pandas Data Source. Note that it rebinds `conditional_expectation`.)
conditional_expectation = gx.expectations.ExpectColumnValuesToBeInSet(
    column="Survived",
    value_set=[1],
    # <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py - spark example row_condition">
    condition_parser="spark",
    row_condition='PClass=="1st"',
    # </snippet>
)
# </snippet>
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
This is an example script for how to use SQL to define a custom Expectation.
To test, run:
pytest --docs-tests -k "docs_example_use_sql_to_define_a_custom_expectation" tests/integration/test_script_runner.py
"""


def set_up_context_for_example(context):
    """Register the SQL Data Source, Data Asset, and Batch Definition used by this example.

    Adds a SQLite Data Source for ``sqlite:///data/yellow_tripdata.db``, a
    table Data Asset for the ``yellow_tripdata_sample_2019_01`` table, and a
    whole-table Batch Definition. After each step the created object's
    ``name`` is asserted to equal the requested name.

    Args:
        context: The Data Context to configure. It must expose
            ``data_sources.add_sqlite``.
    """
    # Create the Data Source
    connection_string = "sqlite:///data/yellow_tripdata.db"
    data_source_name = "my_sql_data_source"
    data_source = context.data_sources.add_sqlite(
        name=data_source_name, connection_string=connection_string
    )
    assert data_source.name == data_source_name

    # Add a Data Asset
    asset_name = "my_data_asset"
    database_table_name = "yellow_tripdata_sample_2019_01"
    data_asset = data_source.add_table_asset(
        table_name=database_table_name, name=asset_name
    )
    assert data_asset.name == asset_name

    # Add a Batch Definition
    batch_definition_name = "my_batch_definition"
    batch_definition = data_asset.add_batch_definition_whole_table(
        batch_definition_name
    )
    assert batch_definition.name == batch_definition_name


# EXAMPLE SCRIPT STARTS HERE:
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - full code example">
import great_expectations as gx


# Define a custom Expectation that uses SQL by subclassing UnexpectedRowsExpectation
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define a custom UnexpectedRowsExpectation">
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define the query for an UnexpectedRowsExpectation">
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define a more descriptive name for an UnexpectedRowsExpectation">
class ExpectPassengerCountToBeLegal(
    gx.expectations.expectation.UnexpectedRowsExpectation
):
    # </snippet>
    # Query selecting the rows to report as unexpected. `{batch}` presumably
    # stands in for the Batch under validation (see the
    # UnexpectedRowsExpectation documentation).
    unexpected_rows_query: str = (
        "SELECT * FROM {batch} WHERE passenger_count > 6 or passenger_count < 0"
    )
    # </snippet>
    # Human-readable description of what this Expectation checks.
    description: str = "There should be no more than **6** passengers."


# </snippet>

context = gx.get_context()
# Hide this
set_up_context_for_example(context)

# Instantiate the custom Expectation
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - instantiate the custom SQL Expectation">
expectation = ExpectPassengerCountToBeLegal()
# </snippet>

# Test the Expectation
data_source_name = "my_sql_data_source"
data_asset_name = "my_data_asset"
batch_definition_name = "my_batch_definition"
# Retrieve the Data Source through `context.data_sources`, the same accessor
# the Data Source was registered with in `set_up_context_for_example`.
batch = (
    context.data_sources.get(data_source_name)
    .get_asset(data_asset_name)
    .get_batch_definition(batch_definition_name)
    .get_batch()
)

batch.validate(expectation)
# </snippet>
Loading

0 comments on commit 2fd5143

Please sign in to comment.