Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DOCS] Puts 1.0 examples for how to customize Expectations under test. #10235

Merged
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"","Name","PClass","Age","Sex","Survived","SexCode"
"1","Allen, Miss Elisabeth Walton","1st",29,"female",1,1
"2","Allison, Miss Helen Loraine","1st",2,"female",0,1
"3","Allison, Mr Hudson Joshua Creighton","1st",30,"male",0,0
"4","Allison, Mrs Hudson JC (Bessie Waldo Daniels)","1st",25,"female",0,1
"5","Allison, Master Hudson Trevor","1st",0.92,"male",1,0
"6","Anderson, Mr Harry","1st",47,"male",1,0
"7","Andrews, Miss Kornelia Theodosia","1st",63,"female",1,1
"8","Andrews, Mr Thomas, jr","1st",39,"male",0,0
"358","Caldwell, Mrs Albert Francis (Sylvia Mae Harbaugh)","2nd",26,"female",1,1
"359","Caldwell, Master Alden Gates","2nd",0.83,"male",1,0
"360","Cameron, Miss Clear","2nd",31,"female",1,1
"361","Campbell, Mr William","2nd",NA,"male",0,0
"362","Carbines, Mr William","2nd",19,"male",0,0
"363","Carter, Rev Ernest Courtenay","2nd",54,"male",0,0
"364","Carter, Mrs Ernest Courtenay (Lillian Hughes)","2nd",44,"female",0,1
"365","Chapman, Mr Charles Henry","2nd",52,"male",0,0
"366","Chapman, Mr John Henry","2nd",30,"male",0,0
"367","Chapman, Mrs John Henry (Elizabeth Lawry)","2nd",30,"female",0,1
"368","Christy, Mrs Alice Frances","2nd",NA,"female",1,1
"369","Christy, Miss Julie","2nd",NA,"female",1,1

38 changes: 35 additions & 3 deletions docs/docusaurus/docs/components/examples_under_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,36 @@

docs_tests = []

# Fixtures for the "customize Expectations" docs examples. Each fixture names
# the example script to run and the directory holding the data it reads.
docs_examples_customize_expectations = [
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "docs_example_define_a_custom_expectation_class" tests/integration/test_script_runner.py
        name="docs_example_define_a_custom_expectation_class",
        user_flow_script="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py",
        data_dir="docs/docusaurus/docs/components/_testing/test_data_sets/single_test_file",
        # data_context_dir="",
        backend_dependencies=[],
    ),
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "doc_example_expectation_row_conditions" tests/integration/test_script_runner.py
        name="doc_example_expectation_row_conditions",
        user_flow_script="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py",
        data_dir="docs/docusaurus/docs/components/_testing/test_data_sets/titantic_test_file",
        # data_context_dir="",
        backend_dependencies=[],
    ),
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "docs_example_use_sql_to_define_a_custom_expectation" tests/integration/test_script_runner.py
        name="docs_example_use_sql_to_define_a_custom_expectation",
        user_flow_script="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py",
        data_dir="tests/test_sets/taxi_yellow_tripdata_samples/sqlite",
        # data_context_dir="",
        backend_dependencies=[],
    ),
]

docs_example_scripts_run_validations = [
# Create a Validation Definition
IntegrationTestFixture(
Expand Down Expand Up @@ -246,6 +276,7 @@
),
]


learn_data_quality_use_cases = [
# Schema.
IntegrationTestFixture(
Expand Down Expand Up @@ -273,11 +304,12 @@

# Extend the docs_tests list with the above sublists (only the docs_tests list is imported
# into `test_script_runner.py` and actually used in CI checks).
# Each sublist is added exactly once so that no fixture is registered twice.

docs_tests.extend(connect_to_filesystem_data_create_a_data_source)
docs_tests.extend(connect_to_filesystem_data_create_a_data_asset)
docs_tests.extend(connect_to_filesystem_data_create_a_batch_definition)

docs_tests.extend(docs_examples_customize_expectations)

docs_tests.extend(docs_example_scripts_run_validations)

docs_tests.extend(learn_data_quality_use_cases)
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
This is an example script for how to define a Custom Expectation class.

To test, run:
pytest --docs-tests -k "docs_example_define_a_custom_expectation_class" tests/integration/test_script_runner.py
"""


def set_up_context_for_example(context):
    """Register the Data Source, Data Asset, and Batch Definition the example uses.

    Adds a pandas filesystem Data Source named "my_data_source", a CSV Data
    Asset named "my_data_asset", and a monthly Batch Definition named
    "my_batch_definition" to `context`. After each step the returned object's
    name is asserted to equal the name that was requested.
    """
    # Data Source: pandas filesystem source rooted at the sample-data folder.
    datasource = context.data_sources.add_pandas_filesystem(
        name="my_data_source", base_directory="./data/folder_with_data"
    )
    assert datasource.name == "my_data_source"

    # Data Asset: CSV asset on that Data Source.
    csv_asset = datasource.add_csv_asset(name="my_data_asset")
    assert csv_asset.name == "my_data_asset"

    # Batch Definition: the regex captures `year` and `month` from the file name.
    monthly_pattern = r"yellow_tripdata_sample_(?P<year>\d{4})-(?P<month>\d{2})\.csv"
    monthly_batches = csv_asset.add_batch_definition_monthly(
        name="my_batch_definition", regex=monthly_pattern
    )
    assert monthly_batches.name == "my_batch_definition"


# EXAMPLE SCRIPT STARTS HERE:
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py - full code example">
import great_expectations as gx

context = gx.get_context()
# Hide this
set_up_context_for_example(context)


# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py - define description attribute for a cusom Expectation">
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py - define default attributes for a custom Expectation class">
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py - define a custom Expectation subclass">
class ExpectValidPassengerCount(gx.expectations.ExpectColumnValuesToBeBetween):
    # </snippet>
    column: str = "passenger_count"
    min_value: int = 1
    max_value: int = 6
    # </snippet>
    description: str = "There should be between **1** and **6** passengers."


# </snippet>

# Instantiate the custom Expectation
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/define_a_custom_expectation_class.py - instantiate a Custom Expectation">
expectation = ExpectValidPassengerCount()  # Uses the predefined default values
# </snippet>

# Optional. Validate some sample data against the Expectation
data_source_name = "my_data_source"
asset_name = "my_data_asset"
batch_definition_name = "my_batch_definition"
data_source = context.data_sources.get(data_source_name)
data_asset = data_source.get_asset(asset_name)
batch_definition = data_asset.get_batch_definition(batch_definition_name)
batch = batch_definition.get_batch()

validation_result = batch.validate(expectation)
print(validation_result)
# </snippet>
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""
This is an example script for how to use Expectation row conditions.

To test, run:
pytest --docs-tests -k "doc_example_expectation_row_conditions" tests/integration/test_script_runner.py
"""


def set_up_context_for_example(context):
    """Register the Data Source, Data Asset, and Batch Definition the example uses.

    Adds a pandas filesystem Data Source named "my_data_source", a CSV Data
    Asset named "my_data_asset", and a path-based Batch Definition named
    "titantic_passengers" pointing at "titantic.csv". After each step the
    returned object's name is asserted to equal the name that was requested.
    """
    # Data Source: pandas filesystem source rooted at the sample-data folder.
    datasource = context.data_sources.add_pandas_filesystem(
        name="my_data_source", base_directory="./data/folder_with_data"
    )
    assert datasource.name == "my_data_source"

    # Data Asset: CSV asset on that Data Source.
    csv_asset = datasource.add_csv_asset(name="my_data_asset")
    assert csv_asset.name == "my_data_asset"

    # Batch Definition: a single file addressed by its path.
    passengers = csv_asset.add_batch_definition_path(
        name="titantic_passengers", path="titantic.csv"
    )
    assert passengers.name == "titantic_passengers"


# EXAMPLE SCRIPT STARTS HERE:
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py - full code example">
import great_expectations as gx

context = gx.get_context()
# Hide this
set_up_context_for_example(context)

# Retrieve a Batch to test the Expectations against:
data_source_name = "my_data_source"
data_asset_name = "my_data_asset"
batch_definition_name = "titantic_passengers"
data_source = context.data_sources.get(data_source_name)
data_asset = data_source.get_asset(data_asset_name)
batch_definition = data_asset.get_batch_definition(batch_definition_name)
batch = batch_definition.get_batch()

# An unconditional Expectation omits both `row_condition` and `condition_parser`:
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py - example unconditional Expectation">
expectation = gx.expectations.ExpectColumnValuesToBeInSet(
    column="Survived", value_set=[0, 1]
)
# </snippet>

# Test the Expectation:
unconditional_result = batch.validate(expectation)
print(unconditional_result)

# A Conditional Expectation for a pandas Data Source is defined like this:
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py - example conditional Expectation">
conditional_expectation = gx.expectations.ExpectColumnValuesToBeInSet(
    column="Survived",
    value_set=[1],
    # <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py - pandas example row_condition">
    condition_parser="pandas",
    row_condition='PClass=="1st"',
    # </snippet>
)
# </snippet>

# Test the Conditional Expectation:
conditional_result = batch.validate(conditional_expectation)
print(conditional_result)

# A Conditional Expectation for a Spark or SQL Data Source is defined like this
# (it is only constructed here, not validated against the pandas Batch above):
conditional_expectation = gx.expectations.ExpectColumnValuesToBeInSet(
    column="Survived",
    value_set=[1],
    # <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/expectation_row_conditions.py - spark example row_condition">
    condition_parser="spark",
    row_condition='PClass=="1st"',
    # </snippet>
)
# </snippet>
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""
This is an example script for how to use SQL to define a custom Expectation.

To test, run:
pytest --docs-tests -k "docs_example_use_sql_to_define_a_custom_expectation" tests/integration/test_script_runner.py
"""


def set_up_context_for_example(context):
    """Register the SQL Data Source, Data Asset, and Batch Definition the example uses.

    Adds a SQLite Data Source named "my_sql_data_source", a table Data Asset
    named "my_data_asset" over the "yellow_tripdata_sample_2019_01" table, and
    a whole-table Batch Definition named "my_batch_definition". After each
    step the returned object's name is asserted to equal the requested name.
    """
    # Data Source: SQLite database addressed by its connection string.
    sqlite_source = context.data_sources.add_sqlite(
        name="my_sql_data_source",
        connection_string="sqlite:///data/yellow_tripdata.db",
    )
    assert sqlite_source.name == "my_sql_data_source"

    # Data Asset: a single table in that database.
    table_asset = sqlite_source.add_table_asset(
        table_name="yellow_tripdata_sample_2019_01", name="my_data_asset"
    )
    assert table_asset.name == "my_data_asset"

    # Batch Definition: the whole table as one Batch.
    whole_table = table_asset.add_batch_definition_whole_table("my_batch_definition")
    assert whole_table.name == "my_batch_definition"


# EXAMPLE SCRIPT STARTS HERE:
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - full code example">
import great_expectations as gx


# Define a custom Expectation that uses SQL by subclassing UnexpectedRowsExpectation
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define a custom UnexpectedRowsExpectation">
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define the query for an UnexpectedRowsExpectation">
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define a more descriptive name for an UnexpectedRowsExpectation">
class ExpectPassengerCountToBeLegal(
    gx.expectations.expectation.UnexpectedRowsExpectation
):
    # </snippet>
    unexpected_rows_query: str = "SELECT * FROM {batch} WHERE passenger_count > 6"
    # </snippet>
    description: str = "There should be no more than **6** passengers."


# </snippet>

context = gx.get_context()
# Hide this
set_up_context_for_example(context)

# Instantiate the custom Expectation
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - instantiate the custom SQL Expectation">
expectation = ExpectPassengerCountToBeLegal()
# </snippet>

# Test the Expectation
data_source_name = "my_sql_data_source"
data_asset_name = "my_data_asset"
batch_definition_name = "my_batch_definition"
# Look the Data Source up through `context.data_sources`, matching the other
# customize-Expectations examples.
batch = (
    context.data_sources.get(data_source_name)
    .get_asset(data_asset_name)
    .get_batch_definition(batch_definition_name)
    .get_batch()
)

batch.validate(expectation)
# </snippet>
Loading
Loading