Skip to content

Commit

Permalink
Puts 1.0 example code for how to create an SQL Expectation from Unexp…
Browse files Browse the repository at this point in the history
…ectedRowsExpectation into scripts under test.
  • Loading branch information
Rachel-Reverie committed Aug 19, 2024
1 parent d01e529 commit 72746b7
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 45 deletions.
9 changes: 9 additions & 0 deletions docs/docusaurus/docs/components/examples_under_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@
# data_context_dir="",
backend_dependencies=[],
),
IntegrationTestFixture(
# To test, run:
# pytest --docs-tests -k "docs_example_use_sql_to_define_a_custom_expectation" tests/integration/test_script_runner.py
name="docs_example_use_sql_to_define_a_custom_expectation",
user_flow_script="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py",
data_dir="tests/test_sets/taxi_yellow_tripdata_samples/sqlite",
# data_context_dir="",
backend_dependencies=[],
),
]

learn_data_quality_use_cases = [
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""
This is an example script for how to use SQL to define a custom Expectation.
To test, run:
pytest --docs-tests -k "docs_example_use_sql_to_define_a_custom_expectation" tests/integration/test_script_runner.py
"""


def set_up_context_for_example(context):
    """Register the objects the example script later retrieves by name.

    Adds a SQLite Data Source to ``context``, a table Data Asset on that
    Data Source, and a whole-table Batch Definition on that Data Asset.

    Args:
        context: The Data Context to configure.

    Raises:
        AssertionError: If any created object does not report the name it
            was requested with.
    """
    # Data Source pointing at the sample SQLite database.
    source_name = "my_sql_data_source"
    sqlite_source = context.data_sources.add_sqlite(
        name=source_name,
        connection_string="sqlite:///data/yellow_tripdata.db",
    )
    assert sqlite_source.name == source_name

    # Table Data Asset on that Data Source.
    table_asset_name = "my_data_asset"
    table_asset = sqlite_source.add_table_asset(
        table_name="yellow_tripdata_sample_2019_01",
        name=table_asset_name,
    )
    assert table_asset.name == table_asset_name

    # Batch Definition covering the whole table.
    whole_table_name = "my_batch_definition"
    whole_table = table_asset.add_batch_definition_whole_table(whole_table_name)
    assert whole_table.name == whole_table_name


# EXAMPLE SCRIPT STARTS HERE:
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - full code example">
import great_expectations as gx


# Define a custom Expectation that uses SQL by subclassing UnexpectedRowsExpectation.
# The subclass overrides two attributes:
#   - unexpected_rows_query: a SQL query whose returned rows are the ones that
#     fail validation. The {batch} placeholder is replaced with the Batch of
#     data being validated when the Expectation is evaluated.
#   - description: Markdown-formatted text describing the Expectation when
#     results are rendered into Data Docs.
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define a custom UnexpectedRowsExpectation">
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define the query for an UnexpectedRowsExpectation">
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define a more descriptive name for an UnexpectedRowsExpectation">
class ExpectPassengerCountToBeLegal(
    gx.expectations.expectation.UnexpectedRowsExpectation
):
    # </snippet>
    unexpected_rows_query: str = "SELECT * FROM {batch} WHERE passenger_count > 6"
    # </snippet>
    description: str = "There should be no more than **6** passengers."


# </snippet>

context = gx.get_context()
# Hide this
set_up_context_for_example(context)

# Instantiate the custom Expectation
# <snippet name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - instantiate the custom SQL Expectation">
expectation = ExpectPassengerCountToBeLegal()
# </snippet>

# Test the Expectation
# These names must match the Data Source, Data Asset, and Batch Definition
# that were added to the Data Context.
data_source_name = "my_sql_data_source"
data_asset_name = "my_data_asset"
batch_definition_name = "my_batch_definition"
# Retrieve the Data Source through `context.data_sources`, the same namespace
# that was used to create it.
batch = (
    context.data_sources.get(data_source_name)
    .get_asset(data_asset_name)
    .get_batch_definition(batch_definition_name)
    .get_batch()
)

batch.validate(expectation)
# </snippet>
Original file line number Diff line number Diff line change
Expand Up @@ -28,79 +28,43 @@ You customize an `UnexpectedRowsExpectation` in essentially the same manner as y

<TabItem value="procedure" label="Procedure">

1. Import the `UnexpectedRowsExpectation` class:

```python title="Python"
from great_expectations.expectations import UnexpectedRowsExpectation
```

2. Create a new Expectation class that inherits the `UnexpectedRowsExpectation` class.
1. Create a new Expectation class that inherits the `UnexpectedRowsExpectation` class.

The class name `UnexpectedRowsExpectation` describes the functionality of the Expectation: it finds rows with unexpected values. When you create a customized Expectation class you can provide a class name that is more indicative of your specific use case. In this example, the customized subclass of `UnexpectedRowsExpectation` will be used to find invalid passenger counts in taxi trip data:

```python title="Python"
class ExpectPassengerCountToBeLegal(UnexpectedRowsExpectation):
```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define a more descriptive name for an UnexpectedRowsExpectation"
```

3. Override the Expectation's `unexpected_rows_query` attribute.
2. Override the Expectation's `unexpected_rows_query` attribute.

The `unexpected_rows_query` attribute is a SQL or Spark-SQL query that returns a selection of rows from the Batch of data being validated. By default, rows that are returned have failed the validation check.

Although the `unexpected_rows_query` should be written in standard SQL or Spark-SQL syntax, it must also contain the special `{batch}` placeholder. When the Expectation is evaluated, the `{batch}` placeholder will be replaced with the Batch of data that is validated.

In this example, `unexpected_rows_query` will select any rows where the passenger count is greater than `6`. These rows will fail validation for this Expectation:

```python title="Python"
class ExpectPassengerCountToBeLegal(UnexpectedRowsExpectation):
# highlight-start
unexpected_rows_query: str = "SELECT * FROM {batch} WHERE passenger_count > 6"
# highlight-end
```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define the query for an UnexpectedRowsExpectation"
```

5. Customize the rendering of the new Expectation when displayed in Data Docs.
3. Customize the rendering of the new Expectation when displayed in Data Docs.

As with other Expectations, the `description` attribute contains the text describing the customized Expectation when your results are rendered into Data Docs. It can be set when the Expectation class is defined, or edited afterward as an attribute of an Expectation instance. You can format the `description` string with Markdown syntax:

```python title="Python"
class ExpectPassengerCountToBeLegal(UnexpectedRowsExpectation):
column: str = "passenger_count"
unexpected_rows_query: str = "SELECT * FROM {batch} WHERE passenger_count > 6"
# highlight-start
description: str = "There should be no more than **6** passengers."
# highlight-end
```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - define a custom UnexpectedRowsExpectation"
```

6. Use the customized subclass as an Expectation.
4. Use the customized subclass as an Expectation.

Once the customized Expectation subclass has been defined, instances of it can be created, added to Expectation Suites, and validated just like any other Expectation class:

```python title="Python"
expectation = ExpectPassengerCountToBeLegal()
```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - instantiate the custom SQL Expectation"
```

</TabItem>

<TabItem value="sample_code" label="Sample code">

```python title="Python"
import great_expectations as gx
from great_expectations.expectations import UnexpectedRowsExpectation

class ExpectPassengerCountToBeLegal(UnexpectedRowsExpectation):
unexpected_rows_query: str = "SELECT * FROM {batch} WHERE passenger_count > 6"
description: str = "There should be no more than **6** passengers."

context = gx.get_context()

expectation = ExpectPassengerCountToBeLegal() # Uses the predefined default values

data_source_name = "my_taxi_data"
asset_name = "2018_taxi_data"
batch_definition_name = "all_records_in_asset"
batch = context.get_datasource(datasource_name).get_asset(asset_name).get_batch_definition(batch_definition_name=batch_definition_name).get_batch()

batch.validate(expectation)

```python title="Python" name="docs/docusaurus/docs/core/customize_expectations/_examples/use_sql_to_define_a_custom_expectation.py - full code example"
```

</TabItem>
Expand Down

0 comments on commit 72746b7

Please sign in to comment.