Skip to content

Commit

Permalink
[DOCS] Puts 1.0 documentation example code for how to run validations…
Browse files Browse the repository at this point in the history
… under test (#10230)
  • Loading branch information
Rachel-Reverie authored Aug 19, 2024
1 parent 147f8f8 commit 0fd5529
Show file tree
Hide file tree
Showing 7 changed files with 200 additions and 99 deletions.
26 changes: 26 additions & 0 deletions docs/docusaurus/docs/components/examples_under_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,29 @@

docs_tests = []

# Fixtures for the example scripts under
# docs/docusaurus/docs/core/run_validations/_examples/.
# Each fixture's `name` is the value given to `pytest -k` in the "To test, run"
# comment above it, and both fixtures share the same `data_dir`.
docs_example_scripts_run_validations = [
    # Create a Validation Definition
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "docs_example_create_a_validation_definition" tests/integration/test_script_runner.py
        name="docs_example_create_a_validation_definition",
        user_flow_script="docs/docusaurus/docs/core/run_validations/_examples/create_a_validation_definition.py",
        data_dir="docs/docusaurus/docs/components/_testing/test_data_sets/single_test_file",
        # data_context_dir="",
        # No backend dependencies are declared for this script.
        backend_dependencies=[],
    ),
    # Run a Validation Definition
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "docs_example_run_a_validation_definition" tests/integration/test_script_runner.py
        name="docs_example_run_a_validation_definition",
        user_flow_script="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py",
        data_dir="docs/docusaurus/docs/components/_testing/test_data_sets/single_test_file",
        # data_context_dir="",
        # No backend dependencies are declared for this script.
        backend_dependencies=[],
    ),
]

connect_to_filesystem_data_create_a_data_source = [
# Local, pandas/spark
IntegrationTestFixture(
Expand Down Expand Up @@ -250,6 +273,9 @@

# Extend the docs_tests list with the above sublists (only the docs_tests list is imported
# into `test_script_runner.py` and actually used in CI checks).

docs_tests.extend(docs_example_scripts_run_validations)

docs_tests.extend(connect_to_filesystem_data_create_a_data_source)
docs_tests.extend(connect_to_filesystem_data_create_a_data_asset)
docs_tests.extend(connect_to_filesystem_data_create_a_batch_definition)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
This is an example script for how to create a Validation Definition.
To test, run:
pytest --docs-tests -k "docs_example_create_a_validation_definition" tests/integration/test_script_runner.py
"""


def set_up_context_for_example(context):
    """Populate ``context`` with the objects the example script looks up by name.

    Registers a pandas filesystem Data Source ("my_data_source"), a CSV Data
    Asset ("my_data_asset"), a path Batch Definition ("my_batch_definition")
    and an Expectation Suite ("my_expectation_suite") holding two not-null
    Expectations.

    ``gx`` is resolved when the function is called; the module imports it
    further down, before the call.

    Args:
        context: The Data Context to populate.
    """
    # Data Source -> Data Asset -> Batch Definition. The intermediate objects
    # are not needed afterwards, so the calls are chained.
    context.data_sources.add_pandas_filesystem(
        name="my_data_source", base_directory="./data/folder_with_data"
    ).add_csv_asset(name="my_data_asset").add_batch_definition_path(
        name="my_batch_definition", path="yellow_tripdata_sample_2019-01.csv"
    )

    # Expectation Suite with one not-null Expectation per column, added in
    # the listed order.
    expectation_suite = context.suites.add(
        gx.ExpectationSuite(name="my_expectation_suite")
    )
    for column_name in ("pickup_datetime", "passenger_count"):
        expectation_suite.add_expectation(
            gx.expectations.ExpectColumnValuesToNotBeNull(column=column_name)
        )


# EXAMPLE SCRIPT STARTS HERE:
# The outer `<snippet ...>` / `</snippet>` pair below delimits the "full code
# example"; the nested pairs delimit the individual steps. The documentation
# page references each snippet by its `name`, so anything written between a
# pair of tags becomes part of that snippet. Explanatory notes therefore
# live out here.
#
# The Data Source is retrieved through `context.data_sources`, the same
# accessor `set_up_context_for_example` uses to create it.
# <snippet name="docs/docusaurus/docs/core/run_validations/_examples/create_a_validation_definition.py - full code example">
import great_expectations as gx

context = gx.get_context()
# Hide this
set_up_context_for_example(context)

# Retrieve an Expectation Suite
# <snippet name="docs/docusaurus/docs/core/run_validations/_examples/create_a_validation_definition.py - retrieve an Expectation Suite">
expectation_suite_name = "my_expectation_suite"
expectation_suite = context.suites.get(name=expectation_suite_name)
# </snippet>

# Retrieve a Batch Definition
# <snippet name="docs/docusaurus/docs/core/run_validations/_examples/create_a_validation_definition.py - retrieve a Batch Definition">
data_source_name = "my_data_source"
data_asset_name = "my_data_asset"
batch_definition_name = "my_batch_definition"
batch_definition = (
    context.data_sources.get(data_source_name)
    .get_asset(data_asset_name)
    .get_batch_definition(batch_definition_name)
)
# </snippet>

# Create a Validation Definition
# <snippet name="docs/docusaurus/docs/core/run_validations/_examples/create_a_validation_definition.py - create a Validation Definition">
definition_name = "my_validation_definition"
validation_definition = gx.ValidationDefinition(
    data=batch_definition, suite=expectation_suite, name=definition_name
)
# </snippet>

# Add the Validation Definition to the Data Context
# <snippet name="docs/docusaurus/docs/core/run_validations/_examples/create_a_validation_definition.py - save the Validation Definition to the Data Context">
validation_definition = context.validation_definitions.add(validation_definition)
# </snippet>
# </snippet>
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""
This is an example script for how to run a Validation Definition.
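To test, run:
pytest --docs-tests -k "docs_example_run_a_validation_definition" tests/integration/test_script_runner.py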
"""


def set_up_context_for_example(context):
    """Populate ``context`` with the Validation Definition the example runs.

    Registers a pandas filesystem Data Source, CSV Data Asset and path Batch
    Definition, builds an Expectation Suite with two not-null Expectations,
    and saves them together as "my_validation_definition".

    ``gx`` is resolved when the function is called; the module imports it
    further down, before the call.

    Args:
        context: The Data Context to populate.
    """
    # Build the Batch Definition one step at a time.
    data_source = context.data_sources.add_pandas_filesystem(
        name="my_data_source", base_directory="./data/folder_with_data"
    )
    data_asset = data_source.add_csv_asset(name="my_data_asset")
    batch_definition = data_asset.add_batch_definition_path(
        name="my_batch_definition", path="yellow_tripdata_sample_2019-01.csv"
    )

    # Expectation Suite with one not-null Expectation per column, added in
    # the listed order.
    suite = context.suites.add(gx.ExpectationSuite(name="my_expectation_suite"))
    for column_name in ("pickup_datetime", "passenger_count"):
        suite.add_expectation(
            gx.expectations.ExpectColumnValuesToNotBeNull(column=column_name)
        )

    # Tie the Batch Definition and the suite together under the name the
    # example script retrieves.
    context.validation_definitions.add(
        gx.ValidationDefinition(
            data=batch_definition,
            suite=suite,
            name="my_validation_definition",
        )
    )


# EXAMPLE SCRIPT STARTS HERE:
# The outer `<snippet ...>` / `</snippet>` pair below delimits the "full code
# example"; the nested pairs delimit the individual steps. The documentation
# page references each snippet by its `name`, so anything written between a
# pair of tags becomes part of that snippet. Explanatory notes therefore
# live out here.
#
# Flow: fetch "my_validation_definition" (created by
# `set_up_context_for_example`), call its `run()` method, and print what
# `run()` returned.
# <snippet name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - full code example">
import great_expectations as gx

context = gx.get_context()
# Hide this
set_up_context_for_example(context)

# Retrieve the Validation Definition
# <snippet name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - retrieve a Validation Definition">
validation_definition_name = "my_validation_definition"
validation_definition = context.validation_definitions.get(validation_definition_name)
# </snippet>

# Run the Validation Definition
# <snippet name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - run a Validation Definition">
validation_results = validation_definition.run()
# </snippet>

# Review the Validation Results
# <snippet name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - review Validation Results">
print(validation_results)
# </snippet>
# </snippet>


# TODO: Set up the example script to use a GX Cloud Data Context and then
# add this portion back in.
# # Get the URL for Validation Results in GX Cloud
# # <snippet name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - get GX Cloud Validation Result URL">
# print(validation_results.results_url)
# # </snippet>
Original file line number Diff line number Diff line change
Expand Up @@ -20,106 +20,45 @@ A Validation Definition is a fixed reference that links a Batch of data to an Ex

- <PrereqPythonInstalled/>.
- <PrereqGxInstalled/>.
- <PrereqPreconfiguredDataContext/>.
- <PrereqPreconfiguredDataContext/>. In this guide the variable `context` is assumed to contain your Data Context.
- <PrereqPreconfiguredDataSourceAndAsset/>.
- <PrereqPreconfiguredExpectationSuiteAndExpectations/>.

<Tabs>

<TabItem value="procedure" label="Procedure">

1. Import the `ValidationDefinition` class from the {GxData.product_name} library:

```python title="Python"
from great_expectations.core import ValidationDefinition
```

2. Request a Data Context:

```python title="Python"
import great_expectations as gx

context = gx.get_context()
```

3. Retrieve an Expectation Suite with Expectations.
1. Retrieve an Expectation Suite with Expectations.

Update the value of `suite_name` in the following code with the name of your Expectation Suite. Then execute the code to retrieve that Expectation Suite:

```python title="Python"
suite_name = "<NAME OF AN EXISTING EXPECTATION SUITE>"
suite = context.get_expectation_suite(suite_name)
```python title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/create_a_validation_definition.py - retrieve an Expectation Suite"
```

4. Retrieve the Batch Definition that describes the data to associate with the Expectation Suite.
2. Retrieve the Batch Definition that describes the data to associate with the Expectation Suite.

Update the values of `data_source_name`, `data_asset_name`, and `batch_definition_name` in the following code with the names of your previously defined Data Source, one of its Data Assets, and a Batch Definition for that Data Asset. Then execute the code to retrieve the Batch Definition:

```python title="Python"
data_source_name = "my_datasource"
data_asset_name = "my_data_asset"
batch_definition_name = "my_batch_definition"

batch_definition = context.get_datasource(data_source_name).get_asset(data_asset_name).get_batch_definition(batch_definition_name)
```python title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/create_a_validation_definition.py - retrieve a Batch Definition"
```

5. Create a `ValidationDefinition` instance using the Batch Definition, Expectation Suite, and a unique name.
3. Create a `ValidationDefinition` instance using the Batch Definition, Expectation Suite, and a unique name.

Update the value of `definition_name` with a descriptive name that indicates the purpose of the Validation Definition. Then execute the code to create your Validation Definition:

```python title="Python"
definition_name = "My Validation Definition"
validation_definition = ValidationDefinition(data=batch_definition, suite=suite, name=definition_name)
```

6. Optional. Save the Validation Definition to your Data Context.

```python title="Python"
validation_definition = context.validation_definitions.add(validation_definition)
```python title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/create_a_validation_definition.py - create a Validation Definition"
```

:::tip

You can add a Validation Definition to your Data Context at the same time as you create it with the following code:
4. Optional. Save the Validation Definition to your Data Context.

```python title="Python"
definition_name = "My second Validation Definition"
validation_definition = context.validation_definitions.add(ValidationDefinition(data=batch_definition, suite=suite, name=definition_name))
```python title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/create_a_validation_definition.py - save the Validation Definition to the Data Context"
```

:::

</TabItem>

<TabItem value="sample_code" label="Sample code">

```python showLineNumbers title="Python"
from great_expectations.core import ValidationDefinition
import great_expectations as gx

context = gx.get_context()

suite_name = "my_expectation_suite"
suite = context.suites.get(name=suite_name)

data_source_name = "my_datasource"
data_asset_name = "my_data_asset"
batch_definition_name = "my_batch_definition"
batch_definition = context.get_datasource(data_source_name).get_asset(data_asset_name).get_batch_definition(batch_definition_name)

# highlight-start
definition_name = "My Validation Definition"
validation_definition = ValidationDefinition(data=batch_definition, suite=suite, name=definition_name)
# highlight-end

# highlight-start
validation_definition = context.validation_definitions.add(validation_definition)
# highlight-end

# highlight-start
new_definition_name = "My second Validation Definition"
validation_definition = context.validation_definitions.add(ValidationDefinition(data=batch_definition, suite=suite, name=new_definition_name))
# highlight-end
```python showLineNumbers title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/create_a_validation_definition.py - full code example"
```

</TabItem>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import PrereqValidationDefinition from '../_core_components/prerequisites/_valid

- <PrereqPythonInstalled/>.
- <PrereqGxInstalled/>.
- <PrereqPreconfiguredDataContext/>.
- <PrereqPreconfiguredDataContext/>. In this guide the variable `context` is assumed to contain your Data Context.
- <PrereqValidationDefinition/>.

<Tabs>
Expand All @@ -26,18 +26,12 @@ import PrereqValidationDefinition from '../_core_components/prerequisites/_valid

If you have created a new Validation Definition you can use the object returned by your Data Context's `.validation_definitions.add(...)` method. Alternatively, you can retrieve a previously configured Validation Definition by updating the variable `validation_definition_name` in the following code and executing it:

```python title="Python
import great_expectations as gx
context = gx.get_context()

validation_definition_name = "my_validation_definition"
validation_definition = context.validation_definitions.get(validation_definition_name)
```python title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - retrieve a Validation Definition"
```

2. Execute the Validation Definition's `run()` method:

```python title="Python"
validation_result = validation_definition.run()
```python title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - run a Validation Definition"
```

Validation Results are automatically saved in your Data Context when a Validation Definition's `run()` method is called. For convenience, the `run()` method also returns the Validation Results as an object you can review.
Expand All @@ -50,8 +44,7 @@ import PrereqValidationDefinition from '../_core_components/prerequisites/_valid

3. Review the Validation Results:

```python title="Python"
print(validation_result)
```python title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - review Validation Results"
```

When you print the returned Validation Result object you will receive a yaml representation of the results. By default this will include a `"results"` list that includes each Expectation in your Validation Definition's Expectation Suite, whether the Expectation was successfully met or failed to pass, and some summarized information explaining why the Expectation succeeded or failed.
Expand All @@ -61,7 +54,7 @@ import PrereqValidationDefinition from '../_core_components/prerequisites/_valid
GX Cloud users can view the Validation Results in the GX Cloud UI by following the url provided with:

```python title="Python"
print(validation_result.result_url)
print(validation_results.results_url)
```

:::
Expand All @@ -70,22 +63,7 @@ import PrereqValidationDefinition from '../_core_components/prerequisites/_valid

<TabItem value="sample_code" label="Sample code">

```python showLineNumbers title="Python"
import great_expectations as gx

context = gx.get_context()

validation_definition_name = "my_validation_definition"
validation_definition = context.validation_definitions.get(validation_definition_name)

# highlight-next-line
validation_result = validation_definition.run()

# highlight-next-line
print(validation_result)

# highlight-next-line
print(validation_result.results_url)
```python showLineNumbers title="Python" name="docs/docusaurus/docs/core/run_validations/_examples/run_a_validation_definition.py - full code example"
```

</TabItem>
Expand Down
12 changes: 12 additions & 0 deletions docs/docusaurus/docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,18 @@ module.exports = {
prism: {
  additionalLanguages: ['bash'],
  theme: require('./src/theme/CodeBlock/theme'),
  // Comment directives recognised inside code blocks. Each entry pairs a
  // directive with the CSS class named in `className`.
  magicComments: [
    // Remember to extend the default highlight class name as well!
    // NOTE(review): presumably a custom `magicComments` list replaces the
    // Docusaurus defaults, which is why the standard highlight directives
    // are re-declared here. Confirm against the Docusaurus docs.
    {
      className: 'theme-code-block-highlighted-line',
      line: 'highlight-next-line',
      block: {start: 'highlight-start', end: 'highlight-end'},
    },
    // `Hide this` is the directive the docs example scripts place above
    // their `set_up_context_for_example(context)` call.
    // NOTE(review): the CSS rule for `code-block-hide-line` is not visible
    // here. Presumably it hides the tagged line in the rendered docs.
    {
      className: 'code-block-hide-line',
      line: 'Hide this',
    },
  ]
},
colorMode: {
disableSwitch: true,
Expand Down
Loading

0 comments on commit 0fd5529

Please sign in to comment.