From 8fdfd6b8454c5f592687746286a3a8c643de0174 Mon Sep 17 00:00:00 2001
From: Paul Lam <paul@quantisan.com>
Date: Mon, 9 Sep 2024 11:08:54 +0900
Subject: [PATCH] moved code snippets into CI

---
 .../learn/data_quality_use_cases/volume.md    |  47 ++-----
 .../volume_resources/volume_expectations.py   | 116 ++++++++++++++++++
 2 files changed, 123 insertions(+), 40 deletions(-)
 create mode 100644 docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py
diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume.md b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume.md
index eeb4e71e02f9..474c36362d76 100644
--- a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume.md
+++ b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume.md
@@ -46,11 +46,7 @@ Ensures that the number of rows in a dataset falls within a specified range.
 
 **Use Case**: Validate that daily transaction volumes are within expected bounds, alerting to unusual spikes or drops in activity.
 
-```py
-gxe.ExpectTableRowCountToBeBetween(
-    min_value=1000
-    max_value=1500
-)
+```python title="" name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py ExpectTableRowCountToBeBetween"
 ```
 
 <sup>View `ExpectTableRowCountToBeBetween` in the [Expectation Gallery](https://greatexpectations.io/expectations/expect_table_row_count_to_be_between).</sup>
@@ -62,10 +58,7 @@ Verifies that the dataset contains exactly the specified number of records.
 
 **Use Case**: Ensure that a specific number of records are processed, useful for batch operations or reconciliation tasks.
 
-```py
-gxe.ExpectTableRowCountToEqual(
-    value=300
-)
+```python title="" name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py ExpectTableRowCountToEqual"
 ```
 
 <sup>View `ExpectTableRowCountToEqual` in the [Expectation Gallery](https://greatexpectations.io/expectations/expect_table_row_count_to_equal).</sup>
@@ -77,10 +70,7 @@ Compares the row count of the current table to another table within the same dat
 
 **Use Case**: Verify data consistency across different stages of a pipeline or between source and target systems.
 
-```py
-gxe.ExpectTableRowCountToEqualOtherTable(
-    other_table_name="transactions_summary"
-)
+```python title="" name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py ExpectTableRowCountToEqualOtherTable"
 ```
 
 <sup>View `ExpectTableRowCountToEqualOtherTable` in the [Expectation Gallery](https://greatexpectations.io/expectations/expect_table_row_count_to_equal_other_table).</sup>
@@ -100,10 +90,7 @@ gxe.ExpectTableRowCountToEqualOtherTable(
 
 **GX solution**: Implement checks to ensure data volume consistency between source and target systems in a data reconciliation process.
 
-```python
-gxe.ExpectTableRowCountToEqualOtherTable(
-    other_table_name="target_system_transactions"
-)
+```python title="" name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py reconciliation_across_systems"
 ```
 
 ### Monitoring data volume in real-time streaming pipelines
@@ -112,11 +99,7 @@ gxe.ExpectTableRowCountToEqualOtherTable(
 
 **GX solution**: Implement checks to monitor data volume in real-time streaming pipelines and alert when anomalies are detected.
 
-```python
-gxe.ExpectTableRowCountToBeBetween(
-    min_value=1000,
-    max_value=1500
-)
+```python title="" name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py monitoring_streaming_pipelines"
 ```
 
 ### Batch processing verification
@@ -125,10 +108,7 @@ gxe.ExpectTableRowCountToBeBetween(
 
 **GX solution**: Validate that each processed batch contains exactly the expected number of records.
 
-```python
-gxe.ExpectTableRowCountToEqual(
-    value=300
-)
+```python title="" name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py batch_processing_verification"
 ```
 
 ## Avoid common volume validation pitfalls
@@ -153,20 +133,7 @@ While volume management is a critical component of data quality, it's just one f
 
 3. Develop a multifaceted approach that combines volume checks with other [crucial data quality aspects](/reference/learn/data_quality_use_cases/dq_use_cases_lp.md), such as data integrity, schema evolution, and distribution analysis. For instance, consider coupling volume checks with schema validation:
 
-```python
-gxe.ExpectTableRowCountToBeBetween(
-    min_value=1000
-    max_value=1500
-)
-
-gxe.ExpectTableColumnsToMatchOrderedList(
-    column_list=[
-        "sender_account_number",
-        "recipient_account_number",
-        "transfer_amount",
-        "transfer_date",
-    ]
-)
+```python title="" name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py combined_checks"
 ```
 
 This combination allows you to monitor for unexpected data growth while simultaneously ensuring structural consistency, providing a more robust validation framework.
diff --git a/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py
new file mode 100644
index 000000000000..161933ca1519
--- /dev/null
+++ b/docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py
@@ -0,0 +1,116 @@
+"""
+To run this test locally, use the postgresql database docker container.
+
+1. From the repo root dir, run:
+cd assets/docker/postgresql
+docker compose up
+
+2. Run the following command from the repo root dir in a second terminal:
+pytest --postgresql --docs-tests -k "data_quality_use_case_missingness_expectations" tests/integration/test_script_runner.py
+"""
+
+# This section loads sample data to use for CI testing of the script.
+import pathlib
+
+import great_expectations as gx
+import great_expectations.expectations as gxe
+from tests.test_utils import load_data_into_test_database
+
+CONNECTION_STRING = "postgresql+psycopg2://postgres:@localhost/test_ci"
+
+GX_ROOT_DIR = pathlib.Path(gx.__file__).parent.parent
+
+# Add test data to database for testing.
+load_data_into_test_database(
+    table_name="transfers",
+    csv_path=str(
+        GX_ROOT_DIR / "tests/test_sets/learn_data_quality_use_cases/volume_financial_transfers.csv"
+    ),
+    connection_string=CONNECTION_STRING,
+)
+
+context = gx.get_context()
+
+datasource = context.data_sources.add_postgres(
+    "postgres database", connection_string=CONNECTION_STRING
+)
+
+data_asset = datasource.add_table_asset(name="data asset", table_name="transfers")
+batch_definition = data_asset.add_batch_definition_whole_table("batch definition")
+batch = batch_definition.get_batch()
+
+suite = context.suites.add(gx.ExpectationSuite(name="example missingness expectations"))
+
+#############################
+# Start Expectation snippets.
+
+suite.add_expectation(
+    # <snippet name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py ExpectTableRowCountToBeBetween">
+    gxe.ExpectTableRowCountToBeBetween(
+        min_value=2,
+        max_value=5
+    )
+    # </snippet>
+)
+
+suite.add_expectation(
+    # <snippet name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py ExpectTableRowCountToEqual">
+    gxe.ExpectTableRowCountToEqual(
+        value=4
+    )
+    # </snippet>
+)
+
+suite.add_expectation(
+    # <snippet name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py ExpectTableRowCountToEqualOtherTable">
+    gxe.ExpectTableRowCountToEqualOtherTable(
+        other_table_name="transactions_summary"
+    )
+    # </snippet>
+)
+
+suite.add_expectation(
+    # <snippet name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py reconciliation_across_systems">
+    gxe.ExpectTableRowCountToEqualOtherTable(
+        other_table_name="target_system_transactions"
+    )
+    # </snippet>
+)
+
+suite.add_expectation(
+    # <snippet name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py monitoring_streaming_pipelines">
+    gxe.ExpectTableRowCountToBeBetween(
+        min_value=2,
+        max_value=5
+    )
+    # </snippet>
+)
+
+suite.add_expectation(
+    # <snippet name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py batch_processing_verification">
+    gxe.ExpectTableRowCountToEqual(
+        value=4
+    )
+    # </snippet>
+)
+
+# TODO fix this
+# suite.add_expectation(
+#     # <snippet name="docs/docusaurus/docs/reference/learn/data_quality_use_cases/volume_resources/volume_expectations.py combined_checks">
+#     gxe.ExpectTableRowCountToBeBetween(
+#         min_value=2,
+#         max_value=5
+#     ),
+#
+#     gxe.ExpectTableColumnsToMatchOrderedList(
+#         column_list=[
+#             "sender_account_number",
+#             "recipient_account_number",
+#             "transfer_amount",
+#             "transfer_date",
+#         ]
+#     )
+#     # </snippet>
+# )
+
+results = batch.validate(suite)