[DOCS] Puts 1.0 example scripts for connecting to dataframe data under test. #10225

Merged 13 commits on Aug 20, 2024
@@ -0,0 +1,2 @@

uncommitted/
@@ -0,0 +1 @@
store_backend_id = ed62c7bd-023a-4b27-bd49-e1f9bd825c43
@@ -0,0 +1,91 @@
# Welcome to Great Expectations! Always know what to expect from your data.
#
# Here you can define datasources, batch kwargs generators, integrations and
# more. This file is intended to be committed to your repo. For help with
# configuration please:
# - Read our docs: https://docs.greatexpectations.io/docs/guides/connecting_to_your_data/connect_to_data_overview/#2-configure-your-datasource
# - Join our slack channel: http://greatexpectations.io/slack

# config_version refers to the syntactic version of this config file and is
# used in maintaining backwards compatibility. It is auto-generated and usually
# does not need to be changed.
config_version: 4.0

# This config file supports variable substitution which enables: 1) keeping
# secrets out of source control & 2) environment-based configuration changes
# such as staging vs prod.
#
# When GX encounters substitution syntax (like `my_key: ${my_value}` or
# `my_key: $my_value`) in the great_expectations.yml file, it will attempt
# to replace the value of `my_key` with the value from an environment
# variable `my_value` or a corresponding key read from this config file,
# which is defined through the `config_variables_file_path`.
# Environment variables take precedence over variables defined here.
#
# Substitution values defined here can be a simple (non-nested) value, a
# nested value such as a dictionary, or an environment variable (e.g. ${ENV_VAR})
#
# https://docs.greatexpectations.io/docs/guides/setup/configuring_data_contexts/how_to_configure_credentials

config_variables_file_path: uncommitted/config_variables.yml
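As a hedged sketch of the substitution mechanism described above (the key and connection-string names here are hypothetical, not part of this PR), a secret kept in `uncommitted/config_variables.yml` could be referenced from the committed `great_expectations.yml` like so:

```yaml
# uncommitted/config_variables.yml -- kept out of source control
my_postgres_password: dont-commit-me

# great_expectations.yml -- committed; GX resolves ${my_postgres_password}
# from an environment variable first, then falls back to config_variables.yml:
#   connection_string: postgresql://analyst:${my_postgres_password}@db.example.com/warehouse
```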

# The plugins_directory will be added to your python path for custom modules
# used to override and extend Great Expectations.
plugins_directory: plugins/

stores:
  # Stores are configurable places to store things like Expectations, Validations,
  # Data Docs, and more. These are for advanced users only - most users can simply
  # leave this section alone.
  #
  # Three stores are required: expectations, validations, and suite_parameters;
  # each must exist with a valid store entry. Additional stores can be
  # configured for uses such as data_docs, etc.
  expectations_store:
    class_name: ExpectationsStore
    store_backend:
      class_name: TupleFilesystemStoreBackend
      base_directory: expectations/

  validation_results_store:
    class_name: ValidationResultsStore
    store_backend:
      class_name: TupleFilesystemStoreBackend
      base_directory: uncommitted/validations/

  checkpoint_store:
    class_name: CheckpointStore
    store_backend:
      class_name: TupleFilesystemStoreBackend
      suppress_store_backend_id: true
      base_directory: checkpoints/

  validation_definition_store:
    class_name: ValidationDefinitionStore
    store_backend:
      class_name: TupleFilesystemStoreBackend
      base_directory: validation_definitions/

expectations_store_name: expectations_store
validation_results_store_name: validation_results_store
checkpoint_store_name: checkpoint_store

data_docs_sites:
  # Data Docs make it simple to visualize data quality in your project. These
  # include Expectations, Validations & Profiles. They are built for all
  # Datasources from JSON artifacts in the local repo, including validations &
  # profiles from the uncommitted directory. Read more at https://docs.greatexpectations.io/docs/terms/data_docs
  local_site:
    class_name: SiteBuilder
    show_how_to_buttons: true
    store_backend:
      class_name: TupleFilesystemStoreBackend
      base_directory: uncommitted/data_docs/local_site/
    site_index_builder:
      class_name: DefaultSiteIndexBuilder

analytics_enabled: true
fluent_datasources:
  my_dataframe_data_source:
    type: pandas
data_context_id: ed62c7bd-023a-4b27-bd49-e1f9bd825c43
@@ -0,0 +1,22 @@
/*index page*/
.ge-index-page-site-name-title {}
.ge-index-page-table-container {}
.ge-index-page-table {}
.ge-index-page-table-profiling-links-header {}
.ge-index-page-table-expectations-links-header {}
.ge-index-page-table-validations-links-header {}
.ge-index-page-table-profiling-links-list {}
.ge-index-page-table-profiling-links-item {}
.ge-index-page-table-expectation-suite-link {}
.ge-index-page-table-validation-links-list {}
.ge-index-page-table-validation-links-item {}

/*breadcrumbs*/
.ge-breadcrumbs {}
.ge-breadcrumbs-item {}

/*navigation sidebar*/
.ge-navigation-sidebar-container {}
.ge-navigation-sidebar-content {}
.ge-navigation-sidebar-title {}
.ge-navigation-sidebar-link {}
@@ -0,0 +1,18 @@
# This config file supports variable substitution which enables: 1) keeping
# secrets out of source control & 2) environment-based configuration changes
# such as staging vs prod.
#
# When GX encounters substitution syntax (like `my_key: ${my_value}` or
# `my_key: $my_value`) in the great_expectations.yml file, it will attempt
# to replace the value of `my_key` with the value from an environment
# variable `my_value` or a corresponding key read from this config file,
# which is defined through the `config_variables_file_path`.
# Environment variables take precedence over variables defined here.
#
# Substitution values defined here can be a simple (non-nested) value, a
# nested value such as a dictionary, or an environment variable (e.g. ${ENV_VAR})
#
# https://docs.greatexpectations.io/docs/guides/setup/configuring_data_contexts/how_to_configure_credentials

instance_id: e0814d0f-1fce-48e0-8609-33f412321338
64 changes: 64 additions & 0 deletions docs/docusaurus/docs/components/examples_under_test.py
@@ -8,6 +8,67 @@

docs_tests = []

connect_to_dataframe_data = [
    # Create a Data Source, pandas/spark
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests --spark -k "create_a_df_data_source_spark" tests/integration/test_script_runner.py
        name="create_a_df_data_source_spark",
        user_flow_script="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_spark_df_data_source.py",
        # data_dir="",
        # data_context_dir="",
        backend_dependencies=[BackendDependencies.SPARK],
    ),
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "create_a_df_data_source_pandas" tests/integration/test_script_runner.py
        name="create_a_df_data_source_pandas",
        user_flow_script="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_pandas_df_data_source.py",
        # data_dir="",
        # data_context_dir="",
        backend_dependencies=[],
    ),
    # Create a Data Asset, pandas
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "create_a_df_data_asset_pandas" tests/integration/test_script_runner.py
        name="create_a_df_data_asset_pandas",
        user_flow_script="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_pandas_df_data_asset.py",
        data_dir="docs/docusaurus/docs/components/_testing/test_data_sets/single_test_file",
        # data_context_dir="",
        backend_dependencies=[],
    ),
    # Create a Batch Definition, pandas
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "create_a_df_batch_definition_pandas" tests/integration/test_script_runner.py
        name="create_a_df_batch_definition_pandas",
        user_flow_script="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_pandas_df_batch_definition.py",
        data_dir="docs/docusaurus/docs/components/_testing/test_data_sets/single_test_file",
        # data_context_dir="",
        backend_dependencies=[],
    ),
    # Batch Parameters, for a Batch Definition/for a Validation Definition
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests --spark -k "df_batch_parameters_for_batch_definition" tests/integration/test_script_runner.py
        name="df_batch_parameters_for_batch_definition",
        user_flow_script="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_batch_parameters_batch_definition.py",
        data_dir="docs/docusaurus/docs/components/_testing/test_data_sets/single_test_file",
        # data_context_dir="",
        backend_dependencies=[BackendDependencies.SPARK],
    ),
    IntegrationTestFixture(
        # To test, run:
        # pytest --docs-tests -k "df_batch_parameters_for_validation_definition" tests/integration/test_script_runner.py
        name="df_batch_parameters_for_validation_definition",
        user_flow_script="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_batch_parameters_validation_definition.py",
        data_dir="docs/docusaurus/docs/components/_testing/test_data_sets/single_test_file",
        # data_context_dir="",
        backend_dependencies=[],
    ),
]

connect_to_filesystem_data_create_a_data_source = [
# Local, pandas/spark
IntegrationTestFixture(
@@ -250,6 +311,9 @@

# Extend the docs_tests list with the above sublists (only the docs_tests list is imported
# into `test_script_runner.py` and actually used in CI checks).

docs_tests.extend(connect_to_dataframe_data)

docs_tests.extend(connect_to_filesystem_data_create_a_data_source)
docs_tests.extend(connect_to_filesystem_data_create_a_data_asset)
docs_tests.extend(connect_to_filesystem_data_create_a_batch_definition)
@@ -0,0 +1,60 @@
# Define Batch Parameters for a Spark dataframe
# <snippet name="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_batch_parameters_batch_definition.py - spark dataframe">
from pyspark.sql import SparkSession

csv = "./data/folder_with_data/yellow_tripdata_sample_2019-01.csv"
spark = SparkSession.builder.appName("Read CSV").getOrCreate()
dataframe = spark.read.csv(csv, header=True, inferSchema=True)

batch_parameters = {"dataframe": dataframe}
# </snippet>

# Define Batch Parameters for a pandas dataframe
# <snippet name="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_batch_parameters_batch_definition.py - pandas dataframe">
import pandas

csv_path = "./data/folder_with_data/yellow_tripdata_sample_2019-01.csv"
dataframe = pandas.read_csv(csv_path)

# <snippet name="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_batch_parameters_batch_definition.py - batch parameters example">
batch_parameters = {"dataframe": dataframe}
# </snippet>
# </snippet>
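The CSV path above points at fixture data from the docs test environment; as a self-contained sketch (the column values here are made up for illustration), the same `batch_parameters` dictionary can be built from an in-memory pandas dataframe:

```python
import pandas as pd

# A small illustrative dataframe in place of the fixture CSV
dataframe = pd.DataFrame({"passenger_count": [1, 2, 4, 6]})

# Batch Parameters are simply a dict mapping the key "dataframe"
# to the dataframe object to validate
batch_parameters = {"dataframe": dataframe}
```

Any dataframe in scope can be passed this way; for a whole-dataframe Batch Definition the dictionary key is `"dataframe"`, as in the snippets above.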


def setup_context_for_example(context):
    data_source = context.data_sources.add_pandas(name="my_data_source")
    data_asset = data_source.add_dataframe_asset(name="my_dataframe_data_asset")
    data_asset.add_batch_definition_whole_dataframe("my_batch_definition")


# <snippet name="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_batch_parameters_batch_definition.py - batch.validate() example">
import great_expectations as gx

context = gx.get_context()
# Hide this
setup_context_for_example(context)

# Retrieve the dataframe Batch Definition
data_source_name = "my_data_source"
data_asset_name = "my_dataframe_data_asset"
batch_definition_name = "my_batch_definition"
batch_definition = (
    context.data_sources.get(data_source_name)
    .get_asset(data_asset_name)
    .get_batch_definition(batch_definition_name)
)

# Create an Expectation to test
expectation = gx.expectations.ExpectColumnValuesToBeBetween(
    column="passenger_count", max_value=6, min_value=1
)

# Get the dataframe as a Batch
# highlight-next-line
batch = batch_definition.get_batch(batch_parameters=batch_parameters)

# Test the Expectation
validation_results = batch.validate(expectation)
print(validation_results)
# </snippet>
@@ -0,0 +1,45 @@
# Define Batch Parameters for a pandas dataframe
import pandas

csv_path = "./data/folder_with_data/yellow_tripdata_sample_2019-01.csv"
dataframe = pandas.read_csv(csv_path)

batch_parameters = {"dataframe": dataframe}


def set_up_context_for_example(context):
    data_source = context.data_sources.add_pandas(name="my_data_source")
    data_asset = data_source.add_dataframe_asset(name="my_dataframe_data_asset")
    batch_definition = data_asset.add_batch_definition_whole_dataframe(
        "my_batch_definition"
    )

    # Create an Expectation Suite
    suite = context.suites.add(gx.ExpectationSuite(name="my_expectation_suite"))
    # Add an Expectation to the Expectation Suite
    suite.add_expectation(
        gx.expectations.ExpectColumnValuesToNotBeNull(column="pickup_datetime")
    )
    # Add a Validation Definition
    context.validation_definitions.add(
        gx.ValidationDefinition(
            data=batch_definition, suite=suite, name="my_validation_definition"
        )
    )


# <snippet name="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_batch_parameters_validation_definition.py - validation_definition.validate() example">
import great_expectations as gx

context = gx.get_context()
# Hide this
set_up_context_for_example(context)

# Retrieve a Validation Definition that uses the dataframe Batch Definition
validation_definition_name = "my_validation_definition"
validation_definition = context.validation_definitions.get(validation_definition_name)

# Validate the dataframe by passing it to the Validation Definition as Batch Parameters.
validation_results = validation_definition.run(batch_parameters=batch_parameters)
print(validation_results)
# </snippet>
@@ -0,0 +1,34 @@
# <snippet name="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_pandas_df_batch_definition.py - full code example">
import great_expectations as gx

context = gx.get_context()
# Hide this
assert type(context).__name__ == "EphemeralDataContext"
# Hide this
# SETUP FOR THE EXAMPLE:
# Hide this
data_source = context.data_sources.add_pandas(name="my_data_source")
# Hide this
data_asset = data_source.add_dataframe_asset(name="my_dataframe_data_asset")

# Retrieve the Data Asset
# <snippet name="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_pandas_df_batch_definition.py - retrieve Data Asset">
data_source_name = "my_data_source"
data_asset_name = "my_dataframe_data_asset"
data_asset = context.data_sources.get(data_source_name).get_asset(data_asset_name)
# </snippet>

# Define the Batch Definition name
# <snippet name="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_pandas_df_batch_definition.py - define Batch Definition name">
batch_definition_name = "my_batch_definition"
# </snippet>

# Add a Batch Definition to the Data Asset
# <snippet name="docs/docusaurus/docs/core/connect_to_data/dataframes/_examples/_pandas_df_batch_definition.py - add Batch Definition">
batch_definition = data_asset.add_batch_definition_whole_dataframe(
    batch_definition_name
)
# </snippet>
# Hide this
assert batch_definition.name == batch_definition_name
# </snippet>