
Commit

types work
Shinnnyshinshin committed Dec 18, 2024
1 parent 47fffd5 commit d766ddd
Showing 3 changed files with 50 additions and 19 deletions.
10 changes: 0 additions & 10 deletions great_expectations/compatibility/databricks.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-import sqlalchemy.types as sqltypes
-
 from great_expectations.compatibility.not_imported import NotImported
 
 DATABRICKS_CONNECT_NOT_IMPORTED = NotImported(
@@ -34,13 +32,6 @@
 except (ImportError, AttributeError):
     TINYINT = DATABRICKS_CONNECT_NOT_IMPORTED  # type: ignore[misc, assignment]
 
-try:
-    # sqlalchemy >= 2.0
-    DOUBLE = sqltypes.DOUBLE
-except (ImportError, AttributeError):
-    # sqlalchemy <= 2.0
-    DOUBLE = sqltypes.DECIMAL  # type: ignore[misc, assignment]
-
 
 class DATABRICKS_TYPES:
     """Namespace for Databricks dialect types"""
@@ -49,4 +40,3 @@ class DATABRICKS_TYPES:
     STRING = STRING
     TINYINT = TINYINT
     TIMESTAMP = TIMESTAMP
-    DOUBLE = DOUBLE
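After this change DATABRICKS_TYPES no longer carries a DOUBLE attribute on any SQLAlchemy version; the version gate moves into the test suite below. For a caller that still needs the type, a minimal sketch of a safe lookup (the getattr guard is illustrative, not part of this commit):

    import sqlalchemy.types as sqltypes

    # sqltypes.DOUBLE exists only on SQLAlchemy >= 2.0; older versions
    # get None here instead of raising AttributeError.
    DOUBLE = getattr(sqltypes, "DOUBLE", None)

    if DOUBLE is not None:
        column_type = DOUBLE()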
@@ -5,6 +5,9 @@
 import great_expectations.expectations as gxe
 from great_expectations.compatibility.databricks import DATABRICKS_TYPES
 from great_expectations.compatibility.snowflake import SNOWFLAKE_TYPES
+from great_expectations.compatibility.sqlalchemy import (
+    sqlalchemy as sa,
+)
 from great_expectations.core.result_format import ResultFormat
 from great_expectations.datasource.fluent.interfaces import Batch
 from tests.integration.conftest import parameterize_batch_for_data_sources
@@ -428,12 +431,6 @@ def test_success_complete_snowflake(
             ),
             id="TIMESTAMP_NTZ",
         ),
-        pytest.param(
-            gxe.ExpectColumnValuesToBeInTypeList(
-                column="DOUBLE", type_list=["DOUBLE", "FLOAT", "DECIMAL"]
-            ),
-            id="DOUBLE",
-        ),
         pytest.param(
             gxe.ExpectColumnValuesToBeInTypeList(column="FLOAT", type_list=["FLOAT"]),
             id="FLOAT",
@@ -479,7 +476,6 @@ def test_success_complete_snowflake(
                 "DATE": sqltypes.DATE,
                 "TIMESTAMP_NTZ": DATABRICKS_TYPES.TIMESTAMP_NTZ,
                 "TIMESTAMP": DATABRICKS_TYPES.TIMESTAMP,
-                "DOUBLE": DATABRICKS_TYPES.DOUBLE,
                 "FLOAT": sqltypes.Float,
                 "INT": sqltypes.Integer,
                 "DECIMAL": sqltypes.Numeric,
@@ -543,3 +539,49 @@ def test_success_complete_databricks(
     assert isinstance(result_dict["observed_value"], str)
     assert isinstance(expectation.type_list, list)
     assert result_dict["observed_value"] in expectation.type_list
+
+
+@pytest.mark.skipif(
+    sa.__version__ < "2.0.0", reason="DOUBLE type is not available in SA until 2.0.0"
+)
+@pytest.mark.parametrize(
+    "expectation",
+    [
+        pytest.param(
+            gxe.ExpectColumnValuesToBeInTypeList(column="DOUBLE", type_list=["DOUBLE", "FLOAT"]),
+            id="DOUBLE",
+        )
+    ],
+)
+@parameterize_batch_for_data_sources(
+    data_source_configs=[
+        DatabricksDatasourceTestConfig(
+            column_types={
+                "DOUBLE": sqltypes.Double,
+            }
+        )
+    ],
+    data=pd.DataFrame(
+        {
+            "DOUBLE": [1.0, 2.0, 3.0],
+        },
+        dtype="object",
+    ),
+)
+def test_success_complete_databricks_double_type_only(
+    batch_for_datasource: Batch, expectation: gxe.ExpectColumnValuesToBeInTypeList
+) -> None:
+    """What does this test and why?
+
+    Databricks mostly uses SQLAlchemy types directly, but the DOUBLE type is
+    only available in SQLAlchemy 2.0 and later. The test is therefore split
+    in two, and this half is skipped when the installed SQLAlchemy is too old.
+    """
+    result = batch_for_datasource.validate(expectation, result_format=ResultFormat.COMPLETE)
+    result_dict = result.to_json_dict()["result"]
+
+    assert result.success
+    assert isinstance(result_dict, dict)
+    assert isinstance(result_dict["observed_value"], str)
+    assert isinstance(expectation.type_list, list)
+    assert result_dict["observed_value"] in expectation.type_list
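One caveat on the new skipif guard: `sa.__version__ < "2.0.0"` is a lexicographic string comparison. It happens to order "1.4.x" before "2.0.0", but a hypothetical "10.0.0" would also sort before "2.0.0". A more robust sketch, assuming the packaging library is available:

    import sqlalchemy as sa
    from packaging.version import Version

    # Parse the versions before comparing so that "10.0.0" is correctly
    # ordered after "2.0.0".
    SQLALCHEMY_SUPPORTS_DOUBLE = Version(sa.__version__) >= Version("2.0.0")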
@@ -66,13 +66,12 @@ def test_success_for_type__INTEGER(batch_for_datasource: Batch) -> None:
     assert result.success
 
 
-@pytest.mark.xfail
 @parameterize_batch_for_data_sources(
     data_source_configs=[DatabricksDatasourceTestConfig()],
     data=DATA,
 )
 def test_success_for_type__Integer(batch_for_datasource: Batch) -> None:
-    expectation = gxe.ExpectColumnValuesToBeOfType(column=INTEGER_COLUMN, type_="Integer")
+    expectation = gxe.ExpectColumnValuesToBeOfType(column=INTEGER_COLUMN, type_="INT")
     result = batch_for_datasource.validate(expectation)
     assert result.success
 
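The removed xfail marker and the switch from type_="Integer" to type_="INT" suggest the expectation compares against the type name the Databricks dialect reports (INT), not SQLAlchemy's generic Integer. A sketch of the distinction, with a hypothetical column name:

    import great_expectations.expectations as gxe

    # Matches the type name Databricks reports for an integer column.
    passing = gxe.ExpectColumnValuesToBeOfType(column="my_col", type_="INT")

    # The SQLAlchemy generic name does not match the reported "INT", which
    # is why the earlier form of this test was marked xfail.
    failing = gxe.ExpectColumnValuesToBeOfType(column="my_col", type_="Integer")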
