
Commit

types work
Shinnnyshinshin committed Dec 18, 2024
1 parent 47fffd5 commit d766ddd
Showing 3 changed files with 50 additions and 19 deletions.
10 changes: 0 additions & 10 deletions great_expectations/compatibility/databricks.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-import sqlalchemy.types as sqltypes
-
 from great_expectations.compatibility.not_imported import NotImported
 
 DATABRICKS_CONNECT_NOT_IMPORTED = NotImported(
@@ -34,13 +32,6 @@
 except (ImportError, AttributeError):
     TINYINT = DATABRICKS_CONNECT_NOT_IMPORTED  # type: ignore[misc, assignment]
 
-try:
-    # sqlalchemy >= 2.0
-    DOUBLE = sqltypes.DOUBLE
-except (ImportError, AttributeError):
-    # sqlalchemy <= 2.0
-    DOUBLE = sqltypes.DECIMAL  # type: ignore[misc, assignment]
-
 
 class DATABRICKS_TYPES:
     """Namespace for Databricks dialect types"""
@@ -49,4 +40,3 @@ class DATABRICKS_TYPES:
     STRING = STRING
     TINYINT = TINYINT
     TIMESTAMP = TIMESTAMP
-    DOUBLE = DOUBLE
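After this change DATABRICKS_TYPES no longer carries a DOUBLE attribute on any SQLAlchemy version; the version gate moves into the test suite below. For a caller that still needs the type, a minimal sketch of a safe lookup (the getattr guard is illustrative, not part of this commit):

    import sqlalchemy.types as sqltypes

    # sqltypes.DOUBLE exists only on SQLAlchemy >= 2.0; older versions
    # get None here instead of raising AttributeError.
    DOUBLE = getattr(sqltypes, "DOUBLE", None)

    if DOUBLE is not None:
        column_type = DOUBLE()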
@@ -5,6 +5,9 @@
 import great_expectations.expectations as gxe
 from great_expectations.compatibility.databricks import DATABRICKS_TYPES
 from great_expectations.compatibility.snowflake import SNOWFLAKE_TYPES
+from great_expectations.compatibility.sqlalchemy import (
+    sqlalchemy as sa,
+)
 from great_expectations.core.result_format import ResultFormat
 from great_expectations.datasource.fluent.interfaces import Batch
 from tests.integration.conftest import parameterize_batch_for_data_sources
@@ -428,12 +431,6 @@ def test_success_complete_snowflake(
             ),
             id="TIMESTAMP_NTZ",
         ),
-        pytest.param(
-            gxe.ExpectColumnValuesToBeInTypeList(
-                column="DOUBLE", type_list=["DOUBLE", "FLOAT", "DECIMAL"]
-            ),
-            id="DOUBLE",
-        ),
         pytest.param(
             gxe.ExpectColumnValuesToBeInTypeList(column="FLOAT", type_list=["FLOAT"]),
             id="FLOAT",
@@ -479,7 +476,6 @@ def test_success_complete_snowflake(
                 "DATE": sqltypes.DATE,
                 "TIMESTAMP_NTZ": DATABRICKS_TYPES.TIMESTAMP_NTZ,
                 "TIMESTAMP": DATABRICKS_TYPES.TIMESTAMP,
-                "DOUBLE": DATABRICKS_TYPES.DOUBLE,
                 "FLOAT": sqltypes.Float,
                 "INT": sqltypes.Integer,
                 "DECIMAL": sqltypes.Numeric,
@@ -543,3 +539,49 @@ def test_success_complete_databricks(
     assert isinstance(result_dict["observed_value"], str)
     assert isinstance(expectation.type_list, list)
     assert result_dict["observed_value"] in expectation.type_list
+
+
+@pytest.mark.skipif(
+    sa.__version__ < "2.0.0", reason="DOUBLE type is not available in SA until 2.0.0"
+)
+@pytest.mark.parametrize(
+    "expectation",
+    [
+        pytest.param(
+            gxe.ExpectColumnValuesToBeInTypeList(column="DOUBLE", type_list=["DOUBLE", "FLOAT"]),
+            id="DOUBLE",
+        )
+    ],
+)
+@parameterize_batch_for_data_sources(
+    data_source_configs=[
+        DatabricksDatasourceTestConfig(
+            column_types={
+                "DOUBLE": sqltypes.Double,
+            }
+        )
+    ],
+    data=pd.DataFrame(
+        {
+            "DOUBLE": [1.0, 2.0, 3.0],
+        },
+        dtype="object",
+    ),
+)
+def test_success_complete_databricks_double_type_only(
+    batch_for_datasource: Batch, expectation: gxe.ExpectColumnValuesToBeInTypeList
+) -> None:
+    """What does this test and why?
+
+    Databricks mostly uses SQLAlchemy types directly, but the DOUBLE type is
+    only available in SQLAlchemy 2.0 and later. The test is therefore split
+    in two, and this half is skipped when the installed SQLAlchemy is too old.
+    """
+    result = batch_for_datasource.validate(expectation, result_format=ResultFormat.COMPLETE)
+    result_dict = result.to_json_dict()["result"]
+
+    assert result.success
+    assert isinstance(result_dict, dict)
+    assert isinstance(result_dict["observed_value"], str)
+    assert isinstance(expectation.type_list, list)
+    assert result_dict["observed_value"] in expectation.type_list
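One caveat on the new skipif guard: `sa.__version__ < "2.0.0"` is a lexicographic string comparison. It happens to order "1.4.x" before "2.0.0", but a hypothetical "10.0.0" would also sort before "2.0.0". A more robust sketch, assuming the packaging library is available:

    import sqlalchemy as sa
    from packaging.version import Version

    # Parse the versions before comparing so that "10.0.0" is correctly
    # ordered after "2.0.0".
    SQLALCHEMY_SUPPORTS_DOUBLE = Version(sa.__version__) >= Version("2.0.0")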
@@ -66,13 +66,12 @@ def test_success_for_type__INTEGER(batch_for_datasource: Batch) -> None:
     assert result.success
 
 
-@pytest.mark.xfail
 @parameterize_batch_for_data_sources(
     data_source_configs=[DatabricksDatasourceTestConfig()],
     data=DATA,
 )
 def test_success_for_type__Integer(batch_for_datasource: Batch) -> None:
-    expectation = gxe.ExpectColumnValuesToBeOfType(column=INTEGER_COLUMN, type_="Integer")
+    expectation = gxe.ExpectColumnValuesToBeOfType(column=INTEGER_COLUMN, type_="INT")
     result = batch_for_datasource.validate(expectation)
     assert result.success
 
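The removed xfail marker and the switch from type_="Integer" to type_="INT" suggest the expectation compares against the type name the Databricks dialect reports (INT), not SQLAlchemy's generic Integer. A sketch of the distinction, with a hypothetical column name:

    import great_expectations.expectations as gxe

    # Matches the type name Databricks reports for an integer column.
    passing = gxe.ExpectColumnValuesToBeOfType(column="my_col", type_="INT")

    # The SQLAlchemy generic name does not match the reported "INT", which
    # is why the earlier form of this test was marked xfail.
    failing = gxe.ExpectColumnValuesToBeOfType(column="my_col", type_="Integer")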
