
[MAINTENANCE] Adding databricks compatibility types #10787

Merged
Changes from 1 commit
27 commits
167fbac
adding databricks types
Shinnnyshinshin Dec 17, 2024
c18c9e5
Update databricks.py
Shinnnyshinshin Dec 17, 2024
51df6f7
Update databricks.py
Shinnnyshinshin Dec 17, 2024
b05b84c
type ignore
Shinnnyshinshin Dec 17, 2024
af769b1
a little more general
Shinnnyshinshin Dec 17, 2024
ce007c5
Update databricks.py
Shinnnyshinshin Dec 17, 2024
03e39c2
Update databricks.py
Shinnnyshinshin Dec 17, 2024
f5ccb4c
Update databricks.py
Shinnnyshinshin Dec 18, 2024
090554c
Merge branch 'm/zelda-1183/zelda-1188/adding-databricks-types' into m…
Shinnnyshinshin Dec 18, 2024
2467925
Update test_expect_column_values_to_be_in_type_list.py
Shinnnyshinshin Dec 18, 2024
806648f
Update databricks.py
Shinnnyshinshin Dec 18, 2024
cfec5e2
Merge branch 'm/zelda-1183/zelda-1188/adding-databricks-types' into m…
Shinnnyshinshin Dec 18, 2024
9d6f510
the fix
Shinnnyshinshin Dec 18, 2024
2066819
Merge branch 'develop' into m/zelda-1183/zelda-1188/adding-databricks…
Shinnnyshinshin Dec 18, 2024
0d68ca2
Update databricks.py
Shinnnyshinshin Dec 18, 2024
7efe84a
Merge branch 'm/zelda-1183/zelda-1188/adding-databricks-types' of htt…
Shinnnyshinshin Dec 18, 2024
f559b46
Merge branch 'm/zelda-1183/zelda-1188/adding-databricks-types' into m…
Shinnnyshinshin Dec 18, 2024
d6cf5dd
adding test
Shinnnyshinshin Dec 18, 2024
c0f90ea
a better import
Shinnnyshinshin Dec 18, 2024
e6f540a
Merge branch 'm/zelda-1183/zelda-1188/adding-databricks-types' into m…
Shinnnyshinshin Dec 18, 2024
9b15bb5
tests and new types
Shinnnyshinshin Dec 18, 2024
a87db03
Update databricks.py
Shinnnyshinshin Dec 18, 2024
7dba978
a bit of clean up before review.
Shinnnyshinshin Dec 18, 2024
bdfa30e
Update test_expect_column_values_to_be_in_type_list.py
Shinnnyshinshin Dec 18, 2024
c06a069
Merge branch 'develop' into m/zelda-1183/zelda-1188/adding-databricks…
Shinnnyshinshin Dec 18, 2024
4525003
only the needed changes.
Shinnnyshinshin Dec 18, 2024
58ff217
Merge branch 'm/zelda-1183/zelda-1188/adding-databricks-types' of htt…
Shinnnyshinshin Dec 18, 2024
Update test_expect_column_values_to_be_in_type_list.py
Shinnnyshinshin committed Dec 18, 2024
commit bdfa30e409339292bfe7620f08750a1d43da8d64
@@ -3,7 +3,6 @@
import sqlalchemy.types as sqltypes

import great_expectations.expectations as gxe
from great_expectations.compatibility.databricks import DATABRICKS_TYPES
from great_expectations.compatibility.snowflake import SNOWFLAKE_TYPES
from great_expectations.core.result_format import ResultFormat
from great_expectations.datasource.fluent.interfaces import Batch
@@ -380,164 +379,3 @@ def test_success_complete_snowflake(
assert isinstance(result_dict["observed_value"], str)
assert isinstance(expectation.type_list, list)
assert result_dict["observed_value"] in expectation.type_list


@pytest.mark.parametrize(
"expectation",
[
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(column="STRING", type_list=["STRING"]),
id="STRING",
),
# SqlA Text gets converted to Databricks STRING
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(column="TEXT", type_list=["STRING"]),
id="TEXT",
),
# SqlA UNICODE gets converted to Databricks STRING
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(column="UNICODE", type_list=["STRING"]),
id="UNICODE",
),
# SqlA UNICODE_TEXT gets converted to Databricks STRING
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(column="UNICODE_TEXT", type_list=["STRING"]),
id="UNICODE_TEXT",
),
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(column="BOOLEAN", type_list=["BOOLEAN"]),
id="BOOLEAN",
),
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(
column="DECIMAL", type_list=["DECIMAL", "DECIMAL(10, 0)"]
),
id="DECIMAL",
),
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(column="DATE", type_list=["DATE"]),
id="DATE",
),
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(column="TIMESTAMP", type_list=["TIMESTAMP"]),
id="TIMESTAMP",
),
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(
column="TIMESTAMP_NTZ", type_list=["TIMESTAMP_NTZ"]
),
id="TIMESTAMP_NTZ",
),
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(column="DOUBLE", type_list=["DOUBLE", "FLOAT"]),
id="DOUBLE",
),
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(column="FLOAT", type_list=["FLOAT"]),
id="FLOAT",
),
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(column="INT", type_list=["INT"]),
id="INT",
),
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(column="TINYINT", type_list=["TINYINT"]),
id="TINYINT",
),
pytest.param(
gxe.ExpectColumnValuesToBeInTypeList(
column="DECIMAL", type_list=["DECIMAL", "DECIMAL(10, 0)"]
),
id="DECIMAL",
),
# SqlA Time gets converted to Databricks STRING,
# but is not supported by our testing framework
# pytest.param(
# gxe.ExpectColumnValuesToBeInTypeList(column="TIME", type_list=["STRING"]),
# id="TIME",
# ),
# SqlA UUID gets converted to Databricks STRING,
# but is not supported by our testing framework.
# pytest.param(
# gxe.ExpectColumnValuesToBeInTypeList(column="UUID", type_list=["STRING"]),
# id="UUID",
# )
],
)
@parameterize_batch_for_data_sources(
data_source_configs=[
DatabricksDatasourceTestConfig(
column_types={
"STRING": DATABRICKS_TYPES.STRING,
"TEXT": sqltypes.Text,
"UNICODE": sqltypes.Unicode,
"UNICODE_TEXT": sqltypes.UnicodeText,
"BIGINT": sqltypes.BigInteger,
"BOOLEAN": sqltypes.BOOLEAN,
"DATE": sqltypes.DATE,
"TIMESTAMP_NTZ": DATABRICKS_TYPES.TIMESTAMP_NTZ,
"TIMESTAMP": DATABRICKS_TYPES.TIMESTAMP,
"DOUBLE": sqltypes.DOUBLE,
"FLOAT": sqltypes.FLOAT,
"INT": sqltypes.Integer,
"DECIMAL": sqltypes.Numeric,
"SMALLINT": sqltypes.SmallInteger,
"TINYINT": DATABRICKS_TYPES.TINYINT,
# "TIME": sqltypes.Time,
# "UUID": sqltypes.UUID,
}
)
],
data=pd.DataFrame(
{
"STRING": ["a", "b", "c"],
"TEXT": ["a", "b", "c"],
"UNICODE": ["\u00e9", "\u00e9", "\u00e9"],
"UNICODE_TEXT": ["a", "b", "c"],
"BIGINT": [1111, 2222, 3333],
"BOOLEAN": [True, True, False],
"DATE": [
"2021-01-01",
"2021-01-02",
"2021-01-03",
],
"TIMESTAMP_NTZ": [
"2021-01-01 00:00:00",
"2021-01-02 00:00:00",
"2021-01-03 00:00:00",
],
"TIMESTAMP": [
"2021-01-01 00:00:00",
"2021-01-02 00:00:00",
"2021-01-03 00:00:00",
],
"DOUBLE": [1.0, 2.0, 3.0],
"FLOAT": [1.0, 2.0, 3.0],
"INT": [1, 2, 3],
"DECIMAL": [1.1, 2.2, 3.3],
"SMALLINT": [1, 2, 3],
# "TIME": [
# sa.Time("22:17:33.123456"),
# sa.Time("22:17:33.123456"),
# sa.Time("22:17:33.123456"),
# ],
# "UUID": [
# uuid.UUID("905993ea-f50e-4284-bea0-5be3f0ed7031"),
# uuid.UUID("9406b631-fa2f-41cf-b666-f9a2ac3118c1"),
# uuid.UUID("47538f05-32e3-4594-80e2-0b3b33257ae7")
# ],
},
dtype="object",
),
)
def test_success_complete_databricks(
batch_for_datasource: Batch, expectation: gxe.ExpectColumnValuesToBeInTypeList
) -> None:
result = batch_for_datasource.validate(expectation, result_format=ResultFormat.COMPLETE)
result_dict = result.to_json_dict()["result"]

assert result.success
assert isinstance(result_dict, dict)
assert isinstance(result_dict["observed_value"], str)
assert isinstance(expectation.type_list, list)
assert result_dict["observed_value"] in expectation.type_list
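The databricks test in this diff relies on DATABRICKS_TYPES from great_expectations.compatibility.databricks for column types that have no generic SQLAlchemy equivalent (STRING, TIMESTAMP, TIMESTAMP_NTZ, TINYINT). The actual contents of that compatibility module are not part of this diff; the following is only a minimal sketch of what such a shim could look like, assuming it wraps the optional Databricks SQLAlchemy dialect behind an import guard. The SimpleNamespace layout, the STRING-to-sqltypes.String mapping, and the fallback branch are assumptions, not the module's real implementation.

# Hypothetical sketch of great_expectations/compatibility/databricks.py.
# Only the attribute names (STRING, TIMESTAMP, TIMESTAMP_NTZ, TINYINT) come
# from the test above; the guard pattern and namespace layout are assumed.
from types import SimpleNamespace

import sqlalchemy.types as sqltypes

try:
    # The Databricks SQLAlchemy dialect ships Databricks-specific types
    # (TIMESTAMP, TIMESTAMP_NTZ, TINYINT) that plain SQLAlchemy lacks.
    from databricks.sqlalchemy import TIMESTAMP, TIMESTAMP_NTZ, TINYINT

    DATABRICKS_TYPES = SimpleNamespace(
        STRING=sqltypes.String,       # assumed mapping of Databricks STRING to the generic String type
        TIMESTAMP=TIMESTAMP,          # timezone-aware timestamp
        TIMESTAMP_NTZ=TIMESTAMP_NTZ,  # timestamp without time zone
        TINYINT=TINYINT,              # 1-byte integer, no generic SQLAlchemy counterpart
    )
except ImportError:
    # Dialect not installed: expose an empty namespace so importing this
    # module still succeeds and callers can skip Databricks-specific paths.
    DATABRICKS_TYPES = SimpleNamespace()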