Skip to content

Commit

Permalink
ensure datatype present on each col
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-jhilgart committed May 9, 2024
1 parent 940de24 commit 29d5d4d
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 1 deletion.
5 changes: 4 additions & 1 deletion semantic_model_generator/sqlgen/generate_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import sqlglot
from sqlglot.dialects.snowflake import Snowflake

from semantic_model_generator.validate.fields import validate_contains_datatype_for_each_col
from semantic_model_generator.protos.semantic_model_pb2 import (
Dimension,
FullyQualifiedTable,
Expand Down Expand Up @@ -120,7 +120,10 @@ def generate_select_with_all_cols(table: Table, limit: int) -> str:
Returns:
str: A SQL statement formatted for Snowflake.
"""


select = _create_select_statement(table, limit)

validate_contains_datatype_for_each_col(table)

return _convert_to_snowflake_sql(select)
29 changes: 29 additions & 0 deletions semantic_model_generator/tests/generate_sql_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@
sample_values=["1000.50", "2000.75", "1500.00"],
)

# Measure fixture that deliberately omits `data_type`; it backs the table used
# by the missing-datatype validation test.
measure_example_no_data_type = Measure(
    name="Total_Sales",
    synonyms=["Sales", "Revenue"],
    description="Total sales amount",
    expr="sales_amount",
    sample_values=["1000.50", "2000.75", "1500.00"],
)


time_dimension_example = TimeDimension(
name="Date",
synonyms=["Time"],
Expand Down Expand Up @@ -122,6 +131,15 @@
)


# Table fixture whose only column is a measure without `data_type`, so that
# SQL generation for it is expected to fail validation.
_TEST_TABLE_MISSING_DATATYPE = Table(
    name="Transactions",
    synonyms=["Transaction Records"],
    description="Table containing transaction records",
    base_table=fully_qualified_table_example,
    measures=[measure_example_no_data_type],
)


def test_valid_table_sql_with_expr():
want = "SELECT region_code AS Region, sales_amount - sales_total AS Total_Sales, transaction_date AS Date FROM SalesDB.public.transactions LIMIT 100"
generated_sql = generate_select_with_all_cols(_TEST_VALID_TABLE, 100)
Expand Down Expand Up @@ -162,3 +180,14 @@ def test_table_invalid_col_expr():
str(excinfo.value)
== "Aggregations aren't allowed in columns yet. Please remove from SUM(sales_amount) as Total_Sales."
)

def test_table_missing_datatype():
    """Generating SQL for a table with a column lacking `data_type` raises ValueError."""
    expected_message = (
        "Your Semantic Model contains a col Total_Sales that does not have the `data_type` field. Please add."
    )

    with pytest.raises(ValueError) as raised:
        generate_select_with_all_cols(_TEST_TABLE_MISSING_DATATYPE, 100)

    # Compare the full message so wording regressions are caught as well.
    assert str(raised.value) == expected_message


13 changes: 13 additions & 0 deletions semantic_model_generator/validate/fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from semantic_model_generator.protos.semantic_model_pb2 import Table

def validate_contains_datatype_for_each_col(table: Table) -> None:
    """Ensure every column of ``table`` declares a non-blank ``data_type``.

    Dimensions, measures and time dimensions are checked, in that order, and
    the first offending column is reported.

    Args:
        table: The semantic-model table whose columns are validated.

    Raises:
        ValueError: If a column's ``data_type`` is ``None``, empty, or
            consists only of whitespace.
    """
    column_groups = (table.dimensions, table.measures, table.time_dimensions)
    for columns in column_groups:
        for col in columns:
            data_type = col.data_type
            # Strip rather than compare lengths: a length threshold lets
            # whitespace-only values of two or more characters slip through.
            if data_type is None or not data_type.strip():
                raise ValueError(
                    f"Your Semantic Model contains a col {col.name} that does not have the `data_type` field. Please add."
                )

0 comments on commit 29d5d4d

Please sign in to comment.