From cfd1aa425de1e4f4c8d0a19ebfff34b27af90373 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Tue, 10 Dec 2024 10:49:48 +0530 Subject: [PATCH 1/3] WIP --- .../powerbi/powerbi-lexical-grammar.rule | 4 +++ .../integration/powerbi/test_m_parser.py | 29 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule index 51a0dff288558f..ee2027ca9d3923 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule @@ -384,6 +384,10 @@ argument_list: WS_INLINE? expression | "\"" identifier "\"" "," argument_list | "[" identifier "]" | "[" identifier "]" "," argument_list + | "{{" identifier "}}" + | "{{" identifier "," argument_list "," "}}" + | "{{" expression "}}" + | "{{" expression "," argument_list "," "}}" | empty_string | empty_string "," argument_list | WS_INLINE diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 63821f9038a88c..75fe511ac5073e 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -1171,3 +1171,32 @@ def test_m_query_timeout(mock_get_lark_parser): assert ( is_entry_present ), 'Warning message "M-Query Parsing Timeout" should be present in reporter' + +def test_for_each_pattern1(): + q: str = 'let\n Source = Snowflake.Databases("xaa48144.snowflakecomputing.com", "COMPUTE_WH", [Role="ACCOUNTADMIN"]),\n SNOWFLAKE_SAMPLE_DATA_Database = Source{[Name="SNOWFLAKE_SAMPLE_DATA", Kind="Database"]}[Data],\n TPCDS_SF100TCL_Schema = SNOWFLAKE_SAMPLE_DATA_Database{[Name="TPCDS_SF100TCL", Kind="Schema"]}[Data],\n ITEM_Table = TPCDS_SF100TCL_Schema{[Name="ITEM", Kind="Table"]}[Data],\n \n // Group by I_BRAND and calculate the count\n BrandCountsTable = Table.Group(ITEM_Table, {"I_BRAND"}, {{"BrandCount", each Table.RowCount(_), Int64.Type}})\nin\n BrandCountsTable' + + table: powerbi_data_classes.Table = powerbi_data_classes.Table( + columns=[], + measures=[], + expression=q, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + + ctx, config, platform_instance_resolver = get_default_instances() + + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, + reporter, + ctx=ctx, + config=config, + platform_instance_resolver=platform_instance_resolver, + )[0].upstreams + + assert len(data_platform_tables) == 1 + assert ( + data_platform_tables[0].urn + == "urn:li:dataset:(urn:li:dataPlatform:snowflake,pbi_test.test.testtable,PROD)" + ) From 580aebe6f8f526968095e091a027a4e8161bcdb5 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Mon, 16 Dec 2024 14:23:07 +0530 Subject: [PATCH 2/3] WIP --- .../source/powerbi/powerbi-lexical-grammar.rule | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule index ee2027ca9d3923..83d681aba125d7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule @@ -245,6 +245,8 @@ operator_or_punctuator: "," | "=>" | ".." | "..." + | "{{" + | "}}" document: section_document | expression_document @@ -275,6 +277,7 @@ expression: logical_or_expression | if_expression | error_raising_expression | error_handling_expression + | outer_expression logical_or_expression: logical_and_expression @@ -376,6 +379,8 @@ sql_content: /(?:[^\"\\]|\\[\"]|\"\"|\#\(lf\))+/ sql_string: "\"" sql_content "\"" +outer_expression: "{{" expression "}}" + argument_list: WS_INLINE? expression | WS_INLINE? expression WS_INLINE? "," WS_INLINE? argument_list | WS_INLINE? sql_string @@ -384,10 +389,6 @@ argument_list: WS_INLINE? expression | "\"" identifier "\"" "," argument_list | "[" identifier "]" | "[" identifier "]" "," argument_list - | "{{" identifier "}}" - | "{{" identifier "," argument_list "," "}}" - | "{{" expression "}}" - | "{{" expression "," argument_list "," "}}" | empty_string | empty_string "," argument_list | WS_INLINE From 6def8944902527ebe7898429b986c904c1179d2f Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Thu, 19 Dec 2024 16:23:00 +0530 Subject: [PATCH 3/3] fix m-query for connector test --- .../source/powerbi/powerbi-lexical-grammar.rule | 13 +++++++++++-- .../tests/integration/powerbi/test_m_parser.py | 15 +++++++++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule index 83d681aba125d7..f237e2503317f2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule @@ -21,6 +21,11 @@ // | empty_string // | empty_string "," argument_list // - Added sql_string in any_literal +// - Added WS_INLINE? in field expression +// Added to ignore any comments +// %ignore WS // Ignore whitespace +// %ignore CPP_COMMENT // Ignore single-line comments +// %ignore C_COMMENT // Ignore multi-line comments lexical_unit: lexical_elements? @@ -414,7 +419,7 @@ record_expression: "[" field_list? "]" field_list: field | field "," field_list -field: field_name WS_INLINE? "=" WS_INLINE? expression +field: WS_INLINE? field_name WS_INLINE? "=" WS_INLINE? expression field_name: generalized_identifier | quoted_identifier @@ -626,4 +631,8 @@ any_literal: record_literal %import common.DIGIT %import common.LF %import common.CR -%import common.ESCAPED_STRING \ No newline at end of file +%import common.ESCAPED_STRING + +%ignore WS // Ignore whitespace +%ignore CPP_COMMENT // Ignore single-line comments +%ignore C_COMMENT // Ignore multi-line comments \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 75fe511ac5073e..832d00d9c54702 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -1172,15 +1172,16 @@ def test_m_query_timeout(mock_get_lark_parser): is_entry_present ), 'Warning message "M-Query Parsing Timeout" should be present in reporter' -def test_for_each_pattern1(): + +def test_comments_in_m_query(): q: str = 'let\n Source = Snowflake.Databases("xaa48144.snowflakecomputing.com", "COMPUTE_WH", [Role="ACCOUNTADMIN"]),\n SNOWFLAKE_SAMPLE_DATA_Database = Source{[Name="SNOWFLAKE_SAMPLE_DATA", Kind="Database"]}[Data],\n TPCDS_SF100TCL_Schema = SNOWFLAKE_SAMPLE_DATA_Database{[Name="TPCDS_SF100TCL", Kind="Schema"]}[Data],\n ITEM_Table = TPCDS_SF100TCL_Schema{[Name="ITEM", Kind="Table"]}[Data],\n \n // Group by I_BRAND and calculate the count\n BrandCountsTable = Table.Group(ITEM_Table, {"I_BRAND"}, {{"BrandCount", each Table.RowCount(_), Int64.Type}})\nin\n BrandCountsTable' table: powerbi_data_classes.Table = powerbi_data_classes.Table( columns=[], measures=[], expression=q, - name="virtual_order_table", - full_name="OrderDataSet.virtual_order_table", + name="pet_price_index", + full_name="datalake.sandbox_pet.pet_price_index", ) reporter = PowerBiDashboardSourceReport() @@ -1193,10 +1194,16 @@ def test_for_each_pattern1(): ctx=ctx, config=config, platform_instance_resolver=platform_instance_resolver, + parameters={ + "hostname": "xyz.databricks.com", + "http_path": "/sql/1.0/warehouses/abc", + "catalog": "cat", + "schema": "public", + }, )[0].upstreams assert len(data_platform_tables) == 1 assert ( data_platform_tables[0].urn - == "urn:li:dataset:(urn:li:dataPlatform:snowflake,pbi_test.test.testtable,PROD)" + == "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpcds_sf100tcl.item,PROD)" )