diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule
index 51a0dff288558f..f237e2503317f2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule
@@ -21,6 +21,11 @@
 //                  | empty_string
 //                  | empty_string "," argument_list
 //   - Added sql_string in any_literal
+//   - Added WS_INLINE? in field expression
+//     Added to ignore any comments
+//        %ignore WS             // Ignore whitespace
+//        %ignore CPP_COMMENT    // Ignore single-line comments
+//        %ignore C_COMMENT      // Ignore multi-line comments
 
 lexical_unit: lexical_elements?
 
@@ -245,6 +250,8 @@ operator_or_punctuator: ","
                       | "=>"
                       | ".."
                       | "..."
+                      | "{{"
+                      | "}}"
 
 document: section_document
         | expression_document
@@ -275,6 +282,7 @@ expression: logical_or_expression
           | if_expression
           | error_raising_expression
           | error_handling_expression
+          | outer_expression
 
 logical_or_expression: logical_and_expression
 
@@ -376,6 +384,8 @@ sql_content: /(?:[^\"\\]|\\[\"]|\"\"|\#\(lf\))+/
 
 sql_string: "\"" sql_content "\""
 
+outer_expression: "{{" expression "}}"
+
 argument_list: WS_INLINE? expression
              | WS_INLINE? expression WS_INLINE? "," WS_INLINE? argument_list
              | WS_INLINE? sql_string
@@ -409,7 +419,7 @@ record_expression: "[" field_list? "]"
 field_list: field
           | field "," field_list
 
-field: field_name WS_INLINE? "=" WS_INLINE? expression
+field: WS_INLINE? field_name WS_INLINE? "=" WS_INLINE? expression
 
 field_name: generalized_identifier
           | quoted_identifier
@@ -621,4 +631,8 @@ any_literal: record_literal
 %import common.DIGIT
 %import common.LF
 %import common.CR
-%import common.ESCAPED_STRING
\ No newline at end of file
+%import common.ESCAPED_STRING
+
+%ignore WS             // Ignore whitespace
+%ignore CPP_COMMENT    // Ignore single-line comments
+%ignore C_COMMENT      // Ignore multi-line comments
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
index 63821f9038a88c..832d00d9c54702 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
@@ -1171,3 +1171,39 @@ def test_m_query_timeout(mock_get_lark_parser):
     assert (
         is_entry_present
     ), 'Warning message "M-Query Parsing Timeout" should be present in reporter'
+
+
+def test_comments_in_m_query():
+    q: str = 'let\n Source = Snowflake.Databases("xaa48144.snowflakecomputing.com", "COMPUTE_WH", [Role="ACCOUNTADMIN"]),\n SNOWFLAKE_SAMPLE_DATA_Database = Source{[Name="SNOWFLAKE_SAMPLE_DATA", Kind="Database"]}[Data],\n TPCDS_SF100TCL_Schema = SNOWFLAKE_SAMPLE_DATA_Database{[Name="TPCDS_SF100TCL", Kind="Schema"]}[Data],\n ITEM_Table = TPCDS_SF100TCL_Schema{[Name="ITEM", Kind="Table"]}[Data],\n \n // Group by I_BRAND and calculate the count\n BrandCountsTable = Table.Group(ITEM_Table, {"I_BRAND"}, {{"BrandCount", each Table.RowCount(_), Int64.Type}})\nin\n BrandCountsTable'
+
+    table: powerbi_data_classes.Table = powerbi_data_classes.Table(
+        columns=[],
+        measures=[],
+        expression=q,
+        name="pet_price_index",
+        full_name="datalake.sandbox_pet.pet_price_index",
+    )
+
+    reporter = PowerBiDashboardSourceReport()
+
+    ctx, config, platform_instance_resolver = get_default_instances()
+
+    data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables(
+        table,
+        reporter,
+        ctx=ctx,
+        config=config,
+        platform_instance_resolver=platform_instance_resolver,
+        parameters={
+            "hostname": "xyz.databricks.com",
+            "http_path": "/sql/1.0/warehouses/abc",
+            "catalog": "cat",
+            "schema": "public",
+        },
+    )[0].upstreams
+
+    assert len(data_platform_tables) == 1
+    assert (
+        data_platform_tables[0].urn
+        == "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpcds_sf100tcl.item,PROD)"
+    )
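
For context, the grammar change above relies on Lark's %ignore mechanism: any terminal named in an %ignore directive is dropped by the lexer between tokens, so // and /* */ comments in an M expression no longer break parsing. The following is a minimal, self-contained sketch (a toy grammar, not the Power BI grammar from this patch; rule and terminal names are illustrative only) showing the same technique.

# toy_lark_ignore_demo.py -- illustrative only; not part of the patch above.
# Demonstrates how Lark's %ignore directives (the ones added to the Power BI
# grammar) make a parser skip whitespace, //-style and /* */-style comments.
from lark import Lark

TOY_GRAMMAR = r"""
    start: assignment+

    assignment: NAME "=" NUMBER

    NAME: /[A-Za-z_][A-Za-z0-9_]*/
    NUMBER: /[0-9]+/

    %import common.WS
    %import common.CPP_COMMENT    // single-line comments: // ...
    %import common.C_COMMENT      // multi-line comments: /* ... */

    %ignore WS
    %ignore CPP_COMMENT
    %ignore C_COMMENT
"""

parser = Lark(TOY_GRAMMAR, start="start")

# Comments and blank lines are discarded by the lexer, so only the two
# assignments end up in the parse tree.
tree = parser.parse(
    """
    // a single-line comment that should be ignored
    x = 1
    /* a multi-line
       comment */
    y = 2
    """
)
print(tree.pretty())

Running the sketch prints a tree containing only the two assignment nodes; the comment text never reaches the parser, which mirrors why the "// Group by I_BRAND ..." line in the new test's M-Query no longer causes a parse failure.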