diff --git a/metaphor/power_bi/extractor.py b/metaphor/power_bi/extractor.py
index bc3a5ebd..0e1bbb7f 100644
--- a/metaphor/power_bi/extractor.py
+++ b/metaphor/power_bi/extractor.py
@@ -140,14 +140,18 @@ def map_wi_dataflow_to_pipeline(self, workspace: WorkspaceInfo) -> None:
                 and "document" in dataflow["pbi:mashup"]
             ):
                 document_str = dataflow["pbi:mashup"]["document"]
-                sources, _ = PowerQueryParser.parse_query_expression(
-                    "",
-                    [],
-                    document_str or "",
-                    self._snowflake_account,
-                )
-
-                self._dataflow_sources[data_flow_id] = sources
+                try:
+                    sources, _ = PowerQueryParser.parse_query_expression(
+                        "",
+                        [],
+                        document_str or "",
+                        self._snowflake_account,
+                    )
+                    self._dataflow_sources[data_flow_id] = sources
+                except Exception as e:
+                    logger.error(
+                        f"Failed to parse expression for dataflow {data_flow_id}: {e}"
+                    )
 
             pipeline = Pipeline(
                 logical_id=PipelineLogicalID(
diff --git a/metaphor/power_bi/power_bi_client.py b/metaphor/power_bi/power_bi_client.py
index 4afada43..d7e24f10 100644
--- a/metaphor/power_bi/power_bi_client.py
+++ b/metaphor/power_bi/power_bi_client.py
@@ -154,7 +154,7 @@ class WorkspaceInfoDataflow(BaseModel):
     configuredBy: Optional[str] = None
     modifiedBy: Optional[str] = None
     modifiedDateTime: Optional[str] = None
-    refreshSchedule: Optional[PowerBiRefreshSchedule]
+    refreshSchedule: Optional[PowerBiRefreshSchedule] = None
 
 
 class WorkspaceInfo(BaseModel):
diff --git a/pyproject.toml b/pyproject.toml
index 963371b9..6e5291ef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "metaphor-connectors"
-version = "0.12.49"
+version = "0.12.50"
 license = "Apache-2.0"
 description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
 authors = ["Metaphor "]
diff --git a/tests/power_bi/data/dataflow_2.json b/tests/power_bi/data/dataflow_2.json
new file mode 100644
index 00000000..ae1cdeee
--- /dev/null
+++ b/tests/power_bi/data/dataflow_2.json
@@ -0,0 +1,20 @@
+{
+  "name": "Dataflow",
+  "description": "A dataflow",
+  "version": "1.0",
+  "culture": "en-US",
+  "modifiedTime": "2023-09-19T06:08:01.3550729+00:00",
+  "ppdf:outputFileFormat": "Csv",
+  "pbi:mashup": {
+    "fastCombine": false,
+    "allowNativeQueries": false,
+    "queriesMetadata": {
+      "TABLE_NAME": {
+        "queryId": "query_id",
+        "queryName": "TABLE_NAME",
+        "loadEnabled": true
+      }
+    },
+    "document": "section Section1;\r\nshared ENTITY_NAME = let\r\n Source = GoogleAnalytics.Accounts(\"account.snowflakecomputing.com\", \"COMPUTE_WH\", [Role = null, CreateNavigationProperties = null, ConnectionTimeout = null, CommandTimeout = null]),\r\n Navigation = Source{[Name = \"DB\", Kind = \"Database\"]}[Data],\r\n #\"Navigation 1\" = Navigation{[Name = \"SCHEMA\", Kind = \"Schema\"]}[Data],\r\n #\"Navigation 2\" = #\"Navigation 1\"{[Name = \"TABLE_NAME\", Kind = \"Table\"]}[Data]\r\nin\r\n #\"Navigation 2\";\r\n"
+  }
+}
diff --git a/tests/power_bi/expected.json b/tests/power_bi/expected.json
index 88f26b2e..a23baa5e 100644
--- a/tests/power_bi/expected.json
+++ b/tests/power_bi/expected.json
@@ -461,5 +461,18 @@
       },
       "dataflowUrl": "https://app.powerbi.com/groups/workspace-1/dataflows/00000000-0000-0000-0001-00000000000A"
     }
+  },
+  {
+    "logicalId": {
+      "name": "00000000-0000-0000-0002-00000000000A",
+      "type": "POWER_BI_DATAFLOW"
+    },
+    "powerBiDataflow": {
+      "content": "{\"name\": \"Dataflow\", \"description\": \"A dataflow\", \"version\": \"1.0\", \"culture\": \"en-US\", \"modifiedTime\": \"2023-09-19T06:08:01.3550729+00:00\", \"ppdf:outputFileFormat\": \"Csv\", \"pbi:mashup\": {\"fastCombine\": false, \"allowNativeQueries\": false, \"queriesMetadata\": {\"TABLE_NAME\": {\"queryId\": \"query_id\", \"queryName\": \"TABLE_NAME\", \"loadEnabled\": true}}, \"document\": \"section Section1;\\r\\nshared ENTITY_NAME = let\\r\\n Source = GoogleAnalytics.Accounts(\\\"account.snowflakecomputing.com\\\", \\\"COMPUTE_WH\\\", [Role = null, CreateNavigationProperties = null, ConnectionTimeout = null, CommandTimeout = null]),\\r\\n Navigation = Source{[Name = \\\"DB\\\", Kind = \\\"Database\\\"]}[Data],\\r\\n #\\\"Navigation 1\\\" = Navigation{[Name = \\\"SCHEMA\\\", Kind = \\\"Schema\\\"]}[Data],\\r\\n #\\\"Navigation 2\\\" = #\\\"Navigation 1\\\"{[Name = \\\"TABLE_NAME\\\", Kind = \\\"Table\\\"]}[Data]\\r\\nin\\r\\n #\\\"Navigation 2\\\";\\r\\n\"}}",
+      "dataflowUrl": "https://app.powerbi.com/groups/workspace-1/dataflows/00000000-0000-0000-0002-00000000000A",
+      "description": "",
+      "document": "section Section1;\r\nshared ENTITY_NAME = let\r\n Source = GoogleAnalytics.Accounts(\"account.snowflakecomputing.com\", \"COMPUTE_WH\", [Role = null, CreateNavigationProperties = null, ConnectionTimeout = null, CommandTimeout = null]),\r\n Navigation = Source{[Name = \"DB\", Kind = \"Database\"]}[Data],\r\n #\"Navigation 1\" = Navigation{[Name = \"SCHEMA\", Kind = \"Schema\"]}[Data],\r\n #\"Navigation 2\" = #\"Navigation 1\"{[Name = \"TABLE_NAME\", Kind = \"Table\"]}[Data]\r\nin\r\n #\"Navigation 2\";\r\n",
+      "name": "Dataflow 2"
+    }
   }
 ]
diff --git a/tests/power_bi/test_extractor.py b/tests/power_bi/test_extractor.py
index 822f5d62..e402e64b 100644
--- a/tests/power_bi/test_extractor.py
+++ b/tests/power_bi/test_extractor.py
@@ -172,6 +172,7 @@ async def test_extractor(mock_client: MagicMock, test_root_dir: str):
     }
 
     dataflow_id = "00000000-0000-0000-0001-00000000000A"
+    dataflow_id2 = "00000000-0000-0000-0002-00000000000A"
 
     mock_instance.get_workspace_info = MagicMock(
         return_value=[
@@ -338,7 +339,12 @@
                             notifyOption="MailOnFailure",
                             times=["1:00:00"],
                         ),
-                    )
+                    ),
+                    WorkspaceInfoDataflow(
+                        objectId=dataflow_id2,
+                        name="Dataflow 2",
+                        description="",
+                    ),
                 ],
             )
         ]
@@ -385,8 +391,11 @@ def fake_get_user_subscriptions(user_id: str) -> List[PowerBISubscription]:
             ),
         ]
 
-    def fake_export_dataflow(workspace_id: str, dataflow_id: str) -> dict:
-        return load_json(f"{test_root_dir}/power_bi/data/dataflow_1.json")
+    def fake_export_dataflow(workspace_id: str, df_id: str) -> dict:
+        if df_id == dataflow_id:
+            return load_json(f"{test_root_dir}/power_bi/data/dataflow_1.json")
+        else:
+            return load_json(f"{test_root_dir}/power_bi/data/dataflow_2.json")
 
     mock_instance.get_datasets.side_effect = fake_get_datasets
     mock_instance.get_reports.side_effect = fake_get_reports