Skip to content

Commit

Permalink
Safely parse power query for dataflow (#604)
Browse files Browse the repository at this point in the history
* Safe parse power query for dataflow

* Bump version
  • Loading branch information
elic-eon authored Sep 21, 2023
1 parent b6a2610 commit 325545e
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 13 deletions.
20 changes: 12 additions & 8 deletions metaphor/power_bi/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,18 @@ def map_wi_dataflow_to_pipeline(self, workspace: WorkspaceInfo) -> None:
and "document" in dataflow["pbi:mashup"]
):
document_str = dataflow["pbi:mashup"]["document"]
sources, _ = PowerQueryParser.parse_query_expression(
"",
[],
document_str or "",
self._snowflake_account,
)

self._dataflow_sources[data_flow_id] = sources
try:
sources, _ = PowerQueryParser.parse_query_expression(
"",
[],
document_str or "",
self._snowflake_account,
)
self._dataflow_sources[data_flow_id] = sources
except Exception as e:
logger.error(
f"Failed to parse expression for dataflow {data_flow_id}: {e}"
)

pipeline = Pipeline(
logical_id=PipelineLogicalID(
Expand Down
2 changes: 1 addition & 1 deletion metaphor/power_bi/power_bi_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ class WorkspaceInfoDataflow(BaseModel):
configuredBy: Optional[str] = None
modifiedBy: Optional[str] = None
modifiedDateTime: Optional[str] = None
refreshSchedule: Optional[PowerBiRefreshSchedule]
refreshSchedule: Optional[PowerBiRefreshSchedule] = None


class WorkspaceInfo(BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.12.49"
version = "0.12.50"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down
20 changes: 20 additions & 0 deletions tests/power_bi/data/dataflow_2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"name": "Dataflow",
"description": "A dataflow",
"version": "1.0",
"culture": "en-US",
"modifiedTime": "2023-09-19T06:08:01.3550729+00:00",
"ppdf:outputFileFormat": "Csv",
"pbi:mashup": {
"fastCombine": false,
"allowNativeQueries": false,
"queriesMetadata": {
"TABLE_NAME": {
"queryId": "query_id",
"queryName": "TABLE_NAME",
"loadEnabled": true
}
},
"document": "section Section1;\r\nshared ENTITY_NAME = let\r\n Source = GoogleAnalytics.Accounts(\"account.snowflakecomputing.com\", \"COMPUTE_WH\", [Role = null, CreateNavigationProperties = null, ConnectionTimeout = null, CommandTimeout = null]),\r\n Navigation = Source{[Name = \"DB\", Kind = \"Database\"]}[Data],\r\n #\"Navigation 1\" = Navigation{[Name = \"SCHEMA\", Kind = \"Schema\"]}[Data],\r\n #\"Navigation 2\" = #\"Navigation 1\"{[Name = \"TABLE_NAME\", Kind = \"Table\"]}[Data]\r\nin\r\n #\"Navigation 2\";\r\n"
}
}
13 changes: 13 additions & 0 deletions tests/power_bi/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -461,5 +461,18 @@
},
"dataflowUrl": "https://app.powerbi.com/groups/workspace-1/dataflows/00000000-0000-0000-0001-00000000000A"
}
},
{
"logicalId": {
"name": "00000000-0000-0000-0002-00000000000A",
"type": "POWER_BI_DATAFLOW"
},
"powerBiDataflow": {
"content": "{\"name\": \"Dataflow\", \"description\": \"A dataflow\", \"version\": \"1.0\", \"culture\": \"en-US\", \"modifiedTime\": \"2023-09-19T06:08:01.3550729+00:00\", \"ppdf:outputFileFormat\": \"Csv\", \"pbi:mashup\": {\"fastCombine\": false, \"allowNativeQueries\": false, \"queriesMetadata\": {\"TABLE_NAME\": {\"queryId\": \"query_id\", \"queryName\": \"TABLE_NAME\", \"loadEnabled\": true}}, \"document\": \"section Section1;\\r\\nshared ENTITY_NAME = let\\r\\n Source = GoogleAnalytics.Accounts(\\\"account.snowflakecomputing.com\\\", \\\"COMPUTE_WH\\\", [Role = null, CreateNavigationProperties = null, ConnectionTimeout = null, CommandTimeout = null]),\\r\\n Navigation = Source{[Name = \\\"DB\\\", Kind = \\\"Database\\\"]}[Data],\\r\\n #\\\"Navigation 1\\\" = Navigation{[Name = \\\"SCHEMA\\\", Kind = \\\"Schema\\\"]}[Data],\\r\\n #\\\"Navigation 2\\\" = #\\\"Navigation 1\\\"{[Name = \\\"TABLE_NAME\\\", Kind = \\\"Table\\\"]}[Data]\\r\\nin\\r\\n #\\\"Navigation 2\\\";\\r\\n\"}}",
"dataflowUrl": "https://app.powerbi.com/groups/workspace-1/dataflows/00000000-0000-0000-0002-00000000000A",
"description": "",
"document": "section Section1;\r\nshared ENTITY_NAME = let\r\n Source = GoogleAnalytics.Accounts(\"account.snowflakecomputing.com\", \"COMPUTE_WH\", [Role = null, CreateNavigationProperties = null, ConnectionTimeout = null, CommandTimeout = null]),\r\n Navigation = Source{[Name = \"DB\", Kind = \"Database\"]}[Data],\r\n #\"Navigation 1\" = Navigation{[Name = \"SCHEMA\", Kind = \"Schema\"]}[Data],\r\n #\"Navigation 2\" = #\"Navigation 1\"{[Name = \"TABLE_NAME\", Kind = \"Table\"]}[Data]\r\nin\r\n #\"Navigation 2\";\r\n",
"name": "Dataflow 2"
}
}
]
15 changes: 12 additions & 3 deletions tests/power_bi/test_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ async def test_extractor(mock_client: MagicMock, test_root_dir: str):
}

dataflow_id = "00000000-0000-0000-0001-00000000000A"
dataflow_id2 = "00000000-0000-0000-0002-00000000000A"

mock_instance.get_workspace_info = MagicMock(
return_value=[
Expand Down Expand Up @@ -338,7 +339,12 @@ async def test_extractor(mock_client: MagicMock, test_root_dir: str):
notifyOption="MailOnFailure",
times=["1:00:00"],
),
)
),
WorkspaceInfoDataflow(
objectId=dataflow_id2,
name="Dataflow 2",
description="",
),
],
)
]
Expand Down Expand Up @@ -385,8 +391,11 @@ def fake_get_user_subscriptions(user_id: str) -> List[PowerBISubscription]:
),
]

def fake_export_dataflow(workspace_id: str, dataflow_id: str) -> dict:
return load_json(f"{test_root_dir}/power_bi/data/dataflow_1.json")
def fake_export_dataflow(workspace_id: str, df_id: str) -> dict:
    """Fake of PowerBIClient.export_dataflow: serve the matching JSON fixture.

    Returns the dataflow_1 fixture for the first known dataflow id and the
    dataflow_2 fixture for any other id (workspace_id is ignored).
    """
    if df_id == dataflow_id:
        return load_json(f"{test_root_dir}/power_bi/data/dataflow_1.json")
    return load_json(f"{test_root_dir}/power_bi/data/dataflow_2.json")

mock_instance.get_datasets.side_effect = fake_get_datasets
mock_instance.get_reports.side_effect = fake_get_reports
Expand Down

0 comments on commit 325545e

Please sign in to comment.