Skip to content

Commit

Permalink
Power bi crawler should use new MCE model (#617)
Browse files Browse the repository at this point in the history
* Extract PBI workspace as hierarchy

* Update models version

* bump version

* Add tests
  • Loading branch information
elic-eon authored Oct 6, 2023
1 parent c1a0f5d commit f17e900
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 44 deletions.
7 changes: 6 additions & 1 deletion metaphor/common/event_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from metaphor.models.metadata_change_event import (
Dashboard,
Dataset,
Hierarchy,
KnowledgeCard,
MetadataChangeEvent,
Metric,
Expand All @@ -21,7 +22,9 @@
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

ENTITY_TYPES = Union[Dashboard, Dataset, Metric, KnowledgeCard, VirtualView, QueryLogs]
ENTITY_TYPES = Union[
Dashboard, Dataset, Metric, KnowledgeCard, VirtualView, QueryLogs, Hierarchy
]


class EventUtil:
Expand Down Expand Up @@ -57,6 +60,8 @@ def build_event(entity: ENTITY_TYPES):
return EventUtil._build_event(virtual_view=entity)
elif type(entity) is QueryLogs:
return EventUtil._build_event(query_logs=entity)
elif type(entity) is Hierarchy:
return EventUtil._build_event(hierarchy=entity)
else:
raise TypeError(f"invalid entity type {type(entity)}")

Expand Down
42 changes: 39 additions & 3 deletions metaphor/power_bi/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
DashboardUpstream,
EntityType,
EntityUpstream,
Hierarchy,
HierarchyInfo,
HierarchyLogicalID,
HierarchyType,
Pipeline,
PipelineLogicalID,
PipelineType,
Expand Down Expand Up @@ -87,6 +91,7 @@ def __init__(self, config: PowerBIRunConfig):
self._virtual_views: Dict[str, VirtualView] = {}
self._pipelines: Dict[str, Pipeline] = {}
self._dataflow_sources: Dict[str, List[EntityId]] = {}
self._hierarchies: List[Hierarchy] = []
self._snowflake_account = config.snowflake_account

async def extract(self) -> Collection[ENTITY_TYPES]:
Expand All @@ -100,14 +105,15 @@ async def extract(self) -> Collection[ENTITY_TYPES]:
apps = self._client.get_apps()
app_map = {app.id: app for app in apps}

workspaces = []
workspaces: List[WorkspaceInfo] = []
for workspace_ids in chunks(
self._workspaces, PowerBIClient.MAX_WORKSPACES_PER_SCAN
):
workspaces.extend(self._client.get_workspace_info(workspace_ids))

for workspace in workspaces:
self.map_wi_dataflow_to_pipeline(workspace)
self.map_workspace_to_hierarchy(workspace)

# As there may be cross-workspace reference in dashboards & reports,
# we must process the datasets across all workspaces first
Expand All @@ -126,9 +132,39 @@ async def extract(self) -> Collection[ENTITY_TYPES]:
entities.extend(self._virtual_views.values())
entities.extend(self._dashboards.values())
entities.extend(self._pipelines.values())
entities.extend(self._hierarchies)

return entities

def map_workspace_to_hierarchy(self, workspace: WorkspaceInfo) -> None:
    """Register a Power BI workspace as hierarchy entries.

    Appends two ``Hierarchy`` objects to ``self._hierarchies`` for the given
    workspace: one whose logical path starts with the Power BI dataset
    virtual-view type, and one whose path starts with the Power BI dashboard
    platform. Both paths end with the workspace id, and both entries carry
    the same ``HierarchyInfo`` (workspace description, name and URL).
    """
    ws_id = workspace.id

    workspace_details = PbiWorkspace(
        name=workspace.name,
        url=f"https://app.powerbi.com/groups/{ws_id}",
    )
    # A single info object is built once and shared by both entries.
    info = HierarchyInfo(
        description=workspace.description,
        type=HierarchyType.POWER_BI_WORKSPACE,
        power_bi_workspace=workspace_details,
    )

    # Dataset-rooted entry first, then the dashboard-rooted one.
    path_roots = (
        VirtualViewType.POWER_BI_DATASET.value,
        DashboardPlatform.POWER_BI.value,
    )
    for root in path_roots:
        self._hierarchies.append(
            Hierarchy(
                logical_id=HierarchyLogicalID(path=[root, ws_id]),
                hierarchy_info=info,
            )
        )

def map_wi_dataflow_to_pipeline(self, workspace: WorkspaceInfo) -> None:
for wdf in workspace.dataflows:
data_flow_id = wdf.objectId
Expand Down Expand Up @@ -277,7 +313,7 @@ def map_wi_datasets_to_virtual_views(self, workspace: WorkspaceInfo) -> None:
url=ds.webUrl,
source_datasets=source_entity_ids,
description=wds.description,
workspace=PbiWorkspace(id=workspace.id, name=workspace.name),
workspace_id=workspace.id,
last_refreshed=last_refreshed,
),
entity_upstream=EntityUpstream(
Expand Down Expand Up @@ -516,7 +552,7 @@ def _make_power_bi_info(
) -> PowerBIInfo:
pbi_info = PowerBIInfo(
power_bi_dashboard_type=type,
workspace=PbiWorkspace(id=workspace.id, name=workspace.name),
workspace_id=workspace.id,
created_by=dashboard.createdBy,
created_date_time=safe_parse_ISO8601(dashboard.createdDateTime),
modified_by=dashboard.modifiedBy,
Expand Down
1 change: 1 addition & 0 deletions metaphor/power_bi/power_bi_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ class WorkspaceInfo(BaseModel):
name: Optional[str]
type: Optional[str]
state: str
description: Optional[str]
reports: List[WorkspaceInfoReport] = []
datasets: List[WorkspaceInfoDataset] = []
dashboards: List[WorkspaceInfoDashboard] = []
Expand Down
19 changes: 9 additions & 10 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.12.59"
version = "0.12.60"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down Expand Up @@ -29,7 +29,7 @@ google-cloud-logging = { version = "^3.5.0", optional = true }
jsonschema = "^4.18.6"
lkml = { version = "^1.3.1", optional = true }
looker-sdk = { version = "^23.6.0", optional = true }
metaphor-models = "0.26.13"
metaphor-models = "0.27.0"
msal = { version = "^1.20.0", optional = true }
pycarlo = { version = "^0.8.1", optional = true }
pydantic = "^1.10.0"
Expand Down
55 changes: 55 additions & 0 deletions tests/common/test_event_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from metaphor.common.event_util import EventUtil
from metaphor.models.metadata_change_event import (
Dashboard,
Dataset,
Hierarchy,
HierarchyLogicalID,
KnowledgeCard,
MetadataChangeEvent,
Metric,
Pipeline,
QueryLogs,
VirtualView,
)


def test_build_event():
    """build_event wraps each entity type in the matching event field."""
    util = EventUtil()

    # (entity class, MetadataChangeEvent keyword argument) pairs.
    cases = [
        (Dashboard, "dashboard"),
        (Dataset, "dataset"),
        (Hierarchy, "hierarchy"),
        (KnowledgeCard, "knowledge_card"),
        (Metric, "metric"),
        (Pipeline, "pipeline"),
        (QueryLogs, "query_logs"),
        (VirtualView, "virtual_view"),
    ]

    for entity_cls, field_name in cases:
        expected = MetadataChangeEvent(**{field_name: entity_cls()})
        assert util.build_event(entity_cls()) == expected


def test_trim_event():
    """trim_event yields {} for empty entities and keeps populated fields."""
    util = EventUtil()

    empty_entity_classes = (
        Dashboard,
        Dataset,
        Hierarchy,
        KnowledgeCard,
        Metric,
        Pipeline,
        QueryLogs,
        VirtualView,
    )
    for entity_cls in empty_entity_classes:
        assert util.trim_event(entity_cls()) == {}

    # A populated logical ID is kept, under the "logicalId" key.
    populated = Hierarchy(logical_id=HierarchyLogicalID(path=["a", "b"]))
    assert util.trim_event(populated) == {"logicalId": {"path": ["a", "b"]}}
65 changes: 37 additions & 28 deletions tests/power_bi/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,7 @@
}
],
"url": "https://powerbi.com/00000000-0000-0000-0000-000000000002",
"workspace": {
"id": "workspace-1",
"name": "Workspace"
}
"workspaceId": "workspace-1"
},
"structure": {
"directories": [
Expand Down Expand Up @@ -191,10 +188,7 @@
}
],
"url": "https://powerbi.com/00000000-0000-0000-0000-000000000003",
"workspace": {
"id": "workspace-1",
"name": "Workspace"
}
"workspaceId": "workspace-1"
},
"structure": {
"directories": [
Expand Down Expand Up @@ -238,10 +232,7 @@
}
],
"url": "https://powerbi.com/00000000-0000-0000-0001-000000000003",
"workspace": {
"id": "workspace-1",
"name": "Workspace"
}
"workspaceId": "workspace-1"
},
"structure": {
"directories": [
Expand All @@ -261,10 +252,7 @@
"name": "foo app"
},
"powerBiDashboardType": "REPORT",
"workspace": {
"id": "workspace-1",
"name": "Workspace"
},
"workspaceId": "workspace-1",
"createdBy": "[email protected]",
"createdDateTime": "2022-04-06T04:25:06.777000+00:00",
"endorsement": {
Expand Down Expand Up @@ -311,10 +299,7 @@
"description": "This is a report about bar",
"powerBi": {
"powerBiDashboardType": "REPORT",
"workspace": {
"id": "workspace-1",
"name": "Workspace"
}
"workspaceId": "workspace-1"
},
"title": "Bar Report"
},
Expand Down Expand Up @@ -358,10 +343,7 @@
"name": "bar app"
},
"powerBiDashboardType": "DASHBOARD",
"workspace": {
"id": "workspace-1",
"name": "Workspace"
},
"workspaceId": "workspace-1",
"subscriptions": [
{
"artifactDisplayName": "Dashboard A",
Expand Down Expand Up @@ -422,10 +404,7 @@
"dashboardType": "POWER_BI_DASHBOARD",
"powerBi": {
"powerBiDashboardType": "DASHBOARD",
"workspace": {
"id": "workspace-1",
"name": "Workspace"
}
"workspaceId": "workspace-1"
},
"title": "Dashboard B"
},
Expand Down Expand Up @@ -486,5 +465,35 @@
"document": "section Section1;\r\nshared ENTITY_NAME = let\r\n Source = GoogleAnalytics.Accounts(\"account.snowflakecomputing.com\", \"COMPUTE_WH\", [Role = null, CreateNavigationProperties = null, ConnectionTimeout = null, CommandTimeout = null]),\r\n Navigation = Source{[Name = \"DB\", Kind = \"Database\"]}[Data],\r\n #\"Navigation 1\" = Navigation{[Name = \"SCHEMA\", Kind = \"Schema\"]}[Data],\r\n #\"Navigation 2\" = #\"Navigation 1\"{[Name = \"TABLE_NAME\", Kind = \"Table\"]}[Data]\r\nin\r\n #\"Navigation 2\";\r\n",
"name": "Dataflow 2"
}
},
{
"hierarchyInfo": {
"powerBiWorkspace": {
"name": "Workspace",
"url": "https://app.powerbi.com/groups/workspace-1"
},
"type": "POWER_BI_WORKSPACE"
},
"logicalId": {
"path": [
"POWER_BI_DATASET",
"workspace-1"
]
}
},
{
"hierarchyInfo": {
"powerBiWorkspace": {
"name": "Workspace",
"url": "https://app.powerbi.com/groups/workspace-1"
},
"type": "POWER_BI_WORKSPACE"
},
"logicalId": {
"path": [
"POWER_BI",
"workspace-1"
]
}
}
]

0 comments on commit f17e900

Please sign in to comment.