Skip to content

Commit

Permalink
Power BI report dashboard owner editor metadata (#602)
Browse files Browse the repository at this point in the history
* Add owner editor metadata for PBI dashboard

* bump version

* Improve test coverage

* Bump version

* Bump versions

* Address reviewer comments

* Address reviewer comments
  • Loading branch information
elic-eon authored Sep 22, 2023
1 parent 325545e commit e00ad7d
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 25 deletions.
17 changes: 17 additions & 0 deletions metaphor/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
from hashlib import md5
from typing import Any, Callable, Dict, List, Optional, Union

from dateutil.parser import isoparse

from metaphor.common.logger import get_logger

logger = get_logger()


def start_of_day(daysAgo=0) -> datetime:
"""Returns the start of day in UTC time, for today or N days ago"""
Expand Down Expand Up @@ -47,6 +53,17 @@ def to_utc_time(time: datetime) -> datetime:
return time.replace(tzinfo=timezone.utc)


def safe_parse_ISO8601(iso8601_str: Optional[str]) -> Optional[datetime]:
    """Safely convert an ISO 8601 string to a timezone-aware UTC datetime.

    Args:
        iso8601_str: The timestamp to parse, or None.

    Returns:
        The parsed time in UTC. A timestamp without an offset is taken to
        already be in UTC; a timestamp with an offset is converted to UTC.
        Returns None if the input is None or cannot be parsed.
    """
    if iso8601_str is None:
        return None

    try:
        parsed = isoparse(iso8601_str)
        if parsed.tzinfo is None:
            # No offset given: keep the wall-clock value and label it as UTC
            return parsed.replace(tzinfo=timezone.utc)
        # Offset given: shift to UTC instead of discarding the offset
        return parsed.astimezone(timezone.utc)
    except Exception:
        # Best effort by design: an unparsable timestamp is reported as missing
        logger.error(f"Failed to parse ISO8601 time: {iso8601_str}")
        return None


def convert_to_float(value: Optional[Union[float, int, str]]) -> Optional[float]:
"""Converts a value to float, return None if the original value is None or NaN or INF"""
return (
Expand Down
51 changes: 33 additions & 18 deletions metaphor/power_bi/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
from typing import Collection, Dict, List, Optional
from urllib import parse

from dateutil.parser import isoparse

from metaphor.common.base_extractor import BaseExtractor
from metaphor.common.entity_id import EntityId, to_pipeline_entity_id_from_logical_id
from metaphor.common.event_util import ENTITY_TYPES
from metaphor.common.logger import get_logger
from metaphor.common.utils import chunks, unique_list
from metaphor.common.utils import chunks, safe_parse_ISO8601, unique_list
from metaphor.models.crawler_run_metadata import Platform
from metaphor.models.metadata_change_event import (
AssetStructure,
Expand All @@ -34,7 +32,12 @@
from metaphor.models.metadata_change_event import (
PowerBIDataset as VirtualViewPowerBIDataset,
)
from metaphor.models.metadata_change_event import PowerBIDatasetTable, PowerBIInfo
from metaphor.models.metadata_change_event import (
PowerBIDatasetTable,
PowerBIEndorsement,
PowerBIEndorsementType,
PowerBIInfo,
)
from metaphor.models.metadata_change_event import PowerBIMeasure as PbiMeasure
from metaphor.models.metadata_change_event import PowerBIRefreshSchedule
from metaphor.models.metadata_change_event import PowerBISubscription as Subscription
Expand All @@ -58,6 +61,7 @@
PowerBiSubscriptionUser,
PowerBITile,
WorkspaceInfo,
WorkspaceInfoDashboardBase,
)
from metaphor.power_bi.power_query_parser import PowerQueryParser

Expand Down Expand Up @@ -163,11 +167,7 @@ def map_wi_dataflow_to_pipeline(self, workspace: WorkspaceInfo) -> None:
description=wdf.description,
document=document_str,
modified_by=wdf.configuredBy,
modified_date_time=isoparse(wdf.modifiedDateTime).replace(
tzinfo=timezone.utc
)
if wdf.modifiedDateTime
else None,
modified_date_time=safe_parse_ISO8601(wdf.modifiedDateTime),
content=json.dumps(dataflow) if dataflow else None,
name=wdf.name,
refresh_schedule=PowerBIRefreshSchedule(
Expand Down Expand Up @@ -316,7 +316,7 @@ def map_wi_reports_to_dashboard(
continue

pbi_info = self._make_power_bi_info(
PowerBIDashboardType.REPORT, workspace, wi_report.appId, app_map
PowerBIDashboardType.REPORT, workspace, wi_report, app_map
)

# The "app" version of report doesn't have pages
Expand Down Expand Up @@ -378,7 +378,7 @@ def map_wi_dashboards_to_dashboard(
continue

pbi_info = self._make_power_bi_info(
PowerBIDashboardType.DASHBOARD, workspace, wi_dashboard.appId, app_map
PowerBIDashboardType.DASHBOARD, workspace, wi_dashboard, app_map
)

dashboard = Dashboard(
Expand Down Expand Up @@ -511,19 +511,38 @@ def safe_parse_date(datetime_str: Optional[str]) -> Optional[datetime]:
def _make_power_bi_info(
    type: PowerBIDashboardType,
    workspace: WorkspaceInfo,
    dashboard: WorkspaceInfoDashboardBase,
    app_map: Dict[str, PowerBIApp],
) -> PowerBIInfo:
    """Build the PowerBIInfo for a workspace dashboard or report.

    Args:
        type: Dashboard type to record on the result.
        workspace: Workspace whose id and name are recorded on the result.
        dashboard: Workspace dashboard or report supplying the creator,
            modifier, timestamps, app id, and endorsement details.
        app_map: Power BI apps keyed by app id.

    Returns:
        A PowerBIInfo with the app set only if the dashboard's app id is
        present in app_map, and the endorsement set only if its type is a
        valid PowerBIEndorsementType.
    """
    pbi_info = PowerBIInfo(
        power_bi_dashboard_type=type,
        workspace=PbiWorkspace(id=workspace.id, name=workspace.name),
        created_by=dashboard.createdBy,
        created_date_time=safe_parse_ISO8601(dashboard.createdDateTime),
        modified_by=dashboard.modifiedBy,
        modified_date_time=safe_parse_ISO8601(dashboard.modifiedDateTime),
    )

    if dashboard.appId is not None:
        app = app_map.get(dashboard.appId)
        if app is not None:
            pbi_info.app = PbiApp(id=app.id, name=app.name)

    details = dashboard.endorsementDetails
    if details is not None:
        try:
            pbi_info.endorsement = PowerBIEndorsement(
                endorsement=PowerBIEndorsementType(details.endorsement),
                certified_by=details.certifiedBy,
            )
        except ValueError:
            # Unknown endorsement values are logged and left unset
            logger.warning(
                f"Endorsement type {details.endorsement} is not supported"
            )

    return pbi_info

@staticmethod
Expand All @@ -543,11 +562,7 @@ def _find_last_completed_refresh(
except StopIteration:
return None

try:
return isoparse(refresh.endTime).replace(tzinfo=timezone.utc)
except Exception:
logger.error(f"Failed to parse refresh time: {refresh.endTime}")
return None
return safe_parse_ISO8601(refresh.endTime)

@staticmethod
def _get_dashboard_id_from_url(url: str) -> Optional[str]:
Expand Down
20 changes: 16 additions & 4 deletions metaphor/power_bi/power_bi_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ class PowerBITable(BaseModel):
source: List[Any] = []


# Endorsement details carried by workspace datasets, dashboards, and reports.
class EndorsementDetails(BaseModel):
# Raw endorsement label (required); the extractor maps it to PowerBIEndorsementType.
endorsement: str
# Who certified the item; optional, defaults to an empty string.
certifiedBy: Optional[str] = ""


class UpstreamDataflow(BaseModel):
targetDataflowId: str

Expand All @@ -112,17 +117,24 @@ class WorkspaceInfoDataset(BaseModel):

upstreamDataflows: Optional[List[UpstreamDataflow]] = None
upstreamDatasets: Optional[Any]
endorsementDetails: Optional[EndorsementDetails] = None


class WorkspaceInfoDashboard(BaseModel):
# Fields shared by WorkspaceInfoDashboard and WorkspaceInfoReport.
class WorkspaceInfoDashboardBase(BaseModel):
# Identifier of the dashboard or report (required).
id: str
# Id of the owning app, if any; the extractor looks it up in its app map.
appId: Optional[str] = None
# Creation and modification timestamps as strings; the extractor parses them with safe_parse_ISO8601.
createdDateTime: Optional[str] = None
modifiedDateTime: Optional[str] = None
# Who created and who last modified the item.
createdBy: Optional[str] = None
modifiedBy: Optional[str] = None
# Endorsement details, if any.
endorsementDetails: Optional[EndorsementDetails] = None


# Workspace dashboard: the shared base fields plus a required display name.
class WorkspaceInfoDashboard(WorkspaceInfoDashboardBase):
displayName: str


class WorkspaceInfoReport(BaseModel):
id: str
appId: Optional[str] = None
class WorkspaceInfoReport(WorkspaceInfoDashboardBase):
name: str
datasetId: Optional[str] = None
description: str = ""
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.12.50"
version = "0.12.51"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down
14 changes: 14 additions & 0 deletions tests/common/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
filter_empty_strings,
must_set_exactly_one,
removesuffix,
safe_parse_ISO8601,
start_of_day,
unique_list,
)
Expand Down Expand Up @@ -100,3 +101,16 @@ def test_remove_suffix():
assert removesuffix("abcdefg", "gf") == "abcdefg"
assert removesuffix("example.com/index.html", "index.html") == "example.com/"
assert removesuffix("example.com/index.html", "/index.html") == "example.com"


def test_safe_parse_ISO8061():
    """Cover the None, "Z"-suffixed, offset-less, and unparsable inputs."""
    expected = "2023-09-20T08:10:15+00:00"

    # Missing input is passed through as None
    assert safe_parse_ISO8601(None) is None

    # With or without the "Z" suffix, the result carries a +00:00 offset
    for raw in ("2023-09-20T08:10:15Z", "2023-09-20T08:10:15"):
        assert safe_parse_ISO8601(raw).isoformat() == expected

    # A string that is not a timestamp yields None instead of raising
    assert safe_parse_ISO8601("isvalid") is None
16 changes: 14 additions & 2 deletions tests/power_bi/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,15 @@
"workspace": {
"id": "workspace-1",
"name": "Workspace"
}
},
"createdBy": "[email protected]",
"createdDateTime": "2022-04-06T04:25:06.777000+00:00",
"endorsement": {
"certifiedBy": "[email protected]",
"endorsement": "Promoted"
},
"modifiedBy": "[email protected]",
"modifiedDateTime": "2022-04-06T04:25:06.777000+00:00"
},
"title": "Foo Report"
},
Expand Down Expand Up @@ -369,7 +377,11 @@
}
]
}
]
],
"createdBy": "[email protected]",
"createdDateTime": "2022-04-06T04:25:06.777000+00:00",
"modifiedBy": "[email protected]",
"modifiedDateTime": "2022-04-06T04:25:06.777000+00:00"
},
"title": "Dashboard A"
},
Expand Down
15 changes: 15 additions & 0 deletions tests/power_bi/test_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from metaphor.power_bi.config import PowerBIRunConfig
from metaphor.power_bi.extractor import PowerBIExtractor
from metaphor.power_bi.power_bi_client import (
EndorsementDetails,
PowerBIApp,
PowerBIDashboard,
PowerBIDataset,
Expand Down Expand Up @@ -187,6 +188,13 @@ async def test_extractor(mock_client: MagicMock, test_root_dir: str):
name=report1.name,
datasetId=report1.datasetId,
description="This is a report about foo",
createdBy="[email protected]",
createdDateTime="2022-04-06T04:25:06.777",
modifiedBy="[email protected]",
modifiedDateTime="2022-04-06T04:25:06.777",
endorsementDetails=EndorsementDetails(
endorsement="Promoted", certifiedBy="[email protected]"
),
),
WorkspaceInfoReport(
id=report1_app.id,
Expand All @@ -200,6 +208,9 @@ async def test_extractor(mock_client: MagicMock, test_root_dir: str):
name=report2.name,
datasetId=report2.datasetId,
description="This is a report about bar",
endorsementDetails=EndorsementDetails(
endorsement="Invalid", certifiedBy="[email protected]"
),
),
],
datasets=[
Expand Down Expand Up @@ -301,6 +312,10 @@ async def test_extractor(mock_client: MagicMock, test_root_dir: str):
WorkspaceInfoDashboard(
displayName="Dashboard A",
id=dashboard1_id,
createdBy="[email protected]",
createdDateTime="2022-04-06T04:25:06.777",
modifiedBy="[email protected]",
modifiedDateTime="2022-04-06T04:25:06.777",
),
WorkspaceInfoDashboard(
displayName="Dashboard A",
Expand Down

0 comments on commit e00ad7d

Please sign in to comment.