Skip to content

Commit

Permalink
Top level folders for QuickSight entities (#988)
Browse files Browse the repository at this point in the history
* Add top-level folder for QuickSight assets

* Refactor create_hierarchy
  • Loading branch information
elic-eon authored Sep 24, 2024
1 parent 167ccb7 commit 026c102
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 61 deletions.
33 changes: 33 additions & 0 deletions metaphor/common/hierarchy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import List

from metaphor.models.metadata_change_event import (
AssetPlatform,
Hierarchy,
HierarchyInfo,
HierarchyLogicalID,
HierarchyType,
)


def create_hierarchy(
    platform: AssetPlatform,
    path: List[str],
    name: str = "",
    hierarchy_type: HierarchyType = HierarchyType.VIRTUAL_HIERARCHY,
) -> Hierarchy:
    """
    Build a Hierarchy whose logical path is rooted at the given platform.

    The logical ID path is ``platform.value`` followed by the entries of
    ``path``. A HierarchyInfo carrying ``name`` and ``hierarchy_type`` is
    attached only when ``name`` is non-empty; with an empty name the
    returned hierarchy has ``hierarchy_info=None`` and ``hierarchy_type``
    is not used.
    """
    # Prefix the caller-supplied path with the platform so every hierarchy
    # of a platform shares the same root segment.
    logical_id = HierarchyLogicalID(path=[platform.value] + path)

    # Without a display name there is nothing to describe, so no info is set.
    info = None
    if name:
        info = HierarchyInfo(name=name, type=hierarchy_type)

    return Hierarchy(logical_id=logical_id, hierarchy_info=info)
19 changes: 7 additions & 12 deletions metaphor/looker/folder.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from dataclasses import dataclass
from typing import Dict, List, Optional

from metaphor.common.hierarchy import create_hierarchy
from metaphor.common.logger import get_logger
from metaphor.models.metadata_change_event import (
DashboardPlatform,
AssetPlatform,
Hierarchy,
HierarchyInfo,
HierarchyLogicalID,
HierarchyType,
)

Expand Down Expand Up @@ -58,13 +57,9 @@ def _build_hierarchies(
if folder_id in folder_hierarchies or folder is None:
continue

hierarchy = Hierarchy(
logical_id=HierarchyLogicalID(
path=[DashboardPlatform.LOOKER.value] + directories[: i + 1]
),
hierarchy_info=HierarchyInfo(
type=HierarchyType.LOOKER_FOLDER, name=folder.name
),
folder_hierarchies[folder_id] = create_hierarchy(
platform=AssetPlatform.LOOKER,
name=folder.name,
path=directories[: i + 1],
hierarchy_type=HierarchyType.LOOKER_FOLDER,
)

folder_hierarchies[folder_id] = hierarchy
11 changes: 10 additions & 1 deletion metaphor/quick_sight/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
)
from metaphor.quick_sight.client import Client
from metaphor.quick_sight.config import QuickSightRunConfig
from metaphor.quick_sight.folder import (
DASHBOARD_DIRECTORIES,
DATA_SET_DIRECTORIES,
create_top_level_folders,
)
from metaphor.quick_sight.lineage import (
extract_virtual_view_schema,
extract_virtual_view_upstream,
Expand Down Expand Up @@ -103,6 +108,7 @@ def _make_entities_list(self) -> Collection[ENTITY_TYPES]:
entities: List[ENTITY_TYPES] = []
entities.extend(self._virtual_views.values())
entities.extend(self._dashboards.values())
entities.extend(create_top_level_folders())
return entities

def _init_virtual_view(self, arn: str, data_set: DataSet) -> VirtualView:
Expand All @@ -111,7 +117,9 @@ def _init_virtual_view(self, arn: str, data_set: DataSet) -> VirtualView:
name=arn,
type=VirtualViewType.QUICK_SIGHT,
),
structure=AssetStructure(name=data_set.Name),
structure=AssetStructure(
name=data_set.Name, directories=DATA_SET_DIRECTORIES
),
source_info=SourceInfo(
created_at_source=data_set.CreatedTime,
last_updated=data_set.LastUpdatedTime,
Expand All @@ -136,6 +144,7 @@ def _init_dashboard(self, arn: str, dashboard: Dashboard) -> MetaphorDashboard:
),
structure=AssetStructure(
name=dashboard.Name,
directories=DASHBOARD_DIRECTORIES,
),
)

Expand Down
16 changes: 16 additions & 0 deletions metaphor/quick_sight/folder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from typing import List

from metaphor.common.hierarchy import create_hierarchy
from metaphor.models.metadata_change_event import AssetPlatform, Hierarchy

# Directory path assigned to QuickSight dashboards; also the path of the
# "Dashboards" top-level folder created below.
DASHBOARD_DIRECTORIES = ["DASHBOARD"]
# Directory path assigned to QuickSight data sets; also the path of the
# "DataSets" top-level folder created below.
DATA_SET_DIRECTORIES = ["DATA_SET"]


def create_top_level_folders() -> List[Hierarchy]:
    """
    Return the hierarchies for the top-level QuickSight folders.

    Two hierarchies are produced, in this order: "Dashboards" at
    DASHBOARD_DIRECTORIES and "DataSets" at DATA_SET_DIRECTORIES. Both use
    the QuickSight platform and the default hierarchy type of
    create_hierarchy.
    """
    # (directories, display name) for each top-level folder.
    folders = (
        (DASHBOARD_DIRECTORIES, "Dashboards"),
        (DATA_SET_DIRECTORIES, "DataSets"),
    )
    return [
        create_hierarchy(AssetPlatform.QUICK_SIGHT, directories, display_name)
        for directories, display_name in folders
    ]
41 changes: 22 additions & 19 deletions metaphor/thought_spot/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@
to_virtual_view_entity_id,
)
from metaphor.common.event_util import ENTITY_TYPES
from metaphor.common.hierarchy import create_hierarchy
from metaphor.common.logger import get_logger
from metaphor.common.utils import unique_list
from metaphor.models.crawler_run_metadata import Platform
from metaphor.models.metadata_change_event import (
AssetPlatform,
AssetStructure,
Chart,
Dashboard,
Expand All @@ -30,6 +32,7 @@
EntityType,
EntityUpstream,
FieldMapping,
HierarchyType,
SourceField,
SourceInfo,
SystemTag,
Expand Down Expand Up @@ -59,7 +62,6 @@
)
from metaphor.thought_spot.utils import (
ThoughtSpot,
create_virtual_hierarchy,
from_list,
getColumnTransformation,
mapping_chart_type,
Expand Down Expand Up @@ -102,24 +104,7 @@ async def extract(self) -> Collection[ENTITY_TYPES]:

self.fetch_virtual_views()
self.fetch_dashboards()

virtual_hierarchies = [
create_virtual_hierarchy(
name="Answer", path=[ThoughtSpotDashboardType.ANSWER.name]
),
create_virtual_hierarchy(
name="Liveboard", path=[ThoughtSpotDashboardType.LIVEBOARD.name]
),
create_virtual_hierarchy(
name="Table", path=[ThoughtSpotDataObjectType.TABLE.name]
),
create_virtual_hierarchy(
name="View", path=[ThoughtSpotDataObjectType.VIEW.name]
),
create_virtual_hierarchy(
name="Worksheet", path=[ThoughtSpotDataObjectType.WORKSHEET.name]
),
]
virtual_hierarchies = self._create_virtual_hierarchies()

return list(
chain(
Expand Down Expand Up @@ -246,6 +231,24 @@ def populate_virtual_views(
)
self._virtual_views[table_id] = view

@staticmethod
def _create_virtual_hierarchies():
    """
    Build one ThoughtSpot virtual hierarchy per dashboard / data object type.

    For each (label, enum member) pair below, a hierarchy is created with the
    label as its name and the member's value as its single path segment.
    The returned list keeps the order of the pairs.
    """
    labeled_types = (
        ("Answer", ThoughtSpotDashboardType.ANSWER),
        ("Liveboard", ThoughtSpotDashboardType.LIVEBOARD),
        ("Table", ThoughtSpotDataObjectType.TABLE),
        ("View", ThoughtSpotDataObjectType.VIEW),
        ("Worksheet", ThoughtSpotDataObjectType.WORKSHEET),
    )

    hierarchies = []
    for label, member in labeled_types:
        hierarchies.append(
            create_hierarchy(
                platform=AssetPlatform.THOUGHT_SPOT,
                path=[member.value],
                name=label,
                hierarchy_type=HierarchyType.THOUGHT_SPOT_VIRTUAL_HIERARCHY,
            )
        )
    return hierarchies

@staticmethod
def build_column_expr_map(tml: TMLObject):
def build_formula_map(tml_table):
Expand Down
16 changes: 0 additions & 16 deletions metaphor/thought_spot/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,7 @@
from metaphor.common.utils import chunks
from metaphor.models.metadata_change_event import (
ChartType,
DashboardPlatform,
DataPlatform,
Hierarchy,
HierarchyInfo,
HierarchyLogicalID,
HierarchyType,
ThoughtSpotDataObjectType,
)
from metaphor.thought_spot.config import ThoughtSpotRunConfig
Expand Down Expand Up @@ -278,14 +273,3 @@ def getColumnTransformation(target_column: Column) -> Optional[str]:
if target_column.expression is None or target_column.expression.token is None:
return None
return str(target_column.expression.token)


def create_virtual_hierarchy(name: str, path: List[str]) -> Hierarchy:
return Hierarchy(
logical_id=HierarchyLogicalID(
path=[DashboardPlatform.THOUGHT_SPOT.name] + path
),
hierarchy_info=HierarchyInfo(
name=name, type=HierarchyType.THOUGHT_SPOT_VIRTUAL_HIERARCHY
),
)
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.14.107"
version = "0.14.108"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down Expand Up @@ -42,7 +42,7 @@ llama-index-readers-confluence = { version = "^0.1.4", optional = true }
llama-index-readers-notion = { version = "^0.1.6", optional = true }
looker-sdk = { version = "^24.2.0", optional = true }
lxml = { version = "~=5.0.0", optional = true }
metaphor-models = "0.38.3"
metaphor-models = "0.39.1"
more-itertools = { version = "^10.1.0", optional = true }
msal = { version = "^1.28.0", optional = true }
msgraph-beta-sdk = { version = "~1.4.0", optional = true }
Expand Down
59 changes: 52 additions & 7 deletions tests/quick_sight/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,10 @@
"lastUpdated": "2024-09-19T17:30:15.605000+08:00"
},
"structure": {
"name": "locations"
"name": "locations",
"directories": [
"DATA_SET"
]
}
},
{
Expand Down Expand Up @@ -382,7 +385,10 @@
"lastUpdated": "2024-09-19T12:20:38.550000+08:00"
},
"structure": {
"name": "sample_sales_records"
"name": "sample_sales_records",
"directories": [
"DATA_SET"
]
}
},
{
Expand Down Expand Up @@ -452,7 +458,10 @@
"lastUpdated": "2024-09-18T16:42:39.096000+08:00"
},
"structure": {
"name": "Bike data"
"name": "Bike data",
"directories": [
"DATA_SET"
]
}
},
{
Expand Down Expand Up @@ -622,7 +631,10 @@
"lastUpdated": "2024-09-18T16:09:07.953000+08:00"
},
"structure": {
"name": "orders"
"name": "orders",
"directories": [
"DATA_SET"
]
}
},
{
Expand All @@ -649,7 +661,10 @@
"lastUpdated": "2024-09-12T21:01:13.244000+08:00"
},
"structure": {
"name": "Sales"
"name": "Sales",
"directories": [
"DASHBOARD"
]
}
},
{
Expand All @@ -676,7 +691,10 @@
"lastUpdated": "2024-09-18T16:24:00.923000+08:00"
},
"structure": {
"name": "Bike rides"
"name": "Bike rides",
"directories": [
"DASHBOARD"
]
}
},
{
Expand All @@ -703,7 +721,34 @@
"lastUpdated": "2024-09-18T16:09:35.228000+08:00"
},
"structure": {
"name": "Jaffle Shop Orders"
"name": "Jaffle Shop Orders",
"directories": [
"DASHBOARD"
]
}
},
{
"hierarchyInfo": {
"name": "Dashboards",
"type": "VIRTUAL_HIERARCHY"
},
"logicalId": {
"path": [
"QUICK_SIGHT",
"DASHBOARD"
]
}
},
{
"hierarchyInfo": {
"name": "DataSets",
"type": "VIRTUAL_HIERARCHY"
},
"logicalId": {
"path": [
"QUICK_SIGHT",
"DATA_SET"
]
}
}
]

0 comments on commit 026c102

Please sign in to comment.