From cae080775f7c3b890e97914ff098d0ee9fdaf990 Mon Sep 17 00:00:00 2001 From: Scott Ssuyi Huang Date: Tue, 13 Aug 2024 00:38:41 +0800 Subject: [PATCH] Skip looker user folder hierarchies (#948) * Only extract used folder * bump version * Update tests * Address comments * Add tests * Update metaphor/looker/folder.py Co-authored-by: Tsung-Ju Lii * Update metaphor/looker/folder.py Co-authored-by: Tsung-Ju Lii * Update metaphor/looker/folder.py Co-authored-by: Tsung-Ju Lii * Format --------- Co-authored-by: Tsung-Ju Lii --- metaphor/looker/extractor.py | 26 ++++++----------------- metaphor/looker/folder.py | 41 ++++++++++++++++++++++++++++++++++-- pyproject.toml | 2 +- tests/looker/test_folder.py | 34 ++++++++++++++++++++++++++---- 4 files changed, 76 insertions(+), 27 deletions(-) diff --git a/metaphor/looker/extractor.py b/metaphor/looker/extractor.py index ca2d2604..7272e376 100644 --- a/metaphor/looker/extractor.py +++ b/metaphor/looker/extractor.py @@ -1,5 +1,5 @@ import os -from typing import Collection, Dict, Iterable, Iterator, List, Sequence, Set, Tuple +from typing import Collection, Dict, Iterable, List, Sequence, Set, Tuple from metaphor.common.git import clone_repo from metaphor.models.crawler_run_metadata import Platform @@ -29,9 +29,6 @@ DashboardPlatform, EntityUpstream, Hierarchy, - HierarchyInfo, - HierarchyLogicalID, - HierarchyType, SourceInfo, SystemContact, SystemContacts, @@ -81,6 +78,7 @@ def __init__(self, config: LookerRunConfig) -> None: self._project_source_url = config.project_source_url self._include_personal_folders = config.include_personal_folders self._explore_view_folder_name = config.explore_view_folder_name + self._folders: Dict[str, Hierarchy] = {} # Load config using environment variables instead from looker.ini file # See https://github.com/looker-open-source/sdk-codegen#environment-variable-configuration @@ -121,7 +119,7 @@ async def extract(self) -> Collection[ENTITY_TYPES]: entities: List[ENTITY_TYPES] = [] entities.extend(dashboards) entities.extend(virtual_views) - entities.extend(self.build_hierarchy(folder_map)) + entities.extend(self._folders.values()) return entities def _fetch_folders(self) -> FolderMap: @@ -237,7 +235,9 @@ def _fetch_dashboards( structure=( AssetStructure( directories=build_directories( - dashboard.folder.id, folder_map + dashboard.folder.id, + folder_map, + self._folders, ), name=dashboard.title, ) @@ -320,17 +320,3 @@ def _extract_charts( source_entities=list(explore_ids), ), ) - - def build_hierarchy( - self, folder_map: Dict[str, FolderMetadata] - ) -> Iterator[Hierarchy]: - for folder in folder_map.values(): - yield Hierarchy( - logical_id=HierarchyLogicalID( - path=[DashboardPlatform.LOOKER.value] - + build_directories(folder.id, folder_map) - ), - hierarchy_info=HierarchyInfo( - type=HierarchyType.LOOKER_FOLDER, name=folder.name - ), - ) diff --git a/metaphor/looker/folder.py b/metaphor/looker/folder.py index cfe86a61..09d9064a 100644 --- a/metaphor/looker/folder.py +++ b/metaphor/looker/folder.py @@ -2,6 +2,13 @@ from typing import Dict, List, Optional from metaphor.common.logger import get_logger +from metaphor.models.metadata_change_event import ( + DashboardPlatform, + Hierarchy, + HierarchyInfo, + HierarchyLogicalID, + HierarchyType, +) @dataclass @@ -16,7 +23,11 @@ class FolderMetadata: logger = get_logger() -def build_directories(folder_id: str, folder_map: FolderMap) -> List[str]: +def build_directories( + folder_id: str, + folder_map: FolderMap, + folder_hierarchies: Dict[str, Hierarchy], +) -> List[str]: directories: List[str] = [] while True: @@ -28,6 +39,32 @@ def build_directories(folder_id: str, folder_map: FolderMap) -> List[str]: directories.insert(0, folder.id) if folder.parent_id is None: - return directories + break folder_id = folder.parent_id + + _build_hierarchies(directories, folder_map, folder_hierarchies) + + return directories + + +def _build_hierarchies( + directories: List[str], + folder_map: FolderMap, + folder_hierarchies: Dict[str, Hierarchy], +): + for i, folder_id in enumerate(directories): + folder = folder_map.get(folder_id) + if folder_id in folder_hierarchies or folder is None: + continue + + hierarchy = Hierarchy( + logical_id=HierarchyLogicalID( + path=[DashboardPlatform.LOOKER.value] + directories[: i + 1] + ), + hierarchy_info=HierarchyInfo( + type=HierarchyType.LOOKER_FOLDER, name=folder.name + ), + ) + + folder_hierarchies[folder_id] = hierarchy diff --git a/pyproject.toml b/pyproject.toml index 8d8f0846..9a4cc861 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.75" +version = "0.14.76" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] diff --git a/tests/looker/test_folder.py b/tests/looker/test_folder.py index ef94c848..625049b5 100644 --- a/tests/looker/test_folder.py +++ b/tests/looker/test_folder.py @@ -1,4 +1,4 @@ -from metaphor.looker.folder import FolderMetadata, build_directories +from metaphor.looker.folder import FolderMetadata, _build_hierarchies, build_directories def test_build_directories(test_root_dir) -> None: @@ -9,6 +9,32 @@ def test_build_directories(test_root_dir) -> None: "3": FolderMetadata(id="3", name="folder3", parent_id="2"), } - assert build_directories("1", folder_map) == ["1"] - assert build_directories("2", folder_map) == ["1", "2"] - assert build_directories("3", folder_map) == ["1", "2", "3"] + folder_hierarchies: dict = {} + + assert build_directories("1", folder_map, folder_hierarchies) == ["1"] + assert build_directories("2", folder_map, folder_hierarchies) == ["1", "2"] + assert build_directories("3", folder_map, folder_hierarchies) == ["1", "2", "3"] + assert len(folder_hierarchies) == 3 + + +def test_build_hierarchy(test_root_dir) -> None: + + folder_map = { + "1": FolderMetadata(id="1", name="folder1", parent_id=None), + "2": FolderMetadata(id="2", name="folder2", parent_id="1"), + "3": FolderMetadata(id="3", name="folder3", parent_id="2"), + } + + folders: dict = {} + + _build_hierarchies(["1", "2", "3"], folder_map, folders) + assert len(folders) == 3 + assert folders["1"].hierarchy_info.name == "folder1" + assert folders["1"].logical_id.path == ["LOOKER", "1"] + assert folders["2"].hierarchy_info.name == "folder2" + assert folders["2"].logical_id.path == ["LOOKER", "1", "2"] + assert folders["3"].hierarchy_info.name == "folder3" + assert folders["3"].logical_id.path == ["LOOKER", "1", "2", "3"] + + _build_hierarchies(["4"], folder_map, folders) + assert len(folders) == 3