From e0ee13968e2561f8e77a297c04e737a74a15af45 Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Mon, 12 Aug 2024 16:52:16 +0800 Subject: [PATCH 1/9] Only extract used folder --- metaphor/looker/extractor.py | 26 ++++++------------------- metaphor/looker/folder.py | 37 ++++++++++++++++++++++++++++++++++-- tests/looker/test_folder.py | 10 +++++++--- 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/metaphor/looker/extractor.py b/metaphor/looker/extractor.py index ca2d2604..7272e376 100644 --- a/metaphor/looker/extractor.py +++ b/metaphor/looker/extractor.py @@ -1,5 +1,5 @@ import os -from typing import Collection, Dict, Iterable, Iterator, List, Sequence, Set, Tuple +from typing import Collection, Dict, Iterable, List, Sequence, Set, Tuple from metaphor.common.git import clone_repo from metaphor.models.crawler_run_metadata import Platform @@ -29,9 +29,6 @@ DashboardPlatform, EntityUpstream, Hierarchy, - HierarchyInfo, - HierarchyLogicalID, - HierarchyType, SourceInfo, SystemContact, SystemContacts, @@ -81,6 +78,7 @@ def __init__(self, config: LookerRunConfig) -> None: self._project_source_url = config.project_source_url self._include_personal_folders = config.include_personal_folders self._explore_view_folder_name = config.explore_view_folder_name + self._folders: Dict[str, Hierarchy] = {} # Load config using environment variables instead from looker.ini file # See https://github.com/looker-open-source/sdk-codegen#environment-variable-configuration @@ -121,7 +119,7 @@ async def extract(self) -> Collection[ENTITY_TYPES]: entities: List[ENTITY_TYPES] = [] entities.extend(dashboards) entities.extend(virtual_views) - entities.extend(self.build_hierarchy(folder_map)) + entities.extend(self._folders.values()) return entities def _fetch_folders(self) -> FolderMap: @@ -237,7 +235,9 @@ def _fetch_dashboards( structure=( AssetStructure( directories=build_directories( - dashboard.folder.id, folder_map + dashboard.folder.id, + folder_map, + self._folders, ), name=dashboard.title, ) @@ -320,17 +320,3 @@ def _extract_charts( source_entities=list(explore_ids), ), ) - - def build_hierarchy( - self, folder_map: Dict[str, FolderMetadata] - ) -> Iterator[Hierarchy]: - for folder in folder_map.values(): - yield Hierarchy( - logical_id=HierarchyLogicalID( - path=[DashboardPlatform.LOOKER.value] - + build_directories(folder.id, folder_map) - ), - hierarchy_info=HierarchyInfo( - type=HierarchyType.LOOKER_FOLDER, name=folder.name - ), - ) diff --git a/metaphor/looker/folder.py b/metaphor/looker/folder.py index cfe86a61..e8a4a307 100644 --- a/metaphor/looker/folder.py +++ b/metaphor/looker/folder.py @@ -2,6 +2,13 @@ from typing import Dict, List, Optional from metaphor.common.logger import get_logger +from metaphor.models.metadata_change_event import ( + DashboardPlatform, + Hierarchy, + HierarchyInfo, + HierarchyLogicalID, + HierarchyType, +) @dataclass @@ -16,7 +23,9 @@ class FolderMetadata: logger = get_logger() -def build_directories(folder_id: str, folder_map: FolderMap) -> List[str]: +def build_directories( + folder_id: str, folder_map: FolderMap, folders: Dict[str, Hierarchy] +) -> List[str]: directories: List[str] = [] while True: @@ -28,6 +37,30 @@ def build_directories(folder_id: str, folder_map: FolderMap) -> List[str]: directories.insert(0, folder.id) if folder.parent_id is None: - return directories + break folder_id = folder.parent_id + + build_hierarchies(directories, folder_map, folders) + + return directories + + +def build_hierarchies( + directories: List[str], folder_map: FolderMap, folders: Dict[str, Hierarchy] +): + for i, folder_id in enumerate(directories): + folder = folder_map.get(folder_id) + if folder_id in folders or folder is None: + continue + + hierarchy = Hierarchy( + logical_id=HierarchyLogicalID( + path=[DashboardPlatform.LOOKER.value] + directories[: i + 1] + ), + hierarchy_info=HierarchyInfo( + type=HierarchyType.LOOKER_FOLDER, name=folder.name + ), + ) + + folders[folder_id] = hierarchy diff --git a/tests/looker/test_folder.py b/tests/looker/test_folder.py index ef94c848..ad05ce8a 100644 --- a/tests/looker/test_folder.py +++ b/tests/looker/test_folder.py @@ -9,6 +9,10 @@ def test_build_directories(test_root_dir) -> None: "3": FolderMetadata(id="3", name="folder3", parent_id="2"), } - assert build_directories("1", folder_map) == ["1"] - assert build_directories("2", folder_map) == ["1", "2"] - assert build_directories("3", folder_map) == ["1", "2", "3"] + folders: dict = {} + + assert build_directories("1", folder_map, folders) == ["1"] + assert build_directories("2", folder_map, folders) == ["1", "2"] + assert build_directories("3", folder_map, folders) == ["1", "2", "3"] + + assert len(folders) == 3 From 3365b502604573b906f663d26ffb2740c58eabe6 Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Mon, 12 Aug 2024 16:53:15 +0800 Subject: [PATCH 2/9] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8d8f0846..9a4cc861 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.75" +version = "0.14.76" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] From fa0447114d80bbfa45d95f02c6357f8669565967 Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Mon, 12 Aug 2024 17:03:58 +0800 Subject: [PATCH 3/9] Update tests --- metaphor/looker/folder.py | 4 ++-- tests/looker/test_folder.py | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/metaphor/looker/folder.py b/metaphor/looker/folder.py index e8a4a307..8065ad15 100644 --- a/metaphor/looker/folder.py +++ b/metaphor/looker/folder.py @@ -41,12 +41,12 @@ def build_directories( folder_id = folder.parent_id - build_hierarchies(directories, folder_map, folders) + _build_hierarchies(directories, folder_map, folders) return directories -def build_hierarchies( +def _build_hierarchies( directories: List[str], folder_map: FolderMap, folders: Dict[str, Hierarchy] ): for i, folder_id in enumerate(directories): diff --git a/tests/looker/test_folder.py b/tests/looker/test_folder.py index ad05ce8a..8aecbf3d 100644 --- a/tests/looker/test_folder.py +++ b/tests/looker/test_folder.py @@ -1,4 +1,4 @@ -from metaphor.looker.folder import FolderMetadata, build_directories +from metaphor.looker.folder import FolderMetadata, _build_hierarchies, build_directories def test_build_directories(test_root_dir) -> None: @@ -14,5 +14,24 @@ def test_build_directories(test_root_dir) -> None: assert build_directories("1", folder_map, folders) == ["1"] assert build_directories("2", folder_map, folders) == ["1", "2"] assert build_directories("3", folder_map, folders) == ["1", "2", "3"] + assert len(folders) == 3 + + +def test_build_hierarchy(test_root_dir) -> None: + + folder_map = { + "1": FolderMetadata(id="1", name="folder1", parent_id=None), + "2": FolderMetadata(id="2", name="folder2", parent_id="1"), + "3": FolderMetadata(id="3", name="folder3", parent_id="2"), + } + + folders: dict = {} + _build_hierarchies(["1", "2", "3"], folder_map, folders) assert len(folders) == 3 + assert folders["1"].hierarchy_info.name == "folder1" + assert folders["1"].logical_id.path == ["LOOKER", "1"] + assert folders["2"].hierarchy_info.name == "folder2" + assert folders["2"].logical_id.path == ["LOOKER", "1", "2"] + assert folders["3"].hierarchy_info.name == "folder3" + assert folders["3"].logical_id.path == ["LOOKER", "1", "2", "3"] From 93fbdb34e2309e0c316361129ccfad066aa99195 Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Mon, 12 Aug 2024 17:18:11 +0800 Subject: [PATCH 4/9] Address comments --- metaphor/looker/folder.py | 6 ++++-- tests/looker/test_folder.py | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/metaphor/looker/folder.py b/metaphor/looker/folder.py index 8065ad15..7692db6e 100644 --- a/metaphor/looker/folder.py +++ b/metaphor/looker/folder.py @@ -24,7 +24,9 @@ class FolderMetadata: def build_directories( - folder_id: str, folder_map: FolderMap, folders: Dict[str, Hierarchy] + folder_id: str, + folder_map: FolderMap, + folder_hierarchies: Dict[str, Hierarchy], ) -> List[str]: directories: List[str] = [] @@ -41,7 +43,7 @@ def build_directories( folder_id = folder.parent_id - _build_hierarchies(directories, folder_map, folders) + _build_hierarchies(directories, folder_map, folder_hierarchies) return directories diff --git a/tests/looker/test_folder.py b/tests/looker/test_folder.py index 8aecbf3d..13430914 100644 --- a/tests/looker/test_folder.py +++ b/tests/looker/test_folder.py @@ -9,12 +9,12 @@ def test_build_directories(test_root_dir) -> None: "3": FolderMetadata(id="3", name="folder3", parent_id="2"), } - folders: dict = {} + folder_hierarchies: dict = {} - assert build_directories("1", folder_map, folders) == ["1"] - assert build_directories("2", folder_map, folders) == ["1", "2"] - assert build_directories("3", folder_map, folders) == ["1", "2", "3"] - assert len(folders) == 3 + assert build_directories("1", folder_map, folder_hierarchies) == ["1"] + assert build_directories("2", folder_map, folder_hierarchies) == ["1", "2"] + assert build_directories("3", folder_map, folder_hierarchies) == ["1", "2", "3"] + assert len(folder_hierarchies) == 3 def test_build_hierarchy(test_root_dir) -> None: From 88c8a785b6e459f8bfdc12f9e780e4b842098004 Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Mon, 12 Aug 2024 17:35:54 +0800 Subject: [PATCH 5/9] Add tests --- tests/looker/test_folder.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/looker/test_folder.py b/tests/looker/test_folder.py index 13430914..625049b5 100644 --- a/tests/looker/test_folder.py +++ b/tests/looker/test_folder.py @@ -35,3 +35,6 @@ def test_build_hierarchy(test_root_dir) -> None: assert folders["2"].logical_id.path == ["LOOKER", "1", "2"] assert folders["3"].hierarchy_info.name == "folder3" assert folders["3"].logical_id.path == ["LOOKER", "1", "2", "3"] + + _build_hierarchies(["4"], folder_map, folders) + assert len(folders) == 3 From d582b4f88441f8d9164cce140f27e61a1464b778 Mon Sep 17 00:00:00 2001 From: Scott Ssuyi Huang Date: Mon, 12 Aug 2024 18:34:47 +0800 Subject: [PATCH 6/9] Update metaphor/looker/folder.py Co-authored-by: Tsung-Ju Lii --- metaphor/looker/folder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metaphor/looker/folder.py b/metaphor/looker/folder.py index 7692db6e..5f2cf987 100644 --- a/metaphor/looker/folder.py +++ b/metaphor/looker/folder.py @@ -49,7 +49,7 @@ def build_directories( def _build_hierarchies( - directories: List[str], folder_map: FolderMap, folders: Dict[str, Hierarchy] + directories: List[str], folder_map: FolderMap, folder_hierarchies: Dict[str, Hierarchy] ): for i, folder_id in enumerate(directories): folder = folder_map.get(folder_id) From 17c865c2a089f8e1b014b6fe4b2b0e23eddc6c5b Mon Sep 17 00:00:00 2001 From: Scott Ssuyi Huang Date: Mon, 12 Aug 2024 18:34:54 +0800 Subject: [PATCH 7/9] Update metaphor/looker/folder.py Co-authored-by: Tsung-Ju Lii --- metaphor/looker/folder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metaphor/looker/folder.py b/metaphor/looker/folder.py index 5f2cf987..ce29dc4b 100644 --- a/metaphor/looker/folder.py +++ b/metaphor/looker/folder.py @@ -65,4 +65,4 @@ def _build_hierarchies( ), ) - folders[folder_id] = hierarchy + folder_hierarchies[folder_id] = hierarchy From 739f0c6dc94e892c5e7ed2f329a6f30c457f6984 Mon Sep 17 00:00:00 2001 From: Scott Ssuyi Huang Date: Mon, 12 Aug 2024 23:32:19 +0800 Subject: [PATCH 8/9] Update metaphor/looker/folder.py Co-authored-by: Tsung-Ju Lii --- metaphor/looker/folder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metaphor/looker/folder.py b/metaphor/looker/folder.py index ce29dc4b..16b8a41a 100644 --- a/metaphor/looker/folder.py +++ b/metaphor/looker/folder.py @@ -53,7 +53,7 @@ def _build_hierarchies( ): for i, folder_id in enumerate(directories): folder = folder_map.get(folder_id) - if folder_id in folders or folder is None: + if folder_id in folder_hierarchies or folder is None: continue hierarchy = Hierarchy( From 2592afefdad285f5a7cc48b73863fe7c47819bcf Mon Sep 17 00:00:00 2001 From: Scott Huang Date: Tue, 13 Aug 2024 00:32:10 +0800 Subject: [PATCH 9/9] Format --- metaphor/looker/folder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metaphor/looker/folder.py b/metaphor/looker/folder.py index 16b8a41a..09d9064a 100644 --- a/metaphor/looker/folder.py +++ b/metaphor/looker/folder.py @@ -49,7 +49,9 @@ def build_directories( def _build_hierarchies( - directories: List[str], folder_map: FolderMap, folder_hierarchies: Dict[str, Hierarchy] + directories: List[str], + folder_map: FolderMap, + folder_hierarchies: Dict[str, Hierarchy], ): for i, folder_id in enumerate(directories): folder = folder_map.get(folder_id)