From afe16425d217ff543b98dd5dbac25acced25af84 Mon Sep 17 00:00:00 2001 From: Scott Ssuyi Huang Date: Thu, 8 Aug 2024 02:58:59 +0800 Subject: [PATCH] Extract owner and description for looker dashboards (#943) * Extract system contact & description * Bump version * Update model version * Bump version --- metaphor/looker/extractor.py | 47 ++++++++++++++++++++++++-- metaphor/tableau/extractor.py | 4 +-- metaphor/unity_catalog/extractor.py | 6 ++-- poetry.lock | 11 +++--- pyproject.toml | 4 +-- tests/looker/expected.json | 16 +++++++++ tests/looker/expected_alternative.json | 16 +++++++++ tests/looker/test_extractor.py | 9 +++-- 8 files changed, 97 insertions(+), 16 deletions(-) diff --git a/metaphor/looker/extractor.py b/metaphor/looker/extractor.py index 7c899811..a22b2ca5 100644 --- a/metaphor/looker/extractor.py +++ b/metaphor/looker/extractor.py @@ -19,6 +19,7 @@ from metaphor.looker.folder import FolderMap, FolderMetadata, build_directories from metaphor.looker.lookml_parser import ModelMap, fullname, parse_project from metaphor.models.metadata_change_event import ( + AssetPlatform, AssetStructure, Chart, ChartType, @@ -32,6 +33,9 @@ HierarchyLogicalID, HierarchyType, SourceInfo, + SystemContact, + SystemContacts, + SystemDescription, VirtualViewType, ) @@ -106,8 +110,9 @@ async def extract(self) -> Collection[ENTITY_TYPES]: ) folder_map = self._fetch_folders() + user_map = self._fetch_users() - dashboards = self._fetch_dashboards(model_map, folder_map) + dashboards = self._fetch_dashboards(model_map, folder_map, user_map) entities: List[ENTITY_TYPES] = [] entities.extend(dashboards) @@ -129,8 +134,20 @@ def _fetch_folders(self) -> FolderMap: return folder_map + def _fetch_users(self) -> Dict[str, str]: + user_map: Dict[str, str] = {} + users = self._sdk.all_users() + json_dump_to_debug_file(users, "all_users.json") + + for user in users: + if user.email is None or user.id is None: + continue + user_map[user.id] = user.email + + return user_map + def _fetch_dashboards( - self, model_map: ModelMap, folder_map: FolderMap + self, model_map: ModelMap, folder_map: FolderMap, user_map: Dict[str, str] ) -> List[Dashboard]: dashboards: List[Dashboard] = [] @@ -142,6 +159,9 @@ def _fetch_dashboards( try: dashboard = self._sdk.dashboard(dashboard_id=basic_dashboard.id) + json_dump_to_debug_file( + dashboard, f"{basic_dashboard.id}_dashboard.json" + ) except Exception as error: logger.error(f"Failed to fetch dashboard {basic_dashboard.id}: {error}") continue @@ -188,6 +208,20 @@ def _fetch_dashboards( assert dashboard.id is not None + owner_email = user_map.get(dashboard.user_id or "") + system_contacts = ( + SystemContacts( + contacts=[ + SystemContact( + email=owner_email, + system_contact_source=AssetPlatform.LOOKER, + ) + ] + ) + if owner_email + else None + ) + dashboards.append( Dashboard( logical_id=DashboardLogicalID( @@ -206,6 +240,15 @@ def _fetch_dashboards( if dashboard.folder and dashboard.folder.id else None ), + system_contacts=system_contacts, + system_description=( + SystemDescription( + description=dashboard.description, + platform=AssetPlatform.LOOKER, + ) + if dashboard.description + else None + ), ) ) diff --git a/metaphor/tableau/extractor.py b/metaphor/tableau/extractor.py index 0b731951..129bfeb8 100644 --- a/metaphor/tableau/extractor.py +++ b/metaphor/tableau/extractor.py @@ -23,6 +23,7 @@ from metaphor.common.logger import get_logger, json_dump_to_debug_file from metaphor.models.crawler_run_metadata import Platform from metaphor.models.metadata_change_event import ( + AssetPlatform, AssetStructure, Chart, Dashboard, @@ -36,7 +37,6 @@ SourceInfo, SystemContact, SystemContacts, - SystemContactSource, SystemTag, SystemTags, SystemTagSource, @@ -229,7 +229,7 @@ def _get_system_contacts( system_contact = SystemContact( email=self._users[user_id].email, - system_contact_source=SystemContactSource.TABLEAU, + system_contact_source=AssetPlatform.TABLEAU, ) return SystemContacts(contacts=[system_contact]) diff --git a/metaphor/unity_catalog/extractor.py b/metaphor/unity_catalog/extractor.py index 6910d454..66131b55 100644 --- a/metaphor/unity_catalog/extractor.py +++ b/metaphor/unity_catalog/extractor.py @@ -21,6 +21,7 @@ from metaphor.common.utils import to_utc_datetime_from_timestamp from metaphor.models.crawler_run_metadata import Platform from metaphor.models.metadata_change_event import ( + AssetPlatform, DataPlatform, Dataset, DatasetLogicalID, @@ -39,7 +40,6 @@ SQLSchema, SystemContact, SystemContacts, - SystemContactSource, SystemTag, SystemTags, SystemTagSource, @@ -366,7 +366,7 @@ def _init_dataset(self, table_info: TableInfo) -> Dataset: contacts=[ SystemContact( email=owner, - system_contact_source=SystemContactSource.UNITY_CATALOG, + system_contact_source=AssetPlatform.UNITY_CATALOG, ) ] ) @@ -751,7 +751,7 @@ def _init_volume(self, volume: VolumeInfo): contacts=[ SystemContact( email=volume.owner, - system_contact_source=SystemContactSource.UNITY_CATALOG, + system_contact_source=AssetPlatform.UNITY_CATALOG, ) ] ) diff --git a/poetry.lock b/poetry.lock index 05a2655a..06766e6e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiohttp" @@ -3135,13 +3135,13 @@ files = [ [[package]] name = "metaphor-models" -version = "0.37.2" +version = "0.38.0" description = "" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "metaphor_models-0.37.2-py3-none-any.whl", hash = "sha256:b0527ef9ce21781ff4ad5161c2db14e2f1a9fb4034eaced69a7a4e107fbfe794"}, - {file = "metaphor_models-0.37.2.tar.gz", hash = "sha256:060d174af83eca0e940d741c27eaa685b7b494075f19182529a73905400b9b4b"}, + {file = "metaphor_models-0.38.0-py3-none-any.whl", hash = "sha256:729569146f348b9c830c8dafa93a416d4c5708dae197cb7c3bc071075ca02f5a"}, + {file = "metaphor_models-0.38.0.tar.gz", hash = "sha256:dc93d7edcdce15994ad326f57d04ff8604aad1d03f3eee1e53dc25bf08664b01"}, ] [[package]] @@ -4948,6 +4948,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -6534,4 +6535,4 @@ unity-catalog = ["databricks-sdk", "databricks-sql-connector", "sqlglot"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.12" -content-hash = "08d77fb141acbcb7cadebc9146eccd3cb445444520d989f2d019b1e5a4e005a9" +content-hash = "7f3a33ece426597d3eea7dc3c6bb8efc6d3970c1ff798f7ae7510226aa4ed7a7" diff --git a/pyproject.toml b/pyproject.toml index 81c030df..279d0da0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.71" +version = "0.14.72" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] @@ -42,7 +42,7 @@ llama-index-readers-confluence = { version = "^0.1.4", optional = true } llama-index-readers-notion = { version = "^0.1.6", optional = true } looker-sdk = { version = "^24.2.0", optional = true } lxml = { version = "~=5.0.0", optional = true } -metaphor-models = "0.37.2" +metaphor-models = "0.38.0" more-itertools = { version = "^10.1.0", optional = true } msal = { version = "^1.28.0", optional = true } msgraph-beta-sdk = { version = "~1.4.0", optional = true } diff --git a/tests/looker/expected.json b/tests/looker/expected.json index b45124db..345d6f36 100644 --- a/tests/looker/expected.json +++ b/tests/looker/expected.json @@ -28,6 +28,10 @@ "2" ], "name": "first" + }, + "systemDescription": { + "description": "first dashboard", + "platform": "LOOKER" } }, { @@ -59,6 +63,18 @@ "2" ], "name": "old version dashboard" + }, + "systemContacts": { + "contacts": [ + { + "email": "foo@bar.com", + "systemContactSource": "LOOKER" + } + ] + }, + "systemDescription": { + "description": "foo", + "platform": "LOOKER" } } ] diff --git a/tests/looker/expected_alternative.json b/tests/looker/expected_alternative.json index bb3358d3..35b6467e 100644 --- a/tests/looker/expected_alternative.json +++ b/tests/looker/expected_alternative.json @@ -28,6 +28,10 @@ "2" ], "name": "first" + }, + "systemDescription": { + "description": "first dashboard", + "platform": "LOOKER" } }, { @@ -59,6 +63,18 @@ "2" ], "name": "old version dashboard" + }, + "systemContacts": { + "contacts": [ + { + "email": "foo@bar.com", + "systemContactSource": "LOOKER" + } + ] + }, + "systemDescription": { + "description": "foo", + "platform": "LOOKER" } } ] diff --git a/tests/looker/test_extractor.py b/tests/looker/test_extractor.py index d65e7a43..2c6fd6de 100644 --- a/tests/looker/test_extractor.py +++ b/tests/looker/test_extractor.py @@ -94,6 +94,7 @@ def create_extractor(config: LookerRunConfig): ), Dashboard( id="4", + user_id="1", title="old version dashboard", description="foo", preferred_viewer=None, @@ -139,12 +140,16 @@ def create_extractor(config: LookerRunConfig): "4": FolderMetadata(id="4", name="personal descendant", parent_id="3"), } - dashboards = create_extractor(config)._fetch_dashboards(models, folders) + users = { + "1": "foo@bar.com", + } + + dashboards = create_extractor(config)._fetch_dashboards(models, folders, users) events = [EventUtil.trim_event(e) for e in dashboards] assert events == load_json(f"{test_root_dir}/looker/expected.json") config.alternative_base_url = "http://dev.test" - dashboards = create_extractor(config)._fetch_dashboards(models, folders) + dashboards = create_extractor(config)._fetch_dashboards(models, folders, users) events = [EventUtil.trim_event(e) for e in dashboards] assert events == load_json(f"{test_root_dir}/looker/expected_alternative.json")