From 2e544614f12bf2ad8e758b2fd742ee14c6998825 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware <159135491+sagar-salvi-apptware@users.noreply.github.com> Date: Thu, 19 Dec 2024 12:41:40 +0530 Subject: [PATCH] feat(ingest): add looker meta extractor support in sql parsing (#12062) Co-authored-by: Mayuri N Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> --- .../datahub/configuration/source_common.py | 13 ++ .../ingestion/source/looker/looker_common.py | 56 +++++- .../source/looker/looker_lib_wrapper.py | 14 +- .../ingestion/source/looker/looker_source.py | 13 +- .../ingestion/source/powerbi/config.py | 15 +- .../powerbi/dataplatform_instance_resolver.py | 2 +- .../source/powerbi/m_query/pattern_handler.py | 2 +- .../source/snowflake/snowflake_v2.py | 1 + .../sql_parsing/sql_parsing_aggregator.py | 2 +- .../sql_parsing/tool_meta_extractor.py | 121 ++++++++++++- .../looker/golden_looker_mces.json | 56 ++++++ .../looker/golden_test_allow_ingest.json | 53 ++++++ ...olden_test_external_project_view_mces.json | 53 ++++++ .../looker/golden_test_file_path_ingest.json | 53 ++++++ ...olden_test_folder_path_pattern_ingest.json | 53 ++++++ .../golden_test_independent_look_ingest.json | 170 +++++++++++++----- .../looker/golden_test_ingest.json | 54 ++++++ .../looker/golden_test_ingest_joins.json | 53 ++++++ .../golden_test_ingest_unaliased_joins.json | 53 ++++++ ...en_test_non_personal_independent_look.json | 71 ++++++++ .../looker_mces_golden_deleted_stateful.json | 68 ++++++- .../looker/looker_mces_usage_history.json | 53 ++++++ .../tests/integration/looker/test_looker.py | 20 +++ .../sql_parsing/test_tool_meta_extractor.py | 44 ++++- .../state/test_redundant_run_skip_handler.py | 6 +- .../platformresource/PlatformResourceType.pdl | 6 +- 26 files changed, 1026 insertions(+), 79 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index 44c737f1bd13d4..8e41e9fb917878 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -63,3 +63,16 @@ class DatasetLineageProviderConfigBase(EnvConfigMixin): default=None, description="A holder for platform -> platform_instance mappings to generate correct dataset urns", ) + + +class PlatformDetail(ConfigModel): + platform_instance: Optional[str] = Field( + default=None, + description="DataHub platform instance name. To generate correct urn for upstream dataset, this should match " + "with platform instance name used in ingestion " + "recipe of other datahub sources.", + ) + env: str = Field( + default=DEFAULT_ENV, + description="The environment that all assets produced by DataHub platform ingestion source belong to", + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 57a251ef2ed14f..a66962f962255f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -31,6 +31,10 @@ from pydantic.class_validators import validator import datahub.emitter.mce_builder as builder +from datahub.api.entities.platformresource.platform_resource import ( + PlatformResource, + PlatformResourceKey, +) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ContainerKey, create_embed_mcp from datahub.ingestion.api.report import Report @@ -106,7 +110,7 @@ from datahub.utilities.url_util import remove_port_from_url CORPUSER_DATAHUB = "urn:li:corpuser:datahub" - +LOOKER = "looker" logger = logging.getLogger(__name__) @@ -1411,6 +1415,7 @@ class LookerDashboardSourceReport(StaleEntityRemovalSourceReport): resolved_user_ids: int = 0 email_ids_missing: int = 0 # resolved users with missing email addresses + looker_user_count: int = 0 _looker_api: Optional[LookerAPI] = None query_latency: Dict[str, datetime.timedelta] = dataclasses_field( @@ -1614,9 +1619,21 @@ def get_urn_dashboard_id(self): class LookerUserRegistry: looker_api_wrapper: LookerAPI fields: str = ",".join(["id", "email", "display_name", "first_name", "last_name"]) + _user_cache: Dict[str, LookerUser] = {} - def __init__(self, looker_api: LookerAPI): + def __init__(self, looker_api: LookerAPI, report: LookerDashboardSourceReport): self.looker_api_wrapper = looker_api + self.report = report + self._initialize_user_cache() + + def _initialize_user_cache(self) -> None: + raw_users: Sequence[User] = self.looker_api_wrapper.all_users( + user_fields=self.fields + ) + + for raw_user in raw_users: + looker_user = LookerUser.create_looker_user(raw_user) + self._user_cache[str(looker_user.id)] = looker_user def get_by_id(self, id_: str) -> Optional[LookerUser]: if not id_: @@ -1624,6 +1641,9 @@ def get_by_id(self, id_: str) -> Optional[LookerUser]: logger.debug(f"Will get user {id_}") + if str(id_) in self._user_cache: + return self._user_cache.get(str(id_)) + raw_user: Optional[User] = self.looker_api_wrapper.get_user( str(id_), user_fields=self.fields ) @@ -1632,3 +1652,35 @@ def get_by_id(self, id_: str) -> Optional[LookerUser]: looker_user = LookerUser.create_looker_user(raw_user) return looker_user + + def to_platform_resource( + self, platform_instance: Optional[str] + ) -> Iterable[MetadataChangeProposalWrapper]: + try: + platform_resource_key = PlatformResourceKey( + platform=LOOKER, + resource_type="USER_ID_MAPPING", + platform_instance=platform_instance, + primary_key="", + ) + + # Extract user email mappings + user_email_cache = { + user_id: user.email + for user_id, user in self._user_cache.items() + if user.email + } + + platform_resource = PlatformResource.create( + key=platform_resource_key, + value=user_email_cache, + ) + + self.report.looker_user_count = len(user_email_cache) + yield from platform_resource.to_mcps() + + except Exception as exc: + self.report.warning( + message="Failed to generate platform resource for looker id mappings", + exc=exc, + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py index ab55d4e15e5de4..c3f2a110136c45 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py @@ -68,6 +68,7 @@ class LookerAPIStats(BaseModel): get_look_calls: int = 0 search_looks_calls: int = 0 search_dashboards_calls: int = 0 + all_user_calls: int = 0 class LookerAPI: @@ -135,7 +136,7 @@ def get_available_permissions(self) -> Set[str]: return permissions - @lru_cache(maxsize=1000) + @lru_cache(maxsize=5000) def get_user(self, id_: str, user_fields: str) -> Optional[User]: self.client_stats.user_calls += 1 try: @@ -154,6 +155,17 @@ def get_user(self, id_: str, user_fields: str) -> Optional[User]: # User not found return None + def all_users(self, user_fields: str) -> Sequence[User]: + self.client_stats.all_user_calls += 1 + try: + return self.client.all_users( + fields=cast(str, user_fields), + transport_options=self.transport_options, + ) + except SDKError as e: + logger.warning(f"Failure was {e}") + return [] + def execute_query(self, write_query: WriteQuery) -> List[Dict]: logger.debug(f"Executing query {write_query}") self.client_stats.query_calls += 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index cd8ccb8217257c..815c5dfb1c0147 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -145,7 +145,9 @@ def __init__(self, config: LookerDashboardSourceConfig, ctx: PipelineContext): self.source_config: LookerDashboardSourceConfig = config self.reporter: LookerDashboardSourceReport = LookerDashboardSourceReport() self.looker_api: LookerAPI = LookerAPI(self.source_config) - self.user_registry: LookerUserRegistry = LookerUserRegistry(self.looker_api) + self.user_registry: LookerUserRegistry = LookerUserRegistry( + self.looker_api, self.reporter + ) self.explore_registry: LookerExploreRegistry = LookerExploreRegistry( self.looker_api, self.reporter, self.source_config ) @@ -1673,5 +1675,14 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield usage_mcp.as_workunit() self.reporter.report_stage_end("usage_extraction") + # Dump looker user resource mappings. + logger.info("Ingesting looker user resource mapping workunits") + self.reporter.report_stage_start("user_resource_extraction") + yield from auto_workunit( + self.user_registry.to_platform_resource( + self.source_config.platform_instance + ) + ) + def get_report(self) -> SourceReport: return self.reporter diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index f7458c4eb4d5b5..b49d40a0c7eb6a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -9,7 +9,7 @@ import datahub.emitter.mce_builder as builder from datahub.configuration.common import AllowDenyPattern, ConfigModel -from datahub.configuration.source_common import DatasetSourceConfigMixin +from datahub.configuration.source_common import DatasetSourceConfigMixin, PlatformDetail from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated from datahub.ingestion.source.common.subtypes import BIAssetSubTypes from datahub.ingestion.source.state.stale_entity_removal_handler import ( @@ -232,19 +232,6 @@ def default_for_dataset_type_mapping() -> Dict[str, str]: return dict_ -class PlatformDetail(ConfigModel): - platform_instance: Optional[str] = pydantic.Field( - default=None, - description="DataHub platform instance name. To generate correct urn for upstream dataset, this should match " - "with platform instance name used in ingestion " - "recipe of other datahub sources.", - ) - env: str = pydantic.Field( - default=builder.DEFAULT_ENV, - description="The environment that all assets produced by DataHub platform ingestion source belong to", - ) - - class DataBricksPlatformDetail(PlatformDetail): """ metastore is an additional field used in Databricks connector to generate the dataset urn diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py index baaa8d5b85ae10..6d51e853a2fb06 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py @@ -2,8 +2,8 @@ from abc import ABC, abstractmethod from typing import Union +from datahub.configuration.source_common import PlatformDetail from datahub.ingestion.source.powerbi.config import ( - PlatformDetail, PowerBiDashboardSourceConfig, PowerBIPlatformDetail, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py index ffaed79f4e42a6..63520bd731de86 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py @@ -5,13 +5,13 @@ from lark import Tree +from datahub.configuration.source_common import PlatformDetail from datahub.emitter import mce_builder as builder from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.powerbi.config import ( Constant, DataBricksPlatformDetail, DataPlatformPair, - PlatformDetail, PowerBiDashboardSourceConfig, PowerBiDashboardSourceReport, PowerBIPlatformDetail, diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index c3a7912c40e8ee..e5883dd0349a3a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -540,6 +540,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: identifiers=self.identifiers, schema_resolver=schema_resolver, discovered_tables=discovered_datasets, + graph=self.ctx.graph, ) # TODO: This is slightly suboptimal because we create two SqlParsingAggregator instances with different configs diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index 79ea98d1c7f54e..f81eb291e89e1d 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -490,7 +490,7 @@ def __init__( self._exit_stack.push(self._query_usage_counts) # Tool Extractor - self._tool_meta_extractor = ToolMetaExtractor() + self._tool_meta_extractor = ToolMetaExtractor.create(graph) self.report.tool_meta_report = self._tool_meta_extractor.report def close(self) -> None: diff --git a/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py b/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py index 0d85002776e5e2..5af9d9d4f0fffc 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py +++ b/metadata-ingestion/src/datahub/sql_parsing/tool_meta_extractor.py @@ -1,3 +1,4 @@ +import contextlib import json import logging from dataclasses import dataclass, field @@ -5,8 +6,15 @@ from typing_extensions import Protocol +from datahub.api.entities.platformresource.platform_resource import ( + ElasticPlatformResourceQuery, + PlatformResource, + PlatformResourceSearchFields, +) from datahub.ingestion.api.report import Report +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn +from datahub.utilities.search_utils import LogicalOperator from datahub.utilities.stats_collections import int_top_k_dict UrnStr = str @@ -31,6 +39,7 @@ def _get_last_line(query: str) -> str: @dataclass class ToolMetaExtractorReport(Report): num_queries_meta_extracted: Dict[str, int] = field(default_factory=int_top_k_dict) + failures: List[str] = field(default_factory=list) class ToolMetaExtractor: @@ -42,14 +51,81 @@ class ToolMetaExtractor: by warehouse query logs. """ - def __init__(self) -> None: - self.report = ToolMetaExtractorReport() + def __init__( + self, + report: ToolMetaExtractorReport, + looker_user_mapping: Optional[Dict[str, str]] = None, + ) -> None: + self.report = report self.known_tool_extractors: List[Tuple[str, Callable[[QueryLog], bool]]] = [ ( "mode", self._extract_mode_query, - ) + ), + ( + "looker", + self._extract_looker_query, + ), ] + # maps user id (as string) to email address + self.looker_user_mapping = looker_user_mapping + + @classmethod + def create( + cls, + graph: Optional[DataHubGraph] = None, + ) -> "ToolMetaExtractor": + report = ToolMetaExtractorReport() + looker_user_mapping = None + if graph: + try: + looker_user_mapping = cls.extract_looker_user_mapping_from_graph( + graph, report + ) + except Exception as e: + report.failures.append( + f"Unexpected error during Looker user metadata extraction: {str(e)}" + ) + + return cls(report, looker_user_mapping) + + @classmethod + def extract_looker_user_mapping_from_graph( + cls, graph: DataHubGraph, report: ToolMetaExtractorReport + ) -> Optional[Dict[str, str]]: + looker_user_mapping = None + query = ( + ElasticPlatformResourceQuery.create_from() + .group(LogicalOperator.AND) + .add_field_match(PlatformResourceSearchFields.PLATFORM, "looker") + .add_field_match( + PlatformResourceSearchFields.RESOURCE_TYPE, + "USER_ID_MAPPING", + ) + .end() + ) + platform_resources = list( + PlatformResource.search_by_filters(query=query, graph_client=graph) + ) + + if len(platform_resources) > 1: + report.failures.append( + "Looker user metadata extraction failed. Found more than one looker user id mappings." + ) + else: + platform_resource = platform_resources[0] + + if ( + platform_resource + and platform_resource.resource_info + and platform_resource.resource_info.value + ): + with contextlib.suppress(ValueError, AssertionError): + value = platform_resource.resource_info.value.as_raw_json() + if value: + looker_user_mapping = value + + return looker_user_mapping def _extract_mode_query(self, entry: QueryLog) -> bool: """ @@ -78,14 +154,49 @@ def _extract_mode_query(self, entry: QueryLog) -> bool: return True + def _extract_looker_query(self, entry: QueryLog) -> bool: + """ + Returns: + bool: whether QueryLog entry is that of looker and looker user info + is extracted into entry. + """ + if not self.looker_user_mapping: + return False + + last_line = _get_last_line(entry.query_text) + + if not (last_line.startswith("--") and "Looker Query Context" in last_line): + return False + + start_quote_idx = last_line.index("'") + end_quote_idx = last_line.rindex("'") + if start_quote_idx == -1 or end_quote_idx == -1: + return False + + looker_json_raw = last_line[start_quote_idx + 1 : end_quote_idx] + looker_json = json.loads(looker_json_raw) + + user_id = str(looker_json["user_id"]) + email = self.looker_user_mapping.get(user_id) + if not email: + return False + + original_user = entry.user + + entry.user = email_to_user_urn(email) + entry.extra_info = entry.extra_info or {} + entry.extra_info["user_via"] = original_user + + return True + def extract_bi_metadata(self, entry: QueryLog) -> bool: for tool, meta_extractor in self.known_tool_extractors: try: if meta_extractor(entry): self.report.num_queries_meta_extracted[tool] += 1 return True - except Exception: - logger.debug("Tool metadata extraction failed with error : {e}") + except Exception as e: + logger.debug(f"Tool metadata extraction failed with error : {e}") return False diff --git a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json index a9c445b5986efe..6ae772c134cb32 100644 --- a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json @@ -842,6 +842,62 @@ "pipelineName": "stateful-looker-pipeline" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json index af9c62a2a41803..d7620980a9cedb 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json @@ -497,6 +497,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json index b89bc356b48fdc..13963af55bfe56 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json @@ -735,6 +735,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json index 810fefd8f6cb85..f11d060102851c 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json @@ -735,6 +735,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json index 3d78397f54a235..f6e39dd5286cd0 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_folder_path_pattern_ingest.json @@ -828,6 +828,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json index 5a540e61e768d7..203bed843155c8 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json @@ -464,6 +464,21 @@ "/Folders/Shared" ] } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:test-1@looker.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } } ] } @@ -708,6 +723,21 @@ "/Folders/Personal" ] } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:test-2@looker.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } } ] } @@ -1108,12 +1138,12 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/Explore/sales_model" + "/Explore/data" ] } }, @@ -1126,12 +1156,12 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "project": "lkml_samples", - "model": "sales_model", + "model": "data", "looker.explore.label": "My Explore View", - "looker.explore.name": "sales_explore", + "looker.explore.name": "my_view", "looker.explore.file": "test_source_file.lkml" }, - "externalUrl": "https://looker.company.com/explore/sales_model/sales_explore", + "externalUrl": "https://looker.company.com/explore/data/my_view", "name": "My Explore View", "description": "lorem ipsum", "tags": [] @@ -1153,7 +1183,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "sales_explore", + "schemaName": "my_view", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1208,7 +1238,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1227,12 +1257,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { "json": { - "renderUrl": "https://looker.company.com/embed/explore/sales_model/sales_explore" + "renderUrl": "https://looker.company.com/embed/explore/data/my_view" } }, "systemMetadata": { @@ -1244,12 +1274,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" + "container": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" } }, "systemMetadata": { @@ -1261,7 +1291,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1271,8 +1301,8 @@ "id": "Explore" }, { - "id": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", - "urn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" + "id": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "urn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" } ] } @@ -1287,12 +1317,12 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/Explore/data" + "/Explore/order_model" ] } }, @@ -1305,12 +1335,12 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "project": "lkml_samples", - "model": "data", + "model": "order_model", "looker.explore.label": "My Explore View", - "looker.explore.name": "my_view", + "looker.explore.name": "order_explore", "looker.explore.file": "test_source_file.lkml" }, - "externalUrl": "https://looker.company.com/explore/data/my_view", + "externalUrl": "https://looker.company.com/explore/order_model/order_explore", "name": "My Explore View", "description": "lorem ipsum", "tags": [] @@ -1332,7 +1362,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "my_view", + "schemaName": "order_explore", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1387,7 +1417,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1406,12 +1436,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { "json": { - "renderUrl": "https://looker.company.com/embed/explore/data/my_view" + "renderUrl": "https://looker.company.com/embed/explore/order_model/order_explore" } }, "systemMetadata": { @@ -1423,12 +1453,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + "container": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" } }, "systemMetadata": { @@ -1440,7 +1470,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1450,8 +1480,8 @@ "id": "Explore" }, { - "id": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", - "urn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + "id": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", + "urn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" } ] } @@ -1466,12 +1496,12 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/Explore/order_model" + "/Explore/sales_model" ] } }, @@ -1484,12 +1514,12 @@ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { "project": "lkml_samples", - "model": "order_model", + "model": "sales_model", "looker.explore.label": "My Explore View", - "looker.explore.name": "order_explore", + "looker.explore.name": "sales_explore", "looker.explore.file": "test_source_file.lkml" }, - "externalUrl": "https://looker.company.com/explore/order_model/order_explore", + "externalUrl": "https://looker.company.com/explore/sales_model/sales_explore", "name": "My Explore View", "description": "lorem ipsum", "tags": [] @@ -1511,7 +1541,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "order_explore", + "schemaName": "sales_explore", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1566,7 +1596,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1585,12 +1615,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "embed", "aspect": { "json": { - "renderUrl": "https://looker.company.com/embed/explore/order_model/order_explore" + "renderUrl": "https://looker.company.com/embed/explore/sales_model/sales_explore" } }, "systemMetadata": { @@ -1602,12 +1632,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" + "container": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" } }, "systemMetadata": { @@ -1619,7 +1649,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1629,8 +1659,8 @@ "id": "Explore" }, { - "id": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", - "urn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" + "id": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", + "urn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" } ] } @@ -1705,6 +1735,62 @@ "pipelineName": "execution-1" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index 9ac95b8482a475..87af50f95ed6bb 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -793,6 +793,60 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:8436a2a37c4a7e81fb08c9c8415d2e4b", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:8436a2a37c4a7e81fb08c9c8415d2e4b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:looker,ap-south-1)" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:8436a2a37c4a7e81fb08c9c8415d2e4b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json index 3a2c6359ea63c2..b990ce7c67dab6 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json @@ -759,6 +759,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json index 007eee348aeaf8..391192b3d16f36 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json @@ -513,6 +513,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json b/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json index 859b9163d7aad6..4909a6af73a225 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json @@ -464,6 +464,21 @@ "/Folders/Shared" ] } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:test-1@looker.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } } ] } @@ -1185,6 +1200,62 @@ "pipelineName": "execution-1" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json index 8256c984afb274..ddeb5428b1d726 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json @@ -762,6 +762,62 @@ "pipelineName": "stateful-looker-pipeline" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "stateful-looker-pipeline" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", @@ -814,8 +870,8 @@ } }, { - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(looker,dashboards.11)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,bogus data.explore.my_view,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -831,8 +887,8 @@ } }, { - "entityType": "chart", - "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.11)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -865,8 +921,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,bogus data.explore.my_view,PROD)", + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.10)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json index 0b3530f9c24629..594983c8fb0f2a 100644 --- a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json +++ b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json @@ -678,6 +678,59 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "USER_ID_MAPPING", + "primaryKey": "", + "value": { + "blob": "{\"1\": \"test-1@looker.com\", \"2\": \"test-2@looker.com\", \"3\": \"test-3@looker.com\"}", + "contentType": "JSON" + } + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:1cec84235c544a141e63dd2077da2562", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index 8bbf14709ff9fb..a39de8384efb23 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -83,6 +83,7 @@ def test_looker_ingest(pytestconfig, tmp_path, mock_time): with mock.patch("looker_sdk.init40") as mock_sdk: mock_sdk.return_value = mocked_client setup_mock_dashboard(mocked_client) + mocked_client.run_inline_query.side_effect = side_effect_query_inline setup_mock_explore(mocked_client) test_resources_dir = pytestconfig.rootpath / "tests/integration/looker" @@ -319,6 +320,7 @@ def setup_mock_look(mocked_client): mocked_client.all_looks.return_value = [ Look( id="1", + user_id="1", title="Outer Look", description="I am not part of any Dashboard", query_id="1", @@ -327,6 +329,7 @@ def setup_mock_look(mocked_client): Look( id="2", title="Personal Look", + user_id="2", description="I am not part of any Dashboard and in personal folder", query_id="2", folder=FolderBase( @@ -561,6 +564,20 @@ def get_user( mocked_client.user.side_effect = get_user +def setup_mock_all_user(mocked_client): + def all_users( + fields: Optional[str] = None, + transport_options: Optional[transport.TransportOptions] = None, + ) -> List[User]: + return [ + User(id="1", email="test-1@looker.com"), + User(id="2", email="test-2@looker.com"), + User(id="3", email="test-3@looker.com"), + ] + + mocked_client.all_users.side_effect = all_users + + def side_effect_query_inline( result_format: str, body: WriteQuery, transport_options: Optional[TransportOptions] ) -> str: @@ -714,6 +731,7 @@ def test_looker_ingest_usage_history(pytestconfig, tmp_path, mock_time): mocked_client.run_inline_query.side_effect = side_effect_query_inline setup_mock_explore(mocked_client) setup_mock_user(mocked_client) + setup_mock_all_user(mocked_client) test_resources_dir = pytestconfig.rootpath / "tests/integration/looker" @@ -946,6 +964,8 @@ def ingest_independent_looks( mock_sdk.return_value = mocked_client setup_mock_dashboard(mocked_client) setup_mock_explore(mocked_client) + setup_mock_user(mocked_client) + setup_mock_all_user(mocked_client) setup_mock_look(mocked_client) test_resources_dir = pytestconfig.rootpath / "tests/integration/looker" diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py b/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py index 6f590b53071467..f6566f007f5e6b 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_tool_meta_extractor.py @@ -1,11 +1,14 @@ from datahub.configuration.datetimes import parse_absolute_time from datahub.metadata.urns import CorpUserUrn from datahub.sql_parsing.sql_parsing_aggregator import PreparsedQuery -from datahub.sql_parsing.tool_meta_extractor import ToolMetaExtractor +from datahub.sql_parsing.tool_meta_extractor import ( + ToolMetaExtractor, + ToolMetaExtractorReport, +) def test_extract_mode_metadata() -> None: - extractor = ToolMetaExtractor() + extractor = ToolMetaExtractor(report=ToolMetaExtractorReport()) query = """\ select * from LONG_TAIL_COMPANIONS.ADOPTION.PET_PROFILES LIMIT 100 @@ -30,8 +33,42 @@ def test_extract_mode_metadata() -> None: assert extractor.report.num_queries_meta_extracted["mode"] == 1 +def test_extract_looker_metadata() -> None: + extractor = ToolMetaExtractor( + report=ToolMetaExtractorReport(), looker_user_mapping={"7": "john.doe@xyz.com"} + ) + looker_query = """\ +SELECT + all_entities_extended_sibling."ENTITY" AS "all_entities_extended_sibling.entity_type", + COUNT(DISTINCT ( all_entities_extended_sibling."URN" )) AS "all_entities_extended_sibling.distinct_count" +FROM "PUBLIC"."ALL_ENTITIES" + AS all_entities_extended_sibling +GROUP BY + 1 +ORDER BY + 1 +FETCH NEXT 50 ROWS ONLY +-- Looker Query Context '{"user_id":7,"history_slug":"264797031bc403cf382cbefbe3700849","instance_slug":"32654f2ffadf10b1949d4009e52fc6a4"}' +""" + + entry = PreparsedQuery( + query_id=None, + query_text=looker_query, + upstreams=[], + downstream=None, + column_lineage=None, + column_usage=None, + inferred_schema=None, + user=CorpUserUrn("mode"), + timestamp=parse_absolute_time("2021-08-01T01:02:03Z"), + ) + assert extractor.extract_bi_metadata(entry) + assert entry.user == CorpUserUrn("john.doe") + assert extractor.report.num_queries_meta_extracted["looker"] == 1 + + def test_extract_no_metadata() -> None: - extractor = ToolMetaExtractor() + extractor = ToolMetaExtractor(report=ToolMetaExtractorReport()) query = """\ select * from LONG_TAIL_COMPANIONS.ADOPTION.PET_PROFILES LIMIT 100 @@ -53,3 +90,4 @@ def test_extract_no_metadata() -> None: assert not extractor.extract_bi_metadata(entry) assert extractor.report.num_queries_meta_extracted["mode"] == 0 + assert extractor.report.num_queries_meta_extracted["looker"] == 0 diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py index 85c86f8d205d9a..5631ad2c69f949 100644 --- a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_redundant_run_skip_handler.py @@ -37,7 +37,11 @@ def stateful_source(mock_datahub_graph: DataHubGraph) -> Iterable[SnowflakeV2Sou ), ) - with mock.patch("snowflake.connector.connect"): + with mock.patch( + "datahub.sql_parsing.sql_parsing_aggregator.ToolMetaExtractor.create", + ) as mock_checkpoint, mock.patch("snowflake.connector.connect"): + mock_checkpoint.return_value = mock.MagicMock() + yield SnowflakeV2Source(ctx=ctx, config=config) diff --git a/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl index 2f36eda9141abb..1a1dbea4359fbd 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/platformresource/PlatformResourceType.pdl @@ -9,9 +9,13 @@ enum PlatformResourceType { /** * e.g. a Slack member resource, Looker user resource, etc. */ - USER_INFO, + USER_INFO, /** * e.g. a Slack channel */ CONVERSATION + /** + * e.g. Looker mapping of all user ids + */ + USER_ID_MAPPING }