diff --git a/metaphor/thought_spot/extractor.py b/metaphor/thought_spot/extractor.py index 12dee876..25e6e04c 100644 --- a/metaphor/thought_spot/extractor.py +++ b/metaphor/thought_spot/extractor.py @@ -49,12 +49,12 @@ ) from metaphor.thought_spot.config import ThoughtSpotRunConfig from metaphor.thought_spot.models import ( - AnswerMetadataDetail, + AnswerMetadata, ConnectionDetail, DataSourceTypeEnum, Header, - LiveBoardMetadataDetail, - LogicalTableMetadataDetail, + LiveBoardMetadata, + LogicalTableMetadata, TableMappingInfo, Tag, TMLObject, @@ -119,11 +119,11 @@ def fetch_virtual_views(self): data_objects = ThoughtSpot.fetch_tables(self._client) - def is_source_valid(table: LogicalTableMetadataDetail): + def is_source_valid(table: LogicalTableMetadata): """ Table should source from a connection """ - return table.dataSourceId in connections + return table.metadata_detail.dataSourceId in connections tables = filter(is_source_valid, data_objects) @@ -136,15 +136,13 @@ def is_source_valid(table: LogicalTableMetadataDetail): self.populate_lineage(connections, tables) self.populate_formula() - def populate_logical_column_mapping( - self, tables: Dict[str, LogicalTableMetadataDetail] - ): + def populate_logical_column_mapping(self, tables: Dict[str, LogicalTableMetadata]): for table in tables.values(): - table_id = table.header.id + table_id = table.metadata_detail.header.id view_id = VirtualViewLogicalID( name=table_id, type=VirtualViewType.THOUGHT_SPOT_DATA_OBJECT ) - for column in table.columns: + for column in table.metadata_detail.columns: self._column_references[column.header.id] = ColumnReference( entity_id=str(EntityId(EntityType.VIRTUAL_VIEW, view_id)), field=column.header.name, @@ -153,20 +151,23 @@ def populate_logical_column_mapping( def populate_virtual_views( self, connections: Dict[str, ConnectionDetail], - tables: Dict[str, LogicalTableMetadataDetail], + tables: Dict[str, LogicalTableMetadata], ): for table in tables.values(): - table_id = table.header.id - table_type = mapping_data_object_type(table.type) + table_detail = table.metadata_detail + table_id = table_detail.header.id + table_type = mapping_data_object_type(table_detail.type) field_mappings = [] - for column in table.columns: + for column in table_detail.columns: field_mapping = FieldMapping(destination=column.header.name, sources=[]) assert field_mapping.sources is not None - if table.dataSourceTypeEnum != DataSourceTypeEnum.DEFAULT: + if table_detail.dataSourceTypeEnum != DataSourceTypeEnum.DEFAULT: # the table upstream is external source, i.e. BigQuery - table_mapping_info = table.logicalTableContent.tableMappingInfo + table_mapping_info = ( + table_detail.logicalTableContent.tableMappingInfo + ) if table_mapping_info is None: logger.warning( f"tableMappingInfo is missing, skip for column: {column.header.name}" @@ -176,7 +177,7 @@ def populate_virtual_views( source_entity_id = self.find_entity_id_from_connection( connections, table_mapping_info, - table.dataSourceId, + table_detail.dataSourceId, ) field_mapping.sources.append( SourceField( @@ -207,7 +208,7 @@ def populate_virtual_views( ), structure=AssetStructure( directories=[table_type.name], - name=table.header.name, + name=table_detail.header.name, ), thought_spot=ThoughtSpotDataObject( columns=[ @@ -217,17 +218,18 @@ def populate_virtual_views( type=column.dataType if column.dataType else column.type, optional_type=column.optionalType, ) - for column in table.columns + for column in table_detail.columns ], - name=table.header.name, - description=table.header.description, + name=table_detail.header.name, + description=table_detail.header.description, type=table_type, url=f"{self._base_url}/#/data/tables/{table_id}", + is_verified=table.metadata_header.isVerified, ), entity_upstream=EntityUpstream( field_mappings=field_mappings if field_mappings else None ), - system_tags=self._get_system_tags(table.header.tags), + system_tags=self._get_system_tags(table_detail.header.tags), ) self._virtual_views[table_id] = view @@ -296,13 +298,13 @@ def populate_formula(self): def populate_lineage( self, connections: Dict[str, ConnectionDetail], - tables: Dict[str, LogicalTableMetadataDetail], + tables: Dict[str, LogicalTableMetadata], ): """ Populate lineage between tables/worksheets/views """ for view in self._virtual_views.values(): - table = tables[view.logical_id.name] + table = tables[view.logical_id.name].metadata_detail if table.dataSourceTypeEnum != DataSourceTypeEnum.DEFAULT: # SQL_VIEW case @@ -501,14 +503,15 @@ def fetch_dashboards(self): liveboards = ThoughtSpot.fetch_liveboards(self._client) self.populate_liveboards(liveboards) - def populate_answers(self, answers: List[AnswerMetadataDetail]): + def populate_answers(self, answers: List[AnswerMetadata]): for answer in answers: - answer_id = answer.header.id + detail = answer.metadata_detail + answer_id = detail.header.id visualizations = [ # Use answer.header instead as viz.header contain only dummy values - (viz, answer.header, "") - for sheet in answer.reportContent.sheets + (viz, detail.header, "") + for sheet in detail.reportContent.sheets for viz in sheet.sheetContent.visualizations if viz.vizContent.vizType == "CHART" ] @@ -520,29 +523,30 @@ def populate_answers(self, answers: List[AnswerMetadataDetail]): ), structure=AssetStructure( directories=[ThoughtSpotDashboardType.ANSWER.name], - name=answer.header.name, + name=detail.header.name, ), dashboard_info=DashboardInfo( - description=answer.header.description, - title=answer.header.name, + description=detail.header.description, + title=detail.header.name, charts=self._populate_charts( visualizations, self._base_url, answer_id ), thought_spot=ThoughtSpotInfo( type=ThoughtSpotDashboardType.ANSWER, + is_verified=answer.metadata_header.isVerified, ), dashboard_type=DashboardType.THOUGHT_SPOT_ANSWER, ), source_info=SourceInfo( main_url=f"{self._base_url}/#/saved-answer/{answer_id}", ), - system_tags=self._get_system_tags(answer.header.tags), + system_tags=self._get_system_tags(detail.header.tags), ) self._dashboards[answer_id] = dashboard - def populate_answers_lineage(self, answers: List[AnswerMetadataDetail]): - ids = [answer.header.id for answer in answers] + def populate_answers_lineage(self, answers: List[AnswerMetadata]): + ids = [answer.metadata_detail.header.id for answer in answers] for tml_result in ThoughtSpot.fetch_tml(self._client, ids): if not tml_result.edoc: continue @@ -620,14 +624,15 @@ def get_field_mappings_from_answer_sql( return field_mappings - def populate_liveboards(self, liveboards: List[LiveBoardMetadataDetail]): + def populate_liveboards(self, liveboards: List[LiveBoardMetadata]): for board in liveboards: - board_id = board.header.id + detail = board.metadata_detail + board_id = detail.header.id - resolvedObjects = board.header.resolvedObjects + resolvedObjects = detail.header.resolvedObjects answers = { viz.header.id: resolvedObjects[viz.vizContent.refVizId] - for sheet in board.reportContent.sheets + for sheet in detail.reportContent.sheets for viz in sheet.sheetContent.visualizations if viz.vizContent.refVizId } @@ -649,17 +654,18 @@ def populate_liveboards(self, liveboards: List[LiveBoardMetadataDetail]): ), structure=AssetStructure( directories=[ThoughtSpotDashboardType.LIVEBOARD.name], - name=board.header.name, + name=detail.header.name, ), dashboard_info=DashboardInfo( - description=board.header.description, - title=board.header.name, + description=detail.header.description, + title=detail.header.name, charts=self._populate_charts( visualizations, self._base_url, board_id ), thought_spot=ThoughtSpotInfo( type=ThoughtSpotDashboardType.LIVEBOARD, embed_url=f"{self._base_url}/#/embed/viz/{board_id}", + is_verified=board.metadata_header.isVerified, ), dashboard_type=DashboardType.THOUGHT_SPOT_LIVEBOARD, ), @@ -672,7 +678,7 @@ def populate_liveboards(self, liveboards: List[LiveBoardMetadataDetail]): visualizations ), ), - system_tags=self._get_system_tags(board.header.tags), + system_tags=self._get_system_tags(detail.header.tags), ) self._dashboards[board_id] = dashboard diff --git a/metaphor/thought_spot/models.py b/metaphor/thought_spot/models.py index 9c3d35af..4de5cf5f 100644 --- a/metaphor/thought_spot/models.py +++ b/metaphor/thought_spot/models.py @@ -25,6 +25,10 @@ def __repr__(self): return self.id +class MetadataHeader(BaseModel): + isVerified: bool + + class Reference(BaseModel): id: str name: str @@ -170,6 +174,10 @@ class LogicalTableMetadataDetail(Metadata): class LogicalTableMetadata(BaseModel): metadata_detail: LogicalTableMetadataDetail + metadata_header: MetadataHeader + + def __repr__(self): + return self.metadata_detail.header.id class AnswerMetadataDetail(Metadata): @@ -179,6 +187,10 @@ class AnswerMetadataDetail(Metadata): class AnswerMetadata(BaseModel): metadata_detail: AnswerMetadataDetail + metadata_header: MetadataHeader + + def __repr__(self): + return self.metadata_detail.header.id class LiveBoardMetadataDetail(Metadata): @@ -189,6 +201,10 @@ class LiveBoardMetadataDetail(Metadata): class LiveBoardMetadata(BaseModel): metadata_detail: LiveBoardMetadataDetail + metadata_header: MetadataHeader + + def __repr__(self): + return self.metadata_detail.header.id class TMLResult(BaseModel): diff --git a/metaphor/thought_spot/utils.py b/metaphor/thought_spot/utils.py index 01e5eba0..989520c5 100644 --- a/metaphor/thought_spot/utils.py +++ b/metaphor/thought_spot/utils.py @@ -14,14 +14,11 @@ from metaphor.thought_spot.config import ThoughtSpotRunConfig from metaphor.thought_spot.models import ( AnswerMetadata, - AnswerMetadataDetail, Connection, ConnectionDetail, ConnectionType, LiveBoardMetadata, - LiveBoardMetadataDetail, LogicalTableMetadata, - LogicalTableMetadataDetail, SourceType, TMLResult, ) @@ -143,8 +140,8 @@ def fetch_connections(client: TSRestApiV2) -> List[ConnectionDetail]: return connection_details @classmethod - def fetch_tables(cls, client: TSRestApiV2) -> List[LogicalTableMetadataDetail]: - table_details: List[LogicalTableMetadataDetail] = [] + def fetch_tables(cls, client: TSRestApiV2) -> List[LogicalTableMetadata]: + table_details: List[LogicalTableMetadata] = [] batch_count = 0 batch_size = 100 @@ -167,15 +164,15 @@ def fetch_tables(cls, client: TSRestApiV2) -> List[LogicalTableMetadataDetail]: for table in TypeAdapter(List[LogicalTableMetadata]).validate_python( response ): - table_details.append(table.metadata_detail) + table_details.append(table) logger.info(f"Extract #{len(table_details)} tables") return table_details @classmethod - def fetch_answers(cls, client: TSRestApiV2) -> List[AnswerMetadataDetail]: - answer_details: List[AnswerMetadataDetail] = [] + def fetch_answers(cls, client: TSRestApiV2) -> List[AnswerMetadata]: + answer_details: List[AnswerMetadata] = [] batch_count = 0 batch_size = 100 @@ -196,15 +193,15 @@ def fetch_answers(cls, client: TSRestApiV2) -> List[AnswerMetadataDetail]: batch_count += 1 for answer in TypeAdapter(List[AnswerMetadata]).validate_python(response): - answer_details.append(answer.metadata_detail) + answer_details.append(answer) logger.info(f"Extract #{len(answer_details)} liveboards") return answer_details @classmethod - def fetch_liveboards(cls, client: TSRestApiV2) -> List[LiveBoardMetadataDetail]: - liveboard_details: List[LiveBoardMetadataDetail] = [] + def fetch_liveboards(cls, client: TSRestApiV2) -> List[LiveBoardMetadata]: + liveboard_details: List[LiveBoardMetadata] = [] batch_count = 0 batch_size = 100 @@ -227,7 +224,7 @@ def fetch_liveboards(cls, client: TSRestApiV2) -> List[LiveBoardMetadataDetail]: for liveboard in TypeAdapter(List[LiveBoardMetadata]).validate_python( response ): - liveboard_details.append(liveboard.metadata_detail) + liveboard_details.append(liveboard) logger.info(f"Extract #{len(liveboard_details)} liveboards") diff --git a/poetry.lock b/poetry.lock index 282aed65..682bedcf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -3343,13 +3343,13 @@ files = [ [[package]] name = "metaphor-models" -version = "0.41.1" +version = "0.41.2" description = "" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "metaphor_models-0.41.1-py3-none-any.whl", hash = "sha256:d8ca097a7d4406a5acddbb64f102a73c955b221e293b2fc27fa17702cb54c377"}, - {file = "metaphor_models-0.41.1.tar.gz", hash = "sha256:8034155dca525b8348b7850cc7a6064dcca7903a9ea47c56308786be90eb4499"}, + {file = "metaphor_models-0.41.2-py3-none-any.whl", hash = "sha256:75e20dad91c6f8d6615746a719097efc50e9cd41c662fd1b9837d40790d19177"}, + {file = "metaphor_models-0.41.2.tar.gz", hash = "sha256:1aa9324007e9934ab6ff7539512c49cba4c6b800479cdde8875e395defc6a473"}, ] [[package]] @@ -7187,4 +7187,4 @@ unity-catalog = ["databricks-sdk", "databricks-sql-connector", "sqlglot"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "16fc7b1cb9646a415cf277908e5fdcd67688926d61490e8c93d79ce674e057d5" +content-hash = "894dfd145edf272f9bcacd4326abe0a49a6ca0e5d99a05e6215fb60bc73185cb" diff --git a/pyproject.toml b/pyproject.toml index 5f4eafe5..47c1736d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.141" +version = "0.14.142" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] @@ -43,7 +43,7 @@ llama-index-readers-confluence = { version = "^0.1.4", optional = true } llama-index-readers-notion = { version = "^0.1.6", optional = true } looker-sdk = { version = "^24.2.0", optional = true } lxml = { version = "~=5.0.0", optional = true } -metaphor-models = "0.41.1" +metaphor-models = "0.41.2" more-itertools = { version = "^10.1.0", optional = true } msal = { version = "^1.28.0", optional = true } msgraph-beta-sdk = { version = "~1.4.0", optional = true } diff --git a/tests/thought_spot/data/answers.json b/tests/thought_spot/data/answers.json index c4a88e07..763cea0c 100644 --- a/tests/thought_spot/data/answers.json +++ b/tests/thought_spot/data/answers.json @@ -45,6 +45,9 @@ }, "complete": true, "incompleteDetail": [] + }, + "metadata_header": { + "isVerified": false } } ] diff --git a/tests/thought_spot/data/data_objects.json b/tests/thought_spot/data/data_objects.json index 87daf0ac..d0188608 100644 --- a/tests/thought_spot/data/data_objects.json +++ b/tests/thought_spot/data/data_objects.json @@ -22,6 +22,9 @@ } ] } + }, + "metadata_header": { + "isVerified": false } }, { @@ -67,6 +70,9 @@ } ] } + }, + "metadata_header": { + "isVerified": false } }, { @@ -115,6 +121,9 @@ "description": "This is view1", "tags": [] } + }, + "metadata_header": { + "isVerified": false } }, { @@ -147,6 +156,9 @@ "tags": [], "type": "SQL_VIEW" } + }, + "metadata_header": { + "isVerified": false } }, { @@ -179,6 +191,9 @@ "tags": [], "type": "SQL_VIEW" } + }, + "metadata_header": { + "isVerified": false } } ] diff --git a/tests/thought_spot/data/liveboards.json b/tests/thought_spot/data/liveboards.json index 15b58fa4..949091cc 100644 --- a/tests/thought_spot/data/liveboards.json +++ b/tests/thought_spot/data/liveboards.json @@ -107,6 +107,9 @@ } ] } + }, + "metadata_header": { + "isVerified": false } } ] diff --git a/tests/thought_spot/expected.json b/tests/thought_spot/expected.json index a88f9137..e46d7c4a 100644 --- a/tests/thought_spot/expected.json +++ b/tests/thought_spot/expected.json @@ -27,7 +27,8 @@ "name": "Worksheet 1", "sourceVirtualViews": [], "type": "WORKSHEET", - "url": "http://base.url/#/data/tables/worksheet1" + "url": "http://base.url/#/data/tables/worksheet1", + "isVerified": false } }, { @@ -78,7 +79,8 @@ "DATASET~29779E6A8F6548832D2305896A583002" ], "type": "TABLE", - "url": "http://base.url/#/data/tables/table1" + "url": "http://base.url/#/data/tables/table1", + "isVerified": false } }, { @@ -139,7 +141,8 @@ "VIRTUAL_VIEW~F13FAE9D17C5631FD2E1025CE8BC7F5C" ], "type": "VIEW", - "url": "http://base.url/#/data/tables/view1" + "url": "http://base.url/#/data/tables/view1", + "isVerified": false } }, { @@ -186,7 +189,8 @@ "DATASET~9A61719497E3AC013ACBCA83F9F732B7" ], "type": "VIEW", - "url": "http://base.url/#/data/tables/sql_view_1" + "url": "http://base.url/#/data/tables/sql_view_1", + "isVerified": false } }, { @@ -233,7 +237,8 @@ "DATASET~844586420073B959F7B75FED699C23E9" ], "type": "VIEW", - "url": "http://base.url/#/data/tables/sql_view_2" + "url": "http://base.url/#/data/tables/sql_view_2", + "isVerified": false } }, { @@ -248,7 +253,8 @@ "dashboardType": "THOUGHT_SPOT_ANSWER", "description": "This is answer1", "thoughtSpot": { - "type": "ANSWER" + "type": "ANSWER", + "isVerified": false }, "title": "Answer 1" }, @@ -319,7 +325,8 @@ "description": "This is board1", "thoughtSpot": { "embedUrl": "http://base.url/#/embed/viz/board1", - "type": "LIVEBOARD" + "type": "LIVEBOARD", + "isVerified": false }, "title": "Board 1" },