Skip to content

Commit

Permalink
Extract ThoughtSpot verified label (#1027)
Browse files Browse the repository at this point in the history
* Extract ThoughtSpot verified label

* Bump version
  • Loading branch information
elic-eon authored Nov 1, 2024
1 parent 2379c1f commit b8b0c02
Show file tree
Hide file tree
Showing 9 changed files with 115 additions and 68 deletions.
90 changes: 48 additions & 42 deletions metaphor/thought_spot/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@
)
from metaphor.thought_spot.config import ThoughtSpotRunConfig
from metaphor.thought_spot.models import (
AnswerMetadataDetail,
AnswerMetadata,
ConnectionDetail,
DataSourceTypeEnum,
Header,
LiveBoardMetadataDetail,
LogicalTableMetadataDetail,
LiveBoardMetadata,
LogicalTableMetadata,
TableMappingInfo,
Tag,
TMLObject,
Expand Down Expand Up @@ -119,11 +119,11 @@ def fetch_virtual_views(self):

data_objects = ThoughtSpot.fetch_tables(self._client)

def is_source_valid(table: LogicalTableMetadataDetail):
def is_source_valid(table: LogicalTableMetadata):
"""
Table should source from a connection
"""
return table.dataSourceId in connections
return table.metadata_detail.dataSourceId in connections

tables = filter(is_source_valid, data_objects)

Expand All @@ -136,15 +136,13 @@ def is_source_valid(table: LogicalTableMetadataDetail):
self.populate_lineage(connections, tables)
self.populate_formula()

def populate_logical_column_mapping(
self, tables: Dict[str, LogicalTableMetadataDetail]
):
def populate_logical_column_mapping(self, tables: Dict[str, LogicalTableMetadata]):
for table in tables.values():
table_id = table.header.id
table_id = table.metadata_detail.header.id
view_id = VirtualViewLogicalID(
name=table_id, type=VirtualViewType.THOUGHT_SPOT_DATA_OBJECT
)
for column in table.columns:
for column in table.metadata_detail.columns:
self._column_references[column.header.id] = ColumnReference(
entity_id=str(EntityId(EntityType.VIRTUAL_VIEW, view_id)),
field=column.header.name,
Expand All @@ -153,20 +151,23 @@ def populate_logical_column_mapping(
def populate_virtual_views(
self,
connections: Dict[str, ConnectionDetail],
tables: Dict[str, LogicalTableMetadataDetail],
tables: Dict[str, LogicalTableMetadata],
):
for table in tables.values():
table_id = table.header.id
table_type = mapping_data_object_type(table.type)
table_detail = table.metadata_detail
table_id = table_detail.header.id
table_type = mapping_data_object_type(table_detail.type)

field_mappings = []
for column in table.columns:
for column in table_detail.columns:
field_mapping = FieldMapping(destination=column.header.name, sources=[])

assert field_mapping.sources is not None
if table.dataSourceTypeEnum != DataSourceTypeEnum.DEFAULT:
if table_detail.dataSourceTypeEnum != DataSourceTypeEnum.DEFAULT:
# the table upstream is external source, i.e. BigQuery
table_mapping_info = table.logicalTableContent.tableMappingInfo
table_mapping_info = (
table_detail.logicalTableContent.tableMappingInfo
)
if table_mapping_info is None:
logger.warning(
f"tableMappingInfo is missing, skip for column: {column.header.name}"
Expand All @@ -176,7 +177,7 @@ def populate_virtual_views(
source_entity_id = self.find_entity_id_from_connection(
connections,
table_mapping_info,
table.dataSourceId,
table_detail.dataSourceId,
)
field_mapping.sources.append(
SourceField(
Expand Down Expand Up @@ -207,7 +208,7 @@ def populate_virtual_views(
),
structure=AssetStructure(
directories=[table_type.name],
name=table.header.name,
name=table_detail.header.name,
),
thought_spot=ThoughtSpotDataObject(
columns=[
Expand All @@ -217,17 +218,18 @@ def populate_virtual_views(
type=column.dataType if column.dataType else column.type,
optional_type=column.optionalType,
)
for column in table.columns
for column in table_detail.columns
],
name=table.header.name,
description=table.header.description,
name=table_detail.header.name,
description=table_detail.header.description,
type=table_type,
url=f"{self._base_url}/#/data/tables/{table_id}",
is_verified=table.metadata_header.isVerified,
),
entity_upstream=EntityUpstream(
field_mappings=field_mappings if field_mappings else None
),
system_tags=self._get_system_tags(table.header.tags),
system_tags=self._get_system_tags(table_detail.header.tags),
)
self._virtual_views[table_id] = view

Expand Down Expand Up @@ -296,13 +298,13 @@ def populate_formula(self):
def populate_lineage(
self,
connections: Dict[str, ConnectionDetail],
tables: Dict[str, LogicalTableMetadataDetail],
tables: Dict[str, LogicalTableMetadata],
):
"""
Populate lineage between tables/worksheets/views
"""
for view in self._virtual_views.values():
table = tables[view.logical_id.name]
table = tables[view.logical_id.name].metadata_detail

if table.dataSourceTypeEnum != DataSourceTypeEnum.DEFAULT:
# SQL_VIEW case
Expand Down Expand Up @@ -501,14 +503,15 @@ def fetch_dashboards(self):
liveboards = ThoughtSpot.fetch_liveboards(self._client)
self.populate_liveboards(liveboards)

def populate_answers(self, answers: List[AnswerMetadataDetail]):
def populate_answers(self, answers: List[AnswerMetadata]):
for answer in answers:
answer_id = answer.header.id
detail = answer.metadata_detail
answer_id = detail.header.id

visualizations = [
# Use answer.header instead as viz.header contain only dummy values
(viz, answer.header, "")
for sheet in answer.reportContent.sheets
(viz, detail.header, "")
for sheet in detail.reportContent.sheets
for viz in sheet.sheetContent.visualizations
if viz.vizContent.vizType == "CHART"
]
Expand All @@ -520,29 +523,30 @@ def populate_answers(self, answers: List[AnswerMetadataDetail]):
),
structure=AssetStructure(
directories=[ThoughtSpotDashboardType.ANSWER.name],
name=answer.header.name,
name=detail.header.name,
),
dashboard_info=DashboardInfo(
description=answer.header.description,
title=answer.header.name,
description=detail.header.description,
title=detail.header.name,
charts=self._populate_charts(
visualizations, self._base_url, answer_id
),
thought_spot=ThoughtSpotInfo(
type=ThoughtSpotDashboardType.ANSWER,
is_verified=answer.metadata_header.isVerified,
),
dashboard_type=DashboardType.THOUGHT_SPOT_ANSWER,
),
source_info=SourceInfo(
main_url=f"{self._base_url}/#/saved-answer/{answer_id}",
),
system_tags=self._get_system_tags(answer.header.tags),
system_tags=self._get_system_tags(detail.header.tags),
)

self._dashboards[answer_id] = dashboard

def populate_answers_lineage(self, answers: List[AnswerMetadataDetail]):
ids = [answer.header.id for answer in answers]
def populate_answers_lineage(self, answers: List[AnswerMetadata]):
ids = [answer.metadata_detail.header.id for answer in answers]
for tml_result in ThoughtSpot.fetch_tml(self._client, ids):
if not tml_result.edoc:
continue
Expand Down Expand Up @@ -620,14 +624,15 @@ def get_field_mappings_from_answer_sql(

return field_mappings

def populate_liveboards(self, liveboards: List[LiveBoardMetadataDetail]):
def populate_liveboards(self, liveboards: List[LiveBoardMetadata]):
for board in liveboards:
board_id = board.header.id
detail = board.metadata_detail
board_id = detail.header.id

resolvedObjects = board.header.resolvedObjects
resolvedObjects = detail.header.resolvedObjects
answers = {
viz.header.id: resolvedObjects[viz.vizContent.refVizId]
for sheet in board.reportContent.sheets
for sheet in detail.reportContent.sheets
for viz in sheet.sheetContent.visualizations
if viz.vizContent.refVizId
}
Expand All @@ -649,17 +654,18 @@ def populate_liveboards(self, liveboards: List[LiveBoardMetadataDetail]):
),
structure=AssetStructure(
directories=[ThoughtSpotDashboardType.LIVEBOARD.name],
name=board.header.name,
name=detail.header.name,
),
dashboard_info=DashboardInfo(
description=board.header.description,
title=board.header.name,
description=detail.header.description,
title=detail.header.name,
charts=self._populate_charts(
visualizations, self._base_url, board_id
),
thought_spot=ThoughtSpotInfo(
type=ThoughtSpotDashboardType.LIVEBOARD,
embed_url=f"{self._base_url}/#/embed/viz/{board_id}",
is_verified=board.metadata_header.isVerified,
),
dashboard_type=DashboardType.THOUGHT_SPOT_LIVEBOARD,
),
Expand All @@ -672,7 +678,7 @@ def populate_liveboards(self, liveboards: List[LiveBoardMetadataDetail]):
visualizations
),
),
system_tags=self._get_system_tags(board.header.tags),
system_tags=self._get_system_tags(detail.header.tags),
)

self._dashboards[board_id] = dashboard
Expand Down
16 changes: 16 additions & 0 deletions metaphor/thought_spot/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ def __repr__(self):
return self.id


# Pydantic model for the `metadata_header` object. It is declared as the
# `metadata_header` field of LogicalTableMetadata, AnswerMetadata and
# LiveBoardMetadata.
class MetadataHeader(BaseModel):
# Verification flag; the extractor copies this value into the `is_verified`
# attribute of the ThoughtSpot data object / dashboard info it builds.
# Declared without a default, so validation requires it to be present.
isVerified: bool


class Reference(BaseModel):
id: str
name: str
Expand Down Expand Up @@ -170,6 +174,10 @@ class LogicalTableMetadataDetail(Metadata):

class LogicalTableMetadata(BaseModel):
    # One logical-table entry: the detail payload (header, columns, data
    # source fields) paired with the metadata header that carries the
    # `isVerified` flag.
    metadata_detail: LogicalTableMetadataDetail
    metadata_header: MetadataHeader

    def __repr__(self):
        """Represent the entry by the id stored in its detail header."""
        detail_header = self.metadata_detail.header
        return detail_header.id


class AnswerMetadataDetail(Metadata):
Expand All @@ -179,6 +187,10 @@ class AnswerMetadataDetail(Metadata):

class AnswerMetadata(BaseModel):
    # One answer entry: the detail payload (header and report content)
    # paired with the metadata header that carries the `isVerified` flag.
    metadata_detail: AnswerMetadataDetail
    metadata_header: MetadataHeader

    def __repr__(self):
        """Represent the entry by the id stored in its detail header."""
        detail_header = self.metadata_detail.header
        return detail_header.id


class LiveBoardMetadataDetail(Metadata):
Expand All @@ -189,6 +201,10 @@ class LiveBoardMetadataDetail(Metadata):

class LiveBoardMetadata(BaseModel):
    # One liveboard entry: the detail payload (header and report content)
    # paired with the metadata header that carries the `isVerified` flag.
    metadata_detail: LiveBoardMetadataDetail
    metadata_header: MetadataHeader

    def __repr__(self):
        """Represent the entry by the id stored in its detail header."""
        detail_header = self.metadata_detail.header
        return detail_header.id


class TMLResult(BaseModel):
Expand Down
21 changes: 9 additions & 12 deletions metaphor/thought_spot/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,11 @@
from metaphor.thought_spot.config import ThoughtSpotRunConfig
from metaphor.thought_spot.models import (
AnswerMetadata,
AnswerMetadataDetail,
Connection,
ConnectionDetail,
ConnectionType,
LiveBoardMetadata,
LiveBoardMetadataDetail,
LogicalTableMetadata,
LogicalTableMetadataDetail,
SourceType,
TMLResult,
)
Expand Down Expand Up @@ -143,8 +140,8 @@ def fetch_connections(client: TSRestApiV2) -> List[ConnectionDetail]:
return connection_details

@classmethod
def fetch_tables(cls, client: TSRestApiV2) -> List[LogicalTableMetadataDetail]:
table_details: List[LogicalTableMetadataDetail] = []
def fetch_tables(cls, client: TSRestApiV2) -> List[LogicalTableMetadata]:
table_details: List[LogicalTableMetadata] = []

batch_count = 0
batch_size = 100
Expand All @@ -167,15 +164,15 @@ def fetch_tables(cls, client: TSRestApiV2) -> List[LogicalTableMetadataDetail]:
for table in TypeAdapter(List[LogicalTableMetadata]).validate_python(
response
):
table_details.append(table.metadata_detail)
table_details.append(table)

logger.info(f"Extract #{len(table_details)} tables")

return table_details

@classmethod
def fetch_answers(cls, client: TSRestApiV2) -> List[AnswerMetadataDetail]:
answer_details: List[AnswerMetadataDetail] = []
def fetch_answers(cls, client: TSRestApiV2) -> List[AnswerMetadata]:
answer_details: List[AnswerMetadata] = []

batch_count = 0
batch_size = 100
Expand All @@ -196,15 +193,15 @@ def fetch_answers(cls, client: TSRestApiV2) -> List[AnswerMetadataDetail]:
batch_count += 1

for answer in TypeAdapter(List[AnswerMetadata]).validate_python(response):
answer_details.append(answer.metadata_detail)
answer_details.append(answer)

logger.info(f"Extract #{len(answer_details)} liveboards")

return answer_details

@classmethod
def fetch_liveboards(cls, client: TSRestApiV2) -> List[LiveBoardMetadataDetail]:
liveboard_details: List[LiveBoardMetadataDetail] = []
def fetch_liveboards(cls, client: TSRestApiV2) -> List[LiveBoardMetadata]:
liveboard_details: List[LiveBoardMetadata] = []

batch_count = 0
batch_size = 100
Expand All @@ -227,7 +224,7 @@ def fetch_liveboards(cls, client: TSRestApiV2) -> List[LiveBoardMetadataDetail]:
for liveboard in TypeAdapter(List[LiveBoardMetadata]).validate_python(
response
):
liveboard_details.append(liveboard.metadata_detail)
liveboard_details.append(liveboard)

logger.info(f"Extract #{len(liveboard_details)} liveboards")

Expand Down
10 changes: 5 additions & 5 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit b8b0c02

Please sign in to comment.