Skip to content

Commit

Permalink
[ThoughtSpot] Use new connection endpoint (#598)
Browse files Browse the repository at this point in the history
* WIP

* [ThoughtSpot] Use connection/search endpoint

* Bump version

* Bump version

* Add invalid configuration
  • Loading branch information
elic-eon authored Sep 13, 2023
1 parent 03ea4e7 commit 7534fdd
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 96 deletions.
19 changes: 13 additions & 6 deletions metaphor/thought_spot/extractor.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from itertools import chain
from typing import Collection, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -44,7 +45,7 @@
from metaphor.thought_spot.config import ThoughtSpotRunConfig
from metaphor.thought_spot.models import (
AnswerMetadataDetail,
ConnectionMetadataDetail,
ConnectionDetail,
DataSourceTypeEnum,
Header,
LiveBoardMetadataDetail,
Expand Down Expand Up @@ -136,7 +137,7 @@ def populate_logical_column_mapping(

def populate_virtual_views(
self,
connections: Dict[str, ConnectionMetadataDetail],
connections: Dict[str, ConnectionDetail],
tables: Dict[str, LogicalTableMetadataDetail],
):
for table in tables.values():
Expand Down Expand Up @@ -258,7 +259,7 @@ def populate_formula(self):

def populate_lineage(
self,
connections: Dict[str, ConnectionMetadataDetail],
connections: Dict[str, ConnectionDetail],
tables: Dict[str, LogicalTableMetadataDetail],
):
"""
Expand Down Expand Up @@ -324,22 +325,28 @@ def populate_lineage(

@staticmethod
def get_source_entity_id_from_connection(
    connections: Dict[str, ConnectionDetail],
    normalized_name: str,
    source_id: str,
) -> str:
    """Build the dataset entity id for a table reached through a connection.

    Args:
        connections: Connection details keyed by connection id.
        normalized_name: Normalized name of the source dataset.
        source_id: Key into ``connections`` for the connection to use.

    Returns:
        The string form of the entity id produced by ``to_dataset_entity_id``.
        ``account`` is taken from the ``accountName`` key of the connection's
        configuration, or is ``None`` when the configuration cannot be parsed,
        is not a JSON object, or has no such key.

    Raises:
        KeyError: If ``source_id`` is not present in ``connections``.
    """
    connection = connections[source_id]

    # ``configuration`` is a JSON-encoded string; treat an unparsable value
    # as "no configuration" instead of failing the whole extraction.
    try:
        configuration = json.loads(connection.configuration)
    except json.JSONDecodeError:
        configuration = None

    # Valid JSON is not necessarily an object (e.g. ``null`` or a list), and
    # only a dict supports the ``accountName`` lookup.
    account_name = (
        configuration.get("accountName") if isinstance(configuration, dict) else None
    )

    return str(
        to_dataset_entity_id(
            normalized_name,
            mapping_data_platform(connection.type),
            account=account_name,
        )
    )

@staticmethod
def find_entity_id_from_connection(
connections: Dict[str, ConnectionMetadataDetail],
connections: Dict[str, ConnectionDetail],
mapping: TableMappingInfo,
source_id: str,
) -> str:
Expand Down
18 changes: 7 additions & 11 deletions metaphor/thought_spot/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class Header(BaseModel):
description: Optional[str]
tags: List[Tag] = []

def __repr__(self):
return self.id


class Reference(BaseModel):
id: str
Expand Down Expand Up @@ -142,25 +145,18 @@ class LiveBoardHeader(Header):
resolvedObjects: Dict[str, ResolvedObject]


class ConnectionHeader(Header):
type: str
authorName: str = ""


class LogicalTable(Metadata):
columns: List[ColumnMetadata]
logicalTableContent: LogicalTableContent


class ConnectionMetadataDetail(Metadata):
header: Header
class ConnectionDetail(Header):
type: ConnectionType
dataSourceContent: DataSourceContent
logicalTableList: List[LogicalTable]
configuration: str


class ConnectionMetadata(BaseModel):
metadata_detail: ConnectionMetadataDetail
class Connection(BaseModel):
details: ConnectionDetail


class LogicalTableMetadataDetail(Metadata):
Expand Down
21 changes: 11 additions & 10 deletions metaphor/thought_spot/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
from metaphor.thought_spot.models import (
AnswerMetadata,
AnswerMetadataDetail,
ConnectionMetadata,
ConnectionMetadataDetail,
Connection,
ConnectionDetail,
ConnectionType,
LiveBoardMetadata,
LiveBoardMetadataDetail,
Expand Down Expand Up @@ -107,32 +107,33 @@ def create_client(config: ThoughtSpotRunConfig) -> TSRestApiV2:
return client

@staticmethod
def fetch_connections(client: TSRestApiV2) -> List[ConnectionMetadataDetail]:
def fetch_connections(client: TSRestApiV2) -> List[ConnectionDetail]:
supported_platform = set(ConnectionType)

connection_details: List[ConnectionMetadataDetail] = []
connection_details: List[ConnectionDetail] = []

batch_count = 0
batch_size = 100

while True:
search_response = client.metadata_search(
search_response = client.connection_search(
{
"metadata": [{"type": "CONNECTION"}],
"include_details": True,
"record_size": batch_size,
"record_offset": batch_count * batch_size,
}
)
json_dump_to_debug_file(search_response, "metadata_search__connection.json")
json_dump_to_debug_file(
search_response, f"connection_search__{batch_count}.json"
)

batch_count += 1

connections = parse_obj_as(List[ConnectionMetadata], search_response)
connections = parse_obj_as(List[Connection], search_response)

for connection in connections:
if connection.metadata_detail.type in supported_platform:
connection_details.append(connection.metadata_detail)
if connection.details.type in supported_platform:
connection_details.append(connection.details)

if len(connections) < batch_size:
break
Expand Down
19 changes: 9 additions & 10 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.12.44"
version = "0.12.45"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down Expand Up @@ -45,7 +45,7 @@ SQLAlchemy = { version = "^1.4.46", optional = true}
sql-metadata = { version = "^2.8.0", optional = true }
sqllineage = { version = "~=1.3.8", optional = true }
tableauserverclient = { version = "^0.25", optional = true }
thoughtspot_rest_api_v1 = { version = "1.4.1", optional = true }
thoughtspot_rest_api_v1 = { version = "1.5.3", optional = true }

[tool.poetry.extras]
all = [
Expand Down
73 changes: 20 additions & 53 deletions tests/thought_spot/data/connections.json
Original file line number Diff line number Diff line change
@@ -1,59 +1,26 @@
[
{
"metadata_detail": {
"header": {
"id": "conn1",
"name": "Connection 1",
"description": "connection 1 description",
"tags": []
},
"details": {
"name": "BigQuery",
"type": "RDBMS_GCP_BIGQUERY",
"logicalTableList": [
{
"columns": [
{
"header": {
"id": "table1.col1",
"name": "col1"
},
"dataType": "VARCHAR",
"type": "ATTRIBUTE",
"sources": []
}
],
"logicalTableContent": {
"joinType": "INNER",
"worksheetType": "VIEW",
"tableMappingInfo": {
"databaseName": "project",
"schemaName": "schema",
"tableName": "table",
"tableType": "TABLE"
}
},
"type": "ONE_TO_ONE_LOGICAL",
"dataSourceId": "conn1",
"dataSourceTypeEnum": "RDBMS_GCP_BIGQUERY",
"header": {
"id": "table1",
"name": "Table 1",
"description": "This is table1",
"tags": [
{
"name": "table",
"isDeleted": false,
"isHidden": false,
"isDeprecated": false
}
]
}
}
],
"dataSourceContent": {
"configuration": {
"project_id": "project"
}
}
"id": "conn1",
"configuration": "{\"oauth_pvt_key\":\"******\",\"project_id\":\"google-project\",\"password\":\"******\"}"
}
},
{
"details": {
"name": "Snowflake",
"type": "RDBMS_SNOWFLAKE",
"id": "conn2",
"configuration": "{\"password\":\"******\",\"role\":\"SNOWFLAKE_ROLE\",\"warehouse\":\"WH\",\"accountName\":\"account\",\"user\":\"user\"}"
}
},
{
"details": {
"name": "Snowflake",
"type": "RDBMS_SNOWFLAKE",
"id": "conn3",
"configuration": "..."
}
}
]
34 changes: 33 additions & 1 deletion tests/thought_spot/data/data_objects.json
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@
"worksheetType": "VIEW"
},
"type": "SQL_VIEW",
"dataSourceId": "conn1",
"dataSourceId": "conn2",
"dataSourceTypeEnum": "RDBMS_SNOWFLAKE",
"header": {
"id": "sql_view_1",
Expand All @@ -148,5 +148,37 @@
"type": "SQL_VIEW"
}
}
},
{
"metadata_detail": {
"columns": [
{
"header": {
"id": "sql_col2_id",
"name": "sql_col2"
},
"dataType": "VARCHAR",
"type": "ATTRIBUTE",
"columnMappingInfo": {
"columnName": "sql_col2"
},
"sources": []
}
],
"logicalTableContent": {
"sqlQuery": "select c as sql_col2 from project.schema.table",
"joinType": "INNER",
"worksheetType": "VIEW"
},
"type": "SQL_VIEW",
"dataSourceId": "conn3",
"dataSourceTypeEnum": "RDBMS_SNOWFLAKE",
"header": {
"id": "sql_view_2",
"name": "JOIN SQL view",
"tags": [],
"type": "SQL_VIEW"
}
}
}
]
Loading

0 comments on commit 7534fdd

Please sign in to comment.