Skip to content

Commit

Permalink
Drop crawler-side SQL parsing from Metabase connector (#840)
Browse files: browse the repository at this point in the history.
* Drop crawler-side SQL parsing from Metabase crawler

* Drop sql-metadata dependency from metabase extra
  • Loading branch information
mars-lan authored Apr 22, 2024
1 parent 771de4b commit 83a1845
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 57 deletions.
71 changes: 16 additions & 55 deletions metaphor/metabase/extractor.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import json
from dataclasses import dataclass
from itertools import chain
from typing import Collection, Dict, List, Optional, Set, Tuple, Union
from typing import Collection, Dict, List, Optional, Union

import requests
from sql_metadata import Parser

from metaphor.common.base_extractor import BaseExtractor
from metaphor.common.entity_id import dataset_normalized_name, to_dataset_entity_id
Expand Down Expand Up @@ -305,10 +304,7 @@ def _parse_chart(self, card: Dict) -> Optional[ChartInfo]:
upstream_tables.add(dataset_id)

elif query_type == "native":
chart_query, dataset_ids = self._parse_native_query(dataset_query)
if chart_query is not None:
chart.query = chart_query
upstream_tables.update(dataset_ids)
chart.query = self._parse_native_query(dataset_query)

else:
logger.error(f"Unsupported query type {query_type}")
Expand Down Expand Up @@ -348,53 +344,18 @@ def _get_table_by_id(self, table_id: int) -> Optional[str]:
self._tables[table_id] = dataset_id
return dataset_id

def _parse_native_query(
self, dataset_query: Dict
) -> Tuple[Optional[ChartQuery], Set[str]]:
try:
native_query = dataset_query["native"]["query"]
tables = Parser(native_query).tables

database_id = dataset_query.get("database", 0)
database = self._databases.get(database_id)
if database is None:
raise ValueError(f"database {database_id} not found")

chart_query = ChartQuery(
query=native_query,
platform=database.platform,
account=database.account,
default_database=database.database,
default_schema=database.schema,
)
except Exception as e:
logger.error(f"Failed to get native query: {e}")
return None, set()

try:
dataset_ids = set()
for table in tables:
segments = table.count(".") + 1
if segments == 3:
dataset_name = table
elif segments == 2:
dataset_name = f"{database.database}.{table}"
elif segments == 1:
dataset_name = f"{database.database}.{database.schema}.{table}"
else:
raise ValueError(f"invalid table name {table}")

dataset_ids.add(
str(
to_dataset_entity_id(
dataset_name.replace("`", "").lower(),
database.platform,
database.account,
)
)
)
def _parse_native_query(self, dataset_query: Dict) -> ChartQuery:
native_query = dataset_query["native"]["query"]

return chart_query, dataset_ids
except Exception as e:
logger.error(f"SQL parsing error: {e}, query: {native_query}")
return chart_query, set()
database_id = dataset_query.get("database", 0)
database = self._databases.get(database_id)
if database is None:
raise ValueError(f"database {database_id} not found")

return ChartQuery(
query=native_query,
platform=database.platform,
account=database.account,
default_database=database.database,
default_schema=database.schema,
)
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.13.175"
version = "0.13.177"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down Expand Up @@ -121,7 +121,7 @@ dbt = []
hive = ["pyhive", "sasl", "thrift", "thrift-sasl"]
kafka = ["confluent-kafka", "avro", "grpcio-tools"]
looker = ["GitPython", "lkml", "looker-sdk", "sql-metadata"]
metabase = ["sql-metadata"]
metabase = []
monday = ["llama-index", "llama-index-embeddings-azure-openai"]
monte_carlo = ["pycarlo"]
mssql = ["pymssql"]
Expand Down

0 comments on commit 83a1845

Please sign in to comment.