Skip to content

Commit

Permalink
[sc-25976] Add database_defaults to Metabase crawler config (#846)
Browse files Browse the repository at this point in the history
  • Loading branch information
usefulalgorithm authored Apr 25, 2024
1 parent 06264e8 commit 7b0bd80
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 5 deletions.
10 changes: 10 additions & 0 deletions metaphor/metabase/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ password: <password>
### Optional Configurations
#### Database Defaults
Metabase's API does not provide the default schema used to execute [native queries](https://www.metabase.com/glossary/native_query), which makes it difficult to parse the lineage precisely. To work around this, use `database_defaults` to manually set the default schema for each affected [database](https://www.metabase.com/docs/latest/databases/start):

```yaml
database_defaults:
- id: <id of the database in Metabase>
default_schema: <default schema for the database>
```

#### Output Destination

See [Output Config](../common/docs/output.md) for more information.
Expand Down
11 changes: 11 additions & 0 deletions metaphor/metabase/config.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
from dataclasses import field
from typing import List, Optional

from pydantic.dataclasses import dataclass

from metaphor.common.base_config import BaseConfig
from metaphor.common.dataclass import ConnectorConfig


@dataclass
class MetabaseDatabaseDefaults:
    """Manually configured defaults for a single Metabase database."""

    # Id of the database in Metabase; the extractor matches it against the
    # "id" of each database it parses.
    id: int

    # Default schema for the database; None means no default is configured.
    default_schema: Optional[str] = None


@dataclass(config=ConnectorConfig)
class MetabaseRunConfig(BaseConfig):
    """Run configuration for the Metabase crawler."""

    # Required connection settings.
    server_url: str
    username: str
    password: str

    # Optional per-database defaults (see MetabaseDatabaseDefaults).
    # Empty unless set in the config.
    database_defaults: List[MetabaseDatabaseDefaults] = field(default_factory=list)
22 changes: 19 additions & 3 deletions metaphor/metabase/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def __init__(self, config: MetabaseRunConfig):
self._username = config.username
self._password = config.password
self._session = requests.session()
self._database_defaults = config.database_defaults

self._databases: Dict[int, DatabaseInfo] = {}
self._dashboards: Dict[int, Dashboard] = {}
Expand Down Expand Up @@ -210,21 +211,36 @@ def _parse_database(self, database: Dict) -> None:
database_id = database["id"]
platform = self._db_engine_mapping.get(database["engine"])
details = database.get("details")
default_schema = next(
(
dd.default_schema
for dd in self._database_defaults
if dd.id == database["id"]
),
None,
)

if details is None:
# not able to get connection details, possibly due to lack of Admin permission
return

if platform == DataPlatform.SNOWFLAKE:
self._databases[database_id] = DatabaseInfo(
platform, details.get("db"), None, details.get("account")
platform,
details.get("db"),
default_schema,
details.get("account"),
)
elif platform == DataPlatform.BIGQUERY:
self._databases[database_id] = DatabaseInfo(
platform, details.get("project-id"), details.get("dataset-id"), None
platform,
details.get("project-id"),
default_schema or details.get("dataset-id"),
None,
)
elif platform == DataPlatform.REDSHIFT:
self._databases[database_id] = DatabaseInfo(
platform, details.get("db"), None, None
platform, details.get("db"), default_schema, None
)
# platform not in _db_engine_mapping are not supported

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.13.181"
version = "0.13.182"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down
5 changes: 5 additions & 0 deletions tests/metabase/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@
server_url: https://metaphor.metabaseapp.com
username: foo
password: bar
database_defaults:
- id: 1
default_schema: SCH
- id: 2
default_schema: SCH2
output: {}
12 changes: 11 additions & 1 deletion tests/metabase/test_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from metaphor.common.base_config import OutputConfig
from metaphor.metabase.config import MetabaseRunConfig
from metaphor.metabase.config import MetabaseDatabaseDefaults, MetabaseRunConfig


def test_yaml_config(test_root_dir):
Expand All @@ -9,5 +9,15 @@ def test_yaml_config(test_root_dir):
server_url="https://metaphor.metabaseapp.com",
username="foo",
password="bar",
database_defaults=[
MetabaseDatabaseDefaults(
id=1,
default_schema="SCH",
),
MetabaseDatabaseDefaults(
id=2,
default_schema="SCH2",
),
],
output=OutputConfig(),
)
34 changes: 34 additions & 0 deletions tests/metabase/test_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,37 @@ async def test_extractor(
events = [EventUtil.trim_event(e) for e in await extractor.extract()]

assert events == load_json(f"{test_root_dir}/metabase/expected.json")


def test_parse_database(test_root_dir: str):
    """Check that configured `database_defaults` set the schema of parsed databases."""
    extractor = MetabaseExtractor(
        MetabaseRunConfig.from_yaml_file(f"{test_root_dir}/metabase/config.yml")
    )

    # Three raw database payloads: BigQuery, Redshift and Snowflake.
    raw_databases = [
        {
            "details": {
                "project-id": "bq_db",
            },
            "id": 1,
            "engine": "bigquery-cloud-sdk",
        },
        {
            "details": {
                "db": "redshift_db",
            },
            "id": 2,
            "engine": "redshift",
        },
        {
            "details": {"db": "snowflake_db", "account": "[email protected]"},
            "id": 3,
            "engine": "snowflake",
        },
    ]
    for raw in raw_databases:
        extractor._parse_database(raw)

    parsed = extractor._databases
    assert len(parsed) == 3

    # The test config defines default schemas for ids 1 and 2 only, so the
    # database with id 3 is expected to end up without a schema.
    assert parsed[1].schema == "SCH"
    assert parsed[2].schema == "SCH2"
    assert not parsed[3].schema

0 comments on commit 7b0bd80

Please sign in to comment.