Skip to content

Commit

Permalink
Fix dbt 1.6.6 manifest schema validation errors (#627)
Browse files Browse the repository at this point in the history
  • Loading branch information
mars-lan authored Oct 13, 2023
1 parent ba7f4ed commit 27ea1a7
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 4 deletions.
25 changes: 22 additions & 3 deletions metaphor/dbt/manifest_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,14 +213,16 @@ def __init__(
if self._account and platform == DataPlatform.SNOWFLAKE:
self._account = normalize_snowflake_account(self._account)

def sanitize_manifest(self, manifest_json: Dict, schema_version: str) -> Dict:
@staticmethod
def sanitize_manifest(manifest_json: Dict, schema_version: str) -> Dict:
manifest_json = deepcopy(manifest_json)

# It's possible for dbt to generate "docs block" in the manifest that doesn't
# conform to the JSON schema. Specifically, the "name" field can be None in
# some cases. Since the field is not actually used, it's safe to clear it out to
# avoid hitting any validation issues.
manifest_json["docs"] = {}
if manifest_json.get("docs") is not None:
manifest_json["docs"] = {}

# dbt can erroneously generate null for test's "depends_on"
# Filter these out for now
Expand All @@ -245,6 +247,23 @@ def sanitize_manifest(self, manifest_json: Dict, schema_version: str) -> Dict:
for measure in semantic_model.get("measures", []):
measure.pop("label", None)

# Temporarily strip off all the extra "fill_nulls_with" &
# "join_to_teimspine" fields in metrics' "type_params" until
# https://github.com/dbt-labs/dbt-core/issues/8851 is fixed
for _, metric in manifest_json.get("metrics", {}).items():
type_params = metric.get("type_params", {})

measure = type_params.get("measure", {})
if measure is not None:
measure.pop("fill_nulls_with", None)
measure.pop("join_to_timespine", None)

input_measures = type_params.get("input_measures", [])
if input_measures is not None:
for input_measure in input_measures:
input_measure.pop("fill_nulls_with", None)
input_measure.pop("join_to_timespine", None)

return manifest_json

def parse(self, manifest_json: Dict) -> None:
Expand All @@ -257,7 +276,7 @@ def parse(self, manifest_json: Dict) -> None:
)
logger.info(f"parsing manifest.json {schema_version} ...")

manifest_json = self.sanitize_manifest(manifest_json, schema_version)
manifest_json = ManifestParser.sanitize_manifest(manifest_json, schema_version)

dbt_manifest_class = dbt_version_manifest_class_map.get(schema_version)
if dbt_manifest_class is None:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.13.4"
version = "0.13.5"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down
125 changes: 125 additions & 0 deletions tests/dbt/test_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from metaphor.common.event_util import EventUtil
from metaphor.dbt.config import DbtRunConfig, MetaOwnership, MetaTag
from metaphor.dbt.extractor import DbtExtractor
from metaphor.dbt.manifest_parser import ManifestParser
from tests.test_utils import load_json


Expand Down Expand Up @@ -89,3 +90,127 @@ async def _test_project(
events = [EventUtil.trim_event(e) for e in await extractor.extract()]

assert events == load_json(expected)


def test_sanitize_manifest_empty_docs(test_root_dir):
manifest = {
"docs": {"foo": "bar"},
}

assert ManifestParser.sanitize_manifest(manifest, "v10") == {
"docs": {},
}


def test_sanitize_manifest_strip_null_tests_depends_on(test_root_dir):
manifest = {
"nodes": {
"test.example": {
"depends_on": {
"macros": [None, "macro1"],
"nodes": ["node1", None, "node2"],
}
}
}
}

assert ManifestParser.sanitize_manifest(manifest, "v10") == {
"nodes": {
"test.example": {
"depends_on": {
"macros": ["macro1"],
"nodes": ["node1", "node2"],
}
}
}
}


def test_sanitize_manifest_strip_semantic_models_labels(test_root_dir):
manifest = {
"semantic_models": {
"model1": {
"label": "foo",
"entities": [
{
"name": "entity1",
"label": "bar",
}
],
"dimensions": [
{
"name": "dim1",
"label": "baz",
}
],
"measures": [
{
"name": "measure1",
"label": "qux",
}
],
}
}
}

assert ManifestParser.sanitize_manifest(manifest, "v10") == {
"semantic_models": {
"model1": {
"entities": [
{
"name": "entity1",
}
],
"dimensions": [
{
"name": "dim1",
}
],
"measures": [
{
"name": "measure1",
}
],
}
}
}


def test_sanitize_manifest_strip_metric_type_params_extra_fields(test_root_dir):
manifest = {
"metrics": {
"metric1": {
"type_params": {
"measure": {
"name": "measure1",
"fill_nulls_with": "foo",
"join_to_timespine": "bar",
},
"input_measures": [
{
"name": "measure2",
"fill_nulls_with": "foo",
"join_to_timespine": "bar",
}
],
}
}
}
}

assert ManifestParser.sanitize_manifest(manifest, "v10") == {
"metrics": {
"metric1": {
"type_params": {
"measure": {
"name": "measure1",
},
"input_measures": [
{
"name": "measure2",
}
],
}
}
}
}

0 comments on commit 27ea1a7

Please sign in to comment.