From c1a0f5d562f6ec8e4c9542b61c41d62bc848ab2f Mon Sep 17 00:00:00 2001 From: Mars Lan Date: Tue, 3 Oct 2023 20:58:05 -0700 Subject: [PATCH] Fix dbt connector by removing undocumented fields (#615) * Fix dbt connector by removing undocumented fields * Add test for safe_del --- .pre-commit-config.yaml | 2 +- metaphor/dbt/gen_models.sh | 18 +++++++++++------- metaphor/dbt/manifest_parser.py | 14 ++++++++++++++ poetry.lock | 16 ++++++++-------- pyproject.toml | 6 +++--- 5 files changed, 37 insertions(+), 19 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 116fdf8e..27b02763 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: exclude: ^(metaphor/dbt/generated/.+)$ - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.3.0 + rev: v1.5.1 hooks: - id: mypy additional_dependencies: diff --git a/metaphor/dbt/gen_models.sh b/metaphor/dbt/gen_models.sh index 8678449c..15718a94 100755 --- a/metaphor/dbt/gen_models.sh +++ b/metaphor/dbt/gen_models.sh @@ -1,6 +1,9 @@ #!/bin/bash set -e +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)" +cd ${SCRIPT_DIR} + # Generate various data models for dbt manifest & catalog using official JSON schemas if [ $# -ne 2 ]; then @@ -11,14 +14,14 @@ fi SCHEMA="$1" VERSION="$2" -URL=https://schemas.getdbt.com/dbt/$SCHEMA/$VERSION.json +URL=https://schemas.getdbt.com/dbt/${SCHEMA}/${VERSION}.json OUTPUT=generated/dbt_${SCHEMA}_${VERSION}.py CLASS_NAME="" -if [[ "$SCHEMA" == "manifest" ]]; then +if [[ "${SCHEMA}" == "manifest" ]]; then CLASS_NAME="DbtManifest" -elif [[ "$SCHEMA" == "catalog" ]]; then +elif [[ "${SCHEMA}" == "catalog" ]]; then CLASS_NAME="DbtCatalog" else echo -e "Choose either 'manifest' or 'catalog'" @@ -26,10 +29,11 @@ else fi poetry run datamodel-codegen \ - --url $URL \ - --class-name $CLASS_NAME \ + --url ${URL} \ + --class-name ${CLASS_NAME} \ --enum-field-as-literal all \ - --output $OUTPUT + --input-file-type jsonschema \ + --output ${OUTPUT} # Disable mypy type-checking for generated files -sed -i '' '1s;^;# mypy: ignore-errors\n\n;' $OUTPUT +sed -i '' '1s;^;# mypy: ignore-errors\n\n;' ${OUTPUT} diff --git a/metaphor/dbt/manifest_parser.py b/metaphor/dbt/manifest_parser.py index 2579aa18..fea7c7ce 100644 --- a/metaphor/dbt/manifest_parser.py +++ b/metaphor/dbt/manifest_parser.py @@ -244,6 +244,20 @@ def sanitize_manifest(self, manifest_json: Dict, schema_version: str) -> Dict: # avoid hitting any validation issues. manifest_json["docs"] = {} + # Temporarily strip off all the extra "labels" in "semantic_models" until + # https://github.com/dbt-labs/dbt-core/issues/8763 is fixed + for _, semantic_model in manifest_json.get("semantic_models", {}).items(): + semantic_model.pop("label", None) + + for entity in semantic_model.get("entities", []): + entity.pop("label", None) + + for dimension in semantic_model.get("dimensions", []): + dimension.pop("label", None) + + for measure in semantic_model.get("measures", []): + measure.pop("label", None) + return manifest_json def parse(self, manifest_json: Dict) -> None: diff --git a/poetry.lock b/poetry.lock index c859ac70..b9495bc3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -1329,13 +1329,13 @@ dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest ( [[package]] name = "datamodel-code-generator" -version = "0.21.4" +version = "0.22.0" description = "Datamodel Code Generator" optional = false python-versions = ">=3.7,<4.0" files = [ - {file = "datamodel_code_generator-0.21.4-py3-none-any.whl", hash = "sha256:aef3bcaa68be085a98ec2050f9299d1f85eceee72dcac13fce11a788b98daa9f"}, - {file = "datamodel_code_generator-0.21.4.tar.gz", hash = "sha256:79982f67413e41e7910a6e52dbcc87d0c17d7fc8a2dcf2bf93dd550cfcab6c25"}, + {file = "datamodel_code_generator-0.22.0-py3-none-any.whl", hash = "sha256:5cf8fc4fb6fe7aa750595a558cd4fcd43e36e862f40b0fa4cc123b4548b16a1e"}, + {file = "datamodel_code_generator-0.22.0.tar.gz", hash = "sha256:73ebcefa498e39d0f210923856cb4a498bacc3b7bdea140cca7324e25f5c581b"}, ] [package.dependencies] @@ -1845,12 +1845,12 @@ files = [ google-auth = ">=2.14.1,<3.0dev" googleapis-common-protos = ">=1.56.2,<2.0dev" grpcio = [ + {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""}, {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, - {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ + {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""}, {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, - {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" requests = ">=2.18.0,<3.0.0dev" @@ -4793,7 +4793,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\")"} [package.extras] aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] @@ -5405,4 +5405,4 @@ unity-catalog = ["databricks-cli"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "44d69fcaa536bdda15a6c8731222d6c44e6b533bd9c2e06bbba7eca4d311313a" +content-hash = "df35ab575ec28f10f7c8f6c9aaa3761cb897f6cc877a6fcb7fc8987f2ba5df9a" diff --git a/pyproject.toml b/pyproject.toml index be465ed2..430efcef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.12.58" +version = "0.12.59" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] @@ -92,11 +92,11 @@ apache-airflow = "^2.6.3" bandit = "^1.7.2" black = "^23.3.0" coverage = "^7.1.0" -datamodel-code-generator = { extras = ["http"], version = "^0.21.0" } +datamodel-code-generator = { extras = ["http"], version = "^0.22.0" } flake8 = "^6.0.0" freezegun = "^1.2.2" isort = "^5.11.4" -mypy = "^1.3" +mypy = "^1.5.1" pytest = "^7.2.1" pytest-asyncio = "^0.21.0" pytest-cov = "^4.0.0"