From f1bb4f6f6ab9f6499f2049e560eaf8c417eb46b7 Mon Sep 17 00:00:00 2001 From: Mars Lan Date: Mon, 5 Aug 2024 08:35:46 -0700 Subject: [PATCH] Normalize dbt owner email address --- metaphor/common/entity_id.py | 2 +- poetry.lock | 45 +++++++++++++++++++++++---- pyproject.toml | 7 +++-- tests/dbt/data/trial_v4/expected.json | 2 +- tests/dbt/data/trial_v4/manifest.json | 2 +- 5 files changed, 47 insertions(+), 11 deletions(-) diff --git a/metaphor/common/entity_id.py b/metaphor/common/entity_id.py index 864aadc9..a8777ebc 100644 --- a/metaphor/common/entity_id.py +++ b/metaphor/common/entity_id.py @@ -82,7 +82,7 @@ def to_person_entity_id(email: str) -> EntityId: """ return EntityId( EntityType.PERSON, - PersonLogicalID(email=email), + PersonLogicalID(email=email.lower()), ) diff --git a/poetry.lock b/poetry.lock index 0e48808c..05a2655a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiohttp" @@ -3764,6 +3764,20 @@ files = [ [package.dependencies] cryptography = ">=3.2.1" +[[package]] +name = "outcome" +version = "1.3.0.post0" +description = "Capture the outcome of Python function calls." +optional = false +python-versions = ">=3.7" +files = [ + {file = "outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b"}, + {file = "outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8"}, +] + +[package.dependencies] +attrs = ">=19.2.0" + [[package]] name = "oyaml" version = "1.0" @@ -4934,7 +4948,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -5485,7 +5498,7 @@ secure-local-storage = ["keyring (>=23.1.0,<25.0.0)"] name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" -optional = true +optional = false python-versions = "*" files = [ {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, @@ -6072,6 +6085,26 @@ kerberos = ["requests-kerberos"] sqlalchemy = ["sqlalchemy (>=1.3)"] tests = ["black", "httpretty (<1.1)", "isort", "pre-commit", "pytest", "pytest-runner", "requests-kerberos", "sqlalchemy (>=1.3)"] +[[package]] +name = "trio" +version = "0.26.1" +description = "A friendly Python library for async concurrency and I/O" +optional = false +python-versions = ">=3.8" +files = [ + {file = "trio-0.26.1-py3-none-any.whl", hash = "sha256:998bbdc5797621e1976c86820b1bc341cc66b51d2618a31cc8720ddd7df8affe"}, + {file = "trio-0.26.1.tar.gz", hash = "sha256:6d2fe7ee656146d598ec75128ff4a2386576801b42b691f4a91cc2c18508544a"}, +] + +[package.dependencies] +attrs = ">=23.2.0" +cffi = {version = ">=1.14", markers = "os_name == \"nt\" and implementation_name != \"pypy\""} +exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} +idna = "*" +outcome = "*" +sniffio = ">=1.3.0" +sortedcontainers = "*" + [[package]] name = "types-attrs" version = "19.1.0" @@ -6469,12 +6502,12 @@ doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linke test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] [extras] -all = ["GitPython", "SQLAlchemy", "asyncpg", "avro", "azure-identity", "azure-mgmt-datafactory", "beautifulsoup4", "confluent-kafka", "databricks-sdk", "databricks-sql-connector", "fastavro", "google-cloud-bigquery", "google-cloud-logging", "gql", "grpcio-tools", "lkml", "llama-index", "llama-index-embeddings-azure-openai", "llama-index-readers-confluence", "llama-index-readers-notion", "looker-sdk", "lxml", "more-itertools", "msal", "msgraph-beta-sdk", "oracledb", "parse", "pycarlo", "pyhive", "pymssql", "pymysql", "sasl", "snowflake-connector-python", "sql-metadata", "sqlglot", "sqllineage", "tableauserverclient", "thoughtspot_rest_api_v1", "thrift", "thrift-sasl", "trino"] +all = ["GitPython", "SQLAlchemy", "asyncpg", "avro", "azure-identity", "azure-mgmt-datafactory", "beautifulsoup4", "confluent-kafka", "databricks-sdk", "databricks-sql-connector", "fastavro", "google-cloud-bigquery", "google-cloud-logging", "gql", "grpcio-tools", "httpx", "lkml", "llama-index", "llama-index-embeddings-azure-openai", "llama-index-readers-confluence", "llama-index-readers-notion", "looker-sdk", "lxml", "more-itertools", "msal", "msgraph-beta-sdk", "oracledb", "parse", "pycarlo", "pyhive", "pymssql", "pymysql", "sasl", "snowflake-connector-python", "sql-metadata", "sqlglot", "sqllineage", "tableauserverclient", "thoughtspot_rest_api_v1", "thrift", "thrift-sasl", "trino"] bigquery = ["google-cloud-bigquery", "google-cloud-logging", "sql-metadata"] confluence = ["llama-index", "llama-index-embeddings-azure-openai", "llama-index-readers-confluence"] datafactory = ["azure-identity", "azure-mgmt-datafactory"] datahub = ["gql"] -dbt = [] +dbt = ["httpx"] hive = ["pyhive", "sasl", "thrift", "thrift-sasl"] kafka = ["avro", "confluent-kafka", "grpcio-tools"] looker = ["GitPython", "lkml", "looker-sdk"] @@ -6501,4 +6534,4 @@ unity-catalog = ["databricks-sdk", "databricks-sql-connector", "sqlglot"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.12" -content-hash = "27b50f23d4cbf3a1985a0442fbc1a1df51dca090aaae4d6b46f4424acdf62e1c" +content-hash = "08d77fb141acbcb7cadebc9146eccd3cb445444520d989f2d019b1e5a4e005a9" diff --git a/pyproject.toml b/pyproject.toml index ccebc528..bf3656cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.66" +version = "0.14.67" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] @@ -33,6 +33,7 @@ google-cloud-bigquery = { version = "^3.25.0", optional = true } google-cloud-logging = { version = "^3.5.0", optional = true } gql = { extras = ["requests"], version = "^3.4.1", optional = true } grpcio-tools = { version = "^1.59.3", optional = true } +httpx = "^0.27.0" jsonschema = "^4.18.6" lkml = { version = "^1.3.1", optional = true } llama-index = { version = "^0.10.19", optional = true } @@ -70,6 +71,7 @@ thoughtspot_rest_api_v1 = { version = "1.5.3", optional = true } thrift = { version = "^0.16.0", optional = true } thrift-sasl = { version = "^0.4.3", optional = true } trino = { version = "^0.327.0", optional = true } +trio = "^0.26.1" [tool.poetry.extras] all = [ @@ -87,6 +89,7 @@ all = [ "google-cloud-logging", "gql", "grpcio-tools", + "httpx", "lkml", "looker-sdk", "llama-index", @@ -121,7 +124,7 @@ bigquery = ["google-cloud-bigquery", "google-cloud-logging", "sql-metadata"] confluence = ["llama-index", "llama-index-embeddings-azure-openai", "llama-index-readers-confluence"] datafactory = ["azure-identity", "azure-mgmt-datafactory"] datahub = ["gql"] -dbt = [] +dbt = ["httpx"] hive = ["pyhive", "sasl", "thrift", "thrift-sasl"] kafka = ["confluent-kafka", "avro", "grpcio-tools"] looker = ["GitPython", "lkml", "looker-sdk"] diff --git a/tests/dbt/data/trial_v4/expected.json b/tests/dbt/data/trial_v4/expected.json index bfd5827a..82e1aa05 100644 --- a/tests/dbt/data/trial_v4/expected.json +++ b/tests/dbt/data/trial_v4/expected.json @@ -180,7 +180,7 @@ "meta": [ { "key": "owner", - "value": "\"yi@metaphor.io\"" + "value": "\"YI@METAPHOR.IO\"" }, { "key": "pii", diff --git a/tests/dbt/data/trial_v4/manifest.json b/tests/dbt/data/trial_v4/manifest.json index 83a17640..596948c9 100644 --- a/tests/dbt/data/trial_v4/manifest.json +++ b/tests/dbt/data/trial_v4/manifest.json @@ -26,7 +26,7 @@ "database": null, "tags": [], "meta": { - "owner": "yi@metaphor.io", + "owner": "YI@METAPHOR.IO", "pii": true }, "materialized": "view",