From 3990b6c3ec863430512b29321867663018dfafbf Mon Sep 17 00:00:00 2001 From: Scott Ssuyi Huang Date: Thu, 25 Jan 2024 17:15:09 +0800 Subject: [PATCH] Extract created information (#764) * Extract table creation time * Update packages * Bump version * Bump version --- metaphor/bigquery/extractor.py | 4 ++ metaphor/snowflake/extractor.py | 19 +++++++-- metaphor/unity_catalog/extractor.py | 12 +++--- metaphor/unity_catalog/utils.py | 4 ++ poetry.lock | 55 ++++++++++++++++----------- pyproject.toml | 4 +- tests/bigquery/expected.json | 8 ++++ tests/bigquery/test_extractor.py | 4 ++ tests/snowflake/test_extractor.py | 31 +++++++++++++-- tests/unity_catalog/expected.json | 8 +++- tests/unity_catalog/test_extractor.py | 2 + 11 files changed, 114 insertions(+), 37 deletions(-) diff --git a/metaphor/bigquery/extractor.py b/metaphor/bigquery/extractor.py index bafa993a..5d1ec8a2 100644 --- a/metaphor/bigquery/extractor.py +++ b/metaphor/bigquery/extractor.py @@ -50,6 +50,7 @@ QueryLog, SchemaField, SchemaType, + SourceInfo, SQLSchema, TypeEnum, ) @@ -199,6 +200,9 @@ def _parse_table(project_id, bq_table: bigquery.table.Table) -> Dataset: structure=DatasetStructure( database=project_id, schema=bq_table.dataset_id, table=bq_table.table_id ), + source_info=SourceInfo( + created_at_source=bq_table.created, last_updated=bq_table.modified + ), ) # See https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.table.Table.html#google.cloud.bigquery.table.Table diff --git a/metaphor/snowflake/extractor.py b/metaphor/snowflake/extractor.py index ea62a6ed..938e58d5 100644 --- a/metaphor/snowflake/extractor.py +++ b/metaphor/snowflake/extractor.py @@ -174,7 +174,7 @@ def _fetch_shared_databases(cursor: SnowflakeCursor) -> List[str]: return [db[3].lower() for db in cursor if db[1] == "INBOUND"] FETCH_TABLE_QUERY = """ - SELECT table_catalog, table_schema, table_name, table_type, COMMENT, row_count, bytes + SELECT table_catalog, table_schema, table_name, table_type, COMMENT, row_count, bytes, created FROM information_schema.tables WHERE table_schema != 'INFORMATION_SCHEMA' ORDER BY table_schema, table_name @@ -199,6 +199,7 @@ def _fetch_tables( comment, row_count, table_bytes, + created, ) in cursor: normalized_name = dataset_normalized_name(database, schema, name) if not self._filter.include_table(database, schema, name): @@ -213,7 +214,14 @@ def _fetch_tables( continue self._datasets[normalized_name] = self._init_dataset( - database, schema, name, table_type, comment, row_count, table_bytes + database, + schema, + name, + table_type, + comment, + row_count, + table_bytes, + created, ) tables[normalized_name] = DatasetInfo(database, schema, name, table_type) @@ -564,6 +572,7 @@ def _fetch_streams(self, cursor: SnowflakeCursor, database: str, schema: str): cursor.execute(f"SHOW STREAMS IN {schema}") for entry in cursor: ( + create_on, stream_name, comment, source_name, @@ -572,6 +581,7 @@ def _fetch_streams(self, cursor: SnowflakeCursor, database: str, schema: str): stream_type_str, stale_after, ) = ( + entry[0], entry[1], entry[5], entry[6], @@ -598,6 +608,7 @@ def _fetch_streams(self, cursor: SnowflakeCursor, database: str, schema: str): comment=comment, row_count=row_count, table_bytes=None, # Not applicable to streams + created=create_on, ) def _to_dataset_eid(x: str) -> str: @@ -784,6 +795,7 @@ def _init_dataset( comment: str, row_count: Optional[int], table_bytes: Optional[float], + created: Optional[datetime] = None, ) -> Dataset: normalized_name = dataset_normalized_name(database, schema, table) dataset = Dataset() @@ -793,7 +805,8 @@ def _init_dataset( ) dataset.source_info = SourceInfo( - main_url=SnowflakeExtractor.build_table_url(self._account, normalized_name) + main_url=SnowflakeExtractor.build_table_url(self._account, normalized_name), + created_at_source=created, ) sql_schema = SQLSchema() diff --git a/metaphor/unity_catalog/extractor.py b/metaphor/unity_catalog/extractor.py index 15df3552..0098bd62 100644 --- a/metaphor/unity_catalog/extractor.py +++ b/metaphor/unity_catalog/extractor.py @@ -1,4 +1,3 @@ -import datetime import json import logging import re @@ -52,6 +51,7 @@ build_query_log_filter_by, create_api, create_connection, + from_timestamp_ms, list_column_lineage, list_table_lineage, ) @@ -226,7 +226,11 @@ def _init_dataset(self, table_info: TableInfo) -> Dataset: ) main_url = self._get_source_url(database, schema_name, table_name) - dataset.source_info = SourceInfo(main_url=main_url) + dataset.source_info = SourceInfo( + main_url=main_url, + created_at_source=from_timestamp_ms(table_info.created_at), + last_updated=from_timestamp_ms(table_info.updated_at), + ) dataset.unity_catalog = UnityCatalog( table_type=UnityCatalogTableType[table_info.table_type.value], @@ -334,9 +338,7 @@ def _get_query_logs(self) -> QueryLogs: ): start_time = None if query_info.query_start_time_ms is not None: - start_time = datetime.datetime.fromtimestamp( - query_info.query_start_time_ms / 1000, tz=datetime.timezone.utc - ) + start_time = from_timestamp_ms(query_info.query_start_time_ms) user_id, email = user_id_or_email(query_info.user_name) diff --git a/metaphor/unity_catalog/utils.py b/metaphor/unity_catalog/utils.py index dbf87e65..71a7c271 100644 --- a/metaphor/unity_catalog/utils.py +++ b/metaphor/unity_catalog/utils.py @@ -121,3 +121,7 @@ def create_connection( def create_api(host: str, token: str) -> WorkspaceClient: return WorkspaceClient(host=host, token=token) + + +def from_timestamp_ms(timestamp: int) -> datetime.datetime: + return datetime.datetime.fromtimestamp(timestamp / 1000, tz=datetime.timezone.utc) diff --git a/poetry.lock b/poetry.lock index 103bff53..ced52438 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. [[package]] name = "aiohttp" @@ -581,8 +581,8 @@ files = [ jmespath = ">=0.7.1,<2.0.0" python-dateutil = ">=2.1,<3.0.0" urllib3 = [ - {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""}, + {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, ] [package.extras] @@ -1030,8 +1030,8 @@ files = [ [package.dependencies] lz4 = ">=4.0.2,<5.0.0" numpy = [ - {version = ">=1.16.6", markers = "python_version >= \"3.8\" and python_version < \"3.11\""}, {version = ">=1.23.4", markers = "python_version >= \"3.11\""}, + {version = ">=1.16.6", markers = "python_version >= \"3.8\" and python_version < \"3.11\""}, ] oauthlib = ">=3.1.0,<4.0.0" openpyxl = ">=3.0.10,<4.0.0" @@ -1081,9 +1081,9 @@ isort = ">=4.3.21,<6.0" jinja2 = ">=2.10.1,<4.0" packaging = "*" pydantic = [ + {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"4.0\""}, {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""}, {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"4.0\""}, ] pyyaml = ">=6.0.1" toml = {version = ">=0.10.0,<1.0.0", markers = "python_version < \"3.11\""} @@ -1506,12 +1506,12 @@ files = [ google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ - {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" @@ -1641,8 +1641,8 @@ google-cloud-audit-log = ">=0.1.0,<1.0.0dev" google-cloud-core = ">=2.0.0,<3.0.0dev" grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" proto-plus = [ - {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, {version = ">=1.22.2,<2.0.0dev", markers = "python_version >= \"3.11\""}, + {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" @@ -2509,16 +2509,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -2595,13 +2585,13 @@ files = [ [[package]] name = "metaphor-models" -version = "0.30.13" +version = "0.30.15" description = "" optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "metaphor_models-0.30.13-py3-none-any.whl", hash = "sha256:da3e3d6609b54bbaafb1ae29fa78ba97afca37316192dfcb93488031ec431074"}, - {file = "metaphor_models-0.30.13.tar.gz", hash = "sha256:14cc143c66fae3615c0d6da8e79cf12cee4e8cc14251463a091e9d1f8e7f1835"}, + {file = "metaphor_models-0.30.15-py3-none-any.whl", hash = "sha256:ace8c404be5175c9d2e3e741406bba409f41847af533b8f66062e67dde6a0553"}, + {file = "metaphor_models-0.30.15.tar.gz", hash = "sha256:c48a91fbd98aebe695510f0748e56faf1933fbb15e32bf364c498f030a8acff2"}, ] [[package]] @@ -3216,9 +3206,9 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, {version = ">=1.20.3", markers = "python_version < \"3.10\""}, {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -4767,30 +4757,51 @@ description = "Database Abstraction Library" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ + {file = "SQLAlchemy-1.4.51-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:1a09d5bd1a40d76ad90e5570530e082ddc000e1d92de495746f6257dc08f166b"}, {file = "SQLAlchemy-1.4.51-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2be4e6294c53f2ec8ea36486b56390e3bcaa052bf3a9a47005687ccf376745d1"}, {file = "SQLAlchemy-1.4.51-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ca484ca11c65e05639ffe80f20d45e6be81fbec7683d6c9a15cd421e6e8b340"}, {file = "SQLAlchemy-1.4.51-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0535d5b57d014d06ceeaeffd816bb3a6e2dddeb670222570b8c4953e2d2ea678"}, {file = "SQLAlchemy-1.4.51-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af55cc207865d641a57f7044e98b08b09220da3d1b13a46f26487cc2f898a072"}, + {file = "SQLAlchemy-1.4.51-cp310-cp310-win32.whl", hash = "sha256:7af40425ac535cbda129d9915edcaa002afe35d84609fd3b9d6a8c46732e02ee"}, + {file = "SQLAlchemy-1.4.51-cp310-cp310-win_amd64.whl", hash = "sha256:8d1d7d63e5d2f4e92a39ae1e897a5d551720179bb8d1254883e7113d3826d43c"}, + {file = "SQLAlchemy-1.4.51-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:eaeeb2464019765bc4340214fca1143081d49972864773f3f1e95dba5c7edc7d"}, {file = "SQLAlchemy-1.4.51-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7deeae5071930abb3669b5185abb6c33ddfd2398f87660fafdb9e6a5fb0f3f2f"}, {file = "SQLAlchemy-1.4.51-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0892e7ac8bc76da499ad3ee8de8da4d7905a3110b952e2a35a940dab1ffa550e"}, + {file = "SQLAlchemy-1.4.51-cp311-cp311-win32.whl", hash = "sha256:50e074aea505f4427151c286955ea025f51752fa42f9939749336672e0674c81"}, + {file = "SQLAlchemy-1.4.51-cp311-cp311-win_amd64.whl", hash = "sha256:3b0cd89a7bd03f57ae58263d0f828a072d1b440c8c2949f38f3b446148321171"}, + {file = "SQLAlchemy-1.4.51-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a33cb3f095e7d776ec76e79d92d83117438b6153510770fcd57b9c96f9ef623d"}, {file = "SQLAlchemy-1.4.51-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cacc0b2dd7d22a918a9642fc89840a5d3cee18a0e1fe41080b1141b23b10916"}, {file = "SQLAlchemy-1.4.51-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:245c67c88e63f1523e9216cad6ba3107dea2d3ee19adc359597a628afcabfbcb"}, + {file = "SQLAlchemy-1.4.51-cp312-cp312-win32.whl", hash = "sha256:8e702e7489f39375601c7ea5a0bef207256828a2bc5986c65cb15cd0cf097a87"}, + {file = "SQLAlchemy-1.4.51-cp312-cp312-win_amd64.whl", hash = "sha256:0525c4905b4b52d8ccc3c203c9d7ab2a80329ffa077d4bacf31aefda7604dc65"}, + {file = "SQLAlchemy-1.4.51-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:1980e6eb6c9be49ea8f89889989127daafc43f0b1b6843d71efab1514973cca0"}, {file = "SQLAlchemy-1.4.51-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ec7a0ed9b32afdf337172678a4a0e6419775ba4e649b66f49415615fa47efbd"}, {file = "SQLAlchemy-1.4.51-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:352df882088a55293f621328ec33b6ffca936ad7f23013b22520542e1ab6ad1b"}, {file = "SQLAlchemy-1.4.51-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:86a22143a4001f53bf58027b044da1fb10d67b62a785fc1390b5c7f089d9838c"}, {file = "SQLAlchemy-1.4.51-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c37bc677690fd33932182b85d37433845de612962ed080c3e4d92f758d1bd894"}, + {file = "SQLAlchemy-1.4.51-cp36-cp36m-win32.whl", hash = "sha256:d0a83afab5e062abffcdcbcc74f9d3ba37b2385294dd0927ad65fc6ebe04e054"}, + {file = "SQLAlchemy-1.4.51-cp36-cp36m-win_amd64.whl", hash = "sha256:a61184c7289146c8cff06b6b41807c6994c6d437278e72cf00ff7fe1c7a263d1"}, + {file = "SQLAlchemy-1.4.51-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:3f0ef620ecbab46e81035cf3dedfb412a7da35340500ba470f9ce43a1e6c423b"}, {file = "SQLAlchemy-1.4.51-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c55040d8ea65414de7c47f1a23823cd9f3fad0dc93e6b6b728fee81230f817b"}, {file = "SQLAlchemy-1.4.51-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38ef80328e3fee2be0a1abe3fe9445d3a2e52a1282ba342d0dab6edf1fef4707"}, {file = "SQLAlchemy-1.4.51-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f8cafa6f885a0ff5e39efa9325195217bb47d5929ab0051636610d24aef45ade"}, {file = "SQLAlchemy-1.4.51-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8f2df79a46e130235bc5e1bbef4de0583fb19d481eaa0bffa76e8347ea45ec6"}, + {file = "SQLAlchemy-1.4.51-cp37-cp37m-win32.whl", hash = "sha256:f2e5b6f5cf7c18df66d082604a1d9c7a2d18f7d1dbe9514a2afaccbb51cc4fc3"}, + {file = "SQLAlchemy-1.4.51-cp37-cp37m-win_amd64.whl", hash = "sha256:5e180fff133d21a800c4f050733d59340f40d42364fcb9d14f6a67764bdc48d2"}, + {file = "SQLAlchemy-1.4.51-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:7d8139ca0b9f93890ab899da678816518af74312bb8cd71fb721436a93a93298"}, {file = "SQLAlchemy-1.4.51-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb18549b770351b54e1ab5da37d22bc530b8bfe2ee31e22b9ebe650640d2ef12"}, {file = "SQLAlchemy-1.4.51-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55e699466106d09f028ab78d3c2e1f621b5ef2c8694598242259e4515715da7c"}, {file = "SQLAlchemy-1.4.51-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2ad16880ccd971ac8e570550fbdef1385e094b022d6fc85ef3ce7df400dddad3"}, {file = "SQLAlchemy-1.4.51-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b97fd5bb6b7c1a64b7ac0632f7ce389b8ab362e7bd5f60654c2a418496be5d7f"}, + {file = "SQLAlchemy-1.4.51-cp38-cp38-win32.whl", hash = "sha256:cecb66492440ae8592797dd705a0cbaa6abe0555f4fa6c5f40b078bd2740fc6b"}, + {file = "SQLAlchemy-1.4.51-cp38-cp38-win_amd64.whl", hash = "sha256:39b02b645632c5fe46b8dd30755682f629ffbb62ff317ecc14c998c21b2896ff"}, + {file = "SQLAlchemy-1.4.51-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:b03850c290c765b87102959ea53299dc9addf76ca08a06ea98383348ae205c99"}, {file = "SQLAlchemy-1.4.51-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e646b19f47d655261b22df9976e572f588185279970efba3d45c377127d35349"}, {file = "SQLAlchemy-1.4.51-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3cf56cc36d42908495760b223ca9c2c0f9f0002b4eddc994b24db5fcb86a9e4"}, {file = "SQLAlchemy-1.4.51-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0d661cff58c91726c601cc0ee626bf167b20cc4d7941c93c5f3ac28dc34ddbea"}, {file = "SQLAlchemy-1.4.51-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3823dda635988e6744d4417e13f2e2b5fe76c4bf29dd67e95f98717e1b094cad"}, + {file = "SQLAlchemy-1.4.51-cp39-cp39-win32.whl", hash = "sha256:b00cf0471888823b7a9f722c6c41eb6985cf34f077edcf62695ac4bed6ec01ee"}, + {file = "SQLAlchemy-1.4.51-cp39-cp39-win_amd64.whl", hash = "sha256:a055ba17f4675aadcda3005df2e28a86feb731fdcc865e1f6b4f209ed1225cba"}, {file = "SQLAlchemy-1.4.51.tar.gz", hash = "sha256:e7908c2025eb18394e32d65dd02d2e37e17d733cdbe7d78231c2b6d7eb20cdb9"}, ] @@ -5550,4 +5561,4 @@ unity-catalog = ["databricks-sdk", "databricks-sql-connector"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.12" -content-hash = "08af8eda92d754d8ec0cce1e2434b1f0802b5754a3abd53e24d23b55a86d6151" +content-hash = "188b8b1db5683cc5657de31d9b40a2fa1a176192b9a6a8921c04e1838e2dfb2a" diff --git a/pyproject.toml b/pyproject.toml index 73a2c58d..fadffc7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.13.111" +version = "0.13.112" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] @@ -36,7 +36,7 @@ jsonschema = "^4.18.6" lkml = { version = "^1.3.1", optional = true } llama-hub = {version = "0.0.67", optional = true } looker-sdk = { version = "^23.6.0", optional = true } -metaphor-models = "0.30.13" +metaphor-models = "0.30.15" more-itertools = { version = "^10.1.0", optional = true } msal = { version = "^1.20.0", optional = true } msgraph-beta-sdk = { version = "1.0.0", optional = true } diff --git a/tests/bigquery/expected.json b/tests/bigquery/expected.json index 7393d8b9..58526b87 100644 --- a/tests/bigquery/expected.json +++ b/tests/bigquery/expected.json @@ -36,6 +36,10 @@ "database": "project1", "schema": "dataset1", "table": "table1" + }, + "sourceInfo": { + "createdAtSource": "2000-01-01T00:00:00+00:00", + "lastUpdated": "2000-01-02T00:00:00+00:00" } }, { @@ -92,6 +96,10 @@ "database": "project1", "schema": "dataset1", "table": "table2" + }, + "sourceInfo": { + "createdAtSource": "2000-01-01T00:00:00+00:00", + "lastUpdated": "2000-01-02T00:00:00+00:00" } }, { diff --git a/tests/bigquery/test_extractor.py b/tests/bigquery/test_extractor.py index c5f93f38..f551b95a 100644 --- a/tests/bigquery/test_extractor.py +++ b/tests/bigquery/test_extractor.py @@ -36,6 +36,7 @@ def mock_table_full( num_bytes=0, num_rows=0, modified=datetime.fromisoformat("2000-01-01"), + created=datetime.fromisoformat("2000-01-01"), ): table = MagicMock() table.dataset_id = dataset_id @@ -48,6 +49,7 @@ def mock_table_full( table.num_bytes = num_bytes table.num_rows = num_rows table.modified = modified.replace(tzinfo=timezone.utc) + table.created = created.replace(tzinfo=timezone.utc) return table @@ -126,6 +128,7 @@ async def test_extractor( mock_build_client, { ("dataset1", "table1"): mock_table_full( + created=datetime.fromisoformat("2000-01-01"), dataset_id="dataset1", table_id="table1", table_type="TABLE", @@ -149,6 +152,7 @@ async def test_extractor( num_rows=100, ), ("dataset1", "table2"): mock_table_full( + created=datetime.fromisoformat("2000-01-01"), dataset_id="dataset1", table_id="table2", table_type="VIEW", diff --git a/tests/snowflake/test_extractor.py b/tests/snowflake/test_extractor.py index 7161120a..af202dd9 100644 --- a/tests/snowflake/test_extractor.py +++ b/tests/snowflake/test_extractor.py @@ -85,8 +85,26 @@ def test_fetch_tables(mock_connect: MagicMock): mock_cursor.__iter__.return_value = iter( [ - (database, schema, table_name, table_type, "comment1", 10, 20000), - (database, schema, "foo.bar", table_type, "", 0, 0), + ( + database, + schema, + table_name, + table_type, + "comment1", + 10, + 20000, + None, + ), + ( + database, + schema, + "foo.bar", + table_type, + "", + 0, + 0, + datetime.fromisoformat("2024-01-01"), + ), ] ) @@ -176,7 +194,14 @@ def test_fetch_table_info_with_unknown_type(mock_connect: MagicMock): extractor._conn = mock_connect dataset = extractor._init_dataset( - "db", "schema", "table", "BAD_TYPE", "comment", None, None + "db", + "schema", + "table", + "BAD_TYPE", + "comment", + None, + None, + None, ) assert dataset.schema.sql_schema.materialization is None diff --git a/tests/unity_catalog/expected.json b/tests/unity_catalog/expected.json index 8ceea2f1..151e034f 100644 --- a/tests/unity_catalog/expected.json +++ b/tests/unity_catalog/expected.json @@ -50,7 +50,9 @@ ] }, "sourceInfo": { - "mainUrl": "http://dummy.host/explore/data/catalog/schema/table" + "mainUrl": "http://dummy.host/explore/data/catalog/schema/table", + "createdAtSource": "1970-01-01T00:00:00+00:00", + "lastUpdated": "1970-01-01T00:00:00+00:00" }, "structure": { "database": "catalog", @@ -116,7 +118,9 @@ } }, "sourceInfo": { - "mainUrl": "http://dummy.host/explore/data/catalog/schema/view" + "mainUrl": "http://dummy.host/explore/data/catalog/schema/view", + "createdAtSource": "1970-01-01T00:00:00+00:00", + "lastUpdated": "1970-01-01T00:00:00+00:00" }, "structure": { "database": "catalog", diff --git a/tests/unity_catalog/test_extractor.py b/tests/unity_catalog/test_extractor.py index a35cd3e7..a725790f 100644 --- a/tests/unity_catalog/test_extractor.py +++ b/tests/unity_catalog/test_extractor.py @@ -79,6 +79,7 @@ def mock_list_tables(catalog, schema): properties={ "delta.lastCommitTimestamp": "1664444422000", }, + created_at=0, ), Table( name="view", @@ -115,6 +116,7 @@ def mock_list_tables(catalog, schema): "view.referredTempFunctionsNames": "[]", "view.catalogAndNamespace.part.1": "default", }, + created_at=0, ), ]