diff --git a/metaphor/bigquery/extractor.py b/metaphor/bigquery/extractor.py
index cc67a6d3..b32bf06e 100644
--- a/metaphor/bigquery/extractor.py
+++ b/metaphor/bigquery/extractor.py
@@ -1,5 +1,6 @@
 import re
 from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
 from typing import (
     Any,
     Collection,
@@ -224,13 +225,9 @@ def parse_schema(bq_table: bigquery.table.Table) -> DatasetSchema:
                 table_schema=bq_table.mview_query,
             )
         elif bq_table.table_type == "SNAPSHOT":
             schema.sql_schema = SQLSchema(
                 materialization=MaterializationType.SNAPSHOT,
-                snapshot_time=(
-                    bq_table.snapshot_definition.snapshot_time
-                    if bq_table.snapshot_definition
-                    else None
-                ),
+                snapshot_time=BigQueryExtractor._get_snapshot_time(bq_table),
             )
         else:
             raise ValueError(f"Unexpected table type {bq_table.table_type}")
@@ -239,6 +236,21 @@ def parse_schema(bq_table: bigquery.table.Table) -> DatasetSchema:
 
         return schema
 
+    @staticmethod
+    def _get_snapshot_time(bq_table: bigquery.table.Table) -> Optional[datetime]:
+        """Return the snapshot time of ``bq_table``, or None if unavailable.
+
+        The bigquery client sometimes fails to parse the snapshot time and raises
+        ValueError; that case is reported as None instead of failing the caller.
+        See https://github.com/googleapis/python-bigquery/issues/1986
+        """
+        try:
+            # Guard both attribute reads; it is not specified which one raises.
+            definition = bq_table.snapshot_definition
+            return definition.snapshot_time if definition else None
+        except ValueError:
+            return None
+
     # See https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.schema.SchemaField.html#google.cloud.bigquery.schema.SchemaField
     @staticmethod
     def _parse_fields(
diff --git a/pyproject.toml b/pyproject.toml
index 2b5266cd..5b1dc08c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "metaphor-connectors"
-version = "0.14.62"
+version = "0.14.63"
 license = "Apache-2.0"
 description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
 authors = ["Metaphor "]