From 810c06bf0e2975860bbf3c7d7f67241c3f8946d9 Mon Sep 17 00:00:00 2001 From: Mars Lan Date: Thu, 1 Aug 2024 07:27:35 -0700 Subject: [PATCH 1/2] Add temporary try-catch to handle snapshot time parsing issue --- metaphor/bigquery/extractor.py | 20 +++++++++++++++----- pyproject.toml | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/metaphor/bigquery/extractor.py b/metaphor/bigquery/extractor.py index cc67a6d3..b32bf06e 100644 --- a/metaphor/bigquery/extractor.py +++ b/metaphor/bigquery/extractor.py @@ -1,5 +1,6 @@ import re from concurrent.futures import ThreadPoolExecutor +from datetime import datetime from typing import ( Any, Collection, @@ -224,13 +225,10 @@ def parse_schema(bq_table: bigquery.table.Table) -> DatasetSchema: table_schema=bq_table.mview_query, ) elif bq_table.table_type == "SNAPSHOT": + schema.sql_schema = SQLSchema( materialization=MaterializationType.SNAPSHOT, - snapshot_time=( - bq_table.snapshot_definition.snapshot_time - if bq_table.snapshot_definition - else None - ), + snapshot_time=BigQueryExtractor._get_snapshot_time(bq_table), ) else: raise ValueError(f"Unexpected table type {bq_table.table_type}") @@ -239,6 +237,18 @@ def parse_schema(bq_table: bigquery.table.Table) -> DatasetSchema: return schema + @staticmethod + def _get_snapshot_time(bq_table: bigquery.table.Table) -> Optional[datetime]: + # bigquery client fails to parse snapshot time sometimes + # See https://github.com/googleapis/python-bigquery/issues/1986 + try: + if bq_table.snapshot_definition: + return bq_table.snapshot_definition.snapshot_time + except ValueError: + return None + + return None + # See https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.schema.SchemaField.html#google.cloud.bigquery.schema.SchemaField @staticmethod def _parse_fields( diff --git a/pyproject.toml b/pyproject.toml index cb518e73..5f0eea4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.61" +version = "0.14.62" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] From e42eab219daa859f3e9a76edd6005e07e4b0da98 Mon Sep 17 00:00:00 2001 From: Mars Lan Date: Thu, 1 Aug 2024 21:22:10 -0700 Subject: [PATCH 2/2] Bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2b5266cd..5b1dc08c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.62" +version = "0.14.63" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "]