From 85c21760c8d1a8ec7271d5499ff8f75a690d3a4a Mon Sep 17 00:00:00 2001 From: David Sanchez Date: Wed, 4 Oct 2023 18:10:58 +0200 Subject: [PATCH 1/3] Fix comparison to extract comments from complex types --- .../src/datahub/ingestion/source/bigquery_v2/bigquery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index f6adbcf033bcc5..7061c62f90af22 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -1043,7 +1043,7 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: # Remove all the [version=2.0].[type=struct]. tags to get the field path if ( re.sub(r"\[.*?\]\.", "", field.fieldPath, 0, re.MULTILINE) - == col.field_path + == col.field_path.lower() ): field.description = col.comment schema_fields[idx] = field From 28b29f7ca8a1635f4d88f2959089d0984120cab0 Mon Sep 17 00:00:00 2001 From: David Sanchez Date: Wed, 25 Oct 2023 15:09:55 +0200 Subject: [PATCH 2/3] Update metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py add suggestion Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> --- .../src/datahub/ingestion/source/bigquery_v2/bigquery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 7061c62f90af22..7e6d8e57c1f2ae 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -1042,7 +1042,7 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: for idx, field in enumerate(schema_fields): # Remove all the [version=2.0].[type=struct]. tags to get the field path if ( - re.sub(r"\[.*?\]\.", "", field.fieldPath, 0, re.MULTILINE) + re.sub(r"\[.*?\]\.", "", field.fieldPath.lower(), 0, re.MULTILINE) == col.field_path.lower() ): field.description = col.comment From dd96372ed15dabe5a4a2fde89b89a5b1d6284081 Mon Sep 17 00:00:00 2001 From: David Sanchez Date: Thu, 26 Oct 2023 10:23:01 +0200 Subject: [PATCH 3/3] Fix lint --- .../src/datahub/ingestion/source/bigquery_v2/bigquery.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 4c3117f93befb3..6959a483130106 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -1050,7 +1050,13 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: for idx, field in enumerate(schema_fields): # Remove all the [version=2.0].[type=struct]. tags to get the field path if ( - re.sub(r"\[.*?\]\.", "", field.fieldPath.lower(), 0, re.MULTILINE) + re.sub( + r"\[.*?\]\.", + "", + field.fieldPath.lower(), + 0, + re.MULTILINE, + ) == col.field_path.lower() ): field.description = col.comment