From d71fcd322f478d194a5bd2c8454d97ba193ada55 Mon Sep 17 00:00:00 2001 From: Maiara Reinaldo Date: Mon, 4 Mar 2024 09:47:04 +0000 Subject: [PATCH] feat(ingest/glue): add catalog alias to the urn --- .../src/datahub/ingestion/source/aws/glue.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index aa7e5aa352a3e2..3d0977c27bec0b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -104,6 +104,7 @@ DEFAULT_PLATFORM = "glue" +AWS_DATA_CATALOG = "awsdatacatalog" VALID_PLATFORMS = [DEFAULT_PLATFORM, "athena"] @@ -161,6 +162,10 @@ class GlueSourceConfig( stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field( default=None, description="" ) + catalog_alias: str = Field( + default=AWS_DATA_CATALOG, + description="The catalog alias to be used in the dataset URN.", + ) def is_profiling_enabled(self) -> bool: return self.profiling is not None and is_profiling_enabled( @@ -953,7 +958,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: dataset_urn = make_dataset_urn_with_platform_instance( platform=self.platform, - name=full_table_name, + name=f"{self.source_config.catalog_alias}.{full_table_name}", env=self.env, platform_instance=self.source_config.platform_instance, )