diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index aa7e5aa352a3e2..48cbcadf2787fa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -104,6 +104,7 @@ DEFAULT_PLATFORM = "glue" +AWS_DATA_CATALOG = "awsdatacatalog" VALID_PLATFORMS = [DEFAULT_PLATFORM, "athena"] @@ -161,6 +162,10 @@ class GlueSourceConfig( stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field( default=None, description="" ) + catalog_alias: str = Field( + default=AWS_DATA_CATALOG, + description="The catalog alias to be used in the dataset URN.", + ) def is_profiling_enabled(self) -> bool: return self.profiling is not None and is_profiling_enabled( @@ -424,7 +429,7 @@ def process_dataflow_node( # we know that the table will already be covered when ingesting Glue tables node_urn = make_dataset_urn_with_platform_instance( platform=self.platform, - name=full_table_name, + name=f"{self.source_config.catalog_alias}.{full_table_name}", env=self.env, platform_instance=self.source_config.platform_instance, ) @@ -953,7 +958,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: dataset_urn = make_dataset_urn_with_platform_instance( platform=self.platform, - name=full_table_name, + name=f"{self.source_config.catalog_alias}.{full_table_name}", env=self.env, platform_instance=self.source_config.platform_instance, )