From 89b03090e559f96bc43f4661f745ac97004239be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Mon, 23 Dec 2024 22:52:50 +0100 Subject: [PATCH 1/6] fix(tableau): fixes wrong argument when reauthenticating --- .../ingestion/source/tableau/tableau.py | 52 ++++++++++++++----- .../tableau/test_tableau_ingest.py | 5 +- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 984cf9357199d6..4de58e2c6ac6cc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -618,6 +618,12 @@ def update_table( self.parsed_columns = parsed_columns +@dataclass +class SiteIdContentUrl: + site_id: str + site_content_url: str + + class TableauSourceReport(StaleEntityRemovalSourceReport): get_all_datasources_query_failed: bool = False num_get_datasource_query_failures: int = 0 @@ -770,7 +776,6 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: config=self.config, ctx=self.ctx, site=site, - site_id=site.id, report=self.report, server=self.server, platform=self.platform, @@ -789,7 +794,11 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: site_source = TableauSiteSource( config=self.config, ctx=self.ctx, - site=site, + site=site + if site + else SiteIdContentUrl( + site_id=self.server.site_id, site_content_url=self.config.site + ), site_id=self.server.site_id, report=self.report, server=self.server, @@ -823,8 +832,7 @@ def __init__( self, config: TableauConfig, ctx: PipelineContext, - site: Optional[SiteItem], - site_id: Optional[str], + site: Union[SiteItem, SiteIdContentUrl], report: TableauSourceReport, server: Server, platform: str, @@ -835,13 +843,18 @@ def __init__( self.ctx: PipelineContext = ctx self.platform = platform - self.site: Optional[SiteItem] = site - if site_id is not None: - self.site_id: str = site_id + self.site: Optional[SiteItem] = None + if isinstance(site, SiteItem): + self.site = site + assert site.id is not None, "Site ID is required" + self.site_id = site.id + self.site_content_url = site.content_url + elif isinstance(site, SiteIdContentUrl): + self.site = None + self.site_id = site.site_id + self.site_content_url = site.site_content_url else: - assert self.site is not None, "site or site_id is required" - assert self.site.id is not None, "site_id is required when site is provided" - self.site_id = self.site.id + raise AssertionError("site or site id+content_url pair is required") self.database_tables: Dict[str, DatabaseTable] = {} self.tableau_stat_registry: Dict[str, UsageStat] = {} @@ -895,10 +908,21 @@ def dataset_browse_prefix(self) -> str: # datasets also have the env in the browse path return f"/{self.config.env.lower()}{self.no_env_browse_prefix}" - def _re_authenticate(self): - # Sign-in again may not be enough because Tableau sometimes caches invalid sessions - # so we need to recreate the Tableau Server object - self.server = self.config.make_tableau_client(self.site_id) + def _re_authenticate(self) -> None: + if self.site_content_url: + assert self.site_content_url is not None, "Site Content URL is required" + self.report.info( + message="Re-authenticating to Tableau", + context=f"site='{self.site_content_url}'", + ) + # Sign-in again may not be enough because Tableau sometimes caches invalid sessions + # so we need to recreate the Tableau Server object + self.server = self.config.make_tableau_client(self.site_content_url) + else: + self.report.warning( + message="Site Content URL is not set. Unable to re-authenticate.", + context=f"site_id={self.site_id}, site={self.site}", + ) @property def site_content_url(self) -> Optional[str]: diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index c3a8880bf20a09..97c7ba7c2c68a7 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -26,6 +26,7 @@ from datahub.ingestion.run.pipeline import Pipeline, PipelineContext from datahub.ingestion.source.tableau import tableau_constant as c from datahub.ingestion.source.tableau.tableau import ( + SiteIdContentUrl, TableauConfig, TableauProject, TableauSiteSource, @@ -1008,7 +1009,7 @@ def check_lineage_metadata( config=config, ctx=context, platform="tableau", - site=SiteItem(name="Site 1", content_url="site1"), + site=SiteIdContentUrl(site_id="id1", site_content_url="site1"), site_id="site1", report=TableauSourceReport(), server=Server("https://test-tableau-server.com"), @@ -1314,7 +1315,6 @@ def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph): config=mock.MagicMock(), ctx=mock.MagicMock(), site=mock.MagicMock(), - site_id=None, server=mock_sdk.return_value, report=reporter, ) @@ -1372,7 +1372,6 @@ def test_extract_project_hierarchy(extract_project_hierarchy, allowed_projects): ctx=context, platform="tableau", site=SiteItem(name="Site 1", content_url="site1"), - site_id="site1", report=TableauSourceReport(), server=Server("https://test-tableau-server.com"), ) From 0e05c0e795b22cb5138cc3d39d6f17b3292484d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Tue, 24 Dec 2024 10:12:16 +0100 Subject: [PATCH 2/6] Update tableau.py --- .../src/datahub/ingestion/source/tableau/tableau.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 4de58e2c6ac6cc..c82bdb5d2b9027 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -911,10 +911,7 @@ def dataset_browse_prefix(self) -> str: def _re_authenticate(self) -> None: if self.site_content_url: assert self.site_content_url is not None, "Site Content URL is required" - self.report.info( - message="Re-authenticating to Tableau", - context=f"site='{self.site_content_url}'", - ) + self.report.info(f"Re-authenticating to Tableau site='{self.site_content_url}'") # Sign-in again may not be enough because Tableau sometimes caches invalid sessions # so we need to recreate the Tableau Server object self.server = self.config.make_tableau_client(self.site_content_url) From 2e9ce7101a20cb265e3363787c206b959542a99b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Tue, 24 Dec 2024 10:19:01 +0100 Subject: [PATCH 3/6] fixup --- .../src/datahub/ingestion/source/tableau/tableau.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index c82bdb5d2b9027..323798dc4ba81b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -799,7 +799,6 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: else SiteIdContentUrl( site_id=self.server.site_id, site_content_url=self.config.site ), - site_id=self.server.site_id, report=self.report, server=self.server, platform=self.platform, From a4adcf00a2eef3f79ef07ed67d98c3e9f546ef91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Tue, 24 Dec 2024 10:51:57 +0100 Subject: [PATCH 4/6] lint fix --- .../src/datahub/ingestion/source/tableau/tableau.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 323798dc4ba81b..a78fb8eaa9ab61 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -910,7 +910,9 @@ def dataset_browse_prefix(self) -> str: def _re_authenticate(self) -> None: if self.site_content_url: assert self.site_content_url is not None, "Site Content URL is required" - self.report.info(f"Re-authenticating to Tableau site='{self.site_content_url}'") + self.report.info( + f"Re-authenticating to Tableau site='{self.site_content_url}'" + ) # Sign-in again may not be enough because Tableau sometimes caches invalid sessions # so we need to recreate the Tableau Server object self.server = self.config.make_tableau_client(self.site_content_url) From 8e3fdc8c04a10258dfbfb5eb1f57d0cdad9188af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Tue, 24 Dec 2024 11:46:50 +0100 Subject: [PATCH 5/6] remove legacy and unnecessary @property site_content_url --- .../ingestion/source/tableau/tableau.py | 26 +++++-------------- .../tableau/test_tableau_ingest.py | 1 - 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 0d51e68f607508..508500ffe489b9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -911,25 +911,13 @@ def dataset_browse_prefix(self) -> str: return f"/{self.config.env.lower()}{self.no_env_browse_prefix}" def _re_authenticate(self) -> None: - if self.site_content_url: - assert self.site_content_url is not None, "Site Content URL is required" - self.report.info( - f"Re-authenticating to Tableau site='{self.site_content_url}'" - ) - # Sign-in again may not be enough because Tableau sometimes caches invalid sessions - # so we need to recreate the Tableau Server object - self.server = self.config.make_tableau_client(self.site_content_url) - else: - self.report.warning( - message="Site Content URL is not set. Unable to re-authenticate.", - context=f"site_id={self.site_id}, site={self.site}", - ) - - @property - def site_content_url(self) -> Optional[str]: - if self.site and self.site.content_url: - return self.site.content_url - return None + self.report.info( + message="Re-authenticating to Tableau", + context=f"site='{self.site_content_url}'", + ) + # Sign-in again may not be enough because Tableau sometimes caches invalid sessions + # so we need to recreate the Tableau Server object + self.server = self.config.make_tableau_client(self.site_content_url) def _populate_usage_stat_registry(self) -> None: if self.server is None: diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 97c7ba7c2c68a7..2c8e51d9e663b1 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -1010,7 +1010,6 @@ def check_lineage_metadata( ctx=context, platform="tableau", site=SiteIdContentUrl(site_id="id1", site_content_url="site1"), - site_id="site1", report=TableauSourceReport(), server=Server("https://test-tableau-server.com"), ) From acf8969adfe802219df6b55fab74a6cb1206af03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Tue, 24 Dec 2024 15:13:14 +0100 Subject: [PATCH 6/6] fix test --- .../tests/integration/tableau/test_tableau_ingest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 2c8e51d9e663b1..902ff243c802a8 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -1313,7 +1313,7 @@ def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph): platform="tableau", config=mock.MagicMock(), ctx=mock.MagicMock(), - site=mock.MagicMock(), + site=mock.MagicMock(spec=SiteItem, id="Site1", content_url="site1"), server=mock_sdk.return_value, report=reporter, ) @@ -1370,7 +1370,7 @@ def test_extract_project_hierarchy(extract_project_hierarchy, allowed_projects): config=config, ctx=context, platform="tableau", - site=SiteItem(name="Site 1", content_url="site1"), + site=mock.MagicMock(spec=SiteItem, id="Site1", content_url="site1"), report=TableauSourceReport(), server=Server("https://test-tableau-server.com"), )