diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py index e1301edef10b8..161975fa635fd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py @@ -84,13 +84,14 @@ def __init__( tenant_id: str, metadata_api_timeout: int, ): - self.__access_token: Optional[str] = None - self.__access_token_expiry_time: Optional[datetime] = None - self.__tenant_id = tenant_id + self._access_token: Optional[str] = None + self._access_token_expiry_time: Optional[datetime] = None + + self._tenant_id = tenant_id # Test connection by generating access token logger.info(f"Trying to connect to {self._get_authority_url()}") # Power-Bi Auth (Service Principal Auth) - self.__msal_client = msal.ConfidentialClientApplication( + self._msal_client = msal.ConfidentialClientApplication( client_id, client_credential=client_secret, authority=DataResolverBase.AUTHORITY + tenant_id, @@ -168,18 +169,18 @@ def _get_app( pass def _get_authority_url(self): - return f"{DataResolverBase.AUTHORITY}{self.__tenant_id}" + return f"{DataResolverBase.AUTHORITY}{self._tenant_id}" def get_authorization_header(self): return {Constant.Authorization: self.get_access_token()} - def get_access_token(self): - if self.__access_token is not None and not self._is_access_token_expired(): - return self.__access_token + def get_access_token(self) -> str: + if self._access_token is not None and not self._is_access_token_expired(): + return self._access_token logger.info("Generating PowerBi access token") - auth_response = self.__msal_client.acquire_token_for_client( + auth_response = self._msal_client.acquire_token_for_client( scopes=[DataResolverBase.SCOPE] ) @@ -193,24 +194,24 @@ def get_access_token(self): logger.info("Generated PowerBi access token") - self.__access_token = "Bearer {}".format( + self._access_token = "Bearer {}".format( auth_response.get(Constant.ACCESS_TOKEN) ) safety_gap = 300 - self.__access_token_expiry_time = datetime.now() + timedelta( + self._access_token_expiry_time = datetime.now() + timedelta( seconds=( max(auth_response.get(Constant.ACCESS_TOKEN_EXPIRY, 0) - safety_gap, 0) ) ) - logger.debug(f"{Constant.PBIAccessToken}={self.__access_token}") + logger.debug(f"{Constant.PBIAccessToken}={self._access_token}") - return self.__access_token + return self._access_token def _is_access_token_expired(self) -> bool: - if not self.__access_token_expiry_time: + if not self._access_token_expiry_time: return True - return self.__access_token_expiry_time < datetime.now() + return self._access_token_expiry_time < datetime.now() def get_dashboards(self, workspace: Workspace) -> List[Dashboard]: """ diff --git a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py index b636c12cfda06..00dc79ed38cfb 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py +++ b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py @@ -1,5 +1,3 @@ -import logging -import sys from typing import Any, Dict from unittest import mock @@ -483,12 +481,6 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None ) -def enable_logging(): - # set logging to console - logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) - logging.getLogger().setLevel(logging.DEBUG) - - def mock_msal_cca(*args, **kwargs): class MsalClient: def acquire_token_for_client(self, *args, **kwargs): @@ -527,8 +519,6 @@ def default_source_config(): @freeze_time(FROZEN_TIME) @mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) def test_admin_only_apis(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock): - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_admin_api(request_mock=requests_mock) @@ -567,8 +557,6 @@ def test_admin_only_apis(mock_msal, pytestconfig, tmp_path, mock_time, requests_ def test_most_config_and_modified_since( mock_msal, pytestconfig, tmp_path, mock_time, requests_mock ): - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_admin_api(request_mock=requests_mock) diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index edde11ff87d29..739be7cc8408d 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -1,8 +1,6 @@ import datetime import json -import logging import re -import sys from pathlib import Path from typing import Any, Dict, List, Optional, Union, cast from unittest import mock @@ -31,29 +29,21 @@ FROZEN_TIME = "2022-02-03 07:00:00" -def enable_logging(): - # set logging to console - logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) - logging.getLogger().setLevel(logging.DEBUG) - - -class MsalClient: - call_num = 0 - token: Dict[str, Any] = { - "access_token": "dummy", - } - - @staticmethod - def acquire_token_for_client(*args, **kwargs): - MsalClient.call_num += 1 - return MsalClient.token +def mock_msal_cca(*args, **kwargs): + class MsalClient: + def __init__(self): + self.call_num = 0 + self.token: Dict[str, Any] = { + "access_token": "dummy", + } - @staticmethod - def reset(): - MsalClient.call_num = 0 + def acquire_token_for_client(self, *args, **kwargs): + self.call_num += 1 + return self.token + def reset(self): + self.call_num = 0 -def mock_msal_cca(*args, **kwargs): return MsalClient() @@ -154,8 +144,6 @@ def test_powerbi_ingest( mock_time: datetime.datetime, requests_mock: Any, ) -> None: - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) @@ -199,8 +187,6 @@ def test_powerbi_workspace_type_filter( mock_time: datetime.datetime, requests_mock: Any, ) -> None: - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api( @@ -260,8 +246,6 @@ def test_powerbi_ingest_patch_disabled( mock_time: datetime.datetime, requests_mock: Any, ) -> None: - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) @@ -327,8 +311,6 @@ def test_powerbi_platform_instance_ingest( mock_time: datetime.datetime, requests_mock: Any, ) -> None: - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) @@ -515,8 +497,6 @@ def test_extract_reports( mock_time: datetime.datetime, requests_mock: Any, ) -> None: - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) @@ -561,8 +541,6 @@ def test_extract_lineage( mock_time: datetime.datetime, requests_mock: Any, ) -> None: - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) @@ -660,8 +638,6 @@ def test_admin_access_is_not_allowed( mock_time: datetime.datetime, requests_mock: Any, ) -> None: - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api( @@ -723,8 +699,6 @@ def test_workspace_container( mock_time: datetime.datetime, requests_mock: Any, ) -> None: - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) @@ -764,85 +738,84 @@ def test_workspace_container( ) -@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) def test_access_token_expiry_with_long_expiry( - mock_msal: MagicMock, pytestconfig: pytest.Config, tmp_path: str, mock_time: datetime.datetime, requests_mock: Any, ) -> None: - enable_logging() - register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) - pipeline = Pipeline.create( - { - "run_id": "powerbi-test", - "source": { - "type": "powerbi", - "config": { - **default_source_config(), + mock_msal = mock_msal_cca() + + with mock.patch("msal.ConfidentialClientApplication", return_value=mock_msal): + pipeline = Pipeline.create( + { + "run_id": "powerbi-test", + "source": { + "type": "powerbi", + "config": { + **default_source_config(), + }, }, - }, - "sink": { - "type": "file", - "config": { - "filename": f"{tmp_path}/powerbi_access_token_mces.json", + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/powerbi_access_token_mces.json", + }, }, - }, - } - ) + } + ) # for long expiry, the token should only be requested once. - MsalClient.token = { + mock_msal.token = { "access_token": "dummy2", "expires_in": 3600, } + mock_msal.reset() - MsalClient.reset() pipeline.run() # We expect the token to be requested twice (once for AdminApiResolver and one for RegularApiResolver) - assert MsalClient.call_num == 2 + assert mock_msal.call_num == 2 -@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) def test_access_token_expiry_with_short_expiry( - mock_msal: MagicMock, pytestconfig: pytest.Config, tmp_path: str, mock_time: datetime.datetime, requests_mock: Any, ) -> None: - enable_logging() - register_mock_api(pytestconfig=pytestconfig, request_mock=requests_mock) - pipeline = Pipeline.create( - { - "run_id": "powerbi-test", - "source": { - "type": "powerbi", - "config": { - **default_source_config(), + mock_msal = mock_msal_cca() + with mock.patch("msal.ConfidentialClientApplication", return_value=mock_msal): + pipeline = Pipeline.create( + { + "run_id": "powerbi-test", + "source": { + "type": "powerbi", + "config": { + **default_source_config(), + }, }, - }, - "sink": { - "type": "file", - "config": { - "filename": f"{tmp_path}/powerbi_access_token_mces.json", + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/powerbi_access_token_mces.json", + }, }, - }, - } - ) + } + ) # for short expiry, the token should be requested when expires. - MsalClient.token = { + mock_msal.token = { "access_token": "dummy", "expires_in": 0, } + mock_msal.reset() + pipeline.run() - assert MsalClient.call_num > 2 + assert mock_msal.call_num > 2 def dataset_type_mapping_set_to_all_platform(pipeline: Pipeline) -> None: @@ -940,8 +913,6 @@ def test_dataset_type_mapping_error( def test_server_to_platform_map( mock_msal, pytestconfig, tmp_path, mock_time, requests_mock ): - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" new_config: dict = { **default_source_config(), @@ -1416,8 +1387,6 @@ def test_powerbi_cross_workspace_reference_info_message( mock_time: datetime.datetime, requests_mock: Any, ) -> None: - enable_logging() - register_mock_api( pytestconfig=pytestconfig, request_mock=requests_mock, @@ -1495,8 +1464,6 @@ def common_app_ingest( output_mcp_path: str, override_config: dict = {}, ) -> Pipeline: - enable_logging() - register_mock_api( pytestconfig=pytestconfig, request_mock=requests_mock, diff --git a/metadata-ingestion/tests/integration/powerbi/test_profiling.py b/metadata-ingestion/tests/integration/powerbi/test_profiling.py index 4b48bed003b1e..78d35cf31a26d 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_profiling.py +++ b/metadata-ingestion/tests/integration/powerbi/test_profiling.py @@ -1,5 +1,3 @@ -import logging -import sys from typing import Any, Dict from unittest import mock @@ -271,12 +269,6 @@ def register_mock_admin_api(request_mock: Any, override_data: dict = {}) -> None ) -def enable_logging(): - # set logging to console - logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) - logging.getLogger().setLevel(logging.DEBUG) - - def mock_msal_cca(*args, **kwargs): class MsalClient: def acquire_token_for_client(self, *args, **kwargs): @@ -311,8 +303,6 @@ def default_source_config(): @freeze_time(FROZEN_TIME) @mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) def test_profiling(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock): - enable_logging() - test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_admin_api(request_mock=requests_mock) diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 902ff243c802a..71e5ad10c2fc5 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -1,7 +1,5 @@ import json -import logging import pathlib -import sys from typing import Any, Dict, List, cast from unittest import mock @@ -88,12 +86,6 @@ } -def enable_logging(): - # set logging to console - logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) - logging.getLogger().setLevel(logging.DEBUG) - - def read_response(file_name): response_json_path = f"{test_resources_dir}/setup/{file_name}" with open(response_json_path) as file: @@ -376,7 +368,6 @@ def tableau_ingest_common( @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_tableau_ingest(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_mces.json" golden_file_name: str = "tableau_mces_golden.json" tableau_ingest_common( @@ -454,7 +445,6 @@ def mock_data() -> List[dict]: @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_tableau_cll_ingest(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_mces_cll.json" golden_file_name: str = "tableau_cll_mces_golden.json" @@ -481,7 +471,6 @@ def test_tableau_cll_ingest(pytestconfig, tmp_path, mock_datahub_graph): @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_project_pattern(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_project_pattern_mces.json" golden_file_name: str = "tableau_mces_golden.json" @@ -505,7 +494,6 @@ def test_project_pattern(pytestconfig, tmp_path, mock_datahub_graph): @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_project_path_pattern(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_project_path_mces.json" golden_file_name: str = "tableau_project_path_mces_golden.json" @@ -529,8 +517,6 @@ def test_project_path_pattern(pytestconfig, tmp_path, mock_datahub_graph): @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_project_hierarchy(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() - output_file_name: str = "tableau_nested_project_mces.json" golden_file_name: str = "tableau_nested_project_mces_golden.json" @@ -554,7 +540,6 @@ def test_project_hierarchy(pytestconfig, tmp_path, mock_datahub_graph): @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_extract_all_project(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_extract_all_project_mces.json" golden_file_name: str = "tableau_extract_all_project_mces_golden.json" @@ -644,7 +629,6 @@ def test_project_path_pattern_deny(pytestconfig, tmp_path, mock_datahub_graph): def test_tableau_ingest_with_platform_instance( pytestconfig, tmp_path, mock_datahub_graph ): - enable_logging() output_file_name: str = "tableau_with_platform_instance_mces.json" golden_file_name: str = "tableau_with_platform_instance_mces_golden.json" @@ -691,7 +675,6 @@ def test_tableau_ingest_with_platform_instance( def test_lineage_overrides(): - enable_logging() # Simple - specify platform instance to presto table assert ( TableauUpstreamReference( @@ -745,7 +728,6 @@ def test_lineage_overrides(): def test_database_hostname_to_platform_instance_map(): - enable_logging() # Simple - snowflake table assert ( TableauUpstreamReference( @@ -916,7 +898,6 @@ def test_tableau_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph) def test_tableau_no_verify(): - enable_logging() # This test ensures that we can connect to a self-signed certificate # when ssl_verify is set to False. @@ -941,7 +922,6 @@ def test_tableau_no_verify(): @freeze_time(FROZEN_TIME) @pytest.mark.integration_batch_2 def test_tableau_signout_timeout(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_signout_timeout_mces.json" golden_file_name: str = "tableau_signout_timeout_mces_golden.json" tableau_ingest_common( @@ -1073,7 +1053,6 @@ def test_get_all_datasources_failure(pytestconfig, tmp_path, mock_datahub_graph) @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_tableau_ingest_multiple_sites(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_mces_multiple_sites.json" golden_file_name: str = "tableau_multiple_sites_mces_golden.json" @@ -1135,7 +1114,6 @@ def test_tableau_ingest_multiple_sites(pytestconfig, tmp_path, mock_datahub_grap @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_tableau_ingest_sites_as_container(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_mces_ingest_sites_as_container.json" golden_file_name: str = "tableau_sites_as_container_mces_golden.json" @@ -1159,7 +1137,6 @@ def test_tableau_ingest_sites_as_container(pytestconfig, tmp_path, mock_datahub_ @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_site_name_pattern(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_site_name_pattern_mces.json" golden_file_name: str = "tableau_site_name_pattern_mces_golden.json" @@ -1183,7 +1160,6 @@ def test_site_name_pattern(pytestconfig, tmp_path, mock_datahub_graph): @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_permission_ingestion(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_permission_ingestion_mces.json" golden_file_name: str = "tableau_permission_ingestion_mces_golden.json" @@ -1209,7 +1185,6 @@ def test_permission_ingestion(pytestconfig, tmp_path, mock_datahub_graph): @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_no_hidden_assets(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_no_hidden_assets_mces.json" golden_file_name: str = "tableau_no_hidden_assets_mces_golden.json" @@ -1232,7 +1207,6 @@ def test_no_hidden_assets(pytestconfig, tmp_path, mock_datahub_graph): @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_ingest_tags_disabled(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_ingest_tags_disabled_mces.json" golden_file_name: str = "tableau_ingest_tags_disabled_mces_golden.json" @@ -1254,7 +1228,6 @@ def test_ingest_tags_disabled(pytestconfig, tmp_path, mock_datahub_graph): @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_hidden_asset_tags(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() output_file_name: str = "tableau_hidden_asset_tags_mces.json" golden_file_name: str = "tableau_hidden_asset_tags_mces_golden.json" @@ -1277,8 +1250,6 @@ def test_hidden_asset_tags(pytestconfig, tmp_path, mock_datahub_graph): @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_hidden_assets_without_ingest_tags(pytestconfig, tmp_path, mock_datahub_graph): - enable_logging() - new_config = config_source_default.copy() new_config["tags_for_hidden_assets"] = ["hidden", "private"] new_config["ingest_tags"] = False