Skip to content

Commit

Permalink
refactor(ingest/tableau): mark the fetch_size configuration as deprecated (#12126)
Browse files Browse the repository at this point in the history
  • Loading branch information
sid-acryl authored Dec 20, 2024
1 parent e52a4de commit 98c056d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
18 changes: 11 additions & 7 deletions metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
DatasetSourceConfigMixin,
)
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_removal import pydantic_removed_field
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.mcp_builder import (
ContainerKey,
Expand Down Expand Up @@ -380,11 +381,6 @@ class TableauConfig(
description="[advanced] Number of metadata objects (e.g. CustomSQLTable, PublishedDatasource, etc) to query at a time using the Tableau API.",
)

fetch_size: int = Field(
default=250,
description="Specifies the number of records to retrieve in each batch during a query execution.",
)

# We've found that even with a small workbook page size (e.g. 10), the Tableau API often
# returns warnings like this:
# {
Expand Down Expand Up @@ -499,6 +495,10 @@ class TableauConfig(
"This can only be used with ingest_tags enabled as it will overwrite tags entered from the UI.",
)

_fetch_size = pydantic_removed_field(
"fetch_size",
)

# pre = True because we want to take some decision before pydantic initialize the configuration to default values
@root_validator(pre=True)
def projects_backward_compatibility(cls, values: Dict) -> Dict:
Expand Down Expand Up @@ -1147,7 +1147,7 @@ def get_connection_object_page(
connection_type: str,
query_filter: str,
current_cursor: Optional[str],
fetch_size: int = 250,
fetch_size: int,
retry_on_auth_error: bool = True,
retries_remaining: Optional[int] = None,
) -> Tuple[dict, Optional[str], int]:
Expand Down Expand Up @@ -1344,7 +1344,11 @@ def get_connection_objects(
connection_type=connection_type,
query_filter=filter_,
current_cursor=current_cursor,
fetch_size=self.config.fetch_size,
# `filter_page` contains metadata object IDs (e.g., Project IDs, Field IDs, Sheet IDs, etc.).
# The number of IDs is always less than or equal to page_size.
# If the IDs are primary keys, the number of metadata objects to load matches the number of records to return.
# In our case, mostly, the IDs are primary key, therefore, fetch_size is set equal to page_size.
fetch_size=page_size,
)

yield from connection_objects.get(c.NODES) or []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1324,6 +1324,7 @@ def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph):
query_filter=mock.MagicMock(),
current_cursor=None,
retries_remaining=1,
fetch_size=10,
)

warnings = list(reporter.warnings)
Expand Down

0 comments on commit 98c056d

Please sign in to comment.