Commit 98c056d

refactor(ingest/tableau): mark the fetch_size configuration as deprecated (#12126)

1 parent e52a4de · commit 98c056d

2 files changed: +12 −7 lines
metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py (+11 −7)
@@ -49,6 +49,7 @@
     DatasetSourceConfigMixin,
 )
 from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
+from datahub.configuration.validate_field_removal import pydantic_removed_field
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import (
     ContainerKey,

@@ -380,11 +381,6 @@ class TableauConfig(
         description="[advanced] Number of metadata objects (e.g. CustomSQLTable, PublishedDatasource, etc) to query at a time using the Tableau API.",
     )
 
-    fetch_size: int = Field(
-        default=250,
-        description="Specifies the number of records to retrieve in each batch during a query execution.",
-    )
-
     # We've found that even with a small workbook page size (e.g. 10), the Tableau API often
     # returns warnings like this:
     # {

@@ -499,6 +495,10 @@ class TableauConfig(
         "This can only be used with ingest_tags enabled as it will overwrite tags entered from the UI.",
     )
 
+    _fetch_size = pydantic_removed_field(
+        "fetch_size",
+    )
+
     # pre = True because we want to take some decision before pydantic initialize the configuration to default values
     @root_validator(pre=True)
     def projects_backward_compatibility(cls, values: Dict) -> Dict:

@@ -1147,7 +1147,7 @@ def get_connection_object_page(
     connection_type: str,
     query_filter: str,
     current_cursor: Optional[str],
-    fetch_size: int = 250,
+    fetch_size: int,
     retry_on_auth_error: bool = True,
     retries_remaining: Optional[int] = None,
 ) -> Tuple[dict, Optional[str], int]:

@@ -1344,7 +1344,11 @@ def get_connection_objects(
             connection_type=connection_type,
             query_filter=filter_,
             current_cursor=current_cursor,
-            fetch_size=self.config.fetch_size,
+            # `filter_page` contains metadata object IDs (e.g., Project IDs, Field IDs, Sheet IDs, etc.).
+            # The number of IDs is always less than or equal to page_size.
+            # If the IDs are primary keys, the number of metadata objects to load matches the number of records to return.
+            # In our case, the IDs are mostly primary keys; therefore, fetch_size is set equal to page_size.
+            fetch_size=page_size,
         )
 
         yield from connection_objects.get(c.NODES) or []
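
For context on the second and third hunks: `pydantic_removed_field` replaces the old `Field` declaration so that recipes which still set `fetch_size` keep parsing instead of failing on an unknown key. Below is a minimal sketch of that pattern, assuming pydantic v1; the `removed_field` helper and `ExampleConfig` model are illustrative stand-ins, not DataHub's actual implementation.

import warnings

from pydantic import BaseModel, root_validator


def removed_field(field_name: str):
    """Build a pre root-validator that drops a retired config key."""

    @root_validator(pre=True, allow_reuse=True)
    def _remover(cls, values: dict) -> dict:
        if field_name in values:
            # Warn-and-drop (an assumption here): old recipes keep loading
            # after the field's removal instead of erroring out.
            warnings.warn(f"{field_name} is deprecated and now ignored")
            values.pop(field_name)
        return values

    return _remover


class ExampleConfig(BaseModel):
    page_size: int = 10

    # Assigned to a private attribute, mirroring `_fetch_size = ...` above.
    _fetch_size = removed_field("fetch_size")


# A recipe that still sets fetch_size parses cleanly; the key is dropped.
cfg = ExampleConfig.parse_obj({"page_size": 50, "fetch_size": 250})
assert cfg.page_size == 50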

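The last hunk's reasoning can be shown with a small self-contained paging loop: once fetch_size equals page_size, one query per filter page returns the whole page. `fake_get_page` below stands in for the real Tableau Metadata API call; all names here are illustrative, not the source's.

from typing import Iterator, List, Optional, Tuple

DATA = [{"id": i} for i in range(25)]  # stand-in for metadata objects


def fake_get_page(
    cursor: Optional[str], fetch_size: int
) -> Tuple[List[dict], Optional[str]]:
    """Stand-in for get_connection_object_page: one query per call."""
    start = int(cursor or 0)
    page = DATA[start : start + fetch_size]
    next_cursor = str(start + fetch_size) if start + fetch_size < len(DATA) else None
    return page, next_cursor


def iter_objects(page_size: int) -> Iterator[dict]:
    cursor: Optional[str] = None
    while True:
        # Each filter page holds at most page_size IDs, and each ID maps to
        # one record when IDs are primary keys, so fetch_size=page_size lets
        # a single query return the whole page.
        nodes, cursor = fake_get_page(cursor, fetch_size=page_size)
        yield from nodes
        if cursor is None:
            break


# 25 objects paged 10 at a time: pages of 10, 10, and 5.
assert len(list(iter_objects(page_size=10))) == 25
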
metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py (+1 −0)
@@ -1324,6 +1324,7 @@ def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph):
         query_filter=mock.MagicMock(),
         current_cursor=None,
         retries_remaining=1,
+        fetch_size=10,
     )
 
     warnings = list(reporter.warnings)
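
The test change follows directly from the signature change in tableau.py: with the `250` default removed, every caller of `get_connection_object_page` must now pass `fetch_size` explicitly. A toy illustration of that effect, using a simplified hypothetical signature rather than the real one:

def get_page(current_cursor, fetch_size):  # no default for fetch_size
    return [], None


get_page(None, fetch_size=10)  # OK: the caller supplies the batch size

try:
    get_page(None)  # the old implicit default no longer exists
except TypeError as exc:
    print(exc)  # get_page() missing 1 required positional argument: 'fetch_size'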
