@@ -49,6 +49,7 @@
     DatasetSourceConfigMixin,
 )
 from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
+from datahub.configuration.validate_field_removal import pydantic_removed_field
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import (
     ContainerKey,
@@ -380,11 +381,6 @@ class TableauConfig(
         description="[advanced] Number of metadata objects (e.g. CustomSQLTable, PublishedDatasource, etc) to query at a time using the Tableau API.",
     )
 
-    fetch_size: int = Field(
-        default=250,
-        description="Specifies the number of records to retrieve in each batch during a query execution.",
-    )
-
     # We've found that even with a small workbook page size (e.g. 10), the Tableau API often
     # returns warnings like this:
     # {
@@ -499,6 +495,10 @@ class TableauConfig(
         "This can only be used with ingest_tags enabled as it will overwrite tags entered from the UI.",
     )
 
+    _fetch_size = pydantic_removed_field(
+        "fetch_size",
+    )
+
     # pre = True because we want to take some decision before pydantic initialize the configuration to default values
     @root_validator(pre=True)
     def projects_backward_compatibility(cls, values: Dict) -> Dict:
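For readers unfamiliar with the pattern above: assigning the result of `pydantic_removed_field` to a private class attribute registers a pre-validator that strips the retired key from incoming recipes, so existing configs that still set `fetch_size` keep loading instead of failing on an unknown field. The real helper lives in `datahub.configuration.validate_field_removal`; the body below is a minimal sketch of the idea, not the actual implementation:

```python
import warnings

import pydantic


def pydantic_removed_field(field: str, print_warning: bool = True) -> classmethod:
    """Sketch: build a reusable pre-validator that strips a removed config field."""

    def _remove_field(cls, values: dict) -> dict:
        if field in values:
            if print_warning:
                warnings.warn(
                    f"The `{field}` option was removed; please delete it from your recipe.",
                    stacklevel=2,
                )
            values.pop(field)
        return values

    # Pydantic v1 tracks validators by __name__, so make the name unique per
    # field to allow several removed fields on the same config class.
    _remove_field.__name__ = f"{_remove_field.__name__}_{field}"
    return pydantic.root_validator(pre=True, allow_reuse=True)(_remove_field)
```

Because the validator runs with `pre=True`, the removed key is dropped (with a warning) before pydantic ever parses the remaining fields.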
@@ -1147,7 +1147,7 @@ def get_connection_object_page(
     connection_type: str,
     query_filter: str,
     current_cursor: Optional[str],
-    fetch_size: int = 250,
+    fetch_size: int,
     retry_on_auth_error: bool = True,
     retries_remaining: Optional[int] = None,
 ) -> Tuple[dict, Optional[str], int]:
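Making `fetch_size` required here, rather than defaulted to 250, is presumably deliberate: the old default mirrored the now-removed `fetch_size` config option, and keeping a hard-coded fallback could silently diverge from the `page_size`-derived value callers now compute. With no default, a call site that forgets the argument fails immediately with a `TypeError` instead of quietly fetching a stale batch size.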
@@ -1344,7 +1344,11 @@ def get_connection_objects(
             connection_type=connection_type,
             query_filter=filter_,
             current_cursor=current_cursor,
-            fetch_size=self.config.fetch_size,
+            # `filter_page` contains metadata object IDs (e.g., Project IDs, Field IDs, Sheet IDs).
+            # The number of IDs in a page is always less than or equal to page_size.
+            # When the IDs are primary keys, the number of metadata objects to load matches the number of records returned.
+            # In our case the IDs are mostly primary keys, so fetch_size is set equal to page_size.
+            fetch_size=page_size,
         )
 
         yield from connection_objects.get(c.NODES) or []