Skip to content

Commit 76ac477

Browse files
sid-acryl authored and sleeperdeep committed
fix(ingest/tableau): honor the key projectNameWithin in pagination (datahub-project#12107)
1 parent 0a1cc49 commit 76ac477

File tree

4 files changed

+22
-8
lines changed

4 files changed

+22
-8
lines changed

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py

-1
Original file line number | Diff line number | Diff line change
@@ -1290,7 +1290,6 @@ def get_connection_objects(
12901290
page_size = page_size_override or self.config.page_size
12911291

12921292
filter_pages = get_filter_pages(query_filter, page_size)
1293-
12941293
for filter_page in filter_pages:
12951294
has_next_page = 1
12961295
current_cursor: Optional[str] = None

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py

+12-5
Original file line number | Diff line number | Diff line change
@@ -975,15 +975,22 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
975975
# a few ten thousand, then tableau server responds with empty response
976976
# causing below error:
977977
# tableauserverclient.server.endpoint.exceptions.NonXMLResponseError: b''
978+
979+
# in practice, we only do pagination if len(query_filter.keys()) == 1
980+
if len(query_filter.keys()) != 1:
981+
return filter_pages
982+
983+
current_key = (list(query_filter.keys()))[0]
984+
978985
if (
979-
len(query_filter.keys()) == 1
980-
and query_filter.get(c.ID_WITH_IN)
981-
and isinstance(query_filter[c.ID_WITH_IN], list)
986+
current_key in [c.ID_WITH_IN, c.PROJECT_NAME_WITH_IN]
987+
and query_filter.get(current_key)
988+
and isinstance(query_filter[current_key], list)
982989
):
983-
ids = query_filter[c.ID_WITH_IN]
990+
ids = query_filter[current_key]
984991
filter_pages = [
985992
{
986-
c.ID_WITH_IN: ids[
993+
current_key: ids[
987994
start : (
988995
start + page_size if start + page_size < len(ids) else len(ids)
989996
)

metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py

+2
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@
6161
"projects": ["default", "Project 2", "Samples"],
6262
"extract_project_hierarchy": False,
6363
"page_size": 1000,
64+
"workbook_page_size": 1000,
6465
"ingest_tags": True,
6566
"ingest_owner": True,
6667
"ingest_tables_external": True,
@@ -674,6 +675,7 @@ def test_tableau_ingest_with_platform_instance(
674675
"platform_instance": "acryl_site1",
675676
"projects": ["default", "Project 2"],
676677
"page_size": 1000,
678+
"workbook_page_size": 1000,
677679
"ingest_tags": True,
678680
"ingest_owner": True,
679681
"ingest_tables_external": True,

metadata-ingestion/tests/unit/test_tableau_source.py

+8-2
Original file line number | Diff line number | Diff line change
@@ -182,8 +182,14 @@ def test_get_filter_pages_simple():
182182
assert get_filter_pages(filter_dict, 10) == [filter_dict]
183183

184184

185-
def test_get_filter_pages_non_id_large_filter_passthrough():
186-
projects = [f"project{i}" for i in range(20000)]
185+
def test_get_filter_pages_non_id_large_filter():
186+
projects = [f"project{i}" for i in range(10)]
187+
filter_dict = {c.PROJECT_NAME_WITH_IN: projects}
188+
assert get_filter_pages(filter_dict, 10) == [filter_dict]
189+
190+
191+
def test_get_filter_pages_for_single_key():
192+
projects = ["project1"]
187193
filter_dict = {c.PROJECT_NAME_WITH_IN: projects}
188194
assert get_filter_pages(filter_dict, 10) == [filter_dict]
189195

0 commit comments

Comments
 (0)