Skip to content

Commit 0c588a1

Browse files
sid-acrylhsheth2
authored and committed
feat(ingest/tableau): verify role assignment to user in test_connection (datahub-project#12042)
Co-authored-by: Harshal Sheth <[email protected]>
1 parent 01d4f2c commit 0c588a1

File tree

5 files changed

+227
-47
lines changed

5 files changed

+227
-47
lines changed

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py

+42-2
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@
111111
tableau_field_to_schema_field,
112112
workbook_graphql_query,
113113
)
114+
from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
115+
from datahub.ingestion.source.tableau.tableau_validation import check_user_role
114116
from datahub.metadata.com.linkedin.pegasus2avro.common import (
115117
AuditStamp,
116118
ChangeAuditStamps,
@@ -167,7 +169,7 @@
167169

168170
try:
169171
# On earlier versions of the tableauserverclient, the NonXMLResponseError
170-
# was thrown when reauthentication was needed. We'll keep both exceptions
172+
# was thrown when reauthentication was necessary. We'll keep both exceptions
171173
# around for now, but can remove this in the future.
172174
from tableauserverclient.server.endpoint.exceptions import ( # type: ignore
173175
NotSignedInError,
@@ -632,6 +634,33 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
632634
num_upstream_table_lineage_failed_parse_sql: int = 0
633635
num_upstream_fine_grained_lineage_failed_parse_sql: int = 0
634636
num_hidden_assets_skipped: int = 0
637+
logged_in_user: List[UserInfo] = []
638+
639+
640+
def report_user_role(report: TableauSourceReport, server: Server) -> None:
    """Record the signed-in user on ``report`` and warn if the role is insufficient.

    The user is resolved with ``UserInfo.from_server``. If the user is not a
    ``Site Administrator Explorer`` a warning is added to ``report``. The user
    is appended to ``report.logged_in_user`` in either case. Any exception
    raised along the way is turned into a warning on ``report`` instead of
    propagating to the caller.

    Args:
        report: Report that receives warnings and the resolved user.
        server: Tableau server client used to look up the signed-in user.
    """
    warning_title: str = "Insufficient Permissions"
    try:
        # The site role may differ from one site to another, so the user is
        # looked up again on every call rather than being cached.
        current_user: UserInfo = UserInfo.from_server(server=server)

        lacks_required_role = not current_user.is_site_administrator_explorer()
        if lacks_required_role:
            report.warning(
                title=warning_title,
                message="The user must have the `Site Administrator Explorer` role to perform metadata ingestion.",
                context=f"user-name={current_user.user_name}, role={current_user.site_role}, site_id={current_user.site_id}",
            )

        report.logged_in_user.append(current_user)

    except Exception as err:
        # Best effort: a failed role lookup is reported, never raised.
        report.warning(
            title=warning_title,
            message="Failed to verify the user's role. The user must have `Site Administrator Explorer` role.",
            context=f"{err}",
            exc=err,
        )
635664

636665

637666
@platform_name("Tableau")
@@ -676,6 +705,7 @@ def _authenticate(self, site_content_url: str) -> None:
676705
try:
677706
logger.info(f"Authenticated to Tableau site: '{site_content_url}'")
678707
self.server = self.config.make_tableau_client(site_content_url)
708+
report_user_role(report=self.report, server=self.server)
679709
# Note that we're not catching ConfigurationError, since we want that to throw.
680710
except ValueError as e:
681711
self.report.failure(
@@ -689,9 +719,17 @@ def test_connection(config_dict: dict) -> TestConnectionReport:
689719
test_report = TestConnectionReport()
690720
try:
691721
source_config = TableauConfig.parse_obj_allow_extras(config_dict)
692-
source_config.make_tableau_client(source_config.site)
722+
723+
server = source_config.make_tableau_client(source_config.site)
724+
693725
test_report.basic_connectivity = CapabilityReport(capable=True)
726+
727+
test_report.capability_report = check_user_role(
728+
logged_in_user=UserInfo.from_server(server=server)
729+
)
730+
694731
except Exception as e:
732+
logger.warning(f"{e}", exc_info=e)
695733
test_report.basic_connectivity = CapabilityReport(
696734
capable=False, failure_reason=str(e)
697735
)
@@ -831,6 +869,8 @@ def __init__(
831869
# when emitting custom SQL data sources.
832870
self.custom_sql_ids_being_used: List[str] = []
833871

872+
report_user_role(report=report, server=server)
873+
834874
@property
835875
def no_env_browse_prefix(self) -> str:
836876
# Prefix to use with browse path (v1)

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py

+2
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,5 @@
8181
PROJECT = "Project"
8282
SITE = "Site"
8383
IS_UNSUPPORTED_CUSTOM_SQL = "isUnsupportedCustomSql"
84+
SITE_PERMISSION = "sitePermission"
85+
SITE_ROLE = "SiteAdministratorExplorer"

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from dataclasses import dataclass
2+
3+
from tableauserverclient import Server, UserItem
4+
5+
from datahub.ingestion.source.tableau import tableau_constant as c
6+
7+
8+
@dataclass
class UserInfo:
    """Name, site role and site id of the user signed in to a Tableau server."""

    user_name: str
    site_role: str
    site_id: str

    def is_site_administrator_explorer(self) -> bool:
        """Return True when ``site_role`` equals ``c.SITE_ROLE``."""
        return self.site_role == c.SITE_ROLE

    @staticmethod
    def from_server(server: Server) -> "UserInfo":
        """Build a ``UserInfo`` for the user currently signed in on ``server``.

        Args:
            server: Tableau server client; its ``user_id`` and ``site_id`` are
                read and ``server.users.get_by_id`` is called.

        Returns:
            A ``UserInfo`` carrying the user's name, site role and site id.

        Raises:
            ValueError: If ``server.user_id``, the user's ``site_role``, the
                user's ``name`` or ``server.site_id`` is missing or empty.
        """
        # Explicit raises instead of ``assert``: assertions are removed under
        # ``python -O``, which would let ``None`` reach the lookup below and
        # the constructor. The checks also narrow the optional types for the
        # type checker.
        if not server.user_id:
            raise ValueError("make the connection with tableau")

        user: UserItem = server.users.get_by_id(server.user_id)

        if not user.site_role:
            raise ValueError("site_role is not available")

        if not user.name:
            raise ValueError("user name is not available")

        if not server.site_id:
            raise ValueError("site identifier is not available")

        return UserInfo(
            user_name=user.name,
            site_role=user.site_role,
            site_id=server.site_id,
        )

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import logging
2+
from typing import Dict, Union
3+
4+
from datahub.ingestion.api.source import CapabilityReport, SourceCapability
5+
from datahub.ingestion.source.tableau import tableau_constant as c
6+
from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
7+
8+
logger = logging.getLogger(__name__)
9+
10+
11+
def check_user_role(
    logged_in_user: UserInfo,
) -> Dict[Union[SourceCapability, str], CapabilityReport]:
    """Build the capability report entry for the user's site role.

    Args:
        logged_in_user: The signed-in user whose site role is checked.

    Returns:
        A dict with a single ``c.SITE_PERMISSION`` entry. The entry is capable
        when the user is a ``Site Administrator Explorer``. Otherwise it is not
        capable and carries a failure reason and a mitigation message. If the
        check itself raises, the exception is logged and the entry is reported
        as not capable.
    """
    role_missing_reason: str = (
        "The user does not have the `Site Administrator Explorer` role."
    )
    assign_role_hint: str = "Assign `Site Administrator Explorer` role to the user"
    setup_guide_hint: str = "Refer to the setup guide: https://datahubproject.io/docs/quick-ingestion-guides/tableau/setup"

    # Start from the "capable" answer and overwrite it only when the check
    # fails or cannot be completed.
    result: Dict[Union[SourceCapability, str], CapabilityReport] = {
        c.SITE_PERMISSION: CapabilityReport(capable=True)
    }

    try:
        # TODO: Add check for `Enable Derived Permissions`
        if logged_in_user.is_site_administrator_explorer():
            return result

        result[c.SITE_PERMISSION] = CapabilityReport(
            capable=False,
            failure_reason=f"{role_missing_reason} Their current role is {logged_in_user.site_role}.",
            mitigation_message=f"{assign_role_hint} `{logged_in_user.user_name}`. {setup_guide_hint}",
        )
    except Exception as err:
        logger.warning(msg=err, exc_info=err)
        result[c.SITE_PERMISSION] = CapabilityReport(
            capable=False,
            failure_reason="Failed to verify user role.",
            mitigation_message=f"{assign_role_hint}. {setup_guide_hint}",  # user is unknown
        )

    return result

metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py

+102-45
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import pytest
99
from freezegun import freeze_time
10+
from pydantic import ValidationError
1011
from requests.adapters import ConnectionError
1112
from tableauserverclient import PermissionsRule, Server
1213
from tableauserverclient.models import (
@@ -21,7 +22,9 @@
2122

2223
from datahub.emitter.mce_builder import DEFAULT_ENV, make_schema_field_urn
2324
from datahub.emitter.mcp import MetadataChangeProposalWrapper
24-
from datahub.ingestion.run.pipeline import Pipeline, PipelineContext, PipelineInitError
25+
from datahub.ingestion.api.source import TestConnectionReport
26+
from datahub.ingestion.run.pipeline import Pipeline, PipelineContext
27+
from datahub.ingestion.source.tableau import tableau_constant as c
2528
from datahub.ingestion.source.tableau.tableau import (
2629
TableauConfig,
2730
TableauSiteSource,
@@ -572,52 +575,28 @@ def test_extract_all_project(pytestconfig, tmp_path, mock_datahub_graph):
572575
def test_value_error_projects_and_project_pattern(
573576
pytestconfig, tmp_path, mock_datahub_graph
574577
):
575-
# Ingestion should raise ValueError
576-
output_file_name: str = "tableau_project_pattern_precedence_mces.json"
577-
golden_file_name: str = "tableau_project_pattern_precedence_mces_golden.json"
578-
579578
new_config = config_source_default.copy()
580579
new_config["projects"] = ["default"]
581580
new_config["project_pattern"] = {"allow": ["^Samples$"]}
582581

583582
with pytest.raises(
584-
PipelineInitError,
583+
ValidationError,
585584
match=r".*projects is deprecated. Please use project_path_pattern only.*",
586585
):
587-
tableau_ingest_common(
588-
pytestconfig,
589-
tmp_path,
590-
mock_data(),
591-
golden_file_name,
592-
output_file_name,
593-
mock_datahub_graph,
594-
pipeline_config=new_config,
595-
)
586+
TableauConfig.parse_obj(new_config)
596587

597588

598589
def test_project_pattern_deprecation(pytestconfig, tmp_path, mock_datahub_graph):
599-
# Ingestion should raise ValueError
600-
output_file_name: str = "tableau_project_pattern_deprecation_mces.json"
601-
golden_file_name: str = "tableau_project_pattern_deprecation_mces_golden.json"
602-
603590
new_config = config_source_default.copy()
604591
del new_config["projects"]
605592
new_config["project_pattern"] = {"allow": ["^Samples$"]}
606593
new_config["project_path_pattern"] = {"allow": ["^Samples$"]}
607594

608595
with pytest.raises(
609-
PipelineInitError,
596+
ValidationError,
610597
match=r".*project_pattern is deprecated. Please use project_path_pattern only*",
611598
):
612-
tableau_ingest_common(
613-
pytestconfig,
614-
tmp_path,
615-
mock_data(),
616-
golden_file_name,
617-
output_file_name,
618-
mock_datahub_graph,
619-
pipeline_config=new_config,
620-
)
599+
TableauConfig.parse_obj(new_config)
621600

622601

623602
def test_project_path_pattern_allow(pytestconfig, tmp_path, mock_datahub_graph):
@@ -1298,31 +1277,21 @@ def test_hidden_asset_tags(pytestconfig, tmp_path, mock_datahub_graph):
12981277
@pytest.mark.integration
12991278
def test_hidden_assets_without_ingest_tags(pytestconfig, tmp_path, mock_datahub_graph):
13001279
enable_logging()
1301-
output_file_name: str = "tableau_hidden_asset_tags_error_mces.json"
1302-
golden_file_name: str = "tableau_hidden_asset_tags_error_mces_golden.json"
13031280

13041281
new_config = config_source_default.copy()
13051282
new_config["tags_for_hidden_assets"] = ["hidden", "private"]
13061283
new_config["ingest_tags"] = False
13071284

13081285
with pytest.raises(
1309-
PipelineInitError,
1286+
ValidationError,
13101287
match=r".*tags_for_hidden_assets is only allowed with ingest_tags enabled.*",
13111288
):
1312-
tableau_ingest_common(
1313-
pytestconfig,
1314-
tmp_path,
1315-
mock_data(),
1316-
golden_file_name,
1317-
output_file_name,
1318-
mock_datahub_graph,
1319-
pipeline_config=new_config,
1320-
)
1289+
TableauConfig.parse_obj(new_config)
13211290

13221291

13231292
@freeze_time(FROZEN_TIME)
13241293
@pytest.mark.integration
1325-
def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_graph):
1294+
def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph):
13261295
with mock.patch(
13271296
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
13281297
mock_datahub_graph,
@@ -1359,11 +1328,99 @@ def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_gra
13591328

13601329
warnings = list(reporter.warnings)
13611330

1362-
assert len(warnings) == 1
1331+
assert len(warnings) == 2
1332+
1333+
assert warnings[0].title == "Insufficient Permissions"
13631334

1364-
assert warnings[0].title == "Derived Permission Error"
1335+
assert warnings[1].title == "Derived Permission Error"
13651336

1366-
assert warnings[0].message == (
1337+
assert warnings[1].message == (
13671338
"Turn on your derived permissions. See for details "
13681339
"https://community.tableau.com/s/question/0D54T00000QnjHbSAJ/how-to-fix-the-permissionsmodeswitched-error"
13691340
)
1341+
1342+
1343+
@freeze_time(FROZEN_TIME)
1344+
@pytest.mark.integration
1345+
def test_connection_report_test(requests_mock):
1346+
server_info_response = """
1347+
<tsResponse xmlns:t="http://tableau.com/api">
1348+
<t:serverInfo>
1349+
<t:productVersion build="build-number">foo</t:productVersion>
1350+
<t:restApiVersion>2.4</t:restApiVersion>
1351+
</t:serverInfo>
1352+
</tsResponse>
1353+
1354+
"""
1355+
1356+
requests_mock.register_uri(
1357+
"GET",
1358+
"https://do-not-connect/api/2.4/serverInfo",
1359+
text=server_info_response,
1360+
status_code=200,
1361+
headers={"Content-Type": "application/xml"},
1362+
)
1363+
1364+
signin_response = """
1365+
<tsResponse xmlns:t="http://tableau.com/api">
1366+
<t:credentials token="fake_token">
1367+
<t:site id="fake_site_luid" contentUrl="fake_site_content_url"/>
1368+
<t:user id="fake_user_id"/>
1369+
</t:credentials>
1370+
</tsResponse>
1371+
"""
1372+
1373+
requests_mock.register_uri(
1374+
"POST",
1375+
"https://do-not-connect/api/2.4/auth/signin",
1376+
text=signin_response,
1377+
status_code=200,
1378+
headers={"Content-Type": "application/xml"},
1379+
)
1380+
1381+
user_by_id_response = """
1382+
<tsResponse xmlns:t="http://tableau.com/api">
1383+
<t:user id="user-id" name="[email protected]" siteRole="SiteAdministratorExplorer" />
1384+
</tsResponse>
1385+
"""
1386+
1387+
requests_mock.register_uri(
1388+
"GET",
1389+
"https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id",
1390+
text=user_by_id_response,
1391+
status_code=200,
1392+
headers={"Content-Type": "application/xml"},
1393+
)
1394+
1395+
report: TestConnectionReport = TableauSource.test_connection(config_source_default)
1396+
1397+
assert report
1398+
assert report.capability_report
1399+
assert report.capability_report.get(c.SITE_PERMISSION)
1400+
assert report.capability_report[c.SITE_PERMISSION].capable
1401+
1402+
# Role other than SiteAdministratorExplorer
1403+
user_by_id_response = """
1404+
<tsResponse xmlns:t="http://tableau.com/api">
1405+
<t:user id="user-id" name="[email protected]" siteRole="Explorer" />
1406+
</tsResponse>
1407+
"""
1408+
1409+
requests_mock.register_uri(
1410+
"GET",
1411+
"https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id",
1412+
text=user_by_id_response,
1413+
status_code=200,
1414+
headers={"Content-Type": "application/xml"},
1415+
)
1416+
1417+
report = TableauSource.test_connection(config_source_default)
1418+
1419+
assert report
1420+
assert report.capability_report
1421+
assert report.capability_report.get(c.SITE_PERMISSION)
1422+
assert report.capability_report[c.SITE_PERMISSION].capable is False
1423+
assert (
1424+
report.capability_report[c.SITE_PERMISSION].failure_reason
1425+
== "The user does not have the `Site Administrator Explorer` role. Their current role is Explorer."
1426+
)

0 commit comments

Comments
 (0)