diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
index 6844b8a425a7b6..5790b3765077ef 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
@@ -111,6 +111,8 @@
tableau_field_to_schema_field,
workbook_graphql_query,
)
+from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
+from datahub.ingestion.source.tableau.tableau_validation import check_user_role
from datahub.metadata.com.linkedin.pegasus2avro.common import (
AuditStamp,
ChangeAuditStamps,
@@ -167,7 +169,7 @@
try:
# On earlier versions of the tableauserverclient, the NonXMLResponseError
- # was thrown when reauthentication was needed. We'll keep both exceptions
+ # was thrown when reauthentication was necessary. We'll keep both exceptions
# around for now, but can remove this in the future.
from tableauserverclient.server.endpoint.exceptions import ( # type: ignore
NotSignedInError,
@@ -632,6 +634,33 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
num_upstream_table_lineage_failed_parse_sql: int = 0
num_upstream_fine_grained_lineage_failed_parse_sql: int = 0
num_hidden_assets_skipped: int = 0
+ logged_in_user: List[UserInfo] = []
+
+
+def report_user_role(report: TableauSourceReport, server: Server) -> None:
+ title: str = "Insufficient Permissions"
+ message: str = "The user must have the `Site Administrator Explorer` role to perform metadata ingestion."
+ try:
+ # TableauSiteSource instance is per site, so each time we need to find-out user detail
+ # the site-role might be different on another site
+ logged_in_user: UserInfo = UserInfo.from_server(server=server)
+
+ if not logged_in_user.is_site_administrator_explorer():
+ report.warning(
+ title=title,
+ message=message,
+ context=f"user-name={logged_in_user.user_name}, role={logged_in_user.site_role}, site_id={logged_in_user.site_id}",
+ )
+
+ report.logged_in_user.append(logged_in_user)
+
+ except Exception as e:
+ report.warning(
+ title=title,
+ message="Failed to verify the user's role. The user must have `Site Administrator Explorer` role.",
+ context=f"{e}",
+ exc=e,
+ )
@platform_name("Tableau")
@@ -676,6 +705,7 @@ def _authenticate(self, site_content_url: str) -> None:
try:
logger.info(f"Authenticated to Tableau site: '{site_content_url}'")
self.server = self.config.make_tableau_client(site_content_url)
+ report_user_role(report=self.report, server=self.server)
# Note that we're not catching ConfigurationError, since we want that to throw.
except ValueError as e:
self.report.failure(
@@ -689,9 +719,17 @@ def test_connection(config_dict: dict) -> TestConnectionReport:
test_report = TestConnectionReport()
try:
source_config = TableauConfig.parse_obj_allow_extras(config_dict)
- source_config.make_tableau_client(source_config.site)
+
+ server = source_config.make_tableau_client(source_config.site)
+
test_report.basic_connectivity = CapabilityReport(capable=True)
+
+ test_report.capability_report = check_user_role(
+ logged_in_user=UserInfo.from_server(server=server)
+ )
+
except Exception as e:
+ logger.warning(f"{e}", exc_info=e)
test_report.basic_connectivity = CapabilityReport(
capable=False, failure_reason=str(e)
)
@@ -831,6 +869,8 @@ def __init__(
# when emitting custom SQL data sources.
self.custom_sql_ids_being_used: List[str] = []
+ report_user_role(report=report, server=server)
+
@property
def no_env_browse_prefix(self) -> str:
# Prefix to use with browse path (v1)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py
index d1dd0d92819991..ea0878143ef354 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_constant.py
@@ -81,3 +81,5 @@
PROJECT = "Project"
SITE = "Site"
IS_UNSUPPORTED_CUSTOM_SQL = "isUnsupportedCustomSql"
+SITE_PERMISSION = "sitePermission"
+SITE_ROLE = "SiteAdministratorExplorer"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py
new file mode 100644
index 00000000000000..f309622d12b91b
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_server_wrapper.py
@@ -0,0 +1,33 @@
+from dataclasses import dataclass
+
+from tableauserverclient import Server, UserItem
+
+from datahub.ingestion.source.tableau import tableau_constant as c
+
+
+@dataclass
+class UserInfo:
+ user_name: str
+ site_role: str
+ site_id: str
+
+ def is_site_administrator_explorer(self):
+ return self.site_role == c.SITE_ROLE
+
+ @staticmethod
+ def from_server(server: Server) -> "UserInfo":
+ assert server.user_id, "make the connection with tableau"
+
+ user: UserItem = server.users.get_by_id(server.user_id)
+
+ assert user.site_role, "site_role is not available" # to silent the lint
+
+ assert user.name, "user name is not available" # to silent the lint
+
+ assert server.site_id, "site identifier is not available" # to silent the lint
+
+ return UserInfo(
+ user_name=user.name,
+ site_role=user.site_role,
+ site_id=server.site_id,
+ )
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py
new file mode 100644
index 00000000000000..4a703faf6091b3
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_validation.py
@@ -0,0 +1,48 @@
+import logging
+from typing import Dict, Union
+
+from datahub.ingestion.api.source import CapabilityReport, SourceCapability
+from datahub.ingestion.source.tableau import tableau_constant as c
+from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
+
+logger = logging.getLogger(__name__)
+
+
+def check_user_role(
+ logged_in_user: UserInfo,
+) -> Dict[Union[SourceCapability, str], CapabilityReport]:
+ capability_dict: Dict[Union[SourceCapability, str], CapabilityReport] = {
+ c.SITE_PERMISSION: CapabilityReport(
+ capable=True,
+ )
+ }
+
+ failure_reason: str = (
+ "The user does not have the `Site Administrator Explorer` role."
+ )
+
+ mitigation_message_prefix: str = (
+ "Assign `Site Administrator Explorer` role to the user"
+ )
+ mitigation_message_suffix: str = "Refer to the setup guide: https://datahubproject.io/docs/quick-ingestion-guides/tableau/setup"
+
+ try:
+ # TODO: Add check for `Enable Derived Permissions`
+ if not logged_in_user.is_site_administrator_explorer():
+ capability_dict[c.SITE_PERMISSION] = CapabilityReport(
+ capable=False,
+ failure_reason=f"{failure_reason} Their current role is {logged_in_user.site_role}.",
+ mitigation_message=f"{mitigation_message_prefix} `{logged_in_user.user_name}`. {mitigation_message_suffix}",
+ )
+
+ return capability_dict
+
+ except Exception as e:
+ logger.warning(msg=e, exc_info=e)
+ capability_dict[c.SITE_PERMISSION] = CapabilityReport(
+ capable=False,
+ failure_reason="Failed to verify user role.",
+ mitigation_message=f"{mitigation_message_prefix}. {mitigation_message_suffix}", # user is unknown
+ )
+
+ return capability_dict
diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
index 5b557efdab0bb0..d6259a472b59e5 100644
--- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
+++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
@@ -7,6 +7,7 @@
import pytest
from freezegun import freeze_time
+from pydantic import ValidationError
from requests.adapters import ConnectionError
from tableauserverclient import PermissionsRule, Server
from tableauserverclient.models import (
@@ -21,7 +22,9 @@
from datahub.emitter.mce_builder import DEFAULT_ENV, make_schema_field_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.ingestion.run.pipeline import Pipeline, PipelineContext, PipelineInitError
+from datahub.ingestion.api.source import TestConnectionReport
+from datahub.ingestion.run.pipeline import Pipeline, PipelineContext
+from datahub.ingestion.source.tableau import tableau_constant as c
from datahub.ingestion.source.tableau.tableau import (
TableauConfig,
TableauSiteSource,
@@ -571,52 +574,28 @@ def test_extract_all_project(pytestconfig, tmp_path, mock_datahub_graph):
def test_value_error_projects_and_project_pattern(
pytestconfig, tmp_path, mock_datahub_graph
):
- # Ingestion should raise ValueError
- output_file_name: str = "tableau_project_pattern_precedence_mces.json"
- golden_file_name: str = "tableau_project_pattern_precedence_mces_golden.json"
-
new_config = config_source_default.copy()
new_config["projects"] = ["default"]
new_config["project_pattern"] = {"allow": ["^Samples$"]}
with pytest.raises(
- PipelineInitError,
+ ValidationError,
match=r".*projects is deprecated. Please use project_path_pattern only.*",
):
- tableau_ingest_common(
- pytestconfig,
- tmp_path,
- mock_data(),
- golden_file_name,
- output_file_name,
- mock_datahub_graph,
- pipeline_config=new_config,
- )
+ TableauConfig.parse_obj(new_config)
def test_project_pattern_deprecation(pytestconfig, tmp_path, mock_datahub_graph):
- # Ingestion should raise ValueError
- output_file_name: str = "tableau_project_pattern_deprecation_mces.json"
- golden_file_name: str = "tableau_project_pattern_deprecation_mces_golden.json"
-
new_config = config_source_default.copy()
del new_config["projects"]
new_config["project_pattern"] = {"allow": ["^Samples$"]}
new_config["project_path_pattern"] = {"allow": ["^Samples$"]}
with pytest.raises(
- PipelineInitError,
+ ValidationError,
match=r".*project_pattern is deprecated. Please use project_path_pattern only*",
):
- tableau_ingest_common(
- pytestconfig,
- tmp_path,
- mock_data(),
- golden_file_name,
- output_file_name,
- mock_datahub_graph,
- pipeline_config=new_config,
- )
+ TableauConfig.parse_obj(new_config)
def test_project_path_pattern_allow(pytestconfig, tmp_path, mock_datahub_graph):
@@ -1296,31 +1275,21 @@ def test_hidden_asset_tags(pytestconfig, tmp_path, mock_datahub_graph):
@pytest.mark.integration
def test_hidden_assets_without_ingest_tags(pytestconfig, tmp_path, mock_datahub_graph):
enable_logging()
- output_file_name: str = "tableau_hidden_asset_tags_error_mces.json"
- golden_file_name: str = "tableau_hidden_asset_tags_error_mces_golden.json"
new_config = config_source_default.copy()
new_config["tags_for_hidden_assets"] = ["hidden", "private"]
new_config["ingest_tags"] = False
with pytest.raises(
- PipelineInitError,
+ ValidationError,
match=r".*tags_for_hidden_assets is only allowed with ingest_tags enabled.*",
):
- tableau_ingest_common(
- pytestconfig,
- tmp_path,
- mock_data(),
- golden_file_name,
- output_file_name,
- mock_datahub_graph,
- pipeline_config=new_config,
- )
+ TableauConfig.parse_obj(new_config)
@freeze_time(FROZEN_TIME)
@pytest.mark.integration
-def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_graph):
+def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph):
with mock.patch(
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
mock_datahub_graph,
@@ -1357,11 +1326,99 @@ def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_gra
warnings = list(reporter.warnings)
- assert len(warnings) == 1
+ assert len(warnings) == 2
+
+ assert warnings[0].title == "Insufficient Permissions"
- assert warnings[0].title == "Derived Permission Error"
+ assert warnings[1].title == "Derived Permission Error"
- assert warnings[0].message == (
+ assert warnings[1].message == (
"Turn on your derived permissions. See for details "
"https://community.tableau.com/s/question/0D54T00000QnjHbSAJ/how-to-fix-the-permissionsmodeswitched-error"
)
+
+
+@freeze_time(FROZEN_TIME)
+@pytest.mark.integration
+def test_connection_report_test(requests_mock):
+ server_info_response = """
+
+
+ foo
+ 2.4
+
+
+
+ """
+
+ requests_mock.register_uri(
+ "GET",
+ "https://do-not-connect/api/2.4/serverInfo",
+ text=server_info_response,
+ status_code=200,
+ headers={"Content-Type": "application/xml"},
+ )
+
+ signin_response = """
+
+
+
+
+
+
+ """
+
+ requests_mock.register_uri(
+ "POST",
+ "https://do-not-connect/api/2.4/auth/signin",
+ text=signin_response,
+ status_code=200,
+ headers={"Content-Type": "application/xml"},
+ )
+
+ user_by_id_response = """
+
+
+
+ """
+
+ requests_mock.register_uri(
+ "GET",
+ "https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id",
+ text=user_by_id_response,
+ status_code=200,
+ headers={"Content-Type": "application/xml"},
+ )
+
+ report: TestConnectionReport = TableauSource.test_connection(config_source_default)
+
+ assert report
+ assert report.capability_report
+ assert report.capability_report.get(c.SITE_PERMISSION)
+ assert report.capability_report[c.SITE_PERMISSION].capable
+
+ # Role other than SiteAdministratorExplorer
+ user_by_id_response = """
+
+
+
+ """
+
+ requests_mock.register_uri(
+ "GET",
+ "https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id",
+ text=user_by_id_response,
+ status_code=200,
+ headers={"Content-Type": "application/xml"},
+ )
+
+ report = TableauSource.test_connection(config_source_default)
+
+ assert report
+ assert report.capability_report
+ assert report.capability_report.get(c.SITE_PERMISSION)
+ assert report.capability_report[c.SITE_PERMISSION].capable is False
+ assert (
+ report.capability_report[c.SITE_PERMISSION].failure_reason
+ == "The user does not have the `Site Administrator Explorer` role. Their current role is Explorer."
+ )