def report_user_role(report: TableauSourceReport, server: Server) -> None:
    """Look up the signed-in user's site role and record it on ``report``.

    The user is resolved through ``UserInfo.from_server``. When the user is
    not a Site Administrator Explorer, a warning is added to ``report``. The
    resolved user is appended to ``report.logged_in_user`` in either case.
    Any exception raised along the way is converted into a warning on
    ``report`` instead of propagating to the caller.
    """
    warning_title: str = "Insufficient Permissions"
    role_required: str = "The user must have the `Site Administrator Explorer` role to perform metadata ingestion."

    try:
        # A TableauSiteSource instance is per site, so the user detail is
        # fetched each time: the site role may differ from site to site.
        current_user: UserInfo = UserInfo.from_server(server=server)
        has_required_role = current_user.is_site_administrator_explorer()

        if not has_required_role:
            user_context = (
                f"user-name={current_user.user_name}, "
                f"role={current_user.site_role}, "
                f"site_id={current_user.site_id}"
            )
            report.warning(
                title=warning_title,
                message=role_required,
                context=user_context,
            )

        report.logged_in_user.append(current_user)
    except Exception as e:
        # Best effort: a failed role lookup is reported, never raised.
        report.warning(
            title=warning_title,
            message="Failed to verify the user's role. The user must have `Site Administrator Explorer` role.",
            context=str(e),
            exc=e,
        )
@dataclass
class UserInfo:
    """Identity and site role of the user signed in to a Tableau site."""

    # Name of the signed-in user, taken from the fetched user item.
    user_name: str
    # Site role string of the user, taken from the fetched user item.
    site_role: str
    # Identifier of the site the server connection is signed in to.
    site_id: str

    def is_site_administrator_explorer(self) -> bool:
        """Return True when the user's site role equals ``c.SITE_ROLE``."""
        return self.site_role == c.SITE_ROLE

    @staticmethod
    def from_server(server: "Server") -> "UserInfo":
        """Build a ``UserInfo`` for the user signed in on ``server``.

        Args:
            server: A server object exposing ``user_id``, ``site_id`` and
                ``users.get_by_id``.

        Returns:
            The user's name and site role together with the server's site id.

        Raises:
            ValueError: If the server has no user id or site id, or the
                fetched user has no name or site role.
        """
        # Explicit raises instead of ``assert``: asserts are removed under
        # ``python -O``, which would let missing values through unchecked.
        # These checks also narrow the Optional types for the type checker.
        if not server.user_id:
            raise ValueError("make the connection with tableau")

        user: UserItem = server.users.get_by_id(server.user_id)

        if not user.site_role:
            raise ValueError("site_role is not available")

        if not user.name:
            raise ValueError("user name is not available")

        if not server.site_id:
            raise ValueError("site identifier is not available")

        return UserInfo(
            user_name=user.name,
            site_role=user.site_role,
            site_id=server.site_id,
        )
def check_user_role(
    logged_in_user: UserInfo,
) -> Dict[Union[SourceCapability, str], CapabilityReport]:
    """Build the site-permission capability report for ``logged_in_user``.

    The returned dict has a single ``c.SITE_PERMISSION`` entry. It is capable
    when the user is a Site Administrator Explorer. Otherwise it is not
    capable and carries a failure reason and mitigation message. If the role
    check itself raises, the exception is logged and a generic "not capable"
    entry is returned.
    """
    setup_guide: str = "Refer to the setup guide: https://datahubproject.io/docs/quick-ingestion-guides/tableau/setup"
    assign_role: str = "Assign `Site Administrator Explorer` role to the user"
    missing_role: str = (
        "The user does not have the `Site Administrator Explorer` role."
    )

    # Start from "capable" and downgrade only when a problem is found.
    capabilities: Dict[Union[SourceCapability, str], CapabilityReport] = {
        c.SITE_PERMISSION: CapabilityReport(
            capable=True,
        )
    }

    try:
        # TODO: Add check for `Enable Derived Permissions`
        if logged_in_user.is_site_administrator_explorer():
            return capabilities

        capabilities[c.SITE_PERMISSION] = CapabilityReport(
            capable=False,
            failure_reason=f"{missing_role} Their current role is {logged_in_user.site_role}.",
            mitigation_message=f"{assign_role} `{logged_in_user.user_name}`. {setup_guide}",
        )
    except Exception as e:
        logger.warning(msg=e, exc_info=e)
        # The user is unknown here, so the mitigation message omits the name.
        capabilities[c.SITE_PERMISSION] = CapabilityReport(
            capable=False,
            failure_reason="Failed to verify user role.",
            mitigation_message=f"{assign_role}. {setup_guide}",
        )

    return capabilities
def test_project_pattern_deprecation(pytestconfig, tmp_path, mock_datahub_graph):
    """Parsing a config with both project patterns must raise ValidationError.

    The config is a copy of ``config_source_default`` with ``projects``
    removed and both ``project_pattern`` and ``project_path_pattern`` set.
    """
    new_config = config_source_default.copy()
    del new_config["projects"]
    new_config["project_pattern"] = {"allow": ["^Samples$"]}
    new_config["project_path_pattern"] = {"allow": ["^Samples$"]}

    with pytest.raises(
        ValidationError,
        # Ends in ``only.*``; the previous ``only*`` meant "onl" followed by
        # zero or more "y", which was a typo for the trailing wildcard.
        match=r".*project_pattern is deprecated. Please use project_path_pattern only.*",
    ):
        TableauConfig.parse_obj(new_config)
@freeze_time(FROZEN_TIME) @pytest.mark.integration -def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_graph): +def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph): with mock.patch( "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", mock_datahub_graph, @@ -1357,11 +1326,99 @@ def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_gra warnings = list(reporter.warnings) - assert len(warnings) == 1 + assert len(warnings) == 2 + + assert warnings[0].title == "Insufficient Permissions" - assert warnings[0].title == "Derived Permission Error" + assert warnings[1].title == "Derived Permission Error" - assert warnings[0].message == ( + assert warnings[1].message == ( "Turn on your derived permissions. See for details " "https://community.tableau.com/s/question/0D54T00000QnjHbSAJ/how-to-fix-the-permissionsmodeswitched-error" ) + + +@freeze_time(FROZEN_TIME) +@pytest.mark.integration +def test_connection_report_test(requests_mock): + server_info_response = """ + + + foo + 2.4 + + + + """ + + requests_mock.register_uri( + "GET", + "https://do-not-connect/api/2.4/serverInfo", + text=server_info_response, + status_code=200, + headers={"Content-Type": "application/xml"}, + ) + + signin_response = """ + + + + + + + """ + + requests_mock.register_uri( + "POST", + "https://do-not-connect/api/2.4/auth/signin", + text=signin_response, + status_code=200, + headers={"Content-Type": "application/xml"}, + ) + + user_by_id_response = """ + + + + """ + + requests_mock.register_uri( + "GET", + "https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id", + text=user_by_id_response, + status_code=200, + headers={"Content-Type": "application/xml"}, + ) + + report: TestConnectionReport = TableauSource.test_connection(config_source_default) + + assert report + assert report.capability_report + assert report.capability_report.get(c.SITE_PERMISSION) + assert 
report.capability_report[c.SITE_PERMISSION].capable + + # Role other than SiteAdministratorExplorer + user_by_id_response = """ + + + + """ + + requests_mock.register_uri( + "GET", + "https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id", + text=user_by_id_response, + status_code=200, + headers={"Content-Type": "application/xml"}, + ) + + report = TableauSource.test_connection(config_source_default) + + assert report + assert report.capability_report + assert report.capability_report.get(c.SITE_PERMISSION) + assert report.capability_report[c.SITE_PERMISSION].capable is False + assert ( + report.capability_report[c.SITE_PERMISSION].failure_reason + == "The user does not have the `Site Administrator Explorer` role. Their current role is Explorer." + )