@@ -485,6 +485,18 @@ class TableauConfig(
485
485
description = "Configuration settings for ingesting Tableau groups and their capabilities as custom properties." ,
486
486
)
487
487
488
+ ingest_hidden_assets : bool = Field (
489
+ True ,
490
+ description = "When enabled, hidden views and dashboards are ingested into Datahub. "
491
+ "If a dashboard or view is hidden in Tableau the luid is blank. Default of this config field is True." ,
492
+ )
493
+
494
+ tags_for_hidden_assets : List [str ] = Field (
495
+ default = [],
496
+ description = "Tags to be added to hidden dashboards and views. If a dashboard or view is hidden in Tableau the luid is blank. "
497
+ "This can only be used with ingest_tags enabled as it will overwrite tags entered from the UI." ,
498
+ )
499
+
488
500
# pre = True because we want to take some decision before pydantic initialize the configuration to default values
489
501
@root_validator (pre = True )
490
502
def projects_backward_compatibility (cls , values : Dict ) -> Dict :
@@ -510,6 +522,20 @@ def projects_backward_compatibility(cls, values: Dict) -> Dict:
510
522
511
523
return values
512
524
525
@root_validator()
def validate_config_values(cls, values: Dict) -> Dict:
    """Reject configs that set ``tags_for_hidden_assets`` without ``ingest_tags``.

    Args:
        values: The field values collected for the config model.

    Returns:
        The same ``values`` dict, unmodified, when the combination is valid.

    Raises:
        ValueError: If ``tags_for_hidden_assets`` is non-empty while
            ``ingest_tags`` is falsy or missing.
    """
    # A missing or empty tag list is always acceptable; only a non-empty
    # list requires ingest_tags to be switched on. Truthiness already
    # covers both "is not None" and "has at least one element".
    if values.get("tags_for_hidden_assets") and not values.get("ingest_tags"):
        raise ValueError(
            "tags_for_hidden_assets is only allowed with ingest_tags enabled. Be aware that this will overwrite tags entered from the UI."
        )
    return values
538
+
513
539
514
540
class WorkbookKey (ContainerKey ):
515
541
workbook_id : str
@@ -596,7 +622,16 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
596
622
num_datasource_field_skipped_no_name : int = 0
597
623
num_csql_field_skipped_no_name : int = 0
598
624
num_table_field_skipped_no_name : int = 0
625
+ # lineage
626
+ num_tables_with_upstream_lineage : int = 0
627
+ num_upstream_table_lineage : int = 0
628
+ num_upstream_fine_grained_lineage : int = 0
599
629
num_upstream_table_skipped_no_name : int = 0
630
+ num_upstream_table_skipped_no_columns : int = 0
631
+ num_upstream_table_failed_generate_reference : int = 0
632
+ num_upstream_table_lineage_failed_parse_sql : int = 0
633
+ num_upstream_fine_grained_lineage_failed_parse_sql : int = 0
634
+ num_hidden_assets_skipped : int = 0
600
635
601
636
602
637
@platform_name ("Tableau" )
@@ -1043,6 +1078,11 @@ def get_data_platform_instance(self) -> DataPlatformInstanceClass:
1043
1078
),
1044
1079
)
1045
1080
1081
def _is_hidden_view(self, dashboard_or_view: Dict) -> bool:
    """Return True when the given dashboard/view dict has no (truthy) LUID.

    Args:
        dashboard_or_view: Dict describing a dashboard or view; only the
            ``c.LUID`` key is inspected.

    Returns:
        ``True`` if the ``c.LUID`` entry is missing or falsy, else ``False``.
    """
    # LUID is blank if the view is hidden in the workbook.
    # More info here: https://help.tableau.com/current/api/metadata_api/en-us/reference/view.doc.html
    luid = dashboard_or_view.get(c.LUID)
    return not luid
1085
+
1046
1086
def get_connection_object_page (
1047
1087
self ,
1048
1088
query : str ,
@@ -1311,7 +1351,7 @@ def _create_upstream_table_lineage(
1311
1351
datasource : dict ,
1312
1352
browse_path : Optional [str ],
1313
1353
is_embedded_ds : bool = False ,
1314
- ) -> Tuple :
1354
+ ) -> Tuple [ List [ Upstream ], List [ FineGrainedLineage ]] :
1315
1355
upstream_tables : List [Upstream ] = []
1316
1356
fine_grained_lineages : List [FineGrainedLineage ] = []
1317
1357
table_id_to_urn = {}
@@ -1472,6 +1512,7 @@ def get_upstream_tables(
1472
1512
c .COLUMNS_CONNECTION
1473
1513
].get ("totalCount" )
1474
1514
if not is_custom_sql and not num_tbl_cols :
1515
+ self .report .num_upstream_table_skipped_no_columns += 1
1475
1516
logger .warning (
1476
1517
f"Skipping upstream table with id { table [c .ID ]} , no columns: { table } "
1477
1518
)
@@ -1488,6 +1529,7 @@ def get_upstream_tables(
1488
1529
table , default_schema_map = self .config .default_schema_map
1489
1530
)
1490
1531
except Exception as e :
1532
+ self .report .num_upstream_table_failed_generate_reference += 1
1491
1533
self .report .warning (
1492
1534
title = "Potentially Missing Lineage Issue" ,
1493
1535
message = "Failed to generate upstream reference" ,
@@ -1659,15 +1701,7 @@ def get_upstream_fields_from_custom_sql(
1659
1701
func_overridden_info = None , # Here we don't want to override any information from configuration
1660
1702
)
1661
1703
1662
- if parsed_result is None :
1663
- logger .info (
1664
- f"Failed to extract column level lineage from datasource { datasource_urn } "
1665
- )
1666
- return []
1667
- if parsed_result .debug_info .error :
1668
- logger .info (
1669
- f"Failed to extract column level lineage from datasource { datasource_urn } : { parsed_result .debug_info .error } "
1670
- )
1704
+ if parsed_result is None or parsed_result .debug_info .error :
1671
1705
return []
1672
1706
1673
1707
cll : List [ColumnLineageInfo ] = (
@@ -2031,6 +2065,8 @@ def _create_lineage_to_upstream_tables(
2031
2065
aspect_name = c .UPSTREAM_LINEAGE ,
2032
2066
aspect = upstream_lineage ,
2033
2067
)
2068
+ self .report .num_tables_with_upstream_lineage += 1
2069
+ self .report .num_upstream_table_lineage += len (upstream_tables )
2034
2070
2035
2071
@staticmethod
2036
2072
def _clean_tableau_query_parameters (query : str ) -> str :
@@ -2130,7 +2166,7 @@ def parse_custom_sql(
2130
2166
f"Overridden info upstream_db={ upstream_db } , platform_instance={ platform_instance } , platform={ platform } "
2131
2167
)
2132
2168
2133
- return create_lineage_sql_parsed_result (
2169
+ parsed_result = create_lineage_sql_parsed_result (
2134
2170
query = query ,
2135
2171
default_db = upstream_db ,
2136
2172
platform = platform ,
@@ -2140,6 +2176,21 @@ def parse_custom_sql(
2140
2176
schema_aware = not self .config .sql_parsing_disable_schema_awareness ,
2141
2177
)
2142
2178
2179
+ assert parsed_result is not None
2180
+
2181
+ if parsed_result .debug_info .table_error :
2182
+ logger .warning (
2183
+ f"Failed to extract table lineage from datasource { datasource_urn } : { parsed_result .debug_info .table_error } "
2184
+ )
2185
+ self .report .num_upstream_table_lineage_failed_parse_sql += 1
2186
+ elif parsed_result .debug_info .column_error :
2187
+ logger .warning (
2188
+ f"Failed to extract column level lineage from datasource { datasource_urn } : { parsed_result .debug_info .column_error } "
2189
+ )
2190
+ self .report .num_upstream_fine_grained_lineage_failed_parse_sql += 1
2191
+
2192
+ return parsed_result
2193
+
2143
2194
def _enrich_database_tables_with_parsed_schemas (
2144
2195
self , parsing_result : SqlParsingResult
2145
2196
) -> None :
@@ -2174,9 +2225,6 @@ def _create_lineage_from_unsupported_csql(
2174
2225
)
2175
2226
2176
2227
if parsed_result is None :
2177
- logger .info (
2178
- f"Failed to extract table level lineage for datasource { csql_urn } "
2179
- )
2180
2228
return
2181
2229
2182
2230
self ._enrich_database_tables_with_parsed_schemas (parsed_result )
@@ -2196,12 +2244,14 @@ def _create_lineage_from_unsupported_csql(
2196
2244
upstreams = upstream_tables ,
2197
2245
fineGrainedLineages = fine_grained_lineages ,
2198
2246
)
2199
-
2200
2247
yield self .get_metadata_change_proposal (
2201
2248
csql_urn ,
2202
2249
aspect_name = c .UPSTREAM_LINEAGE ,
2203
2250
aspect = upstream_lineage ,
2204
2251
)
2252
+ self .report .num_tables_with_upstream_lineage += 1
2253
+ self .report .num_upstream_table_lineage += len (upstream_tables )
2254
+ self .report .num_upstream_fine_grained_lineage += len (fine_grained_lineages )
2205
2255
2206
2256
def _get_schema_metadata_for_datasource (
2207
2257
self , datasource_fields : List [dict ]
@@ -2278,12 +2328,11 @@ def emit_datasource(
2278
2328
)
2279
2329
2280
2330
# Tags
2281
- if datasource_info :
2331
+ if datasource_info and self . config . ingest_tags :
2282
2332
tags = self .get_tags (datasource_info )
2283
- if tags :
2284
- dataset_snapshot .aspects .append (
2285
- builder .make_global_tag_aspect_with_tag_list (tags )
2286
- )
2333
+ dataset_snapshot .aspects .append (
2334
+ builder .make_global_tag_aspect_with_tag_list (tags )
2335
+ )
2287
2336
2288
2337
# Browse path
2289
2338
if browse_path and is_embedded_ds and workbook and workbook .get (c .NAME ):
@@ -2352,6 +2401,11 @@ def emit_datasource(
2352
2401
aspect_name = c .UPSTREAM_LINEAGE ,
2353
2402
aspect = upstream_lineage ,
2354
2403
)
2404
+ self .report .num_tables_with_upstream_lineage += 1
2405
+ self .report .num_upstream_table_lineage += len (upstream_tables )
2406
+ self .report .num_upstream_fine_grained_lineage += len (
2407
+ fine_grained_lineages
2408
+ )
2355
2409
2356
2410
# Datasource Fields
2357
2411
schema_metadata = self ._get_schema_metadata_for_datasource (
@@ -2669,7 +2723,13 @@ def emit_sheets(self) -> Iterable[MetadataWorkUnit]:
2669
2723
c .SHEETS_CONNECTION ,
2670
2724
sheets_filter ,
2671
2725
):
2672
- yield from self .emit_sheets_as_charts (sheet , sheet .get (c .WORKBOOK ))
2726
+ if self .config .ingest_hidden_assets or not self ._is_hidden_view (sheet ):
2727
+ yield from self .emit_sheets_as_charts (sheet , sheet .get (c .WORKBOOK ))
2728
+ else :
2729
+ self .report .num_hidden_assets_skipped += 1
2730
+ logger .debug (
2731
+ f"Skip view { sheet .get (c .ID )} because it's hidden (luid is blank)."
2732
+ )
2673
2733
2674
2734
def emit_sheets_as_charts (
2675
2735
self , sheet : dict , workbook : Optional [Dict ]
@@ -2760,11 +2820,17 @@ def emit_sheets_as_charts(
2760
2820
chart_snapshot .aspects .append (owner )
2761
2821
2762
2822
# Tags
2763
- tags = self .get_tags (sheet )
2764
- if tags :
2823
+ if self .config .ingest_tags :
2824
+ tags = self .get_tags (sheet )
2825
+ if len (self .config .tags_for_hidden_assets ) > 0 and self ._is_hidden_view (
2826
+ sheet
2827
+ ):
2828
+ tags .extend (self .config .tags_for_hidden_assets )
2829
+
2765
2830
chart_snapshot .aspects .append (
2766
2831
builder .make_global_tag_aspect_with_tag_list (tags )
2767
2832
)
2833
+
2768
2834
yield self .get_metadata_change_event (chart_snapshot )
2769
2835
if sheet_external_url is not None and self .config .ingest_embed_url is True :
2770
2836
yield self .new_work_unit (
@@ -2846,7 +2912,7 @@ def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUni
2846
2912
else None
2847
2913
)
2848
2914
2849
- tags = self .get_tags (workbook )
2915
+ tags = self .get_tags (workbook ) if self . config . ingest_tags else None
2850
2916
2851
2917
parent_key = None
2852
2918
project_luid : Optional [str ] = self ._get_workbook_project_luid (workbook )
@@ -2977,17 +3043,23 @@ def emit_dashboards(self) -> Iterable[MetadataWorkUnit]:
2977
3043
c .DASHBOARDS_CONNECTION ,
2978
3044
dashboards_filter ,
2979
3045
):
2980
- yield from self .emit_dashboard (dashboard , dashboard .get (c .WORKBOOK ))
3046
+ if self .config .ingest_hidden_assets or not self ._is_hidden_view (dashboard ):
3047
+ yield from self .emit_dashboard (dashboard , dashboard .get (c .WORKBOOK ))
3048
+ else :
3049
+ self .report .num_hidden_assets_skipped += 1
3050
+ logger .debug (
3051
+ f"Skip dashboard { dashboard .get (c .ID )} because it's hidden (luid is blank)."
3052
+ )
2981
3053
2982
def get_tags(self, obj: dict) -> List[str]:
    """Collect the tag names attached to ``obj``.

    Args:
        obj: Dict that may carry a list of tag dicts under ``c.TAGS``.

    Returns:
        A newly built list with the ``c.NAME`` value of every tag entry
        that is not ``None`` and has a truthy name; an empty list when
        ``obj`` has no (truthy) ``c.TAGS`` entry. This method does not
        consult ``config.ingest_tags``.
    """
    # `or []` folds a missing key and a falsy value into the same empty case,
    # so the comprehension below always yields a fresh list.
    raw_tags = obj.get(c.TAGS, []) or []
    return [
        tag[c.NAME] for tag in raw_tags if tag is not None and tag.get(c.NAME)
    ]
2991
3063
2992
3064
def emit_dashboard (
2993
3065
self , dashboard : dict , workbook : Optional [Dict ]
@@ -3038,8 +3110,13 @@ def emit_dashboard(
3038
3110
)
3039
3111
dashboard_snapshot .aspects .append (dashboard_info_class )
3040
3112
3041
- tags = self .get_tags (dashboard )
3042
- if tags :
3113
+ if self .config .ingest_tags :
3114
+ tags = self .get_tags (dashboard )
3115
+ if len (self .config .tags_for_hidden_assets ) > 0 and self ._is_hidden_view (
3116
+ dashboard
3117
+ ):
3118
+ tags .extend (self .config .tags_for_hidden_assets )
3119
+
3043
3120
dashboard_snapshot .aspects .append (
3044
3121
builder .make_global_tag_aspect_with_tag_list (tags )
3045
3122
)
0 commit comments