Skip to content

Commit 32b654c

Browse files
feat(powerbi): Report to Dashboard lineage (#12451)
1 parent ac13f25 commit 32b654c

20 files changed

+188
-138
lines changed

metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py

+1
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ class Constant:
132132
ACTIVE = "Active"
133133
SQL_PARSING_FAILURE = "SQL Parsing Failure"
134134
M_QUERY_NULL = '"null"'
135+
REPORT_WEB_URL = "reportWebUrl"
135136

136137

137138
@dataclass

metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py

+28-3
Original file line numberDiff line numberDiff line change
@@ -582,8 +582,11 @@ def tile_custom_properties(tile: powerbi_data_classes.Tile) -> dict:
582582
if tile.dataset is not None and tile.dataset.webUrl is not None:
583583
custom_properties[Constant.DATASET_WEB_URL] = tile.dataset.webUrl
584584

585-
if tile.report is not None and tile.report.id is not None:
586-
custom_properties[Constant.REPORT_ID] = tile.report.id
585+
if tile.report_id is not None:
586+
custom_properties[Constant.REPORT_ID] = tile.report_id
587+
588+
if tile.report is not None and tile.report.webUrl is not None:
589+
custom_properties[Constant.REPORT_WEB_URL] = tile.report.webUrl
587590

588591
return custom_properties
589592

@@ -1053,6 +1056,7 @@ def report_to_dashboard(
10531056
report: powerbi_data_classes.Report,
10541057
chart_mcps: List[MetadataChangeProposalWrapper],
10551058
user_mcps: List[MetadataChangeProposalWrapper],
1059+
dashboard_edges: List[EdgeClass],
10561060
) -> List[MetadataChangeProposalWrapper]:
10571061
"""
10581062
Map PowerBi report to Datahub dashboard
@@ -1074,6 +1078,7 @@ def report_to_dashboard(
10741078
charts=chart_urn_list,
10751079
lastModified=ChangeAuditStamps(),
10761080
dashboardUrl=report.webUrl,
1081+
dashboards=dashboard_edges,
10771082
)
10781083

10791084
info_mcp = self.new_mcp(
@@ -1167,8 +1172,28 @@ def report_to_datahub_work_units(
11671172
ds_mcps = self.to_datahub_dataset(report.dataset, workspace)
11681173
chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps)
11691174

1175+
# find all dashboards with a Tile referencing this report
1176+
downstream_dashboards_edges = []
1177+
for d in workspace.dashboards.values():
1178+
if any(t.report_id == report.id for t in d.tiles):
1179+
dashboard_urn = builder.make_dashboard_urn(
1180+
platform=self.__config.platform_name,
1181+
platform_instance=self.__config.platform_instance,
1182+
name=d.get_urn_part(),
1183+
)
1184+
edge = EdgeClass(
1185+
destinationUrn=dashboard_urn,
1186+
sourceUrn=None,
1187+
created=None,
1188+
lastModified=None,
1189+
properties=None,
1190+
)
1191+
downstream_dashboards_edges.append(edge)
1192+
11701193
# Let's convert report to datahub dashboard
1171-
report_mcps = self.report_to_dashboard(workspace, report, chart_mcps, user_mcps)
1194+
report_mcps = self.report_to_dashboard(
1195+
workspace, report, chart_mcps, user_mcps, downstream_dashboards_edges
1196+
)
11721197

11731198
# Now add MCPs in sequence
11741199
mcps.extend(ds_mcps)

metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -286,11 +286,15 @@ class CreatedFrom(Enum):
286286
id: str
287287
title: str
288288
embedUrl: str
289-
dataset: Optional["PowerBIDataset"]
290289
dataset_id: Optional[str]
291-
report: Optional[Report]
290+
report_id: Optional[str]
292291
createdFrom: CreatedFrom
293292

293+
# In a first pass, `dataset_id` and/or `report_id` are filled in.
294+
# In a subsequent pass, the objects are populated.
295+
dataset: Optional["PowerBIDataset"]
296+
report: Optional[Report]
297+
294298
def get_urn_part(self):
295299
return f"charts.{self.id}"
296300

metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py

+11-36
Original file line numberDiff line numberDiff line change
@@ -337,41 +337,6 @@ def get_tiles(self, workspace: Workspace, dashboard: Dashboard) -> List[Tile]:
337337
-tiles), there is no information available on pagination
338338
339339
"""
340-
341-
def new_dataset_or_report(tile_instance: Any) -> dict:
342-
"""
343-
Find out which is the data source for tile. It is either REPORT or DATASET
344-
"""
345-
report_fields = {
346-
Constant.REPORT: (
347-
self.get_report(
348-
workspace=workspace,
349-
report_id=tile_instance.get(Constant.REPORT_ID),
350-
)
351-
if tile_instance.get(Constant.REPORT_ID) is not None
352-
else None
353-
),
354-
Constant.CREATED_FROM: Tile.CreatedFrom.UNKNOWN,
355-
}
356-
357-
# reportId and datasetId are exclusive in tile_instance
358-
# if datasetId is present that means tile is created from dataset
359-
# if reportId is present that means tile is created from report
360-
# if both i.e. reportId and datasetId are not present then tile is created from some visualization
361-
if tile_instance.get(Constant.REPORT_ID) is not None:
362-
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.REPORT
363-
elif tile_instance.get(Constant.DATASET_ID) is not None:
364-
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.DATASET
365-
else:
366-
report_fields[Constant.CREATED_FROM] = Tile.CreatedFrom.VISUALIZATION
367-
368-
title: Optional[str] = tile_instance.get(Constant.TITLE)
369-
_id: Optional[str] = tile_instance.get(Constant.ID)
370-
created_from: Any = report_fields[Constant.CREATED_FROM]
371-
logger.info(f"Tile {title}({_id}) is created from {created_from}")
372-
373-
return report_fields
374-
375340
tile_list_endpoint: str = self.get_tiles_endpoint(
376341
workspace, dashboard_id=dashboard.id
377342
)
@@ -393,8 +358,18 @@ def new_dataset_or_report(tile_instance: Any) -> dict:
393358
title=instance.get(Constant.TITLE),
394359
embedUrl=instance.get(Constant.EMBED_URL),
395360
dataset_id=instance.get(Constant.DATASET_ID),
361+
report_id=instance.get(Constant.REPORT_ID),
396362
dataset=None,
397-
**new_dataset_or_report(instance),
363+
report=None,
364+
createdFrom=(
365+
# In the past we considered that only one of the two report_id or dataset_id would be present
366+
# but we have seen cases where both are present. If both are present, we prioritize the report.
367+
Tile.CreatedFrom.REPORT
368+
if instance.get(Constant.REPORT_ID)
369+
else Tile.CreatedFrom.DATASET
370+
if instance.get(Constant.DATASET_ID)
371+
else Tile.CreatedFrom.VISUALIZATION
372+
),
398373
)
399374
for instance in tile_dict
400375
if instance is not None

metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -625,13 +625,26 @@ def fill_dashboards() -> None:
625625
dashboard.tiles = self._get_resolver().get_tiles(
626626
workspace, dashboard=dashboard
627627
)
628-
# set the dataset for tiles
628+
# set the dataset and the report for tiles
629629
for tile in dashboard.tiles:
630+
# In Power BI, dashboards, reports, and datasets are tightly scoped to the workspace they belong to.
631+
# https://learn.microsoft.com/en-us/power-bi/collaborate-share/service-new-workspaces
632+
if tile.report_id:
633+
tile.report = workspace.reports.get(tile.report_id)
634+
if tile.report is None:
635+
self.reporter.info(
636+
title="Missing Report Lineage For Tile",
637+
message="A Report reference that failed to be resolved. Please ensure that 'extract_reports' is set to True in the configuration.",
638+
context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, report-id: {tile.report_id}",
639+
)
640+
# However, semantic models (aka datasets) can be shared across workspaces
641+
# https://learn.microsoft.com/en-us/fabric/admin/portal-workspace#use-semantic-models-across-workspaces
642+
# That's why the global 'dataset_registry' is required
630643
if tile.dataset_id:
631644
tile.dataset = self.dataset_registry.get(tile.dataset_id)
632645
if tile.dataset is None:
633646
self.reporter.info(
634-
title="Missing Lineage For Tile",
647+
title="Missing Dataset Lineage For Tile",
635648
message="A cross-workspace reference that failed to be resolved. Please ensure that no global workspace is being filtered out due to the workspace_id_pattern.",
636649
context=f"workspace-name: {workspace.name}, tile-name: {tile.title}, dataset-id: {tile.dataset_id}",
637650
)
@@ -653,10 +666,10 @@ def fill_dashboard_tags() -> None:
653666
for dashboard in workspace.dashboards.values():
654667
dashboard.tags = workspace.dashboard_endorsements.get(dashboard.id, [])
655668

669+
# fill reports first since some dashboards may reference a report
670+
fill_reports()
656671
if self.__config.extract_dashboards:
657672
fill_dashboards()
658-
659-
fill_reports()
660673
fill_dashboard_tags()
661674
self._fill_independent_datasets(workspace=workspace)
662675

metadata-ingestion/tests/integration/powerbi/golden_test_admin_access_not_allowed.json

+3-2
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@
3939
"aspect": {
4040
"json": {
4141
"customProperties": {
42-
"createdFrom": "Dataset",
43-
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445"
42+
"createdFrom": "Report",
43+
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
44+
"reportId": "5b218778-e7a5-4d73-8187-f10824047715"
4445
},
4546
"title": "test_tile",
4647
"description": "test_tile",

metadata-ingestion/tests/integration/powerbi/golden_test_cll.json

+3-2
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,10 @@
8181
"aspect": {
8282
"json": {
8383
"customProperties": {
84-
"createdFrom": "Dataset",
84+
"createdFrom": "Report",
8585
"datasetId": "05169CD2-E713-41E6-9600-1D8066D95445",
86-
"datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details"
86+
"datasetWebUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details",
87+
"reportId": "5b218778-e7a5-4d73-8187-f10824047715"
8788
},
8889
"title": "test_tile",
8990
"description": "test_tile",

0 commit comments

Comments
 (0)