2
2
import functools
3
3
import logging
4
4
import os
5
- from typing import Iterable , List , Optional
5
+ from typing import Iterable , List , Optional , Set
6
6
7
7
from datahub .ingestion .api .common import PipelineContext
8
8
from datahub .ingestion .api .decorators import (
@@ -255,10 +255,8 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
255
255
for project in projects :
256
256
yield from self .bq_schema_extractor .get_project_workunits (project )
257
257
258
- if self .config .use_queries_v2 :
259
- # Always ingest View and Snapshot lineage with schema ingestion
258
+ if self .config .include_view_lineage :
260
259
self .report .set_ingestion_stage ("*" , "View and Snapshot Lineage" )
261
-
262
260
yield from self .lineage_extractor .get_lineage_workunits_for_views_and_snapshots (
263
261
[p .id for p in projects ],
264
262
self .bq_schema_extractor .view_refs_by_project ,
@@ -267,6 +265,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
267
265
self .bq_schema_extractor .snapshots_by_ref ,
268
266
)
269
267
268
+ if self .config .use_queries_v2 :
270
269
# if both usage and lineage are disabled then skip queries extractor piece
271
270
if (
272
271
not self .config .include_usage_statistics
@@ -276,13 +275,21 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
276
275
277
276
self .report .set_ingestion_stage ("*" , QUERIES_EXTRACTION )
278
277
278
+ discovered_tables : Set [str ] = set ()
279
+ if self .config .include_table_lineage :
280
+ discovered_tables .update (self .bq_schema_extractor .table_refs )
281
+
282
+ if self .config .include_view_lineage :
283
+ discovered_tables .update (self .bq_schema_extractor .view_snapshot_refs )
284
+
279
285
with BigQueryQueriesExtractor (
280
286
connection = self .config .get_bigquery_client (),
281
287
schema_api = self .bq_schema_extractor .schema_api ,
282
288
config = BigQueryQueriesExtractorConfig (
283
289
window = self .config ,
284
290
user_email_pattern = self .config .usage .user_email_pattern ,
285
- include_lineage = self .config .include_table_lineage ,
291
+ include_lineage = self .config .include_table_lineage
292
+ or self .config .include_view_lineage ,
286
293
include_usage_statistics = self .config .include_usage_statistics ,
287
294
include_operations = self .config .usage .include_operational_stats ,
288
295
top_n_queries = self .config .usage .top_n_queries ,
@@ -292,24 +299,23 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
292
299
filters = self .filters ,
293
300
identifiers = self .identifiers ,
294
301
schema_resolver = self .sql_parser_schema_resolver ,
295
- discovered_tables = self . bq_schema_extractor . table_refs ,
302
+ discovered_tables = discovered_tables ,
296
303
) as queries_extractor :
297
304
self .report .queries_extractor = queries_extractor .report
298
305
yield from queries_extractor .get_workunits_internal ()
299
306
300
307
else :
301
308
if self .config .include_usage_statistics :
302
309
yield from self .usage_extractor .get_usage_workunits (
303
- [p .id for p in projects ], self .bq_schema_extractor .table_refs
310
+ [p .id for p in projects ],
311
+ self .bq_schema_extractor .table_refs .union (
312
+ self .bq_schema_extractor .view_snapshot_refs
313
+ ),
304
314
)
305
315
306
316
if self .config .include_table_lineage :
307
317
yield from self .lineage_extractor .get_lineage_workunits (
308
318
[p .id for p in projects ],
309
- self .bq_schema_extractor .view_refs_by_project ,
310
- self .bq_schema_extractor .view_definitions ,
311
- self .bq_schema_extractor .snapshot_refs_by_project ,
312
- self .bq_schema_extractor .snapshots_by_ref ,
313
319
self .bq_schema_extractor .table_refs ,
314
320
)
315
321
0 commit comments