Skip to content

Commit 4c0b568

Browse files
authored
feat(ingest): add sql parser trace mode (#12210)
1 parent dc82251 commit 4c0b568

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py

+15 -5
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@
6666
"SQL_LINEAGE_TIMEOUT_ENABLED", True
6767
)
6868
SQL_LINEAGE_TIMEOUT_SECONDS = 10
69+
SQL_PARSER_TRACE = get_boolean_env_variable("DATAHUB_SQL_PARSER_TRACE", False)
6970

7071

7172
# These rules are a subset of the rules in sqlglot.optimizer.optimizer.RULES.
@@ -365,10 +366,11 @@ def _sqlglot_force_column_normalizer(
365366

366367
return node
367368

368-
# logger.debug(
369-
# "Prior to case normalization sql %s",
370-
# statement.sql(pretty=True, dialect=dialect),
371-
# )
369+
if SQL_PARSER_TRACE:
370+
logger.debug(
371+
"Prior to case normalization sql %s",
372+
statement.sql(pretty=True, dialect=dialect),
373+
)
372374
statement = statement.transform(_sqlglot_force_column_normalizer, copy=False)
373375
# logger.debug(
374376
# "Sql after casing normalization %s",
@@ -562,7 +564,7 @@ def _select_statement_cll( # noqa: C901
562564
)
563565
)
564566

565-
# TODO: Also extract referenced columns (aka auxillary / non-SELECT lineage)
567+
# TODO: Also extract referenced columns (aka auxiliary / non-SELECT lineage)
566568
except (sqlglot.errors.OptimizeError, ValueError, IndexError) as e:
567569
raise SqlUnderstandingError(
568570
f"sqlglot failed to compute some lineage: {e}"
@@ -1022,6 +1024,14 @@ def _sqlglot_lineage_inner(
10221024
logger.debug(
10231025
f"Resolved {total_schemas_resolved} of {total_tables_discovered} table schemas"
10241026
)
1027+
if SQL_PARSER_TRACE:
1028+
for qualified_table, schema_info in table_name_schema_mapping.items():
1029+
logger.debug(
1030+
"Table name %s resolved to %s with schema %s",
1031+
qualified_table,
1032+
table_name_urn_mapping[qualified_table],
1033+
schema_info,
1034+
)
10251035

10261036
column_lineage: Optional[List[_ColumnLineageInfo]] = None
10271037
try:

0 commit comments

Comments
 (0)