@@ -99,45 +99,76 @@ def _extend_lineage(
99
99
if destination_details is None :
100
100
destination_details = self ._get_destination_details (connector )
101
101
102
+ # Ensure platform is set to avoid URN creation issues
103
+ if not source_details .platform :
104
+ source_details .platform = self ._detect_source_platform (connector )
105
+
106
+ if not destination_details .platform :
107
+ destination_details .platform = "snowflake" # Default to snowflake
108
+
109
+ # Log the lineage information for debugging
110
+ logger .info (
111
+ f"Processing lineage for connector { connector .connector_id } : "
112
+ f"source_platform={ source_details .platform } , "
113
+ f"destination_platform={ destination_details .platform } , "
114
+ f"{ len (connector .lineage )} table lineage entries"
115
+ )
116
+
102
117
# Handle lineage truncation if needed
103
118
if len (connector .lineage ) >= MAX_TABLE_LINEAGE_PER_CONNECTOR :
104
119
self ._report_lineage_truncation (connector )
105
120
106
121
# Process each table lineage entry
107
122
for lineage in connector .lineage :
108
- # Create source and destination URNs
109
- source_urn = self ._create_dataset_urn (
110
- lineage .source_table ,
111
- source_details ,
112
- is_source = True ,
113
- )
114
-
115
- dest_urn = self ._create_dataset_urn (
116
- lineage .destination_table ,
117
- destination_details ,
118
- is_source = False ,
119
- )
120
-
121
- # Skip if either URN creation failed
122
- if not source_urn or not dest_urn :
123
- continue
123
+ try :
124
+ # Create source and destination URNs
125
+ source_urn = self ._create_dataset_urn (
126
+ lineage .source_table ,
127
+ source_details ,
128
+ is_source = True ,
129
+ )
124
130
125
- # Add URNs to lists (avoiding duplicates)
126
- if source_urn not in input_dataset_urn_list :
127
- input_dataset_urn_list .append (source_urn )
131
+ dest_urn = self ._create_dataset_urn (
132
+ lineage .destination_table ,
133
+ destination_details ,
134
+ is_source = False ,
135
+ )
128
136
129
- if dest_urn not in output_dataset_urn_list :
130
- output_dataset_urn_list .append (dest_urn )
137
+ # Skip if either URN creation failed
138
+ if not source_urn or not dest_urn :
139
+ logger .warning (
140
+ f"Skipping lineage for { lineage .source_table } -> { lineage .destination_table } : "
141
+ f"Failed to create URNs"
142
+ )
143
+ continue
144
+
145
+ # Add URNs to lists (avoiding duplicates)
146
+ if str (source_urn ) not in [str (u ) for u in input_dataset_urn_list ]:
147
+ input_dataset_urn_list .append (source_urn )
148
+
149
+ if str (dest_urn ) not in [str (u ) for u in output_dataset_urn_list ]:
150
+ output_dataset_urn_list .append (dest_urn )
151
+
152
+ # Create column lineage if enabled
153
+ if self .config .include_column_lineage :
154
+ self ._create_column_lineage (
155
+ lineage = lineage ,
156
+ source_urn = source_urn ,
157
+ dest_urn = dest_urn ,
158
+ fine_grained_lineage = fine_grained_lineage ,
159
+ )
131
160
132
- # Create column lineage if enabled
133
- if self .config .include_column_lineage :
134
- self ._create_column_lineage (
135
- lineage = lineage ,
136
- source_urn = source_urn ,
137
- dest_urn = dest_urn ,
138
- fine_grained_lineage = fine_grained_lineage ,
161
+ logger .debug (f"Created lineage from { source_urn } to { dest_urn } " )
162
+ except Exception as e :
163
+ logger .warning (
164
+ f"Error creating lineage for table { lineage .source_table } -> { lineage .destination_table } : { e } "
139
165
)
140
166
167
+ # Log the lineage that was created for debugging
168
+ logger .info (
169
+ f"Created lineage with { len (input_dataset_urn_list )} input URNs and { len (output_dataset_urn_list )} output URNs"
170
+ )
171
+
141
172
# Add URNs and lineage to the datajob
142
173
datajob .inlets .extend (input_dataset_urn_list )
143
174
datajob .outlets .extend (output_dataset_urn_list )
@@ -150,22 +181,6 @@ def _extend_lineage(
150
181
destination_details = destination_details ,
151
182
)
152
183
153
- # Add source and destination platform information to properties
154
- if source_details .platform :
155
- lineage_properties ["source.platform" ] = source_details .platform
156
- if destination_details .platform :
157
- lineage_properties ["destination.platform" ] = destination_details .platform
158
-
159
- # Add database information if available
160
- if source_details .database :
161
- lineage_properties ["source.database" ] = source_details .database
162
- if destination_details .database :
163
- lineage_properties ["destination.database" ] = destination_details .database
164
-
165
- # Add environment information
166
- lineage_properties ["source.env" ] = source_details .env or "PROD"
167
- lineage_properties ["destination.env" ] = destination_details .env or "PROD"
168
-
169
184
return lineage_properties
170
185
171
186
def _get_source_details (self , connector : Connector ) -> PlatformDetail :
@@ -241,17 +256,24 @@ def _create_dataset_urn(
241
256
platform = details .platform
242
257
if not platform :
243
258
platform = "snowflake" if not is_source else "external"
259
+ logger .info (
260
+ f"Using default platform { platform } for { 'source' if is_source else 'destination' } table { table_name } "
261
+ )
244
262
245
- # Include database in the table name if available
246
- full_table_name = (
247
- f"{ details .database .lower ()} .{ table_name } "
248
- if details .database
249
- else table_name
250
- )
263
+ # Include database in the table name if available and ensure it's lowercase
264
+ database = details .database .lower () if details .database else ""
265
+ full_table_name = f"{ database } .{ table_name } " if database else table_name
251
266
252
267
# Ensure environment is set
253
268
env = details .env or "PROD"
254
269
270
+ # Log the URN creation details for debugging
271
+ logger .debug (
272
+ f"Creating { 'source' if is_source else 'destination' } URN with: "
273
+ f"platform={ platform } , table_name={ full_table_name } , env={ env } , "
274
+ f"platform_instance={ details .platform_instance } "
275
+ )
276
+
255
277
return DatasetUrn .create_from_ids (
256
278
platform_id = platform ,
257
279
table_name = full_table_name ,
@@ -260,19 +282,10 @@ def _create_dataset_urn(
260
282
)
261
283
except Exception as e :
262
284
logger .warning (
263
- f"Failed to create { 'source' if is_source else 'destination' } URN: { e } "
285
+ f"Failed to create { 'source' if is_source else 'destination' } URN for { table_name } : { e } "
264
286
)
265
287
return None
266
288
267
- def _report_lineage_truncation (self , connector : Connector ) -> None :
268
- """Report warning about truncated lineage."""
269
- self .report .warning (
270
- title = "Table lineage truncated" ,
271
- message = f"The connector had more than { MAX_TABLE_LINEAGE_PER_CONNECTOR } table lineage entries. "
272
- f"Only the most recent { MAX_TABLE_LINEAGE_PER_CONNECTOR } entries were ingested." ,
273
- context = f"{ connector .connector_name } (connector_id: { connector .connector_id } )" ,
274
- )
275
-
276
289
def _create_column_lineage (
277
290
self ,
278
291
lineage ,
0 commit comments