1
1
package io .datahubproject .iceberg .catalog ;
2
2
3
3
import static com .linkedin .metadata .Constants .*;
4
- import static com .linkedin .metadata .utils .GenericRecordUtils .serializeAspect ;
5
4
import static io .datahubproject .iceberg .catalog .Utils .*;
6
5
7
- import com .google .common .util .concurrent .Striped ;
8
- import com .linkedin .common .AuditStamp ;
6
+ import com .google .common .annotations .VisibleForTesting ;
9
7
import com .linkedin .common .FabricType ;
8
+ import com .linkedin .common .Status ;
10
9
import com .linkedin .common .urn .DatasetUrn ;
11
10
import com .linkedin .common .urn .Urn ;
11
+ import com .linkedin .container .Container ;
12
12
import com .linkedin .data .template .RecordTemplate ;
13
13
import com .linkedin .dataplatforminstance .IcebergWarehouseInfo ;
14
+ import com .linkedin .dataset .DatasetProperties ;
14
15
import com .linkedin .dataset .IcebergCatalogInfo ;
15
16
import com .linkedin .entity .EnvelopedAspect ;
16
- import com .linkedin .events . metadata .ChangeType ;
17
+ import com .linkedin .metadata .aspect . batch . AspectsBatch ;
17
18
import com .linkedin .metadata .entity .EntityService ;
18
- import com .linkedin .mxe . MetadataChangeProposal ;
19
+ import com .linkedin .metadata . entity . IngestResult ;
19
20
import com .linkedin .platformresource .PlatformResourceInfo ;
20
21
import com .linkedin .secret .DataHubSecretValue ;
21
22
import com .linkedin .util .Pair ;
24
25
import io .datahubproject .metadata .services .SecretService ;
25
26
import java .net .URISyntaxException ;
26
27
import java .util .*;
27
- import java .util .concurrent .locks .Lock ;
28
28
import lombok .Getter ;
29
29
import lombok .SneakyThrows ;
30
- import org . apache . iceberg . CatalogUtil ;
30
+ import lombok . extern . slf4j . Slf4j ;
31
31
import org .apache .iceberg .catalog .TableIdentifier ;
32
32
import org .apache .iceberg .exceptions .*;
33
33
34
+ @ Slf4j
34
35
public class DataHubIcebergWarehouse {
35
36
36
37
public static final String DATASET_ICEBERG_METADATA_ASPECT_NAME = "icebergCatalogInfo" ;
@@ -47,11 +48,8 @@ public class DataHubIcebergWarehouse {
47
48
48
49
@ Getter private final String platformInstance ;
49
50
50
- // TODO: Need to handle locks for deployments with multiple GMS replicas.
51
- private static final Striped <Lock > resourceLocks =
52
- Striped .lazyWeakLock (Runtime .getRuntime ().availableProcessors () * 2 );
53
-
54
- private DataHubIcebergWarehouse (
51
+ @ VisibleForTesting
52
+ DataHubIcebergWarehouse (
55
53
String platformInstance ,
56
54
IcebergWarehouseInfo icebergWarehouse ,
57
55
EntityService entityService ,
@@ -121,39 +119,96 @@ public String getDataRoot() {
121
119
return icebergWarehouse .getDataRoot ();
122
120
}
123
121
122
+ @ SneakyThrows
124
123
public Optional <DatasetUrn > getDatasetUrn (TableIdentifier tableIdentifier ) {
125
124
Urn resourceUrn = resourceUrn (tableIdentifier );
126
- PlatformResourceInfo platformResourceInfo =
127
- (PlatformResourceInfo )
128
- entityService .getLatestAspect (
129
- operationContext , resourceUrn , PLATFORM_RESOURCE_INFO_ASPECT_NAME );
130
- if (platformResourceInfo == null ) {
125
+ Optional <PlatformResourceInfo > platformResourceInfo =
126
+ getLatestAspectNonRemoved (resourceUrn , PLATFORM_RESOURCE_INFO_ASPECT_NAME );
127
+
128
+ if (platformResourceInfo .isEmpty ()) {
131
129
return Optional .empty ();
132
130
}
133
- try {
134
- return Optional .of (DatasetUrn .createFromString (platformResourceInfo .getPrimaryKey ()));
135
- } catch (URISyntaxException e ) {
136
- throw new RuntimeException ("Invalid dataset urn " + platformResourceInfo .getPrimaryKey (), e );
131
+
132
+ return Optional .of (DatasetUrn .createFromString (platformResourceInfo .get ().getPrimaryKey ()));
133
+ }
134
+
135
+ private <T extends RecordTemplate > Optional <T > getLatestAspectNonRemoved (
136
+ Urn urn , String aspectName ) {
137
+ Map <Urn , List <RecordTemplate >> aspectsMap =
138
+ entityService .getLatestAspects (
139
+ operationContext , Set .of (urn ), Set .of (STATUS_ASPECT_NAME , aspectName ), false );
140
+
141
+ if (aspectsMap == null || aspectsMap .isEmpty ()) {
142
+ return Optional .empty ();
143
+ }
144
+ List <RecordTemplate > aspects = aspectsMap .get (urn );
145
+ if (aspects == null || aspects .isEmpty ()) {
146
+ return Optional .empty ();
137
147
}
148
+
149
+ T result = null ;
150
+
151
+ for (RecordTemplate aspect : aspects ) {
152
+ if (aspect instanceof Status status ) {
153
+ if (status .isRemoved ()) {
154
+ return Optional .empty ();
155
+ }
156
+ } else {
157
+ result = (T ) aspect ;
158
+ }
159
+ }
160
+
161
+ return Optional .ofNullable (result );
138
162
}
139
163
140
- public IcebergCatalogInfo getIcebergMetadata (TableIdentifier tableIdentifier ) {
164
+ private Optional <EnvelopedAspect > getLatestEnvelopedAspectNonRemoved (Urn urn , String aspectName )
165
+ throws URISyntaxException {
166
+
167
+ Map <Urn , List <EnvelopedAspect >> aspectsMap =
168
+ entityService .getLatestEnvelopedAspects (
169
+ operationContext , Set .of (urn ), Set .of (STATUS_ASPECT_NAME , aspectName ), false );
170
+
171
+ if (aspectsMap == null || aspectsMap .isEmpty ()) {
172
+ return Optional .empty ();
173
+ }
174
+ List <EnvelopedAspect > aspects = aspectsMap .get (urn );
175
+ if (aspects == null || aspects .isEmpty ()) {
176
+ return Optional .empty ();
177
+ }
178
+
179
+ EnvelopedAspect result = null ;
180
+
181
+ for (EnvelopedAspect aspect : aspects ) {
182
+ if (STATUS_ASPECT_NAME .equals (aspect .getName ())) {
183
+ Status status = new Status (aspect .getValue ().data ());
184
+ if (status .isRemoved ()) {
185
+ return Optional .empty ();
186
+ }
187
+ } else {
188
+ result = aspect ;
189
+ }
190
+ }
191
+
192
+ return Optional .ofNullable (result );
193
+ }
194
+
195
+ public Optional <IcebergCatalogInfo > getIcebergMetadata (TableIdentifier tableIdentifier ) {
141
196
Optional <DatasetUrn > datasetUrn = getDatasetUrn (tableIdentifier );
142
197
if (datasetUrn .isEmpty ()) {
143
- return null ;
198
+ return Optional . empty () ;
144
199
}
145
200
146
- IcebergCatalogInfo icebergMeta =
147
- (IcebergCatalogInfo )
148
- entityService .getLatestAspect (
149
- operationContext , datasetUrn .get (), DATASET_ICEBERG_METADATA_ASPECT_NAME );
201
+ Optional <IcebergCatalogInfo > icebergMeta =
202
+ getLatestAspectNonRemoved (datasetUrn .get (), DATASET_ICEBERG_METADATA_ASPECT_NAME );
150
203
151
- if (icebergMeta == null ) {
152
- throw new IllegalStateException (
204
+ if (icebergMeta .isEmpty ()) {
205
+ // possibly some deletion cleanup is pending; log error & return as if dataset doesn't exist.
206
+ log .error (
153
207
String .format (
154
208
"IcebergMetadata not found for resource %s, dataset %s" ,
155
209
resourceUrn (tableIdentifier ), datasetUrn .get ()));
156
210
}
211
+
157
212
return icebergMeta ;
158
213
}
159
214
@@ -165,19 +220,19 @@ public Pair<EnvelopedAspect, DatasetUrn> getIcebergMetadataEnveloped(
165
220
}
166
221
167
222
try {
168
- EnvelopedAspect existingEnveloped =
169
- entityService .getLatestEnvelopedAspect (
170
- operationContext ,
171
- DATASET_ENTITY_NAME ,
172
- datasetUrn .get (),
173
- DATASET_ICEBERG_METADATA_ASPECT_NAME );
174
- if (existingEnveloped == null ) {
175
- throw new IllegalStateException (
223
+ Optional <EnvelopedAspect > existingEnveloped =
224
+ getLatestEnvelopedAspectNonRemoved (
225
+ datasetUrn .get (), DATASET_ICEBERG_METADATA_ASPECT_NAME );
226
+ if (existingEnveloped .isEmpty ()) {
227
+ // possibly some deletion cleanup is pending; log error & return as if dataset doesn't
228
+ // exist.
229
+ log .error (
176
230
String .format (
177
231
"IcebergMetadata not found for resource %s, dataset %s" ,
178
232
resourceUrn (tableIdentifier ), datasetUrn .get ()));
233
+ return null ;
179
234
}
180
- return Pair .of (existingEnveloped , datasetUrn .get ());
235
+ return Pair .of (existingEnveloped . get () , datasetUrn .get ());
181
236
} catch (Exception e ) {
182
237
throw new RuntimeException (
183
238
"Error fetching IcebergMetadata aspect for dataset " + datasetUrn .get (), e );
@@ -186,79 +241,121 @@ public Pair<EnvelopedAspect, DatasetUrn> getIcebergMetadataEnveloped(
186
241
187
242
public boolean deleteDataset (TableIdentifier tableIdentifier ) {
188
243
Urn resourceUrn = resourceUrn (tableIdentifier );
244
+ if (!entityService .exists (operationContext , resourceUrn )) {
245
+ return false ;
246
+ }
189
247
190
- // guard against concurrent modifications that depend on the resource (rename table/view)
191
- Lock lock = resourceLocks .get (resourceUrn );
192
- lock .lock ();
193
- try {
194
- if (!entityService .exists (operationContext , resourceUrn )) {
195
- return false ;
196
- }
197
- Optional <DatasetUrn > urn = getDatasetUrn (tableIdentifier );
248
+ Optional <DatasetUrn > datasetUrn = getDatasetUrn (tableIdentifier );
249
+ if (datasetUrn .isEmpty ()) {
250
+ log .warn ("Dataset urn not found for platform resource {}; cleaning up resource" , resourceUrn );
198
251
entityService .deleteUrn (operationContext , resourceUrn );
199
- urn .ifPresent (x -> entityService .deleteUrn (operationContext , x ));
200
- return true ;
201
- } finally {
202
- lock .unlock ();
252
+ return false ;
203
253
}
254
+
255
+ IcebergBatch icebergBatch = newIcebergBatch (operationContext );
256
+ icebergBatch .softDeleteEntity (resourceUrn , PLATFORM_RESOURCE_ENTITY_NAME );
257
+ icebergBatch .softDeleteEntity (datasetUrn .get (), DATASET_ENTITY_NAME );
258
+
259
+ AspectsBatch aspectsBatch = icebergBatch .asAspectsBatch ();
260
+ List <IngestResult > ingestResults =
261
+ entityService .ingestProposal (operationContext , aspectsBatch , false );
262
+
263
+ boolean result = true ;
264
+ for (IngestResult ingestResult : ingestResults ) {
265
+ if (ingestResult .getResult ().isNoOp ()) {
266
+ result = false ;
267
+ break ;
268
+ }
269
+ }
270
+
271
+ entityService .deleteUrn (operationContext , resourceUrn );
272
+ entityService .deleteUrn (operationContext , datasetUrn .get ());
273
+
274
+ return result ;
204
275
}
205
276
206
277
public DatasetUrn createDataset (
207
- TableIdentifier tableIdentifier , boolean view , AuditStamp auditStamp ) {
278
+ TableIdentifier tableIdentifier , boolean view , IcebergBatch icebergBatch ) {
208
279
String datasetName = platformInstance + "." + UUID .randomUUID ();
209
280
DatasetUrn datasetUrn = new DatasetUrn (platformUrn (), datasetName , fabricType ());
210
- createResource (datasetUrn , tableIdentifier , view , auditStamp );
281
+
282
+ createResource (datasetUrn , tableIdentifier , view , icebergBatch );
283
+
211
284
return datasetUrn ;
212
285
}
213
286
214
- public DatasetUrn renameDataset (
215
- TableIdentifier fromTableId , TableIdentifier toTableId , boolean view , AuditStamp auditStamp ) {
287
+ public void renameDataset (TableIdentifier fromTableId , TableIdentifier toTableId , boolean view ) {
288
+
289
+ Optional <DatasetUrn > optDatasetUrn = getDatasetUrn (fromTableId );
290
+ if (optDatasetUrn .isEmpty ()) {
291
+ throw noSuchEntity (view , fromTableId );
292
+ }
293
+
294
+ DatasetUrn datasetUrn = optDatasetUrn .get ();
295
+
296
+ IcebergBatch icebergBatch = newIcebergBatch (operationContext );
297
+ icebergBatch .softDeleteEntity (resourceUrn (fromTableId ), PLATFORM_RESOURCE_ENTITY_NAME );
298
+ createResource (datasetUrn , toTableId , view , icebergBatch );
299
+
300
+ DatasetProperties datasetProperties =
301
+ new DatasetProperties ()
302
+ .setName (toTableId .name ())
303
+ .setQualifiedName (fullTableName (platformInstance , toTableId ));
216
304
217
- // guard against concurrent modifications to the resource (other renames, deletion)
218
- Lock lock = resourceLocks .get (resourceUrn (fromTableId ));
219
- lock .lock ();
305
+ IcebergBatch .EntityBatch datasetBatch =
306
+ icebergBatch .updateEntity (datasetUrn , DATASET_ENTITY_NAME );
307
+ datasetBatch .aspect (DATASET_PROPERTIES_ASPECT_NAME , datasetProperties );
308
+
309
+ if (!fromTableId .namespace ().equals (toTableId .namespace ())) {
310
+ Container container =
311
+ new Container ().setContainer (containerUrn (platformInstance , toTableId .namespace ()));
312
+ datasetBatch .aspect (CONTAINER_ASPECT_NAME , container );
313
+ }
220
314
221
315
try {
222
- Optional <DatasetUrn > optDatasetUrn = getDatasetUrn (fromTableId );
223
- if (optDatasetUrn .isEmpty ()) {
224
- if (view ) {
225
- throw new NoSuchViewException (
226
- "No such view %s" , fullTableName (platformInstance , fromTableId ));
227
- } else {
228
- throw new NoSuchTableException (
229
- "No such table %s" , fullTableName (platformInstance , fromTableId ));
230
- }
316
+ AspectsBatch aspectsBatch = icebergBatch .asAspectsBatch ();
317
+ entityService .ingestProposal (operationContext , aspectsBatch , false );
318
+ } catch (ValidationException e ) {
319
+ if (!entityService .exists (operationContext , resourceUrn (fromTableId ), false )) {
320
+ // someone else deleted "fromTable" before we could get through
321
+ throw noSuchEntity (view , fromTableId );
231
322
}
232
-
233
- DatasetUrn datasetUrn = optDatasetUrn .get ();
234
- try {
235
- createResource (datasetUrn , toTableId , view , auditStamp );
236
- } catch (ValidationException e ) {
323
+ if (entityService .exists (operationContext , resourceUrn (toTableId ), true )) {
237
324
throw new AlreadyExistsException (
238
325
"%s already exists: %s" ,
239
326
view ? "View" : "Table" , fullTableName (platformInstance , toTableId ));
240
327
}
241
- entityService . deleteUrn ( operationContext , resourceUrn ( fromTableId ));
242
- return datasetUrn ;
243
- } finally {
244
- lock . unlock ( );
328
+ throw new IllegalStateException (
329
+ String . format (
330
+ "Rename operation failed inexplicably, from %s to %s in warehouse %s" ,
331
+ fromTableId , toTableId , platformInstance ) );
245
332
}
333
+
334
+ entityService .deleteUrn (operationContext , resourceUrn (fromTableId ));
335
+ }
336
+
337
+ private RuntimeException noSuchEntity (boolean view , TableIdentifier tableIdentifier ) {
338
+ return view
339
+ ? new NoSuchViewException (
340
+ "No such view %s" , fullTableName (platformInstance , tableIdentifier ))
341
+ : new NoSuchTableException (
342
+ "No such table %s" , fullTableName (platformInstance , tableIdentifier ));
246
343
}
247
344
248
345
private void createResource (
249
- DatasetUrn datasetUrn , TableIdentifier tableIdentifier , boolean view , AuditStamp auditStamp ) {
346
+ DatasetUrn datasetUrn ,
347
+ TableIdentifier tableIdentifier ,
348
+ boolean view ,
349
+ IcebergBatch icebergBatch ) {
250
350
PlatformResourceInfo resourceInfo =
251
351
new PlatformResourceInfo ().setPrimaryKey (datasetUrn .toString ());
252
352
resourceInfo .setResourceType (view ? "icebergView" : "icebergTable" );
253
353
254
- MetadataChangeProposal mcp = new MetadataChangeProposal ();
255
- mcp .setEntityUrn (resourceUrn (tableIdentifier ));
256
- mcp .setEntityType (PLATFORM_RESOURCE_ENTITY_NAME );
257
- mcp .setAspectName (PLATFORM_RESOURCE_INFO_ASPECT_NAME );
258
- mcp .setChangeType (ChangeType .CREATE_ENTITY );
259
- mcp .setAspect (serializeAspect (resourceInfo ));
260
-
261
- entityService .ingestProposal (operationContext , mcp , auditStamp , false );
354
+ icebergBatch .createEntity (
355
+ resourceUrn (tableIdentifier ),
356
+ PLATFORM_RESOURCE_ENTITY_NAME ,
357
+ PLATFORM_RESOURCE_INFO_ASPECT_NAME ,
358
+ resourceInfo );
262
359
}
263
360
264
361
private FabricType fabricType () {
@@ -268,8 +365,15 @@ private FabricType fabricType() {
268
365
@ SneakyThrows
269
366
private Urn resourceUrn (TableIdentifier tableIdentifier ) {
270
367
return Urn .createFromString (
271
- String .format (
272
- "urn:li:platformResource:%s.%s" ,
273
- PLATFORM_NAME , CatalogUtil .fullTableName (platformInstance , tableIdentifier )));
368
+ String .format ("urn:li:platformResource:%s.%s" , PLATFORM_NAME , tableName (tableIdentifier )));
369
+ }
370
+
371
+ private String tableName (TableIdentifier tableIdentifier ) {
372
+ return fullTableName (platformInstance , tableIdentifier );
373
+ }
374
+
375
+ @ VisibleForTesting
376
+ IcebergBatch newIcebergBatch (OperationContext operationContext ) {
377
+ return new IcebergBatch (operationContext );
274
378
}
275
379
}
0 commit comments