Skip to content

Commit ff09983

Browse files
committed
fix(entity-service): no-op batches
Refactor entityService function to filter no-op upserts. Upserts which would result in no change to the row. Still allows for updating last observed.
1 parent 3c388a5 commit ff09983

File tree

3 files changed

+198
-87
lines changed

3 files changed

+198
-87
lines changed

li-utils/src/main/java/com/linkedin/metadata/Constants.java

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ public class Constants {
5151
// App sources
5252
public static final String UI_SOURCE = "ui";
5353
public static final String SYSTEM_UPDATE_SOURCE = "systemUpdate";
54+
public static final String METADATA_TESTS_SOURCE = "metadataTests";
5455

5556
/** Entities */
5657
public static final String CORP_USER_ENTITY_NAME = "corpuser";

metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java

+101-84
Original file line numberDiff line numberDiff line change
@@ -972,10 +972,13 @@ This condition is specifically for an older conditional write ingestAspectIfNotP
972972
*/
973973
if (overwrite || databaseAspect == null) {
974974
result =
975-
ingestAspectToLocalDB(txContext, writeItem, databaseSystemAspect)
976-
.toBuilder()
977-
.request(writeItem)
978-
.build();
975+
Optional.ofNullable(
976+
ingestAspectToLocalDB(
977+
txContext, writeItem, databaseSystemAspect))
978+
.map(
979+
optResult ->
980+
optResult.toBuilder().request(writeItem).build())
981+
.orElse(null);
979982

980983
} else {
981984
RecordTemplate oldValue = databaseSystemAspect.getRecordTemplate();
@@ -996,49 +999,56 @@ This condition is specifically for an older conditional write ingestAspectIfNotP
996999

9971000
return result;
9981001
})
1002+
.filter(Objects::nonNull)
9991003
.collect(Collectors.toList());
10001004

1001-
// commit upserts prior to retention or kafka send, if supported by impl
1002-
if (txContext != null) {
1003-
txContext.commitAndContinue();
1004-
}
1005-
long took = TimeUnit.NANOSECONDS.toMillis(ingestToLocalDBTimer.stop());
1006-
if (took > DB_TIMER_LOG_THRESHOLD_MS) {
1007-
log.info("Ingestion of aspects batch to database took {} ms", took);
1008-
}
1009-
1010-
// Retention optimization and tx
1011-
if (retentionService != null) {
1012-
List<RetentionService.RetentionContext> retentionBatch =
1013-
upsertResults.stream()
1014-
// Only consider retention when there was a previous version
1015-
.filter(
1016-
result ->
1017-
batchAspects.containsKey(result.getUrn().toString())
1018-
&& batchAspects
1019-
.get(result.getUrn().toString())
1020-
.containsKey(result.getRequest().getAspectName()))
1021-
.filter(
1022-
result -> {
1023-
RecordTemplate oldAspect = result.getOldValue();
1024-
RecordTemplate newAspect = result.getNewValue();
1025-
// Apply retention policies if there was an update to existing aspect
1026-
// value
1027-
return oldAspect != newAspect
1028-
&& oldAspect != null
1029-
&& retentionService != null;
1030-
})
1031-
.map(
1032-
result ->
1033-
RetentionService.RetentionContext.builder()
1034-
.urn(result.getUrn())
1035-
.aspectName(result.getRequest().getAspectName())
1036-
.maxVersion(Optional.of(result.getMaxVersion()))
1037-
.build())
1038-
.collect(Collectors.toList());
1039-
retentionService.applyRetentionWithPolicyDefaults(opContext, retentionBatch);
1005+
if (!upsertResults.isEmpty()) {
1006+
// commit upserts prior to retention or kafka send, if supported by impl
1007+
if (txContext != null) {
1008+
txContext.commitAndContinue();
1009+
}
1010+
log.info(
1011+
"Ingestion of aspects batch to database took {} in {} ms",
1012+
upsertResults.size(),
1013+
TimeUnit.NANOSECONDS.toMillis(ingestToLocalDBTimer.stop()));
1014+
1015+
// Retention optimization and tx
1016+
if (retentionService != null) {
1017+
List<RetentionService.RetentionContext> retentionBatch =
1018+
upsertResults.stream()
1019+
// Only consider retention when there was a previous version
1020+
.filter(
1021+
result ->
1022+
batchAspects.containsKey(result.getUrn().toString())
1023+
&& batchAspects
1024+
.get(result.getUrn().toString())
1025+
.containsKey(result.getRequest().getAspectName()))
1026+
.filter(
1027+
result -> {
1028+
RecordTemplate oldAspect = result.getOldValue();
1029+
RecordTemplate newAspect = result.getNewValue();
1030+
// Apply retention policies if there was an update to existing
1031+
// aspect
1032+
// value
1033+
return oldAspect != newAspect
1034+
&& oldAspect != null
1035+
&& retentionService != null;
1036+
})
1037+
.map(
1038+
result ->
1039+
RetentionService.RetentionContext.builder()
1040+
.urn(result.getUrn())
1041+
.aspectName(result.getRequest().getAspectName())
1042+
.maxVersion(Optional.of(result.getMaxVersion()))
1043+
.build())
1044+
.collect(Collectors.toList());
1045+
retentionService.applyRetentionWithPolicyDefaults(opContext, retentionBatch);
1046+
} else {
1047+
log.warn("Retention service is missing!");
1048+
}
10401049
} else {
10411050
log.warn("Retention service is missing!");
1051+
log.warn("Empty transaction detected. {}", inputBatch);
10421052
}
10431053

10441054
return upsertResults;
@@ -2506,7 +2516,7 @@ private Map<EntityAspectIdentifier, EnvelopedAspect> getEnvelopedAspects(
25062516
* @param databaseAspect The aspect as it exists in the database.
25072517
* @return result object
25082518
*/
2509-
@Nonnull
2519+
@Nullable
25102520
private UpdateAspectResult ingestAspectToLocalDB(
25112521
@Nullable TransactionContext txContext,
25122522
@Nonnull final ChangeMCP writeItem,
@@ -2520,6 +2530,9 @@ private UpdateAspectResult ingestAspectToLocalDB(
25202530
.setLastRunId(writeItem.getSystemMetadata().getRunId(GetMode.NULL), SetMode.IGNORE_NULL);
25212531

25222532
// 2. Compare the latest existing and new.
2533+
final RecordTemplate databaseValue =
2534+
databaseAspect == null ? null : databaseAspect.getRecordTemplate();
2535+
25232536
final EntityAspect.EntitySystemAspect previousBatchAspect =
25242537
(EntityAspect.EntitySystemAspect) writeItem.getPreviousSystemAspect();
25252538
final RecordTemplate previousValue =
@@ -2528,7 +2541,7 @@ private UpdateAspectResult ingestAspectToLocalDB(
25282541
// 3. If there is no difference between existing and new, we just update
25292542
// the lastObserved in system metadata. RunId should stay as the original runId
25302543
if (previousValue != null
2531-
&& DataTemplateUtil.areEqual(previousValue, writeItem.getRecordTemplate())) {
2544+
&& DataTemplateUtil.areEqual(databaseValue, writeItem.getRecordTemplate())) {
25322545

25332546
SystemMetadata latestSystemMetadata = previousBatchAspect.getSystemMetadata();
25342547
latestSystemMetadata.setLastObserved(writeItem.getSystemMetadata().getLastObserved());
@@ -2564,45 +2577,49 @@ private UpdateAspectResult ingestAspectToLocalDB(
25642577
}
25652578

25662579
// 4. Save the newValue as the latest version
2567-
log.debug(
2568-
"Ingesting aspect with name {}, urn {}", writeItem.getAspectName(), writeItem.getUrn());
2569-
String newValueStr = EntityApiUtils.toJsonAspect(writeItem.getRecordTemplate());
2570-
long versionOfOld =
2571-
aspectDao.saveLatestAspect(
2572-
txContext,
2573-
writeItem.getUrn().toString(),
2574-
writeItem.getAspectName(),
2575-
previousBatchAspect == null ? null : EntityApiUtils.toJsonAspect(previousValue),
2576-
previousBatchAspect == null ? null : previousBatchAspect.getCreatedBy(),
2577-
previousBatchAspect == null
2578-
? null
2579-
: previousBatchAspect.getEntityAspect().getCreatedFor(),
2580-
previousBatchAspect == null ? null : previousBatchAspect.getCreatedOn(),
2581-
previousBatchAspect == null ? null : previousBatchAspect.getSystemMetadataRaw(),
2582-
newValueStr,
2583-
writeItem.getAuditStamp().getActor().toString(),
2584-
writeItem.getAuditStamp().hasImpersonator()
2585-
? writeItem.getAuditStamp().getImpersonator().toString()
2586-
: null,
2587-
new Timestamp(writeItem.getAuditStamp().getTime()),
2588-
EntityApiUtils.toJsonAspect(writeItem.getSystemMetadata()),
2589-
writeItem.getNextAspectVersion());
2590-
2591-
// metrics
2592-
aspectDao.incrementWriteMetrics(
2593-
writeItem.getAspectName(), 1, newValueStr.getBytes(StandardCharsets.UTF_8).length);
2594-
2595-
return UpdateAspectResult.builder()
2596-
.urn(writeItem.getUrn())
2597-
.oldValue(previousValue)
2598-
.newValue(writeItem.getRecordTemplate())
2599-
.oldSystemMetadata(
2600-
previousBatchAspect == null ? null : previousBatchAspect.getSystemMetadata())
2601-
.newSystemMetadata(writeItem.getSystemMetadata())
2602-
.operation(MetadataAuditOperation.UPDATE)
2603-
.auditStamp(writeItem.getAuditStamp())
2604-
.maxVersion(versionOfOld)
2605-
.build();
2580+
if (!DataTemplateUtil.areEqual(databaseValue, writeItem.getRecordTemplate())) {
2581+
log.debug(
2582+
"Ingesting aspect with name {}, urn {}", writeItem.getAspectName(), writeItem.getUrn());
2583+
String newValueStr = EntityApiUtils.toJsonAspect(writeItem.getRecordTemplate());
2584+
long versionOfOld =
2585+
aspectDao.saveLatestAspect(
2586+
txContext,
2587+
writeItem.getUrn().toString(),
2588+
writeItem.getAspectName(),
2589+
previousBatchAspect == null ? null : EntityApiUtils.toJsonAspect(previousValue),
2590+
previousBatchAspect == null ? null : previousBatchAspect.getCreatedBy(),
2591+
previousBatchAspect == null
2592+
? null
2593+
: previousBatchAspect.getEntityAspect().getCreatedFor(),
2594+
previousBatchAspect == null ? null : previousBatchAspect.getCreatedOn(),
2595+
previousBatchAspect == null ? null : previousBatchAspect.getSystemMetadataRaw(),
2596+
newValueStr,
2597+
writeItem.getAuditStamp().getActor().toString(),
2598+
writeItem.getAuditStamp().hasImpersonator()
2599+
? writeItem.getAuditStamp().getImpersonator().toString()
2600+
: null,
2601+
new Timestamp(writeItem.getAuditStamp().getTime()),
2602+
EntityApiUtils.toJsonAspect(writeItem.getSystemMetadata()),
2603+
writeItem.getNextAspectVersion());
2604+
2605+
// metrics
2606+
aspectDao.incrementWriteMetrics(
2607+
writeItem.getAspectName(), 1, newValueStr.getBytes(StandardCharsets.UTF_8).length);
2608+
2609+
return UpdateAspectResult.builder()
2610+
.urn(writeItem.getUrn())
2611+
.oldValue(previousValue)
2612+
.newValue(writeItem.getRecordTemplate())
2613+
.oldSystemMetadata(
2614+
previousBatchAspect == null ? null : previousBatchAspect.getSystemMetadata())
2615+
.newSystemMetadata(writeItem.getSystemMetadata())
2616+
.operation(MetadataAuditOperation.UPDATE)
2617+
.auditStamp(writeItem.getAuditStamp())
2618+
.maxVersion(versionOfOld)
2619+
.build();
2620+
}
2621+
2622+
return null;
26062623
}
26072624

26082625
private static boolean shouldAspectEmitChangeLog(@Nonnull final AspectSpec aspectSpec) {

metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java

+96-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
package com.linkedin.metadata.entity;
22

3+
import static com.linkedin.metadata.Constants.APP_SOURCE;
34
import static com.linkedin.metadata.Constants.CORP_USER_ENTITY_NAME;
45
import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME;
56
import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME;
7+
import static com.linkedin.metadata.Constants.METADATA_TESTS_SOURCE;
68
import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME;
79
import static org.mockito.Mockito.mock;
810
import static org.testng.Assert.assertEquals;
@@ -19,6 +21,7 @@
1921
import com.linkedin.common.urn.UrnUtils;
2022
import com.linkedin.data.template.DataTemplateUtil;
2123
import com.linkedin.data.template.RecordTemplate;
24+
import com.linkedin.data.template.StringMap;
2225
import com.linkedin.entity.EnvelopedAspect;
2326
import com.linkedin.identity.CorpUserInfo;
2427
import com.linkedin.metadata.AspectGenerationUtils;
@@ -61,6 +64,7 @@
6164
import java.util.concurrent.LinkedBlockingQueue;
6265
import java.util.stream.Collectors;
6366
import java.util.stream.IntStream;
67+
import java.util.stream.Stream;
6468
import org.apache.commons.lang3.tuple.Triple;
6569
import org.testng.Assert;
6670
import org.testng.annotations.BeforeMethod;
@@ -534,8 +538,8 @@ public void testBatchPatchWithTrailingNoOp() throws Exception {
534538
opContext, DATASET_ENTITY_NAME, entityUrn, GLOBAL_TAGS_ASPECT_NAME);
535539
assertEquals(
536540
envelopedAspect.getSystemMetadata().getVersion(),
537-
"2",
538-
"Expected version 2. 1 - Initial, + 1 batch operation (1 add, 1 remove)");
541+
"3",
542+
"Expected version 3. 1 - Initial, + 1 add, 1 remove");
539543
assertEquals(
540544
new GlobalTags(envelopedAspect.getValue().data())
541545
.getTags().stream().map(TagAssociation::getTag).collect(Collectors.toSet()),
@@ -649,14 +653,103 @@ public void testBatchPatchAdd() throws Exception {
649653
EnvelopedAspect envelopedAspect =
650654
_entityServiceImpl.getLatestEnvelopedAspect(
651655
opContext, DATASET_ENTITY_NAME, entityUrn, GLOBAL_TAGS_ASPECT_NAME);
652-
assertEquals(envelopedAspect.getSystemMetadata().getVersion(), "3", "Expected version 3");
656+
assertEquals(envelopedAspect.getSystemMetadata().getVersion(), "4", "Expected version 4");
653657
assertEquals(
654658
new GlobalTags(envelopedAspect.getValue().data())
655659
.getTags().stream().map(TagAssociation::getTag).collect(Collectors.toSet()),
656660
Set.of(tag1, tag2, tag3),
657661
"Expected all tags");
658662
}
659663

664+
@Test
665+
public void testBatchPatchAddDuplicate() throws Exception {
666+
Urn entityUrn =
667+
UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:snowflake,testBatchPatchAdd,PROD)");
668+
List<TagAssociation> initialTags =
669+
List.of(
670+
TagUrn.createFromString("urn:li:tag:__default_large_table"),
671+
TagUrn.createFromString("urn:li:tag:__default_low_queries"),
672+
TagUrn.createFromString("urn:li:tag:__default_low_changes"),
673+
TagUrn.createFromString("urn:li:tag:!10TB+ tables"))
674+
.stream()
675+
.map(tag -> new TagAssociation().setTag(tag))
676+
.collect(Collectors.toList());
677+
TagUrn tag2 = TagUrn.createFromString("urn:li:tag:$ 1TB+");
678+
679+
SystemMetadata systemMetadata = AspectGenerationUtils.createSystemMetadata();
680+
681+
SystemMetadata patchSystemMetadata = new SystemMetadata();
682+
patchSystemMetadata.setLastObserved(systemMetadata.getLastObserved() + 1);
683+
patchSystemMetadata.setProperties(new StringMap(Map.of(APP_SOURCE, METADATA_TESTS_SOURCE)));
684+
685+
ChangeItemImpl initialAspectTag1 =
686+
ChangeItemImpl.builder()
687+
.urn(entityUrn)
688+
.aspectName(GLOBAL_TAGS_ASPECT_NAME)
689+
.recordTemplate(new GlobalTags().setTags(new TagAssociationArray(initialTags)))
690+
.systemMetadata(systemMetadata.copy())
691+
.auditStamp(TEST_AUDIT_STAMP)
692+
.build(TestOperationContexts.emptyAspectRetriever(null));
693+
694+
PatchItemImpl patchAdd2 =
695+
PatchItemImpl.builder()
696+
.urn(entityUrn)
697+
.entitySpec(_testEntityRegistry.getEntitySpec(DATASET_ENTITY_NAME))
698+
.aspectName(GLOBAL_TAGS_ASPECT_NAME)
699+
.aspectSpec(
700+
_testEntityRegistry
701+
.getEntitySpec(DATASET_ENTITY_NAME)
702+
.getAspectSpec(GLOBAL_TAGS_ASPECT_NAME))
703+
.patch(
704+
GenericJsonPatch.builder()
705+
.arrayPrimaryKeys(Map.of("properties", List.of("tag")))
706+
.patch(List.of(tagPatchOp(PatchOperationType.ADD, tag2)))
707+
.build()
708+
.getJsonPatch())
709+
.systemMetadata(patchSystemMetadata)
710+
.auditStamp(AuditStampUtils.createDefaultAuditStamp())
711+
.build(_testEntityRegistry);
712+
713+
// establish base entity
714+
_entityServiceImpl.ingestAspects(
715+
opContext,
716+
AspectsBatchImpl.builder()
717+
.retrieverContext(opContext.getRetrieverContext().get())
718+
.items(List.of(initialAspectTag1))
719+
.build(),
720+
false,
721+
true);
722+
723+
_entityServiceImpl.ingestAspects(
724+
opContext,
725+
AspectsBatchImpl.builder()
726+
.retrieverContext(opContext.getRetrieverContext().get())
727+
.items(List.of(patchAdd2, patchAdd2)) // duplicate
728+
.build(),
729+
false,
730+
true);
731+
732+
// List aspects urns
733+
ListUrnsResult batch = _entityServiceImpl.listUrns(opContext, entityUrn.getEntityType(), 0, 1);
734+
735+
assertEquals(batch.getStart().intValue(), 0);
736+
assertEquals(batch.getCount().intValue(), 1);
737+
assertEquals(batch.getTotal().intValue(), 1);
738+
assertEquals(batch.getEntities().size(), 1);
739+
assertEquals(entityUrn.toString(), batch.getEntities().get(0).toString());
740+
741+
EnvelopedAspect envelopedAspect =
742+
_entityServiceImpl.getLatestEnvelopedAspect(
743+
opContext, DATASET_ENTITY_NAME, entityUrn, GLOBAL_TAGS_ASPECT_NAME);
744+
assertEquals(envelopedAspect.getSystemMetadata().getVersion(), "3", "Expected version 3");
745+
assertEquals(
746+
new GlobalTags(envelopedAspect.getValue().data())
747+
.getTags().stream().map(TagAssociation::getTag).collect(Collectors.toSet()),
748+
Stream.concat(initialTags.stream().map(TagAssociation::getTag), Stream.of(tag2))
749+
.collect(Collectors.toSet()),
750+
"Expected all tags");
751+
}
752+
660753
@Test
661754
public void dataGeneratorThreadingTest() {
662755
DataGenerator dataGenerator = new DataGenerator(opContext, _entityServiceImpl);

0 commit comments

Comments
 (0)