Skip to content

Commit 619a6d8

Browse files
committed
fix comparison, reduce duplicate logging
1 parent a1fd30c commit 619a6d8

File tree

21 files changed

+866
-104
lines changed

21 files changed

+866
-104
lines changed

entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java

+16-3
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,12 @@
2828
public interface AspectsBatch {
2929
Collection<? extends BatchItem> getItems();
3030

31+
Collection<? extends BatchItem> getInitialItems();
32+
3133
RetrieverContext getRetrieverContext();
3234

3335
/**
34-
* Returns MCP items. Could be patch, upsert, etc.
36+
* Returns MCP items. Could be one of patch, upsert, etc.
3537
*
3638
* @return batch items
3739
*/
@@ -160,13 +162,24 @@ static Stream<MCLItem> applyMCLSideEffects(
160162
}
161163

162164
default boolean containsDuplicateAspects() {
163-
return getItems().stream()
164-
.map(i -> String.format("%s_%s", i.getClass().getName(), i.hashCode()))
165+
return getInitialItems().stream()
166+
.map(i -> String.format("%s_%s", i.getClass().getSimpleName(), i.hashCode()))
165167
.distinct()
166168
.count()
167169
!= getItems().size();
168170
}
169171

172+
default Map<String, List<? extends BatchItem>> duplicateAspects() {
173+
return getInitialItems().stream()
174+
.collect(
175+
Collectors.groupingBy(
176+
i -> String.format("%s_%s", i.getClass().getSimpleName(), i.hashCode())))
177+
.entrySet()
178+
.stream()
179+
.filter(entry -> entry.getValue() != null && entry.getValue().size() > 1)
180+
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
181+
}
182+
170183
default Map<String, Set<String>> getUrnAspectsMap() {
171184
return getItems().stream()
172185
.map(aspect -> Pair.of(aspect.getUrn().toString(), aspect.getAspectName()))

entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java

+7
Original file line number | Diff line number | Diff line change
@@ -23,4 +23,11 @@ public interface BatchItem extends ReadItem {
2323
*/
2424
@Nonnull
2525
ChangeType getChangeType();
26+
27+
/**
28+
* Determines if this item is a duplicate of another item in terms of the operation it represents
29+
* to the database. Each implementation can define what constitutes a duplicate based on its
30+
* specific fields which are persisted.
31+
*/
32+
boolean isDatabaseDuplicateOf(BatchItem other);
2633
}

entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCL.java

+21
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,12 @@
44
import com.linkedin.common.urn.Urn;
55
import com.linkedin.data.template.RecordTemplate;
66
import com.linkedin.events.metadata.ChangeType;
7+
import com.linkedin.metadata.aspect.batch.BatchItem;
78
import com.linkedin.metadata.aspect.batch.MCLItem;
89
import com.linkedin.metadata.models.AspectSpec;
910
import com.linkedin.metadata.models.EntitySpec;
1011
import com.linkedin.mxe.MetadataChangeLog;
12+
import java.util.Objects;
1113
import javax.annotation.Nonnull;
1214
import lombok.Builder;
1315
import lombok.Getter;
@@ -29,4 +31,23 @@ public class TestMCL implements MCLItem {
2931
public String getAspectName() {
3032
return getAspectSpec().getName();
3133
}
34+
35+
@Override
36+
public boolean isDatabaseDuplicateOf(BatchItem other) {
37+
return equals(other);
38+
}
39+
40+
@Override
41+
public boolean equals(Object o) {
42+
if (this == o) return true;
43+
if (o == null || getClass() != o.getClass()) return false;
44+
45+
TestMCL testMCL = (TestMCL) o;
46+
return Objects.equals(metadataChangeLog, testMCL.metadataChangeLog);
47+
}
48+
49+
@Override
50+
public int hashCode() {
51+
return Objects.hashCode(metadataChangeLog);
52+
}
3253
}

entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java

+38
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
import com.linkedin.common.AuditStamp;
88
import com.linkedin.common.urn.Urn;
9+
import com.linkedin.data.template.DataTemplateUtil;
910
import com.linkedin.data.template.RecordTemplate;
1011
import com.linkedin.events.metadata.ChangeType;
1112
import com.linkedin.metadata.aspect.ReadItem;
@@ -21,6 +22,7 @@
2122
import java.net.URISyntaxException;
2223
import java.util.Collection;
2324
import java.util.Map;
25+
import java.util.Objects;
2426
import java.util.Optional;
2527
import java.util.Set;
2628
import java.util.stream.Collectors;
@@ -140,4 +142,40 @@ public Map<String, String> getHeaders() {
140142
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))
141143
.orElse(headers);
142144
}
145+
146+
@Override
147+
public boolean isDatabaseDuplicateOf(BatchItem other) {
148+
return equals(other);
149+
}
150+
151+
@Override
152+
public boolean equals(Object o) {
153+
if (this == o) {
154+
return true;
155+
}
156+
if (o == null || getClass() != o.getClass()) {
157+
return false;
158+
}
159+
160+
TestMCP testMCP = (TestMCP) o;
161+
return urn.equals(testMCP.urn)
162+
&& DataTemplateUtil.areEqual(recordTemplate, testMCP.recordTemplate)
163+
&& Objects.equals(systemAspect, testMCP.systemAspect)
164+
&& Objects.equals(previousSystemAspect, testMCP.previousSystemAspect)
165+
&& Objects.equals(auditStamp, testMCP.auditStamp)
166+
&& Objects.equals(changeType, testMCP.changeType)
167+
&& Objects.equals(metadataChangeProposal, testMCP.metadataChangeProposal);
168+
}
169+
170+
@Override
171+
public int hashCode() {
172+
int result = urn.hashCode();
173+
result = 31 * result + Objects.hashCode(recordTemplate);
174+
result = 31 * result + Objects.hashCode(systemAspect);
175+
result = 31 * result + Objects.hashCode(previousSystemAspect);
176+
result = 31 * result + Objects.hashCode(auditStamp);
177+
result = 31 * result + Objects.hashCode(changeType);
178+
result = 31 * result + Objects.hashCode(metadataChangeProposal);
179+
return result;
180+
}
143181
}

metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityAspect.java

+25
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,26 @@ public class EntityAspect {
5252

5353
private String createdFor;
5454

55+
@Override
56+
public String toString() {
57+
return "EntityAspect{"
58+
+ "urn='"
59+
+ urn
60+
+ '\''
61+
+ ", aspect='"
62+
+ aspect
63+
+ '\''
64+
+ ", version="
65+
+ version
66+
+ ", metadata='"
67+
+ metadata
68+
+ '\''
69+
+ ", systemMetadata='"
70+
+ systemMetadata
71+
+ '\''
72+
+ '}';
73+
}
74+
5575
/**
5676
* Provide a typed EntityAspect without breaking the existing public contract with generic types.
5777
*/
@@ -144,6 +164,11 @@ public EnvelopedAspect toEnvelopedAspects() {
144164
return envelopedAspect;
145165
}
146166

167+
@Override
168+
public String toString() {
169+
return entityAspect.toString();
170+
}
171+
147172
public static class EntitySystemAspectBuilder {
148173

149174
private EntityAspect.EntitySystemAspect build() {

metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java

+36-4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package com.linkedin.metadata.entity.ebean.batch;
22

33
import com.linkedin.common.AuditStamp;
4+
import com.linkedin.common.urn.Urn;
45
import com.linkedin.data.template.RecordTemplate;
56
import com.linkedin.events.metadata.ChangeType;
67
import com.linkedin.metadata.aspect.AspectRetriever;
@@ -15,7 +16,9 @@
1516
import com.linkedin.metadata.models.EntitySpec;
1617
import com.linkedin.mxe.MetadataChangeProposal;
1718
import com.linkedin.util.Pair;
19+
import java.util.ArrayList;
1820
import java.util.Collection;
21+
import java.util.HashMap;
1922
import java.util.LinkedList;
2023
import java.util.List;
2124
import java.util.Map;
@@ -29,12 +32,23 @@
2932
import lombok.extern.slf4j.Slf4j;
3033

3134
@Slf4j
32-
@Getter
3335
@Builder(toBuilder = true)
3436
public class AspectsBatchImpl implements AspectsBatch {
3537

3638
@Nonnull private final Collection<? extends BatchItem> items;
37-
@Nonnull private final RetrieverContext retrieverContext;
39+
@Nonnull private final Collection<? extends BatchItem> nonRepeatedItems;
40+
@Getter @Nonnull private final RetrieverContext retrieverContext;
41+
42+
@Override
43+
@Nonnull
44+
public Collection<? extends BatchItem> getItems() {
45+
return nonRepeatedItems;
46+
}
47+
48+
@Override
49+
public Collection<? extends BatchItem> getInitialItems() {
50+
return items;
51+
}
3852

3953
/**
4054
* Convert patches to upserts, apply hooks at the aspect and batch level.
@@ -207,14 +221,32 @@ public AspectsBatchImplBuilder mcps(
207221
return this;
208222
}
209223

224+
private static <T extends BatchItem> List<T> filterRepeats(Collection<T> items) {
225+
List<T> result = new ArrayList<>();
226+
Map<Pair<Urn, String>, T> last = new HashMap<>();
227+
228+
for (T item : items) {
229+
Pair<Urn, String> urnAspect = Pair.of(item.getUrn(), item.getAspectName());
230+
// Check if this item is a duplicate of the previous
231+
if (!last.containsKey(urnAspect) || !item.isDatabaseDuplicateOf(last.get(urnAspect))) {
232+
result.add(item);
233+
}
234+
last.put(urnAspect, item);
235+
}
236+
237+
return result;
238+
}
239+
210240
public AspectsBatchImpl build() {
241+
this.nonRepeatedItems = filterRepeats(this.items);
242+
211243
ValidationExceptionCollection exceptions =
212-
AspectsBatch.validateProposed(this.items, this.retrieverContext);
244+
AspectsBatch.validateProposed(this.nonRepeatedItems, this.retrieverContext);
213245
if (!exceptions.isEmpty()) {
214246
throw new IllegalArgumentException("Failed to validate MCP due to: " + exceptions);
215247
}
216248

217-
return new AspectsBatchImpl(this.items, this.retrieverContext);
249+
return new AspectsBatchImpl(this.items, this.nonRepeatedItems, this.retrieverContext);
218250
}
219251
}
220252

metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java

+11-2
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,13 @@
33
import com.datahub.util.exception.ModelConversionException;
44
import com.linkedin.common.AuditStamp;
55
import com.linkedin.common.urn.Urn;
6+
import com.linkedin.data.template.DataTemplateUtil;
67
import com.linkedin.data.template.RecordTemplate;
78
import com.linkedin.data.template.StringMap;
89
import com.linkedin.events.metadata.ChangeType;
910
import com.linkedin.metadata.aspect.AspectRetriever;
1011
import com.linkedin.metadata.aspect.SystemAspect;
12+
import com.linkedin.metadata.aspect.batch.BatchItem;
1113
import com.linkedin.metadata.aspect.batch.ChangeMCP;
1214
import com.linkedin.metadata.aspect.batch.MCPItem;
1315
import com.linkedin.metadata.aspect.patch.template.common.GenericPatchTemplate;
@@ -269,6 +271,11 @@ private static RecordTemplate convertToRecordTemplate(
269271
}
270272
}
271273

274+
@Override
275+
public boolean isDatabaseDuplicateOf(BatchItem other) {
276+
return equals(other);
277+
}
278+
272279
@Override
273280
public boolean equals(Object o) {
274281
if (this == o) {
@@ -280,13 +287,15 @@ public boolean equals(Object o) {
280287
ChangeItemImpl that = (ChangeItemImpl) o;
281288
return urn.equals(that.urn)
282289
&& aspectName.equals(that.aspectName)
290+
&& changeType.equals(that.changeType)
283291
&& Objects.equals(systemMetadata, that.systemMetadata)
284-
&& recordTemplate.equals(that.recordTemplate);
292+
&& Objects.equals(auditStamp, that.auditStamp)
293+
&& DataTemplateUtil.areEqual(recordTemplate, that.recordTemplate);
285294
}
286295

287296
@Override
288297
public int hashCode() {
289-
return Objects.hash(urn, aspectName, systemMetadata, recordTemplate);
298+
return Objects.hash(urn, aspectName, changeType, systemMetadata, auditStamp, recordTemplate);
290299
}
291300

292301
@Override

metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java

+6
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import com.linkedin.events.metadata.ChangeType;
77
import com.linkedin.metadata.aspect.AspectRetriever;
88
import com.linkedin.metadata.aspect.SystemAspect;
9+
import com.linkedin.metadata.aspect.batch.BatchItem;
910
import com.linkedin.metadata.aspect.batch.ChangeMCP;
1011
import com.linkedin.metadata.entity.EntityApiUtils;
1112
import com.linkedin.metadata.entity.EntityAspect;
@@ -115,6 +116,11 @@ public DeleteItemImpl build(AspectRetriever aspectRetriever) {
115116
}
116117
}
117118

119+
@Override
120+
public boolean isDatabaseDuplicateOf(BatchItem other) {
121+
return equals(other);
122+
}
123+
118124
@Override
119125
public boolean equals(Object o) {
120126
if (this == o) {

metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java

+6
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import com.linkedin.data.template.RecordTemplate;
66
import com.linkedin.events.metadata.ChangeType;
77
import com.linkedin.metadata.aspect.AspectRetriever;
8+
import com.linkedin.metadata.aspect.batch.BatchItem;
89
import com.linkedin.metadata.aspect.batch.MCLItem;
910
import com.linkedin.metadata.aspect.batch.MCPItem;
1011
import com.linkedin.metadata.entity.AspectUtils;
@@ -158,6 +159,11 @@ private static Pair<RecordTemplate, RecordTemplate> convertToRecordTemplate(
158159
}
159160
}
160161

162+
@Override
163+
public boolean isDatabaseDuplicateOf(BatchItem other) {
164+
return equals(other);
165+
}
166+
161167
@Override
162168
public boolean equals(Object o) {
163169
if (this == o) {

metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java

+8-1
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
import com.linkedin.data.template.RecordTemplate;
1515
import com.linkedin.events.metadata.ChangeType;
1616
import com.linkedin.metadata.aspect.AspectRetriever;
17+
import com.linkedin.metadata.aspect.batch.BatchItem;
1718
import com.linkedin.metadata.aspect.batch.MCPItem;
1819
import com.linkedin.metadata.aspect.batch.PatchMCP;
1920
import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine;
@@ -216,6 +217,11 @@ public static JsonPatch convertToJsonPatch(MetadataChangeProposal mcp) {
216217
}
217218
}
218219

220+
@Override
221+
public boolean isDatabaseDuplicateOf(BatchItem other) {
222+
return equals(other);
223+
}
224+
219225
@Override
220226
public boolean equals(Object o) {
221227
if (this == o) {
@@ -228,12 +234,13 @@ public boolean equals(Object o) {
228234
return urn.equals(that.urn)
229235
&& aspectName.equals(that.aspectName)
230236
&& Objects.equals(systemMetadata, that.systemMetadata)
237+
&& auditStamp.equals(that.auditStamp)
231238
&& patch.equals(that.patch);
232239
}
233240

234241
@Override
235242
public int hashCode() {
236-
return Objects.hash(urn, aspectName, systemMetadata, patch);
243+
return Objects.hash(urn, aspectName, systemMetadata, auditStamp, patch);
237244
}
238245

239246
@Override

0 commit comments

Comments
 (0)