Skip to content

Commit 70f6ea7

Browse files
committed
Spotless check and tweaking openlineage endpoint default config
1 parent 2e33355 commit 70f6ea7

File tree

5 files changed

+95
-50
lines changed

5 files changed

+95
-50
lines changed

metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java

+4 -2
Original file line number | Diff line number | Diff line change
@@ -300,8 +300,10 @@ private static UpstreamLineage getFineGrainedLineage(
300300
UrnArray downstreamsFields = new UrnArray();
301301
Optional<DatasetUrn> datasetUrn =
302302
convertOpenlineageDatasetToDatasetUrn(dataset, mappingConfig);
303-
datasetUrn.ifPresent(urn -> downstreamsFields.add(
304-
UrnUtils.getUrn("urn:li:schemaField:" + "(" + urn + "," + field.getKey() + ")")));
303+
datasetUrn.ifPresent(
304+
urn ->
305+
downstreamsFields.add(
306+
UrnUtils.getUrn("urn:li:schemaField:" + "(" + urn + "," + field.getKey() + ")")));
305307
OpenLineage.StaticDatasetBuilder staticDatasetBuilder =
306308
new OpenLineage.StaticDatasetBuilder();
307309
field

metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/dataset/DatahubJob.java

+86 -44
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
import com.linkedin.metadata.aspect.patch.builder.GlobalTagsPatchBuilder;
3535
import com.linkedin.metadata.aspect.patch.builder.UpstreamLineagePatchBuilder;
3636
import com.linkedin.metadata.key.DatasetKey;
37+
import com.linkedin.mxe.MetadataChangeProposal;
3738
import datahub.event.EventFormatter;
3839
import datahub.event.MetadataChangeProposalWrapper;
3940
import io.datahubproject.openlineage.config.DatahubOpenlineageConfig;
@@ -56,11 +57,9 @@
5657
import lombok.Setter;
5758
import lombok.ToString;
5859
import lombok.extern.slf4j.Slf4j;
59-
import com.linkedin.mxe.MetadataChangeProposal;
6060
import org.apache.commons.lang3.StringUtils;
6161
import org.apache.commons.lang3.tuple.Pair;
6262

63-
6463
@EqualsAndHashCode
6564
@Getter
6665
@Setter
@@ -102,7 +101,8 @@ public static MetadataChangeProposalWrapper materializeDataset(DatasetUrn datase
102101
.setPlatform(new DataPlatformUrn(datasetUrn.getPlatformEntity().getPlatformNameEntity()));
103102

104103
return MetadataChangeProposalWrapper.create(
105-
b -> b.entityType(DATASET_ENTITY_TYPE).entityUrn(datasetUrn).upsert().aspect(datasetAspect));
104+
b ->
105+
b.entityType(DATASET_ENTITY_TYPE).entityUrn(datasetUrn).upsert().aspect(datasetAspect));
106106
}
107107

108108
public List<MetadataChangeProposal> toMcps(DatahubOpenlineageConfig config) throws IOException {
@@ -113,7 +113,6 @@ public List<MetadataChangeProposal> toMcps(DatahubOpenlineageConfig config) thro
113113
addAspectToMcps(flowUrn, DATA_FLOW_ENTITY_TYPE, dataFlowInfo, mcps);
114114
generateStatus(flowUrn, DATA_FLOW_ENTITY_TYPE, mcps);
115115

116-
117116
// Generate and add PlatformInstance Aspect
118117
if (flowPlatformInstance != null) {
119118
addAspectToMcps(flowUrn, DATA_FLOW_ENTITY_TYPE, flowPlatformInstance, mcps);
@@ -154,7 +153,7 @@ public List<MetadataChangeProposal> toMcps(DatahubOpenlineageConfig config) thro
154153
UrnArray inputUrnArray = inputsTuple.getLeft();
155154
EdgeArray inputEdges = inputsTuple.getRight();
156155

157-
Pair<UrnArray, EdgeArray> outputTuple = processDownstreams(config, mcps);
156+
Pair<UrnArray, EdgeArray> outputTuple = processDownstreams(config, mcps);
158157
UrnArray outputUrnArray = outputTuple.getLeft();
159158
EdgeArray outputEdges = outputTuple.getRight();
160159

@@ -168,11 +167,16 @@ public List<MetadataChangeProposal> toMcps(DatahubOpenlineageConfig config) thro
168167
return mcps;
169168
}
170169

171-
private void generateDataJobInputOutputMcp(EdgeArray inputEdges, EdgeArray outputEdges, DatahubOpenlineageConfig config, List<MetadataChangeProposal> mcps) {
170+
private void generateDataJobInputOutputMcp(
171+
EdgeArray inputEdges,
172+
EdgeArray outputEdges,
173+
DatahubOpenlineageConfig config,
174+
List<MetadataChangeProposal> mcps) {
172175
DataJobInputOutput dataJobInputOutput = new DataJobInputOutput();
173176
log.info("Adding DataJob edges to {}", jobUrn);
174177
if (config.isUsePatch()) {
175-
DataJobInputOutputPatchBuilder dataJobInputOutputPatchBuilder = new DataJobInputOutputPatchBuilder().urn(jobUrn);
178+
DataJobInputOutputPatchBuilder dataJobInputOutputPatchBuilder =
179+
new DataJobInputOutputPatchBuilder().urn(jobUrn);
176180
for (DatahubDataset dataset : inSet) {
177181
dataJobInputOutputPatchBuilder.addInputDatasetEdge(dataset.getUrn());
178182
}
@@ -183,7 +187,9 @@ private void generateDataJobInputOutputMcp(EdgeArray inputEdges, EdgeArray outpu
183187
dataJobInputOutputPatchBuilder.addInputDatajobEdge(parentJob);
184188
}
185189
MetadataChangeProposal dataJobInputOutputMcp = dataJobInputOutputPatchBuilder.build();
186-
log.info("dataJobInputOutputMcp: {}", dataJobInputOutputMcp.getAspect().getValue().asString(Charset.defaultCharset()));
190+
log.info(
191+
"dataJobInputOutputMcp: {}",
192+
dataJobInputOutputMcp.getAspect().getValue().asString(Charset.defaultCharset()));
187193
mcps.add(dataJobInputOutputPatchBuilder.build());
188194

189195
} else {
@@ -194,32 +200,41 @@ private void generateDataJobInputOutputMcp(EdgeArray inputEdges, EdgeArray outpu
194200
DataJobUrnArray parentDataJobUrnArray = new DataJobUrnArray();
195201
parentDataJobUrnArray.addAll(parentJobs);
196202

197-
log.info("Adding input data jobs {} Number of jobs: {}", jobUrn, parentDataJobUrnArray.size());
203+
log.info(
204+
"Adding input data jobs {} Number of jobs: {}", jobUrn, parentDataJobUrnArray.size());
198205
dataJobInputOutput.setInputDatajobs(parentDataJobUrnArray);
199206
addAspectToMcps(jobUrn, DATAJOB_ENTITY_TYPE, dataJobInputOutput, mcps);
200207
}
201208
}
202209

203-
private void generateDataProcessInstanceMcp(UrnArray inputUrnArray, UrnArray outputUrnArray, List<MetadataChangeProposal> mcps) {
210+
private void generateDataProcessInstanceMcp(
211+
UrnArray inputUrnArray, UrnArray outputUrnArray, List<MetadataChangeProposal> mcps) {
204212
DataProcessInstanceInput dataProcessInstanceInput = new DataProcessInstanceInput();
205213
dataProcessInstanceInput.setInputs(inputUrnArray);
206214

207215
DataProcessInstanceOutput dataProcessInstanceOutput = new DataProcessInstanceOutput();
208216
dataProcessInstanceOutput.setOutputs(outputUrnArray);
209217

210-
addAspectToMcps(dataProcessInstanceUrn, DATA_PROCESS_INSTANCE_ENTITY_TYPE, dataProcessInstanceInput, mcps);
211-
addAspectToMcps(dataProcessInstanceUrn, DATA_PROCESS_INSTANCE_ENTITY_TYPE, dataProcessInstanceOutput, mcps);
218+
addAspectToMcps(
219+
dataProcessInstanceUrn, DATA_PROCESS_INSTANCE_ENTITY_TYPE, dataProcessInstanceInput, mcps);
220+
addAspectToMcps(
221+
dataProcessInstanceUrn, DATA_PROCESS_INSTANCE_ENTITY_TYPE, dataProcessInstanceOutput, mcps);
212222

213223
if (dataProcessInstanceProperties != null) {
214224
log.info("Adding dataProcessInstanceProperties to {}", jobUrn);
215-
addAspectToMcps(dataProcessInstanceUrn, DATA_PROCESS_INSTANCE_ENTITY_TYPE, dataProcessInstanceProperties, mcps);
225+
addAspectToMcps(
226+
dataProcessInstanceUrn,
227+
DATA_PROCESS_INSTANCE_ENTITY_TYPE,
228+
dataProcessInstanceProperties,
229+
mcps);
216230
}
217231

218232
generateDataProcessInstanceRunEvent(mcps);
219233
generateDataProcessInstanceRelationship(mcps);
220234
}
221235

222-
private Pair<UrnArray, EdgeArray> processDownstreams(DatahubOpenlineageConfig config, List<MetadataChangeProposal> mcps) {
236+
private Pair<UrnArray, EdgeArray> processDownstreams(
237+
DatahubOpenlineageConfig config, List<MetadataChangeProposal> mcps) {
223238
UrnArray outputUrnArray = new UrnArray();
224239
EdgeArray outputEdges = new EdgeArray();
225240

@@ -242,40 +257,53 @@ private Pair<UrnArray, EdgeArray> processDownstreams(DatahubOpenlineageConfig co
242257
outputEdges.add(edge);
243258

244259
if ((dataset.getSchemaMetadata() != null) && (config.isIncludeSchemaMetadata())) {
245-
addAspectToMcps(dataset.getUrn(), DATASET_ENTITY_TYPE, dataset.getSchemaMetadata(), mcps);
260+
addAspectToMcps(
261+
dataset.getUrn(), DATASET_ENTITY_TYPE, dataset.getSchemaMetadata(), mcps);
246262
}
247263

248264
if (dataset.getLineage() != null) {
249265
if (config.isUsePatch()) {
250-
UpstreamLineagePatchBuilder upstreamLineagePatchBuilder = new UpstreamLineagePatchBuilder().urn(dataset.getUrn());
266+
UpstreamLineagePatchBuilder upstreamLineagePatchBuilder =
267+
new UpstreamLineagePatchBuilder().urn(dataset.getUrn());
251268
for (Upstream upstream : dataset.getLineage().getUpstreams()) {
252269
upstreamLineagePatchBuilder.addUpstream(upstream.getDataset(), upstream.getType());
253270
}
254271

255272
log.info("Adding FineGrainedLineage to {}", dataset.getUrn());
256-
for (FineGrainedLineage fineGrainedLineage : Objects.requireNonNull(
257-
dataset.getLineage().getFineGrainedLineages())) {
273+
for (FineGrainedLineage fineGrainedLineage :
274+
Objects.requireNonNull(dataset.getLineage().getFineGrainedLineages())) {
258275
for (Urn upstream : Objects.requireNonNull(fineGrainedLineage.getUpstreams())) {
259276
upstreamLineagePatchBuilder.addFineGrainedUpstreamField(
260-
upstream, fineGrainedLineage.getConfidenceScore(), StringUtils.defaultIfEmpty(fineGrainedLineage.getTransformOperation(), "TRANSFORM"), fineGrainedLineage.getUpstreamType());
277+
upstream,
278+
fineGrainedLineage.getConfidenceScore(),
279+
StringUtils.defaultIfEmpty(
280+
fineGrainedLineage.getTransformOperation(), "TRANSFORM"),
281+
fineGrainedLineage.getUpstreamType());
261282
}
262283
for (Urn downstream : Objects.requireNonNull(fineGrainedLineage.getDownstreams())) {
263284
upstreamLineagePatchBuilder.addFineGrainedDownstreamField(
264-
downstream, fineGrainedLineage.getConfidenceScore(), StringUtils.defaultIfEmpty(fineGrainedLineage.getTransformOperation(), "TRANSFORM"), fineGrainedLineage.getDownstreamType());
285+
downstream,
286+
fineGrainedLineage.getConfidenceScore(),
287+
StringUtils.defaultIfEmpty(
288+
fineGrainedLineage.getTransformOperation(), "TRANSFORM"),
289+
fineGrainedLineage.getDownstreamType());
265290
}
266291
}
267292
MetadataChangeProposal mcp = upstreamLineagePatchBuilder.build();
268-
log.info("upstreamLineagePatch: {}", mcp.getAspect().getValue().asString(Charset.defaultCharset()));
293+
log.info(
294+
"upstreamLineagePatch: {}",
295+
mcp.getAspect().getValue().asString(Charset.defaultCharset()));
269296
mcps.add(mcp);
270-
}else {
297+
} else {
271298
addAspectToMcps(dataset.getUrn(), DATASET_ENTITY_TYPE, dataset.getLineage(), mcps);
272299
}
273300
}
274301
});
275302
return Pair.of(outputUrnArray, outputEdges);
276303
}
277304

278-
private Pair<UrnArray, EdgeArray> processUpstreams(DatahubOpenlineageConfig config, List<MetadataChangeProposal> mcps) {
305+
private Pair<UrnArray, EdgeArray> processUpstreams(
306+
DatahubOpenlineageConfig config, List<MetadataChangeProposal> mcps) {
279307
UrnArray inputUrnArray = new UrnArray();
280308
EdgeArray inputEdges = new EdgeArray();
281309

@@ -298,7 +326,8 @@ private Pair<UrnArray, EdgeArray> processUpstreams(DatahubOpenlineageConfig conf
298326
}
299327

300328
if (dataset.getSchemaMetadata() != null && config.isIncludeSchemaMetadata()) {
301-
addAspectToMcps(dataset.getUrn(), DATASET_ENTITY_TYPE, dataset.getSchemaMetadata(), mcps);
329+
addAspectToMcps(
330+
dataset.getUrn(), DATASET_ENTITY_TYPE, dataset.getSchemaMetadata(), mcps);
302331
}
303332

304333
if (dataset.getLineage() != null) {
@@ -308,11 +337,16 @@ private Pair<UrnArray, EdgeArray> processUpstreams(DatahubOpenlineageConfig conf
308337
return Pair.of(inputUrnArray, inputEdges);
309338
}
310339

311-
private void generateFlowDomainsAspect(List<MetadataChangeProposal> mcps, StringMap customProperties) {
340+
private void generateFlowDomainsAspect(
341+
List<MetadataChangeProposal> mcps, StringMap customProperties) {
312342
if (flowDomains != null) {
313343
MetadataChangeProposalWrapper domains =
314344
MetadataChangeProposalWrapper.create(
315-
b -> b.entityType(DATAFLOW_ENTITY_TYPE).entityUrn(flowUrn).upsert().aspect(flowDomains));
345+
b ->
346+
b.entityType(DATAFLOW_ENTITY_TYPE)
347+
.entityUrn(flowUrn)
348+
.upsert()
349+
.aspect(flowDomains));
316350
try {
317351
mcps.add(eventFormatter.convert(domains));
318352
} catch (IOException e) {
@@ -321,7 +355,11 @@ private void generateFlowDomainsAspect(List<MetadataChangeProposal> mcps, String
321355
}
322356
}
323357

324-
private void generateFlowGlobalTagsAspect(Urn flowUrn, GlobalTags flowGlobalTags, DatahubOpenlineageConfig config, List<MetadataChangeProposal> mcps) {
358+
private void generateFlowGlobalTagsAspect(
359+
Urn flowUrn,
360+
GlobalTags flowGlobalTags,
361+
DatahubOpenlineageConfig config,
362+
List<MetadataChangeProposal> mcps) {
325363
if (flowGlobalTags != null) {
326364
if (config.isUsePatch()) {
327365
GlobalTagsPatchBuilder globalTagsPatchBuilder = new GlobalTagsPatchBuilder().urn(flowUrn);
@@ -341,9 +379,11 @@ private void generateStatus(Urn entityUrn, String entityType, List<MetadataChang
341379
addAspectToMcps(entityUrn, entityType, statusInfo, mcps);
342380
}
343381

344-
private void addAspectToMcps(Urn entityUrn, String entityType, DataTemplate aspect, List<MetadataChangeProposal> mcps) {
345-
MetadataChangeProposalWrapper mcpw = MetadataChangeProposalWrapper.create(
346-
b -> b.entityType(entityType).entityUrn(entityUrn).upsert().aspect(aspect));
382+
private void addAspectToMcps(
383+
Urn entityUrn, String entityType, DataTemplate aspect, List<MetadataChangeProposal> mcps) {
384+
MetadataChangeProposalWrapper mcpw =
385+
MetadataChangeProposalWrapper.create(
386+
b -> b.entityType(entityType).entityUrn(entityUrn).upsert().aspect(aspect));
347387
try {
348388
mcps.add(eventFormatter.convert(mcpw));
349389
} catch (IOException e) {
@@ -355,13 +395,14 @@ private void generateDataProcessInstanceRelationship(List<MetadataChangeProposal
355395
if (dataProcessInstanceRelationships != null) {
356396
log.info("Adding dataProcessInstanceRelationships to {}", jobUrn);
357397
try {
358-
mcps.add(eventFormatter.convert(
359-
MetadataChangeProposalWrapper.create(
360-
b ->
361-
b.entityType(DATA_PROCESS_INSTANCE_ENTITY_TYPE)
362-
.entityUrn(dataProcessInstanceUrn)
363-
.upsert()
364-
.aspect(dataProcessInstanceRelationships))));
398+
mcps.add(
399+
eventFormatter.convert(
400+
MetadataChangeProposalWrapper.create(
401+
b ->
402+
b.entityType(DATA_PROCESS_INSTANCE_ENTITY_TYPE)
403+
.entityUrn(dataProcessInstanceUrn)
404+
.upsert()
405+
.aspect(dataProcessInstanceRelationships))));
365406
} catch (IOException e) {
366407
throw new RuntimeException(e);
367408
}
@@ -372,13 +413,14 @@ private void generateDataProcessInstanceRunEvent(List<MetadataChangeProposal> mc
372413
if (dataProcessInstanceRunEvent != null) {
373414
log.info("Adding dataProcessInstanceRunEvent to {}", jobUrn);
374415
try {
375-
mcps.add(eventFormatter.convert(
376-
MetadataChangeProposalWrapper.create(
377-
b ->
378-
b.entityType(DATA_PROCESS_INSTANCE_ENTITY_TYPE)
379-
.entityUrn(dataProcessInstanceUrn)
380-
.upsert()
381-
.aspect(dataProcessInstanceRunEvent))));
416+
mcps.add(
417+
eventFormatter.convert(
418+
MetadataChangeProposalWrapper.create(
419+
b ->
420+
b.entityType(DATA_PROCESS_INSTANCE_ENTITY_TYPE)
421+
.entityUrn(dataProcessInstanceUrn)
422+
.upsert()
423+
.aspect(dataProcessInstanceRunEvent))));
382424
} catch (IOException e) {
383425
throw new RuntimeException(e);
384426
}

metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubEventEmitter.java

+2 -1
Original file line number | Diff line number | Diff line change
@@ -356,7 +356,8 @@ private void mergeCustomProperties(DatahubJob datahubJob, DatahubJob storedDatah
356356

357357
public void emit(StreamingQueryProgress event) throws URISyntaxException {
358358
List<MetadataChangeProposal> mcps = new ArrayList<>();
359-
for (MetadataChangeProposalWrapper mcpw : generateMcpFromStreamingProgressEvent(event, datahubConf, schemaMap)) {
359+
for (MetadataChangeProposalWrapper mcpw :
360+
generateMcpFromStreamingProgressEvent(event, datahubConf, schemaMap)) {
360361
try {
361362
mcps.add(eventFormatter.convert(mcpw));
362363
} catch (IOException e) {

metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/conf/SparkConfigParser.java

-1
Original file line number | Diff line number | Diff line change
@@ -309,7 +309,6 @@ public static boolean isPatchEnabled(Config datahubConfig) {
309309
return datahubConfig.hasPath(PATCH_ENABLED) && datahubConfig.getBoolean(PATCH_ENABLED);
310310
}
311311

312-
313312
public static boolean isEmitCoalescePeriodically(Config datahubConfig) {
314313
if (!datahubConfig.hasPath(STAGE_METADATA_COALESCING)) {
315314
// if databricks tags are present and stage_metadata_coalescing is not present, then default

metadata-service/openlineage-servlet/src/main/java/io/datahubproject/openlineage/config/OpenLineageServletConfig.java

+3 -2
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,10 @@ public RunEventMapper.MappingConfig mappingConfig() {
1717
.commonDatasetPlatformInstance(null)
1818
.platform(null)
1919
.filePartitionRegexpPattern(null)
20-
.materializeDataset(false)
21-
.includeSchemaMetadata(false)
20+
.materializeDataset(true)
21+
.includeSchemaMetadata(true)
2222
.captureColumnLevelLineage(true)
23+
.usePatch(false)
2324
.parentJobUrn(null)
2425
.build();
2526
return RunEventMapper.MappingConfig.builder().datahubConfig(datahubOpenlineageConfig).build();

0 commit comments

Comments
 (0)