Skip to content

Commit 6034aab

Browse files
Merge branch 'master' into openapi-ingest
2 parents c01cb37 + 47f59e6 commit 6034aab

File tree

22 files changed

+988
-187
lines changed

22 files changed

+988
-187
lines changed

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/util/SystemMetadataUtils.java

+24-39
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22

33
import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID;
44

5+
import com.linkedin.entity.EnvelopedAspect;
56
import com.linkedin.entity.EnvelopedAspectMap;
67
import com.linkedin.mxe.SystemMetadata;
7-
import java.util.ArrayList;
8-
import java.util.List;
8+
import java.util.Comparator;
9+
import java.util.Objects;
10+
import java.util.Optional;
911
import javax.annotation.Nonnull;
1012
import javax.annotation.Nullable;
1113

@@ -15,51 +17,34 @@ private SystemMetadataUtils() {}
1517

1618
@Nullable
1719
public static Long getLastIngestedTime(@Nonnull EnvelopedAspectMap aspectMap) {
18-
RunInfo lastIngestionRun = getLastIngestionRun(aspectMap);
19-
return lastIngestionRun != null ? lastIngestionRun.getTime() : null;
20+
return getLastIngestionRun(aspectMap).map(RunInfo::getTime).orElse(null);
2021
}
2122

2223
@Nullable
2324
public static String getLastIngestedRunId(@Nonnull EnvelopedAspectMap aspectMap) {
24-
RunInfo lastIngestionRun = getLastIngestionRun(aspectMap);
25-
return lastIngestionRun != null ? lastIngestionRun.getId() : null;
25+
return getLastIngestionRun(aspectMap).map(RunInfo::getId).orElse(null);
2626
}
2727

2828
/**
29-
* Returns a sorted list of all of the most recent ingestion runs based on the most recent aspects
30-
* present for the entity.
29+
* Returns the most recent ingestion run based on the most recent aspects present for the entity.
3130
*/
3231
@Nonnull
33-
public static List<RunInfo> getLastIngestionRuns(@Nonnull EnvelopedAspectMap aspectMap) {
34-
final List<RunInfo> runs = new ArrayList<>();
35-
for (String aspect : aspectMap.keySet()) {
36-
if (aspectMap.get(aspect).hasSystemMetadata()) {
37-
SystemMetadata systemMetadata = aspectMap.get(aspect).getSystemMetadata();
38-
if (systemMetadata.hasLastRunId()
39-
&& !systemMetadata.getLastRunId().equals(DEFAULT_RUN_ID)
40-
&& systemMetadata.hasLastObserved()) {
41-
Long lastObserved = systemMetadata.getLastObserved();
42-
String runId = systemMetadata.getLastRunId();
43-
RunInfo run = new RunInfo(runId, lastObserved);
44-
runs.add(run);
45-
} else if (systemMetadata.hasRunId()
46-
&& !systemMetadata.getRunId().equals(DEFAULT_RUN_ID)
47-
&& systemMetadata.hasLastObserved()) {
48-
// Handle the legacy case: Check original run ids.
49-
Long lastObserved = systemMetadata.getLastObserved();
50-
String runId = systemMetadata.getRunId();
51-
RunInfo run = new RunInfo(runId, lastObserved);
52-
runs.add(run);
53-
}
54-
}
55-
}
56-
runs.sort((a, b) -> Long.compare(b.getTime(), a.getTime()));
57-
return runs;
58-
}
59-
60-
@Nullable
61-
private static RunInfo getLastIngestionRun(@Nonnull EnvelopedAspectMap aspectMap) {
62-
List<RunInfo> runs = getLastIngestionRuns(aspectMap);
63-
return !runs.isEmpty() ? runs.get(0) : null; // Just take the first, to get the most recent run.
32+
private static Optional<RunInfo> getLastIngestionRun(@Nonnull EnvelopedAspectMap aspectMap) {
33+
return aspectMap.values().stream()
34+
.filter(EnvelopedAspect::hasSystemMetadata)
35+
.map(EnvelopedAspect::getSystemMetadata)
36+
.filter(SystemMetadata::hasLastObserved)
37+
.map(
38+
systemMetadata ->
39+
Optional.ofNullable(systemMetadata.getLastRunId())
40+
.filter(lastRunId -> !lastRunId.equals(DEFAULT_RUN_ID))
41+
.or(
42+
() ->
43+
Optional.ofNullable(systemMetadata.getRunId())
44+
.filter(runId -> !runId.equals(DEFAULT_RUN_ID)))
45+
.map(runId -> new RunInfo(runId, systemMetadata.getLastObserved()))
46+
.orElse(null))
47+
.filter(Objects::nonNull)
48+
.max(Comparator.comparingLong(RunInfo::getTime));
6449
}
6550
}

datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/SystemMetadataUtilsTest.java

-28
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,10 @@
33
import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID;
44
import static org.testng.Assert.*;
55

6-
import com.linkedin.datahub.graphql.types.common.mappers.util.RunInfo;
76
import com.linkedin.datahub.graphql.types.common.mappers.util.SystemMetadataUtils;
87
import com.linkedin.entity.EnvelopedAspect;
98
import com.linkedin.entity.EnvelopedAspectMap;
109
import com.linkedin.mxe.SystemMetadata;
11-
import java.util.List;
1210
import org.testng.annotations.Test;
1311

1412
public class SystemMetadataUtilsTest {
@@ -63,32 +61,6 @@ public void testGetLastIngestedRunId() {
6361
assertEquals(lastRunId, "real-id-1");
6462
}
6563

66-
@Test
67-
public void testGetLastIngestedRuns() {
68-
EnvelopedAspectMap aspectMap = new EnvelopedAspectMap();
69-
aspectMap.put(
70-
"default-run-id",
71-
new EnvelopedAspect()
72-
.setSystemMetadata(
73-
new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(recentLastObserved)));
74-
aspectMap.put(
75-
"real-run-id",
76-
new EnvelopedAspect()
77-
.setSystemMetadata(
78-
new SystemMetadata().setRunId("real-id-1").setLastObserved(mediumLastObserved)));
79-
aspectMap.put(
80-
"real-run-id2",
81-
new EnvelopedAspect()
82-
.setSystemMetadata(
83-
new SystemMetadata().setRunId("real-id-2").setLastObserved(distantLastObserved)));
84-
85-
List<RunInfo> runs = SystemMetadataUtils.getLastIngestionRuns(aspectMap);
86-
87-
assertEquals(runs.size(), 2);
88-
assertEquals(runs.get(0), new RunInfo("real-id-1", mediumLastObserved));
89-
assertEquals(runs.get(1), new RunInfo("real-id-2", distantLastObserved));
90-
}
91-
9264
@Test
9365
public void testGetLastIngestedTimeAllDefaultRunIds() {
9466
EnvelopedAspectMap aspectMap = new EnvelopedAspectMap();

docs/managed-datahub/release-notes/v_0_3_8.md

+25
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,31 @@ Recommended CLI/SDK
1717
## Release Changelog
1818
---
1919

20+
### v0.3.8.2
21+
- Helm Chart Requirement: 1.5.0+
22+
- [Remote Executor]: Add support for file-based secret manager.
23+
- [UI]: Show user that manually triggered an ingestion.
24+
- [UI]: Add option to copy custom ownership type urn.
25+
- [Ingestion]: Add Restore Indices as a DataHub Cloud ingestion source.
26+
- [Observability]: Fix ingestionSourceForEntity to only fetch sources that can execute assertions based on aspect source.
27+
- [SQL] Fix **No rows** updated exception.
28+
- [Search] Fix autocomplete duplicate field.
29+
- [Lineage] Fix lineage counts on search cards with ghost entities.
30+
- [UI] Show editable field info for fields based on exact fieldPath version.
31+
- [Metadata Tests] Add browse path v2 container as a selectable property.
32+
- [Modelling] Fix DashboardContainsDashboard relationship in DashboardInfo aspect.
33+
- [UI] Consider Dataset Profiles, Status and Browse Path v2 aspects when computing last ingested timestamp.
34+
- [Metadata Tests] Add ownership type as a filter for selecting assets.
35+
- [API] Disallow deleting system users.
36+
- [UI] Add 'Created At' filter.
37+
- [Search Ranking] Reset ranking on unused datasets.
38+
- [Integrations] Pre-install required duckdb extensions.
39+
- [ElasticSearch] Fix runId no document found exception.
40+
- [Modelling] Add UrnValidation PDL annotation.
41+
- [UI] Fix bug with nested selects and selectable parents.
42+
- [UI] Downgrade V2 field paths in column stats table.
43+
44+
2045
### v0.3.8
2146

2247
- All changes in https://github.com/datahub-project/datahub/releases/tag/v0.14.1

metadata-ingestion/developing.md

+5-8
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ Instead, we recommend using UI-based ingestion or isolating the ingestion pipeli
155155
The syntax for installing plugins is slightly different in development. For example:
156156

157157
```diff
158-
- pip install 'acryl-datahub[bigquery,datahub-rest]'
159-
+ pip install -e '.[bigquery,datahub-rest]'
158+
- uv pip install 'acryl-datahub[bigquery,datahub-rest]'
159+
+ uv pip install -e '.[bigquery,datahub-rest]'
160160
```
161161

162162
## Architecture
@@ -180,7 +180,7 @@ The architecture of this metadata ingestion framework is heavily inspired by [Ap
180180
We use ruff and mypy to ensure consistent code style and quality.
181181

182182
```shell
183-
# Assumes: pip install -e '.[dev]' and venv is activated
183+
# Assumes: ../gradlew :metadata-ingestion:installDev and venv is activated
184184
ruff check src/ tests/
185185
mypy src/ tests/
186186
```
@@ -247,11 +247,8 @@ In order to ensure that the configs are consistent and easy to use, we have a fe
247247
```shell
248248
# Follow standard install from source procedure - see above.
249249

250-
# Install, including all dev requirements.
251-
pip install -e '.[dev]'
252-
253-
# For running integration tests, you can use
254-
pip install -e '.[integration-tests]'
250+
# Install all dev and test requirements.
251+
../gradlew :metadata-ingestion:installDevTest
255252

256253
# Run the full testing suite
257254
pytest -vv

0 commit comments

Comments
 (0)