Skip to content

Commit f80e5ce

Browse files
authored
Merge branch 'master' into peter/column-level-lineage
2 parents 4fdd044 + 48b6581 commit f80e5ce

File tree

54 files changed

+2324
-466
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+2324
-466
lines changed

build.gradle

+8-7
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ buildscript {
6060
ext.googleJavaFormatVersion = '1.18.1'
6161
ext.openLineageVersion = '1.25.0'
6262
ext.logbackClassicJava8 = '1.2.12'
63+
ext.awsSdk2Version = '2.30.33'
6364

6465
ext.docker_registry = 'acryldata'
6566

@@ -120,12 +121,12 @@ project.ext.externalDependency = [
120121
'assertJ': 'org.assertj:assertj-core:3.11.1',
121122
'avro': 'org.apache.avro:avro:1.11.4',
122123
'avroCompiler': 'org.apache.avro:avro-compiler:1.11.4',
123-
'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.17',
124-
'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:2.0.3',
125-
'awsS3': 'software.amazon.awssdk:s3:2.26.21',
126-
'awsSecretsManagerJdbc': 'com.amazonaws.secretsmanager:aws-secretsmanager-jdbc:1.0.13',
127-
'awsPostgresIamAuth': 'software.amazon.jdbc:aws-advanced-jdbc-wrapper:1.0.2',
128-
'awsRds':'software.amazon.awssdk:rds:2.18.24',
124+
'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.23',
125+
'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:2.3.0',
126+
'awsS3': "software.amazon.awssdk:s3:$awsSdk2Version",
127+
'awsSecretsManagerJdbc': 'com.amazonaws.secretsmanager:aws-secretsmanager-jdbc:1.0.15',
128+
'awsPostgresIamAuth': 'software.amazon.jdbc:aws-advanced-jdbc-wrapper:2.5.4',
129+
'awsRds':"software.amazon.awssdk:rds:$awsSdk2Version",
129130
'cacheApi': 'javax.cache:cache-api:1.1.0',
130131
'commonsCli': 'commons-cli:commons-cli:1.5.0',
131132
'commonsIo': 'commons-io:commons-io:2.17.0',
@@ -240,7 +241,7 @@ project.ext.externalDependency = [
240241
'playFilters': "com.typesafe.play:filters-helpers_$playScalaVersion:$playVersion",
241242
'pac4j': 'org.pac4j:pac4j-oidc:6.0.6',
242243
'playPac4j': "org.pac4j:play-pac4j_$playScalaVersion:12.0.0-PLAY2.8",
243-
'postgresql': 'org.postgresql:postgresql:42.7.4',
244+
'postgresql': 'org.postgresql:postgresql:42.7.5',
244245
'protobuf': 'com.google.protobuf:protobuf-java:3.25.5',
245246
'grpcProtobuf': 'io.grpc:grpc-protobuf:1.53.0',
246247
'rangerCommons': 'org.apache.ranger:ranger-plugins-common:2.3.0',

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/util/SystemMetadataUtils.java

+24-39
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22

33
import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID;
44

5+
import com.linkedin.entity.EnvelopedAspect;
56
import com.linkedin.entity.EnvelopedAspectMap;
67
import com.linkedin.mxe.SystemMetadata;
7-
import java.util.ArrayList;
8-
import java.util.List;
8+
import java.util.Comparator;
9+
import java.util.Objects;
10+
import java.util.Optional;
911
import javax.annotation.Nonnull;
1012
import javax.annotation.Nullable;
1113

@@ -15,51 +17,34 @@ private SystemMetadataUtils() {}
1517

1618
@Nullable
1719
public static Long getLastIngestedTime(@Nonnull EnvelopedAspectMap aspectMap) {
18-
RunInfo lastIngestionRun = getLastIngestionRun(aspectMap);
19-
return lastIngestionRun != null ? lastIngestionRun.getTime() : null;
20+
return getLastIngestionRun(aspectMap).map(RunInfo::getTime).orElse(null);
2021
}
2122

2223
@Nullable
2324
public static String getLastIngestedRunId(@Nonnull EnvelopedAspectMap aspectMap) {
24-
RunInfo lastIngestionRun = getLastIngestionRun(aspectMap);
25-
return lastIngestionRun != null ? lastIngestionRun.getId() : null;
25+
return getLastIngestionRun(aspectMap).map(RunInfo::getId).orElse(null);
2626
}
2727

2828
/**
29-
* Returns a sorted list of all of the most recent ingestion runs based on the most recent aspects
30-
* present for the entity.
29+
* Returns the most recent ingestion run based on the most recent aspects present for the entity.
3130
*/
3231
@Nonnull
33-
public static List<RunInfo> getLastIngestionRuns(@Nonnull EnvelopedAspectMap aspectMap) {
34-
final List<RunInfo> runs = new ArrayList<>();
35-
for (String aspect : aspectMap.keySet()) {
36-
if (aspectMap.get(aspect).hasSystemMetadata()) {
37-
SystemMetadata systemMetadata = aspectMap.get(aspect).getSystemMetadata();
38-
if (systemMetadata.hasLastRunId()
39-
&& !systemMetadata.getLastRunId().equals(DEFAULT_RUN_ID)
40-
&& systemMetadata.hasLastObserved()) {
41-
Long lastObserved = systemMetadata.getLastObserved();
42-
String runId = systemMetadata.getLastRunId();
43-
RunInfo run = new RunInfo(runId, lastObserved);
44-
runs.add(run);
45-
} else if (systemMetadata.hasRunId()
46-
&& !systemMetadata.getRunId().equals(DEFAULT_RUN_ID)
47-
&& systemMetadata.hasLastObserved()) {
48-
// Handle the legacy case: Check original run ids.
49-
Long lastObserved = systemMetadata.getLastObserved();
50-
String runId = systemMetadata.getRunId();
51-
RunInfo run = new RunInfo(runId, lastObserved);
52-
runs.add(run);
53-
}
54-
}
55-
}
56-
runs.sort((a, b) -> Long.compare(b.getTime(), a.getTime()));
57-
return runs;
58-
}
59-
60-
@Nullable
61-
private static RunInfo getLastIngestionRun(@Nonnull EnvelopedAspectMap aspectMap) {
62-
List<RunInfo> runs = getLastIngestionRuns(aspectMap);
63-
return !runs.isEmpty() ? runs.get(0) : null; // Just take the first, to get the most recent run.
32+
private static Optional<RunInfo> getLastIngestionRun(@Nonnull EnvelopedAspectMap aspectMap) {
33+
return aspectMap.values().stream()
34+
.filter(EnvelopedAspect::hasSystemMetadata)
35+
.map(EnvelopedAspect::getSystemMetadata)
36+
.filter(SystemMetadata::hasLastObserved)
37+
.map(
38+
systemMetadata ->
39+
Optional.ofNullable(systemMetadata.getLastRunId())
40+
.filter(lastRunId -> !lastRunId.equals(DEFAULT_RUN_ID))
41+
.or(
42+
() ->
43+
Optional.ofNullable(systemMetadata.getRunId())
44+
.filter(runId -> !runId.equals(DEFAULT_RUN_ID)))
45+
.map(runId -> new RunInfo(runId, systemMetadata.getLastObserved()))
46+
.orElse(null))
47+
.filter(Objects::nonNull)
48+
.max(Comparator.comparingLong(RunInfo::getTime));
6449
}
6550
}

datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/SystemMetadataUtilsTest.java

-28
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,10 @@
33
import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID;
44
import static org.testng.Assert.*;
55

6-
import com.linkedin.datahub.graphql.types.common.mappers.util.RunInfo;
76
import com.linkedin.datahub.graphql.types.common.mappers.util.SystemMetadataUtils;
87
import com.linkedin.entity.EnvelopedAspect;
98
import com.linkedin.entity.EnvelopedAspectMap;
109
import com.linkedin.mxe.SystemMetadata;
11-
import java.util.List;
1210
import org.testng.annotations.Test;
1311

1412
public class SystemMetadataUtilsTest {
@@ -63,32 +61,6 @@ public void testGetLastIngestedRunId() {
6361
assertEquals(lastRunId, "real-id-1");
6462
}
6563

66-
@Test
67-
public void testGetLastIngestedRuns() {
68-
EnvelopedAspectMap aspectMap = new EnvelopedAspectMap();
69-
aspectMap.put(
70-
"default-run-id",
71-
new EnvelopedAspect()
72-
.setSystemMetadata(
73-
new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(recentLastObserved)));
74-
aspectMap.put(
75-
"real-run-id",
76-
new EnvelopedAspect()
77-
.setSystemMetadata(
78-
new SystemMetadata().setRunId("real-id-1").setLastObserved(mediumLastObserved)));
79-
aspectMap.put(
80-
"real-run-id2",
81-
new EnvelopedAspect()
82-
.setSystemMetadata(
83-
new SystemMetadata().setRunId("real-id-2").setLastObserved(distantLastObserved)));
84-
85-
List<RunInfo> runs = SystemMetadataUtils.getLastIngestionRuns(aspectMap);
86-
87-
assertEquals(runs.size(), 2);
88-
assertEquals(runs.get(0), new RunInfo("real-id-1", mediumLastObserved));
89-
assertEquals(runs.get(1), new RunInfo("real-id-2", distantLastObserved));
90-
}
91-
9264
@Test
9365
public void testGetLastIngestedTimeAllDefaultRunIds() {
9466
EnvelopedAspectMap aspectMap = new EnvelopedAspectMap();

datahub-web-react/src/app/previewV2/BrowsePaths.tsx

+5-1
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,16 @@ const BrowsePathSection = ({ path, linksDisabled }: { path: BrowsePathEntry } &
7171
if (!path.entity) {
7272
return <PlatFormTitle>{path.name}</PlatFormTitle>;
7373
}
74+
75+
// Till we have a DataPlatform instance page
76+
const hasDataPlatformInstance = path.name?.includes('dataPlatformInstance');
77+
7478
return (
7579
<ContextPathEntityLink
7680
key={path?.entity?.urn}
7781
entity={path?.entity}
7882
style={{ fontSize: '12px' }}
79-
linkDisabled={linksDisabled}
83+
linkDisabled={linksDisabled || hasDataPlatformInstance}
8084
/>
8185
);
8286
};

datahub-web-react/src/app/previewV2/ContextPath.tsx

+2-10
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ const PlatFormTitle = styled.span`
8282
interface Props {
8383
// eslint-disable-next-line react/no-unused-prop-types
8484
entityLogoComponent?: JSX.Element;
85+
// eslint-disable-next-line react/no-unused-prop-types
8586
instanceId?: string;
8687
// eslint-disable-next-line react/no-unused-prop-types
8788
typeIcon?: JSX.Element;
@@ -103,7 +104,6 @@ function ContextPath(props: Props) {
103104
entityType,
104105
parentEntities,
105106
browsePaths,
106-
instanceId,
107107
entityTitleWidth = 200,
108108
previewType,
109109
isCompactView,
@@ -118,12 +118,10 @@ function ContextPath(props: Props) {
118118

119119
const divider = <PlatformDivider>|</PlatformDivider>;
120120

121-
const hasPlatformInstance = !!instanceId;
122121
const hasBrowsePath = !!browsePaths?.path?.length && !isDefaultBrowsePath(browsePaths);
123122
const hasParentEntities = !!parentEntities?.length;
124123

125-
const showInstanceIdDivider = hasBrowsePath || hasParentEntities;
126-
const showEntityTypeDivider = hasPlatformInstance || hasBrowsePath || hasParentEntities;
124+
const showEntityTypeDivider = hasBrowsePath || hasParentEntities;
127125

128126
return (
129127
<PlatformContentWrapper>
@@ -136,12 +134,6 @@ function ContextPath(props: Props) {
136134
<PlatFormTitle>{capitalizeFirstLetterOnly(type)}</PlatFormTitle>
137135
{showEntityTypeDivider && divider}
138136
</PlatformText>
139-
{instanceId && (
140-
<PlatformText>
141-
{instanceId}
142-
{showInstanceIdDivider && divider}
143-
</PlatformText>
144-
)}
145137
{hasBrowsePath ? (
146138
<BrowsePaths
147139
browsePaths={browsePaths}

datahub-web-react/src/app/previewV2/ContextPathEntityLink.tsx

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ const StyledLink = styled(Link)<{ $disabled?: boolean }>`
4545
4646
:hover {
4747
color: ${({ $disabled }) => ($disabled ? REDESIGN_COLORS.LINK_GREY : colors.violet[500])};
48-
48+
cursor: ${({ $disabled }) => ($disabled ? 'default' : 'pointer')};
4949
&& svg {
5050
color: ${({ $disabled }) => ($disabled ? REDESIGN_COLORS.LINK_GREY : colors.violet[500])};
5151
}

docs/how/updating-datahub.md

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
3030

3131
- #12716: Fix the `platform_instance` being added twice to the URN. If you want to have the previous behavior back, you need to add your platform_instance twice (i.e. `plat.plat`).
3232

33+
- #12797: Previously, endpoints used in ASYNC mode did not immediately validate URNs, entity names, and aspect names. Starting with this release, requests that fail this validation are rejected immediately with HTTP status code 400, even in ASYNC mode.
34+
3335

3436
### Known Issues
3537

docs/managed-datahub/release-notes/v_0_3_8.md

+25
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,31 @@ Recommended CLI/SDK
1717
## Release Changelog
1818
---
1919

20+
### v0.3.8.2
21+
- Helm Chart Requirement: 1.5.0+
22+
- [Remote Executor]: Add support for file-based secret manager.
23+
- [UI]: Show user that manually triggered an ingestion.
24+
- [UI]: Add option to copy custom ownership type urn.
25+
- [Ingestion]: Add Restore Indices as a DataHub Cloud ingestion source.
26+
- [Observability]: Fix ingestionSourceForEntity to only fetch sources that can execute assertions based on aspect source.
27+
- [SQL] Fix **No rows** updated exception.
28+
- [Search] Fix autocomplete duplicate field.
29+
- [Lineage] Fix lineage counts on search cards with ghost entities.
30+
- [UI] Show editable field info for fields based on exact fieldPath version.
31+
- [Metadata Tests] Add browse path v2 container as a selectable property.
32+
- [Modelling] Fix DashboardContainsDashboard relationship in DashboardInfo aspect.
33+
- [UI] Consider Dataset Profiles, Status and Browse Path v2 aspects when computing last ingested timestamp.
34+
- [Metadata Tests] Add ownership type as a filter for selecting assets.
35+
- [API] Disallow deleting system users.
36+
- [UI] Add 'Created At' filter.
37+
- [Search Ranking] Reset ranking on unused datasets.
38+
- [Integrations] Pre-install required duckdb extensions.
39+
- [ElasticSearch] Fix runId no document found exception.
40+
- [Modelling] Add UrnValidation PDL annotation.
41+
- [UI] Fix bug with nested selects and selectable parents
42+
- [UI] Downgrade V2 field paths in column stats table
43+
44+
2045
### v0.3.8
2146

2247
- All changes in https://github.com/datahub-project/datahub/releases/tag/v0.14.1

entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPItem.java

+4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package com.linkedin.metadata.aspect.batch;
22

33
import com.google.common.collect.ImmutableSet;
4+
import com.linkedin.common.urn.Urn;
45
import com.linkedin.events.metadata.ChangeType;
56
import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine;
67
import com.linkedin.metadata.models.AspectSpec;
8+
import com.linkedin.metadata.models.registry.EntityRegistry;
79
import com.linkedin.mxe.MetadataChangeProposal;
810
import com.linkedin.mxe.SystemMetadata;
911
import java.util.Collections;
@@ -82,4 +84,6 @@ static boolean supportsPatch(AspectSpec aspectSpec) {
8284
}
8385
return true;
8486
}
87+
88+
default void validate(Urn urn, String aspectName, EntityRegistry entityRegistry) {}
8589
}

metadata-ingestion/developing.md

+5-8
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ Instead, we recommend using UI-based ingestion or isolating the ingestion pipeli
155155
The syntax for installing plugins is slightly different in development. For example:
156156

157157
```diff
158-
- pip install 'acryl-datahub[bigquery,datahub-rest]'
159-
+ pip install -e '.[bigquery,datahub-rest]'
158+
- uv pip install 'acryl-datahub[bigquery,datahub-rest]'
159+
+ uv pip install -e '.[bigquery,datahub-rest]'
160160
```
161161

162162
## Architecture
@@ -180,7 +180,7 @@ The architecture of this metadata ingestion framework is heavily inspired by [Ap
180180
We use ruff and mypy to ensure consistent code style and quality.
181181

182182
```shell
183-
# Assumes: pip install -e '.[dev]' and venv is activated
183+
# Assumes: ../gradlew :metadata-ingestion:installDev and venv is activated
184184
ruff check src/ tests/
185185
mypy src/ tests/
186186
```
@@ -247,11 +247,8 @@ In order to ensure that the configs are consistent and easy to use, we have a fe
247247
```shell
248248
# Follow standard install from source procedure - see above.
249249

250-
# Install, including all dev requirements.
251-
pip install -e '.[dev]'
252-
253-
# For running integration tests, you can use
254-
pip install -e '.[integration-tests]'
250+
# Install all dev and test requirements.
251+
../gradlew :metadata-ingestion:installDevTest
255252

256253
# Run the full testing suite
257254
pytest -vv

0 commit comments

Comments
 (0)