Skip to content

Commit a1f2337

Browse files
authored
Merge branch 'master' into master+CUS-1183-extract_ownership_from_tags
2 parents 376cdc5 + 69d0ba1 commit a1f2337

File tree

99 files changed

+17788
-5230
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

99 files changed

+17788
-5230
lines changed

.github/workflows/metadata-ingestion.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
# DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }}
3232
strategy:
3333
matrix:
34-
python-version: ["3.7", "3.10"]
34+
python-version: ["3.8", "3.10"]
3535
command:
3636
[
3737
"testQuick",
@@ -40,7 +40,7 @@ jobs:
4040
"testIntegrationBatch2",
4141
]
4242
include:
43-
- python-version: "3.7"
43+
- python-version: "3.8"
4444
- python-version: "3.10"
4545
fail-fast: false
4646
steps:

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,8 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) {
878878
"scrollAcrossEntities",
879879
new ScrollAcrossEntitiesResolver(this.entityClient, this.viewService))
880880
.dataFetcher(
881-
"searchAcrossLineage", new SearchAcrossLineageResolver(this.entityClient))
881+
"searchAcrossLineage",
882+
new SearchAcrossLineageResolver(this.entityClient, this.entityRegistry))
882883
.dataFetcher(
883884
"scrollAcrossLineage", new ScrollAcrossLineageResolver(this.entityClient))
884885
.dataFetcher(

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/BatchLoadUtils.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ public static CompletableFuture<List<Entity>> batchLoadEntitiesOfSameType(
2828
.filter(entity -> entities.get(0).getClass().isAssignableFrom(entity.objectClass()))
2929
.collect(Collectors.toList()));
3030

31-
final DataLoader loader = dataLoaderRegistry.getDataLoader(filteredEntity.name());
32-
List keyList = new ArrayList();
31+
final DataLoader<Object, Entity> loader =
32+
dataLoaderRegistry.getDataLoader(filteredEntity.name());
33+
List<Object> keyList = new ArrayList();
3334
for (Entity entity : entities) {
3435
keyList.add(filteredEntity.getKeyProvider().apply(entity));
3536
}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java

+41-11
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*;
44
import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*;
5+
import static com.linkedin.metadata.Constants.QUERY_ENTITY_NAME;
56

7+
import com.google.common.collect.ImmutableSet;
68
import com.linkedin.common.urn.Urn;
79
import com.linkedin.datahub.graphql.generated.EntityType;
810
import com.linkedin.datahub.graphql.generated.FacetFilterInput;
@@ -14,31 +16,63 @@
1416
import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper;
1517
import com.linkedin.datahub.graphql.types.mappers.UrnSearchAcrossLineageResultsMapper;
1618
import com.linkedin.entity.client.EntityClient;
19+
import com.linkedin.metadata.models.EntitySpec;
20+
import com.linkedin.metadata.models.registry.EntityRegistry;
1721
import com.linkedin.metadata.query.SearchFlags;
1822
import com.linkedin.metadata.query.filter.Filter;
23+
import com.linkedin.metadata.search.LineageSearchResult;
1924
import com.linkedin.r2.RemoteInvocationException;
25+
import graphql.VisibleForTesting;
2026
import graphql.schema.DataFetcher;
2127
import graphql.schema.DataFetchingEnvironment;
2228
import java.net.URISyntaxException;
2329
import java.util.ArrayList;
2430
import java.util.List;
31+
import java.util.Set;
2532
import java.util.concurrent.CompletableFuture;
2633
import java.util.stream.Collectors;
2734
import javax.annotation.Nullable;
28-
import lombok.RequiredArgsConstructor;
2935
import lombok.extern.slf4j.Slf4j;
3036

3137
/** Resolver responsible for resolving 'searchAcrossEntities' field of the Query type */
3238
@Slf4j
33-
@RequiredArgsConstructor
3439
public class SearchAcrossLineageResolver
3540
implements DataFetcher<CompletableFuture<SearchAcrossLineageResults>> {
3641

3742
private static final int DEFAULT_START = 0;
3843
private static final int DEFAULT_COUNT = 10;
3944

45+
private static final Set<String> TRANSIENT_ENTITIES = ImmutableSet.of(QUERY_ENTITY_NAME);
46+
4047
private final EntityClient _entityClient;
4148

49+
private final EntityRegistry _entityRegistry;
50+
51+
@VisibleForTesting final Set<String> _allEntities;
52+
private final List<String> _allowedEntities;
53+
54+
public SearchAcrossLineageResolver(EntityClient entityClient, EntityRegistry entityRegistry) {
55+
this._entityClient = entityClient;
56+
this._entityRegistry = entityRegistry;
57+
this._allEntities =
58+
entityRegistry.getEntitySpecs().values().stream()
59+
.map(EntitySpec::getName)
60+
.collect(Collectors.toSet());
61+
62+
this._allowedEntities =
63+
this._allEntities.stream()
64+
.filter(e -> !TRANSIENT_ENTITIES.contains(e))
65+
.collect(Collectors.toList());
66+
}
67+
68+
private List<String> getEntityNamesFromInput(List<EntityType> inputTypes) {
69+
if (inputTypes != null && !inputTypes.isEmpty()) {
70+
return inputTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList());
71+
} else {
72+
return this._allowedEntities;
73+
}
74+
}
75+
4276
@Override
4377
public CompletableFuture<SearchAcrossLineageResults> get(DataFetchingEnvironment environment)
4478
throws URISyntaxException {
@@ -50,12 +84,7 @@ public CompletableFuture<SearchAcrossLineageResults> get(DataFetchingEnvironment
5084

5185
final LineageDirection lineageDirection = input.getDirection();
5286

53-
List<EntityType> entityTypes =
54-
(input.getTypes() == null || input.getTypes().isEmpty())
55-
? SEARCHABLE_ENTITY_TYPES
56-
: input.getTypes();
57-
List<String> entityNames =
58-
entityTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList());
87+
List<String> entityNames = getEntityNamesFromInput(input.getTypes());
5988

6089
// escape forward slash since it is a reserved character in Elasticsearch
6190
final String sanitizedQuery =
@@ -99,8 +128,7 @@ public CompletableFuture<SearchAcrossLineageResults> get(DataFetchingEnvironment
99128
} else {
100129
searchFlags = new SearchFlags().setFulltext(true).setSkipHighlighting(true);
101130
}
102-
103-
return UrnSearchAcrossLineageResultsMapper.map(
131+
LineageSearchResult salResults =
104132
_entityClient.searchAcrossLineage(
105133
urn,
106134
resolvedDirection,
@@ -114,7 +142,9 @@ public CompletableFuture<SearchAcrossLineageResults> get(DataFetchingEnvironment
114142
startTimeMillis,
115143
endTimeMillis,
116144
searchFlags,
117-
ResolverUtils.getAuthentication(environment)));
145+
getAuthentication(environment));
146+
147+
return UrnSearchAcrossLineageResultsMapper.map(salResults);
118148
} catch (RemoteInvocationException e) {
119149
log.error(
120150
"Failed to execute search across relationships: source urn {}, direction {}, entity types {}, query {}, filters: {}, start: {}, count: {}",

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.linkedin.datahub.graphql.resolvers.search;
22

33
import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument;
4+
import static com.linkedin.metadata.Constants.*;
45
import static com.linkedin.metadata.search.utils.SearchUtils.applyDefaultSearchFlags;
56

67
import com.linkedin.datahub.graphql.generated.SearchInput;
@@ -10,6 +11,9 @@
1011
import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper;
1112
import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper;
1213
import com.linkedin.entity.client.EntityClient;
14+
import com.linkedin.metadata.query.GroupingCriterion;
15+
import com.linkedin.metadata.query.GroupingCriterionArray;
16+
import com.linkedin.metadata.query.GroupingSpec;
1317
import com.linkedin.metadata.query.SearchFlags;
1418
import graphql.schema.DataFetcher;
1519
import graphql.schema.DataFetchingEnvironment;
@@ -28,7 +32,14 @@ public class SearchResolver implements DataFetcher<CompletableFuture<SearchResul
2832
.setMaxAggValues(20)
2933
.setSkipCache(false)
3034
.setSkipAggregates(false)
31-
.setSkipHighlighting(false);
35+
.setSkipHighlighting(false)
36+
.setGroupingSpec(
37+
new GroupingSpec()
38+
.setGroupingCriteria(
39+
new GroupingCriterionArray(
40+
new GroupingCriterion()
41+
.setBaseEntityType(SCHEMA_FIELD_ENTITY_NAME)
42+
.setGroupingEntityType(DATASET_ENTITY_NAME))));
3243
private static final int DEFAULT_START = 0;
3344
private static final int DEFAULT_COUNT = 10;
3445

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package com.linkedin.datahub.graphql.types.common.mappers;
2+
3+
import com.linkedin.data.template.SetMode;
4+
import com.linkedin.datahub.graphql.generated.GroupingCriterion;
5+
import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper;
6+
import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
7+
import javax.annotation.Nonnull;
8+
9+
public class GroupingCriterionInputMapper
10+
implements ModelMapper<GroupingCriterion, com.linkedin.metadata.query.GroupingCriterion> {
11+
12+
public static final GroupingCriterionInputMapper INSTANCE = new GroupingCriterionInputMapper();
13+
14+
public static com.linkedin.metadata.query.GroupingCriterion map(
15+
@Nonnull final GroupingCriterion groupingCriterion) {
16+
return INSTANCE.apply(groupingCriterion);
17+
}
18+
19+
@Override
20+
public com.linkedin.metadata.query.GroupingCriterion apply(GroupingCriterion input) {
21+
return new com.linkedin.metadata.query.GroupingCriterion()
22+
.setBaseEntityType(
23+
input.getBaseEntityType() != null
24+
? EntityTypeMapper.getName(input.getBaseEntityType())
25+
: null,
26+
SetMode.REMOVE_OPTIONAL_IF_NULL)
27+
.setGroupingEntityType(EntityTypeMapper.getName(input.getGroupingEntityType()));
28+
}
29+
}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java

+13
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
import com.linkedin.datahub.graphql.generated.SearchFlags;
44
import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
5+
import com.linkedin.metadata.query.GroupingCriterionArray;
6+
import com.linkedin.metadata.query.GroupingSpec;
7+
import java.util.stream.Collectors;
58
import javax.annotation.Nonnull;
69

710
/**
@@ -42,6 +45,16 @@ public com.linkedin.metadata.query.SearchFlags apply(@Nonnull final SearchFlags
4245
if (searchFlags.getGetSuggestions() != null) {
4346
result.setGetSuggestions(searchFlags.getGetSuggestions());
4447
}
48+
if (searchFlags.getGroupingSpec() != null
49+
&& searchFlags.getGroupingSpec().getGroupingCriteria() != null) {
50+
result.setGroupingSpec(
51+
new GroupingSpec()
52+
.setGroupingCriteria(
53+
new GroupingCriterionArray(
54+
searchFlags.getGroupingSpec().getGroupingCriteria().stream()
55+
.map(GroupingCriterionInputMapper::map)
56+
.collect(Collectors.toList()))));
57+
}
4558
return result;
4659
}
4760
}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java

+6
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import com.linkedin.datahub.graphql.generated.MLPrimaryKey;
3131
import com.linkedin.datahub.graphql.generated.Notebook;
3232
import com.linkedin.datahub.graphql.generated.OwnershipTypeEntity;
33+
import com.linkedin.datahub.graphql.generated.QueryEntity;
3334
import com.linkedin.datahub.graphql.generated.Role;
3435
import com.linkedin.datahub.graphql.generated.SchemaFieldEntity;
3536
import com.linkedin.datahub.graphql.generated.StructuredPropertyEntity;
@@ -198,6 +199,11 @@ public Entity apply(Urn input) {
198199
((StructuredPropertyEntity) partialEntity).setUrn(input.toString());
199200
((StructuredPropertyEntity) partialEntity).setType(EntityType.STRUCTURED_PROPERTY);
200201
}
202+
if (input.getEntityType().equals(QUERY_ENTITY_NAME)) {
203+
partialEntity = new QueryEntity();
204+
((QueryEntity) partialEntity).setUrn(input.toString());
205+
((QueryEntity) partialEntity).setType(EntityType.QUERY);
206+
}
201207
return partialEntity;
202208
}
203209
}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ private SearchAcrossLineageResult mapResult(LineageSearchEntity searchEntity) {
6262
.setMatchedFields(getMatchedFieldEntry(searchEntity.getMatchedFields()))
6363
.setPaths(searchEntity.getPaths().stream().map(this::mapPath).collect(Collectors.toList()))
6464
.setDegree(searchEntity.getDegree())
65+
.setDegrees(searchEntity.getDegrees().stream().collect(Collectors.toList()))
6566
.build();
6667
}
6768

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/query/QueryType.java

+3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
import java.util.stream.Collectors;
2222
import javax.annotation.Nonnull;
2323
import lombok.RequiredArgsConstructor;
24+
import lombok.extern.slf4j.Slf4j;
2425

26+
@Slf4j
2527
@RequiredArgsConstructor
2628
public class QueryType
2729
implements com.linkedin.datahub.graphql.types.EntityType<QueryEntity, String> {
@@ -50,6 +52,7 @@ public List<DataFetcherResult<QueryEntity>> batchLoad(
5052
final List<Urn> viewUrns = urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList());
5153

5254
try {
55+
log.debug("Fetching query entities: {}", viewUrns);
5356
final Map<Urn, EntityResponse> entities =
5457
_entityClient.batchGetV2(
5558
QUERY_ENTITY_NAME,

datahub-graphql-core/src/main/resources/entity.graphql

+5
Original file line numberDiff line numberDiff line change
@@ -10948,6 +10948,11 @@ enum QuerySource {
1094810948
The query was provided manually, e.g. from the UI.
1094910949
"""
1095010950
MANUAL
10951+
10952+
"""
10953+
The query was extracted by the system, e.g. from a dashboard.
10954+
"""
10955+
SYSTEM
1095110956
}
1095210957

1095310958
"""

datahub-graphql-core/src/main/resources/search.graphql

+49
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,15 @@ input SearchFlags {
143143
Whether to request for search suggestions on the _entityName virtualized field
144144
"""
145145
getSuggestions: Boolean
146+
147+
"""
148+
Additional grouping specifications to apply to the search results
149+
Grouping specifications will control how search results are grouped together
150+
in the response. This is currently being used to group schema fields (columns)
151+
as datasets, and in the future will be used to group other entities as well.
152+
Note: This is an experimental feature and is subject to change.
153+
"""
154+
groupingSpec: GroupingSpec
146155
}
147156

148157
"""
@@ -278,6 +287,7 @@ input ScrollAcrossEntitiesInput {
278287
searchFlags: SearchFlags
279288
}
280289

290+
281291
"""
282292
Input arguments for a search query over the results of a multi-hop graph query
283293
"""
@@ -669,6 +679,12 @@ type SearchAcrossLineageResult {
669679
Degree of relationship (number of hops to get to entity)
670680
"""
671681
degree: Int!
682+
683+
"""
684+
Degrees of relationship (for entities discoverable at multiple degrees)
685+
"""
686+
degrees: [Int!]
687+
672688
}
673689

674690
"""
@@ -1303,4 +1319,37 @@ input SortCriterion {
13031319
The order in which we will be sorting
13041320
"""
13051321
sortOrder: SortOrder!
1322+
}
1323+
1324+
"""
1325+
A grouping specification for search results.
1326+
"""
1327+
input GroupingSpec {
1328+
1329+
"""
1330+
A list of grouping criteria for grouping search results.
1331+
There is no implied order in the grouping criteria.
1332+
"""
1333+
groupingCriteria: [GroupingCriterion!]
1334+
1335+
}
1336+
1337+
"""
1338+
A single grouping criterion for grouping search results
1339+
"""
1340+
input GroupingCriterion {
1341+
1342+
"""
1343+
The base entity type that needs to be grouped
1344+
e.g. schemaField
1345+
Omitting this field will result in all base entities being grouped into the groupingEntityType.
1346+
"""
1347+
baseEntityType: EntityType
1348+
1349+
"""
1350+
The type of entity being grouped into
1351+
e.g. dataset, domain, etc.
1352+
"""
1353+
groupingEntityType: EntityType!
1354+
13061355
}

0 commit comments

Comments
 (0)