Skip to content

Commit 22df180

Browse files
committed
Merge branch 'master+CUS-1183-extract_ownership_from_tags' of github.com:sid-acryl/datahub-fork into master+CUS-1183-extract_ownership_from_tags
2 parents f3b844c + a1f2337 commit 22df180

File tree

222 files changed

+21923
-6039
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

222 files changed

+21923
-6039
lines changed

.github/workflows/airflow-plugin.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ jobs:
8787
token: ${{ secrets.CODECOV_TOKEN }}
8888
directory: .
8989
fail_ci_if_error: false
90-
flags: airflow-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }}
91-
name: pytest-airflow
90+
flags: airflow,airflow-${{ matrix.extra_pip_extras }}
91+
name: pytest-airflow-${{ matrix.python-version }}-${{ matrix.extra_pip_requirements }}
9292
verbose: true
9393

9494
event-file:

.github/workflows/metadata-ingestion.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
# DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }}
3232
strategy:
3333
matrix:
34-
python-version: ["3.7", "3.10"]
34+
python-version: ["3.8", "3.10"]
3535
command:
3636
[
3737
"testQuick",
@@ -40,7 +40,7 @@ jobs:
4040
"testIntegrationBatch2",
4141
]
4242
include:
43-
- python-version: "3.7"
43+
- python-version: "3.8"
4444
- python-version: "3.10"
4545
fail-fast: false
4646
steps:

.github/workflows/spark-smoke-test.yml

+17-1
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,12 @@ jobs:
4242
cache: "pip"
4343
- name: Install dependencies
4444
run: ./metadata-ingestion/scripts/install_deps.sh
45+
- name: Disk Check
46+
run: df -h . && docker images
4547
- name: Remove images
4648
run: docker image prune -a -f || true
49+
- name: Disk Check
50+
run: df -h . && docker images
4751
- name: Smoke test
4852
run: |
4953
./gradlew :metadata-integration:java:spark-lineage:integrationTest \
@@ -54,12 +58,24 @@ jobs:
5458
-x :datahub-web-react:yarnBuild \
5559
-x :datahub-web-react:distZip \
5660
-x :datahub-web-react:jar
61+
- name: store logs
62+
if: failure()
63+
run: |
64+
docker ps -a
65+
docker logs datahub-gms >& gms-${{ matrix.test_strategy }}.log || true
66+
docker logs datahub-actions >& actions-${{ matrix.test_strategy }}.log || true
67+
docker logs broker >& broker-${{ matrix.test_strategy }}.log || true
68+
docker logs mysql >& mysql-${{ matrix.test_strategy }}.log || true
69+
docker logs elasticsearch >& elasticsearch-${{ matrix.test_strategy }}.log || true
70+
docker logs datahub-frontend-react >& frontend-${{ matrix.test_strategy }}.log || true
5771
- name: Upload logs
5872
uses: actions/upload-artifact@v3
5973
if: failure()
6074
with:
6175
name: docker logs
62-
path: "docker/build/container-logs/*.log"
76+
path: |
77+
"**/build/container-logs/*.log"
78+
"*.log"
6379
- uses: actions/upload-artifact@v3
6480
if: always()
6581
with:

datahub-graphql-core/build.gradle

+4-18
Original file line numberDiff line numberDiff line change
@@ -31,30 +31,16 @@ dependencies {
3131

3232
graphqlCodegen {
3333
// For options: https://github.com/kobylynskyi/graphql-java-codegen/blob/master/docs/codegen-options.md
34-
graphqlSchemaPaths = [
35-
"$projectDir/src/main/resources/entity.graphql".toString(),
36-
"$projectDir/src/main/resources/app.graphql".toString(),
37-
"$projectDir/src/main/resources/search.graphql".toString(),
38-
"$projectDir/src/main/resources/analytics.graphql".toString(),
39-
"$projectDir/src/main/resources/recommendation.graphql".toString(),
40-
"$projectDir/src/main/resources/ingestion.graphql".toString(),
41-
"$projectDir/src/main/resources/auth.graphql".toString(),
42-
"$projectDir/src/main/resources/timeline.graphql".toString(),
43-
"$projectDir/src/main/resources/tests.graphql".toString(),
44-
"$projectDir/src/main/resources/properties.graphql".toString(),
45-
"$projectDir/src/main/resources/step.graphql".toString(),
46-
"$projectDir/src/main/resources/lineage.graphql".toString(),
47-
"$projectDir/src/main/resources/forms.graphql".toString()
48-
]
49-
outputDir = new File("$projectDir/src/mainGeneratedGraphQL/java")
34+
graphqlSchemaPaths = fileTree(dir: "${projectDir}/src/main/resources", include: '**/*.graphql').collect { it.absolutePath }
35+
outputDir = new File("${projectDir}/src/mainGeneratedGraphQL/java")
5036
packageName = "com.linkedin.datahub.graphql.generated"
5137
generateToString = true
5238
generateApis = true
5339
generateParameterizedFieldsResolvers = false
5440
modelValidationAnnotation = "@javax.annotation.Nonnull"
5541
customTypesMapping = [
56-
Long: "Long",
57-
Float: "Float"
42+
Long: "Long",
43+
Float: "Float"
5844
]
5945
}
6046

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,8 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) {
878878
"scrollAcrossEntities",
879879
new ScrollAcrossEntitiesResolver(this.entityClient, this.viewService))
880880
.dataFetcher(
881-
"searchAcrossLineage", new SearchAcrossLineageResolver(this.entityClient))
881+
"searchAcrossLineage",
882+
new SearchAcrossLineageResolver(this.entityClient, this.entityRegistry))
882883
.dataFetcher(
883884
"scrollAcrossLineage", new ScrollAcrossLineageResolver(this.entityClient))
884885
.dataFetcher(

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/BatchLoadUtils.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ public static CompletableFuture<List<Entity>> batchLoadEntitiesOfSameType(
2828
.filter(entity -> entities.get(0).getClass().isAssignableFrom(entity.objectClass()))
2929
.collect(Collectors.toList()));
3030

31-
final DataLoader loader = dataLoaderRegistry.getDataLoader(filteredEntity.name());
32-
List keyList = new ArrayList();
31+
final DataLoader<Object, Entity> loader =
32+
dataLoaderRegistry.getDataLoader(filteredEntity.name());
33+
List<Object> keyList = new ArrayList();
3334
for (Entity entity : entities) {
3435
keyList.add(filteredEntity.getKeyProvider().apply(entity));
3536
}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java

+41-11
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*;
44
import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*;
5+
import static com.linkedin.metadata.Constants.QUERY_ENTITY_NAME;
56

7+
import com.google.common.collect.ImmutableSet;
68
import com.linkedin.common.urn.Urn;
79
import com.linkedin.datahub.graphql.generated.EntityType;
810
import com.linkedin.datahub.graphql.generated.FacetFilterInput;
@@ -14,31 +16,63 @@
1416
import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper;
1517
import com.linkedin.datahub.graphql.types.mappers.UrnSearchAcrossLineageResultsMapper;
1618
import com.linkedin.entity.client.EntityClient;
19+
import com.linkedin.metadata.models.EntitySpec;
20+
import com.linkedin.metadata.models.registry.EntityRegistry;
1721
import com.linkedin.metadata.query.SearchFlags;
1822
import com.linkedin.metadata.query.filter.Filter;
23+
import com.linkedin.metadata.search.LineageSearchResult;
1924
import com.linkedin.r2.RemoteInvocationException;
25+
import graphql.VisibleForTesting;
2026
import graphql.schema.DataFetcher;
2127
import graphql.schema.DataFetchingEnvironment;
2228
import java.net.URISyntaxException;
2329
import java.util.ArrayList;
2430
import java.util.List;
31+
import java.util.Set;
2532
import java.util.concurrent.CompletableFuture;
2633
import java.util.stream.Collectors;
2734
import javax.annotation.Nullable;
28-
import lombok.RequiredArgsConstructor;
2935
import lombok.extern.slf4j.Slf4j;
3036

3137
/** Resolver responsible for resolving 'searchAcrossEntities' field of the Query type */
3238
@Slf4j
33-
@RequiredArgsConstructor
3439
public class SearchAcrossLineageResolver
3540
implements DataFetcher<CompletableFuture<SearchAcrossLineageResults>> {
3641

3742
private static final int DEFAULT_START = 0;
3843
private static final int DEFAULT_COUNT = 10;
3944

45+
private static final Set<String> TRANSIENT_ENTITIES = ImmutableSet.of(QUERY_ENTITY_NAME);
46+
4047
private final EntityClient _entityClient;
4148

49+
private final EntityRegistry _entityRegistry;
50+
51+
@VisibleForTesting final Set<String> _allEntities;
52+
private final List<String> _allowedEntities;
53+
54+
public SearchAcrossLineageResolver(EntityClient entityClient, EntityRegistry entityRegistry) {
55+
this._entityClient = entityClient;
56+
this._entityRegistry = entityRegistry;
57+
this._allEntities =
58+
entityRegistry.getEntitySpecs().values().stream()
59+
.map(EntitySpec::getName)
60+
.collect(Collectors.toSet());
61+
62+
this._allowedEntities =
63+
this._allEntities.stream()
64+
.filter(e -> !TRANSIENT_ENTITIES.contains(e))
65+
.collect(Collectors.toList());
66+
}
67+
68+
private List<String> getEntityNamesFromInput(List<EntityType> inputTypes) {
69+
if (inputTypes != null && !inputTypes.isEmpty()) {
70+
return inputTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList());
71+
} else {
72+
return this._allowedEntities;
73+
}
74+
}
75+
4276
@Override
4377
public CompletableFuture<SearchAcrossLineageResults> get(DataFetchingEnvironment environment)
4478
throws URISyntaxException {
@@ -50,12 +84,7 @@ public CompletableFuture<SearchAcrossLineageResults> get(DataFetchingEnvironment
5084

5185
final LineageDirection lineageDirection = input.getDirection();
5286

53-
List<EntityType> entityTypes =
54-
(input.getTypes() == null || input.getTypes().isEmpty())
55-
? SEARCHABLE_ENTITY_TYPES
56-
: input.getTypes();
57-
List<String> entityNames =
58-
entityTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList());
87+
List<String> entityNames = getEntityNamesFromInput(input.getTypes());
5988

6089
// escape forward slash since it is a reserved character in Elasticsearch
6190
final String sanitizedQuery =
@@ -99,8 +128,7 @@ public CompletableFuture<SearchAcrossLineageResults> get(DataFetchingEnvironment
99128
} else {
100129
searchFlags = new SearchFlags().setFulltext(true).setSkipHighlighting(true);
101130
}
102-
103-
return UrnSearchAcrossLineageResultsMapper.map(
131+
LineageSearchResult salResults =
104132
_entityClient.searchAcrossLineage(
105133
urn,
106134
resolvedDirection,
@@ -114,7 +142,9 @@ public CompletableFuture<SearchAcrossLineageResults> get(DataFetchingEnvironment
114142
startTimeMillis,
115143
endTimeMillis,
116144
searchFlags,
117-
ResolverUtils.getAuthentication(environment)));
145+
getAuthentication(environment));
146+
147+
return UrnSearchAcrossLineageResultsMapper.map(salResults);
118148
} catch (RemoteInvocationException e) {
119149
log.error(
120150
"Failed to execute search across relationships: source urn {}, direction {}, entity types {}, query {}, filters: {}, start: {}, count: {}",

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.linkedin.datahub.graphql.resolvers.search;
22

33
import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument;
4+
import static com.linkedin.metadata.Constants.*;
45
import static com.linkedin.metadata.search.utils.SearchUtils.applyDefaultSearchFlags;
56

67
import com.linkedin.datahub.graphql.generated.SearchInput;
@@ -10,6 +11,9 @@
1011
import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper;
1112
import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper;
1213
import com.linkedin.entity.client.EntityClient;
14+
import com.linkedin.metadata.query.GroupingCriterion;
15+
import com.linkedin.metadata.query.GroupingCriterionArray;
16+
import com.linkedin.metadata.query.GroupingSpec;
1317
import com.linkedin.metadata.query.SearchFlags;
1418
import graphql.schema.DataFetcher;
1519
import graphql.schema.DataFetchingEnvironment;
@@ -28,7 +32,14 @@ public class SearchResolver implements DataFetcher<CompletableFuture<SearchResul
2832
.setMaxAggValues(20)
2933
.setSkipCache(false)
3034
.setSkipAggregates(false)
31-
.setSkipHighlighting(false);
35+
.setSkipHighlighting(false)
36+
.setGroupingSpec(
37+
new GroupingSpec()
38+
.setGroupingCriteria(
39+
new GroupingCriterionArray(
40+
new GroupingCriterion()
41+
.setBaseEntityType(SCHEMA_FIELD_ENTITY_NAME)
42+
.setGroupingEntityType(DATASET_ENTITY_NAME))));
3243
private static final int DEFAULT_START = 0;
3344
private static final int DEFAULT_COUNT = 10;
3445

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java

+1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ private static com.linkedin.datahub.graphql.generated.AssertionInfo mapAssertion
6666
mapDatasetAssertionInfo(gmsAssertionInfo.getDatasetAssertion());
6767
assertionInfo.setDatasetAssertion(datasetAssertion);
6868
}
69+
assertionInfo.setDescription(gmsAssertionInfo.getDescription());
6970
return assertionInfo;
7071
}
7172

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package com.linkedin.datahub.graphql.types.common.mappers;
2+
3+
import com.linkedin.data.template.SetMode;
4+
import com.linkedin.datahub.graphql.generated.GroupingCriterion;
5+
import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper;
6+
import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
7+
import javax.annotation.Nonnull;
8+
9+
public class GroupingCriterionInputMapper
10+
implements ModelMapper<GroupingCriterion, com.linkedin.metadata.query.GroupingCriterion> {
11+
12+
public static final GroupingCriterionInputMapper INSTANCE = new GroupingCriterionInputMapper();
13+
14+
public static com.linkedin.metadata.query.GroupingCriterion map(
15+
@Nonnull final GroupingCriterion groupingCriterion) {
16+
return INSTANCE.apply(groupingCriterion);
17+
}
18+
19+
@Override
20+
public com.linkedin.metadata.query.GroupingCriterion apply(GroupingCriterion input) {
21+
return new com.linkedin.metadata.query.GroupingCriterion()
22+
.setBaseEntityType(
23+
input.getBaseEntityType() != null
24+
? EntityTypeMapper.getName(input.getBaseEntityType())
25+
: null,
26+
SetMode.REMOVE_OPTIONAL_IF_NULL)
27+
.setGroupingEntityType(EntityTypeMapper.getName(input.getGroupingEntityType()));
28+
}
29+
}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java

+13
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
import com.linkedin.datahub.graphql.generated.SearchFlags;
44
import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
5+
import com.linkedin.metadata.query.GroupingCriterionArray;
6+
import com.linkedin.metadata.query.GroupingSpec;
7+
import java.util.stream.Collectors;
58
import javax.annotation.Nonnull;
69

710
/**
@@ -42,6 +45,16 @@ public com.linkedin.metadata.query.SearchFlags apply(@Nonnull final SearchFlags
4245
if (searchFlags.getGetSuggestions() != null) {
4346
result.setGetSuggestions(searchFlags.getGetSuggestions());
4447
}
48+
if (searchFlags.getGroupingSpec() != null
49+
&& searchFlags.getGroupingSpec().getGroupingCriteria() != null) {
50+
result.setGroupingSpec(
51+
new GroupingSpec()
52+
.setGroupingCriteria(
53+
new GroupingCriterionArray(
54+
searchFlags.getGroupingSpec().getGroupingCriteria().stream()
55+
.map(GroupingCriterionInputMapper::map)
56+
.collect(Collectors.toList()))));
57+
}
4558
return result;
4659
}
4760
}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java

+6
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import com.linkedin.datahub.graphql.generated.MLPrimaryKey;
3131
import com.linkedin.datahub.graphql.generated.Notebook;
3232
import com.linkedin.datahub.graphql.generated.OwnershipTypeEntity;
33+
import com.linkedin.datahub.graphql.generated.QueryEntity;
3334
import com.linkedin.datahub.graphql.generated.Role;
3435
import com.linkedin.datahub.graphql.generated.SchemaFieldEntity;
3536
import com.linkedin.datahub.graphql.generated.StructuredPropertyEntity;
@@ -198,6 +199,11 @@ public Entity apply(Urn input) {
198199
((StructuredPropertyEntity) partialEntity).setUrn(input.toString());
199200
((StructuredPropertyEntity) partialEntity).setType(EntityType.STRUCTURED_PROPERTY);
200201
}
202+
if (input.getEntityType().equals(QUERY_ENTITY_NAME)) {
203+
partialEntity = new QueryEntity();
204+
((QueryEntity) partialEntity).setUrn(input.toString());
205+
((QueryEntity) partialEntity).setType(EntityType.QUERY);
206+
}
201207
return partialEntity;
202208
}
203209
}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ private SearchAcrossLineageResult mapResult(LineageSearchEntity searchEntity) {
6262
.setMatchedFields(getMatchedFieldEntry(searchEntity.getMatchedFields()))
6363
.setPaths(searchEntity.getPaths().stream().map(this::mapPath).collect(Collectors.toList()))
6464
.setDegree(searchEntity.getDegree())
65+
.setDegrees(searchEntity.getDegrees().stream().collect(Collectors.toList()))
6566
.build();
6667
}
6768

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/query/QueryType.java

+3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
import java.util.stream.Collectors;
2222
import javax.annotation.Nonnull;
2323
import lombok.RequiredArgsConstructor;
24+
import lombok.extern.slf4j.Slf4j;
2425

26+
@Slf4j
2527
@RequiredArgsConstructor
2628
public class QueryType
2729
implements com.linkedin.datahub.graphql.types.EntityType<QueryEntity, String> {
@@ -50,6 +52,7 @@ public List<DataFetcherResult<QueryEntity>> batchLoad(
5052
final List<Urn> viewUrns = urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList());
5153

5254
try {
55+
log.debug("Fetching query entities: {}", viewUrns);
5356
final Map<Urn, EntityResponse> entities =
5457
_entityClient.batchGetV2(
5558
QUERY_ENTITY_NAME,

0 commit comments

Comments
 (0)