
Commit e07cb7a

Merge branch 'master' into feature/cus-3452-graphql-dataflowjob-container-aspect

2 parents: 209f0a4 + 67ef55e

110 files changed: +5924 −1105 lines


.github/workflows/build-and-test.yml (+1, −3)

@@ -109,8 +109,6 @@ jobs:
         if: ${{ matrix.command == 'frontend' && needs.setup.outputs.frontend_change == 'true' }}
         run: |
           ./gradlew :datahub-frontend:build :datahub-web-react:build --parallel
-        env:
-          NODE_OPTIONS: "--max-old-space-size=4096"
       - name: Gradle compile (jdk8) for legacy Spark
         if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }}
         run: |

@@ -157,4 +155,4 @@ jobs:
         uses: actions/upload-artifact@v3
         with:
           name: Event File
-          path: ${{ github.event_path }}
+          path: ${{ github.event_path }}

.github/workflows/contributor-open-pr-comment.yml (+1, −1)

@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

       - name: Get and Format Username (PR only)
         if: github.event_name == 'pull_request'

.github/workflows/dagster-plugin.yml (+3, −3)

@@ -30,11 +30,11 @@ jobs:
       DATAHUB_TELEMETRY_ENABLED: false
     strategy:
       matrix:
-        python-version: ["3.9", "3.10"]
+        python-version: ["3.9", "3.11"]
         include:
           - python-version: "3.9"
            extraPythonRequirement: "dagster>=1.3.3"
-          - python-version: "3.10"
+          - python-version: "3.11"
            extraPythonRequirement: "dagster>=1.3.3"
       fail-fast: false
     steps:

@@ -57,7 +57,7 @@ jobs:
         if: always()
         run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && uv pip freeze
       - uses: actions/upload-artifact@v4
-        if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }}
+        if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }}
         with:
           name: Test Results (dagster Plugin ${{ matrix.python-version}})
           path: |

.github/workflows/docker-unified.yml (−2)

@@ -445,8 +445,6 @@ jobs:
         run: |
           ./gradlew :datahub-frontend:dist -x test -x yarnTest -x yarnLint --parallel
           mv ./datahub-frontend/build/distributions/datahub-frontend-*.zip datahub-frontend.zip
-        env:
-          NODE_OPTIONS: "--max-old-space-size=4096"
       - name: Build and push
         uses: ./.github/actions/docker-custom-build-and-push
         with:

.github/workflows/metadata-ingestion.yml (+2, −2)

@@ -33,7 +33,7 @@ jobs:
       # DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }}
     strategy:
       matrix:
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.8", "3.11"]
         command:
           [
             "testQuick",

@@ -43,7 +43,7 @@ jobs:
           ]
         include:
           - python-version: "3.8"
-          - python-version: "3.10"
+          - python-version: "3.11"
       fail-fast: false
     steps:
       - name: Free up disk space

.github/workflows/prefect-plugin.yml (+2, −2)

@@ -30,7 +30,7 @@ jobs:
       DATAHUB_TELEMETRY_ENABLED: false
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
       fail-fast: false
     steps:
       - name: Set up JDK 17

@@ -52,7 +52,7 @@ jobs:
         if: always()
         run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && uv pip freeze
       - uses: actions/upload-artifact@v4
-        if: ${{ always() && matrix.python-version == '3.10'}}
+        if: ${{ always() && matrix.python-version == '3.11'}}
         with:
           name: Test Results (Prefect Plugin ${{ matrix.python-version}})
           path: |

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataTransformLogicMapper.java (new file, +73)

@@ -0,0 +1,73 @@
+package com.linkedin.datahub.graphql.types.common.mappers;
+
+import com.linkedin.datahub.graphql.QueryContext;
+import com.linkedin.datahub.graphql.generated.DataTransform;
+import com.linkedin.datahub.graphql.generated.DataTransformLogic;
+import com.linkedin.datahub.graphql.generated.QueryLanguage;
+import com.linkedin.datahub.graphql.generated.QueryStatement;
+import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
+import java.util.stream.Collectors;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+public class DataTransformLogicMapper
+    implements ModelMapper<
+        com.linkedin.common.DataTransformLogic,
+        com.linkedin.datahub.graphql.generated.DataTransformLogic> {
+
+  public static final DataTransformLogicMapper INSTANCE = new DataTransformLogicMapper();
+
+  public static DataTransformLogic map(
+      @Nullable final QueryContext context,
+      @Nonnull final com.linkedin.common.DataTransformLogic input) {
+    return INSTANCE.apply(context, input);
+  }
+
+  @Override
+  public DataTransformLogic apply(
+      @Nullable final QueryContext context,
+      @Nonnull final com.linkedin.common.DataTransformLogic input) {
+
+    final DataTransformLogic result = new DataTransformLogic();
+
+    // Map transforms array using DataTransformMapper
+    result.setTransforms(
+        input.getTransforms().stream()
+            .map(transform -> DataTransformMapper.map(context, transform))
+            .collect(Collectors.toList()));
+
+    return result;
+  }
+}
+
+class DataTransformMapper
+    implements ModelMapper<
+        com.linkedin.common.DataTransform, com.linkedin.datahub.graphql.generated.DataTransform> {
+
+  public static final DataTransformMapper INSTANCE = new DataTransformMapper();
+
+  public static DataTransform map(
+      @Nullable final QueryContext context,
+      @Nonnull final com.linkedin.common.DataTransform input) {
+    return INSTANCE.apply(context, input);
+  }
+
+  @Override
+  public DataTransform apply(
+      @Nullable final QueryContext context,
+      @Nonnull final com.linkedin.common.DataTransform input) {
+
+    final DataTransform result = new DataTransform();
+
+    // Map query statement if present
+    if (input.hasQueryStatement()) {
+      QueryStatement statement =
+          new QueryStatement(
+              input.getQueryStatement().getValue(),
+              QueryLanguage.valueOf(input.getQueryStatement().getLanguage().toString()));
+      result.setQueryStatement(statement);
+    }
+
+    return result;
+  }
+}
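
For orientation, here is a rough, standalone sketch (not part of this commit) of how the new mapper turns the Pegasus aspect into the generated GraphQL model. The Pegasus-side details are assumptions inferred from the getters used above: the DataTransformArray wrapper, the fluent setTransforms/setQueryStatement setters, and the com.linkedin.query.QueryStatement/QueryLanguage types may differ in the actual model, and the SQL statement is purely hypothetical.

import com.linkedin.common.DataTransform;
import com.linkedin.common.DataTransformArray;
import com.linkedin.query.QueryLanguage;
import com.linkedin.query.QueryStatement;
import java.util.List;

public class DataTransformLogicMapperSketch {
  public static void main(String[] args) {
    // Build the Pegasus-side aspect (class and setter names assumed from generated-code conventions).
    com.linkedin.common.DataTransformLogic aspect =
        new com.linkedin.common.DataTransformLogic()
            .setTransforms(
                new DataTransformArray(
                    List.of(
                        new DataTransform()
                            .setQueryStatement(
                                new QueryStatement()
                                    .setValue("SELECT id, amount FROM raw_events") // hypothetical statement
                                    .setLanguage(QueryLanguage.SQL)))));

    // The QueryContext parameter is @Nullable, so the mapper can run without a request context.
    com.linkedin.datahub.graphql.generated.DataTransformLogic mapped =
        com.linkedin.datahub.graphql.types.common.mappers.DataTransformLogicMapper.map(null, aspect);

    // Expect one transform whose query statement survived the mapping.
    System.out.println(mapped.getTransforms().size());
    System.out.println(mapped.getTransforms().get(0).getQueryStatement().getValue());
  }
}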

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapper.java (new file, +61)

@@ -0,0 +1,61 @@
+package com.linkedin.datahub.graphql.types.common.mappers;
+
+import com.linkedin.data.template.GetMode;
+import com.linkedin.datahub.graphql.QueryContext;
+import com.linkedin.datahub.graphql.generated.*;
+import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
+import com.linkedin.query.QueryProperties;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+public class QueryPropertiesMapper
+    implements ModelMapper<
+        QueryProperties, com.linkedin.datahub.graphql.generated.QueryProperties> {
+
+  public static final QueryPropertiesMapper INSTANCE = new QueryPropertiesMapper();
+
+  public static com.linkedin.datahub.graphql.generated.QueryProperties map(
+      @Nullable final QueryContext context, @Nonnull final QueryProperties input) {
+    return INSTANCE.apply(context, input);
+  }
+
+  @Override
+  public com.linkedin.datahub.graphql.generated.QueryProperties apply(
+      @Nullable final QueryContext context, @Nonnull final QueryProperties input) {
+
+    final com.linkedin.datahub.graphql.generated.QueryProperties result =
+        new com.linkedin.datahub.graphql.generated.QueryProperties();
+
+    // Map Query Source
+    result.setSource(QuerySource.valueOf(input.getSource().toString()));
+
+    // Map Query Statement
+    result.setStatement(
+        new QueryStatement(
+            input.getStatement().getValue(),
+            QueryLanguage.valueOf(input.getStatement().getLanguage().toString())));
+
+    // Map optional fields
+    result.setName(input.getName(GetMode.NULL));
+    result.setDescription(input.getDescription(GetMode.NULL));
+
+    // Map origin if present
+    if (input.hasOrigin() && input.getOrigin() != null) {
+      result.setOrigin(UrnToEntityMapper.map(context, input.getOrigin()));
+    }
+
+    // Map created audit stamp
+    AuditStamp created = new AuditStamp();
+    created.setTime(input.getCreated().getTime());
+    created.setActor(input.getCreated().getActor(GetMode.NULL).toString());
+    result.setCreated(created);
+
+    // Map last modified audit stamp
+    AuditStamp lastModified = new AuditStamp();
+    lastModified.setTime(input.getLastModified().getTime());
+    lastModified.setActor(input.getLastModified().getActor(GetMode.NULL).toString());
+    result.setLastModified(lastModified);
+
+    return result;
+  }
+}
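
Likewise, a minimal sketch (not in the commit) of exercising the extracted QueryPropertiesMapper in isolation. It assumes the usual fluent setters on the generated com.linkedin.query records and the MANUAL member of the QuerySource enum; the actor URN and SQL statement are placeholders.

import com.linkedin.common.AuditStamp;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.query.QueryLanguage;
import com.linkedin.query.QueryProperties;
import com.linkedin.query.QuerySource;
import com.linkedin.query.QueryStatement;

public class QueryPropertiesMapperSketch {
  public static void main(String[] args) {
    // Audit stamp reused for created/lastModified; the actor URN is a placeholder.
    AuditStamp stamp =
        new AuditStamp()
            .setTime(0L)
            .setActor(UrnUtils.getUrn("urn:li:corpuser:datahub"));

    // Pegasus-side QueryProperties record (setter names assumed from the generated code).
    QueryProperties props =
        new QueryProperties()
            .setSource(QuerySource.MANUAL)
            .setStatement(
                new QueryStatement()
                    .setValue("SELECT 1")
                    .setLanguage(QueryLanguage.SQL))
            .setCreated(stamp)
            .setLastModified(stamp);

    // A null QueryContext is accepted because the parameter is @Nullable.
    com.linkedin.datahub.graphql.generated.QueryProperties mapped =
        com.linkedin.datahub.graphql.types.common.mappers.QueryPropertiesMapper.map(null, props);

    System.out.println(mapped.getStatement().getValue()); // SELECT 1
    System.out.println(mapped.getSource()); // MANUAL
  }
}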

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java (+2, −1)

@@ -79,7 +79,8 @@ public class DataJobType
           BROWSE_PATHS_V2_ASPECT_NAME,
           SUB_TYPES_ASPECT_NAME,
           STRUCTURED_PROPERTIES_ASPECT_NAME,
-          FORMS_ASPECT_NAME);
+          FORMS_ASPECT_NAME,
+          DATA_TRANSFORM_LOGIC_ASPECT_NAME);
   private static final Set<String> FACET_FIELDS = ImmutableSet.of("flow");
   private final EntityClient _entityClient;

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java (+5, −19)

@@ -4,16 +4,7 @@
 import static com.linkedin.metadata.Constants.*;

 import com.google.common.collect.ImmutableList;
-import com.linkedin.common.BrowsePathsV2;
-import com.linkedin.common.DataPlatformInstance;
-import com.linkedin.common.Deprecation;
-import com.linkedin.common.Forms;
-import com.linkedin.common.GlobalTags;
-import com.linkedin.common.GlossaryTerms;
-import com.linkedin.common.InstitutionalMemory;
-import com.linkedin.common.Ownership;
-import com.linkedin.common.Status;
-import com.linkedin.common.SubTypes;
+import com.linkedin.common.*;
 import com.linkedin.common.urn.Urn;
 import com.linkedin.data.DataMap;
 import com.linkedin.datahub.graphql.QueryContext;

@@ -27,15 +18,7 @@
 import com.linkedin.datahub.graphql.generated.DataJobProperties;
 import com.linkedin.datahub.graphql.generated.Dataset;
 import com.linkedin.datahub.graphql.generated.EntityType;
-import com.linkedin.datahub.graphql.types.common.mappers.BrowsePathsV2Mapper;
-import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.DeprecationMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.FineGrainedLineagesMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.InstitutionalMemoryMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.SubTypesMapper;
+import com.linkedin.datahub.graphql.types.common.mappers.*;
 import com.linkedin.datahub.graphql.types.common.mappers.util.SystemMetadataUtils;
 import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper;
 import com.linkedin.datahub.graphql.types.form.FormsMapper;

@@ -148,6 +131,9 @@ public DataJob apply(
                     context, new StructuredProperties(data), entityUrn));
           } else if (FORMS_ASPECT_NAME.equals(name)) {
             result.setForms(FormsMapper.map(new Forms(data), entityUrn.toString()));
+          } else if (DATA_TRANSFORM_LOGIC_ASPECT_NAME.equals(name)) {
+            result.setDataTransformLogic(
+                DataTransformLogicMapper.map(context, new DataTransformLogic(data)));
           }
         });

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/query/QueryMapper.java (+5, −38)

@@ -5,18 +5,13 @@
 import com.linkedin.common.DataPlatformInstance;
 import com.linkedin.common.urn.Urn;
 import com.linkedin.data.DataMap;
-import com.linkedin.data.template.GetMode;
 import com.linkedin.datahub.graphql.QueryContext;
-import com.linkedin.datahub.graphql.generated.AuditStamp;
 import com.linkedin.datahub.graphql.generated.DataPlatform;
 import com.linkedin.datahub.graphql.generated.Dataset;
 import com.linkedin.datahub.graphql.generated.EntityType;
 import com.linkedin.datahub.graphql.generated.QueryEntity;
-import com.linkedin.datahub.graphql.generated.QueryLanguage;
-import com.linkedin.datahub.graphql.generated.QuerySource;
-import com.linkedin.datahub.graphql.generated.QueryStatement;
 import com.linkedin.datahub.graphql.generated.QuerySubject;
-import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper;
+import com.linkedin.datahub.graphql.types.common.mappers.QueryPropertiesMapper;
 import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper;
 import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
 import com.linkedin.entity.EntityResponse;

@@ -48,7 +43,10 @@ public QueryEntity apply(
     result.setType(EntityType.QUERY);
     EnvelopedAspectMap aspectMap = entityResponse.getAspects();
     MappingHelper<QueryEntity> mappingHelper = new MappingHelper<>(aspectMap, result);
-    mappingHelper.mapToResult(context, QUERY_PROPERTIES_ASPECT_NAME, this::mapQueryProperties);
+    mappingHelper.mapToResult(
+        QUERY_PROPERTIES_ASPECT_NAME,
+        (entity, dataMap) ->
+            entity.setProperties(QueryPropertiesMapper.map(context, new QueryProperties(dataMap))));
     mappingHelper.mapToResult(QUERY_SUBJECTS_ASPECT_NAME, this::mapQuerySubjects);
     mappingHelper.mapToResult(DATA_PLATFORM_INSTANCE_ASPECT_NAME, this::mapPlatform);
     return mappingHelper.getResult();

@@ -64,37 +62,6 @@ private void mapPlatform(@Nonnull QueryEntity query, @Nonnull DataMap dataMap) {
     }
   }

-  private void mapQueryProperties(
-      @Nullable final QueryContext context, @Nonnull QueryEntity query, @Nonnull DataMap dataMap) {
-    QueryProperties queryProperties = new QueryProperties(dataMap);
-    com.linkedin.datahub.graphql.generated.QueryProperties res =
-        new com.linkedin.datahub.graphql.generated.QueryProperties();
-
-    // Query Source must be kept in sync.
-    res.setSource(QuerySource.valueOf(queryProperties.getSource().toString()));
-    res.setStatement(
-        new QueryStatement(
-            queryProperties.getStatement().getValue(),
-            QueryLanguage.valueOf(queryProperties.getStatement().getLanguage().toString())));
-    res.setName(queryProperties.getName(GetMode.NULL));
-    res.setDescription(queryProperties.getDescription(GetMode.NULL));
-    if (queryProperties.hasOrigin() && queryProperties.getOrigin() != null) {
-      res.setOrigin(UrnToEntityMapper.map(context, queryProperties.getOrigin()));
-    }
-
-    AuditStamp created = new AuditStamp();
-    created.setTime(queryProperties.getCreated().getTime());
-    created.setActor(queryProperties.getCreated().getActor(GetMode.NULL).toString());
-    res.setCreated(created);
-
-    AuditStamp lastModified = new AuditStamp();
-    lastModified.setTime(queryProperties.getLastModified().getTime());
-    lastModified.setActor(queryProperties.getLastModified().getActor(GetMode.NULL).toString());
-    res.setLastModified(lastModified);
-
-    query.setProperties(res);
-  }
-
   @Nonnull
   private void mapQuerySubjects(@Nonnull QueryEntity query, @Nonnull DataMap dataMap) {
     QuerySubjects querySubjects = new QuerySubjects(dataMap);

datahub-graphql-core/src/main/resources/entity.graphql (+25)

@@ -6589,6 +6589,11 @@ type DataJob implements EntityWithRelationships & Entity & BrowsableEntity {
   The forms associated with the Dataset
   """
   forms: Forms
+
+  """
+  Data Transform Logic associated with the Data Job
+  """
+  dataTransformLogic: DataTransformLogic
 }

 """

@@ -6806,6 +6811,26 @@ type DataJobInputOutput {
   fineGrainedLineages: [FineGrainedLineage!]
 }

+"""
+Information about a transformation applied to data assets
+"""
+type DataTransform {
+  """
+  The transformation may be defined by a query statement
+  """
+  queryStatement: QueryStatement
+}
+
+"""
+Information about transformations applied to data assets
+"""
+type DataTransformLogic {
+  """
+  List of transformations applied
+  """
+  transforms: [DataTransform!]!
+}
+
 """
 Information about individual user usage of a Dataset
 """