Commit 5a7bb17

Merge branch 'master' into node-options-consilidation
2 parents 5412c21 + abb6443

147 files changed: +5980 −1558 lines

.github/workflows/dagster-plugin.yml

+3 −3

@@ -30,11 +30,11 @@ jobs:
       DATAHUB_TELEMETRY_ENABLED: false
     strategy:
       matrix:
-        python-version: ["3.9", "3.10"]
+        python-version: ["3.9", "3.11"]
         include:
           - python-version: "3.9"
             extraPythonRequirement: "dagster>=1.3.3"
-          - python-version: "3.10"
+          - python-version: "3.11"
             extraPythonRequirement: "dagster>=1.3.3"
       fail-fast: false
     steps:
@@ -57,7 +57,7 @@ jobs:
         if: always()
         run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && uv pip freeze
       - uses: actions/upload-artifact@v4
-        if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }}
+        if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }}
         with:
           name: Test Results (dagster Plugin ${{ matrix.python-version}})
           path: |

.github/workflows/docker-unified.yml

+36 −14

@@ -1009,18 +1009,39 @@ jobs:
     needs: setup
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
+      cypress_batch_count: ${{ steps.set-batch-count.outputs.cypress_batch_count }}
+      python_batch_count: ${{ steps.set-batch-count.outputs.python_batch_count }}
     steps:
+      - id: set-batch-count
+        # Tests are split simply to ensure the configured number of batches for parallelization. This may need some
+        # increase as new tests increase the duration to the point where an additional parallel batch helps.
+        # python_batch_count is used to split pytests in the smoke-test (batches of actual test functions)
+        # cypress_batch_count is used to split the collection of cypress test specs into batches.
+        run: |
+          echo "cypress_batch_count=11" >> "$GITHUB_OUTPUT"
+          echo "python_batch_count=5" >> "$GITHUB_OUTPUT"
+
       - id: set-matrix
+        # For m batches for python and n batches for cypress, we need a test matrix of python x m + cypress x n.
+        # While the GitHub Actions matrix generation can handle these two parts individually, there isn't a way to use the
+        # two generated matrices for the same job. So, produce that matrix with scripting and use the include directive
+        # to add it to the test matrix.
         run: |
-          if [ '${{ needs.setup.outputs.frontend_only }}' == 'true' ]; then
-            echo 'matrix=["cypress_suite1","cypress_rest"]' >> "$GITHUB_OUTPUT"
-          elif [ '${{ needs.setup.outputs.ingestion_only }}' == 'true' ]; then
-            echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> "$GITHUB_OUTPUT"
-          elif [[ '${{ needs.setup.outputs.backend_change }}' == 'true' || '${{ needs.setup.outputs.smoke_test_change }}' == 'true' ]]; then
-            echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> "$GITHUB_OUTPUT"
-          else
-            echo 'matrix=[]' >> "$GITHUB_OUTPUT"
+          python_batch_count=${{ steps.set-batch-count.outputs.python_batch_count }}
+          python_matrix=$(printf "{\"test_strategy\":\"pytests\",\"batch\":\"0\",\"batch_count\":\"$python_batch_count\"}"; for ((i=1;i<python_batch_count;i++)); do printf ",{\"test_strategy\":\"pytests\", \"batch_count\":\"$python_batch_count\",\"batch\":\"%d\"}" $i; done)
+
+          cypress_batch_count=${{ steps.set-batch-count.outputs.cypress_batch_count }}
+          cypress_matrix=$(printf "{\"test_strategy\":\"cypress\",\"batch\":\"0\",\"batch_count\":\"$cypress_batch_count\"}"; for ((i=1;i<cypress_batch_count;i++)); do printf ",{\"test_strategy\":\"cypress\", \"batch_count\":\"$cypress_batch_count\",\"batch\":\"%d\"}" $i; done)
+
+          includes=''
+          if [[ "${{ needs.setup.outputs.frontend_only }}" == 'true' ]]; then
+            includes=$cypress_matrix
+          elif [ "${{ needs.setup.outputs.ingestion_only }}" == 'true' ]; then
+            includes=$python_matrix
+          elif [[ "${{ needs.setup.outputs.backend_change }}" == 'true' || "${{ needs.setup.outputs.smoke_test_change }}" == 'true' ]]; then
+            includes="$python_matrix,$cypress_matrix"
           fi
+          echo "matrix={\"include\":[$includes] }" >> "$GITHUB_OUTPUT"
 
   smoke_test:
     name: Run Smoke Tests
@@ -1041,8 +1062,7 @@ jobs:
         ]
     strategy:
       fail-fast: false
-      matrix:
-        test_strategy: ${{ fromJson(needs.smoke_test_matrix.outputs.matrix) }}
+      matrix: ${{ fromJson(needs.smoke_test_matrix.outputs.matrix) }}
     if: ${{ always() && !failure() && !cancelled() && needs.smoke_test_matrix.outputs.matrix != '[]' }}
     steps:
       - name: Free up disk space
@@ -1218,6 +1238,8 @@ jobs:
           CYPRESS_RECORD_KEY: ${{ secrets.CYPRESS_RECORD_KEY }}
           CLEANUP_DATA: "false"
           TEST_STRATEGY: ${{ matrix.test_strategy }}
+          BATCH_COUNT: ${{ matrix.batch_count }}
+          BATCH_NUMBER: ${{ matrix.batch }}
         run: |
           echo "$DATAHUB_VERSION"
           ./gradlew --stop
@@ -1228,25 +1250,25 @@ jobs:
         if: failure()
         run: |
           docker ps -a
-          TEST_STRATEGY="-${{ matrix.test_strategy }}"
+          TEST_STRATEGY="-${{ matrix.test_strategy }}-${{ matrix.batch }}"
           source .github/scripts/docker_logs.sh
       - name: Upload logs
         uses: actions/upload-artifact@v3
         if: failure()
         with:
-          name: docker-logs-${{ matrix.test_strategy }}
+          name: docker-logs-${{ matrix.test_strategy }}-${{ matrix.batch }}
           path: "docker_logs/*.log"
           retention-days: 5
       - name: Upload screenshots
         uses: actions/upload-artifact@v3
         if: failure()
         with:
-          name: cypress-snapshots-${{ matrix.test_strategy }}
+          name: cypress-snapshots-${{ matrix.test_strategy }}-${{ matrix.batch }}
           path: smoke-test/tests/cypress/cypress/screenshots/
       - uses: actions/upload-artifact@v3
         if: always()
         with:
-          name: Test Results (smoke tests) ${{ matrix.test_strategy }}
+          name: Test Results (smoke tests) ${{ matrix.test_strategy }} ${{ matrix.batch }}
           path: |
             **/build/reports/tests/test/**
             **/build/test-results/test/**
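
For reference, the set-matrix step above emits a JSON object whose "include" list GitHub Actions expands into one smoke-test job per entry. Below is a minimal local sketch of the same construction, with batch counts shrunk to 2 (pytests) and 3 (cypress) so the output stays readable; the build_matrix helper is an illustration, not part of the workflow.

#!/usr/bin/env bash
# Sketch: rebuild the smoke-test matrix JSON the way the set-matrix step does.
build_matrix() {
  # $1 = test_strategy, $2 = batch_count
  local strategy=$1 count=$2 i
  printf '{"test_strategy":"%s","batch":"0","batch_count":"%s"}' "$strategy" "$count"
  for ((i = 1; i < count; i++)); do
    printf ',{"test_strategy":"%s","batch_count":"%s","batch":"%d"}' "$strategy" "$count" "$i"
  done
}

includes="$(build_matrix pytests 2),$(build_matrix cypress 3)"
echo "matrix={\"include\":[$includes]}"
# => matrix={"include":[{"test_strategy":"pytests","batch":"0","batch_count":"2"},
#    {"test_strategy":"pytests","batch_count":"2","batch":"1"},
#    {"test_strategy":"cypress","batch":"0","batch_count":"3"},...]}

Each generated job then receives its slice through the TEST_STRATEGY, BATCH_COUNT, and BATCH_NUMBER environment variables added in the hunk at old line 1218. This diff does not show how the smoke-test scripts consume those variables; a hypothetical round-robin selection over cypress specs could look like the sketch below (the spec path and the modulo scheme are assumptions for illustration, not the repository's actual logic).

#!/usr/bin/env bash
# Hypothetical batch selection: run every spec whose index modulo
# BATCH_COUNT equals this job's BATCH_NUMBER.
shopt -s globstar nullglob
BATCH_COUNT=${BATCH_COUNT:-3}
BATCH_NUMBER=${BATCH_NUMBER:-0}
i=0
for spec in smoke-test/tests/cypress/cypress/e2e/**/*.js; do
  if ((i % BATCH_COUNT == BATCH_NUMBER)); then
    echo "would run: $spec"
  fi
  i=$((i + 1))
done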

.github/workflows/metadata-ingestion.yml

+2 −2

@@ -33,7 +33,7 @@ jobs:
       # DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }}
     strategy:
       matrix:
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.8", "3.11"]
         command:
           [
             "testQuick",
@@ -43,7 +43,7 @@ jobs:
           ]
         include:
           - python-version: "3.8"
-          - python-version: "3.10"
+          - python-version: "3.11"
       fail-fast: false
     steps:
       - name: Free up disk space

.github/workflows/prefect-plugin.yml

+2 −2

@@ -30,7 +30,7 @@ jobs:
       DATAHUB_TELEMETRY_ENABLED: false
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
       fail-fast: false
     steps:
       - name: Set up JDK 17
@@ -52,7 +52,7 @@ jobs:
         if: always()
         run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && uv pip freeze
       - uses: actions/upload-artifact@v4
-        if: ${{ always() && matrix.python-version == '3.10'}}
+        if: ${{ always() && matrix.python-version == '3.11'}}
         with:
           name: Test Results (Prefect Plugin ${{ matrix.python-version}})
           path: |

.github/workflows/qodana-scan.yml

−23
This file was deleted.

build.gradle

+1 −1

@@ -35,7 +35,7 @@ buildscript {
   ext.pegasusVersion = '29.57.0'
   ext.mavenVersion = '3.6.3'
   ext.versionGradle = '8.11.1'
-  ext.springVersion = '6.1.13'
+  ext.springVersion = '6.1.14'
   ext.springBootVersion = '3.2.9'
   ext.springKafkaVersion = '3.1.6'
   ext.openTelemetryVersion = '1.18.0'

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataTransformLogicMapper.java

+73 (new file)

@@ -0,0 +1,73 @@
+package com.linkedin.datahub.graphql.types.common.mappers;
+
+import com.linkedin.datahub.graphql.QueryContext;
+import com.linkedin.datahub.graphql.generated.DataTransform;
+import com.linkedin.datahub.graphql.generated.DataTransformLogic;
+import com.linkedin.datahub.graphql.generated.QueryLanguage;
+import com.linkedin.datahub.graphql.generated.QueryStatement;
+import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
+import java.util.stream.Collectors;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+public class DataTransformLogicMapper
+    implements ModelMapper<
+        com.linkedin.common.DataTransformLogic,
+        com.linkedin.datahub.graphql.generated.DataTransformLogic> {
+
+  public static final DataTransformLogicMapper INSTANCE = new DataTransformLogicMapper();
+
+  public static DataTransformLogic map(
+      @Nullable final QueryContext context,
+      @Nonnull final com.linkedin.common.DataTransformLogic input) {
+    return INSTANCE.apply(context, input);
+  }
+
+  @Override
+  public DataTransformLogic apply(
+      @Nullable final QueryContext context,
+      @Nonnull final com.linkedin.common.DataTransformLogic input) {
+
+    final DataTransformLogic result = new DataTransformLogic();
+
+    // Map transforms array using DataTransformMapper
+    result.setTransforms(
+        input.getTransforms().stream()
+            .map(transform -> DataTransformMapper.map(context, transform))
+            .collect(Collectors.toList()));
+
+    return result;
+  }
+}
+
+class DataTransformMapper
+    implements ModelMapper<
+        com.linkedin.common.DataTransform, com.linkedin.datahub.graphql.generated.DataTransform> {
+
+  public static final DataTransformMapper INSTANCE = new DataTransformMapper();
+
+  public static DataTransform map(
+      @Nullable final QueryContext context,
+      @Nonnull final com.linkedin.common.DataTransform input) {
+    return INSTANCE.apply(context, input);
+  }
+
+  @Override
+  public DataTransform apply(
+      @Nullable final QueryContext context,
+      @Nonnull final com.linkedin.common.DataTransform input) {
+
+    final DataTransform result = new DataTransform();
+
+    // Map query statement if present
+    if (input.hasQueryStatement()) {
+      QueryStatement statement =
+          new QueryStatement(
+              input.getQueryStatement().getValue(),
+              QueryLanguage.valueOf(input.getQueryStatement().getLanguage().toString()));
+      result.setQueryStatement(statement);
+    }
+
+    return result;
+  }
+}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/QueryPropertiesMapper.java

+61 (new file)

@@ -0,0 +1,61 @@
+package com.linkedin.datahub.graphql.types.common.mappers;
+
+import com.linkedin.data.template.GetMode;
+import com.linkedin.datahub.graphql.QueryContext;
+import com.linkedin.datahub.graphql.generated.*;
+import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
+import com.linkedin.query.QueryProperties;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+public class QueryPropertiesMapper
+    implements ModelMapper<
+        QueryProperties, com.linkedin.datahub.graphql.generated.QueryProperties> {
+
+  public static final QueryPropertiesMapper INSTANCE = new QueryPropertiesMapper();
+
+  public static com.linkedin.datahub.graphql.generated.QueryProperties map(
+      @Nullable final QueryContext context, @Nonnull final QueryProperties input) {
+    return INSTANCE.apply(context, input);
+  }
+
+  @Override
+  public com.linkedin.datahub.graphql.generated.QueryProperties apply(
+      @Nullable final QueryContext context, @Nonnull final QueryProperties input) {
+
+    final com.linkedin.datahub.graphql.generated.QueryProperties result =
+        new com.linkedin.datahub.graphql.generated.QueryProperties();
+
+    // Map Query Source
+    result.setSource(QuerySource.valueOf(input.getSource().toString()));
+
+    // Map Query Statement
+    result.setStatement(
+        new QueryStatement(
+            input.getStatement().getValue(),
+            QueryLanguage.valueOf(input.getStatement().getLanguage().toString())));
+
+    // Map optional fields
+    result.setName(input.getName(GetMode.NULL));
+    result.setDescription(input.getDescription(GetMode.NULL));
+
+    // Map origin if present
+    if (input.hasOrigin() && input.getOrigin() != null) {
+      result.setOrigin(UrnToEntityMapper.map(context, input.getOrigin()));
+    }
+
+    // Map created audit stamp
+    AuditStamp created = new AuditStamp();
+    created.setTime(input.getCreated().getTime());
+    created.setActor(input.getCreated().getActor(GetMode.NULL).toString());
+    result.setCreated(created);
+
+    // Map last modified audit stamp
+    AuditStamp lastModified = new AuditStamp();
+    lastModified.setTime(input.getLastModified().getTime());
+    lastModified.setActor(input.getLastModified().getActor(GetMode.NULL).toString());
+    result.setLastModified(lastModified);
+
+    return result;
+  }
+}

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java

+2 −1

@@ -79,7 +79,8 @@ public class DataJobType
           BROWSE_PATHS_V2_ASPECT_NAME,
           SUB_TYPES_ASPECT_NAME,
           STRUCTURED_PROPERTIES_ASPECT_NAME,
-          FORMS_ASPECT_NAME);
+          FORMS_ASPECT_NAME,
+          DATA_TRANSFORM_LOGIC_ASPECT_NAME);
   private static final Set<String> FACET_FIELDS = ImmutableSet.of("flow");
   private final EntityClient _entityClient;

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java

+5 −19

@@ -4,16 +4,7 @@
 import static com.linkedin.metadata.Constants.*;
 
 import com.google.common.collect.ImmutableList;
-import com.linkedin.common.BrowsePathsV2;
-import com.linkedin.common.DataPlatformInstance;
-import com.linkedin.common.Deprecation;
-import com.linkedin.common.Forms;
-import com.linkedin.common.GlobalTags;
-import com.linkedin.common.GlossaryTerms;
-import com.linkedin.common.InstitutionalMemory;
-import com.linkedin.common.Ownership;
-import com.linkedin.common.Status;
-import com.linkedin.common.SubTypes;
+import com.linkedin.common.*;
 import com.linkedin.common.urn.Urn;
 import com.linkedin.data.DataMap;
 import com.linkedin.datahub.graphql.QueryContext;
@@ -26,15 +17,7 @@
 import com.linkedin.datahub.graphql.generated.DataJobProperties;
 import com.linkedin.datahub.graphql.generated.Dataset;
 import com.linkedin.datahub.graphql.generated.EntityType;
-import com.linkedin.datahub.graphql.types.common.mappers.BrowsePathsV2Mapper;
-import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.DeprecationMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.FineGrainedLineagesMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.InstitutionalMemoryMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper;
-import com.linkedin.datahub.graphql.types.common.mappers.SubTypesMapper;
+import com.linkedin.datahub.graphql.types.common.mappers.*;
 import com.linkedin.datahub.graphql.types.common.mappers.util.SystemMetadataUtils;
 import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper;
 import com.linkedin.datahub.graphql.types.form.FormsMapper;
@@ -139,6 +122,9 @@ public DataJob apply(
                     context, new StructuredProperties(data), entityUrn));
           } else if (FORMS_ASPECT_NAME.equals(name)) {
             result.setForms(FormsMapper.map(new Forms(data), entityUrn.toString()));
+          } else if (DATA_TRANSFORM_LOGIC_ASPECT_NAME.equals(name)) {
+            result.setDataTransformLogic(
+                DataTransformLogicMapper.map(context, new DataTransformLogic(data)));
           }
         });