Skip to content

Commit 1d4c082

Browse files
authored
Merge branch 'master' into fix/views-table
2 parents a707be1 + ee54f1f commit 1d4c082

File tree

6,324 files changed

+1160748
-211413
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

6,324 files changed

+1160748
-211413
lines changed

.dockerignore

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
**/node_modules/
2-
datahub-frontend/build/
3-
metadata-ingestion/venv/
2+
*/build/
3+
*/*/build/
4+
**/venv/
5+
**/.tox/
6+
**/.mypy_cache/
7+
**/.pytest_cache/
8+
**/__pycache__/
49
out
510
**/*.class
611
# Have to copy gradle/wrapper/gradle-wrapper.jar, can't exclude ALL jars
+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Composite action that runs dorny/paths-filter over the changed files and
# exposes, per module group, whether that group was touched. Each output below
# is a GitHub expression over the `filter` step's per-filter outputs.
name: "Identify CI Optimizations"
description: "Determine if code changes are specific to certain modules."

outputs:
  # The three "*-only" outputs are true when exactly one of the frontend /
  # ingestion / backend filters matched and the other two did not.
  frontend-only:
    description: "Frontend only change"
    value: ${{ steps.filter.outputs.frontend == 'true' && steps.filter.outputs.ingestion == 'false' && steps.filter.outputs.backend == 'false' }}
  ingestion-only:
    description: "Ingestion only change"
    value: ${{ steps.filter.outputs.frontend == 'false' && steps.filter.outputs.ingestion == 'true' && steps.filter.outputs.backend == 'false' }}
  backend-only:
    description: "Backend only change"
    value: ${{ steps.filter.outputs.frontend == 'false' && steps.filter.outputs.ingestion == 'false' && steps.filter.outputs.backend == 'true' }}
  # The "*-change" outputs mirror a single filter each, regardless of what
  # else changed.
  backend-change:
    description: "Backend code has changed"
    value: ${{ steps.filter.outputs.backend == 'true' }}
  ingestion-change:
    description: "Ingestion code has changed"
    value: ${{ steps.filter.outputs.ingestion == 'true' }}
  ingestion-base-change:
    description: "Ingestion base image docker image has changed"
    value: ${{ steps.filter.outputs.ingestion-base == 'true' }}
  frontend-change:
    description: "Frontend code has changed"
    value: ${{ steps.filter.outputs.frontend == 'true' }}
  docker-change:
    description: "Docker code has changed"
    value: ${{ steps.filter.outputs.docker == 'true' }}
  kafka-setup-change:
    description: "Kafka setup docker change"
    value: ${{ steps.filter.outputs.kafka-setup == 'true' }}
  mysql-setup-change:
    description: "Mysql setup docker change"
    value: ${{ steps.filter.outputs.mysql-setup == 'true' }}
  postgres-setup-change:
    description: "Postgres setup docker change"
    value: ${{ steps.filter.outputs.postgres-setup == 'true' }}
  elasticsearch-setup-change:
    description: "Elasticsearch setup docker change"
    value: ${{ steps.filter.outputs.elasticsearch-setup == 'true' }}
  smoke-test-change:
    description: "Smoke test change"
    value: ${{ steps.filter.outputs.smoke-test == 'true' }}
runs:
  using: "composite"
  steps:
    # Single step; its id (`filter`) is what every output expression above
    # refers to.
    - uses: dorny/paths-filter@v3
      id: filter
      with:
        token: "" # Empty token forces it to use raw git commands.
        # One named filter per module group. The groups overlap on purpose or
        # by construction: "metadata-models/**" is listed under both
        # `ingestion` and `backend`, and `backend` lists "docker/**", which
        # also covers the docker/ paths listed under `frontend`, `ingestion`
        # and the individual *-setup filters.
        # NOTE(review): because of that overlap a change under
        # docker/datahub-frontend/ appears to set `backend` as well, so
        # `frontend-only` would be false for it — confirm this is intended.
        filters: |
          frontend:
            - "datahub-frontend/**"
            - "datahub-web-react/**"
            - "docker/datahub-frontend/**"
          ingestion:
            - "metadata-ingestion-modules/**"
            - "metadata-ingestion/**"
            - "metadata-models/**"
            - "docker/datahub-ingestion-base/**"
            - "docker/datahub-ingestion/**"
          ingestion-base:
            - "docker/datahub-ingestion-base/**"
          docker:
            - "docker/**"
          backend:
            - "metadata-models/**"
            - "datahub-upgrade/**"
            - "entity-registry/**"
            - "li-utils/**"
            - "metadata-auth/**"
            - "metadata-dao-impl/**"
            - "metadata-events/**"
            - "metadata-io/**"
            - "metadata-jobs/**"
            - "metadata-service/**"
            - "metadata-utils/**"
            - "metadata-operation-context/**"
            - "datahub-graphql-core/**"
            - "docker/**"
          kafka-setup:
            - "docker/kafka-setup/**"
          mysql-setup:
            - "docker/mysql-setup/**"
          postgres-setup:
            - "docker/postgres-setup/**"
          elasticsearch-setup:
            - "docker/elasticsearch-setup/**"
          smoke-test:
            - "smoke-test/**"

.github/actions/docker-custom-build-and-push/action.yml

+77-24
Original file line numberDiff line numberDiff line change
@@ -20,42 +20,65 @@ inputs:
2020
required: false
2121

2222
images:
23-
# e.g. linkedin/datahub-gms
23+
# e.g. acryldata/datahub-gms
2424
description: "List of Docker images to use as base name for tags"
2525
required: true
2626
build-args:
2727
description: "List of build-time variables. Same as docker/build-push-action"
2828
required: false
29-
tags:
30-
# e.g. latest,head,sha12345
31-
description: "List of tags to use for the Docker image"
29+
image_tag:
30+
# e.g. pr12345 OR head OR v0.1.2.3
31+
description: "Main tag to use for the Docker image"
3232
required: true
33+
flavor:
34+
description: "Image flavor (e.g., slim, full)"
35+
required: false
3336
target:
3437
description: "Sets the target stage to build"
3538
required: false
39+
depot-project:
40+
# Setting this will use native arm64 docker builds instead of QEMU emulation.
41+
# This speeds up builds by 2-3x.
42+
description: "Depot project id"
43+
required: false
44+
3645
outputs:
3746
image_tag:
3847
description: "Docker image tags"
3948
value: ${{ steps.docker_meta.outputs.tags }}
40-
# image_name: ${{ env.DATAHUB_GMS_IMAGE }}
4149

4250
runs:
4351
using: "composite"
4452

4553
steps:
4654
- name: Docker meta
4755
id: docker_meta
48-
uses: crazy-max/ghaction-docker-meta@v1
56+
uses: docker/metadata-action@v5
4957
with:
50-
# list of Docker images to use as base name for tags
5158
images: ${{ inputs.images }}
52-
# add git short SHA as Docker tag
53-
tag-custom: ${{ inputs.tags }}
54-
tag-custom-only: true
59+
flavor: |
60+
latest=false
61+
tags: |
62+
type=raw,value=${{ inputs.image_tag }}
63+
type=raw,value=head,suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }},enable={{is_default_branch}}
64+
type=sha,prefix=,format=short,suffix=${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
65+
66+
- name: Single Tag
67+
id: single_tag
68+
shell: bash
69+
run: |
70+
IMAGES="""
71+
${{ inputs.images }}
72+
"""
73+
TAGS="""
74+
${{ inputs.image_tag }}
75+
"""
76+
echo "SINGLE_IMAGE=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> "$GITHUB_OUTPUT"
77+
echo "SINGLE_TAG=$(echo $IMAGES | tr '\n' ' ' | awk -F' |,' '{ print $1 }'):$(echo $TAGS | tr '\n' ' ' | awk -F' |,' '{ print $1 }')" >> "$GITHUB_OUTPUT"
5578
5679
# Code for testing the build when not pushing to Docker Hub.
5780
- name: Build and Load image for testing (if not publishing)
58-
uses: docker/build-push-action@v3
81+
uses: docker/build-push-action@v6
5982
if: ${{ inputs.publish != 'true' }}
6083
with:
6184
context: ${{ inputs.context }}
@@ -68,30 +91,57 @@ runs:
6891
target: ${{ inputs.target }}
6992
load: true
7093
push: false
71-
cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }}
72-
cache-to: type=inline
94+
cache-from: |
95+
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
96+
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
97+
cache-to: |
98+
type=inline
7399
- name: Upload image locally for testing (if not publishing)
74-
uses: ishworkh/docker-image-artifact-upload@v1
100+
uses: ishworkh/container-image-artifact-upload@v2.0.0
75101
if: ${{ inputs.publish != 'true' }}
76102
with:
77-
image: ${{ steps.docker_meta.outputs.tags }}
103+
image: ${{ steps.single_tag.outputs.SINGLE_TAG }}
104+
retention_days: "2"
78105

79106
# Code for building multi-platform images and pushing to Docker Hub.
80107
- name: Set up QEMU
81-
uses: docker/setup-qemu-action@v2
82-
if: ${{ inputs.publish == 'true' }}
108+
uses: docker/setup-qemu-action@v3
109+
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
83110
- name: Set up Docker Buildx
84-
uses: docker/setup-buildx-action@v2
85-
if: ${{ inputs.publish == 'true' }}
111+
uses: docker/setup-buildx-action@v3
112+
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
113+
- name: Setup Depot CLI
114+
uses: depot/setup-action@v1
115+
if: ${{ inputs.publish == 'true' && inputs.depot-project != '' }}
86116
- name: Login to DockerHub
87-
uses: docker/login-action@v2
117+
uses: docker/login-action@v3
88118
if: ${{ inputs.publish == 'true' }}
89119
with:
90120
username: ${{ inputs.username }}
91121
password: ${{ inputs.password }}
122+
123+
# Depot variant.
92124
- name: Build and Push Multi-Platform image
93-
uses: docker/build-push-action@v3
94-
if: ${{ inputs.publish == 'true' }}
125+
uses: depot/build-push-action@v1
126+
if: ${{ inputs.publish == 'true' && inputs.depot-project != '' }}
127+
with:
128+
project: ${{ inputs.depot-project }}
129+
context: ${{ inputs.context }}
130+
file: ${{ inputs.file }}
131+
platforms: ${{ inputs.platforms }}
132+
build-args: ${{ inputs.build-args }}
133+
tags: ${{ steps.docker_meta.outputs.tags }}
134+
target: ${{ inputs.target }}
135+
push: true
136+
cache-from: |
137+
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
138+
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
139+
cache-to: |
140+
type=inline
141+
142+
- name: Build and Push Multi-Platform image
143+
uses: docker/build-push-action@v6
144+
if: ${{ inputs.publish == 'true' && inputs.depot-project == '' }}
95145
with:
96146
context: ${{ inputs.context }}
97147
file: ${{ inputs.file }}
@@ -100,7 +150,10 @@ runs:
100150
tags: ${{ steps.docker_meta.outputs.tags }}
101151
target: ${{ inputs.target }}
102152
push: true
103-
cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }}
104-
cache-to: type=inline
153+
cache-from: |
154+
type=registry,ref=${{ steps.single_tag.outputs.SINGLE_IMAGE }}:head${{ inputs.flavor && format('-{0}', inputs.flavor) || '' }}
155+
type=registry,ref=${{ steps.docker_meta.outputs.tags }}
156+
cache-to: |
157+
type=inline
105158
106159
# TODO add code for vuln scanning?

.github/scripts/check_policies.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
elif urn == "urn:li:dataHubPolicy:editor-platform-policy":
2121
editor_platform_policy_privileges = policy["info"]["privileges"]
2222
elif urn == "urn:li:dataHubPolicy:7":
23-
all_user_platform_policy_privilges = policy["info"]["privileges"]
23+
all_user_platform_policy_privileges = policy["info"]["privileges"]
2424
try:
2525
doc_type = policy["info"]["type"]
2626
privileges = policy["info"]["privileges"]
@@ -54,10 +54,22 @@
5454
)
5555
assert len(diff_policies) == 0, f"Missing privileges for root user are {diff_policies}"
5656

57-
diff_policies = set(editor_platform_policy_privileges).difference(
58-
set(all_user_platform_policy_privilges)
59-
)
60-
assert "MANAGE_POLICIES" not in all_user_platform_policy_privilges
61-
assert (
62-
len(diff_policies) == 0
63-
), f"Missing privileges for all user policies are {diff_policies}"
57+
# All users privileges checks
58+
assert "MANAGE_POLICIES" not in all_user_platform_policy_privileges
59+
assert "MANAGE_USERS_AND_GROUPS" not in all_user_platform_policy_privileges
60+
assert "MANAGE_SECRETS" not in all_user_platform_policy_privileges
61+
assert "MANAGE_USER_CREDENTIALS" not in all_user_platform_policy_privileges
62+
assert "MANAGE_ACCESS_TOKENS" not in all_user_platform_policy_privileges
63+
assert "EDIT_ENTITY" not in all_user_platform_policy_privileges
64+
assert "DELETE_ENTITY" not in all_user_platform_policy_privileges
65+
66+
# Editor checks
67+
assert "MANAGE_POLICIES" not in editor_platform_policy_privileges
68+
assert "MANAGE_USERS_AND_GROUPS" not in editor_platform_policy_privileges
69+
assert "MANAGE_SECRETS" not in editor_platform_policy_privileges
70+
assert "MANAGE_USER_CREDENTIALS" not in editor_platform_policy_privileges
71+
assert "MANAGE_ACCESS_TOKENS" not in editor_platform_policy_privileges
72+
# These don't prevent a user from modifying entities they are an asset owner of, i.e. their own profile info
73+
assert "EDIT_CONTACT_INFO" not in editor_platform_policy_privileges
74+
assert "EDIT_USER_PROFILE" not in editor_platform_policy_privileges
75+
assert "EDIT_ENTITY_OWNERS" not in editor_platform_policy_privileges
+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Consistency check for the Python package layout of each listed folder.
#
# For every folder, the set of packages reported by setuptools.find_packages
# must equal the set reported by setuptools.find_namespace_packages (entries
# whose name contains "cypress" are ignored in both). Any difference is printed
# and then fails the script through an assertion.
import os

import setuptools

folders = ["./smoke-test/tests"]


def _discover(finder, root):
    """Return the names yielded by ``finder(root)`` that do not contain "cypress"."""
    return [name for name in finder(root) if "cypress" not in name]


for folder in folders:
    print(f"Checking folder {folder}")
    packages = _discover(setuptools.find_packages, folder)
    namespace_packages = _discover(setuptools.find_namespace_packages, folder)

    print("Packages found:", packages)
    print("Namespace packages found:", namespace_packages)

    regular_names = set(packages)
    namespace_names = set(namespace_packages)
    in_packages_not_namespace = regular_names - namespace_names
    in_namespace_not_packages = namespace_names - regular_names

    if in_packages_not_namespace:
        print(f"Packages not in namespace packages: {in_packages_not_namespace}")

    if in_namespace_not_packages:
        print(f"Namespace packages not in packages: {in_namespace_not_packages}")
        # Dump each offending directory so the log shows what it contains.
        for pkg in in_namespace_not_packages:
            pkg_path = os.path.join(folder, pkg.replace(".", os.path.sep))
            print(f"Contents of {pkg_path}:")
            print(os.listdir(pkg_path))

    # Fail only after everything has been reported above.
    assert (
        not in_packages_not_namespace
    ), f"Found packages in {folder} that are not in namespace packages: {in_packages_not_namespace}"
    assert (
        not in_namespace_not_packages
    ), f"Found namespace packages in {folder} that are not in packages: {in_namespace_not_packages}"

.github/scripts/docker_helpers.sh

+16-8
Original file line numberDiff line numberDiff line change
@@ -5,36 +5,44 @@ export MAIN_BRANCH="master"
55
export MAIN_BRANCH_TAG="head"
66

77
function get_short_sha {
8-
echo $(git rev-parse --short "$GITHUB_SHA")
8+
echo $(git rev-parse --short "$GITHUB_SHA"|head -c7)
99
}
1010

1111
export SHORT_SHA=$(get_short_sha)
1212
echo "SHORT_SHA: $SHORT_SHA"
1313

1414
function get_tag {
15-
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}\,${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g')
15+
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g')
1616
}
1717

1818
function get_tag_slim {
19-
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim\,${SHORT_SHA}-slim,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
19+
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
2020
}
2121

2222
function get_tag_full {
23-
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full\,${SHORT_SHA}-full,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
23+
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
2424
}
2525

2626
function get_python_docker_release_v {
27-
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},0.0.0+docker.${SHORT_SHA},g" -e 's,refs/tags/v\(.*\),\1+docker,g' -e 's,refs/pull/\([0-9]*\).*,0.0.0+docker.pr\1,g')
27+
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},1!0.0.0+docker.${SHORT_SHA},g" -e 's,refs/tags/v\(.*\),1!\1+docker,g' -e 's,refs/pull/\([0-9]*\).*,1!0.0.0+docker.pr\1,g')
2828
}
2929

3030
function get_unique_tag {
3131
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g')
3232
}
3333

3434
function get_unique_tag_slim {
35-
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-slim,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
35+
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')
3636
}
3737

3838
function get_unique_tag_full {
39-
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-full,g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
40-
}
39+
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')
40+
}
41+
42+
# Print the comma-separated list of Docker target platforms for this build.
#
# A push to the main branch builds for both amd64 and arm64; any other
# event or ref builds for amd64 only.
#
# Reads GITHUB_EVENT_NAME and GITHUB_REF from the environment, plus
# MAIN_BRANCH, which this script exports.
#
# The condition previously used the workflow expression syntax
# `${{ github.event_name }}` / `${{ github.ref }}`. That syntax is only
# expanded inside workflow YAML; in a shell script bash reports it as a bad
# substitution. It also placed `&&` inside a single `[ ... ]`, which the shell
# splits into two commands, leaving `[` without its closing `]`. Each
# comparison is now its own test.
function get_platforms_based_on_branch {
    if [ "${GITHUB_EVENT_NAME:-}" = "push" ] && [ "${GITHUB_REF:-}" = "refs/heads/${MAIN_BRANCH}" ]; then
        echo "linux/amd64,linux/arm64"
    else
        echo "linux/amd64"
    fi
}

0 commit comments

Comments
 (0)