Skip to content

Commit 2ba316e

Browse files
Merge branch 'master' of https://github.com/shubhamjagtap639/datahub into prefect-source-integration
2 parents f963b55 + 69d0ba1 commit 2ba316e

File tree

3,498 files changed

+240359
-129319
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

3,498 files changed

+240359
-129319
lines changed

.dockerignore

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
**/node_modules/
2-
datahub-frontend/build/
3-
metadata-ingestion/venv/
2+
*/build/
3+
*/*/build/
4+
*/venv/
45
out
56
**/*.class
67
# Have to copy gradle/wrapper/gradle-wrapper.jar, can't exclude ALL jars
+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
name: 'Identify CI Optimizations'
2+
description: 'Determine if code changes are specific to certain modules.'
3+
4+
outputs:
5+
frontend-only:
6+
description: "Frontend only change"
7+
value: ${{ steps.filter.outputs.frontend == 'true' && steps.filter.outputs.ingestion == 'false' && steps.filter.outputs.backend == 'false' }}
8+
ingestion-only:
9+
description: "Ingestion only change"
10+
value: ${{ steps.filter.outputs.frontend == 'false' && steps.filter.outputs.ingestion == 'true' && steps.filter.outputs.backend == 'false' }}
11+
backend-only:
12+
description: "Backend only change"
13+
value: ${{ steps.filter.outputs.frontend == 'false' && steps.filter.outputs.ingestion == 'false' && steps.filter.outputs.backend == 'true' }}
14+
backend-change:
15+
description: "Backend code has changed"
16+
value: ${{ steps.filter.outputs.backend == 'true' }}
17+
ingestion-change:
18+
description: "Ingestion code has changed"
19+
value: ${{ steps.filter.outputs.ingestion == 'true' }}
20+
frontend-change:
21+
description: "Frontend code has changed"
22+
value: ${{ steps.filter.outputs.frontend == 'true' }}
23+
docker-change:
24+
description: "Docker code has changed"
25+
value: ${{ steps.filter.outputs.docker == 'true' }}
26+
kafka-setup-change:
27+
description: "Kafka setup docker change"
28+
value: ${{ steps.filter.outputs.kafka-setup == 'true' }}
29+
mysql-setup-change:
30+
description: "Mysql setup docker change"
31+
value: ${{ steps.filter.outputs.mysql-setup == 'true' }}
32+
postgres-setup-change:
33+
description: "Postgres setup docker change"
34+
value: ${{ steps.filter.outputs.postgres-setup == 'true' }}
35+
elasticsearch-setup-change:
36+
description: "Elasticsearch setup docker change"
37+
value: ${{ steps.filter.outputs.elasticsearch-setup == 'true' }}
38+
runs:
39+
using: "composite"
40+
steps:
41+
- uses: dorny/paths-filter@v2
42+
id: filter
43+
with:
44+
filters: |
45+
frontend:
46+
- "datahub-frontend/**"
47+
- "datahub-web-react/**"
48+
- "smoke-test/tests/cypress/**"
49+
- "docker/datahub-frontend/**"
50+
ingestion:
51+
- "metadata-ingestion-modules/**"
52+
- "metadata-ingestion/**"
53+
- "metadata-models/**"
54+
- "smoke-test/**"
55+
- "docker/datahub-ingestion-**"
56+
docker:
57+
- "docker/**"
58+
backend:
59+
- "metadata-models/**"
60+
- "datahub-upgrade/**"
61+
- "entity-registry/**"
62+
- "li-utils/**"
63+
- "metadata-auth/**"
64+
- "metadata-dao-impl/**"
65+
- "metadata-events/**"
66+
- "metadata-io/**"
67+
- "metadata-jobs/**"
68+
- "metadata-service/**"
69+
- "metadata-utils/**"
70+
- "smoke-test/**"
71+
- "docker/**"
72+
kafka-setup:
73+
- "docker/kafka-setup/**"
74+
mysql-setup:
75+
- "docker/mysql-setup/**"
76+
postgres-setup:
77+
- "docker/postgres-setup/**"
78+
elasticsearch-setup:
79+
- "docker/elasticsearch-setup/**"

.github/actions/docker-custom-build-and-push/action.yml

+10-1
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,20 @@ runs:
7070
push: false
7171
cache-from: type=registry,ref=${{ steps.docker_meta.outputs.tags }}
7272
cache-to: type=inline
73+
- name: Single Tag
74+
if: ${{ inputs.publish != 'true' }}
75+
shell: bash
76+
run: |
77+
TAGS="""
78+
${{ steps.docker_meta.outputs.tags }}
79+
"""
80+
echo "SINGLE_TAG=$(echo $TAGS | tr '\n' ' ' | awk -F' ' '{ print $1 }')" >> $GITHUB_OUTPUT
81+
id: single_tag
7382
- name: Upload image locally for testing (if not publishing)
7483
uses: ishworkh/docker-image-artifact-upload@v1
7584
if: ${{ inputs.publish != 'true' }}
7685
with:
77-
image: ${{ steps.docker_meta.outputs.tags }}
86+
image: ${{ steps.single_tag.outputs.SINGLE_TAG }}
7887

7988
# Code for building multi-platform images and pushing to Docker Hub.
8089
- name: Set up QEMU

.github/scripts/check_policies.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
elif urn == "urn:li:dataHubPolicy:editor-platform-policy":
2121
editor_platform_policy_privileges = policy["info"]["privileges"]
2222
elif urn == "urn:li:dataHubPolicy:7":
23-
all_user_platform_policy_privilges = policy["info"]["privileges"]
23+
all_user_platform_policy_privileges = policy["info"]["privileges"]
2424
try:
2525
doc_type = policy["info"]["type"]
2626
privileges = policy["info"]["privileges"]
@@ -54,10 +54,22 @@
5454
)
5555
assert len(diff_policies) == 0, f"Missing privileges for root user are {diff_policies}"
5656

57-
diff_policies = set(editor_platform_policy_privileges).difference(
58-
set(all_user_platform_policy_privilges)
59-
)
60-
assert "MANAGE_POLICIES" not in all_user_platform_policy_privilges
61-
assert (
62-
len(diff_policies) == 0
63-
), f"Missing privileges for all user policies are {diff_policies}"
57+
# All users privileges checks
58+
assert "MANAGE_POLICIES" not in all_user_platform_policy_privileges
59+
assert "MANAGE_USERS_AND_GROUPS" not in all_user_platform_policy_privileges
60+
assert "MANAGE_SECRETS" not in all_user_platform_policy_privileges
61+
assert "MANAGE_USER_CREDENTIALS" not in all_user_platform_policy_privileges
62+
assert "MANAGE_ACCESS_TOKENS" not in all_user_platform_policy_privileges
63+
assert "EDIT_ENTITY" not in all_user_platform_policy_privileges
64+
assert "DELETE_ENTITY" not in all_user_platform_policy_privileges
65+
66+
# Editor checks
67+
assert "MANAGE_POLICIES" not in editor_platform_policy_privileges
68+
assert "MANAGE_USERS_AND_GROUPS" not in editor_platform_policy_privileges
69+
assert "MANAGE_SECRETS" not in editor_platform_policy_privileges
70+
assert "MANAGE_USER_CREDENTIALS" not in editor_platform_policy_privileges
71+
assert "MANAGE_ACCESS_TOKENS" not in editor_platform_policy_privileges
72+
# These don't prevent a user from modifying entities they are an asset owner of, i.e. their own profile info
73+
assert "EDIT_CONTACT_INFO" not in editor_platform_policy_privileges
74+
assert "EDIT_USER_PROFILE" not in editor_platform_policy_privileges
75+
assert "EDIT_ENTITY_OWNERS" not in editor_platform_policy_privileges

.github/scripts/docker_helpers.sh

+17-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,15 @@ export SHORT_SHA=$(get_short_sha)
1212
echo "SHORT_SHA: $SHORT_SHA"
1313

1414
function get_tag {
15-
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}\,${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g')
15+
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g'),${SHORT_SHA}
16+
}
17+
18+
# Print the comma-separated "-slim" image tags derived from GITHUB_REF:
#   refs/heads/${MAIN_BRANCH} -> ${MAIN_BRANCH_TAG}-slim
#   refs/tags/X               -> X-slim
#   refs/pull/N/...           -> prN-slim
# "${SHORT_SHA}-slim" is always appended as the last tag.
function get_tag_slim {
    # Capture the tag name so release tags also receive the suffix; stripping
    # only the "refs/tags/" prefix would emit the plain (non-slim) tag name.
    echo "$(echo "${GITHUB_REF}" | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g'),${SHORT_SHA}-slim"
}
21+
22+
# Print the comma-separated "-full" image tags derived from GITHUB_REF:
#   refs/heads/${MAIN_BRANCH} -> ${MAIN_BRANCH_TAG}-full
#   refs/tags/X               -> X-full
#   refs/pull/N/...           -> prN-full
# "${SHORT_SHA}-full" is always appended as the last tag.
function get_tag_full {
    # Capture the tag name so release tags also receive the suffix; stripping
    # only the "refs/tags/" prefix would emit the plain (non-full) tag name.
    echo "$(echo "${GITHUB_REF}" | sed -e "s,refs/heads/${MAIN_BRANCH},${MAIN_BRANCH_TAG}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g'),${SHORT_SHA}-full"
}
1725

1826
function get_python_docker_release_v {
@@ -21,4 +29,12 @@ function get_python_docker_release_v {
2129

2230
function get_unique_tag {
2331
echo $(echo ${GITHUB_REF} | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g')
32+
}
33+
34+
# Print a single "-slim" image tag derived from GITHUB_REF:
#   refs/heads/${MAIN_BRANCH} -> ${SHORT_SHA}-slim
#   refs/tags/X               -> X-slim
#   refs/pull/N/...           -> prN-slim
function get_unique_tag_slim {
    # Capture the tag name so release tags also receive the suffix; without it
    # the result for a release tag is identical to get_unique_tag's output.
    echo "$(echo "${GITHUB_REF}" | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-slim,g" -e 's,refs/tags/\(.*\),\1-slim,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-slim,g')"
}
37+
38+
# Print a single "-full" image tag derived from GITHUB_REF:
#   refs/heads/${MAIN_BRANCH} -> ${SHORT_SHA}-full
#   refs/tags/X               -> X-full
#   refs/pull/N/...           -> prN-full
function get_unique_tag_full {
    # Capture the tag name so release tags also receive the suffix; without it
    # the result for a release tag is identical to get_unique_tag's output.
    echo "$(echo "${GITHUB_REF}" | sed -e "s,refs/heads/${MAIN_BRANCH},${SHORT_SHA}-full,g" -e 's,refs/tags/\(.*\),\1-full,g' -e 's,refs/pull/\([0-9]*\).*,pr\1-full,g')"
}

.github/workflows/airflow-plugin.yml

+30-14
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ on:
1010
- "metadata-models/**"
1111
pull_request:
1212
branches:
13-
- master
13+
- "**"
1414
paths:
15-
- ".github/**"
15+
- ".github/workflows/airflow-plugin.yml"
1616
- "metadata-ingestion-modules/airflow-plugin/**"
1717
- "metadata-ingestion/**"
1818
- "metadata-models/**"
@@ -32,47 +32,63 @@ jobs:
3232
strategy:
3333
matrix:
3434
include:
35-
- python-version: "3.7"
36-
extraPythonRequirement: "apache-airflow~=2.1.0"
37-
- python-version: "3.7"
38-
extraPythonRequirement: "apache-airflow~=2.2.0"
35+
# Note: this should be kept in sync with tox.ini.
36+
- python-version: "3.8"
37+
extra_pip_requirements: "apache-airflow~=2.1.4"
38+
extra_pip_extras: plugin-v1
39+
- python-version: "3.8"
40+
extra_pip_requirements: "apache-airflow~=2.2.4"
41+
extra_pip_extras: plugin-v1
3942
- python-version: "3.10"
40-
extraPythonRequirement: "apache-airflow~=2.4.0"
43+
extra_pip_requirements: 'apache-airflow~=2.4.0 pluggy==1.0.0 "pendulum<3.0" "Flask-Session<0.6.0"'
44+
extra_pip_extras: plugin-v2
4145
- python-version: "3.10"
42-
extraPythonRequirement: "apache-airflow~=2.6.0"
46+
extra_pip_requirements: 'apache-airflow~=2.6.0 "pendulum<3.0" "Flask-Session<0.6.0"'
47+
extra_pip_extras: plugin-v2
4348
- python-version: "3.10"
44-
extraPythonRequirement: "apache-airflow>2.6.0"
49+
extra_pip_requirements: 'apache-airflow~=2.7.0 pydantic==2.4.2 "Flask-Session<0.6.0"'
50+
extra_pip_extras: plugin-v2
51+
- python-version: "3.10"
52+
extra_pip_requirements: 'apache-airflow>=2.8.0 pydantic>=2.4.2 "Flask-Session<0.6.0"'
53+
extra_pip_extras: plugin-v2
4554
fail-fast: false
4655
steps:
56+
- name: Set up JDK 17
57+
uses: actions/setup-java@v3
58+
with:
59+
distribution: "zulu"
60+
java-version: 17
61+
- uses: gradle/gradle-build-action@v2
4762
- uses: actions/checkout@v3
4863
- uses: actions/setup-python@v4
4964
with:
5065
python-version: ${{ matrix.python-version }}
5166
cache: "pip"
5267
- name: Install dependencies
5368
run: ./metadata-ingestion/scripts/install_deps.sh
54-
- name: Install airflow package and test (extras ${{ matrix.extraPythonRequirement }})
55-
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:airflow-plugin:lint :metadata-ingestion-modules:airflow-plugin:testQuick
69+
- name: Install airflow package and test (extras ${{ matrix.extra_pip_requirements }})
70+
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extra_pip_requirements }}' -Pextra_pip_extras='${{ matrix.extra_pip_extras }}' :metadata-ingestion-modules:airflow-plugin:lint :metadata-ingestion-modules:airflow-plugin:testQuick
5671
- name: pip freeze show list installed
5772
if: always()
5873
run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && pip freeze
5974
- uses: actions/upload-artifact@v3
60-
if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'apache-airflow>2.6.0' }}
75+
# Upload results from exactly one matrix entry (the newest Airflow one).
# Match by prefix: the matrix value also carries extra pins, so an exact
# comparison against 'apache-airflow>=2.7.0' matches no entry at all.
if: ${{ always() && matrix.python-version == '3.10' && startsWith(matrix.extra_pip_requirements, 'apache-airflow>=2.8.0') }}
6176
with:
6277
name: Test Results (Airflow Plugin ${{ matrix.python-version}})
6378
path: |
6479
**/build/reports/tests/test/**
6580
**/build/test-results/test/**
6681
**/junit.*.xml
82+
!**/binary/**
6783
- name: Upload coverage to Codecov
6884
if: always()
6985
uses: codecov/codecov-action@v3
7086
with:
7187
token: ${{ secrets.CODECOV_TOKEN }}
7288
directory: .
7389
fail_ci_if_error: false
74-
flags: airflow-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }}
75-
name: pytest-airflow
90+
flags: airflow,airflow-${{ matrix.extra_pip_extras }}
91+
name: pytest-airflow-${{ matrix.python-version }}-${{ matrix.extra_pip_requirements }}
7692
verbose: true
7793

7894
event-file:

0 commit comments

Comments
 (0)