Skip to content

Commit 3da8ca7

Browse files
committed
feat(python): split out temp wheel builds
1 parent ef1c1df commit 3da8ca7

File tree

10 files changed

+200
-23
lines changed

10 files changed

+200
-23
lines changed

.github/workflows/airflow-plugin.yml

-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ jobs:
2727
airflow-plugin:
2828
runs-on: ubuntu-latest
2929
env:
30-
SPARK_VERSION: 3.0.3
3130
DATAHUB_TELEMETRY_ENABLED: false
3231
strategy:
3332
matrix:
.github/workflows/python-build-pages.yml

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Builds the DataHub Python wheels, renders a small static index site for them,
# and publishes that site to Cloudflare Pages.
name: Python Build
on:
  push:
    branches:
      - master
    paths:
      - ".github/workflows/python-build-pages.yml"
      - "metadata-ingestion/**"
      - "metadata-ingestion-modules/**"
      - "metadata-models/**"
      # The site/wheel collection scripts live here, so changes to them must
      # also trigger this workflow.
      - "python-build/**"
  pull_request:
    branches:
      - "**"
    paths:
      - ".github/workflows/python-build-pages.yml"
      - "metadata-ingestion/**"
      - "metadata-ingestion-modules/**"
      - "metadata-models/**"
      - "python-build/**"

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  deploy-pages:
    runs-on: ubuntu-latest
    env:
      # The `secrets` context is not available in `jobs.<job_id>.if`, so the
      # credential check is evaluated here (where `secrets` IS available) and
      # only a boolean flag - never the secret values - is exposed to steps.
      HAS_CLOUDFLARE_CREDENTIALS: ${{ secrets.CLOUDFLARE_ACCOUNT_ID != '' && secrets.CLOUDFLARE_API_TOKEN != '' }}

    permissions:
      contents: read
      deployments: write
    steps:
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          distribution: "zulu"
          java-version: 17
      - uses: gradle/actions/setup-gradle@v3
      - uses: acryldata/sane-checkout-action@v3
      - uses: actions/setup-python@v5
        with:
          # Must be quoted: an unquoted 3.10 is a YAML float and becomes 3.1.
          python-version: "3.10"
          cache: "pip"
      - uses: actions/cache@v4
        with:
          path: |
            ~/.cache/uv
          key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }}
      - name: Build Python wheel site
        run: |
          ./gradlew :python-build:buildSite
      - name: Publish
        # Skipped (rather than failing) when the Cloudflare credentials are not
        # configured, e.g. on forks; the build above still runs as validation.
        if: ${{ env.HAS_CLOUDFLARE_CREDENTIALS == 'true' }}
        uses: cloudflare/wrangler-action@v3
        with:
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          workingDirectory: python-build
          command: pages deploy ./site --project-name=datahub-wheels

docs-website/build.gradle

+1-5
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,7 @@ task yarnInstall(type: YarnTask) {
8383
task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall,
8484
generateGraphQLSchema, generateJsonSchema,
8585
':metadata-ingestion:modelDocGen', ':metadata-ingestion:docGen',
86-
':metadata-ingestion:buildWheel',
87-
':metadata-ingestion-modules:airflow-plugin:buildWheel',
88-
':metadata-ingestion-modules:dagster-plugin:buildWheel',
89-
':metadata-ingestion-modules:prefect-plugin:buildWheel',
90-
':metadata-ingestion-modules:gx-plugin:buildWheel',
86+
':python-build:buildWheels',
9187
]) {
9288
inputs.files(projectMdFiles)
9389
outputs.cacheIf { true }

docs-website/generateDocsDir.ts

+9-15
Original file line numberDiff line numberDiff line change
@@ -573,26 +573,20 @@ function write_markdown_file(
573573

574574
function copy_python_wheels(): void {
575575
// Copy the built wheel files to the static directory.
576-
const wheel_dirs = [
577-
"../metadata-ingestion/dist",
578-
"../metadata-ingestion-modules/airflow-plugin/dist",
579-
"../metadata-ingestion-modules/dagster-plugin/dist",
580-
"../metadata-ingestion-modules/prefect-plugin/dist",
581-
"../metadata-ingestion-modules/gx-plugin/dist",
582-
];
576+
// Everything is copied to the python-build directory first, so
577+
// we just need to copy from there.
578+
const wheel_dir = "../python-build/wheels";
583579

584580
const wheel_output_directory = path.join(STATIC_DIRECTORY, "wheels");
585581
fs.mkdirSync(wheel_output_directory, { recursive: true });
586582

587-
for (const wheel_dir of wheel_dirs) {
588-
const wheel_files = fs.readdirSync(wheel_dir);
589-
for (const wheel_file of wheel_files) {
590-
const src = path.join(wheel_dir, wheel_file);
591-
const dest = path.join(wheel_output_directory, wheel_file);
583+
const wheel_files = fs.readdirSync(wheel_dir);
584+
for (const wheel_file of wheel_files) {
585+
const src = path.join(wheel_dir, wheel_file);
586+
const dest = path.join(wheel_output_directory, wheel_file);
592587

593-
// console.log(`Copying artifact ${src} to ${dest}...`);
594-
fs.copyFileSync(src, dest);
595-
}
588+
// console.log(`Copying artifact ${src} to ${dest}...`);
589+
fs.copyFileSync(src, dest);
596590
}
597591
}
598592

metadata-ingestion/build.gradle

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
2323
inputs.file file('setup.py')
2424
outputs.file(sentinel_file)
2525
commandLine 'bash', '-c',
26-
"${python_executable} -m venv ${venv_name} && " +
27-
"${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " +
26+
"${python_executable} -m venv ${venv_name} && set -x && " +
27+
"${venv_name}/bin/python -m pip install --upgrade uv && " +
2828
"touch ${sentinel_file}"
2929
}
3030

python-build/.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2+
/wheels
3+
/site

python-build/build.gradle

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Gradle project that collects the wheels built by the Python sub-projects
// (copy_wheels.py) and renders a static index site for them (build_site.py).
plugins {
  id 'base'
}

ext {
  python_executable = 'python3'
}

// Fails the build when the interpreter is older than Python 3.8.
// The `assert` keyword is required: without it the snippet merely evaluates a
// tuple and exits 0 regardless of the interpreter version.
task checkPythonVersion(type: Exec) {
  commandLine python_executable, '-c',
    'import sys; assert sys.version_info >= (3, 8), f"Python version {sys.version_info} is too old"'
}

// Builds every Python wheel, then copies the results into python-build/wheels.
// The dependency list should line up with `wheel_dirs` in copy_wheels.py.
task buildWheels(type: Exec, dependsOn: [
  checkPythonVersion,
  ':metadata-ingestion:buildWheel',
  ':metadata-ingestion-modules:airflow-plugin:buildWheel',
  ':metadata-ingestion-modules:dagster-plugin:buildWheel',
  ':metadata-ingestion-modules:prefect-plugin:buildWheel',
  ':metadata-ingestion-modules:gx-plugin:buildWheel',
]) {
  commandLine python_executable, "copy_wheels.py"
}

// Renders python-build/site from the collected wheels.
task buildSite(type: Exec, dependsOn: [buildWheels]) {
  commandLine python_executable, "build_site.py"
}

python-build/build_site.py

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""Render the static site that publishes the collected Python wheels.

Running this file as a script rebuilds ``site/`` next to it: every file found
in ``wheels/`` is copied to ``site/artifacts/wheels/`` and an ``index.html``
listing each ``*.whl`` with its size and a ``pip install`` command is written.
"""
import pathlib
import shutil
from datetime import datetime, timezone

PYTHON_BUILD_DIR = pathlib.Path(__file__).parent
WHEEL_DIR = PYTHON_BUILD_DIR / "wheels"
SITE_OUTPUT_DIR = PYTHON_BUILD_DIR / "site"

# NOTE(review): the package spec in the two CDN URLs below was unreadable in
# the reviewed copy of this file (e-mail-obfuscation artifact). It has been
# restored as fomantic-ui@2.9.3 - confirm it matches the SRI hashes.
_PAGE_HEAD = """
<html>
  <head>
    <title>DataHub Python Builds</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0">

    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/fomantic-ui@2.9.3/dist/semantic.min.css" integrity="sha256-cDGQ39yChhpN5vzgHbjIdGEtQ5kXE9tttCsI7VR9TuY=" crossorigin="anonymous">
    <script src="https://cdn.jsdelivr.net/npm/fomantic-ui@2.9.3/dist/semantic.min.js" integrity="sha256-fN8vcX2ULyTDspVTHEteK8hd3rQAb5thNiwakjAW75Q=" crossorigin="anonymous"></script>
  </head>
"""

# Fills every ".base-url" placeholder with the URL the page is served from.
_PAGE_TAIL = """
  <script>
    document.querySelectorAll(".base-url").forEach(el => {
      el.textContent = window.location.href.split('/').slice(0, -1).join('/');
    });
  </script>
</html>
"""


def _wheel_row(wheel_file):
    """Return the HTML table row describing a single wheel file."""
    # Wheel file names start with the distribution name, using "_" for "-".
    package = wheel_file.name.split("-")[0].replace("_", "-")
    size_mb = wheel_file.stat().st_size / 1024 / 1024
    return f"""
          <tr>
            <td><code>{package}</code></td>
            <td>{size_mb:.3f} MB</td>
            <td><code>pip install '<span class="base-url">&lt;base-url&gt;</span>/artifacts/wheels/{wheel_file.name}'</code></td>
          </tr>
"""


def render_index(wheel_files, built_at):
    """Return the full ``index.html`` text.

    Args:
        wheel_files: iterable of ``pathlib.Path`` objects for existing wheels;
            one table row is emitted per entry, in the given order.
        built_at: human-readable build timestamp shown on the page.
    """
    rows = "\n".join(_wheel_row(wheel_file) for wheel_file in wheel_files)
    body = f"""
  <body>
    <div class="ui container">
      <h1 class="ui header" style="padding-top: 1.5em;">DataHub Python Builds</h1>
      <p>Built at {built_at}</p>

      <p>
        Current base URL: <span class="base-url">unknown</span>
      </p>

      <table class="ui celled table">
        <thead>
          <tr>
            <th>Package</th>
            <th>Size</th>
            <th>Install command</th>
          </tr>
        </thead>
        <tbody>
          {rows}
        </tbody>
      </table>
    </div>
  </body>
"""
    return _PAGE_HEAD.rstrip("\n") + body.rstrip("\n") + _PAGE_TAIL


def build_site(wheel_dir=WHEEL_DIR, site_dir=SITE_OUTPUT_DIR):
    """Rebuild the site directory from the contents of ``wheel_dir``.

    Args:
        wheel_dir: directory holding the collected build artifacts.
        site_dir: output directory; it is deleted and recreated.

    Returns:
        The path of the generated ``index.html``.
    """
    wheel_dir = pathlib.Path(wheel_dir)
    site_dir = pathlib.Path(site_dir)

    shutil.rmtree(site_dir, ignore_errors=True)
    site_dir.mkdir(parents=True)

    artifact_dir = site_dir / "artifacts" / "wheels"
    artifact_dir.mkdir(parents=True)
    for artifact in sorted(wheel_dir.glob("*")):
        # shutil.copy cannot copy directories; only regular files are published.
        if artifact.is_file():
            shutil.copy(artifact, artifact_dir)

    # Use an explicit, labelled timezone so the timestamp is unambiguous no
    # matter which machine produced the build.
    built_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

    index_file = site_dir / "index.html"
    index_file.write_text(
        render_index(sorted(wheel_dir.glob("*.whl")), built_at),
        encoding="utf-8",
    )
    return index_file


def main():
    """Script entry point: build the site at its default location."""
    build_site()
    print("Built site in", SITE_OUTPUT_DIR)


if __name__ == "__main__":
    main()

python-build/copy_wheels.py

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
"""Collect the build artifacts of every Python sub-project into ``wheels/``.

Running this file as a script empties ``wheels/`` next to it and copies in
every file found in the ``dist`` directories listed in ``wheel_dirs``.
"""
import pathlib
import shutil

PYTHON_BUILD_DIR = pathlib.Path(__file__).parent
ROOT_DIR = PYTHON_BUILD_DIR.parent
WHEEL_OUTPUT_DIR = PYTHON_BUILD_DIR / "wheels"

# These should line up with the build.gradle file.
wheel_dirs = [
    ROOT_DIR / "metadata-ingestion/dist",
    ROOT_DIR / "metadata-ingestion-modules/airflow-plugin/dist",
    ROOT_DIR / "metadata-ingestion-modules/dagster-plugin/dist",
    ROOT_DIR / "metadata-ingestion-modules/prefect-plugin/dist",
    ROOT_DIR / "metadata-ingestion-modules/gx-plugin/dist",
]


def copy_wheels(source_dirs=wheel_dirs, output_dir=WHEEL_OUTPUT_DIR):
    """Copy every file from ``source_dirs`` into a freshly created ``output_dir``.

    Args:
        source_dirs: iterable of directories to collect artifacts from.
        output_dir: destination directory; it is deleted and recreated.

    Returns:
        List of ``pathlib.Path`` objects for the copied files.

    Raises:
        FileNotFoundError: if any source directory does not exist. This is
            checked before the output directory is touched, so a failed run
            leaves the previous output intact instead of silently producing
            an empty or partial set of wheels.
    """
    sources = [pathlib.Path(source_dir) for source_dir in source_dirs]
    missing = [source_dir for source_dir in sources if not source_dir.is_dir()]
    if missing:
        raise FileNotFoundError(
            "Wheel source directories not found (did the buildWheel tasks run?): "
            + ", ".join(str(source_dir) for source_dir in missing)
        )

    # Delete and recreate the output directory.
    output_dir = pathlib.Path(output_dir)
    if output_dir.exists():
        shutil.rmtree(output_dir)
    output_dir.mkdir(parents=True)

    # Copy things over.
    copied = []
    for source_dir in sources:
        for artifact in sorted(source_dir.glob("*")):
            # shutil.copy cannot copy directories; only regular files matter.
            if artifact.is_file():
                copied.append(pathlib.Path(shutil.copy(artifact, output_dir)))
    return copied


def main():
    """Script entry point: collect the wheels into the default location."""
    copy_wheels()
    print("Copied wheels to", WHEEL_OUTPUT_DIR)


if __name__ == "__main__":
    main()

settings.gradle

+1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ include 'metadata-ingestion-modules:airflow-plugin'
6464
include 'metadata-ingestion-modules:gx-plugin'
6565
include 'metadata-ingestion-modules:dagster-plugin'
6666
include 'metadata-ingestion-modules:prefect-plugin'
67+
include 'python-build'
6768
include 'smoke-test'
6869
include 'metadata-auth:auth-api'
6970
include 'metadata-service:schema-registry-api'

0 commit comments

Comments
 (0)