Skip to content

Commit 5e180e6

Browse files
committed
Add Dockerfile.rocm
1 parent b40937d commit 5e180e6

File tree

2 files changed

+321
-0
lines changed

2 files changed

+321
-0
lines changed

docker/Dockerfile.rocm

+320
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
# syntax=docker/dockerfile:1
# Stable BuildKit frontend: the RUN --mount=type=cache instructions used below
# are supported there, so the legacy "experimental" frontend tag is not needed.
2+
#
3+
# This file can build images for CPU and GPU environments. By default it builds an image for CPU.
# Use the following option to build an image for CUDA/GPU: --build-arg BASE_IMAGE=nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
# Here is the complete command for CUDA/GPU:
# $ DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE=nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 -t torchserve:latest .
# The ROCm installation steps in this file run only when a non-empty --build-arg USE_ROCM_VERSION=<version> is passed.
7+
#
8+
# Following comments have been shamelessly copied from https://github.com/pytorch/pytorch/blob/master/Dockerfile
9+
#
10+
# NOTE: To build this you will need a docker version > 18.06 with
11+
# experimental enabled and DOCKER_BUILDKIT=1
12+
#
13+
# If you do not use buildkit you are not going to have a good time
14+
#
15+
# For reference:
16+
# https://docs.docker.com/develop/develop-images/build_enhancements/
17+
18+
# Build arguments declared ahead of the first FROM.
ARG BASE_IMAGE=ubuntu:24.04
ARG BRANCH_NAME=master
# Default Python version shared by all build stages below.
# ARG and ENV values are scoped to a single build stage. A stage that does not
# re-declare PYTHON_VERSION sees an empty string: the build does not fail, but
# the version-handling logic breaks and the image ends up with the wrong Python.
# Every stage therefore restates `ARG PYTHON_VERSION` to pick up this default.
ARG PYTHON_VERSION=3.11
28+
29+
# Build stage: installs the toolchain and prepares /home/venv, which the later
# stages copy out of this stage.
FROM ${BASE_IMAGE} AS compile-image
# Build args are stage-scoped, so the ones used in this stage are re-declared here.
ARG BASE_IMAGE=ubuntu:24.04
ARG PYTHON_VERSION
ARG BUILD_NIGHTLY
ARG BUILD_FROM_SRC
ARG LOCAL_CHANGES
ARG BRANCH_NAME
ARG REPO_URL=https://github.com/pytorch/serve.git
# key=value form; the space-separated `ENV key value` form is legacy.
ENV PYTHONUNBUFFERED=TRUE
38+
39+
# OS packages for the build stage: upgrade the base image, add the deadsnakes
# PPA, then install the compiler, JDK 17, download tools and the requested
# Python together with its dev headers and venv module.
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
    apt-get update \
    && apt-get upgrade -y \
    && apt-get install software-properties-common -y \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        ca-certificates \
        curl \
        g++ \
        git \
        openjdk-17-jdk \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
        python3-setuptools \
        wget \
    && rm -rf /var/lib/apt/lists/*
56+
57+
# Create the virtual environment and "activate" it for every following
# instruction by putting its bin directory first on PATH. Packages installed
# from here on land in /home/venv, which is what the later stages copy.
RUN python${PYTHON_VERSION} -m venv /home/venv
ENV PATH="/home/venv/bin:${PATH}"
62+
63+
# Empty by default; the ROCm-specific steps below only run when it is non-empty.
ARG USE_ROCM_VERSION=""

# Start from the sources in the local build context.
COPY ./ serve

# When LOCAL_CHANGES contains "false", drop the copied context and use a fresh
# clone of REPO_URL at BRANCH_NAME instead. The build args are quoted so they
# cannot be word-split, and the clone only runs if the removal succeeded.
RUN if echo "$LOCAL_CHANGES" | grep -q "false"; then \
        rm -rf /serve \
        && git clone --recursive "$REPO_URL" -b "$BRANCH_NAME" /serve; \
    fi
72+
73+
# The remaining steps of this stage run from the source tree.
WORKDIR /serve

# Place the entrypoint script at the path the production stage copies it from.
RUN cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
76+
77+
# Install the AMD GPU driver/ROCm packages only when USE_ROCM_VERSION is set.
# The amdgpu-install package version is hard-coded to 6.2.2 (Ubuntu noble) and
# is not derived from USE_ROCM_VERSION.
# The installer .deb is downloaded to /tmp (not into the /serve working
# directory) and removed together with the apt lists in the same layer, so
# neither is left behind.
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
    if [ -n "$USE_ROCM_VERSION" ]; then \
        apt-get update \
        && wget -O /tmp/amdgpu-install_6.2.60202-1_all.deb https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/noble/amdgpu-install_6.2.60202-1_all.deb \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y /tmp/amdgpu-install_6.2.60202-1_all.deb \
        && apt-get update \
        && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y amdgpu-dkms rocm \
        && rm -rf /tmp/amdgpu-install_6.2.60202-1_all.deb /var/lib/apt/lists/*; \
    else \
        echo "Skip ROCm installation"; \
    fi
87+
88+
# Install the Python dependencies through ts_scripts/install_dependencies.py.
# When USE_ROCM_VERSION is set it is passed to the script via --rocm; otherwise
# the script runs without that option. The value is quoted so it reaches the
# script as a single argument, and the test uses the same `-n` form as the
# other ROCm checks in this file.
RUN if [ -n "$USE_ROCM_VERSION" ]; then \
        python$PYTHON_VERSION ./ts_scripts/install_dependencies.py --rocm "$USE_ROCM_VERSION"; \
    else \
        python$PYTHON_VERSION ./ts_scripts/install_dependencies.py; \
    fi
96+
97+
# Install TorchServe into the venv. Make sure the latest version of torchserve
# is uploaded before running this.
#   BUILD_FROM_SRC contains "true"  -> developer requirements, then install from source
#   BUILD_NIGHTLY contains "false"  -> torchserve, torch-model-archiver, torch-workflow-archiver
#   anything else                   -> the corresponding *-nightly packages
RUN case "$BUILD_FROM_SRC" in \
      *true*) \
        python$PYTHON_VERSION -m pip install -r requirements/developer.txt \
        && python$PYTHON_VERSION ts_scripts/install_from_src.py \
        ;; \
      *) \
        case "$BUILD_NIGHTLY" in \
          *false*) \
            python$PYTHON_VERSION -m pip install --no-cache-dir torchserve torch-model-archiver torch-workflow-archiver \
            ;; \
          *) \
            python$PYTHON_VERSION -m pip install --no-cache-dir torchserve-nightly torch-model-archiver-nightly torch-workflow-archiver-nightly \
            ;; \
        esac \
        ;; \
    esac
107+
108+
# Final image for production
FROM ${BASE_IMAGE} AS production-image
# Re-state ARG PYTHON_VERSION to make it active in this build stage (uses the default defined at the top)
ARG PYTHON_VERSION
# key=value form; the space-separated `ENV key value` form is legacy.
ENV PYTHONUNBUFFERED=TRUE
# No default in this stage: the ROCm steps below run only when the build arg is passed.
ARG USE_ROCM_VERSION
114+
115+
# Runtime OS packages for the production image: the requested Python (after
# adding the deadsnakes PPA), JDK 17, build-essential and wget.
# The trailing `cd /tmp` of the original was dropped: a directory change at the
# end of a RUN does not carry over to later instructions.
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
    apt-get update && \
    apt-get upgrade -y && \
    apt-get install software-properties-common -y && \
    add-apt-repository ppa:deadsnakes/ppa -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
    python$PYTHON_VERSION \
    python3-setuptools \
    python$PYTHON_VERSION-dev \
    python$PYTHON_VERSION-venv \
    # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package
    # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905
    openjdk-17-jdk \
    build-essential \
    wget \
    && rm -rf /var/lib/apt/lists/*
132+
133+
# Install the AMD GPU driver/ROCm packages only when USE_ROCM_VERSION is set.
# The amdgpu-install package version is hard-coded to 6.2.2 (Ubuntu noble) and
# is not derived from USE_ROCM_VERSION.
# The installer .deb is downloaded to /tmp and removed together with the apt
# lists in the same layer, so neither ends up in the final image.
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
    if [ -n "$USE_ROCM_VERSION" ]; then \
        apt-get update \
        && wget -O /tmp/amdgpu-install_6.2.60202-1_all.deb https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/noble/amdgpu-install_6.2.60202-1_all.deb \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y /tmp/amdgpu-install_6.2.60202-1_all.deb \
        && apt-get update \
        && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y amdgpu-dkms rocm \
        && rm -rf /tmp/amdgpu-install_6.2.60202-1_all.deb /var/lib/apt/lists/*; \
    else \
        echo "Skip ROCm installation"; \
    fi
143+
144+
# Runtime account and its scratch directory.
RUN useradd -m model-server && mkdir -p /home/model-server/tmp

# Bring the prepared virtualenv (owned by model-server) and the entrypoint
# script over from the build stage, then put the venv first on PATH.
COPY --from=compile-image --chown=model-server /home/venv /home/venv
COPY --from=compile-image /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
ENV PATH="/home/venv/bin:${PATH}"
150+
151+
# ROCm builds only: upgrade pip/setuptools in the venv, then install the package
# located at /opt/rocm/share/amd_smi.
# Both commands use the same versioned interpreter, and --no-cache-dir keeps the
# pip download cache out of the image layer.
RUN if [ -n "$USE_ROCM_VERSION" ]; then \
        python$PYTHON_VERSION -m pip install --no-cache-dir -U pip setuptools \
        && python$PYTHON_VERSION -m pip install --no-cache-dir /opt/rocm/share/amd_smi; \
    fi
156+
157+
# Make the entrypoint executable and hand the home directory to model-server.
RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh \
    && chown -R model-server /home/model-server

# Server configuration from the build context, plus a model-store directory
# owned by model-server.
COPY docker/config.properties /home/model-server/config.properties
RUN mkdir /home/model-server/model-store \
    && chown -R model-server /home/model-server/model-store

# Ports documented for this image.
EXPOSE 8080 8081 8082 7070 7071

# From here on the container runs as the unprivileged user, from its home directory.
USER model-server
WORKDIR /home/model-server
ENV TEMP=/home/model-server/tmp
# The entrypoint script receives "serve" as its default argument.
ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
CMD ["serve"]
170+
171+
# Final image for docker regression
FROM ${BASE_IMAGE} AS ci-image
# Re-state ARG PYTHON_VERSION to make it active in this build stage (uses the default defined at the top)
ARG PYTHON_VERSION
ARG BRANCH_NAME
ARG USE_ROCM_VERSION
# key=value form; the space-separated `ENV key value` form is legacy.
ENV PYTHONUNBUFFERED=TRUE
178+
179+
# OS packages for the CI image: the requested Python (after adding the
# deadsnakes PPA), JDK 17, build tools, numactl, zip/unzip and Node.js/npm,
# followed by the global npm packages installed below.
# NOTE(review): the newman version pin was reconstructed from an
# e-mail-obfuscated token ("[email protected]") in the scraped source —
# confirm it against the repository.
# The trailing `cd /tmp` of the original was dropped: a directory change at the
# end of a RUN does not carry over to later instructions.
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
    apt-get update && \
    apt-get upgrade -y && \
    apt-get install software-properties-common -y && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
    python$PYTHON_VERSION \
    python3-setuptools \
    python$PYTHON_VERSION-dev \
    python$PYTHON_VERSION-venv \
    # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package
    # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905
    openjdk-17-jdk \
    build-essential \
    wget \
    numactl \
    nodejs \
    npm \
    zip \
    unzip \
    && npm install -g newman@5.3.2 newman-reporter-htmlextra markdown-link-check \
    && rm -rf /var/lib/apt/lists/*
202+
203+
# Install the AMD GPU driver/ROCm packages only when USE_ROCM_VERSION is set.
# The amdgpu-install package version is hard-coded to 6.2.2 (Ubuntu noble) and
# is not derived from USE_ROCM_VERSION.
# The installer .deb is downloaded to /tmp and removed together with the apt
# lists in the same layer, so neither ends up in the image.
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
    if [ -n "$USE_ROCM_VERSION" ]; then \
        apt-get update \
        && wget -O /tmp/amdgpu-install_6.2.60202-1_all.deb https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/noble/amdgpu-install_6.2.60202-1_all.deb \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y /tmp/amdgpu-install_6.2.60202-1_all.deb \
        && apt-get update \
        && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y amdgpu-dkms rocm \
        && rm -rf /tmp/amdgpu-install_6.2.60202-1_all.deb /var/lib/apt/lists/*; \
    else \
        echo "Skip ROCm installation"; \
    fi
213+
214+
# Reuse the virtualenv prepared in the build stage and put it first on PATH.
COPY --from=compile-image /home/venv /home/venv

ENV PATH="/home/venv/bin:$PATH"

# ROCm builds only: upgrade pip/setuptools in the venv, then install the package
# located at /opt/rocm/share/amd_smi.
# Both commands use the same versioned interpreter, and --no-cache-dir keeps the
# pip download cache out of the image layer.
RUN if [ -n "$USE_ROCM_VERSION" ]; then \
        python$PYTHON_VERSION -m pip install --no-cache-dir -U pip setuptools \
        && python$PYTHON_VERSION -m pip install --no-cache-dir /opt/rocm/share/amd_smi; \
    fi
223+
224+
# Install the developer requirements straight from the BRANCH_NAME branch of the
# upstream repository. The URL is quoted so the expanded branch name cannot be
# word-split by the shell.
RUN python$PYTHON_VERSION -m pip install --no-cache-dir -r "https://raw.githubusercontent.com/pytorch/serve/${BRANCH_NAME}/requirements/developer.txt"

RUN mkdir /serve
# key=value form; the space-separated `ENV key value` form is legacy.
ENV TS_RUN_IN_DOCKER=True

# The default command runs the regression test script relative to /serve.
WORKDIR /serve
CMD ["python", "test/regression_tests.py"]
231+
232+
# Final image for developer Docker image
# `AS` is upper-cased to match the casing of FROM and of the other stages.
FROM ${BASE_IMAGE} AS dev-image
# Re-state ARG PYTHON_VERSION to make it active in this build stage (uses the default defined at the top)
ARG PYTHON_VERSION
ARG BRANCH_NAME
ARG BUILD_FROM_SRC
ARG USE_ROCM_VERSION
ARG LOCAL_CHANGES
ARG BUILD_WITH_IPEX
ARG IPEX_VERSION=1.11.0
ARG IPEX_URL=https://software.intel.com/ipex-whl-stable
# key=value form; the space-separated `ENV key value` form is legacy.
ENV PYTHONUNBUFFERED=TRUE
244+
# OS packages for the developer image: packaging and build tools, sudo, git,
# vim, the requested Python (after adding the deadsnakes PPA), JDK 17,
# numactl, zip/unzip and Node.js/npm, followed by the global npm packages
# installed below.
# NOTE(review): the newman version pin was reconstructed from an
# e-mail-obfuscated token ("[email protected]") in the scraped source —
# confirm it against the repository.
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
    apt-get update && \
    apt-get upgrade -y && \
    apt-get install software-properties-common -y && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
    fakeroot \
    ca-certificates \
    dpkg-dev \
    sudo \
    g++ \
    git \
    python$PYTHON_VERSION \
    python$PYTHON_VERSION-dev \
    python3-setuptools \
    python$PYTHON_VERSION-venv \
    # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package
    # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905
    openjdk-17-jdk \
    build-essential \
    wget \
    curl \
    vim \
    numactl \
    nodejs \
    npm \
    zip \
    unzip \
    && npm install -g newman@5.3.2 newman-reporter-htmlextra markdown-link-check \
    && rm -rf /var/lib/apt/lists/*
274+
275+
# Install the AMD GPU driver/ROCm packages only when USE_ROCM_VERSION is set.
# The amdgpu-install package version is hard-coded to 6.2.2 (Ubuntu noble) and
# is not derived from USE_ROCM_VERSION.
# The installer .deb is downloaded to /tmp and removed together with the apt
# lists in the same layer, so neither ends up in the image.
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
    if [ -n "$USE_ROCM_VERSION" ]; then \
        apt-get update \
        && wget -O /tmp/amdgpu-install_6.2.60202-1_all.deb https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/noble/amdgpu-install_6.2.60202-1_all.deb \
        && DEBIAN_FRONTEND=noninteractive apt-get install -y /tmp/amdgpu-install_6.2.60202-1_all.deb \
        && apt-get update \
        && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y amdgpu-dkms rocm \
        && rm -rf /tmp/amdgpu-install_6.2.60202-1_all.deb /var/lib/apt/lists/*; \
    else \
        echo "Skip ROCm installation"; \
    fi
285+
286+
# Start from the sources in the local build context.
COPY ./ serve

# Build args are stage-scoped and REPO_URL is otherwise only declared in the
# compile-image stage, so without this declaration `git clone` below would
# receive an empty URL. The default matches the one used in compile-image.
ARG REPO_URL=https://github.com/pytorch/serve.git

# When LOCAL_CHANGES contains "false", drop the copied context and use a fresh
# clone of REPO_URL at BRANCH_NAME instead. The build args are quoted so they
# cannot be word-split, and the clone only runs if the removal succeeded.
RUN if echo "$LOCAL_CHANGES" | grep -q "false"; then \
        rm -rf /serve \
        && git clone --recursive "$REPO_URL" -b "$BRANCH_NAME" /serve; \
    fi
293+
294+
# Reuse the virtualenv prepared in the build stage and put it first on PATH.
COPY --from=compile-image /home/venv /home/venv
ENV PATH="/home/venv/bin:$PATH"

# ROCm builds only: upgrade pip/setuptools in the venv, then install the package
# located at /opt/rocm/share/amd_smi.
# Both commands use the same versioned interpreter, and --no-cache-dir keeps the
# pip download cache out of the image layer.
RUN if [ -n "$USE_ROCM_VERSION" ]; then \
        python$PYTHON_VERSION -m pip install --no-cache-dir -U pip setuptools \
        && python$PYTHON_VERSION -m pip install --no-cache-dir /opt/rocm/share/amd_smi; \
    fi
302+
303+
# Absolute path, matching the compile-image stage and the /serve clone target,
# instead of a path relative to whatever working directory the base image sets.
WORKDIR /serve

# Install the developer requirements and TorchServe from source in "dev" mode,
# then create the model-server user with its tmp directory, entrypoint script,
# config file and model store, and hand the home directory, model store and
# virtualenv over to that user.
RUN python$PYTHON_VERSION -m pip install -U pip setuptools \
    && python$PYTHON_VERSION -m pip install --no-cache-dir -r requirements/developer.txt \
    && python$PYTHON_VERSION ts_scripts/install_from_src.py --environment=dev \
    && useradd -m model-server \
    && mkdir -p /home/model-server/tmp \
    && cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh \
    && chmod +x /usr/local/bin/dockerd-entrypoint.sh \
    && chown -R model-server /home/model-server \
    && cp docker/config.properties /home/model-server/config.properties \
    && mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store \
    && chown -R model-server /home/venv
316+
# Ports documented for this image.
EXPOSE 8080 8081 8082 7070 7071

# NOTE(review): unlike the production stage, this stage has no USER instruction,
# so the default user of the base image is kept — confirm this is intended.
WORKDIR /home/model-server
ENV TEMP=/home/model-server/tmp
# The entrypoint script receives "serve" as its default argument.
ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
CMD ["serve"]

docker/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr
164164
./build_image.sh -bt dev -g [-cv cu121|cu118] -cpp
165165
```
166166

167+
- For ROCm support (*experimental*), refer to [this documentation](../docs/hardware_support/amd_support.md).
167168

168169
## Start a container with a TorchServe image
169170

0 commit comments

Comments
 (0)