
Commit 1cf88dc

Merge branch 'feature/k8s_nightly_test' of https://github.com/pytorch/serve into feature/k8s_nightly_test
2 parents: efdaebf + a28723b


12 files changed, +211 -50 lines changed

.github/workflows/regression_tests_cpu_binaries.yml (+6 -1)

@@ -16,12 +16,16 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, macOS-latest]
+        os: [ubuntu-20.04, macOS-latest, macos-14]
         python-version: ["3.8", "3.9", "3.10"]
         binaries: ["pypi", "conda"]
         exclude:
           - os: macos-latest
             python-version: 3.8
+          - os: macos-14
+            python-version: 3.8
+          - os: macos-14
+            python-version: 3.9
     steps:
       - uses: actions/checkout@v3
         with:
@@ -47,3 +51,4 @@ jobs:
       - name: Validate Torchserve CPU Regression
         run: |
           python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly
+

binaries/conda/build_packages.py (+1 -1)

@@ -22,7 +22,7 @@
 PACKAGES = ["torchserve", "torch-model-archiver", "torch-workflow-archiver"]
 
 # conda convert supported platforms https://docs.conda.io/projects/conda-build/en/stable/resources/commands/conda-convert.html
-PLATFORMS = ["linux-64", "osx-64", "win-64"]  # Add a new platform here
+PLATFORMS = ["linux-64", "osx-64", "win-64", "osx-arm64"]  # Add a new platform here
 
 if os.name == "nt":
     # Assumes miniconda is installed in windows

cpp/README.md (+19 -1)

@@ -4,6 +4,24 @@
 * GCC version: gcc-9
 * cmake version: 3.18+
 * Linux
+
+For convenience, a docker container can be used as the development environment to build and install Torchserve CPP
+```
+cd serve/docker
+# For CPU support
+./build_image.sh -bt dev -cpp
+# For GPU support
+./build_image.sh -bt dev -g [-cv cu121|cu118] -cpp
+```
+
+Start the container and optionally bind mount a build directory into the container to persist build artifacts across container runs
+```
+# For CPU support
+docker run [-v /path/to/build/dir:/serve/cpp/_build] -it pytorch/torchserve:cpp-dev-cpu /bin/bash
+# For GPU support
+docker run --gpus all [-v /path/to/build/dir:/serve/cpp/_build] -it pytorch/torchserve:cpp-dev-gpu /bin/bash
+```
+
 ## Installation and Running TorchServe CPP
 This installation instruction assumes that TorchServe is already installed through pip/conda/source. If this is not the case install it after the `Install dependencies` step through your preferred method.
 
@@ -22,7 +40,7 @@ Then build the backend:
 ```
 ## Dev Build
 cd cpp
-./build.sh [-g cu121|cu118]
+./build.sh
 ```
 
 ### Run TorchServe

cpp/build.sh (-41)

@@ -20,45 +20,6 @@ function detect_platform() {
   echo -e "${COLOR_GREEN}Detected platform: $PLATFORM ${COLOR_OFF}"
 }
 
-function prepare_test_files() {
-  echo -e "${COLOR_GREEN}[ INFO ]Preparing test files ${COLOR_OFF}"
-  local EX_DIR="${TR_DIR}/examples/"
-  rsync -a --link-dest=../../test/resources/ ${BASE_DIR}/test/resources/ ${TR_DIR}/
-  if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin" ]; then
-    wget -q https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin -O "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin"
-  fi
-  if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin" ]; then
-    wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin -O "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin"
-  fi
-  # PT2.2 torch.expport does not support Mac
-  if [ "$PLATFORM" = "Linux" ]; then
-    if [ ! -f "${EX_DIR}/aot_inductor/llama_handler/stories15M.so" ]; then
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/llama_handler/
-      if [ ! -f "${HANDLER_DIR}/stories15M.pt" ]; then
-        wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true -O "${HANDLER_DIR}/stories15M.pt"
-      fi
-      local LLAMA_SO_DIR=${BASE_DIR}/third-party/llama2.so/
-      PYTHONPATH=${LLAMA_SO_DIR}:${PYTHONPATH} python ${BASE_DIR}/../examples/cpp/aot_inductor/llama2/compile.py --checkpoint ${HANDLER_DIR}/stories15M.pt ${HANDLER_DIR}/stories15M.so
-    fi
-    if [ ! -f "${EX_DIR}/aot_inductor/bert_handler/bert-seq.so" ]; then
-      pip install transformers
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/bert_handler/
-      export TOKENIZERS_PARALLELISM=false
-      cd ${BASE_DIR}/../examples/cpp/aot_inductor/bert/
-      python aot_compile_export.py
-      mv bert-seq.so ${HANDLER_DIR}/bert-seq.so
-      mv Transformer_model/tokenizer.json ${HANDLER_DIR}/tokenizer.json
-      export TOKENIZERS_PARALLELISM=""
-    fi
-    if [ ! -f "${EX_DIR}/aot_inductor/resnet_handler/resnet50_pt2.so" ]; then
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/resnet_handler/
-      cd ${HANDLER_DIR}
-      python ${BASE_DIR}/../examples/cpp/aot_inductor/resnet/resnet50_torch_export.py
-    fi
-  fi
-  cd "$BWD" || exit
-}
-
 function build() {
   echo -e "${COLOR_GREEN}[ INFO ]Building backend ${COLOR_OFF}"
   MAYBE_BUILD_QUIC=""
@@ -121,7 +82,6 @@ function build() {
   fi
 
   make -j "$JOBS"
-  make format
   make install
   echo -e "${COLOR_GREEN}torchserve_cpp build is complete. To run unit test: \
     ./_build/test/torchserve_cpp_test ${COLOR_OFF}"
@@ -207,6 +167,5 @@ cd $BASE_DIR
 
 git submodule update --init --recursive
 
-prepare_test_files
 build
 install_torchserve_cpp
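The test-artifact preparation removed from build.sh here (wget-ing tokenizer.bin and the stories15M checkpoints, then AOT-compiling the bert, llama2 and resnet shared objects) moves into the per-example CMakeLists.txt files further down in this commit. A minimal sketch of that CMake pattern follows; the names example_asset, the URL, generate_model.py and my_handler are placeholders for illustration, not the real targets:

```
include(FetchContent)

# Download a test artifact into the build tree instead of wget-ing it from build.sh
FetchContent_Declare(
  example_asset
  URL https://example.com/asset.bin        # placeholder URL
  DOWNLOAD_NO_EXTRACT TRUE
  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/
)
FetchContent_MakeAvailable(example_asset)

# Generate a compiled artifact at build time instead of calling python from build.sh
add_custom_command(
  OUTPUT model.so
  COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/generate_model.py  # placeholder script
  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/generate_model.py
)

# Listing the generated model.so among the sources makes the handler target
# depend on the custom command, so the artifact is produced before the handler builds.
add_library(my_handler SHARED src/my_handler.cc model.so)
```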

cpp/src/backends/core/backend.cc (+2)

@@ -22,12 +22,14 @@ bool Backend::Initialize(const std::string &model_dir) {
   // TODO: windows
   TS_LOGF(DEBUG, "Initializing from manifest: {}", manifest_file);
   if (!manifest_->Initialize(manifest_file)) {
+    TS_LOGF(ERROR, "Could not initialize from manifest: {}", manifest_file);
     return false;
   }
 
   LoadHandler(model_dir);
 
   if (!handler_) {
+    TS_LOG(ERROR, "Could not load handler");
     return false;
   }
 
cpp/src/examples/CMakeLists.txt (+3 -1)

@@ -1,4 +1,6 @@
 
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../../test/resources/ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/)
+
 add_subdirectory("../../../examples/cpp/babyllama/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/babyllama/babyllama_handler/")
 
 add_subdirectory("../../../examples/cpp/llamacpp/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/llamacpp/llamacpp_handler/")
@@ -10,6 +12,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux")
   add_subdirectory("../../../examples/cpp/aot_inductor/llama2/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/llama_handler/")
 
   add_subdirectory("../../../examples/cpp/aot_inductor/bert" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/bert_handler/")
-
+
   add_subdirectory("../../../examples/cpp/aot_inductor/resnet" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/resnet_handler/")
 endif()
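The net effect of this file is that the build tree doubles as the test resource tree: file(COPY ...) seeds the resources once, and each example is added with its binary directory pointed at the matching handler folder inside that tree, so the handler libraries and the files fetched or generated by the example CMakeLists.txt end up next to the resources the C++ tests load. A condensed sketch of the two pieces working together, with paths taken from the diff above:

```
# Seed the build tree with the checked-in test resources
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../../test/resources/
     DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/)

# Build each example inside the corresponding handler folder of that tree,
# so its outputs land where the tests expect them
add_subdirectory("../../../examples/cpp/babyllama/"
                 "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/babyllama/babyllama_handler/")
```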

docker/Dockerfile.cpp (+91)

@@ -0,0 +1,91 @@
+# syntax = docker/dockerfile:experimental
+#
+# This file can build images for CPU & GPU with CPP backend support.
+#
+# Following comments have been shamelessly copied from https://github.com/pytorch/pytorch/blob/master/Dockerfile
+#
+# NOTE: To build this you will need a docker version > 18.06 with
+# experimental enabled and DOCKER_BUILDKIT=1
+#
+# If you do not use buildkit you are not going to have a good time
+#
+# For reference:
+# https://docs.docker.com/develop/develop-images/build_enhancements/
+
+
+ARG BASE_IMAGE=ubuntu:20.04
+ARG PYTHON_VERSION=3.9
+ARG CMAKE_VERSION=3.26.4
+ARG BRANCH_NAME="master"
+ARG USE_CUDA_VERSION=""
+
+FROM ${BASE_IMAGE} AS cpp-dev-image
+ARG BASE_IMAGE
+ARG PYTHON_VERSION
+ARG CMAKE_VERSION
+ARG BRANCH_NAME
+ARG USE_CUDA_VERSION
+ENV PYTHONUNBUFFERED TRUE
+
+RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
+    apt-get update && \
+    apt-get install software-properties-common -y && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+    sudo \
+    vim \
+    git \
+    curl \
+    wget \
+    rsync \
+    gpg \
+    ca-certificates \
+    lsb-release \
+    openjdk-17-jdk \
+    python$PYTHON_VERSION \
+    python$PYTHON_VERSION-dev \
+    python$PYTHON_VERSION-venv \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create a virtual environment and "activate" it by adding it first to the path.
+RUN python$PYTHON_VERSION -m venv /home/venv
+ENV PATH="/home/venv/bin:$PATH"
+
+# Enable installation of recent cmake release
+# Ref: https://apt.kitware.com/
+RUN (wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null) \
+    && (echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null) \
+    && apt-get update \
+    && (test -f /usr/share/doc/kitware-archive-keyring/copyright || sudo rm /usr/share/keyrings/kitware-archive-keyring.gpg) \
+    && sudo apt-get install kitware-archive-keyring \
+    && rm -rf /var/lib/apt/lists/*
+
+# Pin cmake and cmake-data version
+# Ref: https://manpages.ubuntu.com/manpages/xenial/man5/apt_preferences.5.html
+RUN echo "Package: cmake\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake
+RUN echo "Package: cmake-data\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake-data
+
+# Install CUDA toolkit to enable "libtorch" build with GPU support
+RUN apt-get update && \
+    if echo "$BASE_IMAGE" | grep -q "cuda:"; then \
+        if [ "$USE_CUDA_VERSION" = "cu121" ]; then \
+            apt-get -y install cuda-toolkit-12-1; \
+        elif [ "$USE_CUDA_VERSION" = "cu118" ]; then \
+            apt-get -y install cuda-toolkit-11-8; \
+        else \
+            echo "Cuda version not supported by CPP backend: $USE_CUDA_VERSION"; \
+            exit 1; \
+        fi; \
+    fi \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN git clone --recursive https://github.com/pytorch/serve.git \
+    && cd serve \
+    && git checkout ${BRANCH_NAME}
+
+WORKDIR "serve"
+
+# CPP backend binary install depends on "ts" directory being present in python site-packages
+RUN pip install pygit2 && python ts_scripts/install_from_src.py
+
+EXPOSE 8080 8081 8082 7070 7071

docker/build_image.sh (+36 -2)

@@ -12,6 +12,7 @@ USE_CUSTOM_TAG=false
 CUDA_VERSION=""
 USE_LOCAL_SERVE_FOLDER=false
 BUILD_WITH_IPEX=false
+BUILD_CPP=false
 BUILD_NIGHTLY=false
 PYTHON_VERSION=3.9
 
@@ -29,6 +30,7 @@ do
          echo "-t, --tag specify tag name for docker image"
          echo "-lf, --use-local-serve-folder specify this option for the benchmark image if the current 'serve' folder should be used during automated benchmarks"
          echo "-ipex, --build-with-ipex specify to build with intel_extension_for_pytorch"
+         echo "-cpp, --build-cpp specify to build TorchServe CPP"
          echo "-py, --pythonversion specify to python version to use: Possible values: 3.8 3.9 3.10"
          echo "-n, --nightly specify to build with TorchServe nightly"
          exit 0
@@ -76,6 +78,10 @@ do
          BUILD_WITH_IPEX=true
          shift
          ;;
+        -cpp|--build-cpp)
+         BUILD_CPP=true
+         shift
+         ;;
        -n|--nightly)
          BUILD_NIGHTLY=true
          shift
@@ -139,7 +145,12 @@ fi
 
 if [ "${BUILD_TYPE}" == "dev" ] && ! $USE_CUSTOM_TAG ;
 then
-  DOCKER_TAG="pytorch/torchserve:dev-$MACHINE"
+  if [ "${BUILD_CPP}" == "true" ]
+  then
+    DOCKER_TAG="pytorch/torchserve:cpp-dev-$MACHINE"
+  else
+    DOCKER_TAG="pytorch/torchserve:dev-$MACHINE"
+  fi
 fi
 
 if [ "$USE_CUSTOM_TAG" = true ]
@@ -153,12 +164,35 @@ then
   exit 1
 fi
 
+if [ "$BUILD_CPP" == "true" ];
+then
+  if [ "$BUILD_TYPE" != "dev" ];
+  then
+    echo "Only dev container build is supported for CPP"
+    exit 1
+  fi
+
+  if [[ "${MACHINE}" == "gpu" || "${CUDA_VERSION}" != "" ]];
+  then
+    if [[ "${CUDA_VERSION}" != "cu121" && "${CUDA_VERSION}" != "cu118" ]];
+    then
+      echo "Only cuda versions 12.1 and 11.8 are supported for CPP"
+      exit 1
+    fi
+  fi
+fi
+
 if [ "${BUILD_TYPE}" == "production" ]
 then
   DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}" --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" -t "${DOCKER_TAG}" --target production-image .
 elif [ "${BUILD_TYPE}" == "ci" ]
 then
   DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}" --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" -t "${DOCKER_TAG}" --target ci-image .
 else
-  DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}" --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg BUILD_WITH_IPEX="${BUILD_WITH_IPEX}" -t "${DOCKER_TAG}" --target dev-image .
+  if [ "${BUILD_CPP}" == "true" ]
+  then
+    DOCKER_BUILDKIT=1 docker build --file Dockerfile.cpp --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}" --build-arg BRANCH_NAME="${BRANCH_NAME}" -t "${DOCKER_TAG}" --target cpp-dev-image .
+  else
+    DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}" --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg BUILD_WITH_IPEX="${BUILD_WITH_IPEX}" -t "${DOCKER_TAG}" --target dev-image .
+  fi
 fi
examples/cpp/aot_inductor/bert/CMakeLists.txt (+9 -1)

@@ -1,5 +1,13 @@
+
+add_custom_command(
+  OUTPUT bert-seq.so
+  COMMAND TOKENIZERS_PARALLELISM=false python ${CMAKE_CURRENT_SOURCE_DIR}/aot_compile_export.py
+  COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/Transformer_model/tokenizer.json ${CMAKE_CURRENT_BINARY_DIR}/
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/aot_compile_export.py
+)
+
 set(TOKENZIER_CPP_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../../../cpp/third-party/tokenizers-cpp)
 add_subdirectory(${TOKENZIER_CPP_PATH} tokenizers EXCLUDE_FROM_ALL)
-add_library(bert_handler SHARED src/bert_handler.cc)
+add_library(bert_handler SHARED src/bert_handler.cc bert-seq.so)
 target_include_directories(bert_handler PRIVATE ${TOKENZIER_CPP_PATH}/include)
 target_link_libraries(bert_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES} tokenizers_cpp)
examples/cpp/aot_inductor/llama2/CMakeLists.txt (+19 -1)

@@ -1,5 +1,23 @@
+
+FetchContent_Declare(
+  stories15M_pt
+  URL https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true
+  DOWNLOAD_NO_EXTRACT TRUE
+  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/
+)
+
+FetchContent_MakeAvailable(stories15M_pt)
+
+
+add_custom_command(
+  OUTPUT stories15M.so
+  COMMAND PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/../../../../cpp/third-party/llama2.so/ python ${CMAKE_CURRENT_SOURCE_DIR}/compile.py --checkpoint ${CMAKE_CURRENT_BINARY_DIR}/\'stories15M.pt?download=true\' ${CMAKE_CURRENT_BINARY_DIR}/stories15M.so
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/compile.py
+)
+
+
 add_library(llama2_so STATIC ../../../../cpp/third-party/llama2.so/run.cpp)
 target_compile_options(llama2_so PRIVATE -Wall -Wextra -Ofast -fpermissive)
 
-add_library(llama_so_handler SHARED src/llama_handler.cc)
+add_library(llama_so_handler SHARED src/llama_handler.cc stories15M.so)
 target_link_libraries(llama_so_handler PRIVATE llama2_so ts_backends_core ts_utils ${TORCH_LIBRARIES})
examples/cpp/aot_inductor/resnet/CMakeLists.txt (+8 -1)

@@ -1,2 +1,9 @@
-add_library(resnet_handler SHARED src/resnet_handler.cc)
+
+add_custom_command(
+  OUTPUT resnet50_pt2.so
+  COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/resnet50_torch_export.py
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/resnet50_torch_export.py
+)
+
+add_library(resnet_handler SHARED src/resnet_handler.cc resnet50_pt2.so)
 target_link_libraries(resnet_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})

examples/cpp/babyllama/CMakeLists.txt (+17)

@@ -1,3 +1,20 @@
+include(FetchContent)
+
+FetchContent_Declare(
+  stories15M_bin
+  URL https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
+  DOWNLOAD_NO_EXTRACT TRUE
+  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/
+)
+
+FetchContent_Declare(
+  tokenizer_bin
+  URL https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
+  DOWNLOAD_NO_EXTRACT TRUE
+  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/
+)
+
+FetchContent_MakeAvailable(tokenizer_bin stories15M_bin)
 
 add_library(llama2_c STATIC ../../../cpp/third-party/llama2.c/run.c)
 target_compile_options(llama2_c PRIVATE -Wall -Wextra -Ofast -fPIC)
