Skip to content

Commit b443a03

Browse files
authored
Merge branch 'master' into fix-log-worker-traceback
2 parents 0526621 + 13d092c commit b443a03

37 files changed

+967
-81
lines changed
+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
name: Kubernetes Nightly Tests
2+
3+
on:
4+
workflow_dispatch:
5+
# runs every day at 6:15am UTC
6+
schedule:
7+
- cron: '15 6 * * *'
8+
9+
jobs:
10+
kubernetes-tests:
11+
runs-on: [self-hosted, regression-test-gpu]
12+
steps:
13+
- name: Clean up previous run
14+
run: |
15+
echo "Cleaning up previous run"
16+
ls -la ./
17+
sudo rm -rf ./* || true
18+
sudo rm -rf ./.??* || true
19+
ls -la ./
20+
- name: Install minikube and kubectl
21+
run: |
22+
curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
23+
sudo install minikube-linux-amd64 /usr/local/bin/minikube
24+
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
25+
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
26+
echo "/usr/local/bin" >> $GITHUB_PATH
27+
- name: Setup Python 3.9
28+
uses: actions/setup-python@v5
29+
with:
30+
python-version: 3.9
31+
architecture: x64
32+
- name: Checkout TorchServe
33+
uses: actions/checkout@v3
34+
- name: Validate TorchServe
35+
run: ./kubernetes/tests/scripts/test_mnist.sh

.github/workflows/official_release.yml

+4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ jobs:
1111
steps:
1212
- name: Setup Conda
1313
uses: s-weigand/setup-conda@v1
14+
with:
15+
activate-conda: true
16+
update-conda: false
17+
python-version: "3.9"
1418
- name: Setup Anaconda
1519
run: |
1620
conda --version

.github/workflows/official_release_docker.yml

+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ jobs:
3232
architecture: x64
3333
- name: Checkout TorchServe
3434
uses: actions/checkout@v3
35+
with:
36+
submodules: recursive
3537
- name: Login to Docker
3638
env:
3739
DOCKER_PASSWORD: ${{secrets.DOCKER_PASSWORD}}

.github/workflows/regression_tests_cpu_binaries.yml

+24-4
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name: Run Regression Tests for CPU nightly binaries
33
on:
44
# run every day at 6:15am
55
schedule:
6-
- cron: '15 6 * * *'
6+
- cron: '15 6 * * *'
77

88
concurrency:
99
group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }}
@@ -16,34 +16,54 @@ jobs:
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
os: [ubuntu-20.04, macOS-latest]
19+
os: [ubuntu-20.04, macOS-latest, macos-14]
2020
python-version: ["3.8", "3.9", "3.10"]
2121
binaries: ["pypi", "conda"]
2222
exclude:
2323
- os: macos-latest
2424
python-version: 3.8
25+
- os: macos-14
26+
python-version: 3.8
27+
- os: macos-14
28+
python-version: 3.9
2529
steps:
2630
- uses: actions/checkout@v3
2731
with:
2832
submodules: recursive
2933
- name: Setup conda with Python ${{ matrix.python-version }}
34+
if: matrix.os == 'macos-14'
35+
uses: conda-incubator/setup-miniconda@v3
36+
with:
37+
auto-update-conda: true
38+
channels: anaconda, conda-forge
39+
python-version: ${{ matrix.python-version }}
40+
- name: Setup conda with Python ${{ matrix.python-version }}
41+
if: matrix.os != 'macos-14'
3042
uses: s-weigand/setup-conda@v1
3143
with:
3244
update-conda: true
3345
python-version: ${{ matrix.python-version }}
3446
conda-channels: anaconda, conda-forge
35-
- run: conda --version
36-
- run: python --version
3747
- name: Setup Java 17
3848
uses: actions/setup-java@v3
3949
with:
4050
distribution: 'zulu'
4151
java-version: '17'
4252
- name: Checkout TorchServe
4353
uses: actions/checkout@v3
54+
- name: Run install dependencies and regression test
55+
if: matrix.os == 'macos-14'
56+
shell: bash -el {0}
57+
run: |
58+
conda info
59+
python ts_scripts/install_dependencies.py --environment=dev
60+
python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly
4461
- name: Install dependencies
62+
if: matrix.os != 'macos-14'
4563
run: |
4664
python ts_scripts/install_dependencies.py --environment=dev
4765
- name: Validate Torchserve CPU Regression
66+
if: matrix.os != 'macos-14'
4867
run: |
4968
python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly
69+

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
![Benchmark Nightly](https://github.com/pytorch/serve/actions/workflows/benchmark_nightly.yml/badge.svg)
66
![Docker Regression Nightly](https://github.com/pytorch/serve/actions/workflows/regression_tests_docker.yml/badge.svg)
77
![KServe Regression Nightly](https://github.com/pytorch/serve/actions/workflows/kserve_cpu_tests.yml/badge.svg)
8+
![Kubernetes Regression Nightly](https://github.com/pytorch/serve/actions/workflows/kubernetes_tests.yml/badge.svg)
89

910
TorchServe is a flexible and easy-to-use tool for serving and scaling PyTorch models in production.
1011

binaries/conda/build_packages.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
PACKAGES = ["torchserve", "torch-model-archiver", "torch-workflow-archiver"]
2323

2424
# conda convert supported platforms https://docs.conda.io/projects/conda-build/en/stable/resources/commands/conda-convert.html
25-
PLATFORMS = ["linux-64", "osx-64", "win-64"] # Add a new platform here
25+
PLATFORMS = ["linux-64", "osx-64", "win-64", "osx-arm64"] # Add a new platform here
2626

2727
if os.name == "nt":
2828
# Assumes miniconda is installed in windows

cpp/README.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
## Requirements
33
* C++17
44
* GCC version: gcc-9
5-
* cmake version: 3.18+
5+
* cmake version: 3.26.4+
66
* Linux
77

8-
For convenience, a docker container can be used as the development environment to build and install Torchserve CPP
8+
For convenience, a [docker container](../docker/README.md#create-torchserve-docker-image) can be used as the development environment to build and install Torchserve CPP
99
```
1010
cd serve/docker
1111
# For CPU support
@@ -21,6 +21,7 @@ docker run [-v /path/to/build/dir:/serve/cpp/_build] -it pytorch/torchserve:cpp-
2121
# For GPU support
2222
docker run --gpus all [-v /path/to/build/dir:/serve/cpp/_build] -it pytorch/torchserve:cpp-dev-gpu /bin/bash
2323
```
24+
`Warning`: The dev docker container does not install all necessary dependencies or build Torchserve CPP. Please follow the steps below after starting the container.
2425

2526
## Installation and Running TorchServe CPP
2627
These installation instructions assume that TorchServe is already installed through pip/conda/source. If this is not the case, install it after the `Install dependencies` step through your preferred method.

cpp/build.sh

-41
Original file line numberDiff line numberDiff line change
@@ -20,45 +20,6 @@ function detect_platform() {
2020
echo -e "${COLOR_GREEN}Detected platform: $PLATFORM ${COLOR_OFF}"
2121
}
2222

23-
function prepare_test_files() {
24-
echo -e "${COLOR_GREEN}[ INFO ]Preparing test files ${COLOR_OFF}"
25-
local EX_DIR="${TR_DIR}/examples/"
26-
rsync -a --link-dest=../../test/resources/ ${BASE_DIR}/test/resources/ ${TR_DIR}/
27-
if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin" ]; then
28-
wget -q https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin -O "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin"
29-
fi
30-
if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin" ]; then
31-
wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin -O "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin"
32-
fi
33-
# PT2.2 torch.expport does not support Mac
34-
if [ "$PLATFORM" = "Linux" ]; then
35-
if [ ! -f "${EX_DIR}/aot_inductor/llama_handler/stories15M.so" ]; then
36-
local HANDLER_DIR=${EX_DIR}/aot_inductor/llama_handler/
37-
if [ ! -f "${HANDLER_DIR}/stories15M.pt" ]; then
38-
wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true -O "${HANDLER_DIR}/stories15M.pt"
39-
fi
40-
local LLAMA_SO_DIR=${BASE_DIR}/third-party/llama2.so/
41-
PYTHONPATH=${LLAMA_SO_DIR}:${PYTHONPATH} python ${BASE_DIR}/../examples/cpp/aot_inductor/llama2/compile.py --checkpoint ${HANDLER_DIR}/stories15M.pt ${HANDLER_DIR}/stories15M.so
42-
fi
43-
if [ ! -f "${EX_DIR}/aot_inductor/bert_handler/bert-seq.so" ]; then
44-
pip install transformers
45-
local HANDLER_DIR=${EX_DIR}/aot_inductor/bert_handler/
46-
export TOKENIZERS_PARALLELISM=false
47-
cd ${BASE_DIR}/../examples/cpp/aot_inductor/bert/
48-
python aot_compile_export.py
49-
mv bert-seq.so ${HANDLER_DIR}/bert-seq.so
50-
mv Transformer_model/tokenizer.json ${HANDLER_DIR}/tokenizer.json
51-
export TOKENIZERS_PARALLELISM=""
52-
fi
53-
if [ ! -f "${EX_DIR}/aot_inductor/resnet_handler/resnet50_pt2.so" ]; then
54-
local HANDLER_DIR=${EX_DIR}/aot_inductor/resnet_handler/
55-
cd ${HANDLER_DIR}
56-
python ${BASE_DIR}/../examples/cpp/aot_inductor/resnet/resnet50_torch_export.py
57-
fi
58-
fi
59-
cd "$BWD" || exit
60-
}
61-
6223
function build() {
6324
echo -e "${COLOR_GREEN}[ INFO ]Building backend ${COLOR_OFF}"
6425
MAYBE_BUILD_QUIC=""
@@ -121,7 +82,6 @@ function build() {
12182
fi
12283

12384
make -j "$JOBS"
124-
make format
12585
make install
12686
echo -e "${COLOR_GREEN}torchserve_cpp build is complete. To run unit test: \
12787
./_build/test/torchserve_cpp_test ${COLOR_OFF}"
@@ -207,6 +167,5 @@ cd $BASE_DIR
207167

208168
git submodule update --init --recursive
209169

210-
prepare_test_files
211170
build
212171
install_torchserve_cpp

cpp/src/backends/core/backend.cc

+2
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@ bool Backend::Initialize(const std::string &model_dir) {
2222
// TODO: windows
2323
TS_LOGF(DEBUG, "Initializing from manifest: {}", manifest_file);
2424
if (!manifest_->Initialize(manifest_file)) {
25+
TS_LOGF(ERROR, "Could not initialize from manifest: {}", manifest_file);
2526
return false;
2627
}
2728

2829
LoadHandler(model_dir);
2930

3031
if (!handler_) {
32+
TS_LOG(ERROR, "Could not load handler");
3133
return false;
3234
}
3335

cpp/src/examples/CMakeLists.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11

2+
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../../test/resources/ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/)
3+
24
add_subdirectory("../../../examples/cpp/babyllama/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/babyllama/babyllama_handler/")
35

46
add_subdirectory("../../../examples/cpp/llamacpp/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/llamacpp/llamacpp_handler/")
@@ -10,6 +12,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux")
1012
add_subdirectory("../../../examples/cpp/aot_inductor/llama2/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/llama_handler/")
1113

1214
add_subdirectory("../../../examples/cpp/aot_inductor/bert" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/bert_handler/")
13-
15+
1416
add_subdirectory("../../../examples/cpp/aot_inductor/resnet" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/resnet_handler/")
1517
endif()

docker/Dockerfile.cpp

+9-21
Original file line numberDiff line numberDiff line change
@@ -16,29 +16,34 @@
1616
ARG BASE_IMAGE=ubuntu:20.04
1717
ARG PYTHON_VERSION=3.9
1818
ARG CMAKE_VERSION=3.26.4
19+
ARG GCC_VERSION=9
1920
ARG BRANCH_NAME="master"
2021
ARG USE_CUDA_VERSION=""
2122

2223
FROM ${BASE_IMAGE} AS cpp-dev-image
2324
ARG BASE_IMAGE
2425
ARG PYTHON_VERSION
2526
ARG CMAKE_VERSION
27+
ARG GCC_VERSION
2628
ARG BRANCH_NAME
2729
ARG USE_CUDA_VERSION
30+
ARG DEBIAN_FRONTEND=noninteractive
2831
ENV PYTHONUNBUFFERED TRUE
32+
ENV TZ=Etc/UTC
2933

3034
RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
3135
apt-get update && \
3236
apt-get install software-properties-common -y && \
3337
add-apt-repository -y ppa:deadsnakes/ppa && \
34-
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
38+
apt-get install --no-install-recommends -y \
3539
sudo \
3640
vim \
3741
git \
3842
curl \
3943
wget \
4044
rsync \
4145
gpg \
46+
gcc-$GCC_VERSION \
4247
ca-certificates \
4348
lsb-release \
4449
openjdk-17-jdk \
@@ -51,32 +56,15 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
5156
RUN python$PYTHON_VERSION -m venv /home/venv
5257
ENV PATH="/home/venv/bin:$PATH"
5358
54-
# Enable installation of recent cmake release
59+
# Enable installation of recent cmake release and pin cmake & cmake-data version
5560
# Ref: https://apt.kitware.com/
5661
RUN (wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null) \
5762
&& (echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null) \
5863
&& apt-get update \
5964
&& (test -f /usr/share/doc/kitware-archive-keyring/copyright || sudo rm /usr/share/keyrings/kitware-archive-keyring.gpg) \
6065
&& sudo apt-get install kitware-archive-keyring \
61-
&& rm -rf /var/lib/apt/lists/*
62-
63-
# Pin cmake and cmake-data version
64-
# Ref: https://manpages.ubuntu.com/manpages/xenial/man5/apt_preferences.5.html
65-
RUN echo "Package: cmake\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake
66-
RUN echo "Package: cmake-data\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake-data
67-
68-
# Install CUDA toolkit to enable "libtorch" build with GPU support
69-
RUN apt-get update && \
70-
if echo "$BASE_IMAGE" | grep -q "cuda:"; then \
71-
if [ "$USE_CUDA_VERSION" = "cu121" ]; then \
72-
apt-get -y install cuda-toolkit-12-1; \
73-
elif [ "$USE_CUDA_VERSION" = "cu118" ]; then \
74-
apt-get -y install cuda-toolkit-11-8; \
75-
else \
76-
echo "Cuda version not supported by CPP backend: $USE_CUDA_VERSION"; \
77-
exit 1; \
78-
fi; \
79-
fi \
66+
&& echo "Package: cmake\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake \
67+
&& echo "Package: cmake-data\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake-data \
8068
&& rm -rf /var/lib/apt/lists/*
8169
8270
RUN git clone --recursive https://github.com/pytorch/serve.git \

docker/README.md

+11
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p
4141
|-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.|
4242
|-cv, --cudaversion| Specify the cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`, `cu121`. Default `cu121`|
4343
|-ipex, --build-with-ipex| Specify to build with intel_extension_for_pytorch. If not specified, script builds without intel_extension_for_pytorch.|
44+
|-cpp, --build-cpp| Specify to build TorchServe CPP.|
4445
|-n, --nightly| Specify to build with TorchServe nightly.|
4546
|-py, --pythonversion| Specify the python version to use. Supported values `3.8`, `3.9`, `3.10`, `3.11`. Default `3.9`|
4647

@@ -147,6 +148,16 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr
147148
./build_image.sh -bt dev -ipex -t torchserve-ipex:1.0
148149
```
149150

151+
- For creating image to build Torchserve CPP with CPU support:
152+
```bash
153+
./build_image.sh -bt dev -cpp
154+
```
155+
156+
- For creating image to build Torchserve CPP with GPU support:
157+
```bash
158+
./build_image.sh -bt dev -g [-cv cu121|cu118] -cpp
159+
```
160+
150161

151162
## Start a container with a TorchServe image
152163

docker/build_image.sh

+7-2
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,14 @@ then
174174

175175
if [[ "${MACHINE}" == "gpu" || "${CUDA_VERSION}" != "" ]];
176176
then
177-
if [[ "${CUDA_VERSION}" != "cu121" && "${CUDA_VERSION}" != "cu118" ]];
177+
if [ "${CUDA_VERSION}" == "cu121" ];
178178
then
179-
echo "Only cuda versions 12.1 and 11.8 are supported for CPP"
179+
BASE_IMAGE="nvidia/cuda:12.1.1-devel-ubuntu20.04"
180+
elif [ "${CUDA_VERSION}" == "cu118" ];
181+
then
182+
BASE_IMAGE="nvidia/cuda:11.8.0-devel-ubuntu20.04"
183+
else
184+
echo "Cuda version $CUDA_VERSION is not supported for CPP"
180185
exit 1
181186
fi
182187
fi

0 commit comments

Comments
 (0)