
Commit 1cf88dc

Merge branch 'feature/k8s_nightly_test' of https://github.com/pytorch/serve into feature/k8s_nightly_test
2 parents: efdaebf + a28723b


12 files changed, +211 -50 lines changed

.github/workflows/regression_tests_cpu_binaries.yml (+6 -1)

@@ -16,12 +16,16 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, macOS-latest]
+        os: [ubuntu-20.04, macOS-latest, macos-14]
         python-version: ["3.8", "3.9", "3.10"]
         binaries: ["pypi", "conda"]
         exclude:
           - os: macos-latest
             python-version: 3.8
+          - os: macos-14
+            python-version: 3.8
+          - os: macos-14
+            python-version: 3.9
     steps:
       - uses: actions/checkout@v3
         with:
@@ -47,3 +51,4 @@ jobs:
       - name: Validate Torchserve CPU Regression
         run: |
           python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly
+

binaries/conda/build_packages.py (+1 -1)

@@ -22,7 +22,7 @@
 PACKAGES = ["torchserve", "torch-model-archiver", "torch-workflow-archiver"]
 
 # conda convert supported platforms https://docs.conda.io/projects/conda-build/en/stable/resources/commands/conda-convert.html
-PLATFORMS = ["linux-64", "osx-64", "win-64"]  # Add a new platform here
+PLATFORMS = ["linux-64", "osx-64", "win-64", "osx-arm64"]  # Add a new platform here
 
 if os.name == "nt":
     # Assumes miniconda is installed in windows

cpp/README.md (+19 -1)

@@ -4,6 +4,24 @@
 * GCC version: gcc-9
 * cmake version: 3.18+
 * Linux
+
+For convenience, a docker container can be used as the development environment to build and install Torchserve CPP
+```
+cd serve/docker
+# For CPU support
+./build_image.sh -bt dev -cpp
+# For GPU support
+./build_image.sh -bt dev -g [-cv cu121|cu118] -cpp
+```
+
+Start the container and optionally bind mount a build directory into the container to persist build artifacts across container runs
+```
+# For CPU support
+docker run [-v /path/to/build/dir:/serve/cpp/_build] -it pytorch/torchserve:cpp-dev-cpu /bin/bash
+# For GPU support
+docker run --gpus all [-v /path/to/build/dir:/serve/cpp/_build] -it pytorch/torchserve:cpp-dev-gpu /bin/bash
+```
+
 ## Installation and Running TorchServe CPP
 This installation instruction assumes that TorchServe is already installed through pip/conda/source. If this is not the case install it after the `Install dependencies` step through your preferred method.
 
@@ -22,7 +40,7 @@ Then build the backend:
 ```
 ## Dev Build
 cd cpp
-./build.sh [-g cu121|cu118]
+./build.sh
 ```
 
 ### Run TorchServe

cpp/build.sh (-41)

@@ -20,45 +20,6 @@ function detect_platform() {
   echo -e "${COLOR_GREEN}Detected platform: $PLATFORM ${COLOR_OFF}"
 }
 
-function prepare_test_files() {
-  echo -e "${COLOR_GREEN}[ INFO ]Preparing test files ${COLOR_OFF}"
-  local EX_DIR="${TR_DIR}/examples/"
-  rsync -a --link-dest=../../test/resources/ ${BASE_DIR}/test/resources/ ${TR_DIR}/
-  if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin" ]; then
-    wget -q https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin -O "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin"
-  fi
-  if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin" ]; then
-    wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin -O "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin"
-  fi
-  # PT2.2 torch.expport does not support Mac
-  if [ "$PLATFORM" = "Linux" ]; then
-    if [ ! -f "${EX_DIR}/aot_inductor/llama_handler/stories15M.so" ]; then
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/llama_handler/
-      if [ ! -f "${HANDLER_DIR}/stories15M.pt" ]; then
-        wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true -O "${HANDLER_DIR}/stories15M.pt"
-      fi
-      local LLAMA_SO_DIR=${BASE_DIR}/third-party/llama2.so/
-      PYTHONPATH=${LLAMA_SO_DIR}:${PYTHONPATH} python ${BASE_DIR}/../examples/cpp/aot_inductor/llama2/compile.py --checkpoint ${HANDLER_DIR}/stories15M.pt ${HANDLER_DIR}/stories15M.so
-    fi
-    if [ ! -f "${EX_DIR}/aot_inductor/bert_handler/bert-seq.so" ]; then
-      pip install transformers
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/bert_handler/
-      export TOKENIZERS_PARALLELISM=false
-      cd ${BASE_DIR}/../examples/cpp/aot_inductor/bert/
-      python aot_compile_export.py
-      mv bert-seq.so ${HANDLER_DIR}/bert-seq.so
-      mv Transformer_model/tokenizer.json ${HANDLER_DIR}/tokenizer.json
-      export TOKENIZERS_PARALLELISM=""
-    fi
-    if [ ! -f "${EX_DIR}/aot_inductor/resnet_handler/resnet50_pt2.so" ]; then
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/resnet_handler/
-      cd ${HANDLER_DIR}
-      python ${BASE_DIR}/../examples/cpp/aot_inductor/resnet/resnet50_torch_export.py
-    fi
-  fi
-  cd "$BWD" || exit
-}
-
 function build() {
   echo -e "${COLOR_GREEN}[ INFO ]Building backend ${COLOR_OFF}"
   MAYBE_BUILD_QUIC=""
@@ -121,7 +82,6 @@ function build() {
   fi
 
   make -j "$JOBS"
-  make format
   make install
   echo -e "${COLOR_GREEN}torchserve_cpp build is complete. To run unit test: \
     ./_build/test/torchserve_cpp_test ${COLOR_OFF}"
@@ -207,6 +167,5 @@ cd $BASE_DIR
 
 git submodule update --init --recursive
 
-prepare_test_files
 build
 install_torchserve_cpp
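The test-artifact preparation removed from build.sh here (wget-ing tokenizer.bin and the stories15M checkpoints, then AOT-compiling the bert, llama2 and resnet shared objects) moves into the per-example CMakeLists.txt files further down in this commit. A minimal sketch of that CMake pattern follows; the names example_asset, the URL, generate_model.py and my_handler are placeholders for illustration, not the real targets:

```
include(FetchContent)

# Download a test artifact into the build tree instead of wget-ing it from build.sh
FetchContent_Declare(
  example_asset
  URL https://example.com/asset.bin        # placeholder URL
  DOWNLOAD_NO_EXTRACT TRUE
  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/
)
FetchContent_MakeAvailable(example_asset)

# Generate a compiled artifact at build time instead of calling python from build.sh
add_custom_command(
  OUTPUT model.so
  COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/generate_model.py  # placeholder script
  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/generate_model.py
)

# Listing the generated model.so among the sources makes the handler target
# depend on the custom command, so the artifact is produced before the handler builds.
add_library(my_handler SHARED src/my_handler.cc model.so)
```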

cpp/src/backends/core/backend.cc (+2)

@@ -22,12 +22,14 @@ bool Backend::Initialize(const std::string &model_dir) {
   // TODO: windows
   TS_LOGF(DEBUG, "Initializing from manifest: {}", manifest_file);
   if (!manifest_->Initialize(manifest_file)) {
+    TS_LOGF(ERROR, "Could not initialize from manifest: {}", manifest_file);
     return false;
   }
 
   LoadHandler(model_dir);
 
   if (!handler_) {
+    TS_LOG(ERROR, "Could not load handler");
     return false;
   }
 
cpp/src/examples/CMakeLists.txt (+3 -1)

@@ -1,4 +1,6 @@
 
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../../test/resources/ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/)
+
 add_subdirectory("../../../examples/cpp/babyllama/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/babyllama/babyllama_handler/")
 
 add_subdirectory("../../../examples/cpp/llamacpp/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/llamacpp/llamacpp_handler/")
@@ -10,6 +12,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux")
   add_subdirectory("../../../examples/cpp/aot_inductor/llama2/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/llama_handler/")
 
   add_subdirectory("../../../examples/cpp/aot_inductor/bert" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/bert_handler/")
-
+
   add_subdirectory("../../../examples/cpp/aot_inductor/resnet" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/resnet_handler/")
 endif()
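The net effect of this file is that the build tree doubles as the test resource tree: file(COPY ...) seeds the resources once, and each example is added with its binary directory pointed at the matching handler folder inside that tree, so the handler libraries and the files fetched or generated by the example CMakeLists.txt end up next to the resources the C++ tests load. A condensed sketch of the two pieces working together, with paths taken from the diff above:

```
# Seed the build tree with the checked-in test resources
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../../test/resources/
     DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/)

# Build each example inside the corresponding handler folder of that tree,
# so its outputs land where the tests expect them
add_subdirectory("../../../examples/cpp/babyllama/"
                 "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/babyllama/babyllama_handler/")
```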

docker/Dockerfile.cpp (+91)

@@ -0,0 +1,91 @@
+# syntax = docker/dockerfile:experimental
+#
+# This file can build images for CPU & GPU with CPP backend support.
+#
+# Following comments have been shamelessly copied from https://github.com/pytorch/pytorch/blob/master/Dockerfile
+#
+# NOTE: To build this you will need a docker version > 18.06 with
+# experimental enabled and DOCKER_BUILDKIT=1
+#
+# If you do not use buildkit you are not going to have a good time
+#
+# For reference:
+# https://docs.docker.com/develop/develop-images/build_enhancements/
+
+
+ARG BASE_IMAGE=ubuntu:20.04
+ARG PYTHON_VERSION=3.9
+ARG CMAKE_VERSION=3.26.4
+ARG BRANCH_NAME="master"
+ARG USE_CUDA_VERSION=""
+
+FROM ${BASE_IMAGE} AS cpp-dev-image
+ARG BASE_IMAGE
+ARG PYTHON_VERSION
+ARG CMAKE_VERSION
+ARG BRANCH_NAME
+ARG USE_CUDA_VERSION
+ENV PYTHONUNBUFFERED TRUE
+
+RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
+    apt-get update && \
+    apt-get install software-properties-common -y && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+    sudo \
+    vim \
+    git \
+    curl \
+    wget \
+    rsync \
+    gpg \
+    ca-certificates \
+    lsb-release \
+    openjdk-17-jdk \
+    python$PYTHON_VERSION \
+    python$PYTHON_VERSION-dev \
+    python$PYTHON_VERSION-venv \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create a virtual environment and "activate" it by adding it first to the path.
+RUN python$PYTHON_VERSION -m venv /home/venv
+ENV PATH="/home/venv/bin:$PATH"
+
+# Enable installation of recent cmake release
+# Ref: https://apt.kitware.com/
+RUN (wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null) \
+    && (echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null) \
+    && apt-get update \
+    && (test -f /usr/share/doc/kitware-archive-keyring/copyright || sudo rm /usr/share/keyrings/kitware-archive-keyring.gpg) \
+    && sudo apt-get install kitware-archive-keyring \
+    && rm -rf /var/lib/apt/lists/*
+
+# Pin cmake and cmake-data version
+# Ref: https://manpages.ubuntu.com/manpages/xenial/man5/apt_preferences.5.html
+RUN echo "Package: cmake\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake
+RUN echo "Package: cmake-data\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake-data
+
+# Install CUDA toolkit to enable "libtorch" build with GPU support
+RUN apt-get update && \
+    if echo "$BASE_IMAGE" | grep -q "cuda:"; then \
+        if [ "$USE_CUDA_VERSION" = "cu121" ]; then \
+            apt-get -y install cuda-toolkit-12-1; \
+        elif [ "$USE_CUDA_VERSION" = "cu118" ]; then \
+            apt-get -y install cuda-toolkit-11-8; \
+        else \
+            echo "Cuda version not supported by CPP backend: $USE_CUDA_VERSION"; \
+            exit 1; \
+        fi; \
+    fi \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN git clone --recursive https://github.com/pytorch/serve.git \
+    && cd serve \
+    && git checkout ${BRANCH_NAME}
+
+WORKDIR "serve"
+
+# CPP backend binary install depends on "ts" directory being present in python site-packages
+RUN pip install pygit2 && python ts_scripts/install_from_src.py
+
+EXPOSE 8080 8081 8082 7070 7071

docker/build_image.sh (+36 -2)

@@ -12,6 +12,7 @@ USE_CUSTOM_TAG=false
 CUDA_VERSION=""
 USE_LOCAL_SERVE_FOLDER=false
 BUILD_WITH_IPEX=false
+BUILD_CPP=false
 BUILD_NIGHTLY=false
 PYTHON_VERSION=3.9
 
@@ -29,6 +30,7 @@ do
          echo "-t, --tag specify tag name for docker image"
          echo "-lf, --use-local-serve-folder specify this option for the benchmark image if the current 'serve' folder should be used during automated benchmarks"
          echo "-ipex, --build-with-ipex specify to build with intel_extension_for_pytorch"
+         echo "-cpp, --build-cpp specify to build TorchServe CPP"
          echo "-py, --pythonversion specify to python version to use: Possible values: 3.8 3.9 3.10"
          echo "-n, --nightly specify to build with TorchServe nightly"
          exit 0
@@ -76,6 +78,10 @@ do
          BUILD_WITH_IPEX=true
          shift
          ;;
+        -cpp|--build-cpp)
+         BUILD_CPP=true
+         shift
+         ;;
        -n|--nightly)
          BUILD_NIGHTLY=true
          shift
@@ -139,7 +145,12 @@ fi
 
 if [ "${BUILD_TYPE}" == "dev" ] && ! $USE_CUSTOM_TAG ;
 then
-  DOCKER_TAG="pytorch/torchserve:dev-$MACHINE"
+  if [ "${BUILD_CPP}" == "true" ]
+  then
+    DOCKER_TAG="pytorch/torchserve:cpp-dev-$MACHINE"
+  else
+    DOCKER_TAG="pytorch/torchserve:dev-$MACHINE"
+  fi
 fi
 
 if [ "$USE_CUSTOM_TAG" = true ]
@@ -153,12 +164,35 @@ then
   exit 1
 fi
 
+if [ "$BUILD_CPP" == "true" ];
+then
+  if [ "$BUILD_TYPE" != "dev" ];
+  then
+    echo "Only dev container build is supported for CPP"
+    exit 1
+  fi
+
+  if [[ "${MACHINE}" == "gpu" || "${CUDA_VERSION}" != "" ]];
+  then
+    if [[ "${CUDA_VERSION}" != "cu121" && "${CUDA_VERSION}" != "cu118" ]];
+    then
+      echo "Only cuda versions 12.1 and 11.8 are supported for CPP"
+      exit 1
+    fi
+  fi
+fi
+
 if [ "${BUILD_TYPE}" == "production" ]
 then
   DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}" --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" -t "${DOCKER_TAG}" --target production-image .
 elif [ "${BUILD_TYPE}" == "ci" ]
 then
   DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}" --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" -t "${DOCKER_TAG}" --target ci-image .
 else
-  DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}" --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg BUILD_WITH_IPEX="${BUILD_WITH_IPEX}" -t "${DOCKER_TAG}" --target dev-image .
+  if [ "${BUILD_CPP}" == "true" ]
+  then
+    DOCKER_BUILDKIT=1 docker build --file Dockerfile.cpp --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}" --build-arg BRANCH_NAME="${BRANCH_NAME}" -t "${DOCKER_TAG}" --target cpp-dev-image .
+  else
+    DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}" --build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg BUILD_WITH_IPEX="${BUILD_WITH_IPEX}" -t "${DOCKER_TAG}" --target dev-image .
+  fi
 fi
examples/cpp/aot_inductor/bert/CMakeLists.txt (+9 -1)

@@ -1,5 +1,13 @@
+
+add_custom_command(
+  OUTPUT bert-seq.so
+  COMMAND TOKENIZERS_PARALLELISM=false python ${CMAKE_CURRENT_SOURCE_DIR}/aot_compile_export.py
+  COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/Transformer_model/tokenizer.json ${CMAKE_CURRENT_BINARY_DIR}/
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/aot_compile_export.py
+)
+
 set(TOKENZIER_CPP_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../../../cpp/third-party/tokenizers-cpp)
 add_subdirectory(${TOKENZIER_CPP_PATH} tokenizers EXCLUDE_FROM_ALL)
-add_library(bert_handler SHARED src/bert_handler.cc)
+add_library(bert_handler SHARED src/bert_handler.cc bert-seq.so)
 target_include_directories(bert_handler PRIVATE ${TOKENZIER_CPP_PATH}/include)
 target_link_libraries(bert_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES} tokenizers_cpp)
examples/cpp/aot_inductor/llama2/CMakeLists.txt (+19 -1)

@@ -1,5 +1,23 @@
+
+FetchContent_Declare(
+  stories15M_pt
+  URL https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true
+  DOWNLOAD_NO_EXTRACT TRUE
+  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/
+)
+
+FetchContent_MakeAvailable(stories15M_pt)
+
+
+add_custom_command(
+  OUTPUT stories15M.so
+  COMMAND PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/../../../../cpp/third-party/llama2.so/ python ${CMAKE_CURRENT_SOURCE_DIR}/compile.py --checkpoint ${CMAKE_CURRENT_BINARY_DIR}/\'stories15M.pt?download=true\' ${CMAKE_CURRENT_BINARY_DIR}/stories15M.so
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/compile.py
+)
+
+
 add_library(llama2_so STATIC ../../../../cpp/third-party/llama2.so/run.cpp)
 target_compile_options(llama2_so PRIVATE -Wall -Wextra -Ofast -fpermissive)
 
-add_library(llama_so_handler SHARED src/llama_handler.cc)
+add_library(llama_so_handler SHARED src/llama_handler.cc stories15M.so)
 target_link_libraries(llama_so_handler PRIVATE llama2_so ts_backends_core ts_utils ${TORCH_LIBRARIES})
examples/cpp/aot_inductor/resnet/CMakeLists.txt (+8 -1)

@@ -1,2 +1,9 @@
-add_library(resnet_handler SHARED src/resnet_handler.cc)
+
+add_custom_command(
+  OUTPUT resnet50_pt2.so
+  COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/resnet50_torch_export.py
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/resnet50_torch_export.py
+)
+
+add_library(resnet_handler SHARED src/resnet_handler.cc resnet50_pt2.so)
 target_link_libraries(resnet_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})

examples/cpp/babyllama/CMakeLists.txt (+17)

@@ -1,3 +1,20 @@
+include(FetchContent)
+
+FetchContent_Declare(
+  stories15M_bin
+  URL https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
+  DOWNLOAD_NO_EXTRACT TRUE
+  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/
+)
+
+FetchContent_Declare(
+  tokenizer_bin
+  URL https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
+  DOWNLOAD_NO_EXTRACT TRUE
+  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/
+)
+
+FetchContent_MakeAvailable(tokenizer_bin stories15M_bin)
 
 add_library(llama2_c STATIC ../../../cpp/third-party/llama2.c/run.c)
 target_compile_options(llama2_c PRIVATE -Wall -Wextra -Ofast -fPIC)
