Commit 0dd7207

Merge branch 'master' into publish-dev-cpp-docker-nightly

Authored Mar 13, 2024
2 parents b478968 + c6c761b
16 files changed (+120, -80 lines)
.github/workflows/regression_tests_cpu_binaries.yml (+24, -4)

```diff
@@ -3,7 +3,7 @@ name: Run Regression Tests for CPU nightly binaries
 on:
   # run every day at 6:15am
   schedule:
-    - cron: '15 6 * * *'
+    - cron: '15 6 * * *'

 concurrency:
   group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }}
@@ -16,34 +16,54 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, macOS-latest]
+        os: [ubuntu-20.04, macOS-latest, macos-14]
         python-version: ["3.8", "3.9", "3.10"]
         binaries: ["pypi", "conda"]
         exclude:
           - os: macos-latest
             python-version: 3.8
+          - os: macos-14
+            python-version: 3.8
+          - os: macos-14
+            python-version: 3.9
     steps:
       - uses: actions/checkout@v3
         with:
           submodules: recursive
       - name: Setup conda with Python ${{ matrix.python-version }}
+        if: matrix.os == 'macos-14'
+        uses: conda-incubator/setup-miniconda@v3
+        with:
+          auto-update-conda: true
+          channels: anaconda, conda-forge
+          python-version: ${{ matrix.python-version }}
+      - name: Setup conda with Python ${{ matrix.python-version }}
+        if: matrix.os != 'macos-14'
         uses: s-weigand/setup-conda@v1
         with:
           update-conda: true
           python-version: ${{ matrix.python-version }}
           conda-channels: anaconda, conda-forge
-      - run: conda --version
-      - run: python --version
       - name: Setup Java 17
         uses: actions/setup-java@v3
         with:
           distribution: 'zulu'
           java-version: '17'
       - name: Checkout TorchServe
         uses: actions/checkout@v3
+      - name: Run install dependencies and regression test
+        if: matrix.os == 'macos-14'
+        shell: bash -el {0}
+        run: |
+          conda info
+          python ts_scripts/install_dependencies.py --environment=dev
+          python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly
       - name: Install dependencies
+        if: matrix.os != 'macos-14'
         run: |
           python ts_scripts/install_dependencies.py --environment=dev
       - name: Validate Torchserve CPU Regression
+        if: matrix.os != 'macos-14'
         run: |
           python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly
+
```
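For reference, the new macos-14 leg runs its steps under `shell: bash -el {0}` so that conda activation works, then installs dependencies and runs the regression suite. A minimal sketch of reproducing those steps in a local shell (the environment name is hypothetical; the commands are taken from the workflow above):

```bash
# Sketch: reproducing the macos-14 job steps locally
# (environment name "ts-regression" is hypothetical)
conda create -y -n ts-regression python=3.10
conda activate ts-regression
conda info
python ts_scripts/install_dependencies.py --environment=dev
# pick --pypi or --conda, mirroring the workflow's binaries matrix
python test/regression_tests.py --binaries --pypi --nightly
```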

binaries/conda/build_packages.py (+1, -1)

```diff
@@ -22,7 +22,7 @@
 PACKAGES = ["torchserve", "torch-model-archiver", "torch-workflow-archiver"]

 # conda convert supported platforms https://docs.conda.io/projects/conda-build/en/stable/resources/commands/conda-convert.html
-PLATFORMS = ["linux-64", "osx-64", "win-64"]  # Add a new platform here
+PLATFORMS = ["linux-64", "osx-64", "win-64", "osx-arm64"]  # Add a new platform here

 if os.name == "nt":
     # Assumes miniconda is installed in windows
```
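With `osx-arm64` added, `conda convert` can retarget a package built on one platform to Apple silicon. A hedged sketch of the underlying command (the package filename below is hypothetical):

```bash
# Sketch: converting a built package to one of the PLATFORMS entries
# (the .tar.bz2 path is hypothetical)
conda convert --platform osx-arm64 output/linux-64/torchserve-0.10.0-py39_0.tar.bz2 -o output/
```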

cpp/README.md (+3, -2)

````diff
@@ -2,10 +2,10 @@
 ## Requirements
 * C++17
 * GCC version: gcc-9
-* cmake version: 3.18+
+* cmake version: 3.26.4+
 * Linux

-For convenience, a docker container can be used as the development environment to build and install Torchserve CPP
+For convenience, a [docker container](../docker/README.md#create-torchserve-docker-image) can be used as the development environment to build and install Torchserve CPP
 ```
 cd serve/docker
 # For CPU support
@@ -21,6 +21,7 @@ docker run [-v /path/to/build/dir:/serve/cpp/_build] -it pytorch/torchserve:cpp-
 # For GPU support
 docker run --gpus all [-v /path/to/build/dir:/serve/cpp/_build] -it pytorch/torchserve:cpp-dev-gpu /bin/bash
 ```
+`Warning`: The dev docker container does not install all necessary dependencies or build Torchserve CPP. Please follow the steps below after starting the container.

 ## Installation and Running TorchServe CPP
 This installation instruction assumes that TorchServe is already installed through pip/conda/source. If this is not the case install it after the `Install dependencies` step through your preferred method.
````
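Per the new warning, the dev container only provides the toolchain; a plausible follow-up inside the container (the `/serve` checkout location is assumed from the mount example above) is:

```bash
# Sketch: finishing setup inside the dev container (assumed /serve checkout path)
cd /serve
python ts_scripts/install_dependencies.py --environment=dev
cd cpp && ./build.sh
```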

cpp/build.sh (-41)

```diff
@@ -20,45 +20,6 @@ function detect_platform() {
   echo -e "${COLOR_GREEN}Detected platform: $PLATFORM ${COLOR_OFF}"
 }

-function prepare_test_files() {
-  echo -e "${COLOR_GREEN}[ INFO ]Preparing test files ${COLOR_OFF}"
-  local EX_DIR="${TR_DIR}/examples/"
-  rsync -a --link-dest=../../test/resources/ ${BASE_DIR}/test/resources/ ${TR_DIR}/
-  if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin" ]; then
-    wget -q https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin -O "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin"
-  fi
-  if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin" ]; then
-    wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin -O "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin"
-  fi
-  # PT2.2 torch.expport does not support Mac
-  if [ "$PLATFORM" = "Linux" ]; then
-    if [ ! -f "${EX_DIR}/aot_inductor/llama_handler/stories15M.so" ]; then
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/llama_handler/
-      if [ ! -f "${HANDLER_DIR}/stories15M.pt" ]; then
-        wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true -O "${HANDLER_DIR}/stories15M.pt"
-      fi
-      local LLAMA_SO_DIR=${BASE_DIR}/third-party/llama2.so/
-      PYTHONPATH=${LLAMA_SO_DIR}:${PYTHONPATH} python ${BASE_DIR}/../examples/cpp/aot_inductor/llama2/compile.py --checkpoint ${HANDLER_DIR}/stories15M.pt ${HANDLER_DIR}/stories15M.so
-    fi
-    if [ ! -f "${EX_DIR}/aot_inductor/bert_handler/bert-seq.so" ]; then
-      pip install transformers
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/bert_handler/
-      export TOKENIZERS_PARALLELISM=false
-      cd ${BASE_DIR}/../examples/cpp/aot_inductor/bert/
-      python aot_compile_export.py
-      mv bert-seq.so ${HANDLER_DIR}/bert-seq.so
-      mv Transformer_model/tokenizer.json ${HANDLER_DIR}/tokenizer.json
-      export TOKENIZERS_PARALLELISM=""
-    fi
-    if [ ! -f "${EX_DIR}/aot_inductor/resnet_handler/resnet50_pt2.so" ]; then
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/resnet_handler/
-      cd ${HANDLER_DIR}
-      python ${BASE_DIR}/../examples/cpp/aot_inductor/resnet/resnet50_torch_export.py
-    fi
-  fi
-  cd "$BWD" || exit
-}
-
 function build() {
   echo -e "${COLOR_GREEN}[ INFO ]Building backend ${COLOR_OFF}"
   MAYBE_BUILD_QUIC=""
@@ -121,7 +82,6 @@ function build() {
   fi

   make -j "$JOBS"
-  make format
   make install
   echo -e "${COLOR_GREEN}torchserve_cpp build is complete. To run unit test: \
     ./_build/test/torchserve_cpp_test ${COLOR_OFF}"
@@ -207,6 +167,5 @@ cd $BASE_DIR

 git submodule update --init --recursive

-prepare_test_files
 build
 install_torchserve_cpp
```
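With `prepare_test_files` and `make format` removed, the test assets are now produced during the CMake build (see the example `CMakeLists.txt` changes below) and the entry point is unchanged. A minimal sketch, using the script's own completion message for the test binary path:

```bash
# Sketch: building and running the unit tests after this change
cd serve/cpp
./build.sh
./_build/test/torchserve_cpp_test
```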

cpp/src/backends/core/backend.cc (+2)

```diff
@@ -22,12 +22,14 @@ bool Backend::Initialize(const std::string &model_dir) {
   // TODO: windows
   TS_LOGF(DEBUG, "Initializing from manifest: {}", manifest_file);
   if (!manifest_->Initialize(manifest_file)) {
+    TS_LOGF(ERROR, "Could not initialize from manifest: {}", manifest_file);
     return false;
   }

   LoadHandler(model_dir);

   if (!handler_) {
+    TS_LOG(ERROR, "Could not load handler");
     return false;
   }
```
cpp/src/examples/CMakeLists.txt (+3, -1)

```diff
@@ -1,4 +1,6 @@

+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../../test/resources/ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/)
+
 add_subdirectory("../../../examples/cpp/babyllama/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/babyllama/babyllama_handler/")

 add_subdirectory("../../../examples/cpp/llamacpp/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/llamacpp/llamacpp_handler/")
@@ -10,6 +12,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux")
   add_subdirectory("../../../examples/cpp/aot_inductor/llama2/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/llama_handler/")

   add_subdirectory("../../../examples/cpp/aot_inductor/bert" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/bert_handler/")
-
+
   add_subdirectory("../../../examples/cpp/aot_inductor/resnet" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/resnet_handler/")
 endif()
```

docker/Dockerfile.cpp (+9, -21)

```diff
@@ -16,29 +16,34 @@
 ARG BASE_IMAGE=ubuntu:20.04
 ARG PYTHON_VERSION=3.9
 ARG CMAKE_VERSION=3.26.4
+ARG GCC_VERSION=9
 ARG BRANCH_NAME="master"
 ARG USE_CUDA_VERSION=""

 FROM ${BASE_IMAGE} AS cpp-dev-image
 ARG BASE_IMAGE
 ARG PYTHON_VERSION
 ARG CMAKE_VERSION
+ARG GCC_VERSION
 ARG BRANCH_NAME
 ARG USE_CUDA_VERSION
+ARG DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED TRUE
+ENV TZ=Etc/UTC

 RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
     apt-get update && \
     apt-get install software-properties-common -y && \
     add-apt-repository -y ppa:deadsnakes/ppa && \
-    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+    apt-get install --no-install-recommends -y \
     sudo \
     vim \
     git \
     curl \
     wget \
     rsync \
     gpg \
+    gcc-$GCC_VERSION \
     ca-certificates \
     lsb-release \
     openjdk-17-jdk \
@@ -51,32 +56,15 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
 RUN python$PYTHON_VERSION -m venv /home/venv
 ENV PATH="/home/venv/bin:$PATH"

-# Enable installation of recent cmake release
+# Enable installation of recent cmake release and pin cmake & cmake-data version
 # Ref: https://apt.kitware.com/
 RUN (wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null) \
   && (echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null) \
   && apt-get update \
   && (test -f /usr/share/doc/kitware-archive-keyring/copyright || sudo rm /usr/share/keyrings/kitware-archive-keyring.gpg) \
   && sudo apt-get install kitware-archive-keyring \
-  && rm -rf /var/lib/apt/lists/*
-
-# Pin cmake and cmake-data version
-# Ref: https://manpages.ubuntu.com/manpages/xenial/man5/apt_preferences.5.html
-RUN echo "Package: cmake\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake
-RUN echo "Package: cmake-data\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake-data
-
-# Install CUDA toolkit to enable "libtorch" build with GPU support
-RUN apt-get update && \
-    if echo "$BASE_IMAGE" | grep -q "cuda:"; then \
-      if [ "$USE_CUDA_VERSION" = "cu121" ]; then \
-        apt-get -y install cuda-toolkit-12-1; \
-      elif [ "$USE_CUDA_VERSION" = "cu118" ]; then \
-        apt-get -y install cuda-toolkit-11-8; \
-      else \
-        echo "Cuda version not supported by CPP backend: $USE_CUDA_VERSION"; \
-        exit 1; \
-      fi; \
-    fi \
+  && echo "Package: cmake\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake \
+  && echo "Package: cmake-data\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake-data \
   && rm -rf /var/lib/apt/lists/*

 RUN git clone --recursive https://github.com/pytorch/serve.git \
```
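A hedged sketch of invoking this Dockerfile directly with its declared build args (the tag mirrors the `cpp-dev-cpu` name used in cpp/README.md; the values shown are the defaults):

```bash
# Sketch: building the CPP dev image with the ARGs declared above
docker build -f docker/Dockerfile.cpp \
  --build-arg BASE_IMAGE=ubuntu:20.04 \
  --build-arg PYTHON_VERSION=3.9 \
  --build-arg CMAKE_VERSION=3.26.4 \
  --build-arg GCC_VERSION=9 \
  -t pytorch/torchserve:cpp-dev-cpu .
```

In practice `./build_image.sh -bt dev -cpp` (documented below) wraps this invocation.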

docker/README.md (+11)

````diff
@@ -41,6 +41,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p
 |-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.|
 |-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`. `cu121`, Default `cu121`|
 |-ipex, --build-with-ipex| Specify to build with intel_extension_for_pytorch. If not specified, script builds without intel_extension_for_pytorch.|
+|-cpp, --build-cpp| Specify to build TorchServe CPP|
 |-n, --nightly| Specify to build with TorchServe nightly.|
 |-py, --pythonversion| Specify the python version to use. Supported values `3.8`, `3.9`, `3.10`, `3.11`. Default `3.9`|

@@ -147,6 +148,16 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr
 ./build_image.sh -bt dev -ipex -t torchserve-ipex:1.0
 ```

+- For creating image to build Torchserve CPP with CPU support:
+```bash
+./build_image.sh -bt dev -cpp
+```
+
+- For creating image to build Torchserve CPP with GPU support:
+```bash
+./build_image.sh -bt dev -g [-cv cu121|cu118] -cpp
+```
+

 ## Start a container with a TorchServe image
````

docker/build_image.sh (+7, -2)

```diff
@@ -174,9 +174,14 @@ then

 if [[ "${MACHINE}" == "gpu" || "${CUDA_VERSION}" != "" ]];
 then
-  if [[ "${CUDA_VERSION}" != "cu121" && "${CUDA_VERSION}" != "cu118" ]];
+  if [ "${CUDA_VERSION}" == "cu121" ];
   then
-    echo "Only cuda versions 12.1 and 11.8 are supported for CPP"
+    BASE_IMAGE="nvidia/cuda:12.1.1-devel-ubuntu20.04"
+  elif [ "${CUDA_VERSION}" == "cu118" ];
+  then
+    BASE_IMAGE="nvidia/cuda:11.8.0-devel-ubuntu20.04"
+  else
+    echo "Cuda version $CUDA_VERSION is not supported for CPP"
     exit 1
   fi
 fi
```
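Together with the README examples above, the `-cv` flag now maps directly onto a CUDA devel base image. A sketch of the two supported invocations:

```bash
# Sketch: -cv selects the CUDA base image for CPP builds
./build_image.sh -bt dev -g -cv cu121 -cpp  # uses nvidia/cuda:12.1.1-devel-ubuntu20.04
./build_image.sh -bt dev -g -cv cu118 -cpp  # uses nvidia/cuda:11.8.0-devel-ubuntu20.04
```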

docs/Security.md (+2)

```diff
@@ -5,6 +5,7 @@
 | Version | Supported |
 |---------| ------------------ |
 | 0.9.0 | :white_check_mark: |
+| 0.10.0 | :white_check_mark: |


 ## How we do security
@@ -36,6 +37,7 @@ TorchServe as much as possible relies on automated tools to do security scanning
 2. Using private-key/certificate files

    You can find more details in the [configuration guide](https://pytorch.org/serve/configuration.html#enable-ssl)
+6. TorchServe supports token authorization: check [documentation](https://github.com/pytorch/serve/blob/master/docs/token_authorization_api.md) for more information.
```

examples/cpp/aot_inductor/bert/CMakeLists.txt (+9, -1)

```diff
@@ -1,5 +1,13 @@
+
+add_custom_command(
+  OUTPUT bert-seq.so
+  COMMAND TOKENIZERS_PARALLELISM=false python ${CMAKE_CURRENT_SOURCE_DIR}/aot_compile_export.py
+  COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/Transformer_model/tokenizer.json ${CMAKE_CURRENT_BINARY_DIR}/
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/aot_compile_export.py
+)
+
 set(TOKENZIER_CPP_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../../../cpp/third-party/tokenizers-cpp)
 add_subdirectory(${TOKENZIER_CPP_PATH} tokenizers EXCLUDE_FROM_ALL)
-add_library(bert_handler SHARED src/bert_handler.cc)
+add_library(bert_handler SHARED src/bert_handler.cc bert-seq.so)
 target_include_directories(bert_handler PRIVATE ${TOKENZIER_CPP_PATH}/include)
 target_link_libraries(bert_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES} tokenizers_cpp)
```
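The custom command moves the export step that `cpp/build.sh` used to run into the build graph; the manual equivalent, taken from the removed `prepare_test_files` logic, was roughly:

```bash
# Sketch: manual equivalent of the add_custom_command above
cd examples/cpp/aot_inductor/bert
TOKENIZERS_PARALLELISM=false python aot_compile_export.py  # emits bert-seq.so
cp Transformer_model/tokenizer.json .
```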
examples/cpp/aot_inductor/llama2/CMakeLists.txt (+19, -1)

```diff
@@ -1,5 +1,23 @@
+
+FetchContent_Declare(
+  stories15M_pt
+  URL https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true
+  DOWNLOAD_NO_EXTRACT TRUE
+  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/
+)
+
+FetchContent_MakeAvailable(stories15M_pt)
+
+
+add_custom_command(
+  OUTPUT stories15M.so
+  COMMAND PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/../../../../cpp/third-party/llama2.so/ python ${CMAKE_CURRENT_SOURCE_DIR}/compile.py --checkpoint ${CMAKE_CURRENT_BINARY_DIR}/\'stories15M.pt?download=true\' ${CMAKE_CURRENT_BINARY_DIR}/stories15M.so
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/compile.py
+)
+
+
 add_library(llama2_so STATIC ../../../../cpp/third-party/llama2.so/run.cpp)
 target_compile_options(llama2_so PRIVATE -Wall -Wextra -Ofast -fpermissive)

-add_library(llama_so_handler SHARED src/llama_handler.cc)
+add_library(llama_so_handler SHARED src/llama_handler.cc stories15M.so)
 target_link_libraries(llama_so_handler PRIVATE llama2_so ts_backends_core ts_utils ${TORCH_LIBRARIES})
```
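Here `FetchContent` downloads the checkpoint and the custom command AOT-compiles it into `stories15M.so`; the manual equivalent from the removed `prepare_test_files` was roughly:

```bash
# Sketch: manual equivalent of the fetch-and-compile above
wget -q "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true" -O stories15M.pt
PYTHONPATH=cpp/third-party/llama2.so/ python examples/cpp/aot_inductor/llama2/compile.py \
    --checkpoint stories15M.pt stories15M.so
```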
examples/cpp/aot_inductor/resnet/CMakeLists.txt (+8, -1)

```diff
@@ -1,2 +1,9 @@
-add_library(resnet_handler SHARED src/resnet_handler.cc)
+
+add_custom_command(
+  OUTPUT resnet50_pt2.so
+  COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/resnet50_torch_export.py
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/resnet50_torch_export.py
+)
+
+add_library(resnet_handler SHARED src/resnet_handler.cc resnet50_pt2.so)
 target_link_libraries(resnet_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
```

examples/cpp/babyllama/CMakeLists.txt (+17)

```diff
@@ -1,3 +1,20 @@
+include(FetchContent)
+
+FetchContent_Declare(
+  stories15M_bin
+  URL https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
+  DOWNLOAD_NO_EXTRACT TRUE
+  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/
+)
+
+FetchContent_Declare(
+  tokenizer_bin
+  URL https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
+  DOWNLOAD_NO_EXTRACT TRUE
+  DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/
+)
+
+FetchContent_MakeAvailable(tokenizer_bin stories15M_bin)

 add_library(llama2_c STATIC ../../../cpp/third-party/llama2.c/run.c)
 target_compile_options(llama2_c PRIVATE -Wall -Wextra -Ofast -fPIC)
```
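These declarations replace the conditional `wget` calls removed from `cpp/build.sh`; the downloads `FetchContent` now performs at configure time are simply:

```bash
# Sketch: the downloads FetchContent now performs (URLs from the removed build.sh)
wget -q https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
```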
examples/cpp/aot_inductor/bert/requirements.txt (+2, -1)

```diff
@@ -1 +1,2 @@
-sentencepiece
+transformers==4.36.2
+sentencepiece==0.1.99
```
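Pinning both packages makes the example build reproducible; assuming this is the bert example's requirements file, installation is the usual:

```bash
# Sketch: installing the pinned example dependencies (path assumed)
pip install -r examples/cpp/aot_inductor/bert/requirements.txt
```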
