pytorch
diff --git a/‎.github/workflows/kubernetes_tests.yml
+35 b/‎.github/workflows/kubernetes_tests.yml
+35
diff --git a/‎.github/workflows/official_release.yml
+4 b/‎.github/workflows/official_release.yml
+4
diff --git a/‎.github/workflows/official_release_docker.yml
+2 b/‎.github/workflows/official_release_docker.yml
+2
diff --git a/‎.github/workflows/regression_tests_cpu_binaries.yml
+24-4 b/‎.github/workflows/regression_tests_cpu_binaries.yml
+24-4
diff --git a/‎README.md
+1 b/‎README.md
+1
diff --git a/‎binaries/conda/build_packages.py
+1-1 b/‎binaries/conda/build_packages.py
+1-1
diff --git a/‎cpp/README.md
+3-2 b/‎cpp/README.md
+3-2
diff --git a/‎cpp/build.sh
-41 b/‎cpp/build.sh
-41
diff --git a/‎cpp/src/backends/core/backend.cc
+2 b/‎cpp/src/backends/core/backend.cc
+2
diff --git a/‎cpp/src/examples/CMakeLists.txt
+3-1 b/‎cpp/src/examples/CMakeLists.txt
+3-1
diff --git a/‎docker/Dockerfile.cpp
+9-21 b/‎docker/Dockerfile.cpp
+9-21
diff --git a/‎docker/README.md
+11 b/‎docker/README.md
+11
diff --git a/‎docker/build_image.sh
+7-2 b/‎docker/build_image.sh
+7-2
@@ -0,0 +1,35 @@
+name: Kubernetes Nightly Tests
+
+on:
+  workflow_dispatch:  # allows the workflow to be started manually as well
+  # Runs every day at 06:15 UTC (GitHub evaluates schedule cron expressions in UTC)
+  schedule:
+    - cron: '15 6 * * *'
+
+jobs:
+  kubernetes-tests:
+    runs-on: [self-hosted, regression-test-gpu]
+    steps:
+      - name: Clean up previous run
+        run: |
+          echo "Cleaning up previous run"
+          ls -la ./
+          sudo rm -rf ./* || true
+          sudo rm -rf ./.??* || true  # hidden entries; the glob never matches . or ..
+          ls -la ./
+      - name: Install minikube and kubectl
+        run: |
+          curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
+          sudo install minikube-linux-amd64 /usr/local/bin/minikube
+          curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
+          sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
+          echo "/usr/local/bin" >> "$GITHUB_PATH"  # expose both binaries to later steps
+      - name: Setup Python 3.9
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"  # quoted so YAML keeps it a string, not a float
+          architecture: x64
+      - name: Checkout TorchServe
+        uses: actions/checkout@v3
+      - name: Validate TorchServe
+        run: ./kubernetes/tests/scripts/test_mnist.sh
@@ -11,6 +11,10 @@ jobs:
     steps:
       - name: Setup Conda
         uses: s-weigand/setup-conda@v1
+        with:
+          activate-conda: true
+          update-conda: false
+          python-version: "3.9"
       - name: Setup Anaconda
         run: |
           conda --version
 
@@ -32,6 +32,8 @@ jobs:
           architecture: x64
       - name: Checkout TorchServe
         uses: actions/checkout@v3
+        with:
+          submodules: recursive
       - name: Login to Docker
         env:
           DOCKER_PASSWORD: ${{secrets.DOCKER_PASSWORD}}
 
@@ -3,7 +3,7 @@ name: Run Regression Tests for CPU nightly binaries
 on:
   # run every day at 6:15am
   schedule:
-    - cron:  '15 6 * * *'
+    - cron: '15 6 * * *'
 
 concurrency:
   group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }}
@@ -16,34 +16,54 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, macOS-latest]
+        os: [ubuntu-20.04, macOS-latest, macos-14]
         python-version: ["3.8", "3.9", "3.10"]
         binaries: ["pypi", "conda"]
         exclude:
         - os: macos-latest
           python-version: 3.8
+        - os: macos-14
+          python-version: 3.8
+        - os: macos-14
+          python-version: 3.9
     steps:
       - uses: actions/checkout@v3
         with:
           submodules: recursive
       - name: Setup conda with Python ${{ matrix.python-version }}
+        if: matrix.os == 'macos-14'
+        uses: conda-incubator/setup-miniconda@v3
+        with:
+          auto-update-conda: true
+          channels: anaconda, conda-forge
+          python-version: ${{ matrix.python-version }}
+      - name: Setup conda with Python ${{ matrix.python-version }}
+        if: matrix.os != 'macos-14'
         uses: s-weigand/setup-conda@v1
         with:
           update-conda: true
           python-version: ${{ matrix.python-version }}
           conda-channels: anaconda, conda-forge
-      - run: conda --version
-      - run: python --version
       - name: Setup Java 17
         uses: actions/setup-java@v3
         with:
           distribution: 'zulu'
           java-version: '17'
       - name: Checkout TorchServe
         uses: actions/checkout@v3
+      - name: Run install dependencies and regression test
+        if: matrix.os == 'macos-14'
+        shell: bash -el {0}
+        run: |
+          conda info 
+          python ts_scripts/install_dependencies.py --environment=dev 
+          python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly
       - name: Install dependencies
+        if: matrix.os != 'macos-14'
         run: |
           python ts_scripts/install_dependencies.py --environment=dev
       - name: Validate Torchserve CPU Regression
+        if: matrix.os != 'macos-14'
         run: |
           python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly
+          
@@ -5,6 +5,7 @@
 ![Benchmark Nightly](https://github.com/pytorch/serve/actions/workflows/benchmark_nightly.yml/badge.svg)
 ![Docker Regression Nightly](https://github.com/pytorch/serve/actions/workflows/regression_tests_docker.yml/badge.svg)
 ![KServe Regression Nightly](https://github.com/pytorch/serve/actions/workflows/kserve_cpu_tests.yml/badge.svg)
+![Kubernetes Regression Nightly](https://github.com/pytorch/serve/actions/workflows/kubernetes_tests.yml/badge.svg)
 
 TorchServe is a flexible and easy-to-use tool for serving and scaling PyTorch models in production.
 
 
@@ -22,7 +22,7 @@
 PACKAGES = ["torchserve", "torch-model-archiver", "torch-workflow-archiver"]
 
 # conda convert supported platforms https://docs.conda.io/projects/conda-build/en/stable/resources/commands/conda-convert.html
-PLATFORMS = ["linux-64", "osx-64", "win-64"]  # Add a new platform here
+PLATFORMS = ["linux-64", "osx-64", "win-64", "osx-arm64"]  # Add a new platform here
 
 if os.name == "nt":
     # Assumes miniconda is installed in windows
 
@@ -2,10 +2,10 @@
 ## Requirements
 * C++17
 * GCC version: gcc-9
-* cmake version: 3.18+
+* cmake version: 3.26.4+
 * Linux
 
-For convenience, a docker container can be used as the development environment to build and install Torchserve CPP
+For convenience, a [docker container](../docker/README.md#create-torchserve-docker-image) can be used as the development environment to build and install Torchserve CPP
 ```
 cd serve/docker
 # For CPU support
@@ -21,6 +21,7 @@ docker run [-v /path/to/build/dir:/serve/cpp/_build] -it pytorch/torchserve:cpp-
 # For GPU support
 docker run --gpus all [-v /path/to/build/dir:/serve/cpp/_build] -it pytorch/torchserve:cpp-dev-gpu /bin/bash
 ```
+`Warning`: The dev docker container does not install all necessary dependencies or build Torchserve CPP. Please follow the steps below after starting the container.
 
 ## Installation and Running TorchServe CPP
 This installation instruction assumes that TorchServe is already installed through pip/conda/source. If this is not the case install it after the `Install dependencies` step through your preferred method.
 
@@ -20,45 +20,6 @@ function detect_platform() {
   echo -e "${COLOR_GREEN}Detected platform: $PLATFORM ${COLOR_OFF}"
 }
 
-function prepare_test_files() {
-  echo -e "${COLOR_GREEN}[ INFO ]Preparing test files ${COLOR_OFF}"
-  local EX_DIR="${TR_DIR}/examples/"
-  rsync -a --link-dest=../../test/resources/ ${BASE_DIR}/test/resources/ ${TR_DIR}/
-  if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin" ]; then
-    wget -q https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin -O "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin"
-  fi
-  if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin" ]; then
-    wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin -O "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin"
-  fi
-  # PT2.2 torch.expport does not support Mac
-  if [ "$PLATFORM" = "Linux" ]; then
-    if [ ! -f "${EX_DIR}/aot_inductor/llama_handler/stories15M.so" ]; then
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/llama_handler/
-      if [ ! -f "${HANDLER_DIR}/stories15M.pt" ]; then
-        wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true -O "${HANDLER_DIR}/stories15M.pt"
-      fi
-      local LLAMA_SO_DIR=${BASE_DIR}/third-party/llama2.so/
-      PYTHONPATH=${LLAMA_SO_DIR}:${PYTHONPATH} python ${BASE_DIR}/../examples/cpp/aot_inductor/llama2/compile.py --checkpoint ${HANDLER_DIR}/stories15M.pt ${HANDLER_DIR}/stories15M.so
-    fi
-    if [ ! -f "${EX_DIR}/aot_inductor/bert_handler/bert-seq.so" ]; then
-      pip install transformers
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/bert_handler/
-      export TOKENIZERS_PARALLELISM=false
-      cd ${BASE_DIR}/../examples/cpp/aot_inductor/bert/
-      python aot_compile_export.py
-      mv bert-seq.so ${HANDLER_DIR}/bert-seq.so
-      mv Transformer_model/tokenizer.json ${HANDLER_DIR}/tokenizer.json
-      export TOKENIZERS_PARALLELISM=""
-    fi
-    if [ ! -f "${EX_DIR}/aot_inductor/resnet_handler/resnet50_pt2.so" ]; then
-      local HANDLER_DIR=${EX_DIR}/aot_inductor/resnet_handler/
-      cd ${HANDLER_DIR}
-      python ${BASE_DIR}/../examples/cpp/aot_inductor/resnet/resnet50_torch_export.py
-    fi
-  fi
-  cd "$BWD" || exit
-}
-
 function build() {
   echo -e "${COLOR_GREEN}[ INFO ]Building backend ${COLOR_OFF}"
   MAYBE_BUILD_QUIC=""
@@ -121,7 +82,6 @@ function build() {
   fi
 
   make -j "$JOBS"
-  make format
   make install
   echo -e "${COLOR_GREEN}torchserve_cpp build is complete. To run unit test: \
   ./_build/test/torchserve_cpp_test ${COLOR_OFF}"
@@ -207,6 +167,5 @@ cd $BASE_DIR
 
 git submodule update --init --recursive
 
-prepare_test_files
 build
 install_torchserve_cpp
@@ -22,12 +22,14 @@ bool Backend::Initialize(const std::string &model_dir) {
   // TODO: windows
   TS_LOGF(DEBUG, "Initializing from manifest: {}", manifest_file);
   if (!manifest_->Initialize(manifest_file)) {
+    TS_LOGF(ERROR, "Could not initialize from manifest: {}", manifest_file);
     return false;
   }
 
   LoadHandler(model_dir);
 
   if (!handler_) {
+    TS_LOG(ERROR, "Could not load handler");
     return false;
   }
 
 
@@ -1,4 +1,6 @@
 
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../../test/resources/ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/)
+
 add_subdirectory("../../../examples/cpp/babyllama/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/babyllama/babyllama_handler/")
 
 add_subdirectory("../../../examples/cpp/llamacpp/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/llamacpp/llamacpp_handler/")
@@ -10,6 +12,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux")
   add_subdirectory("../../../examples/cpp/aot_inductor/llama2/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/llama_handler/")
 
   add_subdirectory("../../../examples/cpp/aot_inductor/bert" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/bert_handler/")
-  
+
   add_subdirectory("../../../examples/cpp/aot_inductor/resnet" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/resnet_handler/")
 endif()
@@ -16,29 +16,34 @@
 ARG BASE_IMAGE=ubuntu:20.04
 ARG PYTHON_VERSION=3.9
 ARG CMAKE_VERSION=3.26.4
+ARG GCC_VERSION=9
 ARG BRANCH_NAME="master"
 ARG USE_CUDA_VERSION=""
 
 FROM ${BASE_IMAGE} AS cpp-dev-image
 ARG BASE_IMAGE
 ARG PYTHON_VERSION
 ARG CMAKE_VERSION
+ARG GCC_VERSION
 ARG BRANCH_NAME
 ARG USE_CUDA_VERSION
+ARG DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED TRUE
+ENV TZ=Etc/UTC
 
 RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
     apt-get update && \
     apt-get install software-properties-common -y && \
     add-apt-repository -y ppa:deadsnakes/ppa && \
-    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+    apt-get install --no-install-recommends -y \
         sudo \
         vim \
         git \
         curl \
         wget \
         rsync \
         gpg \
+        gcc-$GCC_VERSION \
         ca-certificates \
         lsb-release \
         openjdk-17-jdk \
@@ -51,32 +56,15 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
 RUN python$PYTHON_VERSION -m venv /home/venv
 ENV PATH="/home/venv/bin:$PATH"
 
-# Enable installation of recent cmake release
+# Enable installation of recent cmake release and pin cmake & cmake-data version
 # Ref: https://apt.kitware.com/
 RUN (wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null) \
     && (echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null) \
     && apt-get update \
     && (test -f /usr/share/doc/kitware-archive-keyring/copyright || sudo rm /usr/share/keyrings/kitware-archive-keyring.gpg) \
     && sudo apt-get install kitware-archive-keyring \
-    && rm -rf /var/lib/apt/lists/*
-
-# Pin cmake and cmake-data version
-# Ref: https://manpages.ubuntu.com/manpages/xenial/man5/apt_preferences.5.html
-RUN echo "Package: cmake\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake
-RUN echo "Package: cmake-data\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake-data
-
-# Install CUDA toolkit to enable "libtorch" build with GPU support
-RUN apt-get update && \
-    if echo "$BASE_IMAGE" | grep -q "cuda:"; then \
-        if [ "$USE_CUDA_VERSION" = "cu121" ]; then \
-            apt-get -y install cuda-toolkit-12-1; \
-        elif [ "$USE_CUDA_VERSION" = "cu118" ]; then \
-            apt-get -y install cuda-toolkit-11-8; \
-        else \
-            echo "Cuda version not supported by CPP backend: $USE_CUDA_VERSION"; \
-            exit 1; \
-        fi; \
-    fi \
+    && echo "Package: cmake\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake \
+    && echo "Package: cmake-data\nPin: version $CMAKE_VERSION*\nPin-Priority: 1001" > /etc/apt/preferences.d/cmake-data \
     && rm -rf /var/lib/apt/lists/*
 
 RUN git clone --recursive https://github.com/pytorch/serve.git \
 
@@ -41,6 +41,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p
 |-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.|
 |-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`. `cu121`, Default `cu121`|
 |-ipex, --build-with-ipex| Specify to build with intel_extension_for_pytorch. If not specified, script builds without intel_extension_for_pytorch.|
+|-cpp, --build-cpp| Specify to build TorchServe CPP.|
 |-n, --nightly| Specify to build with TorchServe nightly.|
 |-py, --pythonversion| Specify the python version to use. Supported values `3.8`, `3.9`, `3.10`, `3.11`. Default `3.9`|
 
@@ -147,6 +148,16 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr
 ./build_image.sh -bt dev -ipex -t torchserve-ipex:1.0
 ```
 
+- For creating image to build Torchserve CPP with CPU support:
+```bash
+./build_image.sh -bt dev -cpp
+```
+
+- For creating image to build Torchserve CPP with GPU support:
+```bash
+./build_image.sh -bt dev -g [-cv cu121|cu118] -cpp
+```
+
 
 ## Start a container with a TorchServe image
 
 
@@ -174,9 +174,14 @@ then
 
   if [[ "${MACHINE}" == "gpu" || "${CUDA_VERSION}" != "" ]];
   then
-    if [[ "${CUDA_VERSION}" != "cu121" && "${CUDA_VERSION}" != "cu118" ]];
+    if [ "${CUDA_VERSION}" == "cu121" ];
     then
-      echo "Only cuda versions 12.1 and 11.8 are supported for CPP"
+      BASE_IMAGE="nvidia/cuda:12.1.1-devel-ubuntu20.04"
+    elif [ "${CUDA_VERSION}" == "cu118" ];
+    then
+      BASE_IMAGE="nvidia/cuda:11.8.0-devel-ubuntu20.04"
+    else
+      echo "Cuda version $CUDA_VERSION is not supported for CPP"
       exit 1
     fi
   fi
Original file line number	Diff line number	Diff line change
`@@ -22,12 +22,14 @@ bool Backend::Initialize(const std::string &model_dir) {`
`22`	`22`	`// TODO: windows`
`23`	`23`	`TS_LOGF(DEBUG, "Initializing from manifest: {}", manifest_file);`
`24`	`24`	`if (!manifest_->Initialize(manifest_file)) {`
	`25`	`+ TS_LOGF(ERROR, "Could not initialize from manifest: {}", manifest_file);`
`25`	`26`	`return false;`
`26`	`27`	`}`
`27`	`28`
`28`	`29`	`LoadHandler(model_dir);`
`29`	`30`
`30`	`31`	`if (!handler_) {`
	`32`	`+ TS_LOG(ERROR, "Could not load handler");`
`31`	`33`	`return false;`
`32`	`34`	`}`
`33`	`35`