Commit 3469970

Merge branch 'master' into feature/remove_torchtext_dependency
2 parents 5d81a57 + c74a29e commit 3469970

160 files changed: +4432 −1254 lines changed


.github/ISSUE_TEMPLATE/bug.yml (+5 −5)

@@ -40,20 +40,20 @@ body:
         Did you install torchserve from source? Are you using Docker?
       placeholder: |
         Install torchserve from source: No
-        Are you using Docker: Yes I ran ./build_image.sh
+        Are you using Docker: Yes I ran ./build_image.sh
     validations:
       required: true
-
+
   - type: textarea
     attributes:
-      label: Model Packaing
+      label: Model Packaging
       description: |
         Please describe how you packaged your model
       placeholder: |
         Link to builtin handler or example you used or link to a repo or gist with your custom handler or step by step instructions with torch-model-archiver
     validations:
       required: true
-
+
   - type: textarea
     attributes:
       label: config.properties
@@ -86,7 +86,7 @@ body:
        torchserve --start
        ```
     validations:
-      required: true
+      required: true
 
   - type: textarea
     attributes:

.github/workflows/ci_cpu.yml (+8 −5)

@@ -21,18 +21,19 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, macOS-latest]
+        os: [ubuntu-20.04, macos-latest]
     steps:
       - name: Setup Python for M1
-        if: matrix.os == 'macos-14'
+        if: matrix.os == 'macos-latest'
         uses: actions/setup-python@v5
         with:
           python-version: '3.10'
+          architecture: arm64
       - name: Setup Python for all other OS
-        if: matrix.os != 'macos-14'
+        if: matrix.os != 'macos-latest'
        uses: actions/setup-python@v5
         with:
-          python-version: 3.9
+          python-version: '3.9'
           architecture: x64
       - name: Setup Java 17
         uses: actions/setup-java@v3
@@ -47,7 +48,9 @@ jobs:
         run: |
           python ts_scripts/install_dependencies.py --environment=dev
       - name: Torchserve Sanity
-        uses: nick-fields/retry@v2
+        env:
+          TS_MAC_ARM64_CPU_ONLY: ${{ matrix.os == 'macos-latest' && 'True' || 'False' }}
+        uses: nick-fields/retry@v3
         with:
           timeout_minutes: 60
           max_attempts: 3
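The `TS_MAC_ARM64_CPU_ONLY` value above uses GitHub Actions' `&& / ||` ternary idiom, so the variable reaches the job as the string `'True'` or `'False'`, never a boolean. As a hedged sketch (this diff does not show how TorchServe's scripts actually parse the flag), a consuming Python script would typically normalize it like this:

```python
import os

def mac_arm64_cpu_only() -> bool:
    # `${{ matrix.os == 'macos-latest' && 'True' || 'False' }}` evaluates to
    # the *string* 'True' or 'False'; environment variables are always text,
    # so the consumer must compare against the literal.
    return os.environ.get("TS_MAC_ARM64_CPU_ONLY", "False") == "True"

# Simulate the macOS ARM64 runner.
os.environ["TS_MAC_ARM64_CPU_ONLY"] = "True"
print(mac_arm64_cpu_only())  # True
```

The `'False'` fallback keeps the flag safe on runners where the workflow never sets it.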

.github/workflows/ci_gpu.yml (+1 −1)

@@ -45,7 +45,7 @@ jobs:
         run: |
           python ts_scripts/install_dependencies.py --environment=dev --cuda=cu121
       - name: Torchserve Sanity
-        uses: nick-fields/retry@v2
+        uses: nick-fields/retry@v3
         with:
           timeout_minutes: 60
           retry_on: error

.github/workflows/regression_tests_cpu.yml (+8 −5)

@@ -15,23 +15,24 @@ concurrency:
 
 jobs:
   regression-cpu:
-    # creates workflows for OS: ubuntu, macOS, macOS M1
+    # creates workflows for OS: ubuntu, macOS M1
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, macOS-latest]
+        os: [ubuntu-20.04, macos-latest]
     steps:
       - name: Setup Python for M1
-        if: matrix.os == 'macos-14'
+        if: matrix.os == 'macos-latest'
         uses: actions/setup-python@v5
         with:
           python-version: '3.10'
+          architecture: arm64
       - name: Setup Python for all other OS
-        if: matrix.os != 'macos-14'
+        if: matrix.os != 'macos-latest'
         uses: actions/setup-python@v5
         with:
-          python-version: 3.9
+          python-version: '3.9'
           architecture: x64
       - name: Setup Java 17
         uses: actions/setup-java@v3
@@ -46,5 +47,7 @@ jobs:
         run: |
           python ts_scripts/install_dependencies.py --environment=dev
       - name: Torchserve Regression Tests
+        env:
+          TS_MAC_ARM64_CPU_ONLY: ${{ matrix.os == 'macos-latest' && 'True' || 'False' }}
         run: |
           python test/regression_tests.py

.github/workflows/regression_tests_cpu_binaries.yml (+14 −19)

@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, macOS-latest]
+        os: [ubuntu-20.04, macos-latest]
         python-version: ["3.8", "3.9", "3.10"]
         binaries: ["pypi", "conda"]
         exclude:
@@ -31,38 +31,33 @@ jobs:
         with:
           submodules: recursive
       - name: Setup conda with Python ${{ matrix.python-version }}
-        if: matrix.os == 'macos-14'
         uses: conda-incubator/setup-miniconda@v3
         with:
           auto-update-conda: true
           channels: anaconda, conda-forge
           python-version: ${{ matrix.python-version }}
-      - name: Setup conda with Python ${{ matrix.python-version }}
-        if: matrix.os != 'macos-14'
-        uses: s-weigand/setup-conda@v1
-        with:
-          update-conda: true
-          python-version: ${{ matrix.python-version }}
-          conda-channels: anaconda, conda-forge
       - name: Setup Java 17
         uses: actions/setup-java@v3
         with:
           distribution: 'zulu'
           java-version: '17'
       - name: Checkout TorchServe
         uses: actions/checkout@v3
-      - name: Run install dependencies and regression test
-        if: matrix.os == 'macos-14'
-        shell: bash -el {0}
-        run: |
-          conda info
-          python ts_scripts/install_dependencies.py --environment=dev
-          python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly
       - name: Install dependencies
-        if: matrix.os != 'macos-14'
+        shell: bash -el {0}
         run: |
+          echo "=====CHECK ENV AND PYTHON VERSION===="
+          conda info --envs
+          python --version
+          echo "=====RUN INSTALL DEPENDENCIES===="
           python ts_scripts/install_dependencies.py --environment=dev
-      - name: Validate Torchserve CPU Regression
-        if: matrix.os != 'macos-14'
+      - name: Torchserve Regression Tests
+        shell: bash -el {0}
+        env:
+          TS_MAC_ARM64_CPU_ONLY: ${{ matrix.os == 'macos-latest' && 'True' || 'False' }}
         run: |
+          echo "=====CHECK ENV AND PYTHON VERSION===="
+          conda info --envs
+          python --version
+          echo "=====RUN REGRESSION TESTS===="
           python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly

.github/workflows/regression_tests_docker.yml (+3)

@@ -1,6 +1,9 @@
 name: Run Regression Tests on Docker
 
 on:
+  push:
+    tags:
+      - docker
   workflow_dispatch:
   # run every day at 5:15am
   schedule:

.github/workflows/regression_tests_gpu_binaries.yml (+4 −9)

@@ -39,12 +39,7 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
           architecture: x64
-      - name: Setup Conda
-        uses: s-weigand/setup-conda@v1
-        with:
-          update-conda: true
-          python-version: ${{ matrix.python-version }}
-          conda-channels: anaconda, conda-forge
+      - run: python --version
       - run: conda --version
       - name: Setup Java 17
         uses: actions/setup-java@v3
@@ -53,17 +48,17 @@ jobs:
           java-version: '17'
       - name: Install dependencies
         shell: bash -el {0}
-        run: |
+        run: |
           echo "=====CHECK ENV AND PYTHON VERSION===="
           /home/ubuntu/actions-runner/_work/serve/serve/3/condabin/conda info --envs
           python --version
           echo "=====RUN INSTALL DEPENDENCIES===="
           python ts_scripts/install_dependencies.py --environment=dev --cuda=cu121
       - name: Torchserve Regression Tests
-        shell: bash -el {0}
+        shell: bash -el {0}
         run: |
           echo "=====CHECK ENV AND PYTHON VERSION===="
           /home/ubuntu/actions-runner/_work/serve/serve/3/condabin/conda info --envs
           python --version
           echo "=====RUN REGRESSION TESTS===="
-          python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly
+          python test/regression_tests.py --binaries --${{ matrix.binaries }} --nightly

CONTRIBUTING.md (+16 −2)

@@ -40,7 +40,21 @@ Your contributions will fall into two categories:
 
 Once you finish implementing a feature or bug-fix, please send a Pull Request to https://github.com/pytorch/serve.
 
-For more non-technical guidance about how to contribute to PyTorch, see the Contributing Guide.
+New features should always be covered by at least one integration test.
+For guidance please have a look at our [current suite of pytest tests](https://github.com/pytorch/serve/tree/master/test/pytest) and orient yourself on a test that covers a similar use case as your new feature.
+A simplified version of an example test can be found in the [mnist template test](https://github.com/pytorch/serve/blob/master/test/pytest/test_mnist_template.py) which shows how to create a mar file on the fly and register it with TorchServe from within a test.
+You can run most tests by simply executing:
+```bash
+pytest test/pytest/test_mnist_template.py
+```
+To have a look at the TorchServe and/or test output add `-s` like this:
+```bash
+pytest -s test/pytest/test_mnist_template.py
+```
+To run only a subset or a single test from a file use `-k` like this:
+```bash
+pytest -k test/pytest/test_mnist_template.py
+```
 
 ### Install TorchServe for development
 
@@ -50,7 +64,7 @@ Ensure that you have `python3` installed, and the user has access to the site-pa
 
 Run the following script from the top of the source directory.
 
-NOTE: This script force reinstalls `torchserve`, `torch-model-archiver` and `torch-workflow-archiver` if existing installations are found
+NOTE: This script force re-installs `torchserve`, `torch-model-archiver` and `torch-workflow-archiver` if existing installations are found
 
 #### For Debian Based Systems/ MacOS
 

README.md (+3 −3)

@@ -79,7 +79,7 @@ Refer to [torchserve docker](docker/README.md) for details.
 * Microsoft [DeepSpeed](examples/large_models/deepspeed), [DeepSpeed-Mii](examples/large_models/deepspeed_mii)
 * Hugging Face [Accelerate](examples/large_models/Huggingface_accelerate), [Diffusers](examples/diffusers)
 * Running large models on AWS [Sagemaker](https://docs.aws.amazon.com/sagemaker/latest/dg/large-model-inference-tutorials-torchserve.html) and [Inferentia2](https://pytorch.org/blog/high-performance-llama/)
-* Running [Llama 2 Chatbot locally on Mac](examples/LLM/llama2)
+* Running [Meta Llama Chatbot locally on Mac](examples/LLM/llama)
 * Monitoring using Grafana and [Datadog](https://www.datadoghq.com/blog/ai-integrations/#model-serving-and-deployment-vertex-ai-amazon-sagemaker-torchserve)
 
@@ -90,8 +90,8 @@ Refer to [torchserve docker](docker/README.md) for details.
 
 ## 🏆 Highlighted Examples
-* [Serving Llama 2 with TorchServe](examples/LLM/llama2/README.md)
-* [Chatbot with Llama 2 on Mac 🦙💬](examples/LLM/llama2/chat_app)
+* [Serving Meta Llama with TorchServe](examples/LLM/llama/README.md)
+* [Chatbot with Meta Llama on Mac 🦙💬](examples/LLM/llama/chat_app)
 * [🤗 HuggingFace Transformers](examples/Huggingface_Transformers) with a [Better Transformer Integration/ Flash Attention & Xformer Memory Efficient ](examples/Huggingface_Transformers#Speed-up-inference-with-Better-Transformer)
 * [Stable Diffusion](examples/diffusers)
 * [Model parallel inference](examples/Huggingface_Transformers#model-parallelism)

SECURITY.md (+13 −14)

@@ -4,12 +4,14 @@
 
 | Version | Supported          |
 |---------| ------------------ |
-| 0.10.0  | :white_check_mark: |
+| 0.11.0  | :white_check_mark: |
 
 
 ## How we do security
 
-TorchServe as much as possible relies on automated tools to do security scanning, in particular we support
+
+As much as possible, TorchServe relies on automated tools to do security scanning. In particular, we support:
+
 1. Dependency Analysis: Using Dependabot
 2. Docker Scanning: Using Snyk
 3. Code Analysis: Using CodeQL
@@ -23,22 +25,22 @@ TorchServe as much as possible relies on automated tools to do security scanning
 These ports are accessible to `localhost` by default. The addresses can be configured by following the guides for
 [HTTP](https://github.com/pytorch/serve/blob/master/docs/configuration.md#configure-torchserve-listening-address-and-port) and
 [gRPC](https://github.com/pytorch/serve/blob/master/docs/configuration.md#configure-torchserve-grpc-listening-addresses-and-ports).
-TorchServe does not prevent users from configuring the address to be any value, including the wildcard address `0.0.0.0`.
+TorchServe does not prevent users from configuring the address to be of any value, including the wildcard address `0.0.0.0`.
 Please be aware of the security risks of configuring the address to be `0.0.0.0`, this will give all addresses(including publicly accessible addresses, if any)
-on the host, access to the TorchServer endpoints listening on the ports shown above.
-2. TorchServe's Docker image is configured to expose the ports `8080`, `8081`, `8082`, `7070`, `7071` to the host by [default](https://github.com/pytorch/serve/blob/master/docker/Dockerfile). When starting the container,
-make sure to map the ports exposed by the container to `localhost` ports or a specific IP address as shown in this [security guideline](https://github.com/pytorch/serve/blob/master/docker/README.md#security-guideline).
+on the host, access to the TorchServe endpoints listening on the ports shown above.
+2. By [default](https://github.com/pytorch/serve/blob/master/docker/Dockerfile), TorchServe's Docker image is configured to expose the ports `8080`, `8081`, `8082`, `7070`, `7071` to the host. When starting the container,
+map the ports exposed by the container to `localhost` ports or a specific IP address, as shown in this [security guideline](https://github.com/pytorch/serve/blob/master/docker/README.md#security-guideline).
 
 3. Be sure to validate the authenticity of the `.mar` file being used with TorchServe.
    1. A `.mar` file being downloaded from the internet from an untrustworthy source may have malicious code, compromising the integrity of your application.
-   2. TorchServe executes arbitrary python code packaged in the `mar` file. Make sure that you've either audited that the code you're using is safe and/or is from a source that you trust.
-   3. Torchserve supports custom [plugins](https://github.com/pytorch/serve/tree/master/plugins) and [handlers](https://github.com/pytorch/serve/blob/master/docs/custom_service.md).
+   2. TorchServe executes the arbitrary python code packaged in the `mar` file. Make sure that you've either audited that the code you're using is safe and/or is from a source that you trust.
+   3. TorchServe supports custom [plugins](https://github.com/pytorch/serve/tree/master/plugins) and [handlers](https://github.com/pytorch/serve/blob/master/docs/custom_service.md).
 These can be utilized to extend TorchServe functionality to perform runtime security scanning using tools such as:
    - Clamd: https://pypi.org/project/clamd/
    - VirusTotal: https://virustotal.github.io/vt-py/
   - Fickling: https://github.com/trailofbits/fickling
-   4. Running Torchserve inside a container environment and loading an untrusted `.mar` file does not guarantee isolation from a security perspective.
-4. By default TorchServe allows you to register models from all URLs. Make sure to set `allowed_urls` parameter in config.properties to restrict this. You can find more details in the [configuration guide](https://pytorch.org/serve/configuration.html#other-properties).
+   4. Running TorchServe inside a container environment and loading an untrusted `.mar` file does not guarantee isolation from a security perspective.
+4. By default, TorchServe allows you to register models from all URLs. Make sure to set `allowed_urls` parameter in config.properties to restrict this. You can find more details in the [configuration guide](https://pytorch.org/serve/configuration.html#other-properties).
    - `use_env_allowed_urls=true` is required in config.properties to read `allowed_urls` from environment variable.
 5. Enable SSL:
 
@@ -57,9 +59,6 @@ TorchServe as much as possible relies on automated tools to do security scanning
 7. If you intend to run multiple models in parallel with shared memory, it is your responsibility to ensure the models do not interact or access each other's data. The primary areas of concern are tenant isolation, resource allocation, model sharing and hardware attacks.
 8. TorchServe supports token authorization: check [documentation](https://github.com/pytorch/serve/blob/master/docs/token_authorization_api.md) for more information.
 
-
-
-
 ## Reporting a Vulnerability
 
-If you find a serious vulnerability please report it to https://www.facebook.com/whitehat and [email protected]
+If you find a vulnerability please report it to https://www.facebook.com/whitehat and [email protected]

binaries/conda/build_packages.py (+7 −1)

@@ -22,7 +22,13 @@
 PACKAGES = ["torchserve", "torch-model-archiver", "torch-workflow-archiver"]
 
 # conda convert supported platforms https://docs.conda.io/projects/conda-build/en/stable/resources/commands/conda-convert.html
-PLATFORMS = ["linux-64", "osx-64", "win-64", "osx-arm64"]  # Add a new platform here
+PLATFORMS = [
+    "linux-64",
+    "osx-64",
+    "win-64",
+    "osx-arm64",
+    "linux-aarch64",
+]  # Add a new platform here
 
 if os.name == "nt":
     # Assumes miniconda is installed in windows
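The hunk above adds `linux-aarch64` as a new `conda convert` target. The actual invocation in `build_packages.py` is outside this hunk, so the following is only a hypothetical sketch of how such a platform list typically drives the conversion commands; the package filename is illustrative:

```python
PLATFORMS = [
    "linux-64",
    "osx-64",
    "win-64",
    "osx-arm64",
    "linux-aarch64",  # newly added target
]

def convert_commands(package_path, output_dir="output"):
    # One `conda convert` command per target platform; appending an entry to
    # PLATFORMS automatically yields one more conversion.
    return [
        ["conda", "convert", "--platform", platform, package_path, "-o", output_dir]
        for platform in PLATFORMS
    ]

cmds = convert_commands("linux-64/torchserve-example.tar.bz2")
print(len(cmds))  # 5
```

Keeping the list as data means adding a platform is a one-line change, which is exactly what this commit does.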

docs/configuration.md (+10 −2)

@@ -93,7 +93,7 @@ inference_address=https://127.0.0.1:8443
 inference_address=https://172.16.1.10:8080
 ```
 
-### Configure TorchServe gRPC listening addresses and ports
+### Configure TorchServe gRPC listening addresses, ports and max connection age
 The inference gRPC API is listening on port 7070, and the management gRPC API is listening on port 7071 on localhost by default.
 
 To configure different addresses use following properties
@@ -106,7 +106,15 @@ To configure different ports use following properties
 * `grpc_inference_port`: Inference gRPC API binding port. Default: 7070
 * `grpc_management_port`: management gRPC API binding port. Default: 7071
 
-Here are a couple of examples:
+To configure [max connection age](https://grpc.github.io/grpc-java/javadoc/io/grpc/netty/NettyServerBuilder.html#maxConnectionAge(long,java.util.concurrent.TimeUnit)) (milliseconds)
+
+* `grpc_inference_max_connection_age_ms`: Inference gRPC max connection age. Default: Infinite
+* `grpc_management_max_connection_age_ms`: Management gRPC max connection age. Default: Infinite
+
+To configure [max connection age grace](https://grpc.github.io/grpc-java/javadoc/io/grpc/netty/NettyServerBuilder.html#maxConnectionAgeGrace(long,java.util.concurrent.TimeUnit)) (milliseconds)
+
+* `grpc_inference_max_connection_age_grace_ms`: Inference gRPC max connection age grace. Default: Infinite
+* `grpc_management_max_connection_age_grace_ms`: Management gRPC max connection age grace. Default: Infinite
 
 ### Enable SSL
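The four new properties are set in `config.properties` like any other TorchServe option. A sketch with illustrative values only (the property names come from the diff above; the numbers are examples, not defaults — the documented default is infinite):

```properties
# Illustrative: recycle gRPC connections after 5 minutes,
# allowing 30 extra seconds of grace for in-flight calls to finish.
grpc_inference_max_connection_age_ms=300000
grpc_inference_max_connection_age_grace_ms=30000
grpc_management_max_connection_age_ms=300000
grpc_management_max_connection_age_grace_ms=30000
```

Bounding connection age is useful behind L4 load balancers, where long-lived gRPC connections otherwise pin all traffic to one backend.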
