Skip to content

Commit 4c7080d

Browse files
committed
Merge branch 'api_change' of https://github.com/pytorch/serve into api_change
2 parents 55cedd5 + 599c635 commit 4c7080d

File tree

82 files changed

+2116
-339
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

82 files changed

+2116
-339
lines changed

.github/workflows/benchmark_nightly.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,10 @@ jobs:
2424
cd ..
2525
pwd
2626
rm -rf _tool
27-
- name: Setup Python 3.8
28-
uses: actions/setup-python@v4
27+
- name: Setup Python 3.9
28+
uses: actions/setup-python@v5
2929
with:
30-
python-version: 3.8
30+
python-version: 3.9
3131
architecture: x64
3232
- name: Setup Java 17
3333
uses: actions/setup-java@v3

.github/workflows/ci_cpu.yml

+10-4
Original file line number | Diff line number | Diff line change
@@ -21,12 +21,18 @@ jobs:
2121
strategy:
2222
fail-fast: false
2323
matrix:
24-
os: [ubuntu-20.04, macOS-latest]
24+
os: [ubuntu-20.04, macOS-latest, macos-14]
2525
steps:
26-
- name: Setup Python 3.8
27-
uses: actions/setup-python@v4
26+
- name: Setup Python for M1
27+
if: matrix.os == 'macos-14'
28+
uses: actions/setup-python@v5
2829
with:
29-
python-version: 3.8
30+
python-version: '3.10'
31+
- name: Setup Python for all other OS
32+
if: matrix.os != 'macos-14'
33+
uses: actions/setup-python@v5
34+
with:
35+
python-version: 3.9
3036
architecture: x64
3137
- name: Setup Java 17
3238
uses: actions/setup-java@v3

.github/workflows/ci_gpu.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -27,10 +27,10 @@ jobs:
2727
# deletes all hidden files in a directory
2828
sudo rm -rf ./.??* || true
2929
ls -la ./
30-
- name: Setup Python 3.8
31-
uses: actions/setup-python@v4
30+
- name: Setup Python 3.9
31+
uses: actions/setup-python@v5
3232
with:
33-
python-version: 3.8
33+
python-version: 3.9
3434
architecture: x64
3535
- name: Setup Java 17
3636
uses: actions/setup-java@v3

.github/workflows/codeql.yml

+4-4
Original file line number | Diff line number | Diff line change
@@ -36,11 +36,11 @@ jobs:
3636
uses: actions/checkout@v3
3737
with:
3838
submodules: recursive
39-
40-
- name: Setup Python 3.8
41-
uses: actions/setup-python@v4
39+
40+
- name: Setup Python 3.9
41+
uses: actions/setup-python@v5
4242
with:
43-
python-version: 3.8
43+
python-version: 3.9
4444
architecture: x64
4545
- name: Setup Java 17
4646
uses: actions/setup-java@v3

.github/workflows/doc-automation.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,10 @@ jobs:
88
build_docs_job:
99
runs-on: ubuntu-20.04
1010
steps:
11-
- name: Setup Python 3.8
12-
uses: actions/setup-python@v4
11+
- name: Setup Python 3.9
12+
uses: actions/setup-python@v5
1313
with:
14-
python-version: 3.8
14+
python-version: 3.9
1515
architecture: x64
1616
- name: Checkout
1717
uses: actions/checkout@v3

.github/workflows/docker-nightly-build.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,10 @@ jobs:
1515
sudo rm -rf ./* || true
1616
sudo rm -rf ./.??* || true
1717
ls -la ./
18-
- name: Setup Python 3.8
19-
uses: actions/setup-python@v4
18+
- name: Setup Python 3.9
19+
uses: actions/setup-python@v5
2020
with:
21-
python-version: 3.8
21+
python-version: 3.9
2222
architecture: x64
2323
- name: Checkout TorchServe
2424
uses: actions/checkout@v3

.github/workflows/kserve_cpu_tests.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,10 @@ jobs:
2424
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
2525
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
2626
echo "/usr/local/bin" >> $GITHUB_PATH
27-
- name: Setup Python 3.8
28-
uses: actions/setup-python@v4
27+
- name: Setup Python 3.9
28+
uses: actions/setup-python@v5
2929
with:
30-
python-version: 3.8
30+
python-version: 3.9
3131
architecture: x64
3232
- name: Install grpcurl
3333
run: |

.github/workflows/lint.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,10 @@ jobs:
1414
mypy:
1515
runs-on: ubuntu-20.04
1616
steps:
17-
- name: Setup Python 3.8
18-
uses: actions/setup-python@v4
17+
- name: Setup Python 3.9
18+
uses: actions/setup-python@v5
1919
with:
20-
python-version: 3.8
20+
python-version: 3.9
2121
architecture: x64
2222
- name: Checkout TorchServe
2323
uses: actions/checkout@v3

.github/workflows/official_release_docker.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -25,10 +25,10 @@ jobs:
2525
sudo rm -rf ./* || true
2626
sudo rm -rf ./.??* || true
2727
ls -la ./
28-
- name: Setup Python 3.8
29-
uses: actions/setup-python@v4
28+
- name: Setup Python 3.9
29+
uses: actions/setup-python@v5
3030
with:
31-
python-version: 3.8
31+
python-version: 3.9
3232
architecture: x64
3333
- name: Checkout TorchServe
3434
uses: actions/checkout@v3

.github/workflows/regression_tests_cpu.yml

+11-5
Original file line number | Diff line number | Diff line change
@@ -15,17 +15,23 @@ concurrency:
1515

1616
jobs:
1717
regression-cpu:
18-
# creates workflows for OS: ubuntu, macOS
18+
# creates workflows for OS: ubuntu, macOS, macOS M1
1919
runs-on: ${{ matrix.os }}
2020
strategy:
2121
fail-fast: false
2222
matrix:
23-
os: [ubuntu-20.04, macOS-latest]
23+
os: [ubuntu-20.04, macOS-latest, macos-14]
2424
steps:
25-
- name: Setup Python 3.8
26-
uses: actions/setup-python@v3
25+
- name: Setup Python for M1
26+
if: matrix.os == 'macos-14'
27+
uses: actions/setup-python@v5
2728
with:
28-
python-version: 3.8
29+
python-version: '3.10'
30+
- name: Setup Python for all other OS
31+
if: matrix.os != 'macos-14'
32+
uses: actions/setup-python@v5
33+
with:
34+
python-version: 3.9
2935
architecture: x64
3036
- name: Setup Java 17
3137
uses: actions/setup-java@v3

.github/workflows/regression_tests_gpu.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,10 @@ jobs:
3030
run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt-get update && sudo apt-get install git -y
3131
- name: Check git version
3232
run: git --version
33-
- name: Setup Python 3.8
34-
uses: actions/setup-python@v3
33+
- name: Setup Python 3.9
34+
uses: actions/setup-python@v5
3535
with:
36-
python-version: 3.8
36+
python-version: 3.9
3737
architecture: x64
3838
- name: Setup Java 17
3939
uses: actions/setup-java@v3

.gitmodules

+3
Original file line number | Diff line number | Diff line change
@@ -7,3 +7,6 @@
77
[submodule "cpp/third-party/llama2.c"]
88
path = cpp/third-party/llama2.c
99
url = https://github.com/karpathy/llama2.c
10+
[submodule "cpp/third-party/llama2.so"]
11+
path = cpp/third-party/llama2.so
12+
url = https://github.com/mreso/llama2.so.git

README.md

+10-8
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
![Docker Nightly build](https://github.com/pytorch/serve/actions/workflows/docker-nightly-build.yml/badge.svg)
55
![Benchmark Nightly](https://github.com/pytorch/serve/actions/workflows/benchmark_nightly.yml/badge.svg)
66
![Docker Regression Nightly](https://github.com/pytorch/serve/actions/workflows/regression_tests_docker.yml/badge.svg)
7+
![KServe Regression Nightly](https://github.com/pytorch/serve/actions/workflows/kserve_cpu_tests.yml/badge.svg)
78

89
TorchServe is a flexible and easy-to-use tool for serving and scaling PyTorch models in production.
910

@@ -55,26 +56,27 @@ docker pull pytorch/torchserve-nightly
5556
Refer to [torchserve docker](docker/README.md) for details.
5657

5758
## ⚡ Why TorchServe
58-
* Write once, run anywhere, on-prem, on-cloud, supports inference on CPUs, GPUs, AWS Inf1/Inf2/Trn1, Google Cloud TPUs, [Nvidia MPS](master/docs/nvidia_mps.md)
59+
* Write once, run anywhere, on-prem, on-cloud, supports inference on CPUs, GPUs, AWS Inf1/Inf2/Trn1, Google Cloud TPUs, [Nvidia MPS](docs/nvidia_mps.md)
5960
* [Model Management API](docs/management_api.md): multi model management with optimized worker to model allocation
6061
* [Inference API](docs/inference_api.md): REST and gRPC support for batched inference
6162
* [TorchServe Workflows](examples/Workflows/README.md): deploy complex DAGs with multiple interdependent models
6263
* Default way to serve PyTorch models in
6364
* [Sagemaker](https://aws.amazon.com/blogs/machine-learning/serving-pytorch-models-in-production-with-the-amazon-sagemaker-native-torchserve-integration/)
6465
* [Vertex AI](https://cloud.google.com/blog/topics/developers-practitioners/pytorch-google-cloud-how-deploy-pytorch-models-vertex-ai)
65-
* [Kubernetes](master/kubernetes) with support for [autoscaling](kubernetes#session-affinity-with-multiple-torchserve-pods), session-affinity, monitoring using Grafana works on-prem, AWS EKS, Google GKE, Azure AKS
66+
* [Kubernetes](kubernetes) with support for [autoscaling](kubernetes#session-affinity-with-multiple-torchserve-pods), session-affinity, monitoring using Grafana works on-prem, AWS EKS, Google GKE, Azure AKS
6667
* [Kserve](https://kserve.github.io/website/0.8/modelserving/v1beta1/torchserve/): Supports both v1 and v2 API, [autoscaling and canary deployments](kubernetes/kserve/README.md#autoscaling) for A/B testing
67-
* [Kubeflow](https://v0-5.kubeflow.org/docs/components/pytorchserving/)
68+
* [Kubeflow](https://v0-5.kubeflow.org/docs/components/pytorchserving/)
6869
* [MLflow](https://github.com/mlflow/mlflow-torchserve)
6970
* Export your model for optimized inference. Torchscript out of the box, [PyTorch Compiler](examples/pt2/README.md) preview, [ORT and ONNX](https://github.com/pytorch/serve/blob/master/docs/performance_guide.md), [IPEX](https://github.com/pytorch/serve/tree/master/examples/intel_extension_for_pytorch), [TensorRT](https://github.com/pytorch/serve/blob/master/docs/performance_guide.md), [FasterTransformer](https://github.com/pytorch/serve/tree/master/examples/FasterTransformer_HuggingFace_Bert), FlashAttention (Better Transformers)
7071
* [Performance Guide](docs/performance_guide.md): builtin support to optimize, benchmark, and profile PyTorch and TorchServe performance
7172
* [Expressive handlers](CONTRIBUTING.md): An expressive handler architecture that makes it trivial to support inferencing for your use case with [many supported out of the box](https://github.com/pytorch/serve/tree/master/ts/torch_handler)
72-
* [Metrics API](docs/metrics.md): out-of-the-box support for system-level metrics with [Prometheus exports](https://github.com/pytorch/serve/tree/master/examples/custom_metrics), custom metrics,
73+
* [Metrics API](docs/metrics.md): out-of-the-box support for system-level metrics with [Prometheus exports](https://github.com/pytorch/serve/tree/master/examples/custom_metrics), custom metrics,
7374
* [Large Model Inference Guide](docs/large_model_inference.md): With support for GenAI, LLMs including
75+
* [SOTA GenAI performance](https://github.com/pytorch/serve/tree/master/examples/pt2#torchcompile-genai-examples) using `torch.compile`
7476
* Fast Kernels with FlashAttention v2, continuous batching and streaming response
75-
* PyTorch [Tensor Parallel](examples/large_models/tp_llama) preview, [Pipeline Parallel](examples/large_models/Huggingface_pippy)
76-
* Microsoft [DeepSpeed](examples/large_models/deepspeed), [DeepSpeed-Mii](examples/large_models/deepspeed_mii)
77-
* Hugging Face [Accelerate](large_models/Huggingface_accelerate), [Diffusers](examples/diffusers)
77+
* PyTorch [Tensor Parallel](examples/large_models/tp_llama) preview, [Pipeline Parallel](examples/large_models/Huggingface_pippy)
78+
* Microsoft [DeepSpeed](examples/large_models/deepspeed), [DeepSpeed-Mii](examples/large_models/deepspeed_mii)
79+
* Hugging Face [Accelerate](examples/large_models/Huggingface_accelerate), [Diffusers](examples/diffusers)
7880
* Running large models on AWS [Sagemaker](https://docs.aws.amazon.com/sagemaker/latest/dg/large-model-inference-tutorials-torchserve.html) and [Inferentia2](https://pytorch.org/blog/high-performance-llama/)
7981
* Running [Llama 2 Chatbot locally on Mac](examples/LLM/llama2)
8082
* Monitoring using Grafana and [Datadog](https://www.datadoghq.com/blog/ai-integrations/#model-serving-and-deployment-vertex-ai-amazon-sagemaker-torchserve)
@@ -113,7 +115,7 @@ To learn more about how to contribute, see the contributor guide [here](https://
113115
## 📰 News
114116
* [High performance Llama 2 deployments with AWS Inferentia2 using TorchServe](https://pytorch.org/blog/high-performance-llama/)
115117
* [Naver Case Study: Transition From High-Cost GPUs to Intel CPUs and oneAPI powered Software with performance](https://pytorch.org/blog/ml-model-server-resource-saving/)
116-
* [Run multiple generative AI models on GPU using Amazon SageMaker multi-model endpoints with TorchServe and save up to 75% in inference costs](https://aws.amazon.com/blogs/machine-learning/run-multiple-generative-ai-models-on-gpu-using-amazon-sagemaker-multi-model-endpoints-with-torchserve-and-save-up-to-75-in-inference-costs/)
118+
* [Run multiple generative AI models on GPU using Amazon SageMaker multi-model endpoints with TorchServe and save up to 75% in inference costs](https://pytorch.org/blog/amazon-sagemaker-w-torchserve/)
117119
* [Deploying your Generative AI model in only four steps with Vertex AI and PyTorch](https://cloud.google.com/blog/products/ai-machine-learning/get-your-genai-model-going-in-four-easy-steps)
118120
* [PyTorch Model Serving on Google Cloud TPU v5](https://cloud.google.com/tpu/docs/v5e-inference#pytorch-model-inference-and-serving)
119121
* [Monitoring using Datadog](https://www.datadoghq.com/blog/ai-integrations/#model-serving-and-deployment-vertex-ai-amazon-sagemaker-torchserve)

benchmarks/README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ The benchmarks measure the performance of TorchServe on various models and bench
44

55
We currently support benchmarking with JMeter, Apache Bench and Locust. One can also profile backend code with snakeviz.
66

7-
* [Benchmarking with Apache Bench](#benchmarking-with-apache-bench)
7+
* [Benchmarking with Locust/Apache Bench](#benchmarking-with-locustapache-bench)
88
* [Auto Benchmarking with Apache Bench](#auto-benchmarking-with-apache-bench)
99
* [Benchmarking and Profiling with JMeter](jmeter.md)
1010

cpp/CMakeLists.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
1+
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
22
project(torchserve_cpp VERSION 0.1)
33

44
set(CMAKE_CXX_STANDARD 17)

0 commit comments

Comments
 (0)