
Commit 15a9fd9

Authored Mar 7, 2024
Merge branch 'master' into integrate_sanity_tests_with_pytest
2 parents 064eab6 + 14e8d6f commit 15a9fd9

File tree

16 files changed, +237 -193 lines changed


‎.github/workflows/ci-cpu-cpp.yml

+33-8
@@ -16,17 +16,42 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-20.04, macOS-latest]
+        os: [ubuntu-20.04]
     steps:
+      # - name: Setup Python for M1
+      #   if: matrix.os == 'macos-14'
+      #   uses: actions/setup-python@v5
+      #   with:
+      #     python-version: '3.10'
+      - name: Setup Python for all other OS
+        if: matrix.os != 'macos-14'
+        uses: actions/setup-python@v5
+        with:
+          python-version: 3.9
+          architecture: x64
+      - name: Setup Java 17
+        uses: actions/setup-java@v3
+        with:
+          distribution: 'zulu'
+          java-version: '17'
       - name: Checkout TorchServe
-        uses: actions/checkout@v2
-      - name: Install libtorch - macOS
-        if: matrix.os == 'macOS-latest'
-        run: |
-          brew install libtorch
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+      # - name: Install libtorch - macOS
+      #   if: matrix.os == 'macOS-latest'
+      #   run: |
+      #     brew install libtorch
       - name: Install dependencies
         run: |
-          python ts_scripts/install_dependencies.py --environment=dev --cpp
+          sudo apt update && python ts_scripts/install_dependencies.py --environment=dev --cpp
+      - name: Install TorchServe
+        run: |
+          python ts_scripts/install_from_src.py
+      - name: Print Env
+        run: |
+          python ts_scripts/print_env_info.py
       - name: Build
         run: |
-          cd cpp && ./build.sh
+          cd cpp && rm -rf _build && sudo mkdir /mnt/_build && sudo chmod 777 /mnt/_build && mkdir _build && sudo mount --bind /mnt/_build _build
+          ./build.sh

‎.gitmodules

-3
@@ -1,9 +1,6 @@
 [submodule "third_party/google/rpc"]
   path = third_party/google/rpc
   url = https://github.com/googleapis/googleapis.git
-[submodule "cpp/third-party/llama.cpp"]
-  path = cpp/third-party/llama.cpp
-  url = https://github.com/ggerganov/llama.cpp.git
 [submodule "cpp/third-party/llama2.c"]
   path = cpp/third-party/llama2.c
   url = https://github.com/karpathy/llama2.c

‎cpp/CMakeLists.txt

+15-4
@@ -18,10 +18,6 @@ if(CLANG_FORMAT_EXE)
     ${PROJECT_SOURCE_DIR}/test/*.hh
   )

-  add_custom_target(format
-    COMMAND
-    ${CLANG_FORMAT_EXE} -i -style=google ${ALL_CXX_SOURCE_FILES}
-  )
 endif()


@@ -31,6 +27,21 @@ find_package(fmt REQUIRED)
 find_package(gflags REQUIRED)
 find_package(Torch REQUIRED)

+include(FetchContent)
+
+FetchContent_Declare(
+  yaml-cpp
+  GIT_REPOSITORY https://github.com/jbeder/yaml-cpp.git
+  GIT_TAG 0.8.0 # Can be a tag (yaml-cpp-x.x.x), a commit hash, or a branch name (master)
+)
+FetchContent_GetProperties(yaml-cpp)
+
+if(NOT yaml-cpp_POPULATED)
+  message(STATUS "Fetching yaml-cpp...")
+  FetchContent_Populate(yaml-cpp)
+  add_subdirectory(${yaml-cpp_SOURCE_DIR} ${yaml-cpp_BINARY_DIR})
+endif()
+
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

 include_directories(${TORCH_INCLUDE_DIRS})

‎cpp/README.md

+12-14
@@ -5,36 +5,34 @@
 * cmake version: 3.18+
 ## Installation and Running TorchServe CPP

+This installation instruction assumes that TorchServe is already installed through pip/conda/source. If this is not the case install it after the `Install dependencies` step through your preferred method.
+
 ### Install dependencies
 ```
 cd serve
 python ts_scripts/install_dependencies.py --cpp --environment dev [--cuda=cu121|cu118]
 ```
 ### Building the backend
+Don't forget to install or update TorchServe at this point if it wasn't previously installed.
 ```
 ## Dev Build
 cd cpp
 ./build.sh [-g cu121|cu118]

-## Install TorchServe from source
-cd ..
-python ts_scripts/install_from_src.py
-```
-### Set Environment Var
-#### On Mac
-```
-export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:$(pwd)/_build/_deps/libtorch/lib
-```
-#### On Ubuntu
-```
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/_build/_deps/libtorch/lib
 ```

 ### Run TorchServe
 ```
 mkdir model_store
 torchserve --ncs --start --model-store model_store
 ```
+
+### Clean the build directory
+To clean the build directory in order to rebuild from scratch simply delete the cpp/_build directory with
+```
+rm -rf cpp/_build
+```
+
 ## Backend
 TorchServe cpp backend can run as a process, which is similar to [TorchServe Python backend](https://github.com/pytorch/serve/tree/master/ts). By default, TorchServe supports torch scripted model in cpp backend. Other platforms such as MxNet, ONNX can be supported through custom handlers following the TorchScript example [src/backends/handler/torch_scripted_handler.hh](https://github.com/pytorch/serve/blob/master/cpp/src/backends/handler/torch_scripted_handler.hh).
 ### Custom Handler
@@ -89,11 +87,11 @@ python -c "import ts; from pathlib import Path; print((Path(ts.__file__).parent
 3. Make sure you have the right conda/venv environment activated during building that you're also using to run TorchServe.

 Q: Build on Mac fails with `Library not loaded: @rpath/libomp.dylib`
-A: Install libomp with brew and link in /usr/local/lib
+A: Install libomp with brew and link in /usr/local/lib
 ```bash
 brew install libomp
 sudo ln -s /opt/homebrew/opt/libomp/lib/libomp.dylib /usr/local/lib/libomp.dylib
 ```

 Q: When loading a handler which uses a model exported with torch._export.aot_compile the handler dies with "error: Error in dlopen: MODEL.SO : undefined symbol: SOME_SYMBOL".
-A: Make sure that you are using matching libtorch and Pytorch versions for inference and export, respectively.
+A: Make sure that you are using matching libtorch and Pytorch versions for inference and export, respectively.

‎cpp/build.sh

+28-96
@@ -28,14 +28,9 @@ function install_folly() {
   echo -e "${COLOR_GREEN}[ INFO ] Building Folly ${COLOR_OFF}"
   cd $FOLLY_SRC_DIR

-  if [ "$PLATFORM" = "Linux" ]; then
-    SUDO="sudo"
-  elif [ "$PLATFORM" = "Mac" ]; then
-    SUDO=""
-  fi
-  $SUDO ./build/fbcode_builder/getdeps.py install-system-deps --recursive
+  ./build/fbcode_builder/getdeps.py install-system-deps --recursive

-  $SUDO ./build/fbcode_builder/getdeps.py build \
+  ./build/fbcode_builder/getdeps.py build \
     --allow-system-packages \
     --scratch-path $FOLLY_BUILD_DIR \
     --extra-cmake-defines='{"CMAKE_CXX_FLAGS": "-fPIC -D_GLIBCXX_USE_CXX11_ABI=1"}'
@@ -47,36 +42,29 @@ function install_folly() {
   echo "$FOLLY_BUILD_DIR/installed"
 }

-function install_kineto() {
-  if [ "$PLATFORM" = "Linux" ]; then
-    echo -e "${COLOR_GREEN}[ INFO ] Skip install kineto on Linux ${COLOR_OFF}"
-  elif [ "$PLATFORM" = "Mac" ]; then
-    KINETO_SRC_DIR=$BASE_DIR/third-party/kineto
-
-    if [ ! -d "$KINETO_SRC_DIR/libkineto/build" ] ; then
-      cd $KINETO_SRC_DIR/libkineto
-      mkdir build && cd build
-      cmake ..
-      make install
-    fi
-  fi
-
-  cd "$BWD" || exit
-}
-
 function install_libtorch() {
+  cd "$DEPS_DIR" || exit
   TORCH_VERSION="2.2.1"
+  if [ -d "$DEPS_DIR/libtorch" ]; then
+    RAW_VERSION=`cat "$DEPS_DIR/libtorch/build-version"`
+    VERSION=`cat "$DEPS_DIR/libtorch/build-version" | cut -d "+" -f 1`
+    if [ "$USE_NIGHTLIES" = "true" ] && [[ ! "${RAW_VERSION}" =~ .*"dev".* ]]; then
+      rm -rf "$DEPS_DIR/libtorch"
+    elif [ "$USE_NIGHTLIES" == "" ] && [ "$VERSION" != "$TORCH_VERSION" ]; then
+      rm -rf "$DEPS_DIR/libtorch"
+    fi
+  fi
   if [ "$PLATFORM" = "Mac" ]; then
     if [ ! -d "$DEPS_DIR/libtorch" ]; then
       if [[ $(uname -m) == 'x86_64' ]]; then
         echo -e "${COLOR_GREEN}[ INFO ] Install libtorch on Mac x86_64 ${COLOR_OFF}"
-        wget https://download.pytorch.org/libtorch/cpu/libtorch-macos-x86_64-${TORCH_VERSION}.zip
-        unzip libtorch-macos-x86_64-${TORCH_VERSION}.zip
+        wget -q https://download.pytorch.org/libtorch/cpu/libtorch-macos-x86_64-${TORCH_VERSION}.zip
+        unzip -q libtorch-macos-x86_64-${TORCH_VERSION}.zip
         rm libtorch-macos-x86_64-${TORCH_VERSION}.zip
       else
         echo -e "${COLOR_GREEN}[ INFO ] Install libtorch on Mac arm64 ${COLOR_OFF}"
-        wget https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-${TORCH_VERSION}.zip
-        unzip libtorch-macos-arm64-${TORCH_VERSION}.zip
+        wget -q https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-${TORCH_VERSION}.zip
+        unzip -q libtorch-macos-arm64-${TORCH_VERSION}.zip
         rm libtorch-macos-arm64-${TORCH_VERSION}.zip
       fi
     fi
@@ -86,27 +74,17 @@ function install_libtorch() {
     echo -e "${COLOR_RED}[ ERROR ] Unknown platform: $PLATFORM ${COLOR_OFF}"
     exit 1
   else # Linux
-    if [ -d "$DEPS_DIR/libtorch" ]; then
-      RAW_VERSION=`cat "$DEPS_DIR/libtorch/build-version"`
-      VERSION=`cat "$DEPS_DIR/libtorch/build-version" | cut -d "+" -f 1`
-      if [ "$USE_NIGHTLIES" = "true" ] && [[ ! "${RAW_VERSION}" =~ .*"dev".* ]]; then
-        rm -rf "$DEPS_DIR/libtorch"
-      elif [ "$USE_NIGHTLIES" == "" ] && [ "$VERSION" != "$TORCH_VERSION" ]; then
-        rm -rf "$DEPS_DIR/libtorch"
-      fi
-    fi
     if [ ! -d "$DEPS_DIR/libtorch" ]; then
-      cd "$DEPS_DIR" || exit
       echo -e "${COLOR_GREEN}[ INFO ] Install libtorch on Linux ${COLOR_OFF}"
       if [ "$USE_NIGHTLIES" == true ]; then
        URL=https://download.pytorch.org/libtorch/nightly/${CUDA}/libtorch-cxx11-abi-shared-with-deps-latest.zip
       else
        URL=https://download.pytorch.org/libtorch/${CUDA}/libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}%2B${CUDA}.zip
       fi
-      wget $URL
+      wget -q $URL
       ZIP_FILE=$(basename "$URL")
       ZIP_FILE="${ZIP_FILE//%2B/+}"
-      unzip $ZIP_FILE
+      unzip -q $ZIP_FILE
       rm $ZIP_FILE
     fi
     echo -e "${COLOR_GREEN}[ INFO ] libtorch is installed ${COLOR_OFF}"
@@ -115,58 +93,22 @@ function install_libtorch() {
   cd "$BWD" || exit
 }

-function install_yaml_cpp() {
-  YAML_CPP_SRC_DIR=$BASE_DIR/third-party/yaml-cpp
-  YAML_CPP_BUILD_DIR=$DEPS_DIR/yaml-cpp-build
-
-  if [ ! -d "$YAML_CPP_BUILD_DIR" ] ; then
-    echo -e "${COLOR_GREEN}[ INFO ] Building yaml-cpp ${COLOR_OFF}"
-
-    if [ "$PLATFORM" = "Linux" ]; then
-      SUDO="sudo"
-    elif [ "$PLATFORM" = "Mac" ]; then
-      SUDO=""
-    fi
-
-    mkdir $YAML_CPP_BUILD_DIR
-    cd $YAML_CPP_BUILD_DIR
-    cmake $YAML_CPP_SRC_DIR -DYAML_BUILD_SHARED_LIBS=ON -DYAML_CPP_BUILD_TESTS=OFF -DCMAKE_CXX_FLAGS="-fPIC"
-    $SUDO make install
-
-    echo -e "${COLOR_GREEN}[ INFO ] yaml-cpp is installed ${COLOR_OFF}"
-  fi
-
-  cd "$BWD" || exit
-}
-
-function build_llama_cpp() {
-  BWD=$(pwd)
-  LLAMA_CPP_SRC_DIR=$BASE_DIR/third-party/llama.cpp
-  cd "${LLAMA_CPP_SRC_DIR}"
-  if [ "$PLATFORM" = "Mac" ]; then
-    make LLAMA_METAL=OFF -j
-  else
-    make -j
-  fi
-  cd "$BWD" || exit
-}
-
 function prepare_test_files() {
   echo -e "${COLOR_GREEN}[ INFO ]Preparing test files ${COLOR_OFF}"
   local EX_DIR="${TR_DIR}/examples/"
   rsync -a --link-dest=../../test/resources/ ${BASE_DIR}/test/resources/ ${TR_DIR}/
   if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin" ]; then
-    wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin -O "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin"
+    wget -q https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin -O "${EX_DIR}/babyllama/babyllama_handler/tokenizer.bin"
   fi
   if [ ! -f "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin" ]; then
-    wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin -O "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin"
+    wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin -O "${EX_DIR}/babyllama/babyllama_handler/stories15M.bin"
   fi
   # PT2.2 torch.expport does not support Mac
   if [ "$PLATFORM" = "Linux" ]; then
     if [ ! -f "${EX_DIR}/aot_inductor/llama_handler/stories15M.so" ]; then
       local HANDLER_DIR=${EX_DIR}/aot_inductor/llama_handler/
       if [ ! -f "${HANDLER_DIR}/stories15M.pt" ]; then
-        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true -O "${HANDLER_DIR}/stories15M.pt"
+        wget -q https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt?download=true -O "${HANDLER_DIR}/stories15M.pt"
       fi
       local LLAMA_SO_DIR=${BASE_DIR}/third-party/llama2.so/
       PYTHONPATH=${LLAMA_SO_DIR}:${PYTHONPATH} python ${BASE_DIR}/../examples/cpp/aot_inductor/llama2/compile.py --checkpoint ${HANDLER_DIR}/stories15M.pt ${HANDLER_DIR}/stories15M.so
@@ -221,12 +163,11 @@ function build() {

   # Build torchserve_cpp with cmake
   cd "$BWD" || exit
-  YAML_CPP_CMAKE_DIR=$DEPS_DIR/yaml-cpp-build
   FOLLY_CMAKE_DIR=$DEPS_DIR/folly-build/installed
   find $FOLLY_CMAKE_DIR -name "lib*.*" -exec ln -s "{}" $LIBS_DIR/ \;
   if [ "$PLATFORM" = "Linux" ]; then
     cmake \
-      -DCMAKE_PREFIX_PATH="$DEPS_DIR;$FOLLY_CMAKE_DIR;$YAML_CPP_CMAKE_DIR;$DEPS_DIR/libtorch" \
+      -DCMAKE_PREFIX_PATH="$DEPS_DIR;$FOLLY_CMAKE_DIR;$DEPS_DIR/libtorch" \
       -DCMAKE_INSTALL_PREFIX="$PREFIX" \
       "$MAYBE_BUILD_QUIC" \
       "$MAYBE_BUILD_TESTS" \
@@ -242,8 +183,10 @@ function build() {
       export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/bin/nvcc
     fi
   elif [ "$PLATFORM" = "Mac" ]; then
+    export LIBRARY_PATH=${LIBRARY_PATH}:`brew --prefix icu4c`/lib:`brew --prefix libomp`/lib
+
     cmake \
-      -DCMAKE_PREFIX_PATH="$DEPS_DIR;$FOLLY_CMAKE_DIR;$YAML_CPP_CMAKE_DIR;$DEPS_DIR/libtorch" \
+      -DCMAKE_PREFIX_PATH="$DEPS_DIR;$FOLLY_CMAKE_DIR;$DEPS_DIR/libtorch" \
       -DCMAKE_INSTALL_PREFIX="$PREFIX" \
       "$MAYBE_BUILD_QUIC" \
       "$MAYBE_BUILD_TESTS" \
@@ -252,9 +195,10 @@ function build() {
       "$MAYBE_USE_STATIC_DEPS" \
       "$MAYBE_LIB_FUZZING_ENGINE" \
       "$MAYBE_NIGHTLIES" \
+      "-DLLAMA_METAL=OFF" \
       ..

-    export LIBRARY_PATH=${LIBRARY_PATH}:/usr/local/opt/icu4c/lib
+
   else
     # TODO: Windows
     echo -e "${COLOR_RED}[ ERROR ] Unknown platform: $PLATFORM ${COLOR_OFF}"
@@ -282,16 +226,8 @@ function symlink_torch_libs() {
   fi
 }

-function symlink_yaml_cpp_lib() {
-  if [ "$PLATFORM" = "Linux" ]; then
-    ln -sf ${DEPS_DIR}/yaml-cpp-build/*.so* ${LIBS_DIR}
-  elif [ "$PLATFORM" = "Mac" ]; then
-    ln -sf ${DEPS_DIR}/yaml-cpp-build/*.dylib* ${LIBS_DIR}
-  fi
-}
-
 function install_torchserve_cpp() {
-  TARGET_DIR=$BASE_DIR/../ts/cpp/
+  TARGET_DIR=`python -c "import ts; from pathlib import Path; print(Path(ts.__file__).parent / 'cpp')"`

   if [ -d $TARGET_DIR ]; then
     rm -rf $TARGET_DIR
@@ -370,12 +306,8 @@ cd $BASE_DIR
 git submodule update --init --recursive

 install_folly
-#install_kineto
 install_libtorch
-install_yaml_cpp
-build_llama_cpp
 prepare_test_files
 build
 symlink_torch_libs
-symlink_yaml_cpp_lib
 install_torchserve_cpp
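
The reworked `install_libtorch` above hoists the cached-libtorch check ahead of the platform branch: if a `libtorch` directory already exists under the deps dir, its `build-version` file decides whether the cache is stale (a nightly was requested but a release is cached, or the pinned version changed). Below is a rough Python sketch of that decision for readers who find the bash harder to follow; the `build-version` contents mirror the script, while the paths and helper name are illustrative only.

```python
# Rough Python sketch of the libtorch cache check performed in cpp/build.sh above.
# Assumes a libtorch directory containing a "build-version" file such as
# "2.2.1+cpu" (release) or "2.3.0.dev20240301+cpu" (nightly); paths are illustrative.
from pathlib import Path
import shutil

TORCH_VERSION = "2.2.1"

def should_refresh_libtorch(deps_dir: Path, use_nightlies: bool) -> bool:
    build_version = deps_dir / "libtorch" / "build-version"
    if not build_version.exists():
        return True  # nothing cached yet, download
    raw_version = build_version.read_text().strip()   # e.g. "2.2.1+cpu"
    version = raw_version.split("+", 1)[0]             # e.g. "2.2.1"
    if use_nightlies and "dev" not in raw_version:
        return True  # nightly requested but a release build is cached
    if not use_nightlies and version != TORCH_VERSION:
        return True  # pinned version changed since the cache was created
    return False

if should_refresh_libtorch(Path("cpp/_build/_deps"), use_nightlies=False):
    shutil.rmtree("cpp/_build/_deps/libtorch", ignore_errors=True)
    # ...download and unzip the matching libtorch archive here...
```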

‎cpp/third-party/llama.cpp

-1
This file was deleted.

‎docs/configuration.md

+1
@@ -297,6 +297,7 @@ e.g. : To allow base URLs `https://s3.amazonaws.com/` and `https://torchserve.py
 * For security reason, `use_env_allowed_urls=true` is required in config.properties to read `allowed_urls` from environment variable.
 * `workflow_store` : Path of workflow store directory. Defaults to model store directory.
 * `disable_system_metrics` : Disable collection of system metrics when set to "true". Default value is "false".
+* `system_metrics_cmd`: The customized system metrics python script name with arguments. For example: `ts/metrics/metric_collector.py --gpu 0`. Default: empty which means TorchServe collects system metrics via "ts/metrics/metric_collector.py --gpu $CUDA_VISIBLE_DEVICES".

 **NOTE**

‎docs/token_authorization_api.md

+6-2
@@ -1,8 +1,12 @@
 # TorchServe token authorization API

+## Setup
+1. Download the jar files from [Maven](https://mvnrepository.com/artifact/org.pytorch/torchserve-endpoint-plugin)
+2. Enable token authorization by adding the `--plugins-path /path/to/the/jar/files` flag at start up with the path leading to the downloaded jar files.
+
 ## Configuration
-1. Enable token authorization by adding the provided plugin at start using the `--plugins-path` command.
-2. Torchserve will enable token authorization if the plugin is provided. In the current working directory a file `key_file.json` will be generated.
+1. Torchserve will enable token authorization if the plugin is provided. Expected log statement `[INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager - Loading plugin for endpoint token`
+2. In the current working directory a file `key_file.json` will be generated.
   1. Example key file:

 ```python

‎examples/cpp/llamacpp/CMakeLists.txt

+13-13
@@ -1,20 +1,20 @@
 set(LLAMACPP_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/third-party/llama.cpp")
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)

 add_library(llamacpp_handler SHARED src/llamacpp_handler.cc)

-set(MY_OBJECT_FILES
-  ${LLAMACPP_SRC_DIR}/ggml.o
-  ${LLAMACPP_SRC_DIR}/llama.o
-  ${LLAMACPP_SRC_DIR}/common.o
-  ${LLAMACPP_SRC_DIR}/ggml-quants.o
-  ${LLAMACPP_SRC_DIR}/ggml-alloc.o
-  ${LLAMACPP_SRC_DIR}/grammar-parser.o
-  ${LLAMACPP_SRC_DIR}/console.o
-  ${LLAMACPP_SRC_DIR}/build-info.o
-  ${LLAMACPP_SRC_DIR}/ggml-backend.o
-
+FetchContent_Declare(
+  llama.cpp
+  GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
+  GIT_TAG b2241
 )
+FetchContent_GetProperties(llama.cpp)
+
+if(NOT llama.cpp_POPULATED)
+  message(STATUS "Fetching llama.cpp...")
+  FetchContent_Populate(llama.cpp)
+  add_subdirectory(${llama.cpp_SOURCE_DIR} ${llama.cpp_BINARY_DIR})
+endif()

-target_sources(llamacpp_handler PRIVATE ${MY_OBJECT_FILES})
 target_include_directories(llamacpp_handler PUBLIC ${LLAMACPP_SRC_DIR})
-target_link_libraries(llamacpp_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
+target_link_libraries(llamacpp_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES} common llama)

‎examples/cpp/llamacpp/src/llamacpp_handler.cc

+1-1
@@ -44,7 +44,7 @@ LlamaCppHandler::LoadModel(
   params.main_gpu = 0;
   params.n_gpu_layers = 35;

-  llama_backend_init(params.numa);
+  llama_backend_init();
   ctx_params = llama_context_default_params();
   model_params = llama_model_default_params();
   llamamodel = llama_load_model_from_file(params.model.c_str(), model_params);

‎frontend/server/src/main/java/org/pytorch/serve/metrics/MetricCollector.java

+15-7
@@ -7,6 +7,7 @@
 import java.io.OutputStream;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import org.apache.commons.io.IOUtils;
@@ -32,16 +33,23 @@ public MetricCollector(ConfigManager configManager) {
     public void run() {
         try {
             // Collect System level Metrics
-            String[] args = new String[4];
-            args[0] = configManager.getPythonExecutable();
-            args[1] = "ts/metrics/metric_collector.py";
-            args[2] = "--gpu";
-            args[3] = String.valueOf(ConfigManager.getInstance().getNumberOfGpu());
+            List<String> args = new ArrayList<>();
+            args.add(configManager.getPythonExecutable());
+            String systemMetricsCmd = configManager.getSystemMetricsCmd();
+            if (systemMetricsCmd.isEmpty()) {
+                systemMetricsCmd =
+                        String.format(
+                                "%s --gpu %s",
+                                "ts/metrics/metric_collector.py",
+                                String.valueOf(configManager.getNumberOfGpu()));
+            }
+            args.addAll(Arrays.asList(systemMetricsCmd.split("\\s+")));
             File workingDir = new File(configManager.getModelServerHome());

             String[] envp = EnvironmentUtils.getEnvString(workingDir.getAbsolutePath(), null, null);
-
-            final Process p = Runtime.getRuntime().exec(args, envp, workingDir); // NOPMD
+            final Process p =
+                    Runtime.getRuntime()
+                            .exec(args.toArray(new String[0]), envp, workingDir); // NOPMD
             ModelManager modelManager = ModelManager.getInstance();
             Map<Integer, WorkerThread> workerMap = modelManager.getWorkers();
             try (OutputStream os = p.getOutputStream()) {
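
The new `MetricCollector.run()` logic falls back to the bundled collector when `system_metrics_cmd` is empty and otherwise splits the configured command on whitespace before prepending the Python executable. The same fallback is sketched below as a small, hypothetical Python snippet; the executable name and GPU count are stand-ins, and the authoritative behavior is the Java code above.

```python
# Hypothetical Python sketch of the fallback in MetricCollector.run() above.
# "python3" and num_gpus stand in for the configured python executable
# and ConfigManager.getNumberOfGpu().
import re

def build_metrics_args(python_exe, system_metrics_cmd, num_gpus):
    if not system_metrics_cmd:
        # Default: the bundled collector with one --gpu argument.
        system_metrics_cmd = f"ts/metrics/metric_collector.py --gpu {num_gpus}"
    # Split the configured command on whitespace, mirroring split("\\s+") in Java.
    return [python_exe] + re.split(r"\s+", system_metrics_cmd.strip())

print(build_metrics_args("python3", "", 2))
# ['python3', 'ts/metrics/metric_collector.py', '--gpu', '2']
print(build_metrics_args("python3", "ts/metrics/metric_collector.py --gpu 0", 2))
# ['python3', 'ts/metrics/metric_collector.py', '--gpu', '0']
```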

‎frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java

+8-1
@@ -117,6 +117,7 @@ public final class ConfigManager {
     private static final String MODEL_SNAPSHOT = "model_snapshot";
     private static final String MODEL_CONFIG = "models";
     private static final String VERSION = "version";
+    private static final String SYSTEM_METRICS_CMD = "system_metrics_cmd";

     // Configuration default values
     private static final String DEFAULT_TS_ALLOWED_URLS = "file://.*|http(s)?://.*";
@@ -559,6 +560,10 @@ public String getCertificateFile() {
         return prop.getProperty(TS_CERTIFICATE_FILE);
     }

+    public String getSystemMetricsCmd() {
+        return prop.getProperty(SYSTEM_METRICS_CMD, "");
+    }
+
     public SslContext getSslContext() throws IOException, GeneralSecurityException {
         List<String> supportedCiphers =
                 Arrays.asList(
@@ -734,7 +739,9 @@ public String dumpConfigurations() {
                 + "\nCPP log config: "
                 + (getTsCppLogConfig() == null ? "N/A" : getTsCppLogConfig())
                 + "\nModel config: "
-                + prop.getProperty(MODEL_CONFIG, "N/A");
+                + prop.getProperty(MODEL_CONFIG, "N/A")
+                + "\nSystem metrics command: "
+                + (getSystemMetricsCmd().isEmpty() ? "default" : getSystemMetricsCmd());
     }

     public boolean useNativeIo() {

‎kubernetes/kserve/kserve_wrapper/__main__.py

+21-31
@@ -28,52 +28,42 @@ def parse_config():
         model_store: the path in which the .mar file resides
     """
     separator = "="
-    keys = {}
+    ts_configuration = {}
     config_path = os.environ.get("CONFIG_PATH", DEFAULT_CONFIG_PATH)

     logging.info(f"Wrapper: loading configuration from {config_path}")

     with open(config_path) as f:
         for line in f:
-            if separator in line:
-                # Find the name and value by splitting the string
-                name, value = line.split(separator, 1)
-
-                # Assign key value pair to dict
-                # strip() removes white space from the ends of strings
-                keys[name.strip()] = value.strip()
-
-    keys["model_snapshot"] = json.loads(keys["model_snapshot"])
-    inference_address, management_address, grpc_inference_port, model_store = (
-        keys["inference_address"],
-        keys["management_address"],
-        keys["grpc_inference_port"],
-        keys["model_store"],
+            if not line.startswith("#"):
+                if separator in line:
+                    name, value = line.split(separator, 1)
+                    ts_configuration[name.strip()] = value.strip()
+
+    ts_configuration["model_snapshot"] = json.loads(
+        ts_configuration.get("model_snapshot", "{}")
     )

-    models = keys["model_snapshot"]["models"]
-    model_names = []
+    inference_address = ts_configuration.get(
+        "inference_address", DEFAULT_INFERENCE_ADDRESS
+    )
+    management_address = ts_configuration.get(
+        "management_address", DEFAULT_MANAGEMENT_ADDRESS
+    )
+    grpc_inference_port = ts_configuration.get(
+        "grpc_inference_port", DEFAULT_GRPC_INFERENCE_PORT
+    )
+    model_store = ts_configuration.get("model_store", DEFAULT_MODEL_STORE)

     # Get all the model_names
-    for model, value in models.items():
-        model_names.append(model)
+    model_names = ts_configuration["model_snapshot"].get("models", {}).keys()

-    if not inference_address:
-        inference_address = DEFAULT_INFERENCE_ADDRESS
     if not model_names:
         model_names = [DEFAULT_MODEL_NAME]
-    if not inference_address:
-        inference_address = DEFAULT_INFERENCE_ADDRESS
-    if not management_address:
-        management_address = DEFAULT_MANAGEMENT_ADDRESS
+
     inf_splits = inference_address.split(":")
-    if not grpc_inference_port:
-        grpc_inference_address = inf_splits[1] + ":" + DEFAULT_GRPC_INFERENCE_PORT
-    else:
-        grpc_inference_address = inf_splits[1] + ":" + grpc_inference_port
+    grpc_inference_address = inf_splits[1] + ":" + grpc_inference_port
     grpc_inference_address = grpc_inference_address.replace("/", "")
-    if not model_store:
-        model_store = DEFAULT_MODEL_STORE

     logging.info(
         "Wrapper : Model names %s, inference address %s, management address %s, grpc_inference_address, %s, model store %s",

‎ts_scripts/install_dependencies.py

+6-4
@@ -64,7 +64,9 @@
     "xz",
     "openssl",
     "libsodium",
-    "llv",
+    "icu4c",
+    "libomp",
+    "llvm",
 )

 CPP_DARWIN_DEPENDENCIES_LINK = (
@@ -286,13 +288,13 @@ def install_cpp_dependencies(self):
         os.system(f"brew install -f {' '.join(CPP_DARWIN_DEPENDENCIES)}")
         os.system(f"brew link {' '.join(CPP_DARWIN_DEPENDENCIES_LINK)}")
         os.system(
-            'ln -s "$(brew --prefix llvm)/bin/clang-format" "/usr/local/bin/clang-format"'
+            f'{self.sudo_cmd} ln -s "$(brew --prefix llvm)/bin/clang-format" "/usr/local/bin/clang-format"'
         )
         os.system(
-            'ln -s "$(brew --prefix llvm)/bin/clang-tidy" "/usr/local/bin/clang-tidy"'
+            f'{self.sudo_cmd} ln -s "$(brew --prefix llvm)/bin/clang-tidy" "/usr/local/bin/clang-tidy"'
         )
         os.system(
-            'ln -s "$(brew --prefix llvm)/bin/clang-apply-replacements" "/usr/local/bin/clang-apply-replacements"'
+            f'{self.sudo_cmd} ln -s "$(brew --prefix llvm)/bin/clang-apply-replacements" "/usr/local/bin/clang-apply-replacements"'
         )

     def install_neuronx_driver(self):

‎ts_scripts/print_env_info.py

+29
@@ -43,6 +43,8 @@

 npm_env = {"npm_pkg_version": []}

+cpp_env = {"LIBRARY_PATH": ""}
+

 def get_nvidia_smi():
     # Note: nvidia-smi is currently available only on Windows and Linux
@@ -284,6 +286,16 @@ def get_torch_model_archiver():
     return version


+def get_library_path():
+    platform = get_platform()
+    if platform == "darwin":
+        return os.environ.get("DYLD_LIBRARY_PATH", "")
+    elif platform == "linux":
+        return os.environ.get("LD_LIBRARY_PATH", "")
+    else:
+        return ""
+
+
 def populate_torchserve_env(torch_pkg):
     for pkg in torch_pkg:
         if pkg.split("==")[0] == "torch":
@@ -338,6 +350,10 @@ def populate_npm_env():
     npm_env["npm_pkg_version"] = get_npm_packages()


+def populate_cpp_env():
+    cpp_env["LIBRARY_PATH"] = get_library_path()
+
+
 def populate_env_info():
     # torchserve packages
     _, torch_list_output = get_pip_packages("torch")
@@ -361,6 +377,9 @@
     if get_platform() == "darwin":
         populate_npm_env()

+    if get_platform() in ("darwin", "linux"):
+        populate_cpp_env()
+

 env_info_fmt = """
 ------------------------------------------------------------------------------------------
@@ -403,18 +422,25 @@ def populate_env_info():
 {npm_pkg_version}
 """

+cpp_env_info_fmt = """
+Environment:
+library_path (LD_/DYLD_): {LIBRARY_PATH}
+"""
+

 def get_pretty_env_info(branch_name):
     global env_info_fmt
     global cuda_info_fmt
     global npm_info_fmt
+    global cpp_env_info_fmt
     populate_env_info()
     env_dict = {
         **torchserve_env,
         **python_env,
         **java_env,
         **os_info,
         "torchserve_branch": branch_name,
+        **cpp_env,
     }


@@ -425,6 +451,9 @@ def get_pretty_env_info(branch_name):
         env_dict.update(npm_env)
         env_info_fmt = env_info_fmt + "\n" + npm_info_fmt

+    if get_platform() in ("darwin", "linux"):
+        env_info_fmt = env_info_fmt + "\n" + cpp_env_info_fmt
+
     return env_info_fmt.format(**env_dict)

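
The new `cpp_env` section reports whichever dynamic-linker search path applies to the host: `DYLD_LIBRARY_PATH` on macOS, `LD_LIBRARY_PATH` on Linux, and an empty string elsewhere. A tiny standalone sketch of that selection is shown below; it uses `sys.platform` rather than the script's `get_platform()` helper, and the print line mirrors `cpp_env_info_fmt`.

```python
# Minimal sketch of the platform dispatch in get_library_path() above.
# Runs standalone; prints an empty value if the variable is unset.
import os
import sys

if sys.platform == "darwin":
    library_path = os.environ.get("DYLD_LIBRARY_PATH", "")
elif sys.platform.startswith("linux"):
    library_path = os.environ.get("LD_LIBRARY_PATH", "")
else:
    library_path = ""

print(f"library_path (LD_/DYLD_): {library_path}")
```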

‎ts_scripts/sanity_utils.py

+49-8
@@ -1,3 +1,4 @@
+import asyncio
 import glob
 import json
 import os
@@ -18,17 +19,57 @@
 )


-def run_markdown_link_checker():
-    print("## Started markdown link checker")
-    result = True
-    for mdfile in glob.glob("**/*.md", recursive=True):
+async def markdown_link_checker(in_queue, out_queue, n):
+    print(f"worker started {n}")
+    while True:
+        mdfile = await in_queue.get()
+        output = []
+        result = True
         cmd = f"markdown-link-check {mdfile} --config link_check_config.json"
-        print(f"## In directory: {os.getcwd()} | Executing command: {cmd}")
-        status = os.system(cmd)
+        output.append(f"## In directory: {os.getcwd()} | Executing command: {cmd}")
+        p = await asyncio.create_subprocess_shell(
+            cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+        )
+        while not p.stdout.at_eof():
+            line = await p.stdout.readline()
+            output.append(line.decode("utf-8"))
+
+        status = await p.wait()
         if status != 0:
-            print(f"## Broken links in file: {mdfile}")
+            output.append(f"## Broken links in file: {mdfile}")
             result = False
-    return result
+        await out_queue.put((result, output))
+
+
+async def run_markdown_link_checker_on_files(files):
+    results = []
+    tasks = []
+    in_queue = asyncio.Queue()
+    out_queue = asyncio.Queue()
+    for f in files:
+        in_queue.put_nowait(f)
+
+    for n in range(16):
+        tasks.append(asyncio.create_task(markdown_link_checker(in_queue, out_queue, n)))
+
+    while len(results) != len(files):
+        print(len(results))
+        r, output = await out_queue.get()
+        results.append(r)
+        for line in output:
+            print(line)
+
+    for t in tasks:
+        t.cancel()
+
+    return results
+
+
+def run_markdown_link_checker():
+    print("## Started markdown link checker")
+    files = glob.glob("**/*.md", recursive=True)
+    results = asyncio.run(run_markdown_link_checker_on_files(files))
+    return all(results)


 def validate_model_on_gpu():