Skip to content

Commit 4af6024

Browse files
lxning and mreso
authored and committed
AOTInductor BERT CPP example (pytorch#2931)
* fix compile error on mac x86 * update install libtorch * fmt * fmt * fmt * Set return type of bert model and dynamic shapes * fix json value * fix build on linux * add linux dependency * replace sentenepice with tokenizers-cpp * update dependency * add attention mask * fix compile error * fix compile error * fmt * Fmt * tockenizer-cpp git submodule * update handler * fmt * fmt * fmt * unset env * fix path * Fix type error in bert aot example * fmt * fmt * update max setting * fix lint * add limitation * pinned folly to v2024.02.19.00 * pinned yam-cpp with tags/0.8.0 * pinned yaml-cpp 0.8.0 * update build.sh * pinned yaml-cpp v0.8.0 * fmt * fix typo * add submodule kineto * fmt * fix workflow * fix workflow * fix ubuntu version * update readme --------- Co-authored-by: Matthias Reso <[email protected]>
1 parent 0815678 commit 4af6024

28 files changed

+602
-53
lines changed

.github/workflows/ci-cpu-cpp.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
os: [ubuntu-latest, macOS-latest]
19+
os: [ubuntu-20.04, macOS-latest]
2020
steps:
2121
- name: Checkout TorchServe
2222
uses: actions/checkout@v2
@@ -29,4 +29,4 @@ jobs:
2929
python ts_scripts/install_dependencies.py --environment=dev --cpp
3030
- name: Build
3131
run: |
32-
cd cpp && ./build.sh --install-dependencies
32+
cd cpp && ./build.sh

.gitmodules

+12
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,15 @@
1010
[submodule "cpp/third-party/llama2.so"]
1111
path = cpp/third-party/llama2.so
1212
url = https://github.com/mreso/llama2.so.git
13+
[submodule "cpp/third-party/folly"]
14+
path = cpp/third-party/folly
15+
url = https://github.com/facebook/folly.git
16+
[submodule "cpp/third-party/yaml-cpp"]
17+
path = cpp/third-party/yaml-cpp
18+
url = https://github.com/jbeder/yaml-cpp.git
19+
[submodule "cpp/third-party/tokenizers-cpp"]
20+
path = cpp/third-party/tokenizers-cpp
21+
url = https://github.com/mlc-ai/tokenizers-cpp.git
22+
[submodule "cpp/third-party/kineto"]
23+
path = cpp/third-party/kineto
24+
url = https://github.com/pytorch/kineto.git

cpp/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
1+
cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
22
project(torchserve_cpp VERSION 0.1)
33

44
set(CMAKE_CXX_STANDARD 17)

cpp/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
## Requirements
33
* C++17
44
* GCC version: gcc-9
5+
* cmake version: 3.26.4+
56
## Installation and Running TorchServe CPP
67

78
### Install dependencies

cpp/build.sh

+12-18
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,6 @@ function install_folly() {
2424
FOLLY_SRC_DIR=$BASE_DIR/third-party/folly
2525
FOLLY_BUILD_DIR=$DEPS_DIR/folly-build
2626

27-
if [ ! -d "$FOLLY_SRC_DIR" ] ; then
28-
echo -e "${COLOR_GREEN}[ INFO ] Cloning folly repo ${COLOR_OFF}"
29-
git clone https://github.com/facebook/folly.git "$FOLLY_SRC_DIR"
30-
cd $FOLLY_SRC_DIR
31-
git checkout tags/v2024.01.29.00
32-
fi
33-
3427
if [ ! -d "$FOLLY_BUILD_DIR" ] ; then
3528
echo -e "${COLOR_GREEN}[ INFO ] Building Folly ${COLOR_OFF}"
3629
cd $FOLLY_SRC_DIR
@@ -60,9 +53,7 @@ function install_kineto() {
6053
elif [ "$PLATFORM" = "Mac" ]; then
6154
KINETO_SRC_DIR=$BASE_DIR/third-party/kineto
6255

63-
if [ ! -d "$KINETO_SRC_DIR" ] ; then
64-
echo -e "${COLOR_GREEN}[ INFO ] Cloning kineto repo ${COLOR_OFF}"
65-
git clone --recursive https://github.com/pytorch/kineto.git "$KINETO_SRC_DIR"
56+
if [ ! -d "$KINETO_SRC_DIR/libkineto/build" ] ; then
6657
cd $KINETO_SRC_DIR/libkineto
6758
mkdir build && cd build
6859
cmake ..
@@ -128,13 +119,6 @@ function install_yaml_cpp() {
128119
YAML_CPP_SRC_DIR=$BASE_DIR/third-party/yaml-cpp
129120
YAML_CPP_BUILD_DIR=$DEPS_DIR/yaml-cpp-build
130121

131-
if [ ! -d "$YAML_CPP_SRC_DIR" ] ; then
132-
echo -e "${COLOR_GREEN}[ INFO ] Cloning yaml-cpp repo ${COLOR_OFF}"
133-
git clone https://github.com/jbeder/yaml-cpp.git "$YAML_CPP_SRC_DIR"
134-
cd $YAML_CPP_SRC_DIR
135-
git checkout tags/0.8.0
136-
fi
137-
138122
if [ ! -d "$YAML_CPP_BUILD_DIR" ] ; then
139123
echo -e "${COLOR_GREEN}[ INFO ] Building yaml-cpp ${COLOR_OFF}"
140124

@@ -187,6 +171,16 @@ function prepare_test_files() {
187171
local LLAMA_SO_DIR=${BASE_DIR}/third-party/llama2.so/
188172
PYTHONPATH=${LLAMA_SO_DIR}:${PYTHONPATH} python ${BASE_DIR}/../examples/cpp/aot_inductor/llama2/compile.py --checkpoint ${HANDLER_DIR}/stories15M.pt ${HANDLER_DIR}/stories15M.so
189173
fi
174+
if [ ! -f "${EX_DIR}/aot_inductor/bert_handler/bert-seq.so" ]; then
175+
pip install transformers
176+
local HANDLER_DIR=${EX_DIR}/aot_inductor/bert_handler/
177+
export TOKENIZERS_PARALLELISM=false
178+
cd ${BASE_DIR}/../examples/cpp/aot_inductor/bert/
179+
python aot_compile_export.py
180+
mv bert-seq.so ${HANDLER_DIR}/bert-seq.so
181+
mv Transformer_model/tokenizer.json ${HANDLER_DIR}/tokenizer.json
182+
export TOKENIZERS_PARALLELISM=""
183+
fi
190184
if [ ! -f "${EX_DIR}/aot_inductor/resnet_handler/resne50_pt2.so" ]; then
191185
local HANDLER_DIR=${EX_DIR}/aot_inductor/resnet_handler/
192186
cd ${HANDLER_DIR}
@@ -376,7 +370,7 @@ cd $BASE_DIR
376370
git submodule update --init --recursive
377371

378372
install_folly
379-
install_kineto
373+
#install_kineto
380374
install_libtorch
381375
install_yaml_cpp
382376
build_llama_cpp

cpp/src/examples/CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@ add_subdirectory("../../../examples/cpp/llamacpp/" "${CMAKE_CURRENT_BINARY_DIR}/
55

66
add_subdirectory("../../../examples/cpp/mnist/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/mnist/mnist_handler/")
77

8-
98
# PT2.2 torch.export does not support Mac
109
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
1110
add_subdirectory("../../../examples/cpp/aot_inductor/llama2/" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/llama_handler/")
1211

12+
add_subdirectory("../../../examples/cpp/aot_inductor/bert" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/bert_handler/")
13+
1314
add_subdirectory("../../../examples/cpp/aot_inductor/resnet" "${CMAKE_CURRENT_BINARY_DIR}/../../test/resources/examples/aot_inductor/resnet_handler/")
1415
endif()

cpp/src/utils/file_system.cc

+38-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
#include "src/utils/file_system.hh"
2+
#include "src/utils/logging.hh"
3+
4+
#include <folly/FileUtil.h>
5+
#include <folly/json.h>
26

37
namespace torchserve {
48
std::unique_ptr<std::istream> FileSystem::GetStream(
@@ -10,4 +14,37 @@ std::unique_ptr<std::istream> FileSystem::GetStream(
1014
}
1115
return file_stream;
1216
}
13-
} // namespace torchserve
17+
18+
std::string FileSystem::LoadBytesFromFile(const std::string& path) {
19+
std::ifstream fs(path, std::ios::in | std::ios::binary);
20+
if (fs.fail()) {
21+
TS_LOGF(ERROR, "Cannot open tokenizer file {}", path);
22+
throw;
23+
}
24+
std::string data;
25+
fs.seekg(0, std::ios::end);
26+
size_t size = static_cast<size_t>(fs.tellg());
27+
fs.seekg(0, std::ios::beg);
28+
data.resize(size);
29+
fs.read(data.data(), size);
30+
return data;
31+
}
32+
33+
std::unique_ptr<folly::dynamic> FileSystem::LoadJsonFile(const std::string& file_path) {
34+
std::string content;
35+
if (!folly::readFile(file_path.c_str(), content)) {
36+
TS_LOGF(ERROR, "{} not found", file_path);
37+
throw;
38+
}
39+
return std::make_unique<folly::dynamic>(folly::parseJson(content));
40+
}
41+
42+
const folly::dynamic& FileSystem::GetJsonValue(std::unique_ptr<folly::dynamic>& json, const std::string& key) {
43+
if (json->find(key) != json->items().end()) {
44+
return (*json)[key];
45+
} else {
46+
TS_LOG(ERROR, "Required field {} not found in JSON.", key);
47+
throw ;
48+
}
49+
}
50+
} // namespace torchserve

cpp/src/utils/file_system.hh

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
#ifndef TS_CPP_UTILS_FILE_SYSTEM_HH_
22
#define TS_CPP_UTILS_FILE_SYSTEM_HH_
33

4-
#include <fmt/format.h>
5-
4+
#include <folly/dynamic.h>
65
#include <fstream>
76
#include <stdexcept>
87
#include <string>
@@ -11,6 +10,9 @@ namespace torchserve {
1110
class FileSystem {
1211
public:
1312
static std::unique_ptr<std::istream> GetStream(const std::string& path);
13+
static std::string LoadBytesFromFile(const std::string& path);
14+
static std::unique_ptr<folly::dynamic> LoadJsonFile(const std::string& file_path);
15+
static const folly::dynamic& GetJsonValue(std::unique_ptr<folly::dynamic>& json, const std::string& key);
1416
};
1517
} // namespace torchserve
1618
#endif // TS_CPP_UTILS_FILE_SYSTEM_HH_

cpp/test/examples/examples_test.cc

+23
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,29 @@ TEST_F(ModelPredictTest, TestLoadPredictLlamaCppHandler) {
6060
base_dir + "llamacpp_handler", base_dir + "prompt.txt", "llm_ts", 200);
6161
}
6262

63+
TEST_F(ModelPredictTest, TestLoadPredictAotInductorBertHandler) {
64+
std::string base_dir = "_build/test/resources/examples/aot_inductor/";
65+
std::string file1 = base_dir + "bert_handler/bert-seq.so";
66+
std::string file2 = base_dir + "bert_handler/tokenizer.json";
67+
68+
std::ifstream f1(file1);
69+
std::ifstream f2(file2);
70+
71+
if (!f1.good() || !f2.good())
72+
GTEST_SKIP() << "Skipping TestLoadPredictAotInductorBertHandler because "
73+
"of missing files: "
74+
<< file1 << " or " << file2;
75+
76+
this->LoadPredict(
77+
std::make_shared<torchserve::LoadModelRequest>(
78+
base_dir + "bert_handler", "bert_aot",
79+
torch::cuda::is_available() ? 0 : -1, "", "", 1, false),
80+
base_dir + "bert_handler",
81+
base_dir + "bert_handler/sample_text.txt",
82+
"bert_ts",
83+
200);
84+
}
85+
6386
TEST_F(ModelPredictTest, TestLoadPredictAotInductorResnetHandler) {
6487
std::string base_dir = "_build/test/resources/examples/aot_inductor/";
6588
std::string file1 = base_dir + "resnet_handler/resnet50_pt2.so";
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"createdOn": "12/02/2024 21:09:26",
3+
"runtime": "LSP",
4+
"model": {
5+
"modelName": "bertcppaot",
6+
"handler": "libbert_handler:BertCppHandler",
7+
"modelVersion": "1.0",
8+
"configFile": "model-config.yaml"
9+
},
10+
"archiverVersion": "0.9.0"
11+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"0":"Not Accepted",
3+
"1":"Accepted"
4+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
minWorkers: 1
2+
maxWorkers: 1
3+
batchSize: 2
4+
5+
handler:
6+
model_so_path: "bert-seq.so"
7+
tokenizer_path: "tokenizer.json"
8+
mapping: "index_to_name.json"
9+
model_name: "bert-base-uncased"
10+
mode: "sequence_classification"
11+
do_lower_case: true
12+
num_labels: 2
13+
max_length: 150
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Bloomberg has decided to publish a new report on the global economy.

cpp/third-party/folly

Submodule folly added at 323e467

cpp/third-party/kineto

Submodule kineto added at 594c63c

cpp/third-party/tokenizers-cpp

Submodule tokenizers-cpp added at 27dbe17

cpp/third-party/yaml-cpp

Submodule yaml-cpp added at f732014
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
set(TOKENZIER_CPP_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../../../cpp/third-party/tokenizers-cpp)
2+
add_subdirectory(${TOKENZIER_CPP_PATH} tokenizers EXCLUDE_FROM_ALL)
3+
add_library(bert_handler SHARED src/bert_handler.cc)
4+
target_include_directories(bert_handler PRIVATE ${TOKENZIER_CPP_PATH}/include)
5+
target_link_libraries(bert_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES} tokenizers_cpp)
+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
This example uses AOTInductor to compile the [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) model into an .so file (see script [aot_compile_export.py](aot_compile_export.py)). In PyTorch 2.2, the supported `MAX_SEQ_LENGTH` in this script is 511.
2+
3+
Then, this example loads the model and runs prediction using libtorch. The handler C++ source code for this example can be found [here](src).
4+
5+
### Setup
6+
1. Follow the instructions in [README.md](../../../../cpp/README.md) to build the TorchServe C++ backend.
7+
8+
```
9+
cd serve/cpp
10+
./build.sh
11+
```
12+
13+
The build script will create the necessary artifact for this example.
14+
To recreate these by hand you can follow the prepare_test_files function of the [build.sh](../../../../cpp/build.sh) script.
15+
We will need the handler .so file as well as the bert-seq.so and tokenizer.json.
16+
17+
2. Create a [model-config.yaml](model-config.yaml)
18+
19+
```yaml
20+
minWorkers: 1
21+
maxWorkers: 1
22+
batchSize: 2
23+
24+
handler:
25+
model_so_path: "bert-seq.so"
26+
tokenizer_path: "tokenizer.json"
27+
mapping: "index_to_name.json"
28+
model_name: "bert-base-uncased"
29+
mode: "sequence_classification"
30+
do_lower_case: true
31+
num_labels: 2
32+
max_length: 150
33+
```
34+
35+
### Generate Model Artifact Folder
36+
37+
```bash
38+
torch-model-archiver --model-name bertcppaot --version 1.0 --handler ../../../../cpp/_build/test/resources/examples/aot_inductor/bert_handler/libbert_handler:BertCppHandler --runtime LSP --extra-files index_to_name.json,../../../../cpp/_build/test/resources/examples/aot_inductor/bert_handler/bert-seq.so,../../../../cpp/_build/test/resources/examples/aot_inductor/bert_handler/tokenizer.json --config-file model-config.yaml --archive-format no-archive
39+
```
40+
41+
Create model store directory and move the folder `bertcppaot`
42+
43+
```
44+
mkdir model_store
45+
mv bertcppaot model_store/
46+
```
47+
48+
### Inference
49+
50+
Start torchserve using the following command
51+
52+
```
53+
torchserve --ncs --model-store model_store/ --models bertcppaot
54+
```
55+
56+
Infer the model using the following command
57+
58+
```
59+
curl http://localhost:8080/predictions/bertcppaot -T ../../../../cpp/test/resources/examples/aot_inductor/bert_handler/sample_text.txt
60+
Not Accepted
61+
```

0 commit comments

Comments
 (0)