Commit fd30685: "fmt"

1 parent c345a8b

File tree: 4 files changed, +104 -14 lines

Binary file not shown.

examples/cpp/aot_inductor/resnet/resnet50_torch_export.py (+4, -3)

```diff
@@ -5,7 +5,7 @@
 
 torch.set_float32_matmul_precision("high")
 
-MAX_BATCH_SIZE = 32
+MAX_BATCH_SIZE = 15
 
 model = resnet50(weights=ResNet50_Weights.DEFAULT)
 model.eval()
@@ -15,11 +15,12 @@
     device = "cuda"
 else:
     device = "cpu"
+# The max batch size is less than 16; the following settings only work in PT 2.3.
 # We need to turn off the below optimizations to support batch_size = 16,
 # which is treated like a special case
 # https://github.com/pytorch/pytorch/pull/116152
-torch.backends.mkldnn.set_flags(False)
-torch.backends.nnpack.set_flags(False)
+# torch.backends.mkldnn.set_flags(False)
+# torch.backends.nnpack.set_flags(False)
 
 model = model.to(device=device)
 example_inputs = (torch.randn(2, 3, 224, 224, device=device),)
```
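The artifact produced by this export script is consumed on the C++ side through torch::inductor::AOTIModelContainerRunner (see the handler changes below). As a sanity check of the compiled shared library outside TorchServe, a sketch like the following can be used, assuming the same PyTorch version and headers the handler includes; "resnet50_pt2.so" is a hypothetical path for the exported artifact.

```cpp
#include <torch/torch.h>
#include <torch/csrc/inductor/aoti_model_container_runner.h>
#include <iostream>

int main() {
  // "resnet50_pt2.so" is a placeholder for the artifact produced by the
  // export script above; adjust the path for your build.
  torch::inductor::AOTIModelContainerRunnerCpu runner("resnet50_pt2.so");

  // Same input shape as example_inputs in the export script.
  std::vector<torch::Tensor> inputs = {torch::randn({2, 3, 224, 224})};
  std::vector<torch::Tensor> outputs = runner.run(inputs);

  std::cout << outputs[0].sizes() << std::endl;  // expect [2, 1000]
}
```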

examples/cpp/aot_inductor/resnet/src/resnet_handler.cc (+92, -9)
```diff
@@ -21,6 +21,16 @@ const folly::dynamic& ResnetCppHandler::GetJsonValue(std::unique_ptr<folly::dyna
   }
 }
 
+std::string ResnetCppHandler::MapClassToLabel(const torch::Tensor& classes, const torch::Tensor& probs) {
+  folly::dynamic map = folly::dynamic::object;
+  for (int i = 0; i < classes.sizes()[0]; i++) {
+    auto class_value = GetJsonValue(mapping_json_, std::to_string(classes[i].item<long>()));
+    map[class_value[1].asString()] = probs[i].item<float>();
+  }
+
+  return folly::toJson(map);
+}
+
 std::pair<std::shared_ptr<void>, std::shared_ptr<torch::Device>>
 ResnetCppHandler::LoadModel(
     std::shared_ptr<torchserve::LoadModelRequest>& load_model_request) {
```
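MapClassToLabel reads mapping_json_ via class_value[1], which implies an ImageNet-style index_to_name.json layout where each class index maps to a [synset_id, label] pair; that layout is an assumption here, not stated in the diff. A self-contained sketch of the same logic with illustrative mapping entries:

```cpp
#include <folly/dynamic.h>
#include <folly/json.h>
#include <torch/torch.h>
#include <iostream>
#include <string>

int main() {
  // Hypothetical stand-in for mapping_json_ (index_to_name.json style).
  folly::dynamic mapping = folly::parseJson(
      R"({"281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"]})");

  torch::Tensor classes = torch::tensor({281, 282});
  torch::Tensor probs = torch::tensor({0.72f, 0.18f});

  // Same loop as MapClassToLabel: label -> probability.
  folly::dynamic map = folly::dynamic::object;
  for (int i = 0; i < classes.sizes()[0]; i++) {
    auto& class_value = mapping[std::to_string(classes[i].item<long>())];
    map[class_value[1].asString()] = probs[i].item<float>();
  }
  // Prints e.g. {"tabby":0.72,"tiger_cat":0.18}
  std::cout << folly::toJson(map) << std::endl;
}
```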
```diff
@@ -60,45 +70,118 @@ ResnetCppHandler::LoadModel(
   }
 }
 
+c10::IValue ResnetCppHandler::Preprocess(
+    std::shared_ptr<torch::Device>& device,
+    std::pair<std::string&, std::map<uint8_t, std::string>&>& idx_to_req_id,
+    std::shared_ptr<torchserve::InferenceRequestBatch>& request_batch,
+    std::shared_ptr<torchserve::InferenceResponseBatch>& response_batch) {
+  auto batch_ivalue = c10::impl::GenericList(c10::TensorType::get());
+
+  std::vector<torch::Tensor> batch_tensors;
+  uint8_t idx = 0;
+  for (auto& request : *request_batch) {
+    (*response_batch)[request.request_id] =
+        std::make_shared<torchserve::InferenceResponse>(request.request_id);
+    idx_to_req_id.first += idx_to_req_id.first.empty()
+                               ? request.request_id
+                               : "," + request.request_id;
+    auto data_it =
+        request.parameters.find(torchserve::PayloadType::kPARAMETER_NAME_DATA);
+    auto dtype_it =
+        request.headers.find(torchserve::PayloadType::kHEADER_NAME_DATA_TYPE);
+    if (data_it == request.parameters.end()) {
+      data_it = request.parameters.find(
+          torchserve::PayloadType::kPARAMETER_NAME_BODY);
+      dtype_it =
+          request.headers.find(torchserve::PayloadType::kHEADER_NAME_BODY_TYPE);
+    }
+
+    if (data_it == request.parameters.end() ||
+        dtype_it == request.headers.end()) {
+      TS_LOGF(ERROR, "Empty payload for request id: {}", request.request_id);
+      (*response_batch)[request.request_id]->SetResponse(
+          500, "data_type", torchserve::PayloadType::kCONTENT_TYPE_TEXT,
+          "Empty payload");
+      continue;
+    }
+
+    try {
+      if (dtype_it->second == torchserve::PayloadType::kDATA_TYPE_BYTES) {
+        batch_tensors.emplace_back(
+            torch::pickle_load(data_it->second).toTensor());
+        idx_to_req_id.second[idx++] = request.request_id;
+      } else {
+        TS_LOG(ERROR, "Unsupported input format; this example only supports bytes input");
+        (*response_batch)[request.request_id]->SetResponse(
+            500, "data_type", torchserve::PayloadType::kCONTENT_TYPE_TEXT,
+            "Unsupported input format; this example only supports bytes input");
+        continue;
+      }
+    } catch (const std::runtime_error& e) {
+      TS_LOGF(ERROR, "Failed to load tensor for request id: {}, error: {}",
+              request.request_id, e.what());
+      auto response = (*response_batch)[request.request_id];
+      response->SetResponse(500, "data_type",
+                            torchserve::PayloadType::kDATA_TYPE_STRING,
+                            "runtime_error, failed to load tensor");
+    } catch (const c10::Error& e) {
+      TS_LOGF(ERROR, "Failed to load tensor for request id: {}, c10 error: {}",
+              request.request_id, e.msg());
+      auto response = (*response_batch)[request.request_id];
+      response->SetResponse(500, "data_type",
+                            torchserve::PayloadType::kDATA_TYPE_STRING,
+                            "c10 error, failed to load tensor");
+    }
+  }
+  if (!batch_tensors.empty()) {
+    batch_ivalue.emplace_back(torch::stack(batch_tensors).to(*device));
+  }
+
+  return batch_ivalue;
+}
```
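Preprocess expects each request body to be a pickled tensor that torch::pickle_load can decode. A minimal sketch of that contract, using torch::pickle_save as the serializing counterpart; in TorchServe the bytes would arrive from a client rather than being produced in-process, and the pickle header path is an assumption based on where these functions are declared in libtorch.

```cpp
#include <torch/csrc/jit/serialization/pickle.h>
#include <torch/torch.h>
#include <iostream>

int main() {
  torch::Tensor image = torch::randn({3, 224, 224});

  // Serialize to the raw bytes format Preprocess expects.
  std::vector<char> payload = torch::pickle_save(image);

  // Mirrors the handler: decode the bytes back into a tensor.
  torch::Tensor decoded = torch::pickle_load(payload).toTensor();
  std::cout << decoded.sizes() << std::endl;  // [3, 224, 224]
}
```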
```diff
 
 c10::IValue ResnetCppHandler::Inference(
     std::shared_ptr<void> model, c10::IValue &inputs,
     std::shared_ptr<torch::Device> &device,
     std::pair<std::string &, std::map<uint8_t, std::string> &> &idx_to_req_id,
     std::shared_ptr<torchserve::InferenceResponseBatch> &response_batch) {
   c10::InferenceMode mode;
+  auto batch_ivalue = c10::impl::GenericList(c10::TensorType::get());
   try {
     std::shared_ptr<torch::inductor::AOTIModelContainerRunner> runner;
     if (device->is_cuda()) {
       runner = std::static_pointer_cast<torch::inductor::AOTIModelContainerRunnerCuda>(model);
     } else {
       runner = std::static_pointer_cast<torch::inductor::AOTIModelContainerRunnerCpu>(model);
     }
-
-    auto batch_output_tensor_vector = runner->run(inputs.toTensorVector());
-    return c10::IValue(batch_output_tensor_vector[0]);
+    auto data = inputs.toTensorList()[0].get().toTensor();
+    std::vector<torch::Tensor> input_vec;
+    input_vec.emplace_back(data);
+    auto batch_output_tensor_vector = runner->run(input_vec);
+    batch_ivalue.emplace_back(torch::stack(batch_output_tensor_vector).to(*device));
   } catch (std::runtime_error& e) {
     TS_LOG(ERROR, e.what());
   } catch (const c10::Error& e) {
     TS_LOGF(ERROR, "Failed to apply inference on input, c10 error:{}", e.msg());
   }
+  return batch_ivalue;
 }
```
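The new control flow threads one batched tensor through a GenericList-backed IValue: Preprocess stacks the per-request tensors and wraps them in the list, and Inference unwraps element 0 and hands it to the runner. A standalone sketch of just that plumbing, with no model involved:

```cpp
#include <torch/torch.h>

int main() {
  // Preprocess side: stack per-request tensors into one batch tensor and
  // wrap it in a GenericList so it travels as a single c10::IValue.
  std::vector<torch::Tensor> batch_tensors = {torch::randn({3, 224, 224}),
                                              torch::randn({3, 224, 224})};
  auto batch_list = c10::impl::GenericList(c10::TensorType::get());
  batch_list.emplace_back(torch::stack(batch_tensors));  // [2, 3, 224, 224]
  c10::IValue inputs(batch_list);

  // Inference side: unwrap element 0 and pass it to the runner as a
  // std::vector<torch::Tensor>.
  torch::Tensor data = inputs.toTensorList().get(0);
  std::vector<torch::Tensor> input_vec = {data};
  // runner->run(input_vec) would produce the output tensors here.
}
```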
```diff
 
 void ResnetCppHandler::Postprocess(
     c10::IValue &inputs,
     std::pair<std::string &, std::map<uint8_t, std::string> &> &idx_to_req_id,
     std::shared_ptr<torchserve::InferenceResponseBatch> &response_batch) {
-  auto& data = inputs.toTensor();
+  auto data = inputs.toTensorList().get(0);
+  auto ps = torch::softmax(data[0], 1);
+  auto top5 = torch::topk(ps, 5, 1);
   for (const auto &kv : idx_to_req_id.second) {
     try {
-      auto out = data[kv.first].unsqueeze(0);
-      auto y_hat = torch::argmax(out, 1).item<int>();
-      auto predicted_idx = std::to_string(y_hat);
+      auto probs = std::get<0>(top5)[kv.first];
+      auto classes = std::get<1>(top5)[kv.first];
       auto response = (*response_batch)[kv.second];
-
       response->SetResponse(200, "data_type",
                             torchserve::PayloadType::kDATA_TYPE_STRING,
-                            (*mapping_json_)[predicted_idx].asString());
+                            MapClassToLabel(classes, probs));
     } catch (const std::runtime_error &e) {
       TS_LOGF(ERROR, "Failed to load tensor for request id: {}, error: {}",
               kv.second, e.what());
```
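Postprocess now replaces the per-request argmax with a single softmax plus top-5 over the whole batch, then slices one row per request. The tensor math in isolation, with random logits standing in for the stacked model output:

```cpp
#include <torch/torch.h>
#include <iostream>

int main() {
  // Random logits standing in for the model output: one row of 1000
  // class scores per request in the batch.
  torch::Tensor logits = torch::randn({4, 1000});

  auto ps = torch::softmax(logits, 1);  // per-request probabilities
  auto top5 = torch::topk(ps, 5, 1);    // pair of [4, 5] tensors

  // Slice out request 0, as Postprocess does with kv.first.
  auto probs = std::get<0>(top5)[0];    // top-5 probabilities
  auto classes = std::get<1>(top5)[0];  // matching class indices
  std::cout << classes << "\n" << probs << std::endl;
}
```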

examples/cpp/aot_inductor/resnet/src/resnet_handler.hh (+8, -2)
```diff
@@ -5,8 +5,6 @@
 #include <folly/json.h>
 #include <fmt/format.h>
 #include <iostream>
-#include <sentencepiece_processor.h>
-#include <sentencepiece_trainer.h>
 #include <torch/torch.h>
 #include <torch/csrc/inductor/aoti_model_container_runner.h>
 #include <torch/csrc/inductor/aoti_model_container_runner_cuda.h>
```
```diff
@@ -25,6 +23,13 @@ class ResnetCppHandler : public torchserve::BaseHandler {
       std::shared_ptr<torchserve::LoadModelRequest>& load_model_request)
       override;
 
+  c10::IValue Preprocess(
+      std::shared_ptr<torch::Device>& device,
+      std::pair<std::string&, std::map<uint8_t, std::string>&>& idx_to_req_id,
+      std::shared_ptr<torchserve::InferenceRequestBatch>& request_batch,
+      std::shared_ptr<torchserve::InferenceResponseBatch>& response_batch)
+      override;
+
   c10::IValue Inference(
       std::shared_ptr<void> model, c10::IValue& inputs,
       std::shared_ptr<torch::Device>& device,
```
```diff
@@ -41,6 +46,7 @@ class ResnetCppHandler : public torchserve::BaseHandler {
  private:
   std::unique_ptr<folly::dynamic> LoadJsonFile(const std::string& file_path);
   const folly::dynamic& GetJsonValue(std::unique_ptr<folly::dynamic>& json, const std::string& key);
+  std::string MapClassToLabel(const torch::Tensor& classes, const torch::Tensor& probs);
 
   std::unique_ptr<folly::dynamic> config_json_;
   std::unique_ptr<folly::dynamic> mapping_json_;
```
