@@ -21,6 +21,16 @@ const folly::dynamic& ResnetCppHandler::GetJsonValue(std::unique_ptr<folly::dyna
21
21
}
22
22
}
23
23
24
+ std::string ResnetCppHandler::MapClassToLabel (const torch::Tensor& classes, const torch::Tensor& probs) {
25
+ folly::dynamic map = folly::dynamic::object;
26
+ for (int i = 0 ; i < classes.sizes ()[0 ]; i++) {
27
+ auto class_value = GetJsonValue (mapping_json_, std::to_string (classes[i].item <long >()));
28
+ map[class_value[1 ].asString ()] = probs[i].item <float >();
29
+ }
30
+
31
+ return folly::toJson (map);
32
+ }
33
+
24
34
std::pair<std::shared_ptr<void >, std::shared_ptr<torch::Device>>
25
35
ResnetCppHandler::LoadModel (
26
36
std::shared_ptr<torchserve::LoadModelRequest>& load_model_request) {
@@ -60,45 +70,118 @@ ResnetCppHandler::LoadModel(
60
70
}
61
71
}
62
72
73
+ c10::IValue ResnetCppHandler::Preprocess (
74
+ std::shared_ptr<torch::Device>& device,
75
+ std::pair<std::string&, std::map<uint8_t , std::string>&>& idx_to_req_id,
76
+ std::shared_ptr<torchserve::InferenceRequestBatch>& request_batch,
77
+ std::shared_ptr<torchserve::InferenceResponseBatch>& response_batch) {
78
+ auto batch_ivalue = c10::impl::GenericList (c10::TensorType::get ());
79
+
80
+ std::vector<torch::Tensor> batch_tensors;
81
+ uint8_t idx = 0 ;
82
+ for (auto & request : *request_batch) {
83
+ (*response_batch)[request.request_id ] =
84
+ std::make_shared<torchserve::InferenceResponse>(request.request_id );
85
+ idx_to_req_id.first += idx_to_req_id.first .empty ()
86
+ ? request.request_id
87
+ : " ," + request.request_id ;
88
+ auto data_it =
89
+ request.parameters .find (torchserve::PayloadType::kPARAMETER_NAME_DATA );
90
+ auto dtype_it =
91
+ request.headers .find (torchserve::PayloadType::kHEADER_NAME_DATA_TYPE );
92
+ if (data_it == request.parameters .end ()) {
93
+ data_it = request.parameters .find (
94
+ torchserve::PayloadType::kPARAMETER_NAME_BODY );
95
+ dtype_it =
96
+ request.headers .find (torchserve::PayloadType::kHEADER_NAME_BODY_TYPE );
97
+ }
98
+
99
+ if (data_it == request.parameters .end () ||
100
+ dtype_it == request.headers .end ()) {
101
+ TS_LOGF (ERROR, " Empty payload for request id: {}" , request.request_id );
102
+ (*response_batch)[request.request_id ]->SetResponse (
103
+ 500 , " data_type" , torchserve::PayloadType::kCONTENT_TYPE_TEXT ,
104
+ " Empty payload" );
105
+ continue ;
106
+ }
107
+
108
+ try {
109
+ if (dtype_it->second == torchserve::PayloadType::kDATA_TYPE_BYTES ) {
110
+ batch_tensors.emplace_back (
111
+ torch::pickle_load (data_it->second ).toTensor ());
112
+ idx_to_req_id.second [idx++] = request.request_id ;
113
+ } else {
114
+ TS_LOG (ERROR, " Not supported input format, only support bytesstring in this example" );
115
+ (*response_batch)[request.request_id ]->SetResponse (
116
+ 500 , " data_type" , torchserve::PayloadType::kCONTENT_TYPE_TEXT ,
117
+ " Not supported input format, only support bytesstring in this example" );
118
+ continue ;
119
+ }
120
+ } catch (const std::runtime_error& e) {
121
+ TS_LOGF (ERROR, " Failed to load tensor for request id: {}, error: {}" ,
122
+ request.request_id , e.what ());
123
+ auto response = (*response_batch)[request.request_id ];
124
+ response->SetResponse (500 , " data_type" ,
125
+ torchserve::PayloadType::kDATA_TYPE_STRING ,
126
+ " runtime_error, failed to load tensor" );
127
+ } catch (const c10::Error& e) {
128
+ TS_LOGF (ERROR, " Failed to load tensor for request id: {}, c10 error: {}" ,
129
+ request.request_id , e.msg ());
130
+ auto response = (*response_batch)[request.request_id ];
131
+ response->SetResponse (500 , " data_type" ,
132
+ torchserve::PayloadType::kDATA_TYPE_STRING ,
133
+ " c10 error, failed to load tensor" );
134
+ }
135
+ }
136
+ if (!batch_tensors.empty ()) {
137
+ batch_ivalue.emplace_back (torch::stack (batch_tensors).to (*device));
138
+ }
139
+
140
+ return batch_ivalue;
141
+ }
63
142
64
143
c10::IValue ResnetCppHandler::Inference(
    std::shared_ptr<void> model, c10::IValue& inputs,
    std::shared_ptr<torch::Device>& device,
    std::pair<std::string&, std::map<uint8_t, std::string>&>& idx_to_req_id,
    std::shared_ptr<torchserve::InferenceResponseBatch>& response_batch) {
  // Runs the AOTInductor-compiled model on the batch tensor produced by
  // Preprocess and returns the stacked outputs in a tensor GenericList.
  // On failure the error is only logged and an empty list is returned;
  // per-request failures are then surfaced by Postprocess.
  c10::InferenceMode mode;  // disable autograd bookkeeping during inference
  auto batch_ivalue = c10::impl::GenericList(c10::TensorType::get());
  try {
    // `model` was created by LoadModel as either a CUDA or a CPU AOTI
    // container runner; recover the matching concrete type before use.
    std::shared_ptr<torch::inductor::AOTIModelContainerRunner> runner;
    if (device->is_cuda()) {
      runner = std::static_pointer_cast<
          torch::inductor::AOTIModelContainerRunnerCuda>(model);
    } else {
      runner = std::static_pointer_cast<
          torch::inductor::AOTIModelContainerRunnerCpu>(model);
    }
    // Preprocess stores the whole batch as element 0 of the tensor list.
    auto data = inputs.toTensorList()[0].get().toTensor();
    std::vector<torch::Tensor> input_vec;
    input_vec.emplace_back(data);
    auto batch_output_tensor_vector = runner->run(input_vec);
    batch_ivalue.emplace_back(
        torch::stack(batch_output_tensor_vector).to(*device));
  } catch (const std::runtime_error& e) {
    // Fix: catch by const reference (original caught std::runtime_error&
    // by non-const reference, against the throw-by-value/catch-by-const-ref
    // idiom).
    TS_LOG(ERROR, e.what());
  } catch (const c10::Error& e) {
    TS_LOGF(ERROR, "Failed to apply inference on input, c10 error:{}",
            e.msg());
  }
  return batch_ivalue;
}
86
169
87
170
void ResnetCppHandler::Postprocess (
88
171
c10::IValue &inputs,
89
172
std::pair<std::string &, std::map<uint8_t , std::string> &> &idx_to_req_id,
90
173
std::shared_ptr<torchserve::InferenceResponseBatch> &response_batch) {
91
- auto & data = inputs.toTensor ();
174
+ auto data = inputs.toTensorList ().get (0 );
175
+ auto ps = torch::softmax (data[0 ], 1 );
176
+ auto top5 = torch::topk (ps, 5 , 1 );
92
177
for (const auto &kv : idx_to_req_id.second ) {
93
178
try {
94
- auto out = data[kv.first ].unsqueeze (0 );
95
- auto y_hat = torch::argmax (out, 1 ).item <int >();
96
- auto predicted_idx = std::to_string (y_hat);
179
+ auto probs = std::get<0 >(top5)[kv.first ];
180
+ auto classes = std::get<1 >(top5)[kv.first ];
97
181
auto response = (*response_batch)[kv.second ];
98
-
99
182
response->SetResponse (200 , " data_type" ,
100
183
torchserve::PayloadType::kDATA_TYPE_STRING ,
101
- (*mapping_json_)[predicted_idx]. asString ( ));
184
+ MapClassToLabel (classes, probs ));
102
185
} catch (const std::runtime_error &e) {
103
186
TS_LOGF (ERROR, " Failed to load tensor for request id: {}, error: {}" ,
104
187
kv.second , e.what ());
0 commit comments