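Fixed config: use `self.micro_batching_handle` instead of `self.handle` in the NeuronX micro-batching handler (`preprocess` and `inference`)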

lxning · lxning · commit 91db56951950 · 2024-03-22T15:13:17.000-07:00
diff --git a/ts/torch_handler/distributed/base_neuronx_microbatching_handler.py b/ts/torch_handler/distributed/base_neuronx_microbatching_handler.py
@@ -151,7 +151,9 @@ def preprocess(self, requests):
             )
 
         # Pad input to match compiled model batch size
-        inputs.extend([""] * (self.handle.micro_batch_size - len(inputs)))
+        inputs.extend(
+            [""] * (self.micro_batching_handle.micro_batch_size - len(inputs))
+        )
 
         return self.tokenizer(inputs, return_tensors="pt", padding=True)
 
@@ -165,7 +167,7 @@ def inference(self, inputs):
         thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
         thread.start()
 
-        micro_batch_idx = self.handle.get_micro_batch_idx()
+        micro_batch_idx = self.micro_batching_handle.get_micro_batch_idx()
         micro_batch_req_id_map = self.get_micro_batch_req_id_map(micro_batch_idx)
         for new_text in self.output_streamer:
             send_intermediate_predict_response(