
Commit c4a5afa

Update vllm to 0.6.2 (#3343)
1 parent 1a3b18b commit c4a5afa

File tree: 2 files changed (+9, -4 lines)

+1 -1

@@ -1 +1 @@
-vllm==0.6.1.post2
+vllm==0.6.2

ts/torch_handler/vllm_handler.py: +8 -3

@@ -13,7 +13,7 @@
 )
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
-from vllm.entrypoints.openai.serving_engine import LoRAModulePath
+from vllm.entrypoints.openai.serving_engine import BaseModelPath, LoRAModulePath

 from ts.handler_utils.utils import send_intermediate_predict_response
 from ts.service import PredictionException
@@ -54,6 +54,11 @@ def initialize(self, ctx):
         else:
             served_model_names = [vllm_engine_config.model]

+        base_model_paths = [
+            BaseModelPath(name=name, model_path=vllm_engine_config.model)
+            for name in served_model_names
+        ]
+
         chat_template = ctx.model_yaml_config.get("handler", {}).get(
             "chat_template", None
         )
@@ -64,7 +69,7 @@ def initialize(self, ctx):
         self.completion_service = OpenAIServingCompletion(
             self.vllm_engine,
             model_config,
-            served_model_names,
+            base_model_paths,
             lora_modules=lora_modules,
             prompt_adapters=None,
             request_logger=None,
@@ -73,7 +78,7 @@ def initialize(self, ctx):
         self.chat_completion_service = OpenAIServingChat(
             self.vllm_engine,
             model_config,
-            served_model_names,
+            base_model_paths,
             "assistant",
             lora_modules=lora_modules,
             prompt_adapters=None,
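
For context, a minimal sketch (not part of this commit) of the interface change the handler adapts to: in vllm==0.6.2, OpenAIServingCompletion and OpenAIServingChat expect a list of BaseModelPath entries where vllm==0.6.1.post2 took a plain list of served model names. The served name and model path below are placeholders, not values from this repository.

# Minimal sketch of the argument change; the name and path are hypothetical.
from vllm.entrypoints.openai.serving_engine import BaseModelPath

served_model_names = ["my-served-model"]   # hypothetical served name
model = "facebook/opt-125m"                # hypothetical model id/path

# vllm==0.6.1.post2 style: the serving classes took the names directly, e.g.
#   OpenAIServingCompletion(engine, model_config, served_model_names, ...)

# vllm==0.6.2 style: wrap each served name with the model path it maps to
base_model_paths = [
    BaseModelPath(name=name, model_path=model)
    for name in served_model_names
]
#   OpenAIServingCompletion(engine, model_config, base_model_paths, ...)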
