File tree: examples/large_models/vllm
2 files changed: +9 -4 lines changed
- vllm==0.6.1.post2
+ vllm==0.6.2
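The pin moves from 0.6.1.post2 to 0.6.2; the handler changes below track 0.6.2's reworked OpenAI serving entrypoints, which take `BaseModelPath` entries instead of plain served-model names. A minimal sanity check, assuming a pip-installed vLLM (the assertion message is ours, not part of this change):

```python
# Sketch: confirm the environment matches the new pin before loading the
# handler. Assumes vllm was installed via pip; the message text is illustrative.
from importlib.metadata import version

assert version("vllm") == "0.6.2", "handler below assumes vllm==0.6.2"
```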
@@ -13,7 +13,7 @@
 )
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
-from vllm.entrypoints.openai.serving_engine import LoRAModulePath
+from vllm.entrypoints.openai.serving_engine import BaseModelPath, LoRAModulePath

 from ts.handler_utils.utils import send_intermediate_predict_response
 from ts.service import PredictionException
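In 0.6.2, `serving_engine` exposes `BaseModelPath` alongside `LoRAModulePath`; `initialize()` uses it below to describe each served model. A defensive sketch, assuming the import fails on older vLLM (the error text is ours, not from this PR):

```python
# Sketch: fail fast with an actionable message when BaseModelPath is missing,
# i.e. when an older vLLM is installed. The error wording is illustrative.
try:
    from vllm.entrypoints.openai.serving_engine import BaseModelPath, LoRAModulePath
except ImportError as err:
    raise RuntimeError("this handler requires vllm>=0.6.2") from err
```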
@@ -54,6 +54,11 @@ def initialize(self, ctx):
         else:
             served_model_names = [vllm_engine_config.model]

+        base_model_paths = [
+            BaseModelPath(name=name, model_path=vllm_engine_config.model)
+            for name in served_model_names
+        ]
+
         chat_template = ctx.model_yaml_config.get("handler", {}).get(
             "chat_template", None
         )
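Each public model name is paired with the single engine model, so every entry in `base_model_paths` shares the same `model_path`. A standalone sketch of what the comprehension yields, with hypothetical name and path values (the real handler takes them from `served_model_names` and `vllm_engine_config.model`):

```python
from vllm.entrypoints.openai.serving_engine import BaseModelPath

served_model_names = ["llama3"]   # assumed served name
engine_model = "/models/llama3"   # assumed vllm_engine_config.model
base_model_paths = [
    BaseModelPath(name=name, model_path=engine_model)
    for name in served_model_names
]
print(base_model_paths[0].name, "->", base_model_paths[0].model_path)
# llama3 -> /models/llama3
```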
@@ -64,7 +69,7 @@ def initialize(self, ctx):
         self.completion_service = OpenAIServingCompletion(
             self.vllm_engine,
             model_config,
-            served_model_names,
+            base_model_paths,
             lora_modules=lora_modules,
             prompt_adapters=None,
             request_logger=None,
@@ -73,7 +78,7 @@ def initialize(self, ctx):
         self.chat_completion_service = OpenAIServingChat(
             self.vllm_engine,
             model_config,
-            served_model_names,
+            base_model_paths,
             "assistant",
             lora_modules=lora_modules,
             prompt_adapters=None,
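Both serving constructors change in the same way: the third positional argument that used to be a list of served-model name strings is now the `list[BaseModelPath]` built above, and every other argument is untouched. In sketch form, with hypothetical values:

```python
# Before (vllm 0.6.1.post2): third positional arg was a list of names.
#   OpenAIServingChat(engine, model_config, ["demo"], "assistant", ...)
# After (vllm 0.6.2): the same slot takes BaseModelPath entries.
#   OpenAIServingChat(engine, model_config,
#                     [BaseModelPath(name="demo", model_path="/models/demo")],
#                     "assistant", ...)
```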