
Commit 9d10087

set model_snapshot_path to None to prevent unbound local error (#3336)
* set model_snapshot_path to None to prevent unbound local error
* address PR comments with pythonic usage, fix README
* small change
* revert formatting changes
1 parent 6bdb1ba commit 9d10087
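For context on the fix: in Python, a name assigned only inside a conditional branch is still a local of the whole function, so any code path that skips the branch and then reads the name raises `UnboundLocalError`. The sketch below is a minimal, self-contained illustration of that failure mode and of the conditional-expression fix applied in `ts/llm_launcher.py`; the `fake_download` helper and the returned path are illustrative placeholders, not the launcher's real download logic.

```python
# Minimal illustration of the bug class this commit fixes; the helper and the
# path it returns are placeholders, not the launcher's actual download code.


def fake_download(model_id: str) -> str:
    return f"/tmp/snapshots/{model_id}"  # illustrative path only


def broken(engine: str, model_id: str):
    if engine == "trt_llm":
        model_snapshot_path = fake_download(model_id)  # only bound on this branch
    # For any other engine the name is never assigned, so the next line
    # raises UnboundLocalError.
    return model_snapshot_path


def fixed(engine: str, model_id: str):
    # Binding the name on every path (None for non-TRT-LLM engines) mirrors
    # the conditional expression introduced in ts/llm_launcher.py.
    model_snapshot_path = (
        fake_download(model_id) if engine == "trt_llm" else None
    )
    return model_snapshot_path
```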

2 files changed: +5 -4 lines changed


README.md

+2 -2
@@ -65,10 +65,10 @@ Refer to [torchserve docker](docker/README.md) for details.
#### VLLM Engine
```bash
# Make sure to install torchserve with pip or conda as described above and login with `huggingface-cli login`
-python -m ts.llm_launcher --model_id meta-llama/Meta-Llama-3.1-8B-Instruct --disable_token_auth
+python -m ts.llm_launcher --model_id meta-llama/Llama-3.2-3B-Instruct --disable_token_auth

# Try it out
-curl -X POST -d '{"model":"meta-llama/Meta-Llama-3.1-8B-Instruct", "prompt":"Hello, my name is", "max_tokens": 200}' --header "Content-Type: application/json" "http://localhost:8080/predictions/model/1.0/v1/completions"
+curl -X POST -d '{"model":"meta-llama/Llama-3.2-3B-Instruct", "prompt":"Hello, my name is", "max_tokens": 200}' --header "Content-Type: application/json" "http://localhost:8080/predictions/model/1.0/v1/completions"
```

#### TRT-LLM Engine

ts/llm_launcher.py

+3 -2
@@ -168,8 +168,9 @@ def main(args):

    model_store_path = Path(args.model_store)
    model_store_path.mkdir(parents=True, exist_ok=True)
-    if args.engine == "trt_llm":
-        model_snapshot_path = download_model(args.model_id)
+    model_snapshot_path = (
+        download_model(args.model_id) if args.engine == "trt_llm" else None
+    )

    with create_mar_file(args, model_snapshot_path):
        if args.engine == "trt_llm":
