Skip to content

Commit 297c676

Browse files
committed
updated model-config
1 parent aac6bc4 commit 297c676

File tree

3 files changed

+12
-5
lines changed

3 files changed

+12
-5
lines changed

examples/large_models/inferentia2/llama2/continuous_batching/inf2-llama-2-continuous-batching.ipynb

+2-2
Original file line number | Diff line number | Diff line change
@@ -69,10 +69,10 @@
6969
"source": [
7070
"# log in to the Hugging Face Hub\n",
7171
"!huggingface-cli login --token $HUGGINGFACE_TOKEN\n",
72-
"!python examples/large_models/utils/Download_model.py --model_path model --model_name meta-llama/Llama-2-13b-hf --use_auth_token True\n",
72+
"!python examples/large_models/utils/Download_model.py --model_path model --model_name meta-llama/Llama-2-70b-hf --use_auth_token True\n",
7373
"\n",
7474
"# Create TorchServe model artifacts\n",
75-
"!torch-model-archiver --model-name llama-2-70b --version 1.0 --handler ts/torch_handler/distributed/base_neuronx_continuous_batching_handler.py -r examples/large_models/inferentia2/llama2/requirements.txt --config-file examples/large_models/inferentia2/llama2/continuous_batching/model-config.yaml --archive-format no-archive\n",
75+
"!torch-model-archiver --model-name llama-2-70b --version 1.0 --handler ts/torch_handler/distributed/base_neuronx_continuous_batching_handler.py -r examples/large_models/inferentia2/llama2/continuous_batching/requirements.txt --config-file examples/large_models/inferentia2/llama2/continuous_batching/model-config.yaml --archive-format no-archive\n",
7676
"\n",
7777
"!mkdir -p model_store\n",
7878
"!mv llama-2-70b model_store\n",

examples/large_models/inferentia2/llama2/streamer/model-config.yaml

+8-3
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,15 @@ responseTimeout: 10800
55
batchSize: 16
66

77
handler:
8-
model_checkpoint_dir: "llama-2-13b-split"
8+
model_path: "model/models--meta-llama--Llama-2-70b-hf/snapshots/90052941a64de02075ca800b09fcea1bdaacb939"
9+
model_checkpoint_dir: "llama-2-70b-split"
10+
model_module_prefix: "transformers_neuronx"
11+
model_class_name: "llama.model.LlamaForSampling"
12+
tokenizer_class_name: "transformers.LlamaTokenizer"
913
amp: "bf16"
10-
tp_degree: 6
11-
max_length: 100
14+
tp_degree: 24
15+
max_length: 256
16+
max_new_tokens: 50
1217

1318
micro_batching:
1419
micro_batch_size: 4
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
transformers==4.36.2
2+
sentencepiece==0.1.99

0 commit comments

Comments
 (0)