import shutil
import sys
import threading
from pathlib import Path

import pytest
import requests
import test_utils
from model_archiver.model_archiver_config import ModelArchiverConfig

CURR_FILE_PATH = Path(__file__).parent
STATEFUL_PATH = CURR_FILE_PATH.parents[1] / "examples" / "stateful"
CONFIG_PROPERTIES_PATH = CURR_FILE_PATH.parents[1] / "test" / "config_ts.properties"

YAML_CONFIG = f"""
# TorchServe frontend parameters
minWorkers: 2
maxWorkers: 2
batchSize: 4
maxNumSequence: 4  # max number of concurrently open sequences
sequenceMaxIdleMSec: 5000  # close a sequence after this idle period
maxSequenceJobQueueSize: 10  # job queue size per inference sequence
sequenceBatching: true  # route all jobs of a sequence to the same worker

handler:
  cache:
    capacity: 4  # capacity of the handler's per-sequence state cache
"""

# Sample generation payload; not referenced by the tests in this file.
PROMPTS = [
    {
        "prompt": "A robot may not injure a human being",
        "max_new_tokens": 50,
        "temperature": 0.8,
        "logprobs": 1,
        "prompt_logprobs": 1,
        "max_tokens": 128,
        "adapter": "adapter_1",
    },
]


@pytest.fixture
def add_paths():
    sys.path.append(STATEFUL_PATH.as_posix())
    yield
    sys.path.pop()


@pytest.fixture(scope="module")
def model_name():
    yield "stateful"


@pytest.fixture(scope="module")
def work_dir(tmp_path_factory, model_name):
    return tmp_path_factory.mktemp(model_name)


@pytest.fixture(scope="module", name="mar_file_path")
def create_mar_file(work_dir, model_archiver, model_name, request):
    mar_file_path = Path(work_dir).joinpath(model_name)

    model_config_yaml = Path(work_dir) / "model-config.yaml"
    model_config_yaml.write_text(YAML_CONFIG)

    config = ModelArchiverConfig(
        model_name=model_name,
        version="1.0",
        handler=(STATEFUL_PATH / "stateful_handler.py").as_posix(),
        serialized_file=(STATEFUL_PATH / "model_cnn.pt").as_posix(),
        model_file=(STATEFUL_PATH / "model.py").as_posix(),
        export_path=work_dir,
        requirements_file=(STATEFUL_PATH / "requirements.txt").as_posix(),
        runtime="python",
        force=False,
        config_file=model_config_yaml.as_posix(),
        archive_format="no-archive",  # emit an unpacked directory, not a .mar
    )

    model_archiver.generate_model_archive(config)

    assert mar_file_path.exists()

    yield mar_file_path.as_posix()

    # Clean up files
    shutil.rmtree(mar_file_path)
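
# For reference, the fixture above corresponds roughly to this
# torch-model-archiver invocation (a sketch; paths are illustrative):
#   torch-model-archiver --model-name stateful --version 1.0 \
#     --model-file examples/stateful/model.py \
#     --serialized-file examples/stateful/model_cnn.pt \
#     --handler examples/stateful/stateful_handler.py \
#     --requirements-file examples/stateful/requirements.txt \
#     --config-file model-config.yaml --runtime python \
#     --export-path <work_dir> --archive-format no-archive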


def test_stateful_mar(mar_file_path, model_store):
    """
    Register the stateful model in TorchServe and run two concurrent
    inference sequences against it.
    """
    file_name = Path(mar_file_path).name
    model_name = Path(file_name).stem

    # "no-archive" produced a directory, so copy the tree into the model store
    shutil.copytree(mar_file_path, Path(model_store) / model_name)

    params = (
        ("model_name", model_name),
        ("url", (Path(model_store) / model_name).as_posix()),
        ("initial_workers", "2"),
        ("synchronous", "true"),
    )
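
    # Registration sketch via the management API (assuming the default
    # port 8081), equivalent to the register_model_with_params call below:
    #   curl -X POST "http://localhost:8081/models?model_name=<name>&url=<model_store>/<name>&initial_workers=2&synchronous=true"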

    test_utils.start_torchserve(
        model_store=model_store, snapshot_file=CONFIG_PROPERTIES_PATH, gen_mar=False
    )

    try:
        test_utils.reg_resp = test_utils.register_model_with_params(params)

        # Drive two independent sequences concurrently; sequence batching
        # must keep their running state separate.
        t0 = threading.Thread(
            target=__infer_stateful,
            args=(
                model_name,
                "seq_0",
                "1 4 9 16 25",
            ),
        )
        t1 = threading.Thread(
            target=__infer_stateful,
            args=(
                model_name,
                "seq_1",
                "2 6 12 20 30",
            ),
        )

        t0.start()
        t1.start()

        t0.join()
        t1.join()
    finally:
        test_utils.unregister_model(model_name)
        test_utils.stop_torchserve()

        # Clean up files
        shutil.rmtree(Path(model_store) / model_name)


def __infer_stateful(model_name, sequence_id, expected):
    """
    Send five requests within one sequence and compare the joined responses
    against the expected running sums: seq_0 posts 1, 3, 5, 7, 9 and seq_1
    posts 2, 4, 6, 8, 10, so the handler's cumulative sums are
    "1 4 9 16 25" and "2 6 12 20 30" respectively.
    """
    headers = {
        "ts_request_sequence_id": sequence_id,
    }
    prediction = []
    for idx in range(5):
        # Interleave the two sequences: seq_0 sends odd values, seq_1 even ones
        if sequence_id == "seq_0":
            idx = 2 * idx
        elif sequence_id == "seq_1":
            idx = 2 * idx + 1
        response = requests.post(
            url=f"http://localhost:8080/predictions/{model_name}",
            headers=headers,
            data=str(idx + 1).encode(),
        )
        prediction.append(response.text)

    assert " ".join(prediction) == expected
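
# A single turn of a sequence can be reproduced from the shell; a sketch,
# assuming the default inference port 8080 and the model name "stateful":
#   curl -X POST http://localhost:8080/predictions/stateful \
#     -H "ts_request_sequence_id: seq_0" -d "1"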