Skip to content

Commit e332949

Browse files
namannandan and mreso authored
Include missing model configuration values in describe model API response (#3122)
* Include missing model configuration values in describe model API response
* fix Java formatting
---------
Co-authored-by: Matthias Reso <[email protected]>
1 parent 2c140b9 commit e332949

File tree

2 files changed: +140 -0 lines changed

frontend/server/src/main/java/org/pytorch/serve/http/messages/DescribeModelResponse.java

+126
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,20 @@ public class DescribeModelResponse {
2121
private int maxWorkers;
2222
private int batchSize;
2323
private int maxBatchDelay;
24+
private int responseTimeout;
25+
private long maxRetryTimeoutInSec;
26+
private long clientTimeoutInMills;
27+
private String parallelType;
28+
private int parallelLevel;
29+
private String deviceType;
30+
private List<Integer> deviceIds;
31+
private boolean continuousBatching;
32+
private boolean useJobTicket;
33+
private boolean useVenv;
34+
private boolean stateful;
35+
private long sequenceMaxIdleMSec;
36+
private int maxNumSequence;
37+
private int maxSequenceJobQueueSize;
2438
private String status;
2539
private boolean loadedAtStartup;
2640

@@ -113,6 +127,118 @@ public void setMaxBatchDelay(int maxBatchDelay) {
113127
this.maxBatchDelay = maxBatchDelay;
114128
}
115129

130+
public int getResponseTimeout() {
131+
return responseTimeout;
132+
}
133+
134+
public void setResponseTimeout(int responseTimeout) {
135+
this.responseTimeout = responseTimeout;
136+
}
137+
138+
public long getMaxRetryTimeoutInSec() {
139+
return maxRetryTimeoutInSec;
140+
}
141+
142+
public void setMaxRetryTimeoutInSec(long maxRetryTimeoutInSec) {
143+
this.maxRetryTimeoutInSec = maxRetryTimeoutInSec;
144+
}
145+
146+
public long getClientTimeoutInMills() {
147+
return clientTimeoutInMills;
148+
}
149+
150+
public void setClientTimeoutInMills(long clientTimeoutInMills) {
151+
this.clientTimeoutInMills = clientTimeoutInMills;
152+
}
153+
154+
public String getParallelType() {
155+
return parallelType;
156+
}
157+
158+
public void setParallelType(String parallelType) {
159+
this.parallelType = parallelType;
160+
}
161+
162+
public int getParallelLevel() {
163+
return parallelLevel;
164+
}
165+
166+
public void setParallelLevel(int parallelLevel) {
167+
this.parallelLevel = parallelLevel;
168+
}
169+
170+
public String getDeviceType() {
171+
return deviceType;
172+
}
173+
174+
public void setDeviceType(String deviceType) {
175+
this.deviceType = deviceType;
176+
}
177+
178+
public List<Integer> getDeviceIds() {
179+
return deviceIds;
180+
}
181+
182+
public void setDeviceIds(List<Integer> deviceIds) {
183+
this.deviceIds = deviceIds;
184+
}
185+
186+
public boolean getContinuousBatching() {
187+
return continuousBatching;
188+
}
189+
190+
public void setContinuousBatching(boolean continuousBatching) {
191+
this.continuousBatching = continuousBatching;
192+
}
193+
194+
public boolean getUseJobTicket() {
195+
return useJobTicket;
196+
}
197+
198+
public void setUseJobTicket(boolean useJobTicket) {
199+
this.useJobTicket = useJobTicket;
200+
}
201+
202+
public boolean getUseVenv() {
203+
return useVenv;
204+
}
205+
206+
public void setUseVenv(boolean useVenv) {
207+
this.useVenv = useVenv;
208+
}
209+
210+
public boolean getStateful() {
211+
return stateful;
212+
}
213+
214+
public void setStateful(boolean stateful) {
215+
this.stateful = stateful;
216+
}
217+
218+
public long getSequenceMaxIdleMSec() {
219+
return sequenceMaxIdleMSec;
220+
}
221+
222+
public void setSequenceMaxIdleMSec(long sequenceMaxIdleMSec) {
223+
this.sequenceMaxIdleMSec = sequenceMaxIdleMSec;
224+
}
225+
226+
public int getMaxNumSequence() {
227+
return maxNumSequence;
228+
}
229+
230+
public void setMaxNumSequence(int maxNumSequence) {
231+
this.maxNumSequence = maxNumSequence;
232+
}
233+
234+
public int getMaxSequenceJobQueueSize() {
235+
return maxSequenceJobQueueSize;
236+
}
237+
238+
public void setMaxSequenceJobQueueSize(int maxSequenceJobQueueSize) {
239+
this.maxSequenceJobQueueSize = maxSequenceJobQueueSize;
240+
}
241+
116242
public String getStatus() {
117243
return status;
118244
}

frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java

+14
Original file line number | Diff line number | Diff line change
@@ -402,6 +402,20 @@ private static DescribeModelResponse createModelResponse(
402402
Manifest manifest = model.getModelArchive().getManifest();
403403
resp.setModelVersion(manifest.getModel().getModelVersion());
404404
resp.setRuntime(manifest.getRuntime().getValue());
405+
resp.setResponseTimeout(model.getResponseTimeout());
406+
resp.setMaxRetryTimeoutInSec(model.getMaxRetryTimeoutInMill() / 1000);
407+
resp.setClientTimeoutInMills(model.getClientTimeoutInMills());
408+
resp.setParallelType(model.getParallelType().getParallelType());
409+
resp.setParallelLevel(model.getParallelLevel());
410+
resp.setDeviceType(model.getDeviceType().getDeviceType());
411+
resp.setDeviceIds(model.getDeviceIds());
412+
resp.setContinuousBatching(model.isContinuousBatching());
413+
resp.setUseJobTicket(model.isUseJobTicket());
414+
resp.setUseVenv(model.isUseVenv());
415+
resp.setStateful(model.isStateful());
416+
resp.setSequenceMaxIdleMSec(model.getSequenceMaxIdleMSec());
417+
resp.setMaxNumSequence(model.getMaxNumSequence());
418+
resp.setMaxSequenceJobQueueSize(model.getMaxSequenceJobQueueSize());
405419

406420
List<WorkerThread> workers = modelManager.getWorkers(model.getModelVersionName());
407421
for (WorkerThread worker : workers) {

0 commit comments

Comments
 (0)