Include missing model configuration values in describe model API response (#3122)

namannandan · mreso · web-flow · commit e332949c697f · 2024-05-09T18:43:15.000Z
* Include missing model configuration values in describe model API response

* fix Java formatting

---------

Co-authored-by: Matthias Reso <13337103+mreso@users.noreply.github.com>
diff --git a/frontend/server/src/main/java/org/pytorch/serve/http/messages/DescribeModelResponse.java b/frontend/server/src/main/java/org/pytorch/serve/http/messages/DescribeModelResponse.java
@@ -21,6 +21,20 @@ public class DescribeModelResponse {
     private int maxWorkers;
     private int batchSize;
     private int maxBatchDelay;
+    private int responseTimeout; // set from model.getResponseTimeout() in ApiUtils.createModelResponse
+    private long maxRetryTimeoutInSec; // set from model.getMaxRetryTimeoutInMill() / 1000
+    private long clientTimeoutInMills; // set from model.getClientTimeoutInMills()
+    private String parallelType; // set from model.getParallelType().getParallelType()
+    private int parallelLevel; // set from model.getParallelLevel()
+    private String deviceType; // set from model.getDeviceType().getDeviceType()
+    private List<Integer> deviceIds; // set from model.getDeviceIds(); the list reference is stored as-is
+    private boolean continuousBatching; // set from model.isContinuousBatching()
+    private boolean useJobTicket; // set from model.isUseJobTicket()
+    private boolean useVenv; // set from model.isUseVenv()
+    private boolean stateful; // set from model.isStateful()
+    private long sequenceMaxIdleMSec; // set from model.getSequenceMaxIdleMSec()
+    private int maxNumSequence; // set from model.getMaxNumSequence()
+    private int maxSequenceJobQueueSize; // set from model.getMaxSequenceJobQueueSize()
     private String status;
     private boolean loadedAtStartup;
 
@@ -113,6 +127,118 @@ public void setMaxBatchDelay(int maxBatchDelay) {
         this.maxBatchDelay = maxBatchDelay;
     }
 
+    public int getResponseTimeout() { // returns the stored value unchanged; unit is not visible here
+        return responseTimeout;
+    }
+
+    public void setResponseTimeout(int responseTimeout) { // plain assignment, no range check
+        this.responseTimeout = responseTimeout;
+    }
+
+    public long getMaxRetryTimeoutInSec() { // seconds, per the field name; returned as stored
+        return maxRetryTimeoutInSec;
+    }
+
+    public void setMaxRetryTimeoutInSec(long maxRetryTimeoutInSec) { // no unit conversion done here
+        this.maxRetryTimeoutInSec = maxRetryTimeoutInSec;
+    }
+
+    public long getClientTimeoutInMills() { // returned as stored; presumably milliseconds - confirm
+        return clientTimeoutInMills;
+    }
+
+    public void setClientTimeoutInMills(long clientTimeoutInMills) { // plain assignment, no check
+        this.clientTimeoutInMills = clientTimeoutInMills;
+    }
+
+    public String getParallelType() { // string form of the parallel type; null if never set
+        return parallelType;
+    }
+
+    public void setParallelType(String parallelType) { // stored verbatim; value is not validated
+        this.parallelType = parallelType;
+    }
+
+    public int getParallelLevel() { // returns the stored value; 0 if never set (int default)
+        return parallelLevel;
+    }
+
+    public void setParallelLevel(int parallelLevel) { // plain assignment, no range check
+        this.parallelLevel = parallelLevel;
+    }
+
+    public String getDeviceType() { // string form of the device type; null if never set
+        return deviceType;
+    }
+
+    public void setDeviceType(String deviceType) { // stored verbatim; value is not validated
+        this.deviceType = deviceType;
+    }
+
+    public List<Integer> getDeviceIds() { // hands out the internal list itself (no copy); may be null
+        return deviceIds;
+    }
+
+    public void setDeviceIds(List<Integer> deviceIds) { // keeps the caller's list reference (no copy)
+        this.deviceIds = deviceIds;
+    }
+
+    public boolean getContinuousBatching() { // boolean accessor uses a get- prefix, not is-
+        return continuousBatching;
+    }
+
+    public void setContinuousBatching(boolean continuousBatching) { // plain assignment
+        this.continuousBatching = continuousBatching;
+    }
+
+    public boolean getUseJobTicket() { // boolean accessor uses a get- prefix, not is-
+        return useJobTicket;
+    }
+
+    public void setUseJobTicket(boolean useJobTicket) { // plain assignment
+        this.useJobTicket = useJobTicket;
+    }
+
+    public boolean getUseVenv() { // boolean accessor uses a get- prefix, not is-
+        return useVenv;
+    }
+
+    public void setUseVenv(boolean useVenv) { // plain assignment
+        this.useVenv = useVenv;
+    }
+
+    public boolean getStateful() { // boolean accessor uses a get- prefix, not is-
+        return stateful;
+    }
+
+    public void setStateful(boolean stateful) { // plain assignment
+        this.stateful = stateful;
+    }
+
+    public long getSequenceMaxIdleMSec() { // returned as stored; presumably milliseconds - confirm
+        return sequenceMaxIdleMSec;
+    }
+
+    public void setSequenceMaxIdleMSec(long sequenceMaxIdleMSec) { // plain assignment, no check
+        this.sequenceMaxIdleMSec = sequenceMaxIdleMSec;
+    }
+
+    public int getMaxNumSequence() { // returns the stored value; 0 if never set (int default)
+        return maxNumSequence;
+    }
+
+    public void setMaxNumSequence(int maxNumSequence) { // plain assignment, no range check
+        this.maxNumSequence = maxNumSequence;
+    }
+
+    public int getMaxSequenceJobQueueSize() { // returns the stored value; 0 if never set
+        return maxSequenceJobQueueSize;
+    }
+
+    public void setMaxSequenceJobQueueSize(int maxSequenceJobQueueSize) { // plain assignment
+        this.maxSequenceJobQueueSize = maxSequenceJobQueueSize;
+    }
+
     public String getStatus() {
         return status;
     }
diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java b/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java
@@ -402,6 +402,20 @@ private static DescribeModelResponse createModelResponse(
         Manifest manifest = model.getModelArchive().getManifest();
         resp.setModelVersion(manifest.getModel().getModelVersion());
         resp.setRuntime(manifest.getRuntime().getValue());
+        resp.setResponseTimeout(model.getResponseTimeout());
+        resp.setMaxRetryTimeoutInSec(model.getMaxRetryTimeoutInMill() / 1000);
+        resp.setClientTimeoutInMills(model.getClientTimeoutInMills());
+        resp.setParallelType(model.getParallelType().getParallelType());
+        resp.setParallelLevel(model.getParallelLevel());
+        resp.setDeviceType(model.getDeviceType().getDeviceType());
+        resp.setDeviceIds(model.getDeviceIds());
+        resp.setContinuousBatching(model.isContinuousBatching());
+        resp.setUseJobTicket(model.isUseJobTicket());
+        resp.setUseVenv(model.isUseVenv());
+        resp.setStateful(model.isStateful());
+        resp.setSequenceMaxIdleMSec(model.getSequenceMaxIdleMSec());
+        resp.setMaxNumSequence(model.getMaxNumSequence());
+        resp.setMaxSequenceJobQueueSize(model.getMaxSequenceJobQueueSize());
 
         List<WorkerThread> workers = modelManager.getWorkers(model.getModelVersionName());
         for (WorkerThread worker : workers) {