Skip to content

Commit

Permalink
Include missing model configuration values in describe model API resp…
Browse files Browse the repository at this point in the history
…onse (#3122)

* Include missing model configuration values in describe model API response

* fix Java formatting

---------

Co-authored-by: Matthias Reso <13337103+mreso@users.noreply.github.com>
  • Loading branch information
namannandan and mreso committed May 9, 2024
1 parent 2c140b9 commit e332949
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,20 @@ public class DescribeModelResponse {
private int maxWorkers;
private int batchSize;
private int maxBatchDelay;
// Model configuration values exposed through the describe-model response.
// Each field below has a matching getter/setter pair in this class.
private int responseTimeout;
// Seconds, per the field name.
private long maxRetryTimeoutInSec;
// NOTE(review): "Mills" presumably means milliseconds -- confirm against the model class.
private long clientTimeoutInMills;
// Parallel type carried as its string form.
private String parallelType;
private int parallelLevel;
// Device type carried as its string form.
private String deviceType;
// Held by reference; the setter does not copy the list.
private List<Integer> deviceIds;
private boolean continuousBatching;
private boolean useJobTicket;
private boolean useVenv;
private boolean stateful;
// NOTE(review): presumably milliseconds, judging by the "MSec" suffix -- confirm.
private long sequenceMaxIdleMSec;
private int maxNumSequence;
private int maxSequenceJobQueueSize;
private String status;
private boolean loadedAtStartup;

Expand Down Expand Up @@ -113,6 +127,118 @@ public void setMaxBatchDelay(int maxBatchDelay) {
this.maxBatchDelay = maxBatchDelay;
}

/**
 * Returns the response timeout stored in this response object.
 *
 * @return the current {@code responseTimeout} value
 */
public int getResponseTimeout() {
return responseTimeout;
}

/**
 * Sets the response timeout reported by this response object.
 *
 * @param responseTimeout the value to store; no validation is performed
 */
public void setResponseTimeout(int responseTimeout) {
this.responseTimeout = responseTimeout;
}

/**
 * Returns the maximum retry timeout stored in this response object.
 *
 * @return the current {@code maxRetryTimeoutInSec} value (seconds, per the name)
 */
public long getMaxRetryTimeoutInSec() {
return maxRetryTimeoutInSec;
}

/**
 * Sets the maximum retry timeout reported by this response object.
 *
 * @param maxRetryTimeoutInSec the value to store (seconds, per the name); no validation is
 *     performed
 */
public void setMaxRetryTimeoutInSec(long maxRetryTimeoutInSec) {
this.maxRetryTimeoutInSec = maxRetryTimeoutInSec;
}

/**
 * Returns the client timeout stored in this response object.
 *
 * <p>NOTE(review): the "InMills" suffix presumably means milliseconds -- confirm.
 *
 * @return the current {@code clientTimeoutInMills} value
 */
public long getClientTimeoutInMills() {
return clientTimeoutInMills;
}

/**
 * Sets the client timeout reported by this response object.
 *
 * @param clientTimeoutInMills the value to store (presumably milliseconds -- confirm); no
 *     validation is performed
 */
public void setClientTimeoutInMills(long clientTimeoutInMills) {
this.clientTimeoutInMills = clientTimeoutInMills;
}

/**
 * Returns the parallel type, in string form, stored in this response object.
 *
 * @return the current {@code parallelType} value; {@code null} if it was never set
 */
public String getParallelType() {
return parallelType;
}

/**
 * Sets the parallel type reported by this response object.
 *
 * @param parallelType the string to store; stored as-is, including {@code null}
 */
public void setParallelType(String parallelType) {
this.parallelType = parallelType;
}

/**
 * Returns the parallel level stored in this response object.
 *
 * @return the current {@code parallelLevel} value
 */
public int getParallelLevel() {
return parallelLevel;
}

/**
 * Sets the parallel level reported by this response object.
 *
 * @param parallelLevel the value to store; no validation is performed
 */
public void setParallelLevel(int parallelLevel) {
this.parallelLevel = parallelLevel;
}

/**
 * Returns the device type, in string form, stored in this response object.
 *
 * @return the current {@code deviceType} value; {@code null} if it was never set
 */
public String getDeviceType() {
return deviceType;
}

/**
 * Sets the device type reported by this response object.
 *
 * @param deviceType the string to store; stored as-is, including {@code null}
 */
public void setDeviceType(String deviceType) {
this.deviceType = deviceType;
}

/**
 * Returns the device id list stored in this response object.
 *
 * <p>The internal list reference is returned directly (no copy), so changes made by the caller
 * are visible through this object.
 *
 * @return the current {@code deviceIds} reference; {@code null} if it was never set
 */
public List<Integer> getDeviceIds() {
return deviceIds;
}

/**
 * Sets the device id list reported by this response object.
 *
 * <p>The given reference is stored as-is (no defensive copy), so later changes to the caller's
 * list are visible through this object.
 *
 * @param deviceIds the list to store; may be {@code null}
 */
public void setDeviceIds(List<Integer> deviceIds) {
this.deviceIds = deviceIds;
}

/**
 * Returns the continuous-batching flag stored in this response object.
 *
 * <p>Note that this boolean accessor uses the {@code get} prefix rather than {@code is}.
 *
 * @return the current {@code continuousBatching} value
 */
public boolean getContinuousBatching() {
return continuousBatching;
}

/**
 * Sets the continuous-batching flag reported by this response object.
 *
 * @param continuousBatching the flag value to store
 */
public void setContinuousBatching(boolean continuousBatching) {
this.continuousBatching = continuousBatching;
}

/**
 * Returns the use-job-ticket flag stored in this response object.
 *
 * <p>Note that this boolean accessor uses the {@code get} prefix rather than {@code is}.
 *
 * @return the current {@code useJobTicket} value
 */
public boolean getUseJobTicket() {
return useJobTicket;
}

/**
 * Sets the use-job-ticket flag reported by this response object.
 *
 * @param useJobTicket the flag value to store
 */
public void setUseJobTicket(boolean useJobTicket) {
this.useJobTicket = useJobTicket;
}

/**
 * Returns the use-venv flag stored in this response object.
 *
 * <p>Note that this boolean accessor uses the {@code get} prefix rather than {@code is}.
 *
 * @return the current {@code useVenv} value
 */
public boolean getUseVenv() {
return useVenv;
}

/**
 * Sets the use-venv flag reported by this response object.
 *
 * @param useVenv the flag value to store
 */
public void setUseVenv(boolean useVenv) {
this.useVenv = useVenv;
}

/**
 * Returns the stateful flag stored in this response object.
 *
 * <p>Note that this boolean accessor uses the {@code get} prefix rather than {@code is}.
 *
 * @return the current {@code stateful} value
 */
public boolean getStateful() {
return stateful;
}

/**
 * Sets the stateful flag reported by this response object.
 *
 * @param stateful the flag value to store
 */
public void setStateful(boolean stateful) {
this.stateful = stateful;
}

/**
 * Returns the sequence maximum idle time stored in this response object.
 *
 * <p>NOTE(review): the "MSec" suffix presumably means milliseconds -- confirm.
 *
 * @return the current {@code sequenceMaxIdleMSec} value
 */
public long getSequenceMaxIdleMSec() {
return sequenceMaxIdleMSec;
}

/**
 * Sets the sequence maximum idle time reported by this response object.
 *
 * @param sequenceMaxIdleMSec the value to store (presumably milliseconds -- confirm); no
 *     validation is performed
 */
public void setSequenceMaxIdleMSec(long sequenceMaxIdleMSec) {
this.sequenceMaxIdleMSec = sequenceMaxIdleMSec;
}

/**
 * Returns the maximum number of sequences stored in this response object.
 *
 * @return the current {@code maxNumSequence} value
 */
public int getMaxNumSequence() {
return maxNumSequence;
}

/**
 * Sets the maximum number of sequences reported by this response object.
 *
 * @param maxNumSequence the value to store; no validation is performed
 */
public void setMaxNumSequence(int maxNumSequence) {
this.maxNumSequence = maxNumSequence;
}

/**
 * Returns the maximum sequence job queue size stored in this response object.
 *
 * @return the current {@code maxSequenceJobQueueSize} value
 */
public int getMaxSequenceJobQueueSize() {
return maxSequenceJobQueueSize;
}

/**
 * Sets the maximum sequence job queue size reported by this response object.
 *
 * @param maxSequenceJobQueueSize the value to store; no validation is performed
 */
public void setMaxSequenceJobQueueSize(int maxSequenceJobQueueSize) {
this.maxSequenceJobQueueSize = maxSequenceJobQueueSize;
}

/**
 * Returns the status string stored in this response object.
 *
 * @return the current {@code status} value; {@code null} if it was never set
 */
public String getStatus() {
return status;
}
Expand Down
14 changes: 14 additions & 0 deletions frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,20 @@ private static DescribeModelResponse createModelResponse(
Manifest manifest = model.getModelArchive().getManifest();
resp.setModelVersion(manifest.getModel().getModelVersion());
resp.setRuntime(manifest.getRuntime().getValue());
resp.setResponseTimeout(model.getResponseTimeout());
resp.setMaxRetryTimeoutInSec(model.getMaxRetryTimeoutInMill() / 1000);
resp.setClientTimeoutInMills(model.getClientTimeoutInMills());
resp.setParallelType(model.getParallelType().getParallelType());
resp.setParallelLevel(model.getParallelLevel());
resp.setDeviceType(model.getDeviceType().getDeviceType());
resp.setDeviceIds(model.getDeviceIds());
resp.setContinuousBatching(model.isContinuousBatching());
resp.setUseJobTicket(model.isUseJobTicket());
resp.setUseVenv(model.isUseVenv());
resp.setStateful(model.isStateful());
resp.setSequenceMaxIdleMSec(model.getSequenceMaxIdleMSec());
resp.setMaxNumSequence(model.getMaxNumSequence());
resp.setMaxSequenceJobQueueSize(model.getMaxSequenceJobQueueSize());

List<WorkerThread> workers = modelManager.getWorkers(model.getModelVersionName());
for (WorkerThread worker : workers) {
Expand Down

0 comments on commit e332949

Please sign in to comment.