Skip to content

Commit 9419344

Browse files
namannandan and mreso authored May 9, 2024
Add support for gRPC max connection age configuration (#3121)
* Add support for gRPC max connection age configuration
* Add separate gRPC connection age configurations for Inference and Management endpoints
* Fix configuration parsing and default value
* Link docs for gRPC max connection age configuration
* fix spellcheck

---------

Co-authored-by: Matthias Reso <13337103+mreso@users.noreply.github.com>
1 parent e332949 commit 9419344

File tree

4 files changed

+58
-2
lines changed

4 files changed

+58
-2
lines changed
 

‎docs/configuration.md

+10-2
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ inference_address=https://127.0.0.1:8443
9393
inference_address=https://172.16.1.10:8080
9494
```
9595

96-
### Configure TorchServe gRPC listening addresses and ports
96+
### Configure TorchServe gRPC listening addresses, ports and max connection age
9797
The inference gRPC API is listening on port 7070, and the management gRPC API is listening on port 7071 on localhost by default.
9898

9999
To configure different addresses, use the following properties
@@ -106,7 +106,15 @@ To configure different ports use following properties
106106
* `grpc_inference_port`: Inference gRPC API binding port. Default: 7070
107107
* `grpc_management_port`: management gRPC API binding port. Default: 7071
108108

109-
Here are a couple of examples:
109+
To configure the [max connection age](https://grpc.github.io/grpc-java/javadoc/io/grpc/netty/NettyServerBuilder.html#maxConnectionAge(long,java.util.concurrent.TimeUnit)) (in milliseconds), use the following properties
110+
111+
* `grpc_inference_max_connection_age_ms`: Inference gRPC max connection age. Default: Infinite
112+
* `grpc_management_max_connection_age_ms`: Management gRPC max connection age. Default: Infinite
113+
114+
To configure the [max connection age grace](https://grpc.github.io/grpc-java/javadoc/io/grpc/netty/NettyServerBuilder.html#maxConnectionAgeGrace(long,java.util.concurrent.TimeUnit)) (in milliseconds), use the following properties
115+
116+
* `grpc_inference_max_connection_age_grace_ms`: Inference gRPC max connection age grace. Default: Infinite
117+
* `grpc_management_max_connection_age_grace_ms`: Management gRPC max connection age grace. Default: Infinite
110118

111119
### Enable SSL
112120

‎frontend/server/src/main/java/org/pytorch/serve/ModelServer.java

+7
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import java.util.ServiceLoader;
2828
import java.util.Set;
2929
import java.util.concurrent.ExecutionException;
30+
import java.util.concurrent.TimeUnit;
3031
import java.util.concurrent.atomic.AtomicBoolean;
3132
import org.apache.commons.cli.CommandLine;
3233
import org.apache.commons.cli.DefaultParser;
@@ -452,6 +453,12 @@ private Server startGRPCServer(ConnectorType connectorType) throws IOException {
452453
new InetSocketAddress(
453454
configManager.getGRPCAddress(connectorType),
454455
configManager.getGRPCPort(connectorType)))
456+
.maxConnectionAge(
457+
configManager.getGRPCMaxConnectionAge(connectorType),
458+
TimeUnit.MILLISECONDS)
459+
.maxConnectionAgeGrace(
460+
configManager.getGRPCMaxConnectionAgeGrace(connectorType),
461+
TimeUnit.MILLISECONDS)
455462
.maxInboundMessageSize(configManager.getMaxRequestSize())
456463
.addService(
457464
ServerInterceptors.intercept(

‎frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java

+40
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,14 @@ public final class ConfigManager {
105105
private static final String TS_GRPC_MANAGEMENT_ADDRESS = "grpc_management_address";
106106
private static final String TS_GRPC_INFERENCE_PORT = "grpc_inference_port";
107107
private static final String TS_GRPC_MANAGEMENT_PORT = "grpc_management_port";
108+
private static final String TS_GRPC_INFERENCE_MAX_CONNECTION_AGE_MS =
109+
"grpc_inference_max_connection_age_ms";
110+
private static final String TS_GRPC_MANAGEMENT_MAX_CONNECTION_AGE_MS =
111+
"grpc_management_max_connection_age_ms";
112+
private static final String TS_GRPC_INFERENCE_MAX_CONNECTION_AGE_GRACE_MS =
113+
"grpc_inference_max_connection_age_grace_ms";
114+
private static final String TS_GRPC_MANAGEMENT_MAX_CONNECTION_AGE_GRACE_MS =
115+
"grpc_management_max_connection_age_grace_ms";
108116
private static final String TS_ENABLE_GRPC_SSL = "enable_grpc_ssl";
109117
private static final String TS_INITIAL_WORKER_PORT = "initial_worker_port";
110118
private static final String TS_INITIAL_DISTRIBUTION_PORT = "initial_distribution_port";
@@ -384,6 +392,30 @@ public int getGRPCPort(ConnectorType connectorType) throws IllegalArgumentExcept
384392
return Integer.parseInt(port);
385393
}
386394

395+
public long getGRPCMaxConnectionAge(ConnectorType connectorType)
396+
throws IllegalArgumentException {
397+
if (connectorType == ConnectorType.MANAGEMENT_CONNECTOR) {
398+
return getLongProperty(TS_GRPC_MANAGEMENT_MAX_CONNECTION_AGE_MS, Long.MAX_VALUE);
399+
} else if (connectorType == ConnectorType.INFERENCE_CONNECTOR) {
400+
return getLongProperty(TS_GRPC_INFERENCE_MAX_CONNECTION_AGE_MS, Long.MAX_VALUE);
401+
} else {
402+
throw new IllegalArgumentException(
403+
"Connector type not supported by gRPC: " + connectorType);
404+
}
405+
}
406+
407+
public long getGRPCMaxConnectionAgeGrace(ConnectorType connectorType)
408+
throws IllegalArgumentException {
409+
if (connectorType == ConnectorType.MANAGEMENT_CONNECTOR) {
410+
return getLongProperty(TS_GRPC_MANAGEMENT_MAX_CONNECTION_AGE_GRACE_MS, Long.MAX_VALUE);
411+
} else if (connectorType == ConnectorType.INFERENCE_CONNECTOR) {
412+
return getLongProperty(TS_GRPC_INFERENCE_MAX_CONNECTION_AGE_GRACE_MS, Long.MAX_VALUE);
413+
} else {
414+
throw new IllegalArgumentException(
415+
"Connector type not supported by gRPC: " + connectorType);
416+
}
417+
}
418+
387419
public boolean isOpenInferenceProtocol() {
388420
String inferenceProtocol = System.getenv("TS_OPEN_INFERENCE_PROTOCOL");
389421
if (inferenceProtocol != null && inferenceProtocol != "") {
@@ -795,6 +827,14 @@ private int getIntProperty(String key, int def) {
795827
return Integer.parseInt(value);
796828
}
797829

830+
private long getLongProperty(String key, long def) {
831+
String value = prop.getProperty(key);
832+
if (value == null) {
833+
return def;
834+
}
835+
return Long.parseLong(value);
836+
}
837+
798838
public int getDefaultResponseTimeout() {
799839
return Integer.parseInt(prop.getProperty(TS_DEFAULT_RESPONSE_TIMEOUT, "120"));
800840
}

‎ts_scripts/spellcheck_conf/wordlist.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1237,3 +1237,4 @@ SamplingParams
12371237
lora
12381238
vllm
12391239
sql
1240+
TimeUnit

0 commit comments

Comments
 (0)