Skip to content

Commit 1ff1b3b

Browse files
authored
support system_metrics_cmd in config.properties (#3000)
* support system_metrics_cmd in config.properties * address security concern * add log * update readme
1 parent 424b66d commit 1ff1b3b

File tree

3 files changed

+24
-8
lines changed

3 files changed

+24
-8
lines changed

docs/configuration.md

+1
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ e.g. : To allow base URLs `https://s3.amazonaws.com/` and `https://torchserve.py
297297
* For security reasons, `use_env_allowed_urls=true` is required in config.properties to read `allowed_urls` from an environment variable.
298298
* `workflow_store` : Path of workflow store directory. Defaults to model store directory.
299299
* `disable_system_metrics` : Disable collection of system metrics when set to "true". Default value is "false".
300+
* `system_metrics_cmd` : A custom system metrics Python script name with its arguments, for example `ts/metrics/metric_collector.py --gpu 0`. Defaults to empty, which means TorchServe collects system metrics via `ts/metrics/metric_collector.py --gpu $CUDA_VISIBLE_DEVICES`.
300301

301302
**NOTE**
302303

frontend/server/src/main/java/org/pytorch/serve/metrics/MetricCollector.java

+15-7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import java.io.OutputStream;
88
import java.nio.charset.StandardCharsets;
99
import java.util.ArrayList;
10+
import java.util.Arrays;
1011
import java.util.List;
1112
import java.util.Map;
1213
import org.apache.commons.io.IOUtils;
@@ -32,16 +33,23 @@ public MetricCollector(ConfigManager configManager) {
3233
public void run() {
3334
try {
3435
// Collect System level Metrics
35-
String[] args = new String[4];
36-
args[0] = configManager.getPythonExecutable();
37-
args[1] = "ts/metrics/metric_collector.py";
38-
args[2] = "--gpu";
39-
args[3] = String.valueOf(ConfigManager.getInstance().getNumberOfGpu());
36+
List<String> args = new ArrayList<>();
37+
args.add(configManager.getPythonExecutable());
38+
String systemMetricsCmd = configManager.getSystemMetricsCmd();
39+
if (systemMetricsCmd.isEmpty()) {
40+
systemMetricsCmd =
41+
String.format(
42+
"%s --gpu %s",
43+
"ts/metrics/metric_collector.py",
44+
String.valueOf(configManager.getNumberOfGpu()));
45+
}
46+
args.addAll(Arrays.asList(systemMetricsCmd.split("\\s+")));
4047
File workingDir = new File(configManager.getModelServerHome());
4148

4249
String[] envp = EnvironmentUtils.getEnvString(workingDir.getAbsolutePath(), null, null);
43-
44-
final Process p = Runtime.getRuntime().exec(args, envp, workingDir); // NOPMD
50+
final Process p =
51+
Runtime.getRuntime()
52+
.exec(args.toArray(new String[0]), envp, workingDir); // NOPMD
4553
ModelManager modelManager = ModelManager.getInstance();
4654
Map<Integer, WorkerThread> workerMap = modelManager.getWorkers();
4755
try (OutputStream os = p.getOutputStream()) {

frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ public final class ConfigManager {
117117
private static final String MODEL_SNAPSHOT = "model_snapshot";
118118
private static final String MODEL_CONFIG = "models";
119119
private static final String VERSION = "version";
120+
private static final String SYSTEM_METRICS_CMD = "system_metrics_cmd";
120121

121122
// Configuration default values
122123
private static final String DEFAULT_TS_ALLOWED_URLS = "file://.*|http(s)?://.*";
@@ -559,6 +560,10 @@ public String getCertificateFile() {
559560
return prop.getProperty(TS_CERTIFICATE_FILE);
560561
}
561562

563+
/**
 * Returns the custom system metrics command configured through the
 * {@code system_metrics_cmd} property.
 *
 * @return the configured value, or an empty string when the property is not set
 */
public String getSystemMetricsCmd() {
564+
return prop.getProperty(SYSTEM_METRICS_CMD, "");
565+
}
566+
562567
public SslContext getSslContext() throws IOException, GeneralSecurityException {
563568
List<String> supportedCiphers =
564569
Arrays.asList(
@@ -734,7 +739,9 @@ public String dumpConfigurations() {
734739
+ "\nCPP log config: "
735740
+ (getTsCppLogConfig() == null ? "N/A" : getTsCppLogConfig())
736741
+ "\nModel config: "
737-
+ prop.getProperty(MODEL_CONFIG, "N/A");
742+
+ prop.getProperty(MODEL_CONFIG, "N/A")
743+
+ "\nSystem metrics command: "
744+
+ (getSystemMetricsCmd().isEmpty() ? "default" : getSystemMetricsCmd());
738745
}
739746

740747
public boolean useNativeIo() {

0 commit comments

Comments
 (0)