Commit 159547e

Author: Ubuntu
Message: test dashboard
Parent: dd4de27

3 files changed: +30 -83 lines

.github/workflows/benchmark_nightly_lmi.yml (+1 -22)
@@ -56,25 +56,4 @@ jobs:
       - name: Clean up weights
         run: |
           rm -rf /home/ubuntu/Llama-2-7b-chat-hf
-      - name: Save benchmark artifacts
-        uses: actions/upload-artifact@v2
-        with:
-          name: nightly ${{ matrix.hardware }} artifact
-          path: /tmp/ts_benchmark
-      - name: Download benchmark artifacts for auto validation
-        uses: dawidd6/action-download-artifact@v2
-        with:
-          workflow: ${{ github.event.workflow_run.workflow_id }}
-          workflow_conclusion: success
-          if_no_artifact_found: ignore
-          path: /tmp/ts_artifacts
-          name: ${{ matrix.hardware }}_benchmark_validation
-      - name: Validate Benchmark result
-        run: python benchmarks/validate_report.py --input-artifacts-dir /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
-      - name: Update benchmark artifacts for auto validation
-        run: python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
-      - name: Upload the updated benchmark artifacts for auto validation
-        uses: actions/upload-artifact@v2
-        with:
-          name: ${{ matrix.hardware }}_benchmark_validation
-          path: /tmp/ts_artifacts
+
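For reference, the removed auto-validation chain reduces to two script invocations that can still be run by hand against a saved artifacts directory. A minimal sketch using the paths from the removed steps; the HW value stands in for ${{ matrix.hardware }}, whose actual matrix values are not shown in this hunk and are an assumption here:

HW=gpu  # stand-in for ${{ matrix.hardware }} (assumed value)
# compare a new report against previously saved artifacts
python benchmarks/validate_report.py --input-artifacts-dir /tmp/ts_artifacts/${HW}_benchmark_validation
# refresh the saved artifacts with the new run's numbers
python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/${HW}_benchmark_validation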

benchmarks/benchmark_config_lmi.yaml (+7 -8)
@@ -9,9 +9,8 @@ ts_version:
 # a list of model configure yaml files defined in benchmarks/models_config
 # or a list of model configure yaml files with full path
 models:
-  #- "llama-2-7b.yaml"
-  #- "llama-2-13b.yaml"
-  - "llama-2-70b.yaml"
+  - "llama-2-7b.yaml"
+  #- "llama-2-70b.yaml"
 
 # benchmark on "cpu" or "gpu".
 # "cpu" is set if "hardware" is not specified
@@ -24,11 +23,11 @@ hardware: &hardware "gpu"
 # - keep the values order as the same as the command definition.
 # - set up the command before enabling `metrics_cmd`.
 # For example, aws client and AWS credentials need to be setup before trying this example.
-# metrics_cmd:
-#   - "cmd": "aws cloudwatch put-metric-data"
-#   - "--namespace": ["torchserve_benchmark_nightly_", *hardware]
-#   - "--region": "us-east-2"
-#   - "--metric-data": 'file:///tmp/benchmark/logs/stats_metrics.json'
+metrics_cmd:
+  - "cmd": "aws cloudwatch put-metric-data"
+  - "--namespace": ["torchserve_benchmark_nightly_lmi"]
+  - "--region": "us-east-2"
+  - "--metric-data": 'file:///tmp/benchmark/logs/stats_metrics.json'
 
 # load report to remote storage or local different path if "report_cmd" is set.
 # the command line to load report to remote storage.
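Since the comments above note that values must keep the order of the command definition, the newly enabled metrics_cmd presumably assembles into a single CLI call along these lines (the exact quoting applied by the benchmark harness is an assumption):

# approximate expansion of the metrics_cmd list above
aws cloudwatch put-metric-data \
    --namespace torchserve_benchmark_nightly_lmi \
    --region us-east-2 \
    --metric-data file:///tmp/benchmark/logs/stats_metrics.json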
benchmarks/models_config/llama-2-7b.yaml (+22 -53)
@@ -1,40 +1,8 @@
 ---
 llama-2-7b:
-  int8:
-    benchmark_engine: "ab"
-    url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int8.mar
-    workers:
-      - 1
-    batch_delay: 100
-    batch_size:
-      - 1
-    input: "./examples/large_models/gpt_fast/request.json"
-    requests: 1000
-    concurrency: 1
-    backend_profiling: False
-    exec_env: "local"
-    processors:
-      - "gpus": "all"
-    stream: "false"
-  int4:
-    benchmark_engine: "ab"
-    url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int4.mar
-    workers:
-      - 1
-    batch_delay: 100
-    batch_size:
-      - 1
-    input: "./examples/large_models/gpt_fast/request.json"
-    requests: 1000
-    concurrency: 1
-    backend_profiling: False
-    exec_env: "local"
-    processors:
-      - "gpus": "all"
-    stream: "false"
-# base:
+# int8:
 #   benchmark_engine: "ab"
-#   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar
+#   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int8.mar
 #   workers:
 #     - 1
 #   batch_delay: 100
@@ -48,36 +16,37 @@ llama-2-7b:
 #   processors:
 #     - "gpus": "all"
 #   stream: "false"
-# int8-tp:
+# int4:
 #   benchmark_engine: "ab"
-#   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/llama-2-7b-int8-tp.mar
+#   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int4.mar
 #   workers:
-#     - 4
-#   batch_delay: 100
-#   batch_size:
 #     - 1
-#   input: "./examples/large_models/gpt_fast/request.json"
-#   requests: 1000
-#   concurrency: 4
-#   backend_profiling: False
-#   exec_env: "local"
-#   processors:
-#     - "gpus": "all"
-#   stream: "false"
-# int4-tp:
-#   benchmark_engine: "ab"
-#   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/llama-2-7b-int4-tp.mar
-#   workers:
-#     - 4
 #   batch_delay: 100
 #   batch_size:
 #     - 1
 #   input: "./examples/large_models/gpt_fast/request.json"
 #   requests: 1000
-#   concurrency: 4
+#   concurrency: 1
 #   backend_profiling: False
 #   exec_env: "local"
 #   processors:
 #     - "gpus": "all"
 #   stream: "false"
+  base:
+    benchmark_engine: "ab"
+    url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar
+    workers:
+      - 1
+    batch_delay: 100
+    batch_size:
+      - 1
+    input: "./examples/large_models/gpt_fast/request.json"
+    requests: 1000
+    concurrency: 1
+    backend_profiling: False
+    exec_env: "local"
+    processors:
+      - "gpus": "all"
+    stream: "false"
+
 
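The now-active base entry drives the "ab" engine with 1000 requests at concurrency 1 against the base (non-quantized) mar file. Roughly the equivalent raw ApacheBench call, for orientation only; the harness builds the real invocation from the fields above, and the inference endpoint and content type here are assumptions:

# approximate ab invocation implied by the base entry (endpoint assumed)
ab -n 1000 -c 1 \
   -p ./examples/large_models/gpt_fast/request.json -T application/json \
   http://127.0.0.1:8080/predictions/llama-2-7b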
