3 files changed: +30 −83 lines changed
File 1 (GitHub Actions nightly benchmark workflow):

@@ -56,25 +56,4 @@
       - name: Clean up weights
        run: |
          rm -rf /home/ubuntu/Llama-2-7b-chat-hf
-      - name: Save benchmark artifacts
-        uses: actions/upload-artifact@v2
-        with:
-          name: nightly ${{ matrix.hardware }} artifact
-          path: /tmp/ts_benchmark
-      - name: Download benchmark artifacts for auto validation
-        uses: dawidd6/action-download-artifact@v2
-        with:
-          workflow: ${{ github.event.workflow_run.workflow_id }}
-          workflow_conclusion: success
-          if_no_artifact_found: ignore
-          path: /tmp/ts_artifacts
-          name: ${{ matrix.hardware }}_benchmark_validation
-      - name: Validate Benchmark result
-        run: python benchmarks/validate_report.py --input-artifacts-dir /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
-      - name: Update benchmark artifacts for auto validation
-        run: python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
-      - name: Upload the updated benchmark artifacts for auto validation
-        uses: actions/upload-artifact@v2
-        with:
-          name: ${{ matrix.hardware }}_benchmark_validation
-          path: /tmp/ts_artifacts
+
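The removed steps saved the nightly report as an artifact, pulled the previous run's artifacts, validated the new report against them, and refreshed the stored artifacts. For reference, the two repository scripts those steps invoked can still be run directly; below is a minimal sketch, assuming the artifacts were fetched to a local `/tmp/ts_artifacts/gpu_benchmark_validation` directory, with `gpu` standing in for `${{ matrix.hardware }}`:

```python
import subprocess

# Assumed local path; in the workflow this was
# /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation.
artifacts_dir = "/tmp/ts_artifacts/gpu_benchmark_validation"

# "Validate Benchmark result": check the new report against the stored artifacts.
subprocess.run(
    ["python", "benchmarks/validate_report.py", "--input-artifacts-dir", artifacts_dir],
    check=True,
)

# "Update benchmark artifacts for auto validation": refresh the stored artifacts
# so they reflect the new run.
subprocess.run(
    ["python", "benchmarks/utils/update_artifacts.py", "--output", artifacts_dir],
    check=True,
)
```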
File 2 (nightly benchmark run configuration YAML):

@@ -9,9 +9,8 @@ ts_version:
 # a list of model configure yaml files defined in benchmarks/models_config
 # or a list of model configure yaml files with full path
 models:
-  # - "llama-2-7b.yaml"
-  # - "llama-2-13b.yaml"
-  - "llama-2-70b.yaml"
+  - "llama-2-7b.yaml"
+  # - "llama-2-70b.yaml"
 
 # benchmark on "cpu" or "gpu".
 # "cpu" is set if "hardware" is not specified
@@ -24,11 +23,11 @@ hardware: &hardware "gpu"
 # - keep the values order as the same as the command definition.
 # - set up the command before enabling `metrics_cmd`.
 # For example, aws client and AWS credentials need to be setup before trying this example.
-# metrics_cmd:
-#   - "cmd": "aws cloudwatch put-metric-data"
-#   - "--namespace": ["torchserve_benchmark_nightly_", *hardware]
-#   - "--region": "us-east-2"
-#   - "--metric-data": 'file:///tmp/benchmark/logs/stats_metrics.json'
+metrics_cmd:
+  - "cmd": "aws cloudwatch put-metric-data"
+  - "--namespace": ["torchserve_benchmark_nightly_lmi"]
+  - "--region": "us-east-2"
+  - "--metric-data": 'file:///tmp/benchmark/logs/stats_metrics.json'
 
 # load report to remote storage or local different path if "report_cmd" is set.
 # the command line to load report to remote storage.
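With `metrics_cmd` enabled, the ordered flag/value pairs are meant to be flattened, in order, into a single `aws cloudwatch put-metric-data` invocation (the config comments note that the AWS CLI and credentials must already be set up). A minimal sketch of that flattening, assuming a local copy of the config saved as `benchmark_config.yaml`; the helper below is illustrative, not the benchmark harness's own code:

```python
import yaml  # PyYAML


def flatten_cmd(entries):
    """Join an ordered list of {flag: value} pairs into one command string."""
    parts = []
    for entry in entries:
        for key, value in entry.items():
            if isinstance(value, list):
                # List values concatenate in place,
                # e.g. ["torchserve_benchmark_nightly_", "gpu"] -> "torchserve_benchmark_nightly_gpu".
                value = "".join(str(v) for v in value)
            parts.append(str(value) if key == "cmd" else f"{key} {value}")
    return " ".join(parts)


with open("benchmark_config.yaml") as f:  # assumed local copy of the config above
    config = yaml.safe_load(f)

print(flatten_cmd(config["metrics_cmd"]))
# aws cloudwatch put-metric-data --namespace torchserve_benchmark_nightly_lmi
#   --region us-east-2 --metric-data file:///tmp/benchmark/logs/stats_metrics.json
```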
File 3 (llama-2-7b model benchmark configuration YAML):

@@ -1,40 +1,8 @@
 ---
 llama-2-7b:
-  int8:
-    benchmark_engine: "ab"
-    url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int8.mar
-    workers:
-      - 1
-    batch_delay: 100
-    batch_size:
-      - 1
-    input: "./examples/large_models/gpt_fast/request.json"
-    requests: 1000
-    concurrency: 1
-    backend_profiling: False
-    exec_env: "local"
-    processors:
-      - "gpus": "all"
-    stream: "false"
-  int4:
-    benchmark_engine: "ab"
-    url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int4.mar
-    workers:
-      - 1
-    batch_delay: 100
-    batch_size:
-      - 1
-    input: "./examples/large_models/gpt_fast/request.json"
-    requests: 1000
-    concurrency: 1
-    backend_profiling: False
-    exec_env: "local"
-    processors:
-      - "gpus": "all"
-    stream: "false"
-  # base:
+  # int8:
   #   benchmark_engine: "ab"
-  #   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar
+  #   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int8.mar
   #   workers:
   #     - 1
   #   batch_delay: 100
@@ -48,36 +16,37 @@ llama-2-7b:
   #   processors:
   #     - "gpus": "all"
   #   stream: "false"
-  # int8-tp:
+  # int4:
   #   benchmark_engine: "ab"
-  #   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/llama-2-7b-int8-tp.mar
+  #   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int4.mar
   #   workers:
-  #     - 4
-  #   batch_delay: 100
-  #   batch_size:
   #     - 1
-  #   input: "./examples/large_models/gpt_fast/request.json"
-  #   requests: 1000
-  #   concurrency: 4
-  #   backend_profiling: False
-  #   exec_env: "local"
-  #   processors:
-  #     - "gpus": "all"
-  #   stream: "false"
-  # int4-tp:
-  #   benchmark_engine: "ab"
-  #   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/llama-2-7b-int4-tp.mar
-  #   workers:
-  #     - 4
   #   batch_delay: 100
   #   batch_size:
   #     - 1
   #   input: "./examples/large_models/gpt_fast/request.json"
   #   requests: 1000
-  #   concurrency: 4
+  #   concurrency: 1
   #   backend_profiling: False
   #   exec_env: "local"
   #   processors:
   #     - "gpus": "all"
   #   stream: "false"
+  base:
+    benchmark_engine: "ab"
+    url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar
+    workers:
+      - 1
+    batch_delay: 100
+    batch_size:
+      - 1
+    input: "./examples/large_models/gpt_fast/request.json"
+    requests: 1000
+    concurrency: 1
+    backend_profiling: False
+    exec_env: "local"
+    processors:
+      - "gpus": "all"
+    stream: "false"
+
 
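After this change only the `base` variant of llama-2-7b is active; the int8 and int4 variants stay in the file as comments. A quick way to confirm what the nightly run will pick up is to load the model config and list the uncommented variants; a minimal sketch, assuming a checkout where the file lives at `benchmarks/models_config/llama-2-7b.yaml` (the path follows the "defined in benchmarks/models_config" comment in the benchmark config above):

```python
import yaml  # PyYAML

CONFIG_PATH = "benchmarks/models_config/llama-2-7b.yaml"  # assumed checkout-relative path

with open(CONFIG_PATH) as f:
    model_config = yaml.safe_load(f)

# Commented-out variants are invisible to the YAML loader, so only active ones appear here.
for model_name, variants in model_config.items():
    for variant, params in variants.items():
        print(
            f"{model_name}/{variant}: workers={params['workers']}, "
            f"batch_size={params['batch_size']}, requests={params['requests']}, "
            f"concurrency={params['concurrency']}, url={params['url']}"
        )
# Expected with this diff applied: a single line for llama-2-7b/base.
```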