Skip to content

Commit 822521f

Browse files
committed
b200 test
1 parent 837622f commit 822521f

3 files changed

Lines changed: 11 additions & 5 deletions

File tree

.github/workflows/eval-gms8k.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,10 @@ jobs:
4949
uses: ./.github/workflows/eval-tmpl.yml
5050
secrets: inherit
5151
with:
52-
runner: h100-cw_0
53-
image: ${{ inputs.image || 'vllm/vllm-openai:v0.11.0' }}
52+
runner: b200-nvd_2
53+
image: ${{ inputs.image || 'nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1' }}
5454
model: ${{ inputs.model || 'openai/gpt-oss-120b' }}
55-
framework: vllm
55+
framework: trt
5656
precision: fp4
5757
exp-name: ${{ inputs.exp-name || 'gptoss_gsm8k_poc' }}
5858
tp: '4'
@@ -61,4 +61,4 @@ jobs:
6161
port: ${{ inputs.port || '8888' }}
6262
eval-task: gsm8k
6363
num-fewshot: ${{ inputs.num_fewshot || '5' }}
64-
limit: ${{ inputs.limit || '200' }}
64+
limit: ${{ inputs.limit || '200' }}

.github/workflows/eval-tmpl.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ jobs:
8181
steps:
8282
- name: Resource cleanup
8383
run: |
84-
sudo rm -rf /home/nvadmin/actions-runner/_work/InferenceMAX/InferenceMAX/eval_out/
8584
# Helper to avoid indefinite hangs on flaky tools (Docker/Slurm)
8685
safe_timeout() {
8786
if command -v timeout >/dev/null 2>&1; then

benchmarks/gptoss_fp4_b200_trt_slurm.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,10 @@ run_benchmark_serving \
9494
--max-concurrency "$CONC" \
9595
--result-filename "$RESULT_FILENAME" \
9696
--result-dir /workspace/
97+
98+
# After the throughput benchmark, run the evaluation only if RUN_EVAL is set to "true"
99+
if [ "${RUN_EVAL}" = "true" ]; then
100+
run_eval --framework lm-eval --port "$PORT" --concurrent-requests $(( $CONC * 2 ))
101+
append_lm_eval_summary
102+
fi
103+
set +x

0 commit comments

Comments
 (0)