File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -49,10 +49,10 @@ jobs:
4949 uses : ./.github/workflows/eval-tmpl.yml
5050 secrets : inherit
5151 with :
52- runner : h100-cw_0
53- image : ${{ inputs.image || 'vllm/vllm-openai:v0.11.0 ' }}
52+ runner : b200-nvd_2
53+ image : ${{ inputs.image || 'nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1 ' }}
5454 model : ${{ inputs.model || 'openai/gpt-oss-120b' }}
55- framework : vllm
55+ framework : trt
5656 precision : fp4
5757 exp-name : ${{ inputs.exp-name || 'gptoss_gsm8k_poc' }}
5858 tp : ' 4'
6161 port : ${{ inputs.port || '8888' }}
6262 eval-task : gsm8k
6363 num-fewshot : ${{ inputs.num_fewshot || '5' }}
64- limit : ${{ inputs.limit || '200' }}
64+ limit : ${{ inputs.limit || '200' }}
Original file line number Diff line number Diff line change 8181 steps :
8282 - name : Resource cleanup
8383 run : |
84- sudo rm -rf /home/nvadmin/actions-runner/_work/InferenceMAX/InferenceMAX/eval_out/
8584 # Helper to avoid indefinite hangs on flaky tools (Docker/Slurm)
8685 safe_timeout() {
8786 if command -v timeout >/dev/null 2>&1; then
Original file line number Diff line number Diff line change @@ -94,3 +94,10 @@ run_benchmark_serving \
9494 --max-concurrency " $CONC " \
9595 --result-filename " $RESULT_FILENAME " \
9696 --result-dir /workspace/
97+
98+ # After throughput, run evaluation only if RUN_EVAL is true
99+ if [ " ${RUN_EVAL} " = " true" ]; then
100+ run_eval --framework lm-eval --port " $PORT " --concurrent-requests $(( $CONC * 2 ))
101+ append_lm_eval_summary
102+ fi
103+ set +x
You can’t perform that action at this time.
0 commit comments