Skip to content

Commit 68c1a2d

Browse files
committed
Missing eval env var docker
1 parent 733d7ca commit 68c1a2d

8 files changed

Lines changed: 18 additions & 11 deletions

File tree

.github/workflows/benchmark-tmpl.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ jobs:
7474
benchmark:
7575
runs-on: ${{ inputs.runner }}
7676
timeout-minutes: 180
77-
name: '${{ inputs.exp-name }} ${{ inputs.runner }} ${{ inputs.framework }} ${{ inputs.run-eval }} ${{ inputs.precision }} tp=${{ inputs.tp }} ep=${{ inputs.ep }} dpa=${{ inputs.dp-attn }} conc=${{ inputs.conc }}'
77+
name: '${{ inputs.exp-name }} ${{ inputs.runner }} ${{ inputs.framework }} ${{ inputs.precision }} ${{ inputs.run-eval && ''eval '' || '''' }}tp=${{ inputs.tp }} ep=${{ inputs.ep }} dpa=${{ inputs.dp-attn }} conc=${{ inputs.conc }}'
7878
steps:
7979
- name: Resource cleanup
8080
run: |

benchmarks/benchmark_lib.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,18 +128,18 @@ run_benchmark_serving() {
128128
_install_lm_eval_deps() {
129129
set +x
130130
python3 -m pip install -q --no-cache-dir "lm-eval[api]" || true
131-
# Temporary: workaround known harness issue by using main
131+
# Temporary: workaround issue by using main
132132
python3 -m pip install -q --no-cache-dir --no-deps \
133133
"git+https://github.com/EleutherAI/lm-evaluation-harness.git@main" || true
134134
}
135135

136136
# Patch lm-eval filters to be robust to empty strings via sitecustomize
137-
# Patch lm-eval filters to be robust to empty strings via sitecustomize
138-
_patch_lm_eval_filters() {
137+
_patch_lm_eval() {
139138
set +x
140139
local patch_dir
141140
patch_dir="$(mktemp -d)"
142141
cat > "$patch_dir/sitecustomize.py" <<'PY'
142+
# --- Patch LocalChatCompletion.parse_generations to handle empty content with reasoning_content ---
143143
import re, sys, unicodedata, json
144144
from lm_eval.filters import extraction as ex
145145
from lm_eval.models.openai_completions import LocalChatCompletion as _LCC
@@ -167,7 +167,7 @@ def _le_parse_generations(outputs, **kwargs):
167167
# Keep staticmethod semantics
168168
_LCC.parse_generations = staticmethod(_le_parse_generations)
169169
170-
# --- Patch TemplateAPI.apply_chat_template to avoid injecting "type": "text" ---
170+
# --- Patch TemplateAPI.apply_chat_template to avoid injecting "type": "text" for TRT ---
171171
try:
172172
from lm_eval.models import api_models as _api_models
173173
_TemplateAPI = _api_models.TemplateAPI
@@ -234,7 +234,7 @@ run_lm_eval() {
234234
done
235235

236236
_install_lm_eval_deps
237-
_patch_lm_eval_filters
237+
_patch_lm_eval
238238

239239
local openai_server_base="http://0.0.0.0:${port}"
240240
local openai_chat_base="${openai_server_base}/v1/chat/completions"

benchmarks/dsr1_fp8_h200_trt_slurm.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,10 @@ run_benchmark_serving \
8686
--max-concurrency "$CONC" \
8787
--result-filename "$RESULT_FILENAME" \
8888
--result-dir /workspace/
89+
90+
# After throughput, run evaluation only if RUN_EVAL is true
91+
if [ "${RUN_EVAL}" = "true" ]; then
92+
run_eval --framework lm-eval --port "$PORT" --concurrent-requests $(( $CONC * 2 ))
93+
append_lm_eval_summary
94+
fi
95+
set +x

runners/launch_b200-tg.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ docker run --rm -d --network host --name $server_name \
2424
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
2525
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e PORT=$PORT -e EP_SIZE \
2626
-e TORCH_CUDA_ARCH_LIST="10.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
27-
-e RANDOM_RANGE_RATIO -e RESULT_FILENAME -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
27+
-e RANDOM_RANGE_RATIO -e RESULT_FILENAME -e RUN_EVAL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
2828
${GH_SUM_ENV} ${GH_SUM_MOUNT} \
2929
--entrypoint=/bin/bash \
3030
$(echo "$IMAGE" | sed 's/#/\//') \

runners/launch_h100-cr.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ docker run --rm --network=host --name=$server_name \
2020
--runtime=nvidia --gpus=all --ipc=host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
2121
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
2222
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
23-
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e RESULT_FILENAME -e RANDOM_RANGE_RATIO -e PORT=$PORT \
23+
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e RUN_EVAL -e RESULT_FILENAME -e RANDOM_RANGE_RATIO -e PORT=$PORT \
2424
-e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e TORCH_CUDA_ARCH_LIST="9.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
2525
${GH_SUM_ENV} ${GH_SUM_MOUNT} \
2626
--entrypoint=/bin/bash \

runners/launch_mi300x-amd.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ docker run --rm --ipc=host --shm-size=16g --network=host --name=$server_name \
2424
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
2525
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
2626
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \
27-
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME \
27+
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME -e RUN_EVAL \
2828
${GH_SUM_ENV} ${GH_SUM_MOUNT} \
2929
--entrypoint=/bin/bash \
3030
$IMAGE \

runners/launch_mi300x-cr.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ docker run --rm --ipc=host --shm-size=16g --network=host --name=$server_name \
2424
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
2525
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
2626
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \
27-
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME \
27+
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME -e RUN_EVAL \
2828
${GH_SUM_ENV} ${GH_SUM_MOUNT} \
2929
--entrypoint=/bin/bash \
3030
$IMAGE \

runners/launch_mi355x-amd.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ docker run --rm --ipc=host --shm-size=16g --network=host --name=$server_name \
4646
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
4747
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
4848
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT -e NUM_PROMPTS \
49-
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME \
49+
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME -e RUN_EVAL \
5050
${GH_SUM_ENV} ${GH_SUM_MOUNT} \
5151
--entrypoint=/bin/bash \
5252
$IMAGE \

0 commit comments

Comments
 (0)