@@ -128,18 +128,18 @@ run_benchmark_serving() {
128128_install_lm_eval_deps () {
129129 set +x
130130 python3 -m pip install -q --no-cache-dir " lm-eval[api]" || true
131- # Temporary: workaround known harness issue by using main
131+ # Temporary: workaround issue by using main
132132 python3 -m pip install -q --no-cache-dir --no-deps \
133133 " git+https://github.com/EleutherAI/lm-evaluation-harness.git@main" || true
134134}
135135
136136# Patch lm-eval filters to be robust to empty strings via sitecustomize
137- # Patch lm-eval filters to be robust to empty strings via sitecustomize
138- _patch_lm_eval_filters () {
137+ _patch_lm_eval () {
139138 set +x
140139 local patch_dir
141140 patch_dir=" $( mktemp -d) "
142141 cat > " $patch_dir /sitecustomize.py" << 'PY '
142+ # --- Patch LocalChatCompletion.parse_generations to handle empty content with reasoning_content ---
143143import re, sys, unicodedata, json
144144from lm_eval.filters import extraction as ex
145145from lm_eval.models.openai_completions import LocalChatCompletion as _LCC
@@ -167,7 +167,7 @@ def _le_parse_generations(outputs, **kwargs):
167167# Keep staticmethod semantics
168168_LCC.parse_generations = staticmethod(_le_parse_generations)
169169
170- # --- Patch TemplateAPI.apply_chat_template to avoid injecting "type": "text" ---
170+ # --- Patch TemplateAPI.apply_chat_template to avoid injecting "type": "text" for TRT ---
171171try:
172172 from lm_eval.models import api_models as _api_models
173173 _TemplateAPI = _api_models.TemplateAPI
@@ -234,7 +234,7 @@ run_lm_eval() {
234234 done
235235
236236 _install_lm_eval_deps
237- _patch_lm_eval_filters
237+ _patch_lm_eval
238238
239239 local openai_server_base=" http://0.0.0.0:${port} "
240240 local openai_chat_base=" ${openai_server_base} /v1/chat/completions"
0 commit comments