|
2 | 2 |
|
3 | 3 | # Shared benchmarking utilities for InferenceMAX |
4 | 4 |
|
| 5 | +# ------------------------------ |
| 6 | +# Non-root helpers |
| 7 | +# ------------------------------ |
| 8 | + |
# Point HOME, the pip/XDG caches and the Python user base at user-writable
# locations so that "pip install --user" works when running as non-root.
#
# Each variable only receives a default when it is unset or empty:
#   HOME            -> /workspace
#   XDG_CACHE_HOME  -> $HOME/.cache
#   PIP_CACHE_DIR   -> $HOME/.cache/pip
#   PYTHONUSERBASE  -> $HOME/.local
#
# Side effects: exports the variables above, prepends the user script
# directories to PATH (each at most once) and creates the directories.
# Returns: always 0; a directory-creation failure is reported on stderr only.
_ensure_user_env() {
    export HOME="${HOME:-/workspace}"
    export XDG_CACHE_HOME="${XDG_CACHE_HOME:-$HOME/.cache}"
    export PIP_CACHE_DIR="${PIP_CACHE_DIR:-$HOME/.cache/pip}"
    export PYTHONUSERBASE="${PYTHONUSERBASE:-$HOME/.local}"

    # "pip install --user" drops console scripts into $PYTHONUSERBASE/bin,
    # which is not $HOME/.local/bin when PYTHONUSERBASE was overridden by the
    # caller. Keep both on PATH; the user-base directory is handled last so it
    # ends up first in the lookup order.
    local bin_dir
    for bin_dir in "$HOME/.local/bin" "$PYTHONUSERBASE/bin"; do
        case ":$PATH:" in
            *":$bin_dir:"*) ;;
            *) export PATH="$bin_dir:$PATH" ;;
        esac
    done

    # Best-effort: a read-only location must not abort the caller, but the
    # failure should be visible in the logs.
    mkdir -p "$HOME" "$XDG_CACHE_HOME" "$PIP_CACHE_DIR" \
             "$HOME/.local/bin" "$PYTHONUSERBASE/bin" \
        || echo "warning: _ensure_user_env could not create one or more user directories" >&2
    return 0
}
| 22 | + |
# Best-effort "python3 -m pip install --user" that works without root.
#
# Arguments: "$@" is passed verbatim to "pip install" (package specs and any
#            extra pip options such as --no-deps).
# Side effects: calls _ensure_user_env first, so HOME, the cache variables and
#               PATH may be (re)exported.
# Returns: always 0, so an optional dependency can never abort the caller.
#          Because pip runs with -q, a failure would otherwise be invisible;
#          it is reported on stderr together with pip's exit status.
_pip_user_install() {
    _ensure_user_env
    local rc=0
    python3 -m pip install --user -q --no-cache-dir "$@" || rc=$?
    if [ "$rc" -ne 0 ]; then
        echo "warning: pip install --user failed (exit $rc) for: $*" >&2
    fi
    return 0
}
| 28 | + |
# Patch flashinfer's cubin_loader in a writable overlay instead of the system
# site-packages.
#
# Steps: ask python3 where the flashinfer package lives, copy that tree to a
# fresh /tmp/fi_patch-XXXXXX/flashinfer, delete lines 102-108 of
# jit/cubin_loader.py in the copy, then prepend the overlay's parent directory
# to PYTHONPATH so the copy shadows the installed package.
#
# Side effects: calls _ensure_user_env, creates a temp directory that is not
#               removed here, and exports PYTHONPATH on success.
# Tracing: xtrace is silenced while the function runs and put back to whatever
#          the caller had (it is no longer force-enabled on exit).
# Returns: 0 when the overlay is active; 1 when flashinfer could not be
#          located or copied, in which case PYTHONPATH is left untouched.
# NOTE(review): the hard-coded 102,108 range presumably matches one specific
# flashinfer release - confirm whenever the pinned version changes.
_patch_flashinfer_user_overlay() {
    local had_xtrace=0 status=0
    case "$-" in *x*) had_xtrace=1 ;; esac
    set +x

    _ensure_user_env

    local src="" dst="" parent=""
    src=$(python3 - <<'PY'
import os, flashinfer
print(os.path.dirname(flashinfer.__file__))
PY
    ) || src=""

    if [ -z "$src" ] || [ ! -d "$src" ]; then
        echo "warning: flashinfer package not found; skipping cubin_loader patch" >&2
        status=1
    elif ! parent=$(mktemp -d /tmp/fi_patch-XXXXXX); then
        echo "warning: could not create overlay directory; skipping cubin_loader patch" >&2
        status=1
    else
        dst="$parent/flashinfer"
        # Copy the package tree to a writable overlay.
        if cp -a "$src" "$dst"; then
            # Apply the minimal patch (a sed failure is tolerated, only logged).
            if [ -f "$dst/jit/cubin_loader.py" ]; then
                sed -i '102,108d' "$dst/jit/cubin_loader.py" \
                    || echo "warning: failed to patch $dst/jit/cubin_loader.py" >&2
            fi
            # Prepend the overlay so it wins even with PYTHONNOUSERSITE=1.
            # ${VAR:+...} avoids a trailing ":" (an empty path entry) when
            # PYTHONPATH was previously unset or empty.
            export PYTHONPATH="$parent${PYTHONPATH:+:$PYTHONPATH}"
        else
            echo "warning: could not copy $src to $dst; skipping cubin_loader patch" >&2
            rm -rf "$parent"
            status=1
        fi
    fi

    if [ "$had_xtrace" -eq 1 ]; then
        set -x
    fi
    return "$status"
}
| 51 | + |
5 | 52 | # Wait for server to be ready by polling the health endpoint |
6 | 53 | # All parameters are required |
7 | 54 | # Parameters: |
@@ -159,10 +206,10 @@ run_benchmark_serving() { |
159 | 206 | # ------------------------------ |
160 | 207 |
|
# Install the lm-eval dependencies: first "lm-eval[api]", then the
# lm-evaluation-harness main branch from GitHub with --no-deps.
# In this diff view the "-" rows are the removed direct "python3 -m pip install"
# calls and the "+" rows are their replacements, which go through
# _pip_user_install instead.
161 | 208 | _install_lm_eval_deps() { |
162 | | - python3 -m pip install -q --no-cache-dir "lm-eval[api]" || true |
| 209 | + _pip_user_install "lm-eval[api]" |
163 | 210 | # Temporary: workaround issue by using main |
164 | | - python3 -m pip install -q --no-cache-dir --no-deps \ |
165 | | - "git+https://github.com/EleutherAI/lm-evaluation-harness.git@main" || true |
| 211 | + _pip_user_install --no-deps \ |
| 212 | + "git+https://github.com/EleutherAI/lm-evaluation-harness.git@main" |
166 | 213 | } |
167 | 214 |
|
168 | 215 | # Patch lm-eval filters to be robust to empty strings via sitecustomize |
@@ -243,7 +290,9 @@ run_lm_eval() { |
243 | 290 | local port="${PORT:-8888}" |
244 | 291 | local task="${EVAL_TASK:-gsm8k}" |
245 | 292 | local num_fewshot="${NUM_FEWSHOT:-5}" |
246 | | - local results_dir="${EVAL_RESULT_DIR:-$(mktemp -d /tmp/eval_out-XXXXXX)}" |
| 293 | + # Prefer a stable, workspace-mounted location so the host can upload artifacts. |
| 294 | + # If EVAL_RESULT_DIR is not set, default to /workspace/eval_out/${RESULT_FILENAME}. |
| 295 | + local results_dir="${EVAL_RESULT_DIR:-/workspace/eval_out/${RESULT_FILENAME:-eval_out}}" |
247 | 296 | local gen_max_tokens=4096 |
248 | 297 | local temperature=0 |
249 | 298 | local top_p=1 |
@@ -285,9 +334,9 @@ run_lm_eval() { |
285 | 334 | } |
286 | 335 |
|
287 | 336 | append_lm_eval_summary() { |
288 | | - local results_dir="${EVAL_RESULT_DIR}" |
289 | 337 | local task="${EVAL_TASK:-gsm8k}" |
290 | | - local out_dir="${results_dir}" |
| 338 | + local out_dir="${EVAL_RESULT_DIR:-/workspace/eval_out/${RESULT_FILENAME:-eval_out}}" |
| 339 | + local results_dir="${out_dir}" |
291 | 340 | local summary_md="${out_dir}/SUMMARY.md" |
292 | 341 | mkdir -p "$out_dir" || true |
293 | 342 |
|
|
326 | 375 | fi |
327 | 376 | fi |
328 | 377 |
|
329 | | - # Note: Per policy, eval outputs stay under /tmp only; do not copy to workspace. |
330 | | - |
331 | 378 | echo "Results saved to: ${summary_md}" |
332 | 379 | } |
333 | 380 |
|
|
336 | 383 | # ------------------------------ |
337 | 384 |
|
# Install the packages used for lighteval runs: "lighteval" and "litellm".
# In this diff view the "-" row is the removed direct "python3 -m pip install"
# call, which requested "lighteval[api]"; the "+" row replaces it with a
# _pip_user_install call that requests plain "lighteval".
# NOTE(review): the "[api]" extra is dropped by this change - confirm that is intended.
338 | 385 | _install_lighteval_deps() { |
339 | | - python3 -m pip install -q --no-cache-dir "lighteval[api]" "litellm" || true |
| 386 | + _pip_user_install "lighteval" "litellm" |
340 | 387 | } |
341 | 388 |
|
342 | 389 | # Patch lighteval's LiteLLMClient to handle reasoning content and Python name mangling |
@@ -565,7 +612,8 @@ run_lighteval_eval() { |
565 | 612 | local port="${PORT:-8888}" |
566 | 613 | local task="${EVAL_TASK:-gsm8k}" |
567 | 614 | local num_fewshot="${NUM_FEWSHOT:-5}" |
568 | | - local results_dir="${EVAL_RESULT_DIR:-eval_out_lighteval}" |
| 615 | + # Align output path to workspace when not explicitly set |
| 616 | + local results_dir="${EVAL_RESULT_DIR:-/workspace/eval_out/${RESULT_FILENAME:-eval_out_lighteval}}" |
569 | 617 | local max_samples=0 |
570 | 618 | local concurrent_requests=32 |
571 | 619 |
|
|
0 commit comments