Skip to content

Commit f2cc919

Browse files
committed
docker as non-root
1 parent 837622f commit f2cc919

11 files changed

Lines changed: 95 additions & 31 deletions

benchmarks/benchmark_lib.sh

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,53 @@
22

33
# Shared benchmarking utilities for InferenceMAX
44

5+
# ------------------------------
6+
# Non-root helpers
7+
# ------------------------------
8+
9+
# Point HOME, the pip/XDG caches and the Python user base at user-writable
# locations so installs work when the container runs as a non-root user.
#
# Globals (all exported): HOME, XDG_CACHE_HOME, PIP_CACHE_DIR, PYTHONUSERBASE, PATH
# Arguments: none
# Returns:   0 always; directory creation is best-effort.
#
# Values already provided by the caller are kept. The one exception is a HOME
# that is empty or cannot be written to (e.g. HOME=/ for an unprivileged UID),
# which is replaced by /workspace. Calling the function repeatedly is safe:
# the PATH entry is only added once.
_ensure_user_env() {
  if [ -z "${HOME:-}" ] || ! { mkdir -p "$HOME" 2>/dev/null && [ -w "$HOME" ]; }; then
    HOME=/workspace
  fi
  export HOME
  export XDG_CACHE_HOME="${XDG_CACHE_HOME:-$HOME/.cache}"
  export PIP_CACHE_DIR="${PIP_CACHE_DIR:-$HOME/.cache/pip}"
  export PYTHONUSERBASE="${PYTHONUSERBASE:-$HOME/.local}"

  # "pip install --user" places console scripts in $PYTHONUSERBASE/bin, so that
  # is the directory that must be on PATH (it equals $HOME/.local/bin unless
  # the caller overrode PYTHONUSERBASE).
  local user_bin="$PYTHONUSERBASE/bin"
  case ":$PATH:" in
    *":$user_bin:"*) ;;
    *) export PATH="$user_bin:$PATH" ;;
  esac

  # Best-effort: a failure here is reported by mkdir itself and must not abort
  # the calling benchmark script.
  mkdir -p "$HOME" "$XDG_CACHE_HOME" "$PIP_CACHE_DIR" "$user_bin" || true
}
22+
23+
# Install Python packages into the per-user site so no root access is needed.
#
# Arguments: "$@" - passed through unchanged to "pip install" (package
#                   specifiers and/or extra pip options such as --no-deps)
# Outputs:   a warning on stderr when pip exits non-zero
# Returns:   0 always; installs are best-effort so that a failed optional
#            dependency does not abort the calling benchmark script.
_pip_user_install() {
  _ensure_user_env
  local status=0
  python3 -m pip install --user -q --no-cache-dir "$@" || status=$?
  if [ "$status" -ne 0 ]; then
    # Surface the failure instead of swallowing it silently.
    printf 'WARNING: pip install --user failed (exit %s) for: %s\n' \
      "$status" "$*" >&2
  fi
  return 0
}
28+
29+
# Patch flashinfer's jit/cubin_loader.py in a writable copy of the package
# instead of editing the system site-packages (which needs root).
#
# The installed flashinfer package is copied to a fresh directory under /tmp,
# lines 102-108 of jit/cubin_loader.py are deleted in the copy, and the copy's
# parent directory is prepended to PYTHONPATH so it takes precedence even with
# PYTHONNOUSERSITE=1.
#
# Globals:   PYTHONPATH (exported, only when the overlay was created)
# Arguments: none
# Outputs:   warnings on stderr when a step fails
# Returns:   0 always; the patch is best-effort and must not abort the caller.
#
# NOTE(review): the sed deletion is purely line-number based, so it is only
# correct for the flashinfer version the line numbers were taken from.
# The overlay directory is intentionally left in place: processes started
# later import from it.
_patch_flashinfer_user_overlay() {
  # Silence tracing while patching, then restore whatever the caller had
  # rather than forcing tracing on.
  local had_xtrace=0
  case $- in *x*) had_xtrace=1 ;; esac
  set +x

  _ensure_user_env

  local src="" dst="" parent=""
  src=$(python3 - <<'PY'
import os, flashinfer
print(os.path.dirname(flashinfer.__file__))
PY
  ) || src=""

  if [ -z "$src" ] || [ ! -d "$src" ]; then
    printf 'WARNING: flashinfer package directory not found; patch skipped\n' >&2
  elif ! parent=$(mktemp -d /tmp/fi_patch-XXXXXX); then
    printf 'WARNING: could not create flashinfer overlay directory; patch skipped\n' >&2
  else
    dst="$parent/flashinfer"
    # Copy the package tree to the writable overlay
    if cp -a "$src" "$dst"; then
      # Apply the minimal patch
      if [ -f "$dst/jit/cubin_loader.py" ]; then
        sed -i '102,108d' "$dst/jit/cubin_loader.py" \
          || printf 'WARNING: failed to patch %s\n' "$dst/jit/cubin_loader.py" >&2
      fi
      # Join with ':' only when PYTHONPATH already has content, so no trailing
      # empty entry is created (an empty entry may be resolved by Python to
      # the current directory).
      export PYTHONPATH="$parent${PYTHONPATH:+:$PYTHONPATH}"
    else
      printf 'WARNING: could not copy %s to overlay; patch skipped\n' "$src" >&2
      rm -rf -- "$parent"
    fi
  fi

  if [ "$had_xtrace" -eq 1 ]; then
    set -x
  fi
  return 0
}
51+
552
# Wait for server to be ready by polling the health endpoint
653
# All parameters are required
754
# Parameters:
@@ -159,10 +206,10 @@ run_benchmark_serving() {
159206
# ------------------------------
160207

161208
_install_lm_eval_deps() {
162-
python3 -m pip install -q --no-cache-dir "lm-eval[api]" || true
209+
_pip_user_install "lm-eval[api]"
163210
# Temporary: workaround issue by using main
164-
python3 -m pip install -q --no-cache-dir --no-deps \
165-
"git+https://github.com/EleutherAI/lm-evaluation-harness.git@main" || true
211+
_pip_user_install --no-deps \
212+
"git+https://github.com/EleutherAI/lm-evaluation-harness.git@main"
166213
}
167214

168215
# Patch lm-eval filters to be robust to empty strings via sitecustomize
@@ -243,7 +290,9 @@ run_lm_eval() {
243290
local port="${PORT:-8888}"
244291
local task="${EVAL_TASK:-gsm8k}"
245292
local num_fewshot="${NUM_FEWSHOT:-5}"
246-
local results_dir="${EVAL_RESULT_DIR:-$(mktemp -d /tmp/eval_out-XXXXXX)}"
293+
# Prefer a stable, workspace-mounted location so the host can upload artifacts.
294+
# If EVAL_RESULT_DIR is not set, default to /workspace/eval_out/${RESULT_FILENAME:-eval_out}.
295+
local results_dir="${EVAL_RESULT_DIR:-/workspace/eval_out/${RESULT_FILENAME:-eval_out}}"
247296
local gen_max_tokens=4096
248297
local temperature=0
249298
local top_p=1
@@ -285,9 +334,9 @@ run_lm_eval() {
285334
}
286335

287336
append_lm_eval_summary() {
288-
local results_dir="${EVAL_RESULT_DIR}"
289337
local task="${EVAL_TASK:-gsm8k}"
290-
local out_dir="${results_dir}"
338+
local out_dir="${EVAL_RESULT_DIR:-/workspace/eval_out/${RESULT_FILENAME:-eval_out}}"
339+
local results_dir="${out_dir}"
291340
local summary_md="${out_dir}/SUMMARY.md"
292341
mkdir -p "$out_dir" || true
293342

@@ -326,8 +375,6 @@ META
326375
fi
327376
fi
328377

329-
# Note: Per policy, eval outputs stay under /tmp only; do not copy to workspace.
330-
331378
echo "Results saved to: ${summary_md}"
332379
}
333380

@@ -336,7 +383,7 @@ META
336383
# ------------------------------
337384

338385
_install_lighteval_deps() {
339-
python3 -m pip install -q --no-cache-dir "lighteval[api]" "litellm" || true
386+
_pip_user_install "lighteval" "litellm"
340387
}
341388

342389
# Patch lighteval's LiteLLMClient to handle reasoning content and Python name mangling
@@ -565,7 +612,8 @@ run_lighteval_eval() {
565612
local port="${PORT:-8888}"
566613
local task="${EVAL_TASK:-gsm8k}"
567614
local num_fewshot="${NUM_FEWSHOT:-5}"
568-
local results_dir="${EVAL_RESULT_DIR:-eval_out_lighteval}"
615+
# Align output path to workspace when not explicitly set
616+
local results_dir="${EVAL_RESULT_DIR:-/workspace/eval_out/${RESULT_FILENAME:-eval_out_lighteval}}"
569617
local max_samples=0
570618
local concurrent_requests=32
571619

benchmarks/dsr1_fp4_b200_docker.sh

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414

1515
nvidia-smi
1616

17-
# To improve CI stability, we patch this helper function to prevent a race condition that
18-
# happens 1% of the time. ref: https://github.com/flashinfer-ai/flashinfer/pull/1779
19-
sed -i '102,108d' /usr/local/lib/python3.12/dist-packages/flashinfer/jit/cubin_loader.py
17+
# Load helpers and patch flashinfer in a user-writable overlay
18+
source "$(dirname "$0")/benchmark_lib.sh"
19+
_patch_flashinfer_user_overlay
2020

2121
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
2222

@@ -40,13 +40,12 @@ PYTHONNOUSERSITE=1 python3 -m sglang.launch_server --model-path $MODEL --host 0.
4040

4141
SERVER_PID=$!
4242

43-
# Source benchmark utilities
44-
source "$(dirname "$0")/benchmark_lib.sh"
43+
# Source benchmark utilities (already sourced above)
4544

4645
# Wait for server to be ready
4746
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
4847

49-
pip install -q datasets pandas
48+
_pip_user_install datasets pandas
5049

5150
run_benchmark_serving \
5251
--model "$MODEL" \
@@ -65,4 +64,4 @@ if [ "${RUN_EVAL}" = "true" ]; then
6564
run_eval --framework lm-eval --port "$PORT" --concurrent-requests $(( $CONC * 2 ))
6665
append_lm_eval_summary
6766
fi
68-
set +x
67+
set +x

benchmarks/dsr1_fp8_b200_docker.sh

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414

1515
nvidia-smi
1616

17-
# To improve CI stability, we patch this helper function to prevent a race condition that
18-
# happens 1% of the time. ref: https://github.com/flashinfer-ai/flashinfer/pull/1779
19-
sed -i '102,108d' /usr/local/lib/python3.12/dist-packages/flashinfer/jit/cubin_loader.py
17+
# Load helpers and patch flashinfer in a user-writable overlay
18+
source "$(dirname "$0")/benchmark_lib.sh"
19+
_patch_flashinfer_user_overlay
2020

2121
export SGL_ENABLE_JIT_DEEPGEMM=false
2222
export SGLANG_ENABLE_FLASHINFER_GEMM=true
@@ -42,13 +42,12 @@ PYTHONNOUSERSITE=1 python3 -m sglang.launch_server --model-path=$MODEL --host=0.
4242

4343
SERVER_PID=$!
4444

45-
# Source benchmark utilities
46-
source "$(dirname "$0")/benchmark_lib.sh"
45+
# Source benchmark utilities (already sourced above)
4746

4847
# Wait for server to be ready
4948
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
5049

51-
pip install -q datasets pandas
50+
_pip_user_install datasets pandas
5251

5352
run_benchmark_serving \
5453
--model "$MODEL" \

benchmarks/gptoss_fp4_b200_docker.sh

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414

1515
nvidia-smi
1616

17-
# To improve CI stability, we patch this helper function to prevent a race condition that
18-
# happens 1% of the time. ref: https://github.com/flashinfer-ai/flashinfer/pull/1779
19-
sed -i '102,108d' /usr/local/lib/python3.12/dist-packages/flashinfer/jit/cubin_loader.py
17+
# Load helpers and patch flashinfer in a user-writable overlay
18+
source "$(dirname "$0")/benchmark_lib.sh"
19+
_patch_flashinfer_user_overlay
2020

2121

2222
# Calculate max-model-len based on ISL and OSL
@@ -56,13 +56,12 @@ vllm serve $MODEL --host 0.0.0.0 --port $PORT \
5656

5757
SERVER_PID=$!
5858

59-
# Source benchmark utilities
60-
source "$(dirname "$0")/benchmark_lib.sh"
59+
# Source benchmark utilities (already sourced above)
6160

6261
# Wait for server to be ready
6362
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
6463

65-
pip install -q datasets pandas
64+
_pip_user_install datasets pandas
6665

6766
run_benchmark_serving \
6867
--model "$MODEL_NAME" \

benchmarks/gptoss_fp4_h100_docker.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ source "$(dirname "$0")/benchmark_lib.sh"
4343
# Wait for server to be ready
4444
wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
4545

46-
pip install -q datasets pandas
46+
source "$(dirname "$0")/benchmark_lib.sh"
47+
_pip_user_install datasets pandas
4748

4849
run_benchmark_serving \
4950
--model "$MODEL_NAME" \

runners/launch_b200-dgxc.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,16 @@ else
3535
fi
3636

3737
docker run --rm --init --network host --name $server_name \
38+
--user $(id -u):$(id -g) \
3839
--runtime nvidia --gpus all --ipc host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
3940
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
4041
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
4142
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e PORT=$PORT -e EP_SIZE -e DP_ATTENTION \
4243
-e NCCL_GRAPH_REGISTER=0 \
4344
-e TORCH_CUDA_ARCH_LIST="10.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
4445
-e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RESULT_FILENAME -e RANDOM_RANGE_RATIO -e NUM_PROMPTS \
46+
-e HOME=/workspace -e XDG_CACHE_HOME=/workspace/.cache -e PIP_CACHE_DIR=/workspace/.cache/pip -e PYTHONUSERBASE=/workspace/.local \
47+
-e PATH=/workspace/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
4548
--entrypoint=/bin/bash \
4649
$(echo "$IMAGE" | sed 's/#/\//') \
4750
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_b200${FRAMEWORK_SUFFIX}_docker.sh"

runners/launch_b200-nvd.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,16 @@ else
3636
fi
3737

3838
docker run --rm --init --network host --name $server_name \
39+
--user $(id -u):$(id -g) \
3940
--runtime nvidia --gpus all --ipc host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
4041
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
4142
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
4243
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e PORT=$PORT -e EP_SIZE -e DP_ATTENTION \
4344
-e NCCL_GRAPH_REGISTER=0 \
4445
-e TORCH_CUDA_ARCH_LIST="10.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
4546
-e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RESULT_FILENAME -e RANDOM_RANGE_RATIO -e NUM_PROMPTS \
47+
-e HOME=/workspace -e XDG_CACHE_HOME=/workspace/.cache -e PIP_CACHE_DIR=/workspace/.cache/pip -e PYTHONUSERBASE=/workspace/.local \
48+
-e PATH=/workspace/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
4649
--entrypoint=/bin/bash \
4750
$(echo "$IMAGE" | sed 's/#/\//') \
4851
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_b200${FRAMEWORK_SUFFIX}_docker.sh"

runners/launch_h100-cr.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,15 @@ server_name="bmk-server"
77

88
set -x
99
docker run --rm --network=host --name=$server_name \
10+
--user $(id -u):$(id -g) \
1011
--runtime=nvidia --gpus=all --ipc=host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
1112
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
1213
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
1314
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e RUN_EVAL -e RESULT_FILENAME -e RANDOM_RANGE_RATIO -e PORT=$PORT \
1415
-e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e TORCH_CUDA_ARCH_LIST="9.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
15-
${GH_SUM_ENV} ${GH_SUM_MOUNT} \
16+
${GH_SUM_ENV} ${GH_SUM_MOUNT} \
17+
-e HOME=/workspace -e XDG_CACHE_HOME=/workspace/.cache -e PIP_CACHE_DIR=/workspace/.cache/pip -e PYTHONUSERBASE=/workspace/.local \
18+
-e PATH=/workspace/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
1619
--entrypoint=/bin/bash \
1720
$IMAGE \
1821
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_h100_docker.sh"

runners/launch_mi300x-amd.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@ server_name="bmk-server"
99

1010
set -x
1111
docker run --rm --ipc=host --shm-size=16g --network=host --name=$server_name \
12+
--user $(id -u):$(id -g) \
1213
--privileged --cap-add=CAP_SYS_ADMIN --device=/dev/kfd --device=/dev/dri --device=/dev/mem \
1314
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
1415
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
1516
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
1617
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \
1718
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME -e RUN_EVAL \
19+
-e HOME=/workspace -e XDG_CACHE_HOME=/workspace/.cache -e PIP_CACHE_DIR=/workspace/.cache/pip -e PYTHONUSERBASE=/workspace/.local \
20+
-e PATH=/workspace/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
1821
${GH_SUM_ENV} ${GH_SUM_MOUNT} \
1922
--entrypoint=/bin/bash \
2023
$IMAGE \

runners/launch_mi300x-cr.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@ server_name="bmk-server"
99

1010
set -x
1111
docker run --rm --ipc=host --shm-size=16g --network=host --name=$server_name \
12+
--user $(id -u):$(id -g) \
1213
--privileged --cap-add=CAP_SYS_ADMIN --device=/dev/kfd --device=/dev/dri --device=/dev/mem \
1314
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
1415
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
1516
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
1617
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \
1718
-e ISL -e OSL -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RANDOM_RANGE_RATIO -e RESULT_FILENAME -e RUN_EVAL \
19+
-e HOME=/workspace -e XDG_CACHE_HOME=/workspace/.cache -e PIP_CACHE_DIR=/workspace/.cache/pip -e PYTHONUSERBASE=/workspace/.local \
20+
-e PATH=/workspace/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
1821
${GH_SUM_ENV} ${GH_SUM_MOUNT} \
1922
--entrypoint=/bin/bash \
2023
$IMAGE \

0 commit comments

Comments
 (0)