|
1 | | -#!/usr/bin/bash |
| 1 | +#!/usr/bin/env bash |
2 | 2 |
|
3 | | -sudo sh -c 'echo 0 > /proc/sys/kernel/numa_balancing' |
| 3 | +export HF_HUB_CACHE_MOUNT="/nfsdata/sa/hf_hub_cache-${USER: -1}/" |
| 4 | +export PORT_OFFSET=${USER: -1} |
4 | 5 |
|
5 | | -HF_HUB_CACHE_MOUNT="/home/kimbosemianalysis/hf_hub_cache/" |
6 | | -PORT=8888 |
7 | | - |
8 | | -network_name="bmk-net" |
9 | | -server_name="bmk-server" |
10 | | -client_name="bmk-client" |
11 | | - |
12 | | -docker network create $network_name |
| 6 | +PARTITION="compute" |
| 7 | +SQUASH_FILE="/nfsdata/sa/squash/$(echo "$IMAGE" | sed 's/[\/:@#]/_/g').sqsh" |
13 | 8 |
|
14 | 9 | set -x |
15 | | -docker run --rm -d --ipc=host --shm-size=16g --network=$network_name --name=$server_name \ |
16 | | ---privileged --cap-add=CAP_SYS_ADMIN --device=/dev/kfd --device=/dev/dri --device=/dev/mem \ |
17 | | ---cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ |
18 | | --v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \ |
19 | | --v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \ |
20 | | --e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \ |
21 | | --e ISL -e OSL \ |
22 | | ---entrypoint=/bin/bash \ |
23 | | -$IMAGE \ |
24 | | -benchmarks/"${EXP_NAME%%_*}_${PRECISION}_mi325x_docker.sh" |
25 | | - |
26 | | -set +x |
27 | | -while IFS= read -r line; do |
28 | | - printf '%s\n' "$line" |
29 | | - if [[ "$line" =~ Application\ startup\ complete ]]; then |
30 | | - break |
31 | | - fi |
32 | | -done < <(docker logs -f --tail=0 $server_name 2>&1) |
33 | | - |
34 | | -git clone https://github.com/kimbochen/bench_serving.git |
35 | | - |
36 | | -set -x |
37 | | -docker run --rm --network=$network_name --name=$client_name \ |
38 | | --v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \ |
39 | | --e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \ |
40 | | ---entrypoint=python3 \ |
41 | | -$IMAGE \ |
42 | | -bench_serving/benchmark_serving.py \ |
43 | | ---model=$MODEL --backend=vllm --base-url=http://$server_name:$PORT \ |
44 | | ---dataset-name=random \ |
45 | | ---random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \ |
46 | | ---num-prompts=$(( $CONC * 10 )) \ |
47 | | ---max-concurrency=$CONC \ |
48 | | ---request-rate=inf --ignore-eos \ |
49 | | ---save-result --percentile-metrics="ttft,tpot,itl,e2el" \ |
50 | | ---result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json |
51 | | - |
52 | | -while [ -n "$(docker ps -aq)" ]; do |
53 | | - docker stop $server_name |
54 | | - docker network rm $network_name |
55 | | - sleep 5 |
56 | | -done |
| 10 | +salloc --partition=$PARTITION --gres=gpu:$TP --cpus-per-task=128 --time=180 --no-shell |
| 11 | +JOB_ID=$(squeue -u $USER -h -o %A | head -n1) |
| 12 | + |
| 13 | +srun --jobid=$JOB_ID bash -c "sudo enroot import -o $SQUASH_FILE docker://$IMAGE" |
| 14 | +srun --jobid=$JOB_ID \ |
| 15 | +--container-image=$SQUASH_FILE \ |
| 16 | +--container-mounts=$GITHUB_WORKSPACE:/workspace/,$HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \ |
| 17 | +--container-mount-home \ |
| 18 | +--container-writable \ |
| 19 | +--container-remap-root \ |
| 20 | +--container-workdir=/workspace/ \ |
| 21 | +--no-container-entrypoint --export=ALL \ |
| 22 | +bash benchmarks/${EXP_NAME%%_*}_${PRECISION}_mi325x_slurm.sh |
| 23 | + |
| 24 | +scancel $JOB_ID |
0 commit comments