Skip to content

Commit 848e834

Browse files
authored
Merge branch 'main' into evals-on-refactor
2 parents 2461447 + 175fe53 commit 848e834

1 file changed

Lines changed: 49 additions & 8 deletions

File tree

runners/launch_gb200-nv.sh

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,17 @@ export SLURM_PARTITION="batch"
88
export SLURM_ACCOUNT="benchmark"
99
export SLURM_JOB_NAME="benchmark-dynamo.job"
1010

11+
# For SGLang - we are working on updating the 8k1k configs
12+
# For now, conditionals in this script make the 1k1k configs use the newer code
13+
1114
### FRAMEWORK_DIFF_IF_STATEMENT #1 - difference in setting up envvars
1215
if [[ $FRAMEWORK == "dynamo-sglang" ]]; then
13-
export IMAGE="/mnt/lustre01/artifacts/containers/dynamo-sglang.sqsh"
16+
# Set IMAGE based on ISL/OSL: 1024/1024 uses the sglang v0.5.5.post2 image; anything else uses dynamo-sglang
17+
if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
18+
export IMAGE="/mnt/lustre01/artifacts/containers/lmsysorg+sglang+v0.5.5.post2.sqsh"
19+
else
20+
export IMAGE="/mnt/lustre01/artifacts/containers/dynamo-sglang.sqsh"
21+
fi
1422
export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528"
1523
export CONFIG_DIR="/mnt/lustre01/artifacts/sglang-configs/1k1k"
1624
else
@@ -157,13 +165,24 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
157165

158166
else # if statement at the top - search for "FRAMEWORK_DIFF_IF_STATEMENT #2"
159167
# Set up Dynamo repository path
168+
set -x
160169
DYNAMO_PATH="/mnt/lustre01/users/sa-shared/benchmarks/dynamo"
161-
SGL_SLURM_JOBS_PATH="$DYNAMO_PATH/components/backends/sglang/slurm_jobs"
170+
if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
171+
SGL_SLURM_JOBS_PATH="$DYNAMO_PATH/examples/backends/sglang/slurm_jobs"
172+
else
173+
SGL_SLURM_JOBS_PATH="$DYNAMO_PATH/components/backends/sglang/slurm_jobs"
174+
fi
162175

163176
# Always delete any existing checkout, then clone and set up Dynamo from scratch
164177
echo "Cloning Dynamo repository..."
165178
rm -rf "$DYNAMO_PATH"
166-
git clone --branch update-result-file-name https://github.com/Elnifio/dynamo.git $DYNAMO_PATH
179+
if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
180+
# TODO: before merging, replace this with a different branch cut from main
181+
git clone --branch ishan/sa-1.1-sgl-dsr1-fp8 https://github.com/ai-dynamo/dynamo.git $DYNAMO_PATH
182+
else
183+
git clone --branch update-result-file-name https://github.com/Elnifio/dynamo.git $DYNAMO_PATH
184+
fi
185+
167186
cd "$DYNAMO_PATH"
168187

169188
# Navigate to corresponding directory
@@ -179,15 +198,32 @@ else # if statement at the top - search for "FRAMEWORK_DIFF_IF_STATEMENT #2"
179198

180199
# Launch jobs based on ISL/OSL
181200
if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
182-
concurrency_list="1024x2048x4096x4608x4864x4992x5120x5376x5632x6144x8192"
183-
bash ./submit_disagg.sh 6 3 12 1 8 $ISL $OSL $concurrency_list inf
201+
NUMBER_OF_EXPERIMENTS=3
202+
203+
top_of_curve_concurrency_list="4096"
204+
middle_of_curve_concurrency_list="1024x2048x4096"
205+
bottom_of_curve_concurrency_list="2x4x8x16x64x128"
206+
207+
# Top of curve (2 prefill workers each at DEP8 and 1 decode worker at DEP32)
208+
bash ./submit_disagg.sh 4 2 8 1 9 $ISL $OSL $top_of_curve_concurrency_list inf
209+
210+
# Bottom of curve (1 prefill worker at DEP4 and 4 decode workers at DEP4)
211+
bash ./submit_disagg.sh 1 1 4 4 9 $ISL $OSL $bottom_of_curve_concurrency_list inf 1p_4d
212+
213+
# Middle of curve (3 prefill workers each at DEP8 and 1 decode worker at DEP48)
214+
bash ./submit_disagg.sh 6 3 12 1 9 $ISL $OSL $middle_of_curve_concurrency_list inf
215+
184216
elif [ "$ISL" = "8192" ] && [ "$OSL" = "1024" ]; then
217+
NUMBER_OF_EXPERIMENTS=1
218+
185219
concurrency_list="128x256x384x448x512x576x1024x2048x4096"
186220
bash ./submit_disagg.sh 12 6 6 1 8 $ISL $OSL $concurrency_list inf
187221
else
188222
echo "Unsupported ISL/OSL combination: $ISL/$OSL"
189223
exit 1
190224
fi
225+
226+
set +x
191227
fi
192228

193229
# Wait for all jobs to complete
@@ -259,9 +295,14 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
259295
done
260296

261297
else # search for "FRAMEWORK_DIFF_IF_STATEMENT #3" for this if-statement
262-
# Find the latest log directory
263-
# we do "tail -1" here since only the latest job will yield the result
264-
LOGS_DIR=$(find logs/*/vllm_isl_${ISL}_osl_${OSL} -type d | sort -V | tail -1)
298+
# Find the NUMBER_OF_EXPERIMENTS most recently modified log directories for this ISL/OSL (one path per line)
299+
cat > collect_latest_results.py <<'PY'
300+
import os, sys
301+
isl, osl, nexp = [int(x) for x in sys.argv[1:]]
302+
for path in sorted([f"logs/{name}/vllm_isl_{isl}_osl_{osl}" for name in os.listdir("logs/") if os.path.isdir(f"logs/{name}/vllm_isl_{isl}_osl_{osl}")], key=os.path.getmtime, reverse=True)[:nexp]:
303+
print(path)
304+
PY
305+
LOGS_DIR=$(python3 collect_latest_results.py $ISL $OSL $NUMBER_OF_EXPERIMENTS)
265306
if [ -z "$LOGS_DIR" ]; then
266307
echo "No logs directory found for ISL=${ISL}, OSL=${OSL}"
267308
exit 1

0 commit comments

Comments
 (0)