@@ -8,9 +8,17 @@ export SLURM_PARTITION="batch"
88export SLURM_ACCOUNT=" benchmark"
99export SLURM_JOB_NAME=" benchmark-dynamo.job"
1010
11+ # SGLang: the 8k1k configs are still being updated.
12+ # Until then, conditionals in this script select the newer code paths for the 1k1k configs only.
13+
1114# ## FRAMEWORK_DIFF_IF_STATEMENT #1 - difference in setting up envvars
1215if [[ $FRAMEWORK == " dynamo-sglang" ]]; then
13- export IMAGE=" /mnt/lustre01/artifacts/containers/dynamo-sglang.sqsh"
16+ # Set IMAGE based on ISL/OSL
17+ if [ " $ISL " = " 1024" ] && [ " $OSL " = " 1024" ]; then
18+ export IMAGE=" /mnt/lustre01/artifacts/containers/lmsysorg+sglang+v0.5.5.post2.sqsh"
19+ else
20+ export IMAGE=" /mnt/lustre01/artifacts/containers/dynamo-sglang.sqsh"
21+ fi
1422 export MODEL_PATH=" /mnt/lustre01/models/deepseek-r1-0528"
1523 export CONFIG_DIR=" /mnt/lustre01/artifacts/sglang-configs/1k1k"
1624else
@@ -157,13 +165,24 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
157165
158166else # if statement at the top - search for "FRAMEWORK_DIFF_IF_STATEMENT #2"
159167 # Set up Dynamo repository path
168+ set -x
160169 DYNAMO_PATH=" /mnt/lustre01/users/sa-shared/benchmarks/dynamo"
161- SGL_SLURM_JOBS_PATH=" $DYNAMO_PATH /components/backends/sglang/slurm_jobs"
170+ if [ " $ISL " = " 1024" ] && [ " $OSL " = " 1024" ]; then
171+ SGL_SLURM_JOBS_PATH=" $DYNAMO_PATH /examples/backends/sglang/slurm_jobs"
172+ else
173+ SGL_SLURM_JOBS_PATH=" $DYNAMO_PATH /components/backends/sglang/slurm_jobs"
174+ fi
162175
163176 # Always clone and setup Dynamo
164177 echo " Cloning Dynamo repository..."
165178 rm -rf " $DYNAMO_PATH "
166- git clone --branch update-result-file-name https://github.com/Elnifio/dynamo.git $DYNAMO_PATH
179+ if [ " $ISL " = " 1024" ] && [ " $OSL " = " 1024" ]; then
180+ # TODO: before merge this will be a different branch off of main
181+ git clone --branch ishan/sa-1.1-sgl-dsr1-fp8 https://github.com/ai-dynamo/dynamo.git $DYNAMO_PATH
182+ else
183+ git clone --branch update-result-file-name https://github.com/Elnifio/dynamo.git $DYNAMO_PATH
184+ fi
185+
167186 cd " $DYNAMO_PATH "
168187
169188 # Navigate to corresponding directory
@@ -179,15 +198,32 @@ else # if statement at the top - search for "FRAMEWORK_DIFF_IF_STATEMENT #2"
179198
180199 # Launch jobs based on ISL/OSL
181200 if [ " $ISL " = " 1024" ] && [ " $OSL " = " 1024" ]; then
182- concurrency_list=" 1024x2048x4096x4608x4864x4992x5120x5376x5632x6144x8192"
183- bash ./submit_disagg.sh 6 3 12 1 8 $ISL $OSL $concurrency_list inf
201+ NUMBER_OF_EXPERIMENTS=3
202+
203+ top_of_curve_concurrency_list=" 4096"
204+ middle_of_curve_concurrency_list=" 1024x2048x4096"
205+ bottom_of_curve_concurrency_list=" 2x4x8x16x64x128"
206+
207+ # Top of curve (2 prefill workers each at DEP8 and 1 decode worker at DEP32)
208+ bash ./submit_disagg.sh 4 2 8 1 9 $ISL $OSL $top_of_curve_concurrency_list inf
209+
210+ # Bottom of curve (1 prefill worker at DEP4 and 4 decode workers at DEP4)
211+ bash ./submit_disagg.sh 1 1 4 4 9 $ISL $OSL $bottom_of_curve_concurrency_list inf 1p_4d
212+
213+ # Middle of curve (3 prefill workers each at DEP8 and 1 decode worker at DEP48)
214+ bash ./submit_disagg.sh 6 3 12 1 9 $ISL $OSL $middle_of_curve_concurrency_list inf
215+
184216 elif [ " $ISL " = " 8192" ] && [ " $OSL " = " 1024" ]; then
217+ NUMBER_OF_EXPERIMENTS=1
218+
185219 concurrency_list=" 128x256x384x448x512x576x1024x2048x4096"
186220 bash ./submit_disagg.sh 12 6 6 1 8 $ISL $OSL $concurrency_list inf
187221 else
188222 echo " Unsupported ISL/OSL combination: $ISL /$OSL "
189223 exit 1
190224 fi
225+
226+ set +x
191227fi
192228
193229# Wait for all jobs to complete
@@ -259,9 +295,14 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
259295 done
260296
261297else # search for "FRAMEWORK_DIFF_IF_STATEMENT #3" for this if-statement
262- # Find the latest log directory
263- # we do "tail -1" here since only the latest job will yield the result
264- LOGS_DIR=$( find logs/* /vllm_isl_${ISL} _osl_${OSL} -type d | sort -V | tail -1)
298+ # Find the NUMBER_OF_EXPERIMENTS most recently modified log directories for this ISL/OSL (newest first)
299+ cat > collect_latest_results.py << 'PY '
300+ import os, sys
301+ isl, osl, nexp = [int(x) for x in sys.argv[1:]]
302+ for path in sorted([f"logs/{name}/vllm_isl_{isl}_osl_{osl}" for name in os.listdir("logs/") if os.path.isdir(f"logs/{name}/vllm_isl_{isl}_osl_{osl}")], key=os.path.getmtime, reverse=True)[:nexp]:
303+ print(path)
304+ PY
305+ LOGS_DIR=$( python3 collect_latest_results.py $ISL $OSL $NUMBER_OF_EXPERIMENTS )
265306 if [ -z " $LOGS_DIR " ]; then
266307 echo " No logs directory found for ISL=${ISL} , OSL=${OSL} "
267308 exit 1
0 commit comments