Skip to content

Commit c95a3e4

Browse files
committed
add updates for newest gb200 merge pt 2
1 parent 50f0af5 commit c95a3e4

2 files changed

Lines changed: 12 additions & 9 deletions

File tree

benchmarks/dsr1_fp8_gb200_dynamo-sglang_slurm.sh

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ source "$(dirname "$0")/benchmark_lib.sh"
88
check_env_vars CONC_LIST ISL OSL IMAGE SPEC_DECODING MODEL_PATH \
99
PREFILL_NUM_WORKERS PREFILL_TP PREFILL_EP PREFILL_DP_ATTN \
1010
DECODE_NUM_WORKERS DECODE_TP DECODE_EP DECODE_DP_ATTN \
11-
PREFILL_NODES DECODE_NODES N_ADDITIONAL_FRONTENDS
11+
PREFILL_NODES DECODE_NODES N_ADDITIONAL_FRONTENDS SGL_SLURM_JOBS_PATH # SGL_SLURM_JOBS_PATH FIXME
1212

1313
# Always clone and setup Dynamo
1414
echo "Cloning Dynamo repository..."
@@ -18,11 +18,6 @@ else
1818
git clone --branch update-result-file-name https://github.com/Elnifio/dynamo.git
1919
fi
2020

21-
if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
22-
SGL_SLURM_JOBS_PATH="dynamo/examples/backends/sglang/slurm_jobs"
23-
else
24-
SGL_SLURM_JOBS_PATH="dynamo/components/backends/sglang/slurm_jobs"
25-
fi
2621
cd "$SGL_SLURM_JOBS_PATH"
2722

2823
# Set up SGL launch script-specific environment variables

runners/launch_gb200-nv.sh

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ if [[ $FRAMEWORK == "dynamo-sglang" ]]; then
2424
fi
2525
export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528"
2626
export CONFIG_DIR="/mnt/lustre01/artifacts/sglang-configs/1k1k"
27+
28+
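# FIXME: Another workaround for all the different branching: the slurm_jobs directory is under dynamo/examples/ when ISL and OSL are both 1024, and under dynamo/components/ otherwise.
29+
# This needs to be standardized ASAP so that one path works for every ISL/OSL combination.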
30+
if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
31+
export SGL_SLURM_JOBS_PATH="dynamo/examples/backends/sglang/slurm_jobs"
32+
else
33+
export SGL_SLURM_JOBS_PATH="dynamo/components/backends/sglang/slurm_jobs"
34+
fi
2735
else
2836
SQUASH_FILE="/mnt/lustre01/users/sa-shared/images/$(echo "$IMAGE" | sed 's/[\/:@#]/_/g').sqsh"
2937
srun --partition=$SLURM_PARTITION --exclusive --time=180 bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE"
@@ -115,8 +123,8 @@ else # search for "FRAMEWORK_DIFF_IF_STATEMENT #3" for this if-statement
115123
# Find the latest log directory that contains the data
116124
cat > collect_latest_results.py <<'PY'
117125
import os, sys
118-
isl, osl, nexp = [int(x) for x in sys.argv[1:]]
119-
for path in sorted([f"logs/{name}/vllm_isl_{isl}_osl_{osl}" for name in os.listdir("logs/") if os.path.isdir(f"logs/{name}/vllm_isl_{isl}_osl_{osl}")], key=os.path.getmtime, reverse=True)[:nexp]:
126+
sgl_job_dir, isl, osl, nexp = sys.argv[1], int(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4])
127+
for path in sorted([f"{sgl_job_dir}/logs/{name}/vllm_isl_{isl}_osl_{osl}" for name in os.listdir(f"{sgl_job_dir}/logs/") if os.path.isdir(f"{sgl_job_dir}/logs/{name}/vllm_isl_{isl}_osl_{osl}")], key=os.path.getmtime, reverse=True)[:nexp]:
120128
print(path)
121129
PY
122130

@@ -127,7 +135,7 @@ PY
127135
NUMBER_OF_EXPERIMENTS=1
128136
fi
129137

130-
LOGS_DIR=$(python3 collect_latest_results.py $ISL $OSL $NUMBER_OF_EXPERIMENTS)
138+
LOGS_DIR=$(python3 collect_latest_results.py "$SGL_SLURM_JOBS_PATH" $ISL $OSL $NUMBER_OF_EXPERIMENTS)
131139
if [ -z "$LOGS_DIR" ]; then
132140
echo "No logs directory found for ISL=${ISL}, OSL=${OSL}"
133141
exit 1

0 commit comments

Comments
 (0)