Skip to content
Closed
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .github/workflows/benchmark-multinode-tmpl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ env:
MAX_MODEL_LEN: ${{ inputs.max-model-len }}
RANDOM_RANGE_RATIO: ${{ inputs.random-range-ratio }}
MTP_MODE: ${{ inputs.mtp-mode }}
MODEL: ${{ inputs.model }}

permissions:
contents: read
Expand Down Expand Up @@ -97,9 +98,9 @@ jobs:
if [ -f "$result_file" ]; then
echo "Processing $result_file"
# Extract GPU count, prefill_gpus and decode_gpus from filename for tp_size calculation
gpus=$(echo "$result_file" | sed -n "s/.*_gpus_\([0-9]*\).*\.json/\1/p")
prefill_gpus=$(echo "$result_file" | sed -n "s/.*_ctx_\([0-9]*\).*\.json/\1/p")
decode_gpus=$(echo "$result_file" | sed -n "s/.*_gen_\([0-9]*\).*\.json/\1/p")
gpus=$(echo "$result_file" | sed -n "s/.*_gpus-\([0-9]*\).*\.json/\1/p")
prefill_gpus=$(echo "$result_file" | sed -n "s/.*_ctx-\([0-9]*\).*\.json/\1/p")
decode_gpus=$(echo "$result_file" | sed -n "s/.*_gen-\([0-9]*\).*\.json/\1/p")

if [ -n "$gpus" ]; then
echo "Extracted: gpus=$gpus, prefill_gpus=$prefill_gpus, decode_gpus=$decode_gpus"
Expand Down
12 changes: 11 additions & 1 deletion .github/workflows/full-sweep-8k1k-scheduler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ jobs:
fail-fast: false
matrix:
config:
# DSR1
- {
"image": "nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3",
"model": "deepseek-r1-fp4",
Expand All @@ -114,6 +115,15 @@ jobs:
"framework": "dynamo-sglang",
"mtp": "off",
}
# GPTOSS
Copy link

Copilot AI Nov 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] Inconsistent comment formatting. The DeepSeek comment on line 93 uses '# DSR1' while this uses '# GPTOSS' with different indentation. Align the comment indentation with line 93 for consistency.

Suggested change
# GPTOSS
# GPTOSS

Copilot uses AI. Check for mistakes.
- {
"image": "nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1-cuda13",
"model": "openai/gpt-oss-120b",
"model-prefix": "gptoss",
"precision": "fp4",
"framework": "dynamo-trtllm",
"mtp": "off",
}
secrets: inherit
with:
runner: gb200
Expand All @@ -136,7 +146,7 @@ jobs:
exp-name: "dsr1_8k1k"

collect-gptoss-results:
needs: benchmark-gptoss
needs: [benchmark-gptoss, benchmark-gb200]
if: ${{ always() }}
uses: ./.github/workflows/collect-results.yml
secrets: inherit
Expand Down
19 changes: 18 additions & 1 deletion .github/workflows/gb200-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ on:
options:
- "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1"
- "nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3"
- "nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1-cuda13"

model:
description: "Model"
Expand All @@ -18,6 +19,7 @@ on:
options:
- "deepseek-ai/DeepSeek-R1-0528"
- "deepseek-r1-fp4"
- "openai/gpt-oss-120b"

precision:
description: "Precision"
Expand Down Expand Up @@ -58,6 +60,7 @@ jobs:
runs-on: ubuntu-latest
outputs:
max-model-len: ${{ steps.calc.outputs.max-model-len }}
model-prefix: ${{ steps.calc.outputs.model-prefix }}
steps:
- id: calc
shell: python
Expand All @@ -70,8 +73,22 @@ jobs:
except ValueError:
print("Error: ISL and OSL must be integers")
sys.exit(1)

# Map model names to clean prefixes
model = "${{ inputs.model }}"
if model == "deepseek-ai/DeepSeek-R1-0528":
model_prefix = "dsr1"
elif model == "deepseek-r1-fp4":
model_prefix = "dsr1-fp4"
elif model == "openai/gpt-oss-120b":
model_prefix = "gptoss"
else:
# Fallback: replace slashes with underscores
model_prefix = model.replace("/", "_")

with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
f.write(f"max-model-len={isl + osl}\n")
f.write(f"model-prefix={model_prefix}\n")

benchmark-gb200:
needs: pre-run
Expand All @@ -84,7 +101,7 @@ jobs:
model: ${{ inputs.model }}
framework: ${{ inputs.framework }}
precision: ${{ inputs.precision }}
exp-name: dsr1_1k1k
exp-name: ${{ needs.pre-run.outputs.model-prefix }}
isl: ${{ inputs.isl }}
osl: ${{ inputs.osl }}
max-model-len: ${{ needs.pre-run.outputs.max-model-len }}
Expand Down
119 changes: 78 additions & 41 deletions runners/launch_gb200-nv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,16 @@ else
# Update the IMAGE variable to the squash file
export IMAGE=$SQUASH_FILE

export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528-fp4-v2"
export SERVED_MODEL_NAME="deepseek-r1-fp4"
if [[ $MODEL == *"gpt-oss"* ]]; then
export MODEL_PATH="/mnt/lustre01/models/gpt-oss-120b"
export SERVED_MODEL_NAME="gpt-oss-120b"
elif [[ $MODEL == *"deepseek-r1-fp4" ]]; then
export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528-fp4-v2"
export SERVED_MODEL_NAME="deepseek-r1-fp4"
else
echo "Unsupported model: $MODEL. Supported models are: gpt-oss, deepseek-r1-fp4"
exit 1
fi
fi


Expand Down Expand Up @@ -49,7 +57,11 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
rm -rf "$DYNAMO_PATH"
git clone https://github.com/ai-dynamo/dynamo.git "$DYNAMO_PATH"
cd "$DYNAMO_PATH"
git checkout release/0.5.1-rc0.20251105
if [[ $MODEL == *"gpt-oss"* ]]; then
git checkout jthomson04/gpt-oss-disagg-slurm
else
git checkout release/0.5.1-rc0.20251105
fi
git submodule update --init --recursive

# Navigate to performance sweeps directory
Expand Down Expand Up @@ -94,60 +106,82 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
# gen_eplb_num_slots: Expert load balancing slots (0, 256, 288)
# gen_concurrency_list: Concurrency values (space-separated, quoted)

if [ "$isl" = "1024" ] && [ "$osl" = "1024" ]; then
if [ "$mtp_mode" = "on" ]; then
echo "Running 1k/1k MTP=ON configurations"
# MODEL-SPECIFIC HOOK: Different benchmark configurations for different models
if [[ $MODEL == *"gpt-oss"* ]]; then
# GPT-OSS specific benchmark configurations
if [ "$isl" = "8192" ] && [ "$osl" = "1024" ]; then

Copy link

Copilot AI Nov 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary blank line with trailing whitespace. Remove this line or the trailing spaces.

Suggested change

Copilot uses AI. Check for mistakes.
echo "Running 8k/1k MTP=OFF configurations for GPT-OSS"

./submit_disagg.sh mtp=off tp 1 1 1 512 20000 "0.9" 0 0 "128 256 512"
./submit_disagg.sh mtp=off tp 1 1 2 1024 20000 "0.9" 0 0 "64 128 256"
./submit_disagg.sh mtp=off tep 1 1 2 1024 20000 "0.9" 0 0 "64 256"
./submit_disagg.sh mtp=off tp 1 1 4 2048 20000 "0.9" 0 0 "8 16 32 64 128"
./submit_disagg.sh mtp=off tp 1 1 8 2048 20000 "0.9" 0 0 "1 2 4 8 16"
else
echo "Unsupported ISL/OSL combination for GPT-OSS: $isl/$osl"
exit 1
fi
elif [[ $MODEL == *"deepseek-r1-fp4" ]]; then
# DeepSeek-R1 specific benchmark configurations (existing logic)
if [ "$isl" = "1024" ] && [ "$osl" = "1024" ]; then
if [ "$mtp_mode" = "on" ]; then
echo "Running 1k/1k MTP=ON configurations for DeepSeek-R1"

./submit_disagg.sh "mtp=on" "tep" 1 4 8 32 128 "0.9" 3 0 "1 2 4 8 16 36"
./submit_disagg.sh "mtp=on" "tep" 1 4 8 32 128 "0.9" 3 0 "1 2 4 8 16 36"

./submit_disagg.sh "mtp=on" "dep" 1 1 16 64 256 "0.7" 3 0 "512 1075"
./submit_disagg.sh "mtp=on" "dep" 1 1 16 64 256 "0.7" 3 0 "512 1075"

./submit_disagg.sh "mtp=on" "dep" 2 1 16 128 256 "0.7" 1 0 "2150"
./submit_disagg.sh "mtp=on" "dep" 2 1 16 128 256 "0.7" 1 0 "2150"

./submit_disagg.sh "mtp=on" "dep" 1 1 32 16 64 "0.6" 3 0 "512"
./submit_disagg.sh "mtp=on" "dep" 1 1 32 16 64 "0.6" 3 0 "512"

./submit_disagg.sh "mtp=on" "dep" 1 1 8 256 512 "0.8" 1 0 "2252"
else
echo "Running 1k/1k MTP=OFF configurations"
./submit_disagg.sh "mtp=on" "dep" 1 1 8 256 512 "0.8" 1 0 "2252"
else
echo "Running 1k/1k MTP=OFF configurations for DeepSeek-R1"

./submit_disagg.sh "mtp=off" "tep" 1 4 8 128 128 "0.9" 0 0 "1 2 4 8 16 32 64 141"
./submit_disagg.sh "mtp=off" "tep" 1 4 8 128 128 "0.9" 0 0 "1 2 4 8 16 32 64 141"

./submit_disagg.sh "mtp=off" "dep" 1 1 32 32 32 "0.7" 0 0 "1075"
./submit_disagg.sh "mtp=off" "dep" 1 1 32 32 32 "0.7" 0 0 "1075"

./submit_disagg.sh "mtp=off" "dep" 1 1 16 64 64 "0.75" 0 0 "1075"
./submit_disagg.sh "mtp=off" "dep" 1 1 16 64 64 "0.75" 0 0 "1075"

./submit_disagg.sh "mtp=off" "dep" 2 1 16 256 256 "0.75" 0 0 "2048 4300"
./submit_disagg.sh "mtp=off" "dep" 2 1 16 256 256 "0.75" 0 0 "2048 4300"

./submit_disagg.sh "mtp=off" "dep" 1 1 8 512 512 "0.8" 0 0 "4300"
fi
elif [ "$isl" = "8192" ] && [ "$osl" = "1024" ]; then
if [ "$mtp_mode" = "on" ]; then
echo "Running 8k/1k MTP=ON configurations"
./submit_disagg.sh "mtp=off" "dep" 1 1 8 512 512 "0.8" 0 0 "4300"
fi
elif [ "$isl" = "8192" ] && [ "$osl" = "1024" ]; then
if [ "$mtp_mode" = "on" ]; then
echo "Running 8k/1k MTP=ON configurations for DeepSeek-R1"

./submit_disagg.sh "mtp=on" "tep" 1 3 8 16 64 "0.9" 3 0 "1 2 4 8 18"
./submit_disagg.sh "mtp=on" "tep" 1 3 8 16 64 "0.9" 3 0 "1 2 4 8 18"

./submit_disagg.sh "mtp=on" "dep" 5 1 32 8 32 "0.7" 3 0 "128 269"
./submit_disagg.sh "mtp=on" "dep" 5 1 32 8 32 "0.7" 3 0 "128 269"

./submit_disagg.sh "mtp=on" "dep" 8 1 32 16 64 "0.7" 3 0 "538"
./submit_disagg.sh "mtp=on" "dep" 8 1 32 16 64 "0.7" 3 0 "538"

./submit_disagg.sh "mtp=on" "dep" 8 1 16 64 256 "0.75" 2 0 "1075"
./submit_disagg.sh "mtp=on" "dep" 8 1 16 64 256 "0.75" 2 0 "1075"

./submit_disagg.sh "mtp=on" "dep" 6 1 8 256 512 "0.8" 1 0 "2150"
else
echo "Running 8k/1k MTP=OFF configurations"
./submit_disagg.sh "mtp=on" "dep" 6 1 8 256 512 "0.8" 1 0 "2150"
else
echo "Running 8k/1k MTP=OFF configurations for DeepSeek-R1"

./submit_disagg.sh "mtp=off" "tep" 1 3 8 32 32 "0.9" 0 0 "1 2 4 8 16 34"
./submit_disagg.sh "mtp=off" "tep" 1 3 8 32 32 "0.9" 0 0 "1 2 4 8 16 34"

./submit_disagg.sh "mtp=off" "dep" 4 1 32 16 16 "0.7" 0 0 "256 538"
./submit_disagg.sh "mtp=off" "dep" 4 1 32 16 16 "0.7" 0 0 "256 538"

./submit_disagg.sh "mtp=off" "dep" 6 1 16 64 64 "0.75" 0 0 "1075"
./submit_disagg.sh "mtp=off" "dep" 6 1 16 64 64 "0.75" 0 0 "1075"

./submit_disagg.sh "mtp=off" "dep" 8 1 16 128 128 "0.75" 0 0 "2150"
./submit_disagg.sh "mtp=off" "dep" 8 1 16 128 128 "0.75" 0 0 "2150"

./submit_disagg.sh "mtp=off" "dep" 5 1 8 256 256 "0.8" 0 0 "2150"
./submit_disagg.sh "mtp=off" "dep" 5 1 8 256 256 "0.8" 0 0 "2150"
fi
else
echo "Unsupported ISL/OSL combination for DeepSeek-R1: $isl/$osl"
exit 1
fi
else
echo "Unsupported ISL/OSL combination: $isl/$osl"
echo "Unsupported model: $MODEL. Supported models are: gpt-oss, deepseek-r1-fp4"
exit 1
fi
}
Expand Down Expand Up @@ -212,7 +246,7 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
echo "Found logs directory: $LOGS_DIR"

# Find all result subdirectories in this logs directory
RESULT_SUBDIRS=$(find "$LOGS_DIR" -name "ctx*_gen*_[td]ep*_batch*_eplb*_mtp*" -type d)
RESULT_SUBDIRS=$(find "$LOGS_DIR" -name "ctx*_gen*_*_batch*_eplb*_mtp*" -type d)
Copy link

Copilot AI Nov 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The find pattern 'ctx*_gen*_*_batch*_eplb*_mtp*' uses a generic wildcard (*) in the middle which may match unintended directory names. Consider using a more specific pattern like 'ctx*_gen*_[td]ep*_batch*_eplb*_mtp*' or 'ctx*_gen*_{tp,tep,dep}_batch*_eplb*_mtp*' to match only valid parallelism strategies (tp/tep/dep).

Suggested change
RESULT_SUBDIRS=$(find "$LOGS_DIR" -name "ctx*_gen*_*_batch*_eplb*_mtp*" -type d)
RESULT_SUBDIRS=$(find "$LOGS_DIR" -name "ctx*_gen*_{tp,tep,dep}_batch*_eplb*_mtp*" -type d)

Copilot uses AI. Check for mistakes.

if [ -z "$RESULT_SUBDIRS" ]; then
echo "No result subdirectories found in $LOGS_DIR"
Expand Down Expand Up @@ -240,14 +274,17 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then

for result_file in $CONCURRENCY_FILES; do
if [ -f "$result_file" ]; then
# Extract concurrency and GPU count from filename
# Extract concurrency, total_gpus, prefill_gpus, and decode_gpus from filename
filename=$(basename "$result_file")
concurrency=$(echo "$filename" | sed 's/results_concurrency_\([0-9]*\)_gpus_.*\.json/\1/')
gpus=$(echo "$filename" | sed 's/results_concurrency_.*_gpus_\([0-9]*\)\.json/\1/')
echo "Processing concurrency $concurrency with $gpus GPUs: $result_file"
concurrency=$(echo "$filename" | sed 's/results_concurrency_\([0-9]*\)_.*/\1/')
gpus=$(echo "$filename" | sed 's/.*_gpus_\([0-9]*\)_.*/\1/')
prefill_gpus=$(echo "$filename" | sed 's/.*_ctx_\([0-9]*\)_.*/\1/')
decode_gpus=$(echo "$filename" | sed 's/.*_gen_\([0-9]*\)\.json/\1/')

echo "Processing concurrency $concurrency with $gpus GPUs (prefill_gpus=$prefill_gpus, decode_gpus=$decode_gpus): $result_file"

# Copy the result file to workspace with a unique name
WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus${gpus}.json"
WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus-${gpus}_ctx-${prefill_gpus}_gen-${decode_gpus}.json"
cp "$result_file" "$WORKSPACE_RESULT_FILE"

echo "Copied result file to: $WORKSPACE_RESULT_FILE"
Expand Down