SemiAnalysisAI · jgangani · Nov 10, 2025 · Nov 11, 2025 · Nov 12, 2025 · Nov 13, 2025
diff --git a/.github/workflows/benchmark-multinode-tmpl.yml b/.github/workflows/benchmark-multinode-tmpl.yml
@@ -48,6 +48,7 @@ env:
   MAX_MODEL_LEN: ${{ inputs.max-model-len }}
   RANDOM_RANGE_RATIO: ${{ inputs.random-range-ratio }}
   MTP_MODE: ${{ inputs.mtp-mode }}
+  MODEL: ${{ inputs.model }}
 
 permissions:
   contents: read
@@ -97,9 +98,9 @@ jobs:
             if [ -f "$result_file" ]; then
               echo "Processing $result_file"
               # Extract GPU count, prefill_gpus and decode_gpus from filename for tp_size calculation
-              gpus=$(echo "$result_file" | sed -n "s/.*_gpus_\([0-9]*\).*\.json/\1/p")
-              prefill_gpus=$(echo "$result_file" | sed -n "s/.*_ctx_\([0-9]*\).*\.json/\1/p")
-              decode_gpus=$(echo "$result_file" | sed -n "s/.*_gen_\([0-9]*\).*\.json/\1/p")
+              gpus=$(echo "$result_file" | sed -n "s/.*_gpus-\([0-9]*\).*\.json/\1/p")
+              prefill_gpus=$(echo "$result_file" | sed -n "s/.*_ctx-\([0-9]*\).*\.json/\1/p")
+              decode_gpus=$(echo "$result_file" | sed -n "s/.*_gen-\([0-9]*\).*\.json/\1/p")
 
               if [ -n "$gpus" ]; then
                 echo "Extracted: gpus=$gpus, prefill_gpus=$prefill_gpus, decode_gpus=$decode_gpus"

diff --git a/.github/workflows/full-sweep-8k1k-scheduler.yml b/.github/workflows/full-sweep-8k1k-scheduler.yml
@@ -90,6 +90,7 @@ jobs:
             fail-fast: false
             matrix:
                 config:
+                    # DSR1
                     - {
                           "image": "nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3",
                           "model": "deepseek-r1-fp4",
@@ -114,6 +115,15 @@ jobs:
                           "framework": "dynamo-sglang",
                           "mtp": "off",
                       }
+                    # GPTOSS
+                    - {
+                          "image": "nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1-cuda13",
+                          "model": "openai/gpt-oss-120b",
+                          "model-prefix": "gptoss",
+                          "precision": "fp4",
+                          "framework": "dynamo-trtllm",
+                          "mtp": "off",
+                      }
         secrets: inherit
         with:
             runner: gb200
@@ -136,7 +146,7 @@ jobs:
             exp-name: "dsr1_8k1k"
 
     collect-gptoss-results:
-        needs: benchmark-gptoss
+        needs: [benchmark-gptoss, benchmark-gb200]
         if: ${{ always() }}
         uses: ./.github/workflows/collect-results.yml
         secrets: inherit

diff --git a/.github/workflows/gb200-tests.yml b/.github/workflows/gb200-tests.yml
@@ -10,6 +10,7 @@ on:
                 options:
                     - "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1"
                     - "nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3"
+                    - "nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1-cuda13"
 
             model:
                 description: "Model"
@@ -18,6 +19,7 @@ on:
                 options:
                     - "deepseek-ai/DeepSeek-R1-0528"
                     - "deepseek-r1-fp4"
+                    - "openai/gpt-oss-120b"
 
             precision:
                 description: "Precision"
@@ -58,6 +60,7 @@ jobs:
         runs-on: ubuntu-latest
         outputs:
             max-model-len: ${{ steps.calc.outputs.max-model-len }}
+            model-prefix: ${{ steps.calc.outputs.model-prefix }}
         steps:
             - id: calc
               shell: python
@@ -70,8 +73,22 @@ jobs:
                   except ValueError:
                       print("Error: ISL and OSL must be integers")
                       sys.exit(1)
+
+                  # Map model names to clean prefixes
+                  model = "${{ inputs.model }}"
+                  if model == "deepseek-ai/DeepSeek-R1-0528":
+                      model_prefix = "dsr1"
+                  elif model == "deepseek-r1-fp4":
+                      model_prefix = "dsr1-fp4"
+                  elif model == "openai/gpt-oss-120b":
+                      model_prefix = "gptoss"
+                  else:
+                      # Fallback: replace slashes with underscores
+                      model_prefix = model.replace("/", "_")
+
                   with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
                       f.write(f"max-model-len={isl + osl}\n")
+                      f.write(f"model-prefix={model_prefix}\n")
 
     benchmark-gb200:
         needs: pre-run
@@ -84,7 +101,7 @@ jobs:
             model: ${{ inputs.model }}
             framework: ${{ inputs.framework }}
             precision: ${{ inputs.precision }}
-            exp-name: dsr1_1k1k
+            exp-name: ${{ needs.pre-run.outputs.model-prefix }}
             isl: ${{ inputs.isl }}
             osl: ${{ inputs.osl }}
             max-model-len: ${{ needs.pre-run.outputs.max-model-len }}

diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh
@@ -20,8 +20,16 @@ else
     # Update the IMAGE variable to the squash file
     export IMAGE=$SQUASH_FILE
 
-    export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528-fp4-v2"
-    export SERVED_MODEL_NAME="deepseek-r1-fp4"
+    if [[ $MODEL == *"gpt-oss"* ]]; then
+        export MODEL_PATH="/mnt/lustre01/models/gpt-oss-120b"
+        export SERVED_MODEL_NAME="gpt-oss-120b"
+    elif [[ $MODEL == *"deepseek-r1-fp4" ]]; then
+        export MODEL_PATH="/mnt/lustre01/models/deepseek-r1-0528-fp4-v2"
+        export SERVED_MODEL_NAME="deepseek-r1-fp4"
+    else
+        echo "Unsupported model: $MODEL. Supported models are: gpt-oss, deepseek-r1-fp4"
+        exit 1
+    fi
 fi
 
 
@@ -49,7 +57,11 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
     rm -rf "$DYNAMO_PATH"
     git clone https://github.com/ai-dynamo/dynamo.git "$DYNAMO_PATH"
     cd "$DYNAMO_PATH"
-    git checkout release/0.5.1-rc0.20251105
+    if [[ $MODEL == *"gpt-oss"* ]]; then
+        git checkout jthomson04/gpt-oss-disagg-slurm
+    else
+        git checkout release/0.5.1-rc0.20251105
+    fi  
     git submodule update --init --recursive
 
     # Navigate to performance sweeps directory
@@ -94,60 +106,82 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
         #   gen_eplb_num_slots: Expert load balancing slots (0, 256, 288)
         #   gen_concurrency_list: Concurrency values (space-separated, quoted)
 
-        if [ "$isl" = "1024" ] && [ "$osl" = "1024" ]; then
-            if [ "$mtp_mode" = "on" ]; then
-                echo "Running 1k/1k MTP=ON configurations"
+        # MODEL-SPECIFIC HOOK: Different benchmark configurations for different models
+        if [[ $MODEL == *"gpt-oss"* ]]; then
+            # GPT-OSS specific benchmark configurations
+            if [ "$isl" = "8192" ] && [ "$osl" = "1024" ]; then
+
+                    echo "Running 8k/1k MTP=OFF configurations for GPT-OSS"
+
+                    ./submit_disagg.sh mtp=off tp 1 1 1 512 20000 "0.9" 0 0 "128 256 512"
+                    ./submit_disagg.sh mtp=off tp 1 1 2 1024 20000 "0.9" 0 0 "64 128 256"
+                    ./submit_disagg.sh mtp=off tep 1 1 2 1024 20000 "0.9" 0 0 "64 256"
+                    ./submit_disagg.sh mtp=off tp 1 1 4 2048 20000 "0.9" 0 0 "8 16 32 64 128"
+                    ./submit_disagg.sh mtp=off tp 1 1 8 2048 20000 "0.9" 0 0 "1 2 4 8 16"
+            else
+                echo "Unsupported ISL/OSL combination for GPT-OSS: $isl/$osl"
+                exit 1
+            fi
+        elif [[ $MODEL == *"deepseek-r1-fp4" ]]; then
+            # DeepSeek-R1 specific benchmark configurations (existing logic)
+            if [ "$isl" = "1024" ] && [ "$osl" = "1024" ]; then
+                if [ "$mtp_mode" = "on" ]; then
+                    echo "Running 1k/1k MTP=ON configurations for DeepSeek-R1"
 
-                ./submit_disagg.sh "mtp=on" "tep" 1 4 8 32 128 "0.9" 3 0 "1 2 4 8 16 36"
+                    ./submit_disagg.sh "mtp=on" "tep" 1 4 8 32 128 "0.9" 3 0 "1 2 4 8 16 36"
 
-                ./submit_disagg.sh "mtp=on" "dep" 1 1 16 64 256 "0.7" 3 0 "512 1075"
+                    ./submit_disagg.sh "mtp=on" "dep" 1 1 16 64 256 "0.7" 3 0 "512 1075"
 
-                ./submit_disagg.sh "mtp=on" "dep" 2 1 16 128 256 "0.7" 1 0 "2150"
+                    ./submit_disagg.sh "mtp=on" "dep" 2 1 16 128 256 "0.7" 1 0 "2150"
 
-                ./submit_disagg.sh "mtp=on" "dep" 1 1 32 16 64 "0.6" 3 0 "512"
+                    ./submit_disagg.sh "mtp=on" "dep" 1 1 32 16 64 "0.6" 3 0 "512"
 
-                ./submit_disagg.sh "mtp=on" "dep" 1 1 8 256 512 "0.8" 1 0 "2252"
-            else
-                echo "Running 1k/1k MTP=OFF configurations"
+                    ./submit_disagg.sh "mtp=on" "dep" 1 1 8 256 512 "0.8" 1 0 "2252"
+                else
+                    echo "Running 1k/1k MTP=OFF configurations for DeepSeek-R1"
 
-                ./submit_disagg.sh "mtp=off" "tep" 1 4 8 128 128 "0.9" 0 0 "1 2 4 8 16 32 64 141"
+                    ./submit_disagg.sh "mtp=off" "tep" 1 4 8 128 128 "0.9" 0 0 "1 2 4 8 16 32 64 141"
 
-                ./submit_disagg.sh "mtp=off" "dep" 1 1 32 32 32 "0.7" 0 0 "1075"
+                    ./submit_disagg.sh "mtp=off" "dep" 1 1 32 32 32 "0.7" 0 0 "1075"
 
-                ./submit_disagg.sh "mtp=off" "dep" 1 1 16 64 64 "0.75" 0 0 "1075"
+                    ./submit_disagg.sh "mtp=off" "dep" 1 1 16 64 64 "0.75" 0 0 "1075"
 
-                ./submit_disagg.sh "mtp=off" "dep" 2 1 16 256 256 "0.75" 0 0 "2048 4300"
+                    ./submit_disagg.sh "mtp=off" "dep" 2 1 16 256 256 "0.75" 0 0 "2048 4300"
 
-                ./submit_disagg.sh "mtp=off" "dep" 1 1 8 512 512 "0.8" 0 0 "4300"
-            fi
-        elif [ "$isl" = "8192" ] && [ "$osl" = "1024" ]; then
-            if [ "$mtp_mode" = "on" ]; then
-                echo "Running 8k/1k MTP=ON configurations"
+                    ./submit_disagg.sh "mtp=off" "dep" 1 1 8 512 512 "0.8" 0 0 "4300"
+                fi
+            elif [ "$isl" = "8192" ] && [ "$osl" = "1024" ]; then
+                if [ "$mtp_mode" = "on" ]; then
+                    echo "Running 8k/1k MTP=ON configurations for DeepSeek-R1"
 
-                ./submit_disagg.sh "mtp=on" "tep" 1 3 8 16 64 "0.9" 3 0 "1 2 4 8 18"
+                    ./submit_disagg.sh "mtp=on" "tep" 1 3 8 16 64 "0.9" 3 0 "1 2 4 8 18"
 
-                ./submit_disagg.sh "mtp=on" "dep" 5 1 32 8 32 "0.7" 3 0 "128 269"
+                    ./submit_disagg.sh "mtp=on" "dep" 5 1 32 8 32 "0.7" 3 0 "128 269"
 
-                ./submit_disagg.sh "mtp=on" "dep" 8 1 32 16 64 "0.7" 3 0 "538"
+                    ./submit_disagg.sh "mtp=on" "dep" 8 1 32 16 64 "0.7" 3 0 "538"
 
-                ./submit_disagg.sh "mtp=on" "dep" 8 1 16 64 256 "0.75" 2 0 "1075"
+                    ./submit_disagg.sh "mtp=on" "dep" 8 1 16 64 256 "0.75" 2 0 "1075"
 
-                ./submit_disagg.sh "mtp=on" "dep" 6 1 8 256 512 "0.8" 1 0 "2150"
-            else
-                echo "Running 8k/1k MTP=OFF configurations"
+                    ./submit_disagg.sh "mtp=on" "dep" 6 1 8 256 512 "0.8" 1 0 "2150"
+                else
+                    echo "Running 8k/1k MTP=OFF configurations for DeepSeek-R1"
 
-                ./submit_disagg.sh "mtp=off" "tep" 1 3 8 32 32 "0.9" 0 0 "1 2 4 8 16 34"
+                    ./submit_disagg.sh "mtp=off" "tep" 1 3 8 32 32 "0.9" 0 0 "1 2 4 8 16 34"
 
-                ./submit_disagg.sh "mtp=off" "dep" 4 1 32 16 16 "0.7" 0 0 "256 538"
+                    ./submit_disagg.sh "mtp=off" "dep" 4 1 32 16 16 "0.7" 0 0 "256 538"
 
-                ./submit_disagg.sh "mtp=off" "dep" 6 1 16 64 64 "0.75" 0 0 "1075"
+                    ./submit_disagg.sh "mtp=off" "dep" 6 1 16 64 64 "0.75" 0 0 "1075"
 
-                ./submit_disagg.sh "mtp=off" "dep" 8 1 16 128 128 "0.75" 0 0 "2150"
+                    ./submit_disagg.sh "mtp=off" "dep" 8 1 16 128 128 "0.75" 0 0 "2150"
 
-                ./submit_disagg.sh "mtp=off" "dep" 5 1 8 256 256 "0.8" 0 0 "2150"
+                    ./submit_disagg.sh "mtp=off" "dep" 5 1 8 256 256 "0.8" 0 0 "2150"
+                fi
+            else
+                echo "Unsupported ISL/OSL combination for DeepSeek-R1: $isl/$osl"
+                exit 1
             fi
         else
-            echo "Unsupported ISL/OSL combination: $isl/$osl"
+            echo "Unsupported model: $MODEL. Supported models are: gpt-oss, deepseek-r1-fp4"
             exit 1
         fi
     }
@@ -212,7 +246,7 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
     echo "Found logs directory: $LOGS_DIR"
 
-    # Find all result subdirectories in this logs directory
-    RESULT_SUBDIRS=$(find "$LOGS_DIR" -name "ctx*_gen*_[td]ep*_batch*_eplb*_mtp*" -type d)
+    # Find all tp/tep/dep result subdirectories; find -name has no {a,b} alternation, so OR the globs
+    RESULT_SUBDIRS=$(find "$LOGS_DIR" -type d \( -name "ctx*_gen*_tp*_batch*_eplb*_mtp*" -o -name "ctx*_gen*_[td]ep*_batch*_eplb*_mtp*" \))
 
     if [ -z "$RESULT_SUBDIRS" ]; then
         echo "No result subdirectories found in $LOGS_DIR"
@@ -240,14 +274,17 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
 
             for result_file in $CONCURRENCY_FILES; do
                 if [ -f "$result_file" ]; then
-                    # Extract concurrency and GPU count from filename
+                    # Extract concurrency, total_gpus, prefill_gpus, and decode_gpus from filename (a field is left empty when absent)
                     filename=$(basename "$result_file")
-                    concurrency=$(echo "$filename" | sed 's/results_concurrency_\([0-9]*\)_gpus_.*\.json/\1/')
-                    gpus=$(echo "$filename" | sed 's/results_concurrency_.*_gpus_\([0-9]*\)\.json/\1/')
-                    echo "Processing concurrency $concurrency with $gpus GPUs: $result_file"
+                    concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9][0-9]*\)_.*/\1/p')
+                    gpus=$(echo "$filename" | sed -n 's/.*_gpus_\([0-9][0-9]*\).*/\1/p')
+                    prefill_gpus=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9][0-9]*\).*/\1/p')
+                    decode_gpus=$(echo "$filename" | sed -n 's/.*_gen_\([0-9][0-9]*\).*/\1/p')
+
+                    echo "Processing concurrency $concurrency with $gpus GPUs (prefill_gpus=$prefill_gpus, decode_gpus=$decode_gpus): $result_file"
 
                     # Copy the result file to workspace with a unique name
-                    WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus${gpus}.json"
+                    WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus-${gpus}_ctx-${prefill_gpus}_gen-${decode_gpus}.json"
                     cp "$result_file" "$WORKSPACE_RESULT_FILE"
 
                     echo "Copied result file to: $WORKSPACE_RESULT_FILE"