diff --git a/.github/README.md b/.github/README.md index 69fc1069f..003b8809f 100644 --- a/.github/README.md +++ b/.github/README.md @@ -96,7 +96,7 @@ full-sweep --precision fp8 --seq-lens 1k8k --config-files .github/configs/nvidia **Test all TRT configs on H200 runners:** ``` -full-sweep --framework trt --runner-type h200 h200-trt --config-files .github/configs/nvidia-master.yaml --runner-config .github/configs/runners.yaml +full-sweep --framework trt --runner-type h200 --config-files .github/configs/nvidia-master.yaml --runner-config .github/configs/runners.yaml ``` **Quick smoke test of all configs (highest TP, lowest concurrency only):** diff --git a/.github/configs/CONFIGS.md b/.github/configs/CONFIGS.md index 218e17821..9d3c24309 100644 --- a/.github/configs/CONFIGS.md +++ b/.github/configs/CONFIGS.md @@ -49,4 +49,4 @@ Notes: ## Runners -The `runners.yaml` config represents the available runners in the repository. The keys are the runner *types* (i.e., the GPUs as well as some specific combinations like `h200-trt`) whereas the value is a list of *runner nodes*. This config is used to verify the master configs. +The `runners.yaml` config represents the available runners in the repository. The keys are the runner *types* (i.e., the GPUs as well as some specific combinations like `b200-trt`) whereas the value is a list of *runner nodes*. This config is used to verify the master configs. 
diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 4f8ea4ca7..917136739 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -144,7 +144,7 @@ dsr1-fp8-h200-trt: image: nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc2.post2 model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 - runner: h200-trt + runner: h200 precision: fp8 framework: trt # For all sequence lengths, EP=TP @@ -258,7 +258,7 @@ gptoss-fp4-h200-trt: image: nvcr.io#nvidia/tensorrt-llm/release:gpt-oss-dev model: openai/gpt-oss-120b model-prefix: gptoss - runner: h200-trt + runner: h200 precision: fp4 framework: trt # For all sequence lengths, EP=TP, DP_ATTENTION=false diff --git a/.github/configs/runners.yaml b/.github/configs/runners.yaml index ccbb15cbc..948db2754 100644 --- a/.github/configs/runners.yaml +++ b/.github/configs/runners.yaml @@ -14,17 +14,6 @@ h200: - 'h200-nv_1' - 'h200-nv_2' - 'h200-nv_3' -h200-trt: -- 'h200-cw_0' -- 'h200-cw_1' -- 'h200-nb_0' -- 'h200-nb_1' -- 'h200-nb_2' -- 'h200-nb_3' -- 'h200-nv_0' -- 'h200-nv_1' -- 'h200-nv_2' -- 'h200-nv_3' b200-trt: - 'b200-nv_0' - 'b200-nv_1' diff --git a/.github/workflows/collect-results.yml b/.github/workflows/collect-results.yml index 2bd499090..aa00aa2ae 100644 --- a/.github/workflows/collect-results.yml +++ b/.github/workflows/collect-results.yml @@ -26,7 +26,7 @@ jobs: pattern: ${{ inputs.exp-name && format('{0}_*', inputs.exp-name) || '*' }} - name: Print summary - run: python3 utils/summarize.py results/ ${{ inputs.exp-name || 'all' }} >> $GITHUB_STEP_SUMMARY + run: python3 utils/summarize.py results/ >> $GITHUB_STEP_SUMMARY - name: Aggregate results run: python3 utils/collect_results.py results/ ${{ inputs.exp-name || 'all' }} diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index 8b7654ff9..e675ea93b 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -55,7 +55,7 @@ jobs: secrets: inherit 
calc-success-rate: - needs: test-sweep + needs: collect-results if: ${{ always() }} runs-on: ubuntu-latest diff --git a/.github/workflows/full-sweep-test.yml b/.github/workflows/full-sweep-test.yml index 9cd08e163..6c25d2c4b 100644 --- a/.github/workflows/full-sweep-test.yml +++ b/.github/workflows/full-sweep-test.yml @@ -56,7 +56,7 @@ jobs: set -x # Build runner type filters based on inputs - RUNNER_TYPES="${{ inputs.use_h100 && 'h100' || '' }} ${{ inputs.use_h200 && 'h200' || '' }} ${{ inputs.use_h200 && 'h200 h200-trt' || '' }} ${{ inputs.use_b200 && 'b200 b200-trt' || '' }} ${{ inputs.use_mi300x && 'mi300x' || '' }} ${{ inputs.use_mi325x && 'mi325x' || '' }} ${{ inputs.use_mi355x && 'mi355x' || '' }}" + RUNNER_TYPES="${{ inputs.use_h100 && 'h100' || '' }} ${{ inputs.use_h200 && 'h200' || '' }} ${{ inputs.use_b200 && 'b200 b200-trt' || '' }} ${{ inputs.use_mi300x && 'mi300x' || '' }} ${{ inputs.use_mi325x && 'mi325x' || '' }} ${{ inputs.use_mi355x && 'mi355x' || '' }}" # DSR1 doesn't support H100, so exclude it DSR1_RUNNER_TYPES=$(echo $RUNNER_TYPES | sed 's/\bh100\b//g' | xargs) diff --git a/utils/matrix-logic/generate_sweep_configs.py b/utils/matrix-logic/generate_sweep_configs.py index bb0e22911..0c3ccac51 100644 --- a/utils/matrix-logic/generate_sweep_configs.py +++ b/utils/matrix-logic/generate_sweep_configs.py @@ -824,7 +824,7 @@ def main(): test_config_parser.add_argument( '--runner-type', required=True, - help='Runner type (e.g., h200-trt, h100)' + help='Runner type (e.g., b200-trt, h100)' ) test_config_parser.add_argument( '--runner-config', @@ -847,7 +847,7 @@ def main(): test_config_parser.add_argument( '--runner-type', required=True, - help='Runner type (e.g., h200-trt, h100)' + help='Runner type (e.g., b200-trt, h100)' ) test_config_parser.add_argument( '--model-prefix',