|
82 | 82 | dp-attn: ${{ matrix.config.dp-attn }} |
83 | 83 | conc: ${{ matrix.config.conc }} |
84 | 84 |
|
85 | | - # This is a workaround until we can integrate GB200 into master configs. |
86 | | - benchmark-gb200: |
87 | | - uses: ./.github/workflows/benchmark-multinode-tmpl.yml |
88 | | - name: gb200 1k8k sweep |
89 | | - strategy: |
90 | | - fail-fast: false |
91 | | - matrix: |
92 | | - config: |
93 | | - - { |
94 | | - "image": "nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3", |
95 | | - "model": "deepseek-r1-fp4", |
96 | | - "model-prefix": "dsr1", |
97 | | - "precision": "fp4", |
98 | | - "framework": "dynamo-trtllm", |
99 | | - "mtp": "off", |
100 | | - } |
101 | | - - { |
102 | | - "image": "nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3", |
103 | | - "model": "deepseek-r1-fp4", |
104 | | - "model-prefix": "dsr1", |
105 | | - "precision": "fp4", |
106 | | - "framework": "dynamo-trtllm", |
107 | | - "mtp": "on", |
108 | | - } |
109 | | - - { |
110 | | - "image": "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1", |
111 | | - "model": "deepseek-ai/DeepSeek-R1-0528", |
112 | | - "model-prefix": "dsr1", |
113 | | - "precision": "fp8", |
114 | | - "framework": "dynamo-sglang", |
115 | | - "mtp": "off", |
116 | | - } |
117 | | - secrets: inherit |
118 | | - with: |
119 | | - runner: gb200 |
120 | | - image: ${{ matrix.config.image }} |
121 | | - model: ${{ matrix.config.model }} |
122 | | - framework: ${{ matrix.config.framework }} |
123 | | - precision: ${{ matrix.config.precision }} |
124 | | - exp-name: ${{ matrix.config.model-prefix }}_1k8k |
125 | | - isl: 1024 |
126 | | - osl: 8192 |
127 | | - max-model-len: 9216 |
128 | | - mtp-mode: ${{ matrix.config.mtp }} |
129 | | - |
130 | 85 | collect-dsr1-results: |
131 | | - needs: [benchmark-dsr1, benchmark-gb200] |
| 86 | + needs: benchmark-dsr1 |
132 | 87 | if: ${{ always() }} |
133 | 88 | uses: ./.github/workflows/collect-results.yml |
134 | 89 | secrets: inherit |
|
0 commit comments