1414 runs-on : ubuntu-latest
1515 outputs :
1616 search-space-config : ${{ steps.get-jobs.outputs.search-space-config }}
17- gb200-config : ${{ steps.get-jobs.outputs.gb200-config }}
1817 steps :
1918 - name : Checkout code
2019 uses : actions/checkout@v4
@@ -32,25 +31,19 @@ jobs:
3231 pattern = r'^([^_]+)_([^_]+)$'
3332
3433 matching = []
35- gb200_labels = []
3634 for label in labels:
3735 match = re.match(pattern, label['name'])
3836 if match:
3937 runner_type = match.group(1)
4038 model_prefix = match.group(2)
4139
42- if runner_type == 'gb200':
43- gb200_labels.append({'runner-type': runner_type, 'model-prefix': model_prefix})
44- print(f"Matched GB200 label: {label['name']}")
45- else:
46- matching.append({'runner-type': runner_type, 'model-prefix': model_prefix})
47- print(f"Matched label: {label['name']}")
40+ matching.append({'runner-type': runner_type, 'model-prefix': model_prefix})
41+ print(f"Matched label: {label['name']}")
4842
49- if not matching and not gb200_labels :
43+ if not matching:
5044 print("No matching labels found")
5145 with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
5246 f.write('search-space-config=[]\n')
53- f.write('gb200-config=[]\n')
5447 exit(0)
5548
5649 # Generate configs for standard labels
@@ -80,24 +73,10 @@ jobs:
8073
8174 all_configs.extend(json.loads(result.stdout))
8275
83- # Handle GB200 configs (use static config like in full-sweep-test.yml)
84- # FIXME: https://github.com/InferenceMAX/InferenceMAX/issues/171
85- gb200_configs = []
86- if gb200_labels:
87- # Static GB200 config from full-sweep-test.yml
88- gb200_configs = [
89- {"image": "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1", "model": "deepseek-ai/DeepSeek-R1-0528", "framework": "dynamo-sglang", "precision": "fp4", "mtp": "on"},
90- {"image": "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1", "model": "deepseek-ai/DeepSeek-R1-0528", "framework": "dynamo-sglang", "precision": "fp4", "mtp": "off"},
91- {"image": "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3", "model": "deepseek-r1-fp4", "framework": "dynamo-trtllm", "precision": "fp4", "mtp": "on"},
92- {"image": "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3", "model": "deepseek-r1-fp4", "framework": "dynamo-trtllm", "precision": "fp4", "mtp": "off"}
93- ]
94-
9576 print(f"Total standard configs: {len(all_configs)}")
96- print(f"Total GB200 configs: {len(gb200_configs)}")
9777
9878 with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
9979 f.write(f'search-space-config={json.dumps(all_configs)}\n')
100- f.write(f'gb200-config={json.dumps(gb200_configs)}\n')
10180
10281 validate :
10382 needs : get-jobs
@@ -124,31 +103,8 @@ jobs:
124103 dp-attn : ${{ matrix.config.dp-attn }}
125104 conc : ${{ matrix.config.conc }}
126105
127- # FIXME: https://github.com/InferenceMAX/InferenceMAX/issues/171
128- validate-gb200 :
129- needs : get-jobs
130- if : ${{ needs.get-jobs.outputs.gb200-config != '[]' }}
131- uses : ./.github/workflows/benchmark-multinode-tmpl.yml
132- name : validate gb200 /
133- strategy :
134- fail-fast : false
135- matrix :
136- config : ${{ fromJson(needs.get-jobs.outputs.gb200-config) }}
137- secrets : inherit
138- with :
139- runner : gb200
140- image : ${{ matrix.config.image }}
141- model : ${{ matrix.config.model }}
142- framework : ${{ matrix.config.framework }}
143- precision : ${{ matrix.config.precision }}
144- exp-name : dsr1_1k1k
145- isl : " 1024"
146- osl : " 1024"
147- max-model-len : 2048
148- mtp-mode : ${{ matrix.config.mtp }}
149-
150106 calc-success-rate :
151- needs : [ validate, validate-gb200]
107+ needs : validate
152108 if : ${{ always() }}
153109 runs-on : ubuntu-latest
154110
0 commit comments