Skip to content

Commit 617440d

Browse files
authored
[None][test] Remove A100 test cases from QA perf scope (NVIDIA#11712)
Signed-off-by: yufeiwu-nv <230315618+yufeiwu-nv@users.noreply.github.com>
1 parent 3f4c42d commit 617440d

2 files changed

Lines changed: 79 additions & 135 deletions

File tree

tests/integration/test_lists/qa/llm_perf_core.yml

Lines changed: 51 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,25 @@ llm_perf_core:
33
# ===============================================================================
44
# Test Conditions Index
55
# ===============================================================================
6-
# 1: All GPUs common tests
7-
# 2: L20, L40S, H100, H20, H200, GB200, B200, B300, GB300, RTX-6000D, RTX-6000-Server test cases
8-
# 3: A100, L20, L40S, H100, H20, H200
9-
# 4: A100, L40S, H100, H20, H200
10-
# 5: A100, H100, H20, H200 test cases
11-
# 6: L40S, H100, H200, H20 test cases
12-
# 7: H100, H200, H20 test cases
13-
# 8: L20, L40S, H100, H200, H20 test cases
14-
# 9: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
15-
# 10: GB200, B200, B300, GB300, RTX6000-Server test cases
16-
# 11: B200, GB200, B300, GB300 test cases
17-
# 12: B200, B300 test cases
18-
# 13: H100, H20, H200, B200, B300 test cases
19-
# 14: H100, H20, H200, B200, B300, RTX-6000 Server test cases
20-
# 15: RTX-6000D, RTX-6000 Server test cases
21-
# 16: RTX6000-Server test cases
6+
# 1: All GPUs common tests (L20, L40S, H100, H20, H200, GB200, B200, B300, GB300, RTX-6000D, RTX-6000-Server test cases)
7+
# 2: L20, L40S, H100, H20, H200
8+
# 3: L40S, H100, H20, H200
9+
# 4: H100, H20, H200 test cases
10+
# 5: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
11+
# 6: GB200, B200, B300, GB300, RTX6000-Server test cases
12+
# 7: B200, GB200, B300, GB300 test cases
13+
# 8: B200, B300 test cases
14+
# 9: H100, H20, H200, B200, B300 test cases
15+
# 10: H100, H20, H200, B200, B300, RTX-6000 Server test cases
16+
# 11: RTX-6000D, RTX-6000 Server test cases
17+
# 12: RTX6000-Server test cases
2218
# ===============================================================================
2319

2420

2521
# 1: All GPUs common tests
2622
- condition:
23+
terms:
24+
supports_fp8: true
2725
ranges:
2826
system_gpu_count:
2927
gte: 2
@@ -38,19 +36,12 @@ llm_perf_core:
3836
- perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:128,128]
3937
- perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:500,2000-con:250]
4038
- perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:512,32]
41-
42-
43-
# 2: L20, L40S, H100, H20, H200, GB200, B200, B300, GB300, RTX-6000D, RTX-6000-Server test cases
44-
- condition:
45-
terms:
46-
supports_fp8: true
47-
tests:
4839
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:2000,500]
4940
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:500,2000]
5041
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
5142

5243

53-
# 3: A100, L20, L40S, H100, H20, H200
44+
# 2: L20, L40S, H100, H20, H200
5445
- condition:
5546
ranges:
5647
system_gpu_count:
@@ -110,49 +101,35 @@ llm_perf_core:
110101
- perf/test_perf.py::test_perf[llama_v3.2_1b-bench-pytorch-bfloat16-input_output_len:500,2000-reqs:10-con:1-gpus:2]
111102
#Mistral-Small-3.1-24B-Instruct-2503
112103
- perf/test_perf.py::test_perf[mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:4096-maxnt:20000-input_output_len:20000,2000-reqs:500-con:200-gpus:2] TIMEOUT(120)
104+
#pytorch backend
105+
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32]
106+
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000]
107+
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-loras:1-reqs:100-con:2-gpus:1]
108+
- perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000-con:250]
109+
- perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct_fp8-bench-pytorch-float8-input_output_len:2000,2000-con:250]
110+
# Ministral-8B FP8
111+
- perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxbs:1-maxnt:5000-input_output_len:5000,500-reqs:8-con:1]
112+
- perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxbs:1-input_output_len:500,2000-reqs:8-con:1]
113+
- perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxnt:5000-input_output_len:5000,500-reqs:500-con:250]
114+
- perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-input_output_len:500,2000-reqs:500-con:250]
115+
#mixtral_8x7b_v0.1_fp8 pytorch backend
116+
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-gpus:2]
117+
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-gpus:2]
113118

114119

115-
# 4: A100, L40S, H100, H20, H200
120+
# 3: L40S, H100, H20, H200
116121
- condition:
117122
ranges:
118123
system_gpu_count:
119124
gte: 4
120125
compute_capability:
121-
lt: 10.0
126+
gt: 8.0
127+
lte: 9.0
122128
tests:
123129
- perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-streaming-bfloat16-input_output_len:128,128-gpus:4]
124130
- perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-streaming-bfloat16-input_output_len:512,32-gpus:4]
125131
- perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-input_output_len:128,128-gpus:4]
126132
- perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-input_output_len:512,32-gpus:4]
127-
128-
# 5: A100, H100, H20, H200 test cases
129-
- condition:
130-
ranges:
131-
system_gpu_count:
132-
gte: 8
133-
compute_capability:
134-
lt: 10.0
135-
gpu_memory:
136-
gt: 80000
137-
tests:
138-
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-pytorch-float16-input_output_len:128,128-gpus:2]
139-
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-pytorch-streaming-float16-input_output_len:128,128-gpus:2]
140-
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-bfloat16-input_output_len:2000,200-reqs:64-gpus:8]
141-
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:2000,200-reqs:64-gpus:8]
142-
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-bfloat16-input_output_len:500,2000-gpus:8]
143-
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-bfloat16-input_output_len:2000,500-gpus:8]
144-
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-bfloat16-input_output_len:200,2000-reqs:64-con:200-gpus:8]
145-
146-
147-
# 6: L40S, H100, H200, H20 test cases
148-
- condition:
149-
ranges:
150-
system_gpu_count:
151-
gte: 8
152-
compute_capability:
153-
gt: 8.0
154-
lte: 9.0
155-
tests:
156133
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:500,2000-gpus:4]
157134
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000-gpus:4]
158135
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:2000,500-gpus:4]
@@ -167,48 +144,34 @@ llm_perf_core:
167144
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000-gpus:8]
168145

169146

170-
# 7: H100, H200, H20 test cases
147+
# 4: H100, H20, H200 test cases
171148
- condition:
172149
ranges:
173150
system_gpu_count:
174151
gte: 8
175152
compute_capability:
176153
gte: 9.0
177154
lte: 9.0
155+
gpu_memory:
156+
gt: 80000
178157
tests:
179-
# deepseek_v3_lite_fp8
158+
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-pytorch-float16-input_output_len:128,128-gpus:2]
159+
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-pytorch-streaming-float16-input_output_len:128,128-gpus:2]
160+
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-bfloat16-input_output_len:2000,200-reqs:64-gpus:8]
161+
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:2000,200-reqs:64-gpus:8]
162+
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-bfloat16-input_output_len:500,2000-gpus:8]
163+
- perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-bfloat16-input_output_len:2000,500-gpus:8]
164+
- perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-bfloat16-input_output_len:200,2000-reqs:64-con:200-gpus:8]
165+
# deepseek_v3_lite_fp8
180166
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-input_output_len:2000,500]
181167
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-streaming-float8-input_output_len:2000,500]
182168
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-input_output_len:500,2000]
183169
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-maxbs:512-maxnt:2048-kv_frac:0.85-input_output_len:3000,500-reqs:200]
184170
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-streaming-float8-input_output_len:128,128]
185171
- perf/test_perf.py::test_perf[mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:4096-maxnt:20000-input_output_len:20000,2000-reqs:500-con:200] TIMEOUT(120)
186172

187-
# 8: L20, L40S, H100, H200, H20 test cases
188-
- condition:
189-
ranges:
190-
system_gpu_count:
191-
gte: 2
192-
compute_capability:
193-
gt: 8.0
194-
lte: 9.0
195-
tests:
196-
#pytorch backend
197-
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32]
198-
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000]
199-
- perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-loras:1-reqs:100-con:2-gpus:1]
200-
- perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000-con:250]
201-
- perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct_fp8-bench-pytorch-float8-input_output_len:2000,2000-con:250]
202-
# Ministral-8B FP8
203-
- perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxbs:1-maxnt:5000-input_output_len:5000,500-reqs:8-con:1]
204-
- perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxbs:1-input_output_len:500,2000-reqs:8-con:1]
205-
- perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxnt:5000-input_output_len:5000,500-reqs:500-con:250]
206-
- perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-input_output_len:500,2000-reqs:500-con:250]
207-
#mixtral_8x7b_v0.1_fp8 pytorch backend
208-
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-gpus:2]
209-
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-gpus:2]
210173

211-
# 9: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
174+
# 5: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
212175
- condition:
213176
ranges:
214177
system_gpu_count:
@@ -224,7 +187,7 @@ llm_perf_core:
224187
- perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8]
225188

226189

227-
# 10: GB200, B200, B300, GB300, RTX6000-Server test cases
190+
# 6: GB200, B200, B300, GB300, RTX6000-Server test cases
228191
- condition:
229192
ranges:
230193
system_gpu_count:
@@ -274,7 +237,7 @@ llm_perf_core:
274237
- perf/test_perf.py::test_perf[qwen3_235b_a22b_fp4-bench-pytorch-float4-input_output_len:1000,2000-con:512-ep:4-gpus:4]
275238

276239

277-
# 11: B200, GB200 B300, GB300 test cases
240+
# 7: B200, GB200, B300, GB300 test cases
278241
- condition:
279242
ranges:
280243
system_gpu_count:
@@ -290,7 +253,7 @@ llm_perf_core:
290253
- perf/test_perf.py::test_perf[deepseek_r1_0528_fp4-bench-pytorch-float4-maxbs:1000-maxnt:5000-kv_frac:0.85-input_output_len:5000,500-reqs:2000-ep:4-tp:4-gpus:4] TIMEOUT(120)
291254
- perf/test_perf.py::test_perf[deepseek_r1_0528_fp4-bench-pytorch-float4-maxbs:32-maxnt:32768-input_output_len:8192,1024-reqs:20-con:1-ep:1-tp:4-gpus:4] TIMEOUT(120)
292255

293-
# 12: B200, B300 test cases
256+
# 8: B200, B300 test cases
294257
- condition:
295258
ranges:
296259
system_gpu_count:
@@ -319,7 +282,6 @@ llm_perf_core:
319282
# deepseek_r1_0528_fp4
320283
- perf/test_perf.py::test_perf[deepseek_r1_0528_fp4-bench-pytorch-float4-kv_frac:0.85-input_output_len:1000,1000-reqs:20000-ep:8-tp:8-gpus:8] TIMEOUT(120)
321284
- perf/test_perf.py::test_perf[deepseek_r1_0528_fp4-bench-pytorch-float4-kv_frac:0.85-input_output_len:1000,2000-reqs:3000-ep:8-tp:8-gpus:8] TIMEOUT(120)
322-
- perf/test_perf.py::test_perf[deepseek_r1_0528_fp4-bench-pytorch-float4-maxbs:1000-maxnt:5000-kv_frac:0.85-input_output_len:5000,500-reqs:20000-ep:4-tp:4-gpus:4] TIMEOUT(120)
323285

324286
# gpt_oss_120b_fp4
325287
- perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-float4-maxbs:720-maxnt:16384-input_output_len:1024,1024-reqs:1280-con:256-ep:8-tp:8-gpus:8]
@@ -329,7 +291,7 @@ llm_perf_core:
329291
- perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-float4-maxbs:720-maxnt:16384-input_output_len:1024,1024-reqs:8-con:1-ep:8-tp:8-gpus:8]
330292
- perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-float4-maxbs:720-maxnt:16384-input_output_len:1024,1024-reqs:100-con:32-ep:8-tp:8-gpus:8]
331293

332-
# 13: H100, H20, H200, B200, B300 test cases
294+
# 9: H100, H20, H200, B200, B300 test cases
333295
- condition:
334296
ranges:
335297
system_gpu_count:
@@ -392,11 +354,9 @@ llm_perf_core:
392354
# for chunked prefill cases
393355
- perf/test_perf.py::test_perf[deepseek_r1_fp8-bench-pytorch-float8-maxbs:512-maxnt:2048-kv_frac:0.85-input_output_len:5000,500-reqs:200-ep:8-tp:8-gpus:8] TIMEOUT(120)
394356
- perf/test_perf.py::test_perf[deepseek_r1_fp8-bench-pytorch-float8-maxbs:256-maxnt:1024-kv_frac:0.85-input_output_len:2000,2000-reqs:200-ep:8-tp:8-gpus:8] TIMEOUT(120)
395-
# qwen3_235b_a22b_fp8
396-
- perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8] TIMEOUT(60)
397357

398358

399-
# 14: H100, H20, H200, B200, B300, RTX-6000 Server test cases
359+
# 10: H100, H20, H200, B200, B300, RTX-6000 Server test cases
400360
- condition:
401361
ranges:
402362
system_gpu_count:
@@ -408,7 +368,7 @@ llm_perf_core:
408368
- perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-ep:8-tp:8-gpus:8]
409369

410370

411-
# 15: RTX-6000D, RTX-6000 Server test cases
371+
# 11: RTX-6000D, RTX-6000 Server test cases
412372
- condition:
413373
ranges:
414374
system_gpu_count:
@@ -442,7 +402,7 @@ llm_perf_core:
442402
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8-tp:2-gpus:2]
443403

444404

445-
# 16: RTX6000-Server test cases
405+
# 12: RTX6000-Server test cases
446406
- condition:
447407
ranges:
448408
system_gpu_count:

0 commit comments

Comments
 (0)