IzzyPutterman
diff --git a/tests/integration/test_lists/qa/llm_perf_core.yml b/tests/integration/test_lists/qa/llm_perf_core.yml
Lines changed: 51 additions & 91 deletions
Lines changed: 51 additions & 91 deletions
@@ -3,27 +3,25 @@ llm_perf_core:
 # ===============================================================================
 # Test Conditions Index
 # ===============================================================================
-# 1: All GPUs common tests
-# 2: L20, L40S, H100, H20, H200, GB200, B200, B300, GB300, RTX-6000D, RTX-6000-Server test cases
-# 3: A100, L20, L40S, H100, H20, H200
-# 4: A100, L40S, H100, H20, H200
-# 5: A100, H100, H20, H200 test cases
-# 6: L40S, H100, H200, H20 test cases
-# 7: H100, H200, H20 test cases
-# 8: L20, L40S, H100, H200, H20 test cases
-# 9: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
-# 10: GB200, B200, B300, GB300, RTX6000-Server test cases
-# 11: B200, GB200, B300, GB300 test cases
-# 12: B200, B300 test cases
-# 13: H100, H20, H200, B200, B300 test cases
-# 14: H100, H20, H200, B200, B300, RTX-6000 Server test cases
-# 15: RTX-6000D, RTX-6000 Server test cases
-# 16: RTX6000-Server test cases
+# 1: All GPUs common tests (L20, L40S, H100, H20, H200, GB200, B200, B300, GB300, RTX-6000D, RTX-6000-Server test cases)
+# 2: L20, L40S, H100, H20, H200
+# 3: L40S, H100, H20, H200
+# 4: H100, H20, H200 test cases
+# 5: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
+# 6: GB200, B200, B300, GB300, RTX6000-Server test cases
+# 7: B200, GB200, B300, GB300 test cases
+# 8: B200, B300 test cases
+# 9: H100, H20, H200, B200, B300 test cases
+# 10: H100, H20, H200, B200, B300, RTX-6000 Server test cases
+# 11: RTX-6000D, RTX-6000 Server test cases
+# 12: RTX6000-Server test cases
 # ===============================================================================
 
 
 # 1: All GPUs common tests
 - condition:
+    terms:
+      supports_fp8: true
     ranges:
       system_gpu_count:
         gte: 2
@@ -38,19 +36,12 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:128,128]
   - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:500,2000-con:250]
   - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:512,32]
-
-
-# 2: L20, L40S, H100, H20, H200, GB200, B200, B300, GB300, RTX-6000D, RTX-6000-Server test cases
-- condition:
-    terms:
-      supports_fp8: true
-  tests:
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:2000,500]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:500,2000]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128]
 
 
-# 3: A100, L20, L40S, H100, H20, H200
+# 2: L20, L40S, H100, H20, H200
 - condition:
     ranges:
       system_gpu_count:
@@ -110,49 +101,35 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-pytorch-bfloat16-input_output_len:500,2000-reqs:10-con:1-gpus:2]
   #Mistral-Small-3.1-24B-Instruct-2503
   - perf/test_perf.py::test_perf[mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:4096-maxnt:20000-input_output_len:20000,2000-reqs:500-con:200-gpus:2] TIMEOUT(120)
+  # pytorch backend
+  - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32]
+  - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000]
+  - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-loras:1-reqs:100-con:2-gpus:1]
+  - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000-con:250]
+  - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct_fp8-bench-pytorch-float8-input_output_len:2000,2000-con:250]
+  # Ministral-8B FP8
+  - perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxbs:1-maxnt:5000-input_output_len:5000,500-reqs:8-con:1]
+  - perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxbs:1-input_output_len:500,2000-reqs:8-con:1]
+  - perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxnt:5000-input_output_len:5000,500-reqs:500-con:250]
+  - perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-input_output_len:500,2000-reqs:500-con:250]
+    #mixtral_8x7b_v0.1_fp8 pytorch backend
+  - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-gpus:2]
+  - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-gpus:2]
 
 
-# 4: A100, L40S, H100, H20, H200
+# 3: L40S, H100, H20, H200
 - condition:
     ranges:
       system_gpu_count:
         gte: 4
       compute_capability:
-        lt: 10.0
+        gt: 8.0
+        lte: 9.0
   tests:
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-streaming-bfloat16-input_output_len:128,128-gpus:4]
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-streaming-bfloat16-input_output_len:512,32-gpus:4]
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-input_output_len:128,128-gpus:4]
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-input_output_len:512,32-gpus:4]
-
-# 5: A100, H100, H20, H200 test cases
-- condition:
-    ranges:
-      system_gpu_count:
-        gte: 8
-      compute_capability:
-        lt: 10.0
-      gpu_memory:
-        gt: 80000
-  tests:
-  - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-pytorch-float16-input_output_len:128,128-gpus:2]
-  - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-pytorch-streaming-float16-input_output_len:128,128-gpus:2]
-  - perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-bfloat16-input_output_len:2000,200-reqs:64-gpus:8]
-  - perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:2000,200-reqs:64-gpus:8]
-  - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-bfloat16-input_output_len:500,2000-gpus:8]
-  - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-bfloat16-input_output_len:2000,500-gpus:8]
-  - perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-bfloat16-input_output_len:200,2000-reqs:64-con:200-gpus:8]
-
-
-# 6: L40S, H100, H200, H20 test cases
-- condition:
-    ranges:
-      system_gpu_count:
-        gte: 8
-      compute_capability:
-        gt: 8.0
-        lte: 9.0
-  tests:
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:500,2000-gpus:4]
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000-gpus:4]
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:2000,500-gpus:4]
@@ -167,48 +144,34 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[llama_v3.1_70b_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000-gpus:8]
 
 
-# 7: H100, H200, H20 test cases
+# 4: H100, H20, H200 test cases
 - condition:
     ranges:
       system_gpu_count:
         gte: 8
       compute_capability:
         gte: 9.0
         lte: 9.0
+      gpu_memory:
+        gt: 80000
   tests:
-  # deepseek_v3_lite_fp8
+  - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-pytorch-float16-input_output_len:128,128-gpus:2]
+  - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-pytorch-streaming-float16-input_output_len:128,128-gpus:2]
+  - perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-bfloat16-input_output_len:2000,200-reqs:64-gpus:8]
+  - perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-streaming-bfloat16-input_output_len:2000,200-reqs:64-gpus:8]
+  - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-bfloat16-input_output_len:500,2000-gpus:8]
+  - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-bfloat16-input_output_len:2000,500-gpus:8]
+  - perf/test_perf.py::test_perf[llama_v3.1_70b_instruct-bench-pytorch-bfloat16-input_output_len:200,2000-reqs:64-con:200-gpus:8]
+  # deepseek_v3_lite_fp8
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-input_output_len:2000,500]
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-streaming-float8-input_output_len:2000,500]
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-input_output_len:500,2000]
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-maxbs:512-maxnt:2048-kv_frac:0.85-input_output_len:3000,500-reqs:200]
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-streaming-float8-input_output_len:128,128]
   - perf/test_perf.py::test_perf[mistral_small_v3.1_24b-bench-pytorch-bfloat16-maxbs:4096-maxnt:20000-input_output_len:20000,2000-reqs:500-con:200] TIMEOUT(120)
 
-# 8: L20, L40S, H100, H200, H20 test cases
-- condition:
-    ranges:
-      system_gpu_count:
-        gte: 2
-      compute_capability:
-        gt: 8.0
-        lte: 9.0
-  tests:
-  #pytorch backend
-  - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:512,32]
-  - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000]
-  - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-loras:1-reqs:100-con:2-gpus:1]
-  - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000-con:250]
-  - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct_fp8-bench-pytorch-float8-input_output_len:2000,2000-con:250]
-  # Ministral-8B FP8
-  - perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxbs:1-maxnt:5000-input_output_len:5000,500-reqs:8-con:1]
-  - perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxbs:1-input_output_len:500,2000-reqs:8-con:1]
-  - perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-maxnt:5000-input_output_len:5000,500-reqs:500-con:250]
-  - perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-input_output_len:500,2000-reqs:500-con:250]
-    #mixtral_8x7b_v0.1_fp8 pytorch backend
-  - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-gpus:2]
-  - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-gpus:2]
 
-# 9: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
+# 5: H100, H20, H200, GB200, B200, B300, GB300, RTX6000-D, RTX6000-Server test cases
 - condition:
     ranges:
       system_gpu_count:
@@ -224,7 +187,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8]
 
 
-# 10: GB200, B200, B300, GB300, RTX6000-Server test cases
+# 6: GB200, B200, B300, GB300, RTX6000-Server test cases
 - condition:
     ranges:
       system_gpu_count:
@@ -274,7 +237,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[qwen3_235b_a22b_fp4-bench-pytorch-float4-input_output_len:1000,2000-con:512-ep:4-gpus:4]
 
 
-# 11: B200, GB200 B300, GB300 test cases
+# 7: B200, GB200, B300, GB300 test cases
 - condition:
     ranges:
       system_gpu_count:
@@ -290,7 +253,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[deepseek_r1_0528_fp4-bench-pytorch-float4-maxbs:1000-maxnt:5000-kv_frac:0.85-input_output_len:5000,500-reqs:2000-ep:4-tp:4-gpus:4] TIMEOUT(120)
   - perf/test_perf.py::test_perf[deepseek_r1_0528_fp4-bench-pytorch-float4-maxbs:32-maxnt:32768-input_output_len:8192,1024-reqs:20-con:1-ep:1-tp:4-gpus:4] TIMEOUT(120)
 
-# 12: B200, B300 test cases
+# 8: B200, B300 test cases
 - condition:
     ranges:
       system_gpu_count:
@@ -319,7 +282,6 @@ llm_perf_core:
   # deepseek_r1_0528_fp4
   - perf/test_perf.py::test_perf[deepseek_r1_0528_fp4-bench-pytorch-float4-kv_frac:0.85-input_output_len:1000,1000-reqs:20000-ep:8-tp:8-gpus:8] TIMEOUT(120)
   - perf/test_perf.py::test_perf[deepseek_r1_0528_fp4-bench-pytorch-float4-kv_frac:0.85-input_output_len:1000,2000-reqs:3000-ep:8-tp:8-gpus:8] TIMEOUT(120)
-  - perf/test_perf.py::test_perf[deepseek_r1_0528_fp4-bench-pytorch-float4-maxbs:1000-maxnt:5000-kv_frac:0.85-input_output_len:5000,500-reqs:20000-ep:4-tp:4-gpus:4] TIMEOUT(120)
 
   # gpt_oss_120b_fp4
   - perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-float4-maxbs:720-maxnt:16384-input_output_len:1024,1024-reqs:1280-con:256-ep:8-tp:8-gpus:8]
@@ -329,7 +291,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-float4-maxbs:720-maxnt:16384-input_output_len:1024,1024-reqs:8-con:1-ep:8-tp:8-gpus:8]
   - perf/test_perf.py::test_perf[gpt_oss_120b_fp4-bench-pytorch-float4-maxbs:720-maxnt:16384-input_output_len:1024,1024-reqs:100-con:32-ep:8-tp:8-gpus:8]
 
-# 13: H100, H20, H200, B200, B300 test cases
+# 9: H100, H20, H200, B200, B300 test cases
 - condition:
     ranges:
       system_gpu_count:
@@ -392,11 +354,9 @@ llm_perf_core:
   # for chunked prefill cases
   - perf/test_perf.py::test_perf[deepseek_r1_fp8-bench-pytorch-float8-maxbs:512-maxnt:2048-kv_frac:0.85-input_output_len:5000,500-reqs:200-ep:8-tp:8-gpus:8] TIMEOUT(120)
   - perf/test_perf.py::test_perf[deepseek_r1_fp8-bench-pytorch-float8-maxbs:256-maxnt:1024-kv_frac:0.85-input_output_len:2000,2000-reqs:200-ep:8-tp:8-gpus:8] TIMEOUT(120)
-  # qwen3_235b_a22b_fp8
-  - perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8] TIMEOUT(60)
 
 
-# 14: H100, H20, H200, B200, B300, RTX-6000 Server test cases
+# 10: H100, H20, H200, B200, B300, RTX-6000 Server test cases
 - condition:
     ranges:
       system_gpu_count:
@@ -408,7 +368,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-ep:8-tp:8-gpus:8]
 
 
-# 15: RTX-6000D, RTX-6000 Server test cases
+# 11: RTX-6000D, RTX-6000 Server test cases
 - condition:
     ranges:
       system_gpu_count:
@@ -442,7 +402,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp4-bench-pytorch-float4-input_output_len:128,128-kv_cache_dtype:fp8-tp:2-gpus:2]
 
 
-# 16: RTX6000-Server test cases
+# 12: RTX6000-Server test cases
 - condition:
     ranges:
       system_gpu_count: