Skip to content

Commit 85ec854

Browse files
committed
more test variants for qwen3.5
Signed-off-by: Anthony Chang <27950904+rosenrodt@users.noreply.github.com>
1 parent b9e6069 commit 85ec854

File tree

4 files changed

+29
-14
lines changed

4 files changed

+29
-14
lines changed

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5831,20 +5831,24 @@ class TestQwen3_5_35B_A3B(LlmapiAccuracyTestHarness):
58315831
)
58325832

58335833
@pytest.mark.parametrize("moe_backend", ["CUTLASS", "TRTLLM"])
5834-
def test_bf16(self, moe_backend):
5834+
@pytest.mark.parametrize(
5835+
"tp_size",
5836+
[1, pytest.param(2, marks=pytest.mark.skip_less_device(2))],
5837+
ids=["tp1", "tp2"],
5838+
)
5839+
def test_bf16(self, moe_backend, tp_size):
58355840
if moe_backend == "TRTLLM" and get_sm_version() not in (100, 103):
58365841
pytest.skip(f"{moe_backend} backend supports SM 100 and 103 only")
58375842

5838-
world_size = 1
58395843
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8,
58405844
enable_block_reuse=False)
58415845
cuda_graph_config = CudaGraphConfig(
58425846
enable_padding=True, batch_sizes=[1, 2, 4, 8, 16, 32, 64, 128])
58435847
moe_config = MoeConfig(backend=moe_backend)
58445848

58455849
with LLM(self.MODEL_PATH,
5846-
tensor_parallel_size=world_size,
5847-
moe_expert_parallel_size=world_size,
5850+
tensor_parallel_size=tp_size,
5851+
moe_expert_parallel_size=1,
58485852
max_seq_len=4096,
58495853
max_num_tokens=4096,
58505854
max_batch_size=128,
@@ -5856,20 +5860,24 @@ def test_bf16(self, moe_backend):
58565860
task.evaluate(llm,
58575861
extra_evaluator_kwargs=self.EXTRA_EVALUATOR_KWARGS)
58585862

5859-
def test_fp8(self):
5863+
@pytest.mark.parametrize(
5864+
"tp_size",
5865+
[1, pytest.param(2, marks=pytest.mark.skip_less_device(2))],
5866+
ids=["tp1", "tp2"],
5867+
)
5868+
def test_fp8(self, tp_size):
58605869
model_dir = f"{self.MODEL_PATH}-FP8"
58615870
# Model is being added to CI. Skip at the moment.
58625871
if not os.path.exists(model_dir):
58635872
pytest.skip(f"Model directory {model_dir} does not exist")
58645873

5865-
world_size = 1
58665874
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.8,
58675875
enable_block_reuse=False)
58685876
moe_config = MoeConfig(backend='DEEPGEMM')
58695877

58705878
with LLM(model_dir,
5871-
tensor_parallel_size=world_size,
5872-
moe_expert_parallel_size=world_size,
5879+
tensor_parallel_size=tp_size,
5880+
moe_expert_parallel_size=1,
58735881
max_seq_len=4096,
58745882
enable_chunked_prefill=True,
58755883
kv_cache_config=kv_cache_config,

tests/integration/test_lists/qa/llm_function_core.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,9 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cu
186186
accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm]
187187
accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_attention_dp]
188188
accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_trtllm_eagle3]
189-
accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_bf16
190-
accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_fp8
189+
accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_bf16[tp1-CUTLASS]
190+
accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_bf16[tp1-TRTLLM]
191+
accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_fp8[tp1]
191192
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[v1_kv_cache-True-True-cutlass-auto]
192193
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[v1_kv_cache-True-True-cutlass-fp8]
193194
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[v1_kv_cache-True-True-triton-auto]

tests/integration/test_lists/qa/llm_function_core_sanity.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,10 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_4B::test_eagle3
175175
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency]
176176
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[fp8-latency]
177177
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[mxfp8-latency]
178-
accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_bf16
179-
accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_fp8
178+
accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_bf16[tp1-CUTLASS]
179+
accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_bf16[tp1-TRTLLM]
180+
accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_fp8[tp1]
181+
accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_fp8[tp2]
180182

181183
# disaggregated serving accuracy test
182184
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=False]

tests/integration/test_lists/test-db/l0_b200.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,12 @@ l0_b200:
7272
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_w4a16_mxfp4[latency-TRTLLM]
7373
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention[target_sparsity_0.5]
7474
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B_Instruct_2507::test_skip_softmax_attention[target_sparsity_0.9]
75-
- accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_bf16
76-
- accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_fp8
75+
- accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_bf16[tp1-CUTLASS]
76+
- accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_bf16[tp1-TRTLLM]
77+
- accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_bf16[tp2-CUTLASS]
78+
- accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_bf16[tp2-TRTLLM]
79+
- accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_fp8[tp1]
80+
- accuracy/test_llm_api_pytorch.py::TestQwen3_5_35B_A3B::test_fp8[tp2]
7781
- accuracy/test_llm_api_pytorch.py::TestQwen3NextInstruct::test_nvfp4[tp1-cutlass]
7882
- disaggregated/test_workers.py::test_workers_kv_cache_aware_router_eviction[TinyLlama-1.1B-Chat-v1.0] # nvbugs 5300551
7983
- test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B]

0 commit comments

Comments (0)