@@ -5831,20 +5831,24 @@ class TestQwen3_5_35B_A3B(LlmapiAccuracyTestHarness):
58315831 )
58325832
58335833 @pytest .mark .parametrize ("moe_backend" , ["CUTLASS" , "TRTLLM" ])
5834- def test_bf16 (self , moe_backend ):
5834+ @pytest .mark .parametrize (
5835+ "tp_size" ,
5836+ [1 , pytest .param (2 , marks = pytest .mark .skip_less_device (2 ))],
5837+ ids = ["tp1" , "tp2" ],
5838+ )
5839+ def test_bf16 (self , moe_backend , tp_size ):
58355840 if moe_backend == "TRTLLM" and get_sm_version () not in (100 , 103 ):
58365841 pytest .skip (f"{ moe_backend } backend supports SM 100 and 103 only" )
58375842
5838- world_size = 1
58395843 kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.8 ,
58405844 enable_block_reuse = False )
58415845 cuda_graph_config = CudaGraphConfig (
58425846 enable_padding = True , batch_sizes = [1 , 2 , 4 , 8 , 16 , 32 , 64 , 128 ])
58435847 moe_config = MoeConfig (backend = moe_backend )
58445848
58455849 with LLM (self .MODEL_PATH ,
5846- tensor_parallel_size = world_size ,
5847- moe_expert_parallel_size = world_size ,
5850+ tensor_parallel_size = tp_size ,
5851+ moe_expert_parallel_size = 1 ,
58485852 max_seq_len = 4096 ,
58495853 max_num_tokens = 4096 ,
58505854 max_batch_size = 128 ,
@@ -5856,20 +5860,24 @@ def test_bf16(self, moe_backend):
58565860 task .evaluate (llm ,
58575861 extra_evaluator_kwargs = self .EXTRA_EVALUATOR_KWARGS )
58585862
5859- def test_fp8 (self ):
5863+ @pytest .mark .parametrize (
5864+ "tp_size" ,
5865+ [1 , pytest .param (2 , marks = pytest .mark .skip_less_device (2 ))],
5866+ ids = ["tp1" , "tp2" ],
5867+ )
5868+ def test_fp8 (self , tp_size ):
58605869 model_dir = f"{ self .MODEL_PATH } -FP8"
58615870 # Model is being added to CI. Skip at the moment.
58625871 if not os .path .exists (model_dir ):
58635872 pytest .skip (f"Model directory { model_dir } does not exist" )
58645873
5865- world_size = 1
58665874 kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.8 ,
58675875 enable_block_reuse = False )
58685876 moe_config = MoeConfig (backend = 'DEEPGEMM' )
58695877
58705878 with LLM (model_dir ,
5871- tensor_parallel_size = world_size ,
5872- moe_expert_parallel_size = world_size ,
5879+ tensor_parallel_size = tp_size ,
5880+ moe_expert_parallel_size = 1 ,
58735881 max_seq_len = 4096 ,
58745882 enable_chunked_prefill = True ,
58755883 kv_cache_config = kv_cache_config ,
0 commit comments