3131import pytest
3232import yaml
3333from defs .common import get_free_port_in_ci as get_free_port
34- from defs .common import parse_gsm8k_output , wait_for_server
34+ from defs .common import (parse_gsm8k_output , resolve_llm_model_path ,
35+ wait_for_server )
3536from defs .conftest import (get_sm_version , llm_models_root , skip_arm ,
3637 skip_no_hopper , skip_pre_blackwell , skip_pre_hopper )
3738from defs .trt_test_alternative import check_call , check_output , print_info
@@ -308,6 +309,8 @@ def get_test_config(test_desc, example_dir, test_root):
308309 f"{ test_configs_root } /disagg_config_ctxtp2_gentp2_gptoss_triton.yaml" ,
309310 "qwen3_5_4b_fp8_stress" :
310311 f"{ test_configs_root } /disagg_config_ctxtp1_gentp1_qwen3_5_4b_fp8_tllm.yaml" ,
312+ "qwen3_32b_fp8_stress" :
313+ f"{ test_configs_root } /disagg_config_ctxtp1_gentp4_qwen3_32b_fp8.yaml" ,
311314 "gpt_oss_120b_harmony" :
312315 f"{ test_configs_root } /disagg_config_ctxtp2_gentp2_gptoss_tllm.yaml" ,
313316 "cancel_stress_test" :
@@ -492,6 +495,11 @@ def run_client_tests(example_dir,
492495 "The capital of Germany is Berlin" ,
493496 "Using `asyncio` in Python"
494497 ]
498+ elif "qwen3_32b_fp8" in test_desc :
499+ expected_strings = [
500+ "The capital of Germany is Berlin" ,
501+ "Asyncio in Python is a library"
502+ ]
495503 else :
496504 expected_strings = [
497505 "The capital of Germany is Berlin" ,
@@ -618,6 +626,13 @@ def setup_disagg_cluster(
618626 with open (config_file , 'r' ) as f :
619627 config = yaml .safe_load (f )
620628
629+ speculative_config = config .get ("speculative_config" )
630+ if isinstance (speculative_config , dict ):
631+ speculative_model = speculative_config .get ("speculative_model" )
632+ if speculative_model :
633+ speculative_config ["speculative_model" ] = resolve_llm_model_path (
634+ speculative_model )
635+
621636 disagg_cluster = get_default_disagg_cluster_config ()
622637 server_host = config .get ("hostname" , "localhost" )
623638 server_port = get_free_port ()
@@ -648,6 +663,8 @@ def setup_disagg_cluster(
648663
649664 # Launch workers
650665 model = model_name or config .get ("model" )
666+ if model :
667+ model = resolve_llm_model_path (model )
651668 ctx_workers = []
652669 gen_workers = []
653670 disagg_server = None
@@ -2288,6 +2305,22 @@ def test_disaggregated_gpt_oss_120b_harmony(disaggregated_test_root,
22882305 cwd = llm_venv .get_working_directory ())
22892306
22902307
# Disaggregated-serving test for the Qwen3-32B FP8 checkpoint, run with the "qwen3_32b_fp8_stress" test description.
2308+ @skip_pre_hopper  # NOTE(review): presumably skips on GPUs older than Hopper - confirm in defs.conftest
2309+ @pytest .mark .skip_less_device (8 )  # NOTE(review): presumably requires at least 8 devices - confirm marker semantics
2310+ @pytest .mark .parametrize ("model_path" , ['Qwen3/Qwen3-32B-FP8' ])  # single parametrized case: the relative model path
2311+ def test_disaggregated_qwen3_32b_fp8 (disaggregated_test_root ,  # not referenced in the body; presumably a fixture requested for its setup
2312+ disaggregated_example_root , llm_venv ,
2313+ model_path ):
2314+ model_dir = resolve_llm_model_path (model_path )  # turn the relative model path into the directory used below
2315+ setup_model_symlink (llm_venv , model_dir , model_path )  # NOTE(review): presumably links model_dir under the venv working dir - confirm
2316+ # Run the test using the resolved model directory, the venv's environment and its working directory.
2317+ run_disaggregated_test (disaggregated_example_root ,
2318+ "qwen3_32b_fp8_stress" ,  # test description; get_test_config maps this key to disagg_config_ctxtp1_gentp4_qwen3_32b_fp8.yaml
2319+ env = llm_venv ._new_env ,
2320+ model_path = model_dir ,  # the resolved directory is passed, not the relative path
2321+ cwd = llm_venv .get_working_directory ())
2322+
2323+
22912324@pytest .mark .timeout (12600 )
22922325@pytest .mark .parametrize ("test_config" , [
22932326 pytest .param (TestConfig (model_path = 'DeepSeek-R1/DeepSeek-R1-0528-FP4-v2' ,
@@ -2349,6 +2382,11 @@ def test_disaggregated_gpt_oss_120b_harmony(disaggregated_test_root,
23492382 cancellation_rate = 10 ,
23502383 cancellation_delay = 0.5 ),
23512384 marks = (pytest .mark .skip_less_device (2 ), skip_no_hopper )),
2385+ pytest .param (TestConfig (model_path = 'Qwen3/Qwen3-32B-FP8' ,
2386+ test_desc = 'qwen3_32b_fp8_stress' ,
2387+ request_count = 10000 ,
2388+ accuracy_threshold = 0.42 ),
2389+ marks = (pytest .mark .skip_less_device (8 ), skip_pre_hopper )),
23522390],
23532391 ids = lambda x : x .test_desc )
23542392@pytest .mark .parametrize ("concurrency" , [512 ], ids = lambda x : f"conc{ x } " )
@@ -2363,7 +2401,7 @@ def test_disaggregated_stress_test(disaggregated_test_root,
23632401 # Unpack configuration from dataclass
23642402 model_path = test_config .model_path
23652403 test_desc = test_config .test_desc
2366- model_dir = f" { llm_models_root () } / { model_path } "
2404+ model_dir = resolve_llm_model_path ( model_path )
23672405 setup_model_symlink (llm_venv , model_dir , model_path )
23682406
23692407 config_file = get_test_config (test_desc , disaggregated_example_root ,
0 commit comments