Skip to content

Commit 4a1ddd4

Browse files
authored
[CI] Enable weight prefetch for 8-gpu-h200 basic tests (sgl-project#25684)
1 parent 9330101 commit 4a1ddd4

2 files changed

Lines changed: 4 additions & 2 deletions

File tree

test/registered/8-gpu-models/test_minimax_m25_basic.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
write_github_step_summary,
1515
)
1616

17-
register_cuda_ci(est_time=290, stage="base-c", runner_config="8-gpu-h200")
17+
register_cuda_ci(est_time=250, stage="base-c", runner_config="8-gpu-h200")
1818

1919
MINIMAX_M25_MODEL_PATH = "MiniMaxAI/MiniMax-M2.5"
2020

@@ -36,6 +36,7 @@ def setUpClass(cls):
3636
"minimax-append-think",
3737
"--model-loader-extra-config",
3838
'{"enable_multithread_load": true, "num_threads": 64}',
39+
"--weight-loader-prefetch-checkpoints",
3940
]
4041
cls.process = popen_launch_server(
4142
cls.model,

test/registered/radix_cache/test_unified_radix_cache_kl_hicache.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
DSV4_FLASH_MODEL = "sgl-project/DeepSeek-V4-Flash-FP8"
2525
DSV4_FLASH_LAUNCH_TIMEOUT = 3600
2626

27-
register_cuda_ci(est_time=768, stage="base-c", runner_config="8-gpu-h200")
27+
register_cuda_ci(est_time=745, stage="base-c", runner_config="8-gpu-h200")
2828

2929

3030
class TestUnifiedMambaHiCache(UnifiedRadixTreeTestMixin, CustomTestCase):
@@ -72,6 +72,7 @@ def setUpClass(cls):
7272
"500",
7373
"--max-running-requests",
7474
"4",
75+
"--weight-loader-prefetch-checkpoints",
7576
],
7677
env={"SGLANG_ENABLE_UNIFIED_RADIX_TREE": "1"},
7778
)

0 commit comments

Comments
 (0)