@@ -26,33 +26,29 @@ def test_tpu_slice_placement_group_creation_default_resources(ray_tpu_cluster):
2626 llm_config = LLMConfig (
2727 model_loading_config = ModelLoadingConfig (model_id = "test-tpu-model" ),
2828 accelerator_type = "TPU-V6E" ,
29- accelerator_config = TPUConfig ( kind = " tpu" , topology = " 4x4") ,
29+ accelerator_config = { " kind" : " tpu" , " topology" : " 4x4"} ,
3030 )
3131
3232 engine_config = llm_config .get_engine_config ()
33+ pg = engine_config .get_or_create_pg ()
3334
34- pg = None
35- try :
36- pg = engine_config .get_or_create_pg ()
35+ assert isinstance (pg , PlacementGroup )
3736
38- assert isinstance (pg , PlacementGroup )
37+ pg_table = placement_group_table (pg )
38+ assert pg_table ["strategy" ] == "PACK"
3939
40- pg_table = placement_group_table (pg )
41- assert pg_table ["strategy" ] == "PACK"
40+ # 4x4 v6e = 16 chips. We default to 1 TPU chip per bundle.
41+ assert len (pg_table ["bundles" ]) == 16
42+ for bundle in pg_table ["bundles" ].values ():
43+ assert "TPU" in bundle
44+ assert bundle ["TPU" ] == 1
4245
43- # 4x4 v6e = 16 chips. We default to 4 TPU chips per bundle (per-host).
44- assert len (pg_table ["bundles" ]) == 4
45- for bundle in pg_table ["bundles" ].values ():
46- assert "TPU" in bundle
47- assert bundle ["TPU" ] == 4.0
48- finally :
49- # Let the backend tear down its own resources if it has any
50- engine_config .accelerator .shutdown ()
51- if pg is not None :
52- try :
53- ray .util .remove_placement_group (pg )
54- except Exception :
55- pass
46+ # Let the backend tear down its own resources if it has any
47+ engine_config .accelerator .shutdown ()
48+ try :
49+ ray .util .remove_placement_group (pg )
50+ except Exception :
51+ pass # Already cleaned up by the wrapper
5652
5753
5854def test_tpu_slice_placement_group_creation_host_resources (ray_tpu_cluster ):
@@ -63,36 +59,32 @@ def test_tpu_slice_placement_group_creation_host_resources(ray_tpu_cluster):
6359 llm_config = LLMConfig (
6460 model_loading_config = ModelLoadingConfig (model_id = "test-tpu-model" ),
6561 accelerator_type = "TPU-V6E" ,
66- accelerator_config = TPUConfig ( kind = " tpu" , topology = " 4x4") ,
62+ accelerator_config = { " kind" : " tpu" , " topology" : " 4x4"} ,
6763 placement_group_config = {
6864 "strategy" : "STRICT_SPREAD" ,
69- "bundles" : [{"TPU" : 4 }] * 4 ,
65+ "bundles" : [{"TPU" : 4 }],
7066 },
7167 )
7268
7369 engine_config = llm_config .get_engine_config ()
70+ pg = engine_config .get_or_create_pg ()
71+
72+ assert isinstance (pg , PlacementGroup )
7473
75- pg = None
74+ pg_table = placement_group_table (pg )
75+ assert pg_table ["strategy" ] == "STRICT_SPREAD"
76+ # We should provision 4 host-level bundles instead of the default 16 chip-level bundles.
77+ assert len (pg_table ["bundles" ]) == 4
78+ for bundle in pg_table ["bundles" ].values ():
79+ assert "TPU" in bundle
80+ assert bundle ["TPU" ] == 4
81+
82+ # Let the backend tear down its own resources if it has any
83+ engine_config .accelerator .shutdown ()
7684 try :
77- pg = engine_config .get_or_create_pg ()
78-
79- assert isinstance (pg , PlacementGroup )
80-
81- pg_table = placement_group_table (pg )
82- assert pg_table ["strategy" ] == "STRICT_SPREAD"
83- # We should provision 4 host-level bundles instead of the default 16 chip-level bundles.
84- assert len (pg_table ["bundles" ]) == 4
85- for bundle in pg_table ["bundles" ].values ():
86- assert "TPU" in bundle
87- assert bundle ["TPU" ] == 4
88- finally :
89- # Let the backend tear down its own resources if it has any
90- engine_config .accelerator .shutdown ()
91- if pg is not None :
92- try :
93- ray .util .remove_placement_group (pg )
94- except Exception :
95- pass
85+ ray .util .remove_placement_group (pg )
86+ except Exception :
87+ pass # Already cleaned up by the wrapper
9688
9789
9890def test_single_tpu_fallback (ray_tpu_cluster ):
@@ -229,17 +221,15 @@ def test_tpu_slice_placement_group_creation_cpu_driver_homogeneous_tpu_bundles_p
229221 pass
230222
231223
232- def test_tpu_serve_deployment_default_host_level_bundles (ray_tpu_cluster ):
224+ def test_tpu_serve_deployment_default_chip_level_bundles (ray_tpu_cluster ):
233225 """
234226 Verifies that a Serve deployment created for a multi-host TPU slice defaults
235- to host -level bundles when no placement_group_config is specified.
227+ to chip -level bundles when no placement_group_config is specified.
236228 """
237- from ray .llm ._internal .serve .core .configs .accelerators import TPUConfig
238-
239229 llm_config = LLMConfig (
240230 model_loading_config = ModelLoadingConfig (model_id = "test-tpu-model" ),
241231 accelerator_type = "TPU-V6E" ,
242- accelerator_config = TPUConfig ( kind = " tpu" , topology = " 4x4") ,
232+ accelerator_config = { " kind" : " tpu" , " topology" : " 4x4"} ,
243233 )
244234
245235 app = serve .deployment (LLMServer ).bind (llm_config , engine_cls = PGCreationMockEngine )
@@ -266,10 +256,10 @@ def test_tpu_serve_deployment_default_host_level_bundles(ray_tpu_cluster):
266256 worker_pg = [pg for pg in active_pgs if pg not in head_pgs ][0 ]
267257
268258 assert worker_pg ["strategy" ] == "PACK"
269- # 4x4 topology = 16 chips. Default is 4 bundles of 4 TPUs (per-host) .
270- assert len (worker_pg ["bundles" ]) == 4
259+ # 4x4 topology = 16 chips. Default is 16 bundles of 1 TPU .
260+ assert len (worker_pg ["bundles" ]) == 16
271261 for bundle in worker_pg ["bundles" ].values ():
272- assert bundle .get ("TPU" , 0 ) == 4.0
262+ assert bundle .get ("TPU" , 0 ) == 1
273263
274264 serve .shutdown ()
275265
@@ -282,7 +272,7 @@ def test_tpu_serve_deployment_explicit_host_level_bundles(ray_tpu_cluster):
282272 llm_config = LLMConfig (
283273 model_loading_config = ModelLoadingConfig (model_id = "test-tpu-model" ),
284274 accelerator_type = "TPU-V6E" ,
285- accelerator_config = TPUConfig ( kind = " tpu" , topology = " 4x4") ,
275+ accelerator_config = { " kind" : " tpu" , " topology" : " 4x4"} ,
286276 placement_group_config = {"bundle_per_worker" : {"TPU" : 4 }},
287277 )
288278
@@ -318,52 +308,5 @@ def test_tpu_serve_deployment_explicit_host_level_bundles(ray_tpu_cluster):
318308 serve .shutdown ()
319309
320310
321- def test_tpu_serve_deployment_explicit_per_chip_bundles (ray_tpu_cluster ):
322- """
323- Verifies that a user can explicitly request chip-level bundles (1 TPU per bundle)
324- for a full multi-host TPU slice via placement_group_config.
325- """
326- from ray .llm ._internal .serve .core .configs .accelerators import TPUConfig
327-
328- llm_config = LLMConfig (
329- model_loading_config = ModelLoadingConfig (model_id = "test-tpu-model" ),
330- accelerator_type = "TPU-V6E" ,
331- accelerator_config = TPUConfig (kind = "tpu" , topology = "4x4" ),
332- placement_group_config = {"bundle_per_worker" : {"TPU" : 1 }},
333- engine_kwargs = {"tensor_parallel_size" : 16 },
334- )
335-
336- app = serve .deployment (LLMServer ).bind (llm_config , engine_cls = PGCreationMockEngine )
337- serve .run (app )
338-
339- pg_table = ray .util .placement_group_table ()
340- active_pgs = list (
341- {k : v for k , v in pg_table .items () if v ["state" ] == "CREATED" }.values ()
342- )
343-
344- assert (
345- len (active_pgs ) == 2
346- ), "Expected 2 PGs - one for TPU Head, one for worker bundles"
347-
348- tpu_head_resource = "TPU-v6e-16-head"
349- head_pgs = [
350- pg
351- for pg in active_pgs
352- if len (pg ["bundles" ]) == 1
353- and tpu_head_resource in list (pg ["bundles" ].values ())[0 ]
354- ]
355- assert len (head_pgs ) == 1
356-
357- worker_pg = [pg for pg in active_pgs if pg not in head_pgs ][0 ]
358-
359- assert worker_pg ["strategy" ] == "PACK"
360- # 4x4 topology = 16 chips. Explicitly requested 16 bundles of 1 TPU.
361- assert len (worker_pg ["bundles" ]) == 16
362- for bundle in worker_pg ["bundles" ].values ():
363- assert bundle .get ("TPU" , 0 ) == 1.0
364-
365- serve .shutdown ()
366-
367-
368311if __name__ == "__main__" :
369312 sys .exit (pytest .main (["-v" , __file__ ]))
0 commit comments