1717import tempfile
1818import time
1919import unittest
20- from unittest .mock import MagicMock , patch
20+ from unittest .mock import MagicMock , Mock , patch
2121
2222import paddle
2323
@@ -37,6 +37,7 @@ def enable_torch_proxy(scope=None):
3737import fastdeploy .cache_manager .cache_transfer_manager as cache_transfer_manager
3838from fastdeploy .cache_manager .cache_tasks import ReadStorageTask , WriteStorageTask
3939from fastdeploy .cache_manager .cache_transfer_manager import CacheTransferManager
40+ from fastdeploy .engine .request import ControlRequest
4041
4142
4243# ==========================
@@ -121,6 +122,16 @@ def __init__(self, name, array, dtype, suffix, create=False):
121122 patcher_thread .start ()
122123 self .addCleanup (patcher_thread .stop )
123124
125+ # --------------------------
126+ # mock FMQ
127+ # --------------------------
128+ patcher_fmq = patch ("fastdeploy.cache_manager.cache_transfer_manager.FMQ" )
129+ mock_fmq_cls = patcher_fmq .start ()
130+ mock_fmq = MagicMock ()
131+ mock_fmq .queue .return_value = MagicMock (name = "ctrl_output_queue" )
132+ mock_fmq_cls .return_value = mock_fmq
133+ self .addCleanup (patcher_fmq .stop )
134+
124135 # --------------------------
125136 # mock _init_cpu_cache and _init_gpu_cache
126137 # --------------------------
@@ -1515,6 +1526,111 @@ def resume_sleep(_):
15151526
15161527 self .assertFalse (self .manager .is_paused )
15171528
def test_init_control_builds_expected_queue_name(self):
    # _init_control should request a "producer" queue from the FMQ instance
    # and keep the returned handle as ctrl_output_queue.
    manager = self.manager
    manager.rank, manager.n_ranks = 1, 4
    manager.local_data_parallel_id = 2
    manager.cache_queue_port = 8899

    fake_fmq = MagicMock()
    ctrl_queue = MagicMock(name="ctrl_q")
    fake_fmq.queue.return_value = ctrl_queue

    fmq_patch = patch("fastdeploy.cache_manager.cache_transfer_manager.FMQ", return_value=fake_fmq)
    with fmq_patch:
        manager._init_control()

    # The expected name carries "rank9" and the port configured above.
    # NOTE(review): 9 presumably comes from rank + n_ranks * local_data_parallel_id
    # (1 + 4 * 2) -- confirm against _init_control.
    fake_fmq.queue.assert_called_once_with("ctrl_c2e_rank9_8899", "producer")
    self.assertIs(manager.ctrl_output_queue, ctrl_queue)
1544+
def test_control_task_success_puts_control_response(self):
    # A "pause" request whose handler returns True should wait on the barrier
    # once and enqueue exactly one response with error_code 200.
    manager = self.manager

    barrier_wait = Mock()
    manager.cache_task_queue.barrier = MagicMock(wait=barrier_wait)

    put_mock = Mock(return_value="coro")
    manager.ctrl_output_queue = MagicMock(name="ctrl_q")
    manager.ctrl_output_queue.put = put_mock

    pause_handler = MagicMock(return_value=True)
    manager._handle_pause = pause_handler

    # asyncio.run is patched out so the value returned by put() is never awaited.
    with patch("fastdeploy.cache_manager.cache_transfer_manager.asyncio.run"):
        request = ControlRequest(request_id="ctrl-1", method="pause")
        manager.control_task(request)

    pause_handler.assert_called_once()
    barrier_wait.assert_called_once()
    put_mock.assert_called_once()

    # The first positional argument of put() is the response object.
    response = put_mock.call_args[0][0]
    self.assertEqual(response.request_id, "ctrl-1")
    self.assertEqual(response.error_code, 200)
1560+
def test_control_task_unknown_method_returns_400(self):
    # A request naming a method the manager does not recognise should still
    # enqueue a response, carrying error_code 400 and an explanatory message.
    manager = self.manager
    manager.cache_task_queue.barrier = MagicMock(wait=Mock())

    put_mock = Mock(return_value="coro")
    manager.ctrl_output_queue = MagicMock(name="ctrl_q")
    manager.ctrl_output_queue.put = put_mock

    # asyncio.run is patched out so the value returned by put() is never awaited.
    with patch("fastdeploy.cache_manager.cache_transfer_manager.asyncio.run"):
        manager.control_task(ControlRequest(request_id="ctrl-2", method="unknown"))

    # Fail with a clear message if no response was enqueued; without this,
    # call_args would be None and the lookup below would raise an opaque
    # AttributeError instead of a meaningful assertion failure.
    put_mock.assert_called_once()
    response = put_mock.call_args.args[0]
    self.assertEqual(response.error_code, 400)
    self.assertIn("Unknown control method", response.error_message)
1572+
def test_control_task_exception_returns_500(self):
    # When the handler for a known method raises, control_task should still
    # enqueue a response, carrying error_code 500 and a failure message.
    manager = self.manager
    manager.cache_task_queue.barrier = MagicMock(wait=Mock())

    put_mock = Mock(return_value="coro")
    manager.ctrl_output_queue = MagicMock(name="ctrl_q")
    manager.ctrl_output_queue.put = put_mock

    with (
        # Force the "sleep" handler to blow up.
        patch.object(manager, "_handle_sleep", side_effect=RuntimeError("boom")),
        # asyncio.run is patched out so the value returned by put() is never awaited.
        patch("fastdeploy.cache_manager.cache_transfer_manager.asyncio.run"),
    ):
        manager.control_task(ControlRequest(request_id="ctrl-3", method="sleep"))

    # Fail with a clear message if no response was enqueued; without this,
    # call_args would be None and the lookup below would raise an opaque
    # AttributeError instead of a meaningful assertion failure.
    put_mock.assert_called_once()
    response = put_mock.call_args.args[0]
    self.assertEqual(response.error_code, 500)
    self.assertIn("Failed to execute sleep", response.error_message)
1587+
def test_handle_resume_updates_key_prefix_for_storage_backend(self):
    # With a storage backend configured and the manager paused, _handle_resume
    # should call resume() and _update_key_prefix() once each and return truthy.
    manager = self.manager
    resume_mock = MagicMock()
    key_prefix_mock = MagicMock()

    manager.is_paused = True
    manager.storage_backend_type = "mooncake"
    manager.resume = resume_mock
    manager._update_key_prefix = key_prefix_mock

    self.assertTrue(manager._handle_resume())

    resume_mock.assert_called_once()
    key_prefix_mock.assert_called_once()
1599+
def test_handle_update_weights_updates_key_prefix_for_storage_backend(self):
    # With a storage backend configured, _handle_update_weights should
    # refresh the key prefix exactly once and return truthy.
    manager = self.manager
    key_prefix_mock = MagicMock()
    manager.storage_backend_type = "mooncake"
    manager._update_key_prefix = key_prefix_mock

    outcome = manager._handle_update_weights()

    self.assertTrue(outcome)
    key_prefix_mock.assert_called_once()
1608+
def test_handle_update_weights_skips_without_storage_backend(self):
    # Without a storage backend, _handle_update_weights should still return
    # truthy but must leave the key prefix untouched.
    manager = self.manager
    key_prefix_mock = MagicMock()
    manager.storage_backend_type = None
    manager._update_key_prefix = key_prefix_mock

    outcome = manager._handle_update_weights()

    self.assertTrue(outcome)
    key_prefix_mock.assert_not_called()
1617+
def test_handle_sleep_and_wakeup_are_idempotent(self):
    # Sleeping while already asleep, and waking while already awake, should
    # both report success without touching the CPU/GPU caches.
    manager = self.manager
    clear_cpu, clear_gpu = MagicMock(), MagicMock()
    init_cpu, init_gpu = MagicMock(), MagicMock()

    manager.is_sleeping = True
    manager._clear_cpu_cache = clear_cpu
    manager._clear_gpu_cache = clear_gpu
    manager._init_cpu_cache = init_cpu
    manager._init_gpu_cache = init_gpu

    # Already sleeping: no cache should be cleared.
    self.assertTrue(manager._handle_sleep())
    for untouched in (clear_cpu, clear_gpu):
        untouched.assert_not_called()

    # Already awake: no cache should be (re)initialised.
    manager.is_sleeping = False
    self.assertTrue(manager._handle_wakeup())
    for untouched in (init_cpu, init_gpu):
        untouched.assert_not_called()
1633+
15181634 def test_submit_task_decrements_inflight_on_task_error (self ):
15191635 class DummyPool :
15201636 def submit (self , fn , * args ):
0 commit comments