2 files changed: +6 −0 lines changed
Original file line number | Diff line number | Diff line change
@@ -236,6 +236,11 @@ async def stop_all_session(self):
236236 self .epoch += 1
237237 await self .session_mgr .async_abort_all ()
238238
239+ def prepare_sleep (self ):
240+ """Reject new inference requests before backend sleep starts."""
241+ self .sleeping_tags = {'weights' , 'kv_cache' }
242+ self .is_sleeping = True
243+
239244 def sleep (self , level : int = 1 ):
240245 """Sleep the model.
241246
Original file line number | Diff line number | Diff line change
@@ -1190,6 +1190,7 @@ async def sleep(raw_request: Request = None):
11901190 if level not in (1 , 2 ):
11911191 return create_error_response (HTTPStatus .BAD_REQUEST , 'The "level" query parameter must be 1 or 2.' )
11921192 async_engine = VariableInterface .async_engine
1193+ async_engine .prepare_sleep ()
11931194 await async_engine .stop_all_session ()
11941195 async_engine .sleep (level )
11951196 return Response (status_code = 200 )
You can’t perform that action at this time.
0 commit comments