Skip to content

Commit ea9aa7a

Browse files
committed
fix race window
1 parent e29c31c commit ea9aa7a

File tree

2 files changed

+6
-0
lines changed

2 files changed

+6
-0
lines changed

lmdeploy/serve/core/async_engine.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,11 @@ async def stop_all_session(self):
236236
self.epoch += 1
237237
await self.session_mgr.async_abort_all()
238238

239+
def prepare_sleep(self):
    """Flag the engine as sleeping ahead of the actual backend sleep.

    Records both the ``'weights'`` and ``'kv_cache'`` tags in
    ``self.sleeping_tags`` and sets ``self.is_sleeping`` to ``True``.
    The intent is to reject new inference requests before the backend
    sleep starts.
    """
    # NOTE(review): the check that turns requests away is not in this
    # method; presumably it reads ``is_sleeping`` elsewhere -- confirm.
    tags_going_to_sleep = {'weights', 'kv_cache'}
    self.sleeping_tags = tags_going_to_sleep
    self.is_sleeping = True
243+
239244
def sleep(self, level: int = 1):
240245
"""Sleep the model.
241246

lmdeploy/serve/openai/api_server.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,6 +1190,7 @@ async def sleep(raw_request: Request = None):
11901190
if level not in (1, 2):
11911191
return create_error_response(HTTPStatus.BAD_REQUEST, 'The "level" query parameter must be 1 or 2.')
11921192
async_engine = VariableInterface.async_engine
1193+
async_engine.prepare_sleep()
11931194
await async_engine.stop_all_session()
11941195
async_engine.sleep(level)
11951196
return Response(status_code=200)

0 commit comments

Comments
 (0)