Skip to content

Commit 81627e3

Browse files
authored
make safe_run cancellation cleanup reliable with shield and SafeRunException (#4439)
1 parent c677cdd commit 81627e3

1 file changed

Lines changed: 19 additions & 2 deletions

File tree

lmdeploy/serve/core/async_engine.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,9 +279,26 @@ async def safe_run(self, handle, session, **kwargs):
279279
yield generator
280280
except (Exception, asyncio.CancelledError, GeneratorExit) as e: # noqa
281281
logger.exception(f'[safe_run] session {session.session_id} exception caught: {e}')
282-
await session.async_abort()
282+
# Use asyncio.shield to protect cleanup coroutines from being cancelled.
283+
# When a task is in the cancelling state, a bare `await` raises CancelledError
284+
# immediately. shield ensures the inner coroutine runs to completion.
285+
# The enclosing `except (asyncio.CancelledError, Exception)` catches the
286+
# CancelledError that shield itself re-raises at the await point.
287+
try:
288+
await asyncio.shield(handle.async_cancel(session.session_id))
289+
except (asyncio.CancelledError, Exception) as cancel_e:
290+
logger.debug(f'[safe_run] session {session.session_id} async_cancel exception caught: {cancel_e}')
283291
if self.backend == 'pytorch':
284-
await handle.async_end(session.session_id)
292+
logger.info(f'[safe_run] session {session.session_id} ending session')
293+
try:
294+
await asyncio.shield(handle.async_end(session.session_id))
295+
except (asyncio.CancelledError, Exception) as end_e:
296+
logger.debug(f'[safe_run] session {session.session_id} async_end exception caught: {end_e}')
297+
# Wrap as SafeRunException so that the outer `request_handle` context
298+
# manager in `session_manager.py` can distinguish a handled cancellation (caught by
299+
# `except SafeRunException: pass`) from an unexpected CancelledError.
300+
# Without this, the suppressed exception leaves the task in the cancelling
301+
# state, causing a second CancelledError at the next await point.
285302
raise SafeRunException(f'Safe run exception for session {session.session_id}') from e
286303
finally:
287304
await generator.aclose()

0 commit comments

Comments
 (0)