@@ -279,9 +279,26 @@ async def safe_run(self, handle, session, **kwargs):
279279 yield generator
280280 except (Exception , asyncio .CancelledError , GeneratorExit ) as e : # noqa
281281 logger .exception (f'[safe_run] session { session .session_id } exception caught: { e } ' )
282- await session .async_abort ()
282+ # Use asyncio.shield to protect cleanup coroutines from being cancelled.
283+ # When a task is in cancelling state, bare `await` raises CancelledError
284+ # immediately. shield keeps the inner coroutine running despite that.
285+ # The enclosing `except (asyncio.CancelledError, Exception)` catches the
286+ # CancelledError that the shielded await itself re-raises in this task.
287+ try :
288+ await asyncio .shield (handle .async_cancel (session .session_id ))
289+ except (asyncio .CancelledError , Exception ) as cancel_e :
290+ logger .debug (f'[safe_run] session { session .session_id } async_cancel exception caught: { cancel_e } ' )
283291 if self .backend == 'pytorch' :
284- await handle .async_end (session .session_id )
292+ logger .info (f'[safe_run] session { session .session_id } ending session' )
293+ try :
294+ await asyncio .shield (handle .async_end (session .session_id ))
295+ except (asyncio .CancelledError , Exception ) as end_e :
296+ logger .debug (f'[safe_run] session { session .session_id } async_end exception caught: { end_e } ' )
297+ # Wrap as SafeRunException so that the outer `request_handle` context
298+ # manager in `session_manager.py` can distinguish a handled cancellation (caught by
299+ # `except SafeRunException: pass`) from an unexpected CancelledError.
300+ # Without this, the suppressed exception leaves the task in cancelling
301+ # state, causing a second CancelledError at the next await point.
285302 raise SafeRunException (f'Safe run exception for session { session .session_id } ' ) from e
286303 finally :
287304 await generator .aclose ()
0 commit comments