Skip to content

Commit 66ce774

Browse files
committed
Fix cross-task CancelScope busy-loop in Query lifecycle
When Query.close() was called from a different asyncio task than the one that called Query.start(), anyio's CancelScope.__exit__ raised RuntimeError ("different task") and never completed. This left _deliver_cancellation in an infinite call_soon loop, burning ~4% CPU per leaked scope. The root cause was start() calling __aenter__() on the task group in one task and close() calling __aexit__() from another. anyio requires these to happen in the same task. Fix: move the task group into a dedicated _run_reader coroutine launched via asyncio.create_task, so enter and exit always happen in the same task. close() now closes the transport first (letting _read_messages exit naturally via sentinel/EOF), then awaits the reader task's completion signal with a timeout fallback.
1 parent f375889 commit 66ce774

1 file changed

Lines changed: 63 additions & 14 deletions

File tree

src/claude_agent_sdk/_internal/query.py

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Query class for handling bidirectional control protocol."""
22

3+
import asyncio
34
import json
45
import logging
56
import os
@@ -30,6 +31,8 @@
3031

3132
logger = logging.getLogger(__name__)
3233

34+
# Seconds close() waits for the reader task to signal completion before it
# falls back to cancelling that task.
_READER_SHUTDOWN_TIMEOUT = 5.0
35+
3336

3437
def _convert_hook_output_for_cli(hook_output: dict[str, Any]) -> dict[str, Any]:
3538
"""Convert Python-safe field names to CLI-expected field names.
@@ -116,6 +119,14 @@ def __init__(
116119
float(os.environ.get("CLAUDE_CODE_STREAM_CLOSE_TIMEOUT", "60000")) / 1000.0
117120
) # Convert ms to seconds
118121

122+
# Reader lifecycle — _run_reader owns the anyio task group so that
123+
# enter and exit always happen in the same asyncio task, avoiding
124+
# the cross-task RuntimeError from anyio's CancelScope.
125+
self._reader_task: asyncio.Task[None] | None = None
126+
self._reader_ready = asyncio.Event()
127+
self._reader_done = asyncio.Event()
128+
self._reader_start_exc: BaseException | None = None
129+
119130
async def initialize(self) -> dict[str, Any] | None:
120131
"""Initialize control protocol if in streaming mode.
121132
@@ -164,10 +175,39 @@ async def initialize(self) -> dict[str, Any] | None:
164175

165176
async def start(self) -> None:
    """Start reading messages from transport.

    Launches ``_run_reader`` in its own asyncio task and waits until that
    task signals readiness through ``_reader_ready``. Calling this while a
    reader task is already registered is a no-op.

    Raises:
        BaseException: Whatever ``_run_reader`` recorded in
            ``_reader_start_exc`` because it failed before becoming ready.
    """
    if self._reader_task is not None:
        return

    # Reset the lifecycle signals so state left by a previous run cannot
    # make this call return before the new reader is actually ready.
    self._reader_ready.clear()
    self._reader_done.clear()
    self._reader_start_exc = None
    self._reader_task = asyncio.create_task(self._run_reader())
    await self._reader_ready.wait()

    if self._reader_start_exc is not None:
        # Drop the failed task. Leaving it registered would make the guard
        # at the top turn every later start() into a silent no-op even
        # though no reader is running.
        self._reader_task = None
        raise self._reader_start_exc
188+
189+
async def _run_reader(self) -> None:
    """Own the anyio task group; enter and exit happen in this task.

    The task group is entered and exited here so that anyio's
    CancelScope never sees a cross-task mismatch. Child tasks
    (_read_messages, _handle_control_request, stream_input) are
    started inside this group via _tg.start_soon(). When the
    transport closes, _read_messages finishes, which lets the
    task group exit naturally.

    A failure before ``_reader_ready`` is set is stored in
    ``_reader_start_exc`` for ``start()`` to re-raise. After readiness,
    ordinary exceptions are logged and absorbed, while cancellation and
    other non-``Exception`` signals propagate. ``_reader_done`` is always
    set on exit.
    """
    try:
        async with anyio.create_task_group() as tg:
            self._tg = tg
            self._reader_ready.set()
            tg.start_soon(self._read_messages)
    except BaseException as exc:
        if not self._reader_ready.is_set():
            # Startup failure: hand it to start(), which is blocked on
            # _reader_ready and re-raises it in the caller's task.
            self._reader_start_exc = exc
            self._reader_ready.set()
        elif isinstance(exc, Exception):
            # Nobody awaits this task on the normal close() path, so an
            # absorbed failure must at least be logged. Errors arriving
            # after close() began are expected teardown noise.
            level = logging.DEBUG if getattr(self, "_closed", False) else logging.ERROR
            logger.log(level, "Reader task group exited with an error", exc_info=exc)
        else:
            # Cancellation, KeyboardInterrupt and SystemExit must not be
            # swallowed; close() already suppresses the CancelledError.
            raise
    finally:
        self._tg = None
        self._reader_done.set()
171211

172212
async def _read_messages(self) -> None:
173213
"""Read messages from transport and route them."""
@@ -657,19 +697,28 @@ async def receive_messages(self) -> AsyncIterator[dict[str, Any]]:
657697
yield message
658698

659699
async def close(self) -> None:
    """Close the query and transport.

    Closes the transport first so _read_messages exits naturally
    (sentinel/EOF unblocks the queue), then waits for the reader
    task to finish. Falls back to cancellation if the reader does
    not exit within ``_READER_SHUTDOWN_TIMEOUT`` seconds.

    Raises:
        Exception: Any error raised by ``transport.close()``, re-raised
            after the reader task has been shut down.
    """
    self._closed = True

    # Remember a transport failure instead of bailing out, so the reader
    # task is still shut down and not left running in the background.
    transport_error: Exception | None = None
    try:
        await self.transport.close()
    except Exception as exc:
        transport_error = exc

    # Work on a local snapshot: an overlapping close() may reset
    # self._reader_task to None while this call is suspended below.
    reader_task = self._reader_task
    if reader_task is not None:
        try:
            await asyncio.wait_for(
                self._reader_done.wait(),
                timeout=_READER_SHUTDOWN_TIMEOUT,
            )
        except asyncio.TimeoutError:
            reader_task.cancel()
            with suppress(asyncio.CancelledError):
                await reader_task
        # Clear the handle only if it still refers to the task we shut down.
        if self._reader_task is reader_task:
            self._reader_task = None

    if transport_error is not None:
        raise transport_error
721+
673722
# Make Query an async iterator
674723
def __aiter__(self) -> AsyncIterator[dict[str, Any]]:
675724
"""Return async iterator for messages."""

0 commit comments

Comments
 (0)