Skip to content

Commit b9f5b90

Browse files
fix: break circular references so Agent cleanup doesn't hang with MCPClient (#1830)
1 parent e7d3eb9 commit b9f5b90

File tree

4 files changed

+155
-2
lines changed

4 files changed

+155
-2
lines changed

src/strands/plugins/registry.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import inspect
88
import logging
9+
import weakref
910
from collections.abc import Awaitable, Callable
1011
from typing import TYPE_CHECKING, cast
1112

@@ -55,9 +56,17 @@ def __init__(self, agent: "Agent") -> None:
5556
Args:
5657
agent: The agent instance that plugins will be initialized with.
5758
"""
58-
self._agent = agent
59+
self._agent_ref = weakref.ref(agent)
5960
self._plugins: dict[str, Plugin] = {}
6061

62+
@property
63+
def _agent(self) -> "Agent":
64+
"""Return the agent, raising ReferenceError if it has been garbage collected."""
65+
agent = self._agent_ref()
66+
if agent is None:
67+
raise ReferenceError("Agent has been garbage collected")
68+
return agent
69+
6170
def add_and_init(self, plugin: Plugin) -> None:
6271
"""Add and initialize a plugin with the agent.
6372

src/strands/tools/_caller.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import json
1111
import random
12+
import weakref
1213
from collections.abc import Callable
1314
from typing import TYPE_CHECKING, Any
1415

@@ -35,7 +36,15 @@ def __init__(self, agent: "Agent | BidiAgent") -> None:
3536
"""
3637
# WARNING: Do not add any other member variables or methods as this could result in a name conflict with
3738
# agent tools and thus break their execution.
38-
self._agent = agent
39+
self._agent_ref = weakref.ref(agent)
40+
41+
@property
42+
def _agent(self) -> "Agent | BidiAgent":
43+
"""Return the agent, raising ReferenceError if it has been garbage collected."""
44+
agent = self._agent_ref()
45+
if agent is None:
46+
raise ReferenceError("Agent has been garbage collected")
47+
return agent
3948

4049
def __getattr__(self, name: str) -> Callable[..., Any]:
4150
"""Call tool as a function.

tests/strands/plugins/test_plugins.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
"""Tests for the plugin system."""
22

3+
import gc
34
import unittest.mock
45

56
import pytest
67

8+
from strands import Agent
79
from strands.hooks import HookRegistry
810
from strands.plugins import Plugin
911
from strands.plugins.registry import _PluginRegistry
@@ -194,3 +196,14 @@ async def init_agent(self, agent):
194196

195197
assert plugin.initialized
196198
assert mock_agent.async_plugin_initialized
199+
200+
201+
def test_plugin_registry_raises_reference_error_after_agent_collected():
202+
"""Verify _PluginRegistry raises ReferenceError when the Agent has been garbage collected."""
203+
agent = Agent()
204+
registry = agent._plugin_registry
205+
del agent
206+
gc.collect()
207+
208+
with pytest.raises(ReferenceError, match="Agent has been garbage collected"):
209+
_ = registry._agent

tests/strands/tools/test_caller.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1+
import gc
12
import unittest.mock
3+
import weakref
24

35
import pytest
46

57
from strands import Agent, tool
8+
from strands.tools.tool_provider import ToolProvider
69

710

811
@pytest.fixture
@@ -312,3 +315,122 @@ def test_agent_tool_caller_interrupt_activated():
312315
exp_message = r"cannot directly call tool during interrupt"
313316
with pytest.raises(RuntimeError, match=exp_message):
314317
agent.tool.test_tool()
318+
319+
320+
def test_agent_collected_without_cyclic_gc():
321+
"""Verify that Agent is promptly collectable (no persistent reference cycle).
322+
323+
This ensures that the weakref-based back-references in _ToolCaller and _PluginRegistry
324+
do not create reference cycles that would delay cleanup until interpreter shutdown.
325+
When cleanup is deferred to interpreter shutdown, MCPClient.stop() hangs because its
326+
background thread cannot complete async cleanup at that point.
327+
328+
Note: On some platforms/versions (e.g. Python 3.14 with deferred refcounting), del may
329+
not immediately trigger collection. A single gc.collect() is allowed as a fallback since
330+
it still proves no persistent cycle exists — the agent is collected promptly, not deferred
331+
to interpreter shutdown.
332+
"""
333+
gc.disable()
334+
try:
335+
agent = Agent()
336+
ref = weakref.ref(agent)
337+
del agent
338+
339+
if ref() is not None:
340+
# Deferred refcounting (Python 3.14+) may not collect immediately on del;
341+
# a single gc.collect() should still reclaim it since there are no cycles.
342+
gc.collect()
343+
344+
assert ref() is None, "Agent was not collected; a reference cycle likely exists"
345+
finally:
346+
gc.enable()
347+
348+
349+
class _MockToolProvider(ToolProvider):
350+
"""Minimal ToolProvider that tracks cleanup calls, mimicking MCPClient lifecycle."""
351+
352+
def __init__(self):
353+
self.consumers: set = set()
354+
self.cleanup_called = False
355+
356+
async def load_tools(self, **kwargs):
357+
return []
358+
359+
def add_consumer(self, consumer_id, **kwargs):
360+
self.consumers.add(consumer_id)
361+
362+
def remove_consumer(self, consumer_id, **kwargs):
363+
self.consumers.discard(consumer_id)
364+
if not self.consumers:
365+
self.cleanup_called = True
366+
367+
368+
def test_agent_with_tool_provider_cleaned_up_when_function_returns():
369+
"""Replicate the hang from issue #1732: Agent with MCPClient created inside a function.
370+
371+
When an Agent using a managed MCPClient (as ToolProvider) is created inside a function,
372+
the script used to hang on exit. The Agent went out of scope when the function returned,
373+
but circular references (Agent → _ToolCaller._agent → Agent) prevented refcount-based
374+
destruction. Cleanup was deferred to the cyclic GC during interpreter shutdown, where
375+
MCPClient.stop() → thread.join() would hang.
376+
377+
This test verifies that with the weakref fix, the Agent is destroyed immediately when
378+
the function returns, and the tool provider's cleanup runs promptly.
379+
"""
380+
provider = _MockToolProvider()
381+
382+
def get_agent():
383+
return Agent(tools=[provider])
384+
385+
def main():
386+
agent = get_agent() # noqa: F841
387+
388+
gc.disable()
389+
try:
390+
main()
391+
392+
if not provider.cleanup_called:
393+
# Deferred refcounting (Python 3.14+) may not collect immediately on scope exit;
394+
# a single gc.collect() should still reclaim it since there are no cycles.
395+
gc.collect()
396+
397+
assert provider.cleanup_called, (
398+
"Tool provider was not cleaned up when the function returned; Agent likely leaked due to a reference cycle"
399+
)
400+
finally:
401+
gc.enable()
402+
403+
404+
def test_agent_with_tool_provider_cleaned_up_on_del():
405+
"""Replicate the working case from issue #1732: Agent at module scope, explicitly deleted.
406+
407+
In the issue, an Agent created at module level did not hang because module-level variables
408+
are cleared early during interpreter shutdown (while the runtime is still functional).
409+
This test verifies the equivalent: explicitly deleting the agent triggers immediate cleanup.
410+
"""
411+
provider = _MockToolProvider()
412+
413+
agent = Agent(tools=[provider])
414+
assert not provider.cleanup_called
415+
416+
del agent
417+
418+
if not provider.cleanup_called:
419+
# Deferred refcounting (Python 3.14+) may not collect immediately on del;
420+
# a single gc.collect() should still reclaim it since there are no cycles.
421+
gc.collect()
422+
423+
assert provider.cleanup_called, "Tool provider was not cleaned up after del agent"
424+
425+
426+
def test_tool_caller_raises_reference_error_after_agent_collected():
427+
"""Verify _ToolCaller raises ReferenceError when the Agent has been garbage collected."""
428+
agent = Agent()
429+
caller = agent.tool_caller
430+
# Clear the weak reference by replacing it directly
431+
caller._agent_ref = weakref.ref(agent)
432+
del agent
433+
gc.collect()
434+
435+
with pytest.raises(ReferenceError, match="Agent has been garbage collected"):
436+
_ = caller._agent

0 commit comments

Comments
 (0)