Skip to content

Commit bd2a28a

Browse files
vishal-balaclaude
andcommitted
fix(mcp): tear down resources when startup fails after initialization (RAAE-1604)
Move binding teardown into startup()'s exception handler so it covers every post-begin failure, not just failures inside _initialize_runtime_resources. Previously, if initialization succeeded (clients connected, tools registered) and a later step such as _mark_running() raised, the handler marked the server stopped without tearing down, leaking Redis clients and vectorizers; a later shutdown() saw STOPPED and skipped cleanup. _initialize_runtime_resources no longer needs its own teardown (a binding that fails mid-build still closes its bare client inside _initialize_binding), giving a single fail-closed teardown path. Add a regression test for the init-succeeds-then-post-step-fails case. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 96f42c3 commit bd2a28a

2 files changed

Lines changed: 53 additions & 11 deletions

File tree

redisvl/mcp/server.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,12 @@ async def startup(self) -> None:
9797
await self._initialize_runtime_resources()
9898
await self._mark_running()
9999
except Exception:
100+
# Fail closed: release whatever initialization built before
101+
# marking the server stopped. This is the single teardown path
102+
# for any post-begin failure -- binding init, tool registration,
103+
# or a later step such as _mark_running -- so resources are
104+
# never leaked regardless of where startup fails.
105+
await self._teardown_runtime()
100106
await self._mark_stopped()
101107
raise
102108

@@ -400,17 +406,14 @@ async def _initialize_runtime_resources(self) -> None:
400406
)
401407
self._bindings = {}
402408

403-
try:
404-
for binding_id, binding in self.config.indexes.items():
405-
self._bindings[binding_id] = await self._initialize_binding(
406-
binding_id, binding
407-
)
408-
self._register_tools()
409-
except Exception:
410-
# Tear down any bindings already built before re-raising so startup
411-
# fails closed without leaking connections.
412-
await self._teardown_runtime()
413-
raise
409+
# On failure, startup()'s handler tears down any bindings built here, so
410+
# this method does not need its own teardown. (A binding that fails
411+
# mid-build closes its own bare client inside _initialize_binding.)
412+
for binding_id, binding in self.config.indexes.items():
413+
self._bindings[binding_id] = await self._initialize_binding(
414+
binding_id, binding
415+
)
416+
self._register_tools()
414417

415418
async def _initialize_binding(
416419
self, binding_id: str, binding: MCPIndexBindingConfig

tests/unit/test_mcp/test_server.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,45 @@ async def fail_connection(**kwargs):
223223
assert server._bindings == {}
224224

225225

226+
@pytest.mark.asyncio
227+
async def test_startup_tears_down_when_a_post_init_step_fails(monkeypatch):
228+
"""Initialization succeeds but a later step raises; resources must be freed."""
229+
monkeypatch.setattr(
230+
"redisvl.mcp.server.FastMCP.__init__", lambda self, *a, **k: None
231+
)
232+
233+
async def fake_initialize(self):
234+
# Simulate a fully initialized runtime (a live binding) ...
235+
self.config = SimpleNamespace()
236+
self._semaphore = SimpleNamespace()
237+
self._bindings = {
238+
"knowledge": SimpleNamespace(
239+
binding_id="knowledge", index=None, vectorizer=None
240+
)
241+
}
242+
243+
async def fail_mark_running(self):
244+
# ... then fail on the post-init step.
245+
raise RuntimeError("mark running failed")
246+
247+
monkeypatch.setattr(
248+
RedisVLMCPServer, "_initialize_runtime_resources", fake_initialize
249+
)
250+
monkeypatch.setattr(RedisVLMCPServer, "_mark_running", fail_mark_running)
251+
252+
server = RedisVLMCPServer(_dummy_settings())
253+
254+
with pytest.raises(RuntimeError, match="mark running failed"):
255+
await server.startup()
256+
257+
# Teardown ran from startup()'s handler even though the failure was after
258+
# initialization, so nothing leaks and the server is left stopped.
259+
assert server._lifecycle_state.name == "STOPPED"
260+
assert server._bindings == {}
261+
assert server.config is None
262+
assert server._semaphore is None
263+
264+
226265
@pytest.mark.asyncio
227266
async def test_startup_failure_before_index_initialization_closes_client(monkeypatch):
228267
monkeypatch.setattr(

0 commit comments

Comments
 (0)