Skip to content

Commit df141f5

Browse files
cdeust and claude committed
release: v3.14.12 — fix MCP client deadlock on long upstream responses
Two deadlock vectors fixed in mcp_client.py: 1. _read_loop swallowed all exceptions silently. When the reader died (LimitOverrunError, IncompleteReadError, ConnectionResetError, BrokenPipeError, etc.), pending request futures stayed pending forever and the `await future` in _send blocked the caller indefinitely. The reader now rejects every pending future with a McpConnectionError naming the terminal cause. 2. _send honoured callTimeoutMs: 0 as an unbounded await. Combined with silent reader death, deadlock was guaranteed on any upstream that exceeded the line buffer or died without responding. _send now enforces a 60-minute hard ceiling even when the operator opts into "no timeout". Observed: ingest_codebase hung for 8+ minutes with both Cortex and the upstream Rust binary at 0% CPU on a polyglot Android repo. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 050be8b commit df141f5

4 files changed

Lines changed: 92 additions & 10 deletions

File tree

.claude-plugin/plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "cortex",
33
"description": "Persistent memory for Claude Code — remembers across sessions automatically. Install and forget. Scientific retrieval backed by 41 published papers.",
4-
"version": "3.14.11",
4+
"version": "3.14.12",
55
"author": {
66
"name": "Clement Deust",
77
"email": "admin@ai-architect.tools"

CHANGELOG.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,35 @@ adheres to [Semantic Versioning](https://semver.org/).
66

77
## [Unreleased]
88

9+
## [3.14.12] — fix MCP client deadlock on long upstream responses
10+
11+
### Fixed
12+
13+
- **`ingest_codebase` hung indefinitely on polyglot repos.** Two
14+
deadlock vectors in `mcp_client.py`:
15+
16+
1. `_read_loop`'s `except Exception: pass` silently swallowed any
17+
stream-level failure (`LimitOverrunError`, `IncompleteReadError`,
18+
`ConnectionResetError`, `BrokenPipeError`, JSON-side bugs). When
19+
the reader exited, every pending request future stayed pending
20+
forever — `_send`'s `await future` blocked the caller indefinitely.
21+
Reader now rejects every pending future with a
22+
`McpConnectionError` naming the terminal cause's type, so callers
23+
surface a clear error instead of hanging.
24+
25+
2. `_send` honoured `callTimeoutMs: 0` as "no timeout at all"
26+
and called `await future` unbounded. Combined with the silent
27+
reader death, this guaranteed deadlock on any upstream that
28+
emitted more than `limit` bytes on a single line or terminated without
29+
responding. We now enforce a 60-minute hard ceiling even when
30+
the operator opts into "no timeout" — well above any legitimate
31+
codebase indexing job (largest observed production runs are
32+
~12 minutes), yet low enough that a wedged upstream still surfaces as an error.
33+
34+
- `_read_loop` now logs non-JSON lines instead of silently dropping
35+
them, so future protocol-level mismatches become visible without
36+
crashing the loop.
37+
938
## [3.14.11] — track automatised-pipeline binary rename + fix pool allowlist
1039

1140
### Fixed

mcp_server/infrastructure/mcp_client.py

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -250,14 +250,24 @@ async def _send(self, method: str, params: dict) -> Any:
250250
self._proc.stdin.write((msg + "\n").encode()) # type: ignore
251251
await self._proc.stdin.drain() # type: ignore
252252

253-
if self._call_timeout_ms is None:
254-
return await future
253+
# Even when the operator opted into "no per-call timeout"
254+
# (callTimeoutMs == 0), enforce a hard ceiling so a wedged
255+
# upstream cannot deadlock the caller forever. 60 minutes is
256+
# well above any legitimate codebase indexing job (the
257+
# largest production runs we have observed are ~12 minutes).
258+
# source: deadlock observed 2026-04-27 — ingest_codebase hung
259+
# for >8 minutes with both Cortex and the upstream sleeping
260+
# at 0% CPU on a polyglot Android repo. Reader had exited
261+
# silently and ``await future`` was unbounded.
262+
effective_timeout = (
263+
self._call_timeout_ms / 1000 if self._call_timeout_ms else 3600.0
264+
)
255265
try:
256-
return await asyncio.wait_for(future, timeout=self._call_timeout_ms / 1000)
266+
return await asyncio.wait_for(future, timeout=effective_timeout)
257267
except asyncio.TimeoutError:
258268
self._pending.pop(req_id, None)
259269
raise McpConnectionError(
260-
f"Timeout after {self._call_timeout_ms}ms: {method}"
270+
f"Timeout after {int(effective_timeout * 1000)}ms: {method}"
261271
)
262272

263273
def _notify(self, method: str, params: dict | None = None) -> None:
@@ -273,10 +283,15 @@ def _touch_activity(self) -> None:
273283
pass
274284

275285
async def _read_loop(self) -> None:
286+
# Track terminal cause so all pending futures get a real error
287+
# instead of hanging forever when the reader exits.
288+
terminal_exc: BaseException | None = None
276289
try:
277290
while True:
278291
line = await self._proc.stdout.readline() # type: ignore
279292
if not line:
293+
# EOF — child closed stdout. Fall through to fail
294+
# pending futures so callers do not block forever.
280295
break
281296
decoded = line.decode("utf-8").strip()
282297
if not decoded or decoded.startswith("Content-Length"):
@@ -296,11 +311,49 @@ async def _read_loop(self) -> None:
296311
else:
297312
future.set_result(msg.get("result"))
298313
except (json.JSONDecodeError, ValueError):
299-
pass
314+
# Bad payload from the upstream is recoverable —
315+
# log and continue rather than killing the loop.
316+
print(
317+
f"[mcp-client] non-JSON line dropped: {decoded[:200]}",
318+
file=sys.stderr,
319+
)
320+
continue
300321
except asyncio.CancelledError:
301-
pass
302-
except Exception:
303-
pass
322+
terminal_exc = None
323+
except (
324+
asyncio.LimitOverrunError,
325+
asyncio.IncompleteReadError,
326+
ConnectionResetError,
327+
BrokenPipeError,
328+
) as exc:
329+
# Stream-level failure: most often a single response line
330+
# exceeded the configured ``limit`` bytes. Surface it as
331+
# the terminal cause for every pending future, so callers
332+
# see a clear McpConnectionError instead of hanging.
333+
terminal_exc = exc
334+
print(
335+
f"[mcp-client] reader stream error: {type(exc).__name__}: {exc}",
336+
file=sys.stderr,
337+
)
338+
except Exception as exc: # noqa: BLE001
339+
terminal_exc = exc
340+
print(
341+
f"[mcp-client] reader unexpected error: {type(exc).__name__}: {exc}",
342+
file=sys.stderr,
343+
)
344+
finally:
345+
# Reader is exiting — wake every pending caller. Without
346+
# this, ``_send``'s ``await future`` blocks forever
347+
# (deadlock observed on long upstream responses).
348+
for fut in list(self._pending.values()):
349+
if not fut.done():
350+
fut.set_exception(
351+
McpConnectionError(
352+
f"Upstream reader terminated: "
353+
f"{type(terminal_exc).__name__ if terminal_exc else 'EOF'}"
354+
)
355+
)
356+
self._pending.clear()
304357

305358
async def _stderr_loop(self) -> None:
306359
log_fh = self._open_stderr_log()

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "neuro-cortex-memory"
7-
version = "3.14.11"
7+
version = "3.14.12"
88
description = "Scientifically-grounded memory system based on computational neuroscience research"
99
readme = "README.md"
1010
license = "MIT"

0 commit comments

Comments
 (0)