Skip to content

Commit 44972d3

Browse files
authored
fix(mcp): omit reflect tool_trace/llm_trace from responses by default (#2242)
The MCP `reflect` tool returned the full `reflect_async` result, which includes `tool_trace` and `llm_trace` — the entire internal agent loop, including full mental-model text. A default reflect response measured 59,657 chars (text 5,987 + tool_trace 52,711), silently consuming tens of KB of MCP-client context on every call, while the REST API omits the trace by default. Add a symmetric `include_trace: bool = False` flag (mirroring the existing `include_based_on`); the trace becomes opt-in for debugging. Applied to both the multi-bank and single-bank reflect registrations, with a regression test covering both.
1 parent aa308ad commit 44972d3

2 files changed

Lines changed: 78 additions & 0 deletions

File tree

hindsight-api-slim/hindsight_api/mcp_tools.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,7 @@ async def reflect(
983983
tags: list[str] | None = None,
984984
tags_match: str = "any",
985985
include_based_on: bool = False,
986+
include_trace: bool = False,
986987
bank_id: str | None = None,
987988
) -> str:
988989
"""
@@ -1013,6 +1014,7 @@ async def reflect(
10131014
tags: Optional tags to filter memories by (e.g., ['project:alpha'])
10141015
tags_match: How to match tags - 'any' (match any tag) or 'all' (match all tags). Default: 'any'
10151016
include_based_on: Include source facts used for synthesis. Defaults to false because broad reflections can exceed MCP client result limits.
1017+
include_trace: Include the reflection's internal tool_trace/llm_trace. Defaults to false because the trace can be tens of KB and overflow MCP client context; enable only for debugging.
10161018
bank_id: Optional bank to reflect in (defaults to session bank). Use for cross-bank operations.
10171019
"""
10181020
try:
@@ -1042,6 +1044,12 @@ async def reflect(
10421044
result_data = json.loads(reflect_result.model_dump_json(indent=2))
10431045
if not include_based_on:
10441046
result_data.pop("based_on", None)
1047+
if not include_trace:
1048+
# The agentic reflect loop's tool_trace/llm_trace can be tens of KB
1049+
# (full mental-model text) and silently overflow MCP client context;
1050+
# the REST API omits it by default too. Opt in via include_trace.
1051+
result_data.pop("tool_trace", None)
1052+
result_data.pop("llm_trace", None)
10451053
if response_schema is not None and hasattr(reflect_result, "structured_output"):
10461054
result_data["structured_output"] = reflect_result.structured_output
10471055
return json.dumps(result_data, indent=2)
@@ -1064,6 +1072,7 @@ async def reflect(
10641072
tags: list[str] | None = None,
10651073
tags_match: str = "any",
10661074
include_based_on: bool = False,
1075+
include_trace: bool = False,
10671076
) -> dict:
10681077
"""
10691078
Generate thoughtful analysis by synthesizing stored memories with the bank's personality.
@@ -1093,6 +1102,7 @@ async def reflect(
10931102
tags: Optional tags to filter memories by (e.g., ['project:alpha'])
10941103
tags_match: How to match tags - 'any' (match any tag) or 'all' (match all tags). Default: 'any'
10951104
include_based_on: Include source facts used for synthesis. Defaults to false because broad reflections can exceed MCP client result limits.
1105+
include_trace: Include the reflection's internal tool_trace/llm_trace. Defaults to false because the trace can be tens of KB and overflow MCP client context; enable only for debugging.
10961106
"""
10971107
try:
10981108
target_bank = config.bank_id_resolver()
@@ -1121,6 +1131,12 @@ async def reflect(
11211131
result_data = reflect_result.model_dump()
11221132
if not include_based_on:
11231133
result_data.pop("based_on", None)
1134+
if not include_trace:
1135+
# The agentic reflect loop's tool_trace/llm_trace can be tens of KB
1136+
# (full mental-model text) and silently overflow MCP client context;
1137+
# the REST API omits it by default too. Opt in via include_trace.
1138+
result_data.pop("tool_trace", None)
1139+
result_data.pop("llm_trace", None)
11241140
if response_schema is not None and hasattr(reflect_result, "structured_output"):
11251141
result_data["structured_output"] = reflect_result.structured_output
11261142
return result_data

hindsight-api-slim/tests/test_mcp_tools.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1947,3 +1947,65 @@ async def test_annotations_apply_in_single_bank_mode(self, mock_memory):
19471947
ann = _tools(_make_mcp_server(mock_memory, {"recall"}, include_bank_id=False))["recall"].annotations
19481948
assert ann is not None
19491949
assert ann.readOnlyHint is True
1950+
1951+
1952+
def _reflect_mcp_with_trace(include_bank_id_param: bool):
1953+
"""An MCP server whose reflect returns a result carrying tool_trace/llm_trace."""
1954+
from fastmcp import FastMCP
1955+
1956+
# Mirrors ReflectResult: the agentic loop's trace fields are large and present.
1957+
reflect_payload = {
1958+
"text": "answer",
1959+
"based_on": {"world": []},
1960+
"tool_trace": [{"tool": "recall", "output": "x" * 1000}],
1961+
"llm_trace": [{"model": "test", "output": "y" * 1000}],
1962+
}
1963+
memory = MagicMock()
1964+
memory.reflect_async = AsyncMock(
1965+
return_value=MagicMock(
1966+
model_dump_json=lambda indent=None: json.dumps(reflect_payload),
1967+
model_dump=lambda: dict(reflect_payload),
1968+
structured_output=None,
1969+
)
1970+
)
1971+
mcp = FastMCP("test")
1972+
config = MCPToolsConfig(
1973+
bank_id_resolver=lambda: "test-bank",
1974+
include_bank_id_param=include_bank_id_param,
1975+
tools={"reflect"},
1976+
)
1977+
register_mcp_tools(mcp, memory, config)
1978+
return mcp
1979+
1980+
1981+
def _reflect_result_data(result) -> dict:
1982+
"""The multi-bank reflect returns a JSON string; single-bank returns a dict."""
1983+
return json.loads(result) if isinstance(result, str) else result
1984+
1985+
1986+
@pytest.mark.asyncio
1987+
class TestReflectTraceOmission:
1988+
"""reflect must not leak the agentic tool_trace/llm_trace into MCP responses by default."""
1989+
1990+
@pytest.mark.parametrize("multi_bank", [True, False])
1991+
async def test_trace_omitted_by_default(self, multi_bank):
1992+
mcp = _reflect_mcp_with_trace(multi_bank)
1993+
data = _reflect_result_data(await _tools(mcp)["reflect"].fn(query="q"))
1994+
assert data["text"] == "answer"
1995+
assert "tool_trace" not in data
1996+
assert "llm_trace" not in data
1997+
1998+
@pytest.mark.parametrize("multi_bank", [True, False])
1999+
async def test_trace_included_when_requested(self, multi_bank):
2000+
mcp = _reflect_mcp_with_trace(multi_bank)
2001+
data = _reflect_result_data(await _tools(mcp)["reflect"].fn(query="q", include_trace=True))
2002+
assert "tool_trace" in data
2003+
assert "llm_trace" in data
2004+
2005+
@pytest.mark.parametrize("multi_bank", [True, False])
2006+
async def test_based_on_flag_is_independent_of_trace(self, multi_bank):
2007+
# include_based_on keeps based_on but must not pull the trace back in.
2008+
mcp = _reflect_mcp_with_trace(multi_bank)
2009+
data = _reflect_result_data(await _tools(mcp)["reflect"].fn(query="q", include_based_on=True))
2010+
assert "based_on" in data
2011+
assert "tool_trace" not in data

0 commit comments

Comments
 (0)