From c5f93baec4b0a54abcaa4f2e0fb988884470eed3 Mon Sep 17 00:00:00 2001 From: Connor Black Date: Tue, 12 May 2026 21:51:08 -0400 Subject: [PATCH 1/2] fix(graph): cap /banks/{bank_id}/graph limit at 200 to keep responses Node-parseable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The graph endpoint accepts ``limit`` and the Control Plane UI defaults to 1000. Edge count scales quadratically with node count for densely-linked banks (memory_units cross-linked via memory_links). Empirical measurement on a 14k-memory bank: limit=200 → 23 MiB, limit=500 → 147 MiB, limit=1000 → 596 MiB. Next.js deserializes the response as a single JS string via response.json(); V8 caps string length at 0x1fffffe8 (~512 MiB). Banks past that cap error out with "Cannot create a string longer than 0x1fffffe8 characters" and the UI shows "Failed to fetch graph data" instead of any graph. Server-side clamp at 200 keeps the API usable for high-volume banks while preserving response payload semantics for smaller ones (where typical graph sizes are well under 200 nodes anyway). Graph viz tools are unusable past ~200 nodes regardless, so this isn't a UX regression. Real fix is paginated/streamed graph loading in the UI; this is a guard to prevent the existing UI from breaking on large banks until that lands. --- hindsight-api-slim/hindsight_api/api/http.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py index c357f4116..1b2f80f4d 100644 --- a/hindsight-api-slim/hindsight_api/api/http.py +++ b/hindsight-api-slim/hindsight_api/api/http.py @@ -2981,6 +2981,13 @@ async def api_graph( request_context: RequestContext = Depends(get_request_context), ): """Get graph data from database, filtered by bank_id and optionally by type.""" + # Edge count scales quadratically with node count for densely-linked banks. 
+ # Empirical measurement on a 14k-memory bank: limit=200 → 23 MiB, + # limit=500 → 147 MiB, limit=1000 → 596 MiB. The Control Plane fetches + # via Next.js which deserializes the response as a single JS string, + # and V8 caps string length at ~512 MiB. Cap server-side so giant banks + # remain visualizable instead of erroring out the UI entirely. + limit = min(max(1, limit), 200) try: data = await app.state.memory.get_graph_data( bank_id, From f74b5f1c89f3257cde8e5c40a79760faf6a08c29 Mon Sep 17 00:00:00 2001 From: Connor Black Date: Tue, 12 May 2026 22:28:59 -0400 Subject: [PATCH 2/2] fix(graph): align signature default with clamp + add clamping test Address Copilot review on #1605: 1. Move limit signature default from 1000 to 200 via Query(...) so OpenAPI docs match runtime behavior. ge=1 added for explicit lower bound. The silent clamp at 200 is preserved (no 422) so existing UI/SDK callers passing limit > 200 keep working with a smaller response. Description on the Query field documents the cap and rationale. 2. Add tests/test_graph_endpoint_clamp.py: - test_graph_limit_above_cap_is_silently_clamped: assert request with limit=1000 returns 200 OK and response.limit == 200, nodes <= 200. - test_graph_limit_below_cap_passes_through: assert request with limit=50 returns 200 OK and response.limit == 50 (no modification). 
--- hindsight-api-slim/hindsight_api/api/http.py | 23 ++++-- .../tests/test_graph_endpoint_clamp.py | 76 +++++++++++++++++++ 2 files changed, 91 insertions(+), 8 deletions(-) create mode 100644 hindsight-api-slim/tests/test_graph_endpoint_clamp.py diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py index 1b2f80f4d..c5fff0425 100644 --- a/hindsight-api-slim/hindsight_api/api/http.py +++ b/hindsight-api-slim/hindsight_api/api/http.py @@ -2972,7 +2972,16 @@ async def metrics_endpoint(): async def api_graph( bank_id: str, type: str | None = None, - limit: int = 1000, + limit: int = Query( + 200, + ge=1, + description=( + "Max nodes to return. Silently clamped to 200 server-side because " + "edge count scales quadratically with node count on dense banks and " + "the Control Plane UI cannot parse responses past V8's ~512 MiB " + "string limit. Requests above 200 are clamped, not rejected." + ), + ), q: str | None = None, tags: list[str] | None = Query(None), tags_match: str = "all_strict", @@ -2981,13 +2990,11 @@ async def api_graph( request_context: RequestContext = Depends(get_request_context), ): """Get graph data from database, filtered by bank_id and optionally by type.""" - # Edge count scales quadratically with node count for densely-linked banks. - # Empirical measurement on a 14k-memory bank: limit=200 → 23 MiB, - # limit=500 → 147 MiB, limit=1000 → 596 MiB. The Control Plane fetches - # via Next.js which deserializes the response as a single JS string, - # and V8 caps string length at ~512 MiB. Cap server-side so giant banks - # remain visualizable instead of erroring out the UI entirely. - limit = min(max(1, limit), 200) + # Empirical sizing on a 14k-memory bank: limit=200 → 23 MiB, + # limit=500 → 147 MiB, limit=1000 → 596 MiB (past V8's ~512 MiB + # string cap). Clamp silently rather than 422 so existing UI/SDK + # callers passing limit>200 keep working with a smaller response. 
+ limit = min(limit, 200) try: data = await app.state.memory.get_graph_data( bank_id, diff --git a/hindsight-api-slim/tests/test_graph_endpoint_clamp.py b/hindsight-api-slim/tests/test_graph_endpoint_clamp.py new file mode 100644 index 000000000..1a7d2e08e --- /dev/null +++ b/hindsight-api-slim/tests/test_graph_endpoint_clamp.py @@ -0,0 +1,76 @@ +"""Tests for the server-side clamp on /banks/{bank_id}/graph?limit=... + +Edge count scales quadratically with node count for dense banks; the Control +Plane (Next.js) deserializes the response as a single JS string, capped at +~512 MiB by V8. The endpoint silently clamps ``limit`` at 200 to keep +responses parseable while preserving backwards-compat for callers passing +larger values. +""" + +import uuid + +import httpx +import pytest +import pytest_asyncio + +from hindsight_api.api.http import create_app +from hindsight_api.engine.memory_engine import MemoryEngine + +GRAPH_LIMIT_CAP = 200 + + +@pytest_asyncio.fixture +async def graph_clamp_api_client(memory): + app = create_app(memory, initialize_memory=False) + transport = httpx.ASGITransport(app=app) + async with httpx.AsyncClient(transport=transport, base_url="http://test") as client: + yield client + + +@pytest.mark.asyncio +async def test_graph_limit_above_cap_is_silently_clamped( + memory: MemoryEngine, request_context, graph_clamp_api_client: httpx.AsyncClient +): + """Requests above 200 succeed (200 OK) and the response's ``limit`` field + reflects the clamped value, not the requested value.""" + bank_id = f"graph-clamp-{uuid.uuid4().hex[:8]}" + await memory.get_bank_profile(bank_id=bank_id, request_context=request_context) + # A single retain is enough to make the endpoint return a non-empty graph; + # the clamp behavior is independent of bank density. 
+ await memory.retain_async( + bank_id=bank_id, + content="Alice met Bob at the conference.", + request_context=request_context, + ) + + response = await graph_clamp_api_client.get( + f"/v1/default/banks/{bank_id}/graph", + params={"limit": 1000}, + ) + assert response.status_code == 200, response.text + data = response.json() + assert data["limit"] == GRAPH_LIMIT_CAP, ( + f"expected clamped limit={GRAPH_LIMIT_CAP}, got {data['limit']}" + ) + assert len(data["nodes"]) <= GRAPH_LIMIT_CAP + + +@pytest.mark.asyncio +async def test_graph_limit_below_cap_passes_through( + memory: MemoryEngine, request_context, graph_clamp_api_client: httpx.AsyncClient +): + """Requests at or below 200 are not modified.""" + bank_id = f"graph-clamp-passthrough-{uuid.uuid4().hex[:8]}" + await memory.get_bank_profile(bank_id=bank_id, request_context=request_context) + await memory.retain_async( + bank_id=bank_id, + content="Carol travels to Boston monthly.", + request_context=request_context, + ) + + response = await graph_clamp_api_client.get( + f"/v1/default/banks/{bank_id}/graph", + params={"limit": 50}, + ) + assert response.status_code == 200, response.text + assert response.json()["limit"] == 50