Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion hindsight-api-slim/hindsight_api/api/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -2972,7 +2972,16 @@ async def metrics_endpoint():
async def api_graph(
bank_id: str,
type: str | None = None,
limit: int = 1000,
limit: int = Query(
200,
ge=1,
description=(
"Max nodes to return. Silently clamped to 200 server-side because "
"edge count scales quadratically with node count on dense banks and "
"the Control Plane UI cannot parse responses past V8's ~512 MiB "
"string limit. Requests above 200 are clamped, not rejected."
),
),
q: str | None = None,
tags: list[str] | None = Query(None),
tags_match: str = "all_strict",
Expand All @@ -2981,6 +2990,11 @@ async def api_graph(
request_context: RequestContext = Depends(get_request_context),
):
"""Get graph data from database, filtered by bank_id and optionally by type."""
# Empirical sizing on a 14k-memory bank: limit=200 → 23 MiB,
# limit=500 → 147 MiB, limit=1000 → 596 MiB (past V8's ~512 MiB
# string cap). Clamp silently rather than 422 so existing UI/SDK
# callers passing limit>200 keep working with a smaller response.
limit = min(limit, 200)
try:
data = await app.state.memory.get_graph_data(
bank_id,
Expand Down
76 changes: 76 additions & 0 deletions hindsight-api-slim/tests/test_graph_endpoint_clamp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""Tests for the server-side clamp on /banks/{bank_id}/graph?limit=...

Edge count scales quadratically with node count for dense banks; the Control
Plane (Next.js) deserializes the response as a single JS string, capped at
~512 MiB by V8. The endpoint silently clamps ``limit`` at 200 to keep
responses parseable while preserving backwards-compat for callers passing
larger values.
"""

import uuid

import httpx
import pytest
import pytest_asyncio

from hindsight_api.api.http import create_app
from hindsight_api.engine.memory_engine import MemoryEngine

# Node-count ceiling the graph endpoint is expected to clamp ``limit`` to;
# the tests in this module compare the response's ``limit`` field against it.
GRAPH_LIMIT_CAP = 200


@pytest_asyncio.fixture
async def graph_clamp_api_client(memory):
    """Yield an async HTTP client wired directly to the API app.

    The app is built from the ``memory`` fixture with
    ``initialize_memory=False`` and served through ``httpx.ASGITransport``.
    The client is closed when the fixture is torn down.
    """
    asgi_app = create_app(memory, initialize_memory=False)
    async with httpx.AsyncClient(
        transport=httpx.ASGITransport(app=asgi_app),
        base_url="http://test",
    ) as http_client:
        yield http_client


@pytest.mark.asyncio
async def test_graph_limit_above_cap_is_silently_clamped(
    memory: MemoryEngine, request_context, graph_clamp_api_client: httpx.AsyncClient
):
    """A ``limit`` above the cap is accepted and reported back as the cap.

    The endpoint must answer 200 OK (not a validation error), echo the
    clamped value in the response's ``limit`` field, and return no more
    nodes than the cap allows.
    """
    bank_id = f"graph-clamp-{uuid.uuid4().hex[:8]}"
    # NOTE(review): presumably this call creates the bank on first access —
    # confirm against MemoryEngine.get_bank_profile.
    await memory.get_bank_profile(bank_id=bank_id, request_context=request_context)

    # One retained memory is enough for a non-empty graph; how dense the bank
    # is has no bearing on whether the clamp is applied.
    await memory.retain_async(
        bank_id=bank_id,
        content="Alice met Bob at the conference.",
        request_context=request_context,
    )

    graph_url = f"/v1/default/banks/{bank_id}/graph"
    over_cap_query = {"limit": 1000}
    response = await graph_clamp_api_client.get(graph_url, params=over_cap_query)

    assert response.status_code == 200, response.text
    data = response.json()
    assert data["limit"] == GRAPH_LIMIT_CAP, (
        f"expected clamped limit={GRAPH_LIMIT_CAP}, got {data['limit']}"
    )
    node_count = len(data["nodes"])
    assert node_count <= GRAPH_LIMIT_CAP


@pytest.mark.asyncio
async def test_graph_limit_below_cap_passes_through(
    memory: MemoryEngine, request_context, graph_clamp_api_client: httpx.AsyncClient
):
    """Requests at or below 200 are not modified.

    Both a value well under the cap and the cap itself are exercised, so an
    off-by-one in the server-side clamp (for example capping at 199) fails
    here instead of slipping through.
    """
    bank_id = f"graph-clamp-passthrough-{uuid.uuid4().hex[:8]}"
    await memory.get_bank_profile(bank_id=bank_id, request_context=request_context)
    await memory.retain_async(
        bank_id=bank_id,
        content="Carol travels to Boston monthly.",
        request_context=request_context,
    )

    # The bank is seeded once and reused for every probed value: only the
    # echoed ``limit`` matters here, not the graph contents.
    for requested_limit in (50, GRAPH_LIMIT_CAP):
        response = await graph_clamp_api_client.get(
            f"/v1/default/banks/{bank_id}/graph",
            params={"limit": requested_limit},
        )
        assert response.status_code == 200, response.text
        returned_limit = response.json()["limit"]
        assert returned_limit == requested_limit, (
            f"limit={requested_limit} should pass through unchanged, "
            f"got {returned_limit}"
        )
Loading