Skip to content

Commit 57a9e4d

Browse files
committed
fix: bound active memory rerank latency
1 parent cc3ee51 commit 57a9e4d

7 files changed

Lines changed: 94 additions & 14 deletions

File tree

plugins/hermes-dory/provider.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1437,7 +1437,7 @@ def _build_tool_schemas() -> list[dict[str, Any]]:
14371437
},
14381438
{
14391439
"name": "dory_active_memory",
1440-
"description": "Run the bounded active-memory pre-reply pass. Limits: budget_tokens <= 1200, timeout_ms <= 5000. Set include_wake=false if wake was already called.",
1440+
"description": "Run the bounded active-memory pre-reply pass. Limits: budget_tokens <= 1200, timeout_ms <= 30000. Set include_wake=false if wake was already called.",
14411441
"parameters": {
14421442
"type": "object",
14431443
"properties": {
@@ -1458,7 +1458,7 @@ def _build_tool_schemas() -> list[dict[str, Any]]:
14581458
},
14591459
},
14601460
"profile": {"type": "string"},
1461-
"timeout_ms": {"type": "integer", "minimum": 100, "maximum": 5000},
1461+
"timeout_ms": {"type": "integer", "minimum": 100, "maximum": 30000},
14621462
"budget_tokens": {"type": "integer", "minimum": 100, "maximum": 1200},
14631463
"include_wake": {"type": "boolean"},
14641464
"rerank": {"type": "string", "enum": ["auto", "true", "false"]},

src/dory_core/active_memory.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
_PLANNER_MIN_REMAINING_MS = 1800
4646
_COMPOSER_MIN_REMAINING_MS = 2200
4747
_COMPOSER_TIMEOUT_HEADROOM_MS = 6000
48+
_RERANK_TIMEOUT_HEADROOM_MS = 6000
4849
_TOPIC_TOKEN_RE = re.compile(r"[A-Za-z0-9][A-Za-z0-9_-]*")
4950
_TOPIC_STOPWORDS = {
5051
"about",
@@ -232,7 +233,7 @@ def _retrieve_evidence(
232233
mode="hybrid",
233234
corpus="durable",
234235
include_content=True,
235-
rerank="true" if req.rerank == "auto" else req.rerank,
236+
rerank=_active_memory_rerank_mode(req.rerank, deadline),
236237
deadline=deadline,
237238
source_policy=source_policy,
238239
min_remaining_ms=_COMPOSER_MIN_REMAINING_MS,
@@ -516,6 +517,16 @@ def _planning_context_from_helper(helper: WikiHelperContext) -> ActiveMemoryPlan
516517
)
517518

518519

520+
def _active_memory_rerank_mode(
521+
requested: Literal["auto", "true", "false"], deadline: "_Deadline"
522+
) -> Literal["auto", "true", "false"]:
523+
if requested == "false":
524+
return "false"
525+
if deadline.total_ms <= _RERANK_TIMEOUT_HEADROOM_MS:
526+
return "false"
527+
return "true" if requested == "auto" else requested
528+
529+
519530
def _search_candidates(
520531
search_engine: _SearchEngine,
521532
*,

src/dory_core/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,14 @@ class DorySettings(BaseSettings):
8484
query_expansion_max: int = Field(default=2, ge=0, le=5)
8585
query_reranker_enabled: bool = False
8686
query_reranker_provider: Literal["openrouter", "local"] = "openrouter"
87-
query_reranker_candidate_limit: int = Field(default=40, ge=2, le=100)
87+
query_reranker_candidate_limit: int = Field(default=8, ge=2, le=100)
8888
local_reranker_api_key: str | None = Field(
8989
default=None,
9090
validation_alias=AliasChoices("DORY_LOCAL_RERANKER_API_KEY", "DORY_LOCAL_LLM_API_KEY"),
9191
)
9292
local_reranker_base_url: str = "http://127.0.0.1:8000/v1"
9393
local_reranker_model: str = "qwen3-rerank"
94-
local_reranker_timeout_seconds: float = Field(default=30.0, gt=0.0, le=300.0)
94+
local_reranker_timeout_seconds: float = Field(default=5.0, gt=0.0, le=300.0)
9595
eval_judge_enabled: bool = True
9696
max_write_bytes: int = Field(default=10_240, ge=1)
9797
default_wake_budget_tokens: int = Field(default=600, ge=1, le=1500)

src/dory_core/tool_registry.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class DoryTool:
8282
http_path="/v1/active-memory",
8383
description=(
8484
"Run the bounded active-memory pre-reply pass. Limits: budget_tokens <= 1200, "
85-
"timeout_ms <= 5000. Set include_wake=false if wake was already called."
85+
"timeout_ms <= 30000. Set include_wake=false if wake was already called."
8686
),
8787
request_model=ActiveMemoryReq,
8888
handler="active_memory",

src/dory_core/types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ class ActiveMemoryReq(BaseModel):
133133
project: str | None = None
134134
scope: SearchScope = Field(default_factory=SearchScope)
135135
profile: ActiveMemoryProfile = "auto"
136-
timeout_ms: int = Field(default=3000, ge=100, le=5000)
136+
timeout_ms: int = Field(default=3000, ge=100, le=30000)
137137
budget_tokens: int = Field(default=400, ge=100, le=1200)
138138
include_wake: bool = True
139139
rerank: Literal["auto", "true", "false"] = "auto"

tests/unit/test_active_memory.py

Lines changed: 67 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from datetime import UTC, datetime
44
from pathlib import Path
5-
from time import sleep
5+
from time import monotonic, sleep
66

77
from dory_core.active_memory import ActiveMemoryEngine
88
from dory_core.retrieval_planner import ActiveMemoryComposition, ActiveMemoryPlanningContext, ActiveMemoryRetrievalPlan
@@ -153,6 +153,7 @@ def test_active_memory_builds_memory_block_for_state_question(tmp_path: Path) ->
153153
prompt="what are we working on today",
154154
agent="claude",
155155
cwd=str(tmp_path),
156+
timeout_ms=7000,
156157
)
157158
)
158159

@@ -358,6 +359,7 @@ def search(self, req: SearchReq): # pragma: no cover - test stub
358359
prompt="Before answering a coding question about Dory agent integrations, retrieve only the memory that matters.",
359360
agent="codex",
360361
include_wake=True,
362+
timeout_ms=7000,
361363
)
362364
)
363365

@@ -554,8 +556,8 @@ def search(self, req: SearchReq): # pragma: no cover - test stub
554556
prompt="debug Dory Docker MCP setup",
555557
agent="codex",
556558
include_wake=False,
557-
timeout_ms=5000,
558-
).model_copy(update={"timeout_ms": 7000})
559+
timeout_ms=7000,
560+
)
559561
)
560562

561563
assert "Docker MCP setup fails when the daemon URL is stale." in result.block
@@ -845,8 +847,8 @@ def test_active_memory_uses_planner_queries_and_llm_composition_when_budget_allo
845847
prompt="what are we working on today",
846848
agent="claude",
847849
cwd=str(tmp_path),
848-
timeout_ms=5000,
849-
).model_copy(update={"timeout_ms": 7000})
850+
timeout_ms=7000,
851+
)
850852
)
851853

852854
assert result.summary == "Rooster remains the active focus."
@@ -916,8 +918,8 @@ def compose_active_memory(
916918
prompt="what are we working on today",
917919
agent="claude",
918920
include_wake=False,
919-
timeout_ms=5000,
920-
).model_copy(update={"timeout_ms": 7000})
921+
timeout_ms=7000,
922+
)
921923
)
922924

923925
assert result.summary.startswith("Rooster is the active focus this week.")
@@ -1095,3 +1097,61 @@ def search(self, req: SearchReq):
10951097

10961098
assert 1 <= len(search_engine.requests) < 4
10971099
assert result.kind == "memory"
1100+
1101+
1102+
def test_active_memory_disables_rerank_when_total_timeout_cannot_absorb_it(tmp_path: Path) -> None:
1103+
class ManyQueryPlanner:
1104+
def plan_active_memory(
1105+
self,
1106+
*,
1107+
prompt: str,
1108+
context: ActiveMemoryPlanningContext,
1109+
) -> ActiveMemoryRetrievalPlan:
1110+
del prompt, context
1111+
return ActiveMemoryRetrievalPlan(
1112+
durable_queries=("one", "two", "three"),
1113+
session_queries=(),
1114+
include_sessions=False,
1115+
durable_limit=8,
1116+
session_limit=0,
1117+
)
1118+
1119+
class RerankSensitiveSearchEngine(_StubSearchEngine):
1120+
def search(self, req: SearchReq):
1121+
self.requests.append(req)
1122+
if req.rerank != "false":
1123+
sleep(0.08)
1124+
return super().search(req)
1125+
1126+
search_engine = RerankSensitiveSearchEngine()
1127+
engine = ActiveMemoryEngine(
1128+
wake_builder=WakeBuilder(root=tmp_path),
1129+
search_engine=search_engine,
1130+
planner=ManyQueryPlanner(),
1131+
)
1132+
1133+
started = monotonic()
1134+
result = engine.build(
1135+
ActiveMemoryReq(
1136+
prompt="what are we working on today",
1137+
agent="claude",
1138+
include_wake=False,
1139+
timeout_ms=5000,
1140+
)
1141+
)
1142+
elapsed = monotonic() - started
1143+
1144+
assert result.kind == "memory"
1145+
assert search_engine.requests
1146+
assert {req.rerank for req in search_engine.requests} == {"false"}
1147+
assert elapsed < 0.08
1148+
1149+
1150+
def test_active_memory_request_accepts_larger_timeout_for_slow_local_models() -> None:
1151+
req = ActiveMemoryReq(
1152+
prompt="what are we working on today",
1153+
agent="claude",
1154+
timeout_ms=12000,
1155+
)
1156+
1157+
assert req.timeout_ms == 12000

tests/unit/test_rerank_orchestrator.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,12 @@ def test_rerank_telemetry_logs_safe_metrics_without_content(caplog) -> None:
112112
assert "snippet_chars_after=" in messages
113113
assert sensitive_text not in messages
114114
assert "needle detail" not in messages
115+
116+
117+
def test_rerank_orchestrator_default_candidate_limit_matches_local_latency_budget() -> None:
118+
from dory_core.config import DorySettings
119+
120+
settings = DorySettings()
121+
122+
assert settings.query_reranker_candidate_limit == 8
123+
assert settings.local_reranker_timeout_seconds == 5.0

0 commit comments

Comments
 (0)