Skip to content
This repository was archived by the owner on Jun 3, 2026. It is now read-only.

Commit b7c56e7

Browse files
committed
Address raw search review feedback
1 parent 5a87d56 commit b7c56e7

2 files changed

Lines changed: 45 additions & 15 deletions

File tree

src/api/routes/memory.py

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -961,27 +961,39 @@ async def search_memory(req: SearchRequest, request: Request, user: dict = Depen
961961
all_results: List[SourceRecord] = []
962962
latency_ms: Dict[str, float] = {}
963963
plan = pipeline.raw_retrieval_plan(req.domains, answer=req.answer)
964+
raw_tasks = []
964965

965966
if "profile" in plan:
966-
results, elapsed = await _timed("profile", _search_profile, pipeline, user_id)
967-
latency_ms["profile"] = elapsed
968-
all_results.extend(results)
967+
raw_tasks.append((
968+
"profile",
969+
_timed("profile", _search_profile, pipeline, user_id, threaded=True),
970+
))
969971
if "temporal" in plan:
970-
results, elapsed = await _timed("temporal", _search_temporal, pipeline, req.query, user_id, req.top_k)
971-
latency_ms["temporal"] = elapsed
972-
all_results.extend(results)
972+
raw_tasks.append((
973+
"temporal",
974+
_timed("temporal", _search_temporal, pipeline, req.query, user_id, req.top_k, threaded=True),
975+
))
973976
if "summary" in plan:
974-
results, elapsed = await _timed("summary", _search_summary, pipeline, req.query, user_id, req.top_k)
975-
latency_ms["summary"] = elapsed
976-
all_results.extend(results)
977+
raw_tasks.append((
978+
"summary",
979+
_timed("summary", _search_summary, pipeline, req.query, user_id, req.top_k),
980+
))
977981
if "snippet" in plan:
978-
results, elapsed = await _timed("snippet", _search_snippet, pipeline, req.query, user_id, req.top_k)
979-
latency_ms["snippet"] = elapsed
980-
all_results.extend(results)
982+
raw_tasks.append((
983+
"snippet",
984+
_timed("snippet", _search_snippet, pipeline, req.query, user_id, req.top_k),
985+
))
981986
if "code" in plan:
982-
results, elapsed = await _timed("code", _search_code, pipeline, req.query, user_id, req.top_k)
983-
latency_ms["code"] = elapsed
984-
all_results.extend(results)
987+
raw_tasks.append((
988+
"code",
989+
_timed("code", _search_code, pipeline, req.query, user_id, req.top_k),
990+
))
991+
992+
if raw_tasks:
993+
raw_results = await asyncio.gather(*(task for _, task in raw_tasks))
994+
for (domain, _), (results, elapsed) in zip(raw_tasks, raw_results):
995+
latency_ms[domain] = elapsed
996+
all_results.extend(results)
985997

986998
all_results.sort(key=lambda record: record.score, reverse=True)
987999

src/pipelines/retrieval.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def __init__(
137137
self._profile_catalog_cache: Dict[str, tuple[float, List[Dict[str, str]], list]] = {}
138138
self._raw_retrieval_plan_cache: Dict[tuple[tuple[str, ...], bool], tuple[str, ...]] = {}
139139
self._cache_ttl_seconds = 60.0
140+
self._profile_catalog_cache_max_users = 256
140141

141142
logger.info("RetrievalPipeline initialized")
142143

@@ -499,8 +500,11 @@ def _fetch_profile_catalog(self, user_id: str):
499500
raw_results — the full SearchResult list, cached for _search_profile
500501
"""
501502
now = time.monotonic()
503+
self._prune_profile_catalog_cache(now)
504+
502505
cached = self._profile_catalog_cache.get(user_id)
503506
if cached and now - cached[0] < self._cache_ttl_seconds:
507+
self._profile_catalog_cache[user_id] = (now, cached[1], cached[2])
504508
return cached[1], cached[2]
505509

506510
try:
@@ -536,6 +540,20 @@ def _fetch_profile_catalog(self, user_id: str):
536540
self._profile_catalog_cache[user_id] = (now, catalog, results)
537541
return catalog, results
538542

543+
def _prune_profile_catalog_cache(self, now: float) -> None:
544+
"""Bound profile catalog cache by TTL and number of cached users."""
545+
expired_user_ids = [
546+
cached_user_id
547+
for cached_user_id, (cached_at, _, _) in self._profile_catalog_cache.items()
548+
if now - cached_at >= self._cache_ttl_seconds
549+
]
550+
for cached_user_id in expired_user_ids:
551+
self._profile_catalog_cache.pop(cached_user_id, None)
552+
553+
while len(self._profile_catalog_cache) >= self._profile_catalog_cache_max_users:
554+
oldest_user_id = next(iter(self._profile_catalog_cache))
555+
self._profile_catalog_cache.pop(oldest_user_id, None)
556+
539557
def raw_retrieval_plan(self, domains: List[str], answer: bool = False) -> tuple[str, ...]:
540558
"""Return a cached deterministic raw-search plan for the requested domains."""
541559
ordered_allowed = ("profile", "temporal", "summary", "snippet", "code")

0 commit comments

Comments
 (0)