Harden raw search score handling

strongkeep-debug · strongkeep-debug · commit 7c6d8d96d153 · 2026-05-21T04:31:44.000-07:00
diff --git a/src/api/routes/memory.py b/src/api/routes/memory.py
@@ -8,6 +8,8 @@
 
 import asyncio
 import logging
+import math
+import threading
 import time
 from typing import Any, Dict, List
 
@@ -113,6 +115,14 @@ def _error(request: Request, detail: str, code: int, elapsed_ms: float = 0) -> J
     return JSONResponse(content=body.model_dump(), status_code=code)
 
 
+def _safe_score(score: Any) -> float:  # coerce any raw backend score to a finite float; fall back to 0.0
+    try:
+        value = float(score)
+    except (TypeError, ValueError, OverflowError):  # OverflowError: int too large to fit in a float
+        return 0.0
+    return value if math.isfinite(value) else 0.0  # NaN and +/-inf also collapse to 0.0
+
+
 def _detect_chat_provider(*urls: str) -> str:
     for url in urls:
         lowered = (url or "").lower()
@@ -150,8 +160,6 @@ async def _render_chat_share(url: str) -> tuple[str, str]:
 # reuse it across scrape requests. The browser is thread-safe when each
 # request uses its own BrowserContext.
 
-import threading
-
 _browser_lock = threading.Lock()
 _pw_instance = None
 _browser_instance = None
@@ -665,7 +673,7 @@ async def retrieve_memory(req: RetrieveRequest, request: Request, user: dict = D
             sources=[
                 SourceRecord(
                     domain=s.domain, content=s.content,
-                    score=round(s.score, 3), metadata=s.metadata,
+                    score=round(_safe_score(s.score), 3), metadata=s.metadata,
                 )
                 for s in result.sources
             ],
@@ -717,7 +725,7 @@ async def search_memory(req: SearchRequest, request: Request, user: dict = Depen
                 SourceRecord(
                     domain=s.domain,
                     content=s.content,
-                    score=round(s.score, 3),
+                    score=round(_safe_score(s.score), 3),
                     metadata=s.metadata,
                 )
                 for s in all_results
@@ -741,7 +749,7 @@ def _search_profile(pipeline: RetrievalPipeline, user_id: str) -> List[SourceRec
         raw = pipeline.vector_store.search_by_metadata(
             filters={"user_id": user_id, "domain": "profile"}, top_k=100,
         )
-        return [SourceRecord(domain="profile", content=r.content, score=r.score, metadata=r.metadata) for r in raw]
+        return [SourceRecord(domain="profile", content=r.content, score=_safe_score(r.score), metadata=r.metadata) for r in raw]
     except Exception as exc:
         logger.warning("Profile search error: %s", exc)
         return []
@@ -768,7 +776,7 @@ def _search_temporal(pipeline: RetrievalPipeline, query: str, user_id: str, top_
                 parts.append(f"Time: {ev['time']}")
             results.append(SourceRecord(
                 domain="temporal", content=" | ".join(parts),
-                score=ev.get("similarity_score", 0.0), metadata=ev,
+                score=_safe_score(ev.get("similarity_score", 0.0)), metadata=ev,
             ))
         return results
     except Exception as exc:
@@ -783,7 +791,7 @@ async def _search_summary(pipeline: RetrievalPipeline, query: str, user_id: str,
             filters={"user_id": user_id, "domain": "summary"},
         )
         return [
-            SourceRecord(domain="summary", content=r.content, score=r.score, metadata={"id": r.id, **r.metadata})
+            SourceRecord(domain="summary", content=r.content, score=_safe_score(r.score), metadata={"id": r.id, **r.metadata})
             for r in raw
         ]
     except Exception as exc:
diff --git a/src/pipelines/retrieval.py b/src/pipelines/retrieval.py
@@ -23,6 +23,7 @@
 import asyncio
 import hashlib
 import logging
+import math
 import time
 from collections import OrderedDict
 from typing import Any, Callable, Dict, List, Optional
@@ -323,10 +324,15 @@ async def search_raw(
         if not tasks:
             return []
 
-        task_results = await asyncio.gather(*tasks)
+        task_results = await asyncio.gather(*tasks, return_exceptions=True)
         results = [
-            record for domain_results in task_results for record in domain_results
+            record
+            for domain_results in task_results
+            if not self._log_search_error(domain_results)
+            for record in domain_results
         ]
+        for record in results:
+            record.score = self._score_value(record.score)
 
         return sorted(results, key=lambda record: record.score, reverse=True)
 
@@ -426,7 +432,7 @@ def _search_profile(
                 SourceRecord(
                     domain="profile",
                     content=r.content,
-                    score=r.score,
+                    score=self._score_value(r.score),
                     metadata={
                         "id": r.id,
                         "topic": topic,
@@ -465,7 +471,7 @@ async def _search_profile_raw(
                 SourceRecord(
                     domain="profile",
                     content=r.content,
-                    score=r.score,
+                    score=self._score_value(r.score),
                     metadata={"id": r.id, **r.metadata},
                 )
             )
@@ -522,7 +528,7 @@ async def _search_temporal(
                 SourceRecord(
                     domain="temporal",
                     content=content,
-                    score=ev.get("similarity_score", 0.0),
+                    score=self._score_value(ev.get("similarity_score", 0.0)),
                     metadata=ev,
                 )
             )
@@ -555,7 +561,7 @@ async def _search_summary(
                 SourceRecord(
                     domain="summary",
                     content=r.content,
-                    score=r.score,
+                    score=self._score_value(r.score),
                     metadata={"id": r.id, **r.metadata},
                 )
             )
@@ -602,7 +608,7 @@ async def _search_code(
                 SourceRecord(
                     domain="code",
                     content=f"{prefix}{r.content}",
-                    score=r.score,
+                    score=self._score_value(r.score),
                     metadata={"id": r.id, **metadata},
                 )
             )
@@ -650,7 +656,7 @@ async def _search_snippet(
                 SourceRecord(
                     domain="snippet",
                     content=content,
-                    score=r.score,
+                    score=self._score_value(r.score),
                     metadata={"id": r.id, **r.metadata},
                 )
             )
@@ -765,6 +771,19 @@ def _trim_cache(self, cache: OrderedDict, limit: int) -> None:
         while len(cache) > limit:
             cache.popitem(last=False)
 
+    def _log_search_error(self, domain_results: Any) -> bool:  # True when a gathered result is an error to skip
+        if isinstance(domain_results, BaseException):  # gather(return_exceptions=True) may also yield CancelledError
+            logger.warning("Raw search domain failed: %s", domain_results)
+            return True
+        return False
+
+    def _score_value(self, score: Any) -> float:  # coerce any raw score to a finite float; fall back to 0.0
+        try:
+            value = float(score)
+        except (TypeError, ValueError, OverflowError):  # OverflowError: int too large to fit in a float
+            return 0.0
+        return value if math.isfinite(value) else 0.0  # NaN and +/-inf also collapse to 0.0
+
     def _coerce_answer(self, answer: Any) -> str:
         if isinstance(answer, list):
             parts = []
@@ -805,7 +824,8 @@ def _format_tool_results(self, records: List[SourceRecord]) -> str:
 
         lines = []
         for i, rec in enumerate(records, 1):
-            score_str = f" (score: {rec.score:.2f})" if rec.score > 0 else ""
+            score = self._score_value(rec.score)
+            score_str = f" (score: {score:.2f})" if score > 0 else ""
             lines.append(f"{i}. [{rec.domain}]{score_str} {rec.content}")
         return "\n".join(lines)
 
diff --git a/tests/api/test_memory_search_routes.py b/tests/api/test_memory_search_routes.py
@@ -40,7 +40,7 @@ async def search_raw(
             "profile": SourceRecord(
                 domain="profile",
                 content="work / company = XMem",
-                score=0.7,
+                score=None,
             ),
             "code": SourceRecord(
                 domain="code",
@@ -102,6 +102,7 @@ def test_memory_search_route_returns_raw_hits_without_answer(memory_search_app):
 
     assert response.status_code == 200
     assert payload["data"]["total"] == 2
+    assert payload["data"]["results"][0]["score"] == 0.0
     assert payload["data"]["answer"] == ""
     assert payload["data"]["latency"]["raw"]["count"] == 1
     assert pipeline.search_calls[0]["domains"] == ["profile", "summary"]
diff --git a/tests/integration/test_retrieval_pipeline.py b/tests/integration/test_retrieval_pipeline.py
@@ -204,6 +204,35 @@ async def fake_domain(name: str, score: float):
     assert [record.domain for record in results] == ["summary", "temporal", "profile"]
 
 
+@pytest.mark.asyncio
+async def test_raw_search_skips_failed_domains_and_normalizes_scores(
+    vector_store, neo4j_client
+):
+    model = FakeChatModel()
+    pipeline = RetrievalPipeline(
+        model=model, vector_store=vector_store, neo4j_client=neo4j_client
+    )
+
+    async def profile_domain(*_args):  # succeeds, but its record carries no score
+        return [SourceRecord(domain="profile", content="No backend score", score=None)]
+
+    async def summary_domain(*_args):  # fails outright
+        raise RuntimeError("summary backend offline")
+
+    pipeline._search_profile_raw = profile_domain  # stub both domain searchers on this instance
+    pipeline._search_summary = summary_domain
+
+    results = await pipeline.search_raw(
+        "latency",
+        "alice",
+        ["profile", "summary"],
+        top_k=5,
+    )
+
+    assert [(record.domain, record.score) for record in results] == [("profile", 0.0)]  # summary skipped; None -> 0.0
+    assert pipeline._format_tool_results(results) == "1. [profile] No backend score"  # non-positive score is not rendered
+
+
 @pytest.mark.asyncio
 async def test_profile_catalog_fetch_does_not_block_event_loop(
     vector_store, neo4j_client