fix: pin prefetch search_type=text to avoid async-vector race

groksrc · claude · groksrc · commit f4b37e2a44b8 · 2026-05-12T15:46:17.000-05:00
BM 0.20.x defaults search_type to "hybrid" when semantic_search is
enabled (default), which mixes FTS with vector search. Vector
indexing is scheduled asynchronously (knowledge_router.py:272-278 +
search_service._schedule_vector_sync_if_enabled), so hybrid search
can miss notes that were just written — the FTS row exists but the
vector row hasn't been added yet.

On a Mac dev box, vector indexing finishes before the test's poll
loop ends. On the GitHub CI runner (cold onnxruntime, shared CPU),
the vector index lags long enough that all 111 prefetch attempts
made within a 30s budget returned zero results. Confirmed via
integration log diagnostics from run 25760814491.

prefetch is a 3s-budget recall hot path with keyword-shaped queries;
FTS is both faster and the semantically correct lookup for it.
queue_prefetch mirrors the change. Agent-facing bm_search is
unchanged — natural-language queries still benefit from hybrid when
the index has caught up.

Also revises the integration test:
- budget back down to 10s (was 30s in the previous flake-mitigation
  attempt; with search_type=text we don't need that margin)
- keeps the improved diagnostic that surfaces failure_count and
  circuit state — the previous run's 111-attempts diagnostic is what
  led us to the right cause

Unit test test_prefetch_calls_search_when_cache_empty pins
search_type="text" so future refactors can't silently regress.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/__init__.py b/__init__.py
@@ -975,12 +975,20 @@ def prefetch(self, query: str, *, session_id: str = "") -> str:
         if not self._initialized or self._actor is None or self._is_circuit_open():
             return ""
         try:
+            # search_type="text" — bypass BM's "hybrid" default which mixes FTS
+            # with vector search. Vector indexing is scheduled asynchronously
+            # in BM (see services/search_service.py:_schedule_vector_sync_if_enabled),
+            # so hybrid search can miss notes that were just written, especially
+            # under cold-start or load. Prefetch is a recall hot path with a
+            # 3s budget and the queries are usually keyword-like — FTS-only is
+            # both faster and more deterministic.
             raw = self._actor.call(
                 "search_notes",
                 {
                     "project": self._project,
                     "query": query,
                     "page_size": 5,
+                    "search_type": "text",
                     "output_format": "json",
                 },
                 timeout=3.0,
@@ -998,12 +1006,16 @@ def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
 
         def _bg() -> None:
             try:
+                # search_type="text" mirrors prefetch() — see note there. The
+                # background path can afford a longer timeout but the
+                # async-vector-indexing race still applies.
                 raw = self._actor.call(  # type: ignore[union-attr]
                     "search_notes",
                     {
                         "project": self._project,
                         "query": query,
                         "page_size": 5,
+                        "search_type": "text",
                         "output_format": "json",
                     },
                     timeout=10.0,
diff --git a/tests/test_integration.py b/tests/test_integration.py
@@ -324,12 +324,14 @@ def test_prefetch_against_real_bm(provider, bm):
         "folder": "tests",
     })
 
-    # BM may need time to index the new note. prefetch's own actor.call
-    # times out at 3.0s per attempt; on a cold CI runner (especially with
-    # onnxruntime startup), indexing latency plus a few retries can push
-    # well past the previous 5s budget. 30s gives ~10 attempts and absorbs
-    # the worst observed cold-start delays.
-    deadline = time.monotonic() + 30.0
+    # BM's FTS index is updated synchronously inside the write_note API
+    # path (knowledge_router.py:272), so this loop is really only smoothing
+    # over the round-trip cost of a few RPCs on a slow runner. prefetch
+    # explicitly requests search_type="text" so we don't get pulled onto
+    # BM's hybrid path, where vector indexing is async and would race the
+    # search.
+    budget_secs = 10.0
+    deadline = time.monotonic() + budget_secs
     out = ""
     attempts = 0
     while time.monotonic() < deadline:
@@ -341,7 +343,7 @@ def test_prefetch_against_real_bm(provider, bm):
 
     assert out, (
         f"prefetch returned nothing after {attempts} attempt(s) over "
-        f"{30.0}s; provider._failure_count={provider._failure_count}, "
+        f"{budget_secs}s; provider._failure_count={provider._failure_count}, "
         f"circuit_open={provider._is_circuit_open()}. "
         f"Either BM didn't index the note in time or prefetch's actor.call "
         f"is timing out internally."
diff --git a/tests/test_prefetch.py b/tests/test_prefetch.py
@@ -42,6 +42,9 @@ def test_prefetch_calls_search_when_cache_empty(bm):
     assert bm_args["query"] == "hello world"
     assert bm_args["page_size"] == 5
     assert bm_args["output_format"] == "json"
+    # Pin search_type=text so BM doesn't fall into the hybrid+async-vector
+    # path on the prefetch hot path. See prefetch() comment for rationale.
+    assert bm_args["search_type"] == "text"
 
 
 def test_prefetch_returns_empty_when_uninitialized(bm):