Skip to content

Commit f4b37e2

Browse files
groksrc and claude committed
fix: pin prefetch search_type=text to avoid async-vector race
BM 0.20.x defaults search_type to "hybrid" when semantic_search is enabled (default), which mixes FTS with vector search. Vector indexing is scheduled asynchronously (knowledge_router.py:272-278 + search_service._schedule_vector_sync_if_enabled), so hybrid search can miss notes that were just written — the FTS row exists but the vector row hasn't been added yet.

On a Mac dev box, vector indexing finishes before the test's poll loop ends. On the GitHub CI runner (cold onnxruntime, shared CPU), the vector index lags long enough that all 111 prefetch attempts made within a 30s budget return zero results. Confirmed via integration log diagnostics from run 25760814491.

prefetch is a 3s-budget recall hot path with keyword-shaped queries; FTS is both faster and the semantically correct lookup for it. queue_prefetch mirrors the change. Agent-facing bm_search is unchanged — natural-language queries still benefit from hybrid when the index has caught up.

Also revises the integration test:
- budget back down to 10s (was 30s in the previous flake-mitigation attempt; with search_type=text we don't need that margin)
- keeps the improved diagnostic that surfaces failure_count and circuit state — the previous run's 111-attempts diagnostic is what led us to the right cause

Unit test test_prefetch_calls_search_when_cache_empty pins search_type="text" so future refactors can't silently regress.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 0d8f1e0 commit f4b37e2

3 files changed

Lines changed: 24 additions & 7 deletions

File tree

__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,12 +975,20 @@ def prefetch(self, query: str, *, session_id: str = "") -> str:
975975
if not self._initialized or self._actor is None or self._is_circuit_open():
976976
return ""
977977
try:
978+
# search_type="text" — bypass BM's "hybrid" default which mixes FTS
979+
# with vector search. Vector indexing is scheduled asynchronously
980+
# in BM (see services/search_service.py:_schedule_vector_sync_if_enabled),
981+
# so hybrid search can miss notes that were just written, especially
982+
# under cold-start or load. Prefetch is a recall hot path with a
983+
# 3s budget and the queries are usually keyword-like — FTS-only is
984+
# both faster and more deterministic.
978985
raw = self._actor.call(
979986
"search_notes",
980987
{
981988
"project": self._project,
982989
"query": query,
983990
"page_size": 5,
991+
"search_type": "text",
984992
"output_format": "json",
985993
},
986994
timeout=3.0,
@@ -998,12 +1006,16 @@ def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
9981006

9991007
def _bg() -> None:
10001008
try:
1009+
# search_type="text" mirrors prefetch() — see note there. The
1010+
# background path can afford a longer timeout but the
1011+
# async-vector-indexing race still applies.
10011012
raw = self._actor.call( # type: ignore[union-attr]
10021013
"search_notes",
10031014
{
10041015
"project": self._project,
10051016
"query": query,
10061017
"page_size": 5,
1018+
"search_type": "text",
10071019
"output_format": "json",
10081020
},
10091021
timeout=10.0,

tests/test_integration.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -324,12 +324,14 @@ def test_prefetch_against_real_bm(provider, bm):
324324
"folder": "tests",
325325
})
326326

327-
# BM may need time to index the new note. prefetch's own actor.call
328-
# times out at 3.0s per attempt; on a cold CI runner (especially with
329-
# onnxruntime startup), indexing latency plus a few retries can push
330-
# well past the previous 5s budget. 30s gives ~10 attempts and absorbs
331-
# the worst observed cold-start delays.
332-
deadline = time.monotonic() + 30.0
327+
# BM's FTS index is updated synchronously inside the write_note API
328+
# path (knowledge_router.py:272), so this loop is really only smoothing
329+
# over the round-trip cost of a few RPCs on a slow runner. prefetch
330+
# explicitly requests search_type="text" so we don't get pulled onto
331+
# BM's hybrid path, where vector indexing is async and would race the
332+
# search.
333+
budget_secs = 10.0
334+
deadline = time.monotonic() + budget_secs
333335
out = ""
334336
attempts = 0
335337
while time.monotonic() < deadline:
@@ -341,7 +343,7 @@ def test_prefetch_against_real_bm(provider, bm):
341343

342344
assert out, (
343345
f"prefetch returned nothing after {attempts} attempt(s) over "
344-
f"{30.0}s; provider._failure_count={provider._failure_count}, "
346+
f"{budget_secs}s; provider._failure_count={provider._failure_count}, "
345347
f"circuit_open={provider._is_circuit_open()}. "
346348
f"Either BM didn't index the note in time or prefetch's actor.call "
347349
f"is timing out internally."

tests/test_prefetch.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ def test_prefetch_calls_search_when_cache_empty(bm):
4242
assert bm_args["query"] == "hello world"
4343
assert bm_args["page_size"] == 5
4444
assert bm_args["output_format"] == "json"
45+
# Pin search_type=text so BM doesn't fall into the hybrid+async-vector
46+
# path on the prefetch hot path. See prefetch() comment for rationale.
47+
assert bm_args["search_type"] == "text"
4548

4649

4750
def test_prefetch_returns_empty_when_uninitialized(bm):

0 commit comments

Comments
 (0)