Skip to content

Commit 0458be8

Browse files
committed
fix(search): durabilize verified search and reranker closure
1 parent cf31aca commit 0458be8

File tree

5 files changed

+419
-99
lines changed

5 files changed

+419
-99
lines changed

src/core/reranker.ts

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ interface CrossEncoderModel {
3434
let cachedTokenizer: CrossEncoderTokenizer | null = null;
3535
let cachedModel: CrossEncoderModel | null = null;
3636
let initPromise: Promise<void> | null = null;
37-
/** Set permanently after a non-recoverable load failure so subsequent calls fast-fail. */
38-
let initFailed = false;
3937

4038
/** Tracks reranker operational health for surfacing in search quality */
4139
export type RerankerStatus = 'active' | 'fallback' | 'unavailable';
@@ -46,10 +44,14 @@ export function getRerankerStatus(): RerankerStatus {
4644
return rerankerHealth;
4745
}
4846

47+
/**
 * Drops every piece of cached loader state: the tokenizer, the model, and the
 * in-flight/settled initialization promise.
 *
 * With all three cleared, the early-return checks at the top of
 * `ensureModelLoaded` no longer match, so the next call starts a fresh load
 * instead of reusing a failed attempt.
 */
function resetLoadedState(): void {
  cachedTokenizer = null;
  cachedModel = null;
  initPromise = null;
}
52+
4953
async function ensureModelLoaded(): Promise<void> {
5054
if (cachedModel && cachedTokenizer) return;
51-
// Fast-fail if a prior attempt already determined the model is unavailable.
52-
if (initFailed) throw new Error('[reranker] Model unavailable (prior load failed)');
5355
if (initPromise) return initPromise;
5456

5557
initPromise = (async () => {
@@ -76,13 +78,13 @@ async function ensureModelLoaded(): Promise<void> {
7678
console.error('[reranker] Cross-encoder loaded successfully');
7779
} catch (err) {
7880
const msg = err instanceof Error ? err.message : String(err);
79-
const isCorrupt =
80-
msg.includes('Protobuf') || msg.includes('parse') || msg.includes('corrupt');
81+
const isCorrupt = /protobuf|parse|corrupt/i.test(msg);
82+
resetLoadedState();
8183

8284
if (isCorrupt) {
83-
// Corrupted cache — clear it so next session re-downloads
85+
// Corrupted cache is recoverable in-session once the bad model files are removed.
8486
console.error(`[reranker] Cache corruption detected: ${msg}`);
85-
console.error('[reranker] Clearing corrupted cache. Next startup will re-download.');
87+
console.error('[reranker] Clearing corrupted cache. Next call will re-download.');
8688
try {
8789
const cacheDir = env.cacheDir ?? null;
8890
if (cacheDir) {
@@ -97,14 +99,11 @@ async function ensureModelLoaded(): Promise<void> {
9799
// Cache clear is best-effort
98100
}
99101
rerankerHealth = 'unavailable';
100-
// Permanent fail — corrupt cache can't be retried in this session.
101-
initFailed = true;
102102
throw err;
103103
}
104104

105105
// Transient error (network, timeout, etc.) — allow retry on next call.
106106
rerankerHealth = 'unavailable';
107-
initPromise = null;
108107
throw err;
109108
}
110109
})();

src/tools/types.ts

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -67,34 +67,70 @@ export interface SearchQuality {
6767
status: 'ok' | 'low_confidence';
6868
confidence: number | string;
6969
hint?: string;
70+
tokenEstimate?: number;
71+
warning?: string;
72+
rerankerStatus?: 'unavailable';
73+
}
74+
75+
/**
 * Optional relationship facts attached to a single search result.
 * Both fields may be omitted.
 */
export interface SearchResultRelationships {
  /** Count of importers — presumably the number of files importing this result's file; confirm with the producer. */
  importedByCount?: number;
  /** Whether tests are associated with this result (exact criteria are defined by the producer). */
  hasTests?: boolean;
}
79+
80+
/**
 * Optional navigation hints attached to a single search result.
 * Both lists may be omitted.
 */
export interface SearchResultHints {
  /** Caller identifiers — presumably file paths or symbols; format is set by the producer. */
  callers?: string[];
  /** Test identifiers — presumably test file paths; format is set by the producer. */
  tests?: string[];
}
84+
85+
/**
 * A suggested follow-up action returned alongside search results.
 */
export interface SearchNextHop {
  /** Name of the tool to invoke next. */
  tool: string;
  /** Human-readable reason for the suggestion. */
  why: string;
  /** Optional string-valued arguments for the suggested tool call. */
  args?: Record<string, string>;
}
90+
91+
/**
 * Describes how a search response was sized.
 */
export interface SearchBudget {
  /** Response verbosity mode; only these two literals are allowed. */
  mode: 'compact' | 'full';
  /** Result count for the response — presumably the number of items returned; confirm with the producer. */
  resultCount: number;
}
95+
96+
/**
 * A reduced preflight payload: every `DecisionCard` field except `ready`
 * becomes optional, while `ready` is redeclared here as a required boolean.
 */
export interface SearchLitePreflight extends Partial<Omit<DecisionCard, 'ready'>> {
  /** Required readiness flag (replaces whatever `DecisionCard.ready` declares). */
  ready: boolean;
  /** Optional explanation accompanying the readiness flag. */
  reason?: string;
}
71100

72101
/**
 * One entry in a search response (post-commit shape).
 *
 * Only `file`, `summary` and `score` are required; every other field is
 * optional enrichment. The relationship and hint shapes are the named
 * `SearchResultRelationships` / `SearchResultHints` interfaces rather than
 * inline object types.
 */
export interface SearchResultItem {
  file: string; // "path:startLine-endLine"
  summary: string;
  score: number;
  relevanceReason?: string;
  type?: string; // "componentType:layer"
  trend?: 'Rising' | 'Declining';
  patternWarning?: string;
  relationships?: SearchResultRelationships;
  hints?: SearchResultHints;
  // NOTE(review): `importedByCount` also exists on `relationships`; which one
  // consumers should read is not visible here — confirm with the producer.
  importedByCount?: number;
  topExports?: string[];
  layer?: string;
  symbol?: string;
  symbolKind?: string;
  scope?: string;
  signaturePreview?: string;
  imports?: string[];
  exports?: string[];
  complexity?: number;
  snippet?: string;
}
91123

92124
/**
 * Top-level payload of a search call (post-commit shape).
 *
 * `status`, `searchQuality` and `results` are required. `totalResults` is
 * optional in this version, so consumers must handle its absence.
 */
export interface SearchResponse {
  status: string;
  searchQuality: SearchQuality;
  budget?: SearchBudget;
  /** Either the full decision card or the reduced `SearchLitePreflight` form. */
  preflight?: DecisionCard | SearchLitePreflight;
  patternSummary?: string;
  bestExample?: string;
  nextHops?: SearchNextHop[];
  results: SearchResultItem[];
  totalResults?: number;
  relatedMemories?: string[];
}
100136

tests/reranker-recovery.test.ts

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
2+
import { promises as fs } from 'fs';
3+
import os from 'os';
4+
import path from 'path';
5+
import type { SearchResult } from '../src/types/index.js';
6+
import { rmWithRetries } from './test-helpers.js';
7+
8+
// Shared mock handles. They are created through `vi.hoisted` so they already
// exist when the `vi.mock` factory below runs (vitest hoists `vi.mock` calls
// above the module's imports).
const transformersMocks = vi.hoisted(() => ({
  tokenizerFromPretrained: vi.fn(),
  modelFromPretrained: vi.fn(),
  // Mutable stand-in for the library's `env`; tests point `cacheDir` at a temp dir.
  env: { cacheDir: null as string | null }
}));

// Replace the transformers package with stubs exposing only the members this
// test file drives: the two `from_pretrained` loaders and `env`.
vi.mock('@huggingface/transformers', () => ({
  AutoTokenizer: {
    from_pretrained: transformersMocks.tokenizerFromPretrained
  },
  AutoModelForSequenceClassification: {
    from_pretrained: transformersMocks.modelFromPretrained
  },
  env: transformersMocks.env
}));
23+
24+
/**
 * Builds a minimal search-result fixture for the reranker tests.
 *
 * @param score - Initial score stored on the fixture.
 * @param filePath - Stored as `filePath` and embedded in `summary`.
 * @returns An object with a fixed snippet, lines 1-10, language
 *   `'typescript'` and empty metadata, asserted to `SearchResult`.
 */
function makeResult(score: number, filePath: string): SearchResult {
  // The assertion is kept from the original; presumably the literal does not
  // populate every `SearchResult` field — confirm against the type.
  return {
    summary: `Result from ${filePath}`,
    snippet: 'export class Foo {}',
    filePath,
    startLine: 1,
    endLine: 10,
    score,
    language: 'typescript',
    metadata: {}
  } as SearchResult;
}
36+
37+
// Verifies that a corrupt-model load failure clears the on-disk cache and that
// the very next call in the same process reloads and reranks successfully.
describe('reranker corruption recovery', () => {
  let tempCacheRoot: string | null = null;
  let consoleErrorSpy: ReturnType<typeof vi.spyOn>;

  beforeEach(async () => {
    // Reset the module registry so the dynamic import in each test evaluates
    // the reranker module afresh.
    vi.resetModules();
    transformersMocks.tokenizerFromPretrained.mockReset();
    transformersMocks.modelFromPretrained.mockReset();
    tempCacheRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'reranker-cache-'));
    transformersMocks.env.cacheDir = tempCacheRoot;
    // Silence the reranker's console.error logging during the test.
    consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
  });

  afterEach(async () => {
    consoleErrorSpy.mockRestore();
    transformersMocks.env.cacheDir = null;
    if (tempCacheRoot) {
      await rmWithRetries(tempCacheRoot);
      tempCacheRoot = null;
    }
  });

  it('clears corrupt cache and retries on the next same-process call', async () => {
    if (!tempCacheRoot) throw new Error('tempCacheRoot not initialized');

    // Seed a fake model directory so its removal can be observed.
    const modelCacheDir = path.join(tempCacheRoot, 'Xenova', 'ms-marco-MiniLM-L-6-v2');
    await fs.mkdir(modelCacheDir, { recursive: true });
    await fs.writeFile(path.join(modelCacheDir, 'model.onnx'), 'corrupt');

    const tokenizer = vi.fn((query: string, passage: string) => ({ query, passage }));
    // Stub model: logit 3 for b.ts, 2 for everything else, 1 for a.ts, which
    // yields the ordering asserted at the end of the test.
    const model = vi.fn(async (inputs: { passage: string }) => {
      if (inputs.passage.includes('/a.ts')) return { logits: { data: [1] } };
      if (inputs.passage.includes('/b.ts')) return { logits: { data: [3] } };
      return { logits: { data: [2] } };
    });

    transformersMocks.tokenizerFromPretrained.mockResolvedValue(tokenizer);
    // First model load fails with a corruption-style message; second succeeds.
    transformersMocks.modelFromPretrained
      .mockRejectedValueOnce(new Error('Protobuf parse failure while loading model'))
      .mockResolvedValueOnce(model);

    const { rerank, getRerankerStatus } = await import('../src/core/reranker.js');
    const results = [
      makeResult(0.85, '/a.ts'),
      makeResult(0.83, '/b.ts'),
      makeResult(0.82, '/c.ts')
    ];

    // First call: the load error propagates, the cache dir is gone, status is unavailable.
    await expect(rerank('auth token', results)).rejects.toThrow(/Protobuf|parse/i);
    await expect(fs.access(modelCacheDir)).rejects.toThrow();
    expect(getRerankerStatus()).toBe('unavailable');

    // Second call in the same process: both loaders run again and reranking works.
    const reranked = await rerank('auth token', results);
    expect(transformersMocks.tokenizerFromPretrained).toHaveBeenCalledTimes(2);
    expect(transformersMocks.modelFromPretrained).toHaveBeenCalledTimes(2);
    expect(reranked.map((result) => result.filePath)).toEqual(['/b.ts', '/c.ts', '/a.ts']);
    expect(getRerankerStatus()).toBe('active');
  });
});

0 commit comments

Comments
 (0)