Skip to content

Commit ef96823

Browse files
committed
v0.7.8
- Optimized the UI display effects of some components - Added model list deduplication - Added Markdown rendering to the Btw panel - Codebase added Rerank selection - Added retry mechanism for compression failure
1 parent bb73fef commit ef96823

12 files changed

Lines changed: 1139 additions & 212 deletions

File tree

.github/workflows/publish.yml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,11 @@ jobs:
5656
5757
### What's New
5858
59-
- Add batch processing function to tools like TODO
60-
- Optimize part of UI display to avoid icon display anomalies and terminal jitter
61-
- Update Gemini thinking strength parameters
62-
- Persistent context information
63-
- Enhance robustness of `/btw` command
64-
- Remove outdated API setting parameters
59+
- Optimized the UI display effects of some components
60+
- Added model list deduplication
61+
- Added Markdown rendering to the Btw panel
62+
- Codebase added Rerank selection
63+
- Added retry mechanism for compression failure
6564
6665
### Installation
6766
```bash

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "snow-ai",
3-
"version": "0.7.7",
3+
"version": "0.7.8",
44
"description": "Agentic coding in your terminal",
55
"license": "MIT",
66
"bin": {

source/api/rerank.ts

Lines changed: 358 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,358 @@
1+
import {loadCodebaseConfig} from '../utils/config/codebaseConfig.js';
2+
import {logger} from '../utils/core/logger.js';
3+
import {addProxyToFetchOptions} from '../utils/core/proxyUtils.js';
4+
import {getVersionHeader} from '../utils/core/version.js';
5+
/**
 * Per-call options for `rerankDocuments`.
 *
 * `model`, `baseUrl`, `apiKey`, `topN` and `contextLength` are optional
 * overrides: when omitted, `rerankDocuments` reads the corresponding value
 * from the `reranking` section of the codebase config.
 */
export interface RerankOptions {
  /** Rerank model name; overrides the configured model name when set. */
  model?: string;
  /** Query text the documents are scored against. */
  query: string;
  /** Candidate documents; result indices refer to positions in this array. */
  documents: string[];
  /** Number of results to request; omitted from the request when it covers every fitted document. */
  topN?: number;
  /** Base URL of the rerank service; overrides the configured base URL when set. */
  baseUrl?: string;
  /** API key sent as a Bearer token; overrides the configured key when set. */
  apiKey?: string;
  /** Context window (in tokens) used to fit documents before the request is sent. */
  contextLength?: number;
}
15+
/** One scored document in a rerank response. */
export interface RerankResult {
  /** Position of the document in the `documents` array passed by the caller. */
  index: number;
  /** Relevance score reported by the rerank API. */
  relevanceScore: number;
}
20+
/** Normalized result of a rerank call. */
export interface RerankResponse {
  /** Scored documents returned by the rerank API. */
  results: RerankResult[];
  /** Number of input documents dropped during context fitting. */
  droppedDocuments?: number;
  /** Number of input documents truncated during context fitting. */
  truncatedDocuments?: number;
}
26+
/** Maximum number of rerank API attempts before the call is reported as failed. */
const MAX_RETRIES = 3;
/** Base back-off in milliseconds; the wait after a failed attempt is this value times the attempt number. */
const RETRY_BASE_DELAY_MS = 500;
/** Fraction of the context length that may be used as the total token budget. */
const CONTEXT_RESERVE_RATIO = 0.95;
/** Fraction of the context length above which a single document is truncated. */
const SINGLE_DOC_MAX_RATIO = 0.3;
31+
/**
 * Count the tokens in `text` using tiktoken.
 *
 * The encoder for `gpt-5` is tried first and `gpt-3.5-turbo` is used when that
 * lookup throws. If tiktoken cannot be loaded, or encoding fails for any
 * reason, the count is estimated as one token per four characters (rounded up).
 *
 * @param text Text to measure.
 * @returns Token count, or the character-based estimate on failure.
 */
async function countTokens(text: string): Promise<number> {
  try {
    const {encoding_for_model} = await import('tiktoken');
    let encoder;
    try {
      encoder = encoding_for_model('gpt-5');
    } catch {
      encoder = encoding_for_model('gpt-3.5-turbo');
    }
    try {
      return encoder.encode(text).length;
    } finally {
      // Release the encoder on every exit path.
      encoder.free();
    }
  } catch {
    // Best-effort fallback: roughly four characters per token.
    return Math.ceil(text.length / 4);
  }
}
53+
/**
 * Truncate `text` so it fits within a token budget.
 *
 * Tokenization uses tiktoken (`gpt-5` encoder, falling back to
 * `gpt-3.5-turbo`). If tiktoken cannot be loaded or fails, the text is cut at
 * four characters per token instead.
 *
 * The returned string is always a prefix of `text`: a cut that lands inside a
 * multi-byte character (token path) or a surrogate pair (character path) is
 * moved back so no broken character is left at the end.
 *
 * @param text Text to shorten.
 * @param maxTokens Token budget; negative or NaN values are treated as 0.
 * @returns `text` unchanged when it already fits, otherwise a truncated prefix.
 */
async function truncateText(
  text: string,
  maxTokens: number,
): Promise<string> {
  // Clamp so slice() never reads a negative count as "drop from the end".
  const budget = maxTokens > 0 ? maxTokens : 0;

  try {
    const {encoding_for_model} = await import('tiktoken');
    let encoder;
    try {
      encoder = encoding_for_model('gpt-5');
    } catch {
      encoder = encoding_for_model('gpt-3.5-turbo');
    }
    try {
      const tokens = encoder.encode(text);
      if (tokens.length <= budget) {
        return text;
      }
      const bytes = encoder.decode(tokens.slice(0, budget));
      let result = new TextDecoder().decode(bytes);
      // A token boundary can fall inside a multi-byte UTF-8 sequence; the
      // decoder then emits U+FFFD. Strip it unless the original text really
      // has that character at this position.
      while (result.endsWith('\uFFFD') && !text.startsWith(result)) {
        result = result.slice(0, -1);
      }
      return result;
    } finally {
      encoder.free();
    }
  } catch {
    // Best-effort fallback: roughly four characters per token.
    const maxChars = budget * 4;
    if (text.length <= maxChars) {
      return text;
    }
    // Do not cut between the two halves of a surrogate pair.
    const before = text.charCodeAt(maxChars - 1);
    const after = text.charCodeAt(maxChars);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    return text.slice(0, splitsPair ? maxChars - 1 : maxChars);
  }
}
85+
/** Outcome of fitting documents into the rerank context budget. */
interface FitResult {
  /** Documents that were kept (possibly truncated), in their original order. */
  documents: string[];
  /** Original indices that survived (maps new index → original index) */
  originalIndices: number[];
  /** Number of documents that did not fit and were left out. */
  droppedCount: number;
  /** Number of documents that had to be truncated. */
  truncatedCount: number;
}
93+
/**
 * Fit documents into the rerank model's context window.
 *
 * Strategy:
 * 1. Reserve tokens for the query plus a fixed request overhead.
 * 2. Walk documents in order, keeping each one that still fits the budget.
 * 3. Truncate any single document larger than `SINGLE_DOC_MAX_RATIO` of the
 *    context before checking whether it fits.
 * 4. Drop documents that do not fit in the remaining budget.
 *
 * @param query Query text; its tokens are subtracted from the budget.
 * @param documents Candidate documents in caller order.
 * @param contextLength Context window of the rerank model, in tokens.
 * @returns The kept documents, their original indices, and drop/truncate
 *   counts. `truncatedCount` only counts documents that were kept, so a
 *   document is never reported as both truncated and dropped.
 */
async function fitDocumentsToContext(
  query: string,
  documents: string[],
  contextLength: number,
): Promise<FitResult> {
  // With NaN/undefined every comparison below is false and all documents would
  // be dropped silently. Without a usable limit, send the documents unchanged.
  if (!Number.isFinite(contextLength)) {
    logger.warn(
      `Rerank context length is not a finite number (${contextLength}), skipping context fitting`,
    );
    return {
      documents: [...documents],
      originalIndices: documents.map((_, i) => i),
      droppedCount: 0,
      truncatedCount: 0,
    };
  }

  const budgetTotal = Math.floor(contextLength * CONTEXT_RESERVE_RATIO);
  const queryTokens = await countTokens(query);
  // Fixed allowance for request framing around the query and documents.
  const overhead = 50;
  let remaining = budgetTotal - queryTokens - overhead;

  if (remaining <= 0) {
    logger.warn(
      `Rerank context budget exhausted by query alone (${queryTokens} tokens, budget ${budgetTotal})`,
    );
    return {
      documents: [],
      originalIndices: [],
      droppedCount: documents.length,
      truncatedCount: 0,
    };
  }

  const singleDocMax = Math.floor(contextLength * SINGLE_DOC_MAX_RATIO);
  const fitted: string[] = [];
  const originalIndices: number[] = [];
  let droppedCount = 0;
  let truncatedCount = 0;

  for (const [i, original] of documents.entries()) {
    let candidate = original;
    let candidateTokens = await countTokens(original);
    const wasTruncated = candidateTokens > singleDocMax;

    if (wasTruncated) {
      candidate = await truncateText(original, singleDocMax);
      // Re-count: the truncated text is what will actually be sent.
      candidateTokens = await countTokens(candidate);
    }

    if (candidateTokens > remaining) {
      droppedCount++;
      continue;
    }

    fitted.push(candidate);
    originalIndices.push(i);
    remaining -= candidateTokens;
    if (wasTruncated) {
      truncatedCount++;
    }
  }

  if (droppedCount > 0 || truncatedCount > 0) {
    logger.info(
      `Rerank context fitting: ${documents.length} docs → ${fitted.length} kept, ${truncatedCount} truncated, ${droppedCount} dropped (context ${contextLength} tokens)`,
    );
  }

  return {documents: fitted, originalIndices, droppedCount, truncatedCount};
}
167+
/**
 * Build the rerank endpoint URL from a configured base URL.
 *
 * Surrounding whitespace and trailing slashes are removed first. Then:
 * - a URL already ending in `/rerank` (including `/v1/rerank`) is used as is;
 * - a URL ending in `/v1` gets `/rerank` appended;
 * - anything else gets `/v1/rerank` appended.
 *
 * @param baseUrl Base URL of the rerank service.
 * @returns The full rerank endpoint URL.
 */
function resolveRerankEndpoint(baseUrl: string): string {
  const trimmed = baseUrl.trim().replace(/\/+$/, '');

  if (trimmed.endsWith('/rerank')) {
    return trimmed;
  }
  if (trimmed.endsWith('/v1')) {
    return `${trimmed}/rerank`;
  }
  return `${trimmed}/v1/rerank`;
}
182+
/**
 * Normalize various rerank API response formats into a unified structure.
 *
 * Two shapes are accepted: an object with a `results` array, or a bare array
 * of entries. In both, the score is read from `relevance_score`,
 * `relevanceScore` or `score` (first one present), and a missing `index` or
 * score defaults to 0.
 *
 * @param data Parsed JSON body of the rerank API response.
 * @returns The response converted to `RerankResponse`.
 * @throws Error when `data` is neither of the accepted shapes.
 */
function normalizeRerankResponse(data: unknown): RerankResponse {
  // One mapper for both shapes so they honour the same field aliases.
  const toResult = (entry: {
    index?: number;
    relevance_score?: number;
    relevanceScore?: number;
    score?: number;
  }): RerankResult => ({
    index: entry.index ?? 0,
    relevanceScore:
      entry.relevance_score ?? entry.relevanceScore ?? entry.score ?? 0,
  });

  const entries: unknown = Array.isArray(data)
    ? data
    : (data as {results?: unknown} | null | undefined)?.results;

  if (Array.isArray(entries)) {
    return {results: entries.map(toResult)};
  }

  // JSON.stringify(undefined) is undefined at runtime; String() keeps the
  // intended error instead of a TypeError from `.slice`.
  throw new Error(
    `Unexpected rerank API response format: ${String(JSON.stringify(data)).slice(0, 200)}`,
  );
}
208+
/**
 * Send a single rerank request and return the normalized response.
 *
 * The request is a JSON POST carrying `model`, `query` and `documents`, plus
 * `top_n` when `topN` is defined. An `Authorization: Bearer` header is added
 * only when an API key is provided. No retry happens here.
 *
 * @param options Endpoint URL, model, query, documents and optional topN/apiKey.
 * @returns The API response converted by `normalizeRerankResponse`.
 * @throws Error containing the HTTP status and response body when the
 *   response is not OK; errors from `fetch`, JSON parsing or normalization
 *   propagate unchanged.
 */
async function callRerankAPI(options: {
  url: string;
  model: string;
  query: string;
  documents: string[];
  topN?: number;
  apiKey?: string;
}): Promise<RerankResponse> {
  const {url, model, query, documents, topN, apiKey} = options;

  const requestBody: Record<string, unknown> = {
    model,
    query,
    documents,
  };
  if (topN !== undefined) {
    requestBody['top_n'] = topN;
  }

  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
    'x-snow': getVersionHeader(),
  };
  if (apiKey) {
    headers['Authorization'] = `Bearer ${apiKey}`;
  }

  // NOTE(review): presumably attaches proxy settings for this URL — confirm in proxyUtils.
  const fetchOptions = addProxyToFetchOptions(url, {
    method: 'POST',
    headers,
    body: JSON.stringify(requestBody),
  });

  const response = await fetch(url, fetchOptions);

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Rerank API error (${response.status}): ${errorText}`);
  }

  const data = await response.json();
  return normalizeRerankResponse(data);
}
252+
/**
 * Rerank documents against a query with automatic retry.
 *
 * Before calling the API, documents are fitted into the model's context window
 * (configured via `reranking.contextLength`). Documents that exceed the budget
 * are truncated or dropped, and the response maps indices back to the original
 * document array so callers can match results correctly.
 *
 * The request is attempted up to `MAX_RETRIES` times, waiting
 * `RETRY_BASE_DELAY_MS * attempt` milliseconds after each failure.
 *
 * @param options Query, documents and optional overrides for the configured
 *   model, base URL, API key, topN and context length.
 * @returns Results sorted by descending relevance score (indices refer to the
 *   original documents array), plus the drop/truncate counts from context
 *   fitting. If topN >= the number of fitted documents, no `top_n` is sent and
 *   the API is asked for a full ranking. Returns empty results when no
 *   document survives context fitting.
 * @throws Error when the model name, base URL or documents are missing, or
 *   when every attempt fails.
 */
export async function rerankDocuments(
  options: RerankOptions,
): Promise<RerankResponse> {
  const config = loadCodebaseConfig();
  const rerankingConfig = config.reranking;

  const model = options.model || rerankingConfig.modelName;
  const baseUrl = options.baseUrl || rerankingConfig.baseUrl;
  const apiKey = options.apiKey || rerankingConfig.apiKey;
  const topN = options.topN ?? rerankingConfig.topN;
  const contextLength =
    options.contextLength ?? rerankingConfig.contextLength;
  const {query, documents} = options;

  if (!model) {
    throw new Error('Reranking model name is required');
  }
  if (!baseUrl) {
    throw new Error('Reranking base URL is required');
  }
  if (!documents || documents.length === 0) {
    throw new Error('Documents are required for reranking');
  }

  // ── Context length protection ──
  const fitResult = await fitDocumentsToContext(
    query,
    documents,
    contextLength,
  );

  if (fitResult.documents.length === 0) {
    logger.warn(
      'All documents dropped during context fitting, returning empty results',
    );
    return {
      results: [],
      droppedDocuments: fitResult.droppedCount,
      truncatedDocuments: fitResult.truncatedCount,
    };
  }

  const url = resolveRerankEndpoint(baseUrl);
  // Asking for at least as many results as documents means "rank everything".
  const effectiveTopN =
    topN >= fitResult.documents.length ? undefined : topN;

  let lastError: Error | null = null;

  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    try {
      logger.info(
        `Rerank API call attempt ${attempt}/${MAX_RETRIES} (${fitResult.documents.length}/${documents.length} docs, context ${contextLength})`,
      );

      const response = await callRerankAPI({
        url,
        model,
        query,
        documents: fitResult.documents,
        topN: effectiveTopN,
        apiKey,
      });

      // Map fitted indices back to original document indices. An index outside
      // the fitted list cannot be mapped; reusing it as an original index could
      // point at the wrong document, so such results are skipped.
      const mappedResults: RerankResult[] = [];
      for (const result of response.results) {
        const originalIndex = fitResult.originalIndices[result.index];
        if (originalIndex === undefined) {
          logger.warn(
            `Rerank API returned out-of-range index ${result.index}, ignoring result`,
          );
          continue;
        }
        mappedResults.push({
          index: originalIndex,
          relevanceScore: result.relevanceScore,
        });
      }

      // Enforce the documented ordering instead of relying on the API.
      mappedResults.sort((a, b) => b.relevanceScore - a.relevanceScore);

      logger.info(
        `Rerank API succeeded on attempt ${attempt}, got ${mappedResults.length} results`,
      );

      return {
        results: mappedResults,
        droppedDocuments: fitResult.droppedCount,
        truncatedDocuments: fitResult.truncatedCount,
      };
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error));
      logger.warn(
        `Rerank API attempt ${attempt}/${MAX_RETRIES} failed: ${lastError.message}`,
      );

      if (attempt < MAX_RETRIES) {
        // Linear back-off: wait longer after each failed attempt.
        const delay = RETRY_BASE_DELAY_MS * attempt;
        await new Promise(resolve => setTimeout(resolve, delay));
      }
    }
  }

  throw new Error(
    `Rerank API failed after ${MAX_RETRIES} attempts: ${lastError?.message}`,
  );
}

0 commit comments

Comments
 (0)