Skip to content

Commit 717c784

Browse files
committed
fix(semantic-search): truncate ai search documents by bytes (greptile)
1 parent fa827c1 commit 717c784

1 file changed

Lines changed: 7 additions & 12 deletions

File tree

packages/plugins/semantic-search/src/sdk/documents.ts

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ const ADDRESS_PREFIX = "tools.";
99
// Upper bound, in UTF-8 bytes, that truncateToAiSearchLimit enforces on a document.
// NOTE(review): presumably derived from an AI Search per-file size limit — confirm the source of 3.5 MB.
const MAX_AI_SEARCH_FILE_BYTES = 3_500_000;
1010

1111
// Shared encoder used to obtain the UTF-8 bytes of a document.
const textEncoder = new TextEncoder();
12+
// Added by this commit: shared decoder used to turn a truncated byte prefix back into a string.
const textDecoder = new TextDecoder();
1213

1314
export interface IndexableToolDescriptor {
1415
readonly address: Tool["address"] | string;
@@ -162,18 +163,12 @@ export const listToolManifests = (
162163
);
163164

164165
/**
 * Returns `document` unchanged when its UTF-8 encoding fits within
 * MAX_AI_SEARCH_FILE_BYTES; otherwise returns a prefix of it whose UTF-8
 * encoding does not exceed that limit.
 *
 * This span is a diff hunk: gutter lines ending in `-` mark the implementation
 * removed by this commit, gutter lines ending in `+` mark the one added.
 *
 * @param document - Text to bound by encoded byte size.
 * @returns The original string, or a truncated prefix of it.
 */
const truncateToAiSearchLimit = (document: string): string => {
// Removed implementation: binary search for the longest UTF-16 prefix length
// whose encoding fits the limit, re-encoding a candidate prefix on every probe.
165-
if (textEncoder.encode(document).byteLength <= MAX_AI_SEARCH_FILE_BYTES) return document;
166-
let low = 0;
167-
let high = document.length;
168-
while (low < high) {
169-
const mid = Math.floor((low + high + 1) / 2);
170-
if (textEncoder.encode(document.slice(0, mid)).byteLength <= MAX_AI_SEARCH_FILE_BYTES) {
171-
low = mid;
172-
} else {
173-
high = mid - 1;
174-
}
175-
}
176-
return document.slice(0, low);
// Added implementation: encode once, then cut the byte array directly.
166+
const bytes = textEncoder.encode(document);
167+
if (bytes.byteLength <= MAX_AI_SEARCH_FILE_BYTES) return document;
168+
169+
// `end` is the exclusive cut index; bytes[end] is in range because byteLength > limit here.
let end = MAX_AI_SEARCH_FILE_BYTES;
170+
// 0b10xxxxxx marks a UTF-8 continuation byte: back up until bytes[end] starts a
// code point, so the slice [0, end) never ends in the middle of a multi-byte sequence.
// NOTE(review): under `noUncheckedIndexedAccess`, `bytes[end]` is `number | undefined` — confirm this compiles with the project's tsconfig.
while (end > 0 && (bytes[end] & 0xc0) === 0x80) end -= 1;
171+
// NOTE(review): a default TextDecoder drops a leading U+FEFF (BOM) when decoding,
// and unpaired surrogates were already encoded as U+FFFD — confirm both are acceptable here.
return textDecoder.decode(bytes.subarray(0, end));
177172
};
178173

179174
export const toolItemKey = (manifest: ToolSchemaManifest): string =>

0 commit comments

Comments
 (0)