Skip to content

Commit 9fa3cbd

Browse files
committed
perf(embedding): Optimize hash, addFeat and norm function
1 parent 6ac6cd4 commit 9fa3cbd

1 file changed

Lines changed: 35 additions & 10 deletions

File tree

backend/src/embedding/index.ts

Lines changed: 35 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -147,21 +147,41 @@ async function embedWithLocal(t: string, s: string): Promise<number[]> {
147147
}
148148

149149
const hash = (v: string) => {
150-
let h = 0x811c9dc5
151-
for (let i = 0; i < v.length; i++) h = Math.imul(h ^ v.charCodeAt(i), 16777619)
152-
return h >>> 0
150+
let h = 0x811c9dc5 | 0;
151+
const len = v.length | 0;
152+
for (let i = 0; i < len; i++) {
153+
h = Math.imul(h ^ v.charCodeAt(i), 16777619);
154+
}
155+
return h >>> 0;
153156
}
154157

155158
const addFeat = (vec: Float32Array, dim: number, key: string, w: number) => {
156-
const h = hash(key)
157-
vec[h % dim] += w * ((h & 1) ? -1 : 1)
159+
const h = hash(key);
160+
const value = w * (1 - ((h & 1) << 1));
161+
162+
// The core optimization: Check if dim is a power of two.
163+
// This check is extremely fast and allows V8's JIT to optimize heavily.
164+
if ((dim > 0) && (dim & (dim - 1)) === 0) {
165+
// FAST PATH: dim is a power of two. Use bitwise AND.
166+
vec[h & (dim - 1)] += value;
167+
} else {
168+
// SLOW PATH: Use modulo.
169+
vec[h % dim] += value;
170+
}
158171
}
159172

160173
const norm = (vec: Float32Array) => {
161-
let n = 0
162-
for (let i = 0; i < vec.length; i++) n += vec[i] * vec[i]
163-
n = Math.sqrt(n)
164-
if (n) for (let i = 0; i < vec.length; i++) vec[i] /= n
174+
let n = 0;
175+
const len = vec.length;
176+
for (let i = 0; i < len; i++) {
177+
const v = vec[i];
178+
n += v * v;
179+
}
180+
if (n === 0) return;
181+
const invSqrt = 1 / Math.sqrt(n);
182+
for (let i = 0; i < len; i++) {
183+
vec[i] *= invSqrt;
184+
}
165185
}
166186

167187
function generateSyntheticEmbedding(t: string, s: string): number[] {
@@ -174,7 +194,11 @@ function generateSyntheticEmbedding(t: string, s: string): number[] {
174194
}
175195
const et = Array.from(addSynonymTokens(ct))
176196
const tc = new Map<string, number>()
177-
et.forEach(tok => tc.set(tok, (tc.get(tok) || 0) + 1))
197+
const etLength: number = et.length;
198+
for (let i = 0; i < etLength; i++) {
199+
const tok = et[i];
200+
tc.set(tok, (tc.get(tok) || 0) + 1)
201+
}
178202

179203
for (const [tok, c] of tc) {
180204
const w = Math.log(1 + c) + 1
@@ -196,6 +220,7 @@ const resizeVector = (v: number[], t: number) => {
196220
if (v.length > t) return v.slice(0, t)
197221
return [...v, ...Array(t - v.length).fill(0)]
198222
}
223+
199224
export async function embedMultiSector(id: string, text: string, sectors: string[], chunks?: Array<{ text: string }>): Promise<EmbeddingResult[]> {
200225
const r: EmbeddingResult[] = []
201226
await q.ins_log.run(id, 'multi-sector', 'pending', Date.now(), null)

0 commit comments

Comments
 (0)