Skip to content

Commit aab0bca

Browse files
committed
2 parents d3ca6d0 + 4de3bb4 commit aab0bca

2 files changed

Lines changed: 36 additions & 16 deletions

File tree

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -363,17 +363,17 @@ make test
363363
</a>
364364
</td>
365365
<td align="center">
366-
<a href="https://github.com/recabasic">
367-
<img src="https://avatars.githubusercontent.com/u/102372274?v=4" width="100;" alt="recabasic"/>
366+
<a href="https://github.com/DoKoB0512">
367+
<img src="https://avatars.githubusercontent.com/u/123281216?v=4" width="100;" alt="DoKoB0512"/>
368368
<br />
369-
<sub><b>Elvoro</b></sub>
369+
<sub><b>DoKoB0512</b></sub>
370370
</a>
371371
</td>
372372
<td align="center">
373-
<a href="https://github.com/DoKoB0512">
374-
<img src="https://avatars.githubusercontent.com/u/123281216?v=4" width="100;" alt="DoKoB0512"/>
373+
<a href="https://github.com/recabasic">
374+
<img src="https://avatars.githubusercontent.com/u/102372274?v=4" width="100;" alt="recabasic"/>
375375
<br />
376-
<sub><b>DoKoB0512</b></sub>
376+
<sub><b>Elvoro</b></sub>
377377
</a>
378378
</td>
379379
<td align="center">

backend/src/embedding/index.ts

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -147,21 +147,36 @@ async function embedWithLocal(t: string, s: string): Promise<number[]> {
147147
}
148148

149149
const hash = (v: string) => {
150-
let h = 0x811c9dc5
151-
for (let i = 0; i < v.length; i++) h = Math.imul(h ^ v.charCodeAt(i), 16777619)
152-
return h >>> 0
150+
let h = 0x811c9dc5 | 0;
151+
const len = v.length | 0;
152+
for (let i = 0; i < len; i++) {
153+
h = Math.imul(h ^ v.charCodeAt(i), 16777619);
154+
}
155+
return h >>> 0;
153156
}
154157

155158
const addFeat = (vec: Float32Array, dim: number, key: string, w: number) => {
156-
const h = hash(key)
157-
vec[h % dim] += w * ((h & 1) ? -1 : 1)
159+
const h = hash(key);
160+
const value = w * (1 - ((h & 1) << 1));
161+
if ((dim > 0) && (dim & (dim - 1)) === 0) {
162+
vec[h & (dim - 1)] += value;
163+
} else {
164+
vec[h % dim] += value;
165+
}
158166
}
159167

160168
const norm = (vec: Float32Array) => {
161-
let n = 0
162-
for (let i = 0; i < vec.length; i++) n += vec[i] * vec[i]
163-
n = Math.sqrt(n)
164-
if (n) for (let i = 0; i < vec.length; i++) vec[i] /= n
169+
let n = 0;
170+
const len = vec.length;
171+
for (let i = 0; i < len; i++) {
172+
const v = vec[i];
173+
n += v * v;
174+
}
175+
if (n === 0) return;
176+
const invSqrt = 1 / Math.sqrt(n);
177+
for (let i = 0; i < len; i++) {
178+
vec[i] *= invSqrt;
179+
}
165180
}
166181

167182
function generateSyntheticEmbedding(t: string, s: string): number[] {
@@ -174,7 +189,11 @@ function generateSyntheticEmbedding(t: string, s: string): number[] {
174189
}
175190
const et = Array.from(addSynonymTokens(ct))
176191
const tc = new Map<string, number>()
177-
et.forEach(tok => tc.set(tok, (tc.get(tok) || 0) + 1))
192+
const etLength: number = et.length;
193+
for (let i = 0; i < etLength; i++) {
194+
const tok = et[i];
195+
tc.set(tok, (tc.get(tok) || 0) + 1)
196+
}
178197

179198
for (const [tok, c] of tc) {
180199
const w = Math.log(1 + c) + 1
@@ -196,6 +215,7 @@ const resizeVector = (v: number[], t: number) => {
196215
if (v.length > t) return v.slice(0, t)
197216
return [...v, ...Array(t - v.length).fill(0)]
198217
}
218+
199219
export async function embedMultiSector(id: string, text: string, sectors: string[], chunks?: Array<{ text: string }>): Promise<EmbeddingResult[]> {
200220
const r: EmbeddingResult[] = []
201221
await q.ins_log.run(id, 'multi-sector', 'pending', Date.now(), null)

0 commit comments

Comments
 (0)