@@ -147,21 +147,36 @@ async function embedWithLocal(t: string, s: string): Promise<number[]> {
147147}
148148
/**
 * Hashes a string to an unsigned 32-bit integer using an FNV-1a style loop.
 *
 * Each UTF-16 code unit of `v` is XORed into the running state, which is then
 * multiplied by the 32-bit FNV prime (16777619). The state starts at the
 * 32-bit FNV offset basis (0x811c9dc5).
 *
 * @param v - The string to hash; the empty string yields the offset basis.
 * @returns The hash as a non-negative integer in the range [0, 2^32 - 1].
 */
const hash = (v: string) => {
  let h = 0x811c9dc5;
  for (let i = 0; i < v.length; i++) {
    // Math.imul keeps the multiplication in 32-bit integer arithmetic.
    h = Math.imul(h ^ v.charCodeAt(i), 16777619);
  }
  // `>>> 0` reinterprets the signed 32-bit state as unsigned.
  return h >>> 0;
};
154157
/**
 * Adds one signed, hashed feature to `vec` in place.
 *
 * The key is hashed to an unsigned 32-bit integer; the bucket index is
 * `hash % dim` and the lowest bit of the hash picks the sign (odd hash
 * subtracts `w`, even hash adds it).
 *
 * @param vec - Vector that is mutated.
 * @param dim - Number of buckets used for the modulo; presumably equal to
 *   `vec.length` — verify against callers.
 * @param key - Feature key to hash.
 * @param w - Weight contributed by the feature.
 */
const addFeat = (vec: Float32Array, dim: number, key: string, w: number) => {
  const h = hash(key);
  const sign = (h & 1) === 1 ? -1 : 1;
  // A single modulo path is used for every `dim`: a bitmask shortcut would
  // coerce `dim` to int32 and disagree with `%` for non-integer or >= 2^32 values.
  vec[h % dim] += w * sign;
};
159167
/**
 * Scales `vec` in place to unit Euclidean (L2) length.
 *
 * The vector is left untouched when the sum of squares is zero or NaN, so an
 * all-zero vector stays zero and a NaN element does not spread to the
 * remaining elements.
 *
 * @param vec - Vector that is normalised in place.
 */
const norm = (vec: Float32Array) => {
  let sumSq = 0;
  for (const x of vec) {
    sumSq += x * x;
  }
  // `!(sumSq > 0)` is true for both 0 and NaN; `sumSq === 0` would miss NaN.
  if (!(sumSq > 0)) return;
  const length = Math.sqrt(sumSq);
  for (let i = 0; i < vec.length; i++) {
    vec[i] /= length;
  }
};
166181
167182function generateSyntheticEmbedding ( t : string , s : string ) : number [ ] {
@@ -174,7 +189,11 @@ function generateSyntheticEmbedding(t: string, s: string): number[] {
174189 }
175190 const et = Array . from ( addSynonymTokens ( ct ) )
176191 const tc = new Map < string , number > ( )
177- et . forEach ( tok => tc . set ( tok , ( tc . get ( tok ) || 0 ) + 1 ) )
192+ const etLength : number = et . length ;
193+ for ( let i = 0 ; i < etLength ; i ++ ) {
194+ const tok = et [ i ] ;
195+ tc . set ( tok , ( tc . get ( tok ) || 0 ) + 1 )
196+ }
178197
179198 for ( const [ tok , c ] of tc ) {
180199 const w = Math . log ( 1 + c ) + 1
@@ -196,6 +215,7 @@ const resizeVector = (v: number[], t: number) => {
196215 if ( v . length > t ) return v . slice ( 0 , t )
197216 return [ ...v , ...Array ( t - v . length ) . fill ( 0 ) ]
198217}
218+
199219export async function embedMultiSector ( id : string , text : string , sectors : string [ ] , chunks ?: Array < { text : string } > ) : Promise < EmbeddingResult [ ] > {
200220 const r : EmbeddingResult [ ] = [ ]
201221 await q . ins_log . run ( id , 'multi-sector' , 'pending' , Date . now ( ) , null )
0 commit comments