@@ -147,21 +147,41 @@ async function embedWithLocal(t: string, s: string): Promise<number[]> {
147147}
148148
/**
 * FNV-1a-style 32-bit hash computed over the UTF-16 code units of `v`.
 *
 * Each code unit is XOR-ed into the running state as a whole (not byte by
 * byte), so for ASCII input the result matches the standard FNV-1a 32-bit
 * value.
 *
 * @param v - String to hash; the empty string yields the offset basis.
 * @returns The hash as an unsigned 32-bit integer (0 .. 2^32 - 1).
 */
const hash = (v: string) => {
  // FNV 32-bit offset basis.
  let h = 0x811c9dc5;
  for (let i = 0; i < v.length; i++) {
    // XOR the code unit in, then multiply by the FNV prime (16777619).
    // Math.imul performs the multiplication with 32-bit wrap-around.
    h = Math.imul(h ^ v.charCodeAt(i), 16777619);
  }
  // Math.imul returns a signed int32; `>>> 0` reinterprets it as unsigned
  // so callers can use the value directly with `%`.
  return h >>> 0;
};
154157
/**
 * Adds one signed, weighted feature to a hashed feature vector.
 *
 * The key is hashed; the hash selects the bucket (`hash % dim`) and its lowest
 * bit selects the sign, so features that collide in a bucket can partially
 * cancel instead of always accumulating.
 *
 * @param vec - Vector to update in place.
 * @param dim - Number of buckets to hash into. Presumably equal to
 *   `vec.length` — confirm against callers. Writes to an index outside `vec`
 *   are silently ignored by the typed array.
 * @param key - Feature key to hash.
 * @param w - Feature weight; added when the hash is even, subtracted when odd.
 */
const addFeat = (vec: Float32Array, dim: number, key: string, w: number) => {
  const h = hash(key);
  const sign = (h & 1) === 1 ? -1 : 1;
  // `hash` returns an unsigned 32-bit value, so the remainder is already
  // non-negative. A single modulo is used for every `dim`: it matches a
  // `h & (dim - 1)` bitmask whenever `dim` is a power of two, while avoiding
  // the mask's int32 truncation of `dim` (wrong or negative index for
  // `dim >= 2 ** 32` or a fractional `dim`).
  vec[h % dim] += w * sign;
};
159172
/**
 * Scales `vec` in place to unit Euclidean (L2) length.
 *
 * The vector is left untouched when its sum of squares is zero or NaN, so an
 * all-zero vector stays zero and a single NaN element does not overwrite the
 * remaining components.
 *
 * @param vec - Vector to normalize in place.
 */
const norm = (vec: Float32Array) => {
  const len = vec.length;

  let sumSq = 0;
  for (let i = 0; i < len; i++) {
    const v = vec[i];
    sumSq += v * v;
  }

  // `!(sumSq > 0)` is true for both 0 and NaN; a plain `=== 0` test would let
  // NaN through and turn every component into NaN below.
  if (!(sumSq > 0)) return;

  // One division up front, then a multiply per element.
  const scale = 1 / Math.sqrt(sumSq);
  for (let i = 0; i < len; i++) {
    vec[i] *= scale;
  }
};
166186
167187function generateSyntheticEmbedding ( t : string , s : string ) : number [ ] {
@@ -174,7 +194,11 @@ function generateSyntheticEmbedding(t: string, s: string): number[] {
174194 }
175195 const et = Array . from ( addSynonymTokens ( ct ) )
176196 const tc = new Map < string , number > ( )
177- et . forEach ( tok => tc . set ( tok , ( tc . get ( tok ) || 0 ) + 1 ) )
197+ const etLength : number = et . length ;
198+ for ( let i = 0 ; i < etLength ; i ++ ) {
199+ const tok = et [ i ] ;
200+ tc . set ( tok , ( tc . get ( tok ) || 0 ) + 1 )
201+ }
178202
179203 for ( const [ tok , c ] of tc ) {
180204 const w = Math . log ( 1 + c ) + 1
@@ -196,6 +220,7 @@ const resizeVector = (v: number[], t: number) => {
196220 if ( v . length > t ) return v . slice ( 0 , t )
197221 return [ ...v , ...Array ( t - v . length ) . fill ( 0 ) ]
198222}
223+
199224export async function embedMultiSector ( id : string , text : string , sectors : string [ ] , chunks ?: Array < { text : string } > ) : Promise < EmbeddingResult [ ] > {
200225 const r : EmbeddingResult [ ] = [ ]
201226 await q . ins_log . run ( id , 'multi-sector' , 'pending' , Date . now ( ) , null )
0 commit comments