@@ -358,7 +358,9 @@ export async function generateEmbeddings(
358 358  // Use contextLength as the max character count (assumes worst case ~1 char per token)
359 359  const maxChars = contextLength
360 360
361 -  // Truncate any chunks that exceed the context length, then batch by total character count
361 +  // Truncate individual chunks that exceed the model's context length.
362 +  // Ollama processes each input independently, so only per-input length matters.
363 +  const OLLAMA_BATCH_SIZE = 100
362 364  const prepared: string[] = texts.map((text, i) => {
363 365    if (text.length > maxChars) {
364 366      const lastSentenceEnd = text.lastIndexOf('. ', maxChars)
@@ -372,25 +374,15 @@ export async function generateEmbeddings(
372 374    return text
373 375  })
374376
375 -  // Smart batching: group chunks so total characters per batch stays within maxChars
377 +  // Batch by item count — Ollama applies context limits per individual input,
378 +  // not across the whole batch, so cumulative character batching is unnecessary.
376 379  const batches: string[][] = []
377 -  let currentBatch: string[] = []
378 -  let currentBatchChars = 0
379 -  for (const text of prepared) {
380 -    if (currentBatch.length > 0 && currentBatchChars + text.length > maxChars) {
381 -      batches.push(currentBatch)
382 -      currentBatch = []
383 -      currentBatchChars = 0
384 -    }
385 -    currentBatch.push(text)
386 -    currentBatchChars += text.length
387 -  }
388 -  if (currentBatch.length > 0) {
389 -    batches.push(currentBatch)
380 +  for (let i = 0; i < prepared.length; i += OLLAMA_BATCH_SIZE) {
381 +    batches.push(prepared.slice(i, i + OLLAMA_BATCH_SIZE))
390 382  }
391 383
392 384  logger.info(
393 -    `[Ollama] Processing ${prepared.length} chunks in ${batches.length} batches (maxChars=${maxChars})`
385 +    `[Ollama] Processing ${prepared.length} chunks in ${batches.length} batches (batchSize=${OLLAMA_BATCH_SIZE})`
394 386  )
395 387
396 388  // Process each batch with retry logic
0 commit comments