@@ -414,22 +414,20 @@ export async function POST(request: NextRequest) {
414414 }
415415 }
416416
417- // Normalize scores globally across all results before ranking.
418- // Per-provider normalization would inflate a poor single-provider result
419- // to an artificially high rank when merging across embedding spaces.
417+ // When both openaiKbIds and ollamaKbIds are non-empty, results may come from different
418+ // embedding spaces (OpenAI + Ollama) and their raw cosine distances are not directly
419+ // comparable. Min-max normalize distances to [0, 1] only in that case so existing
420+ // consumers of single-provider similarity scores are unaffected.
421+ const isMixedProviders = openaiKbIds . length > 0 && ollamaKbIds . length > 0
420422 const normalizeScores = ( items : SearchResult [ ] ) : SearchResult [ ] => {
421- if ( items . length === 0 ) return items
422- // Single result: clamp raw distance to [0,1] to preserve quality signal.
423- // Forcing distance=0 would give a poor single result the best possible rank.
424- if ( items . length === 1 )
425- return [ { ...items [ 0 ] , distance : Math . min ( 1 , Math . max ( 0 , items [ 0 ] . distance ) ) } ]
423+ if ( items . length <= 1 ) return items
426424 const min = Math . min ( ...items . map ( ( r ) => r . distance ) )
427425 const max = Math . max ( ...items . map ( ( r ) => r . distance ) )
428426 const range = max - min || 1
429427 return items . map ( ( r ) => ( { ...r , distance : ( r . distance - min ) / range } ) )
430428 }
431429
432- const results : SearchResult [ ] = normalizeScores ( allResults )
430+ const results : SearchResult [ ] = ( isMixedProviders ? normalizeScores ( allResults ) : allResults )
433431 . sort ( ( a , b ) => a . distance - b . distance )
434432 . slice ( 0 , validatedData . topK )
435433
0 commit comments