@@ -211,47 +211,88 @@ export class FileIndex {
211211
212212 const haystack = caseSensitive ? paths [ i ] ! : lowerPaths [ i ] !
213213
214- // Fused indexOf scan: find positions (SIMD-accelerated in JSC/V8) AND
215- // accumulate gap/consecutive terms inline. The greedy-earliest positions
216- // found here are identical to what the charCodeAt scorer would find, so
217- // we score directly from them — no second scan.
218- let pos = haystack . indexOf ( needleChars [ 0 ] ! )
219- if ( pos === - 1 ) continue
220- posBuf [ 0 ] = pos
221- let gapPenalty = 0
222- let consecBonus = 0
223- let prev = pos
224- for ( let j = 1 ; j < nLen ; j ++ ) {
225- pos = haystack . indexOf ( needleChars [ j ] ! , prev + 1 )
226- if ( pos === - 1 ) continue outer
227- posBuf [ j ] = pos
228- const gap = pos - prev - 1
229- if ( gap === 0 ) consecBonus += BONUS_CONSECUTIVE
230- else gapPenalty += PENALTY_GAP_START + gap * PENALTY_GAP_EXTENSION
231- prev = pos
232- }
233-
234- // Gap-bound reject: if the best-case score (all boundary bonuses) minus
235- // known gap penalties can't beat threshold, skip the boundary pass.
236- if (
237- topK . length === limit &&
238- scoreCeiling + consecBonus - gapPenalty <= threshold
239- ) {
240- continue
214+ // Greedy-leftmost indexOf gives fast but suboptimal positions when the
215+ // first needle char appears early (e.g. 's' in "src/") while the real
216+ // match lives deeper (e.g. "settings/"). We score from multiple start
217+ // positions — the leftmost hit plus every word-boundary occurrence of
218+ // needle[0] — and keep the best. Typical paths have 2–4 boundary starts,
219+ // so the overhead is minimal.
220+
221+ // Collect candidate start positions for needle[0]
222+ const firstChar = needleChars [ 0 ] !
223+ let startCount = 0
224+ // startPositions is a fresh array allocated per path (a reused buffer would
225+ // add complexity for marginal gain; paths rarely have >8 boundary starts)
226+ const startPositions : number [ ] = [ ]
227+
228+ // Always try the leftmost occurrence
229+ const firstPos = haystack . indexOf ( firstChar )
230+ if ( firstPos === - 1 ) continue
231+ startPositions [ startCount ++ ] = firstPos
232+
233+ // Also try every word-boundary position where needle[0] occurs
234+ for ( let bp = firstPos + 1 ; bp < haystack . length ; bp ++ ) {
235+ if ( haystack . charCodeAt ( bp ) !== firstChar . charCodeAt ( 0 ) ) continue
236+ // Check if this position is at a word boundary
237+ const prevCode = haystack . charCodeAt ( bp - 1 )
238+ if (
239+ prevCode === 47 || // /
240+ prevCode === 92 || // \
241+ prevCode === 45 || // -
242+ prevCode === 95 || // _
243+ prevCode === 46 || // .
244+ prevCode === 32 // space
245+ ) {
246+ startPositions [ startCount ++ ] = bp
247+ }
241248 }
242249
243- // Boundary/camelCase scoring: check the char before each match position.
244- const path = paths [ i ] !
250+ const originalPath = paths [ i ] !
245251 const hLen = pathLens [ i ] !
246- let score = nLen * SCORE_MATCH + consecBonus - gapPenalty
247- score += scoreBonusAt ( path , posBuf [ 0 ] ! , true )
248- for ( let j = 1 ; j < nLen ; j ++ ) {
249- score += scoreBonusAt ( path , posBuf [ j ] ! , false )
252+ const lengthBonus = Math . max ( 0 , 32 - ( hLen >> 2 ) )
253+ let bestScore = - Infinity
254+
255+ for ( let si = 0 ; si < startCount ; si ++ ) {
256+ posBuf [ 0 ] = startPositions [ si ] !
257+ let gapPenalty = 0
258+ let consecBonus = 0
259+ let prev = posBuf [ 0 ] !
260+ let matched = true
261+ for ( let j = 1 ; j < nLen ; j ++ ) {
262+ const pos = haystack . indexOf ( needleChars [ j ] ! , prev + 1 )
263+ if ( pos === - 1 ) { matched = false ; break }
264+ posBuf [ j ] = pos
265+ const gap = pos - prev - 1
266+ if ( gap === 0 ) consecBonus += BONUS_CONSECUTIVE
267+ else gapPenalty += PENALTY_GAP_START + gap * PENALTY_GAP_EXTENSION
268+ prev = pos
269+ }
270+ if ( ! matched ) continue
271+
272+ // Gap-bound reject for this start position
273+ if (
274+ topK . length === limit &&
275+ scoreCeiling + consecBonus - gapPenalty + lengthBonus <= threshold
276+ ) {
277+ continue
278+ }
279+
280+ // Boundary/camelCase scoring
281+ let score = nLen * SCORE_MATCH + consecBonus - gapPenalty
282+ score += scoreBonusAt ( originalPath , posBuf [ 0 ] ! , true )
283+ for ( let j = 1 ; j < nLen ; j ++ ) {
284+ score += scoreBonusAt ( originalPath , posBuf [ j ] ! , false )
285+ }
286+ score += lengthBonus
287+
288+ if ( score > bestScore ) bestScore = score
250289 }
251- score += Math . max ( 0 , 32 - ( hLen >> 2 ) )
290+
291+ if ( bestScore === - Infinity ) continue
292+ const score = bestScore
252293
253294 if ( topK . length < limit ) {
254- topK . push ( { path, fuzzScore : score } )
295+ topK . push ( { path : originalPath , fuzzScore : score } )
255296 if ( topK . length === limit ) {
256297 topK . sort ( ( a , b ) => a . fuzzScore - b . fuzzScore )
257298 threshold = topK [ 0 ] ! . fuzzScore
@@ -264,7 +305,7 @@ export class FileIndex {
264305 if ( topK [ mid ] ! . fuzzScore < score ) lo = mid + 1
265306 else hi = mid
266307 }
267- topK . splice ( lo , 0 , { path, fuzzScore : score } )
308+ topK . splice ( lo , 0 , { path : originalPath , fuzzScore : score } )
268309 topK . shift ( )
269310 threshold = topK [ 0 ] ! . fuzzScore
270311 }
0 commit comments