From d1ab96d03d93ce1a5d844baeff92520039f35829 Mon Sep 17 00:00:00 2001
From: Dmitriy Kovalenko <dmtr.kovalenko@outlook.com>
Date: Fri, 27 Mar 2026 22:57:39 +0000
Subject: [PATCH] chore: Update docs for - perf: Reduce memory footprint of
 grep index

---
 crates/fff-core/src/score.rs | 233 +++++++++++++++++++++--------------
 1 file changed, 143 insertions(+), 90 deletions(-)

diff --git a/crates/fff-core/src/score.rs b/crates/fff-core/src/score.rs
index 6974bab3..be565264 100644
--- a/crates/fff-core/src/score.rs
+++ b/crates/fff-core/src/score.rs
@@ -2,7 +2,7 @@ use crate::{
     constraints::apply_constraints,
     git::is_modified_status,
     path_utils::calculate_distance_penalty,
-    sort_buffer::{sort_by_key_with_buffer, sort_with_buffer},
+    sort_buffer::sort_with_buffer,
     types::{FileItem, Score, ScoringContext},
 };
 use fff_query_parser::FuzzyQuery;
@@ -34,14 +34,6 @@ impl<'a> FileItems<'a> {
         self.len() == 0
     }
 
-    #[inline]
-    fn get(&self, index: usize) -> Option<&'a FileItem> {
-        match self {
-            FileItems::All(s) => s.get(index),
-            FileItems::Filtered(v) => v.get(index).copied(),
-        }
-    }
-
     /// Build the haystack of relative paths (original casing) for fuzzy matching.
     /// neo_frizbee lowercases internally for comparison but preserves original casing
     /// for capitalization_bonus and matching_case_bonus scoring.
@@ -70,6 +62,7 @@ fn match_fuzzy_parts(
     fuzzy_parts: &[&str],
     working_files: &FileItems<'_>,
     options: &neo_frizbee::Config,
+    max_threads: usize,
 ) -> Vec<neo_frizbee::Match> {
     if fuzzy_parts.is_empty() {
         return vec![];
@@ -90,13 +83,15 @@ fn match_fuzzy_parts(
     }
 
     if valid_parts.len() == 1 {
-        let matches = neo_frizbee::match_list(valid_parts[0], &haystack, options);
+        let matches =
+            neo_frizbee::match_list_parallel(valid_parts[0], &haystack, options, max_threads);
         return matches;
     }
 
     // Multiple parts - match first part, then filter by remaining parts
     // TODO figure out if we can move this logic to my frizbee fork at least
-    let mut matches = neo_frizbee::match_list(valid_parts[0], &haystack, options);
+    let mut matches =
+        neo_frizbee::match_list_parallel(valid_parts[0], &haystack, options, max_threads);
     for part in valid_parts[1..].iter() {
         let mut part_options = *options;
         part_options.max_typos = options.max_typos.map(|t| t.min(part.len() as u16));
@@ -176,49 +171,12 @@ pub fn match_and_score_files<'a>(
         },
     };
 
-    let path_matches = match_fuzzy_parts(fuzzy_parts, &working_files, &options);
-    let primary_text = fuzzy_parts[0]; // Use first part for filename matching
-    let haystack_of_filenames: Vec<&str> = path_matches
-        .iter()
-        .filter_map(|m| {
-            working_files
-                .get(m.index as usize)
-                .map(|f| f.file_name.as_str())
-        })
-        .collect();
-
-    // if there is a / in the query we don't even match filenames
-    let filename_matches = if query_contains_path_separator {
-        vec![]
-    } else {
-        // Use parallel matching only if we have enough filenames to justify overhead
-        // Sequential matching is faster for small result sets (< 1000 matches)
-        let mut list = if haystack_of_filenames.len() > 1000 {
-            neo_frizbee::match_list_parallel(
-                primary_text,
-                &haystack_of_filenames,
-                &options,
-                context.max_threads,
-            )
-        } else {
-            neo_frizbee::match_list(primary_text, &haystack_of_filenames, &options)
-        };
-
-        // Sequential sort is faster for small lists
-        if list.len() > 1000 {
-            list.par_sort_unstable_by_key(|m| m.index);
-        } else {
-            sort_by_key_with_buffer(&mut list, |m| m.index);
-        }
+    let path_matches = match_fuzzy_parts(fuzzy_parts, &working_files, &options, context.max_threads);
+    let primary_needle = fuzzy_parts[0].as_bytes();
 
-        list
-    };
-
-    let mut next_filename_match_index = 0;
     let results: Vec<_> = path_matches
         .into_iter()
-        .enumerate()
-        .map(|(index, path_match)| {
+        .map(|path_match| {
             let file_idx = path_match.index as usize;
             let file = working_files.index(file_idx);
 
@@ -235,45 +193,39 @@ pub fn match_and_score_files<'a>(
             let distance_penalty =
                 calculate_distance_penalty(context.current_file, &file.relative_path);
 
-            let filename_match = filename_matches
-                .get(next_filename_match_index)
-                .and_then(|m| {
-                    if m.index == index as u32 {
-                        next_filename_match_index += 1;
-                        Some(m)
-                    } else {
-                        None
-                    }
-                });
+            // Score filename with greedy matcher (no second match_list call)
+            let (filename_score, filename_exact) = if query_contains_path_separator {
+                (0, false)
+            } else {
+                score_filename_greedy(primary_needle, &file.file_name, &options.scoring)
+            };
+            let filename_matched = filename_score > 0 || filename_exact;
 
             let mut has_special_filename_bonus = false;
-            let filename_bonus = match filename_match {
-                Some(filename_match) if filename_match.exact => {
-                    filename_match.score as i32 / 5 * 2 // 40% bonus for exact filename match
-                }
+            let filename_bonus = if filename_exact {
+                filename_score as i32 / 5 * 2 // 40% bonus for exact filename match
+            } else if filename_matched
+                && filename_score >= path_match.score
+                && !query_contains_path_separator
+            {
                 // 16% bonus for fuzzy filename match but only if the score of matched path is
                 // equal or greater than the score of matched filename, thus we are not allowing
                 // typoed filename to score higher than the path match
-                Some(filename_match)
-                    if filename_match.score >= path_match.score
-                        && !query_contains_path_separator =>
-                {
-                    base_score = filename_match.score as i32;
-
-                    (base_score / 6)
-                        // for large queries around ~300 score the bonus is too big
-                        // it might lead to situations when much more fitting path with a larger
-                        // base score getting filtered out by combination of score + filename bonus
-                        // so we cap it at 10% of the roughly largest score you can get
-                        .min(30)
-                }
-                // 5% bonus for special file but not as much as file name to avoid sitatuions
+                base_score = filename_score as i32;
+
+                (base_score / 6)
+                    // for large queries around ~300 score the bonus is too big
+                    // it might lead to situations when much more fitting path with a larger
+                    // base score getting filtered out by combination of score + filename bonus
+                    // so we cap it at 10% of the roughly largest score you can get
+                    .min(30)
+            } else if !filename_matched && is_special_entry_point_file(&file.file_name) {
+                // 5% bonus for special file but not as much as file name to avoid situations
                 // when you have /user_service/server.rs and /user_service/server/mod.rs
-                None if is_special_entry_point_file(&file.file_name) => {
-                    has_special_filename_bonus = true;
-                    base_score * 5 / 100
-                }
-                _ => 0,
+                has_special_filename_bonus = true;
+                base_score * 5 / 100
+            } else {
+                0
             };
 
             let current_file_penalty = calculate_current_file_penalty(file, base_score, context);
@@ -319,12 +271,15 @@ pub fn match_and_score_files<'a>(
                 git_status_boost,
                 distance_penalty,
                 combo_match_boost,
-                exact_match: path_match.exact || filename_match.is_some_and(|m| m.exact),
-                match_type: match filename_match {
-                    Some(filename_match) if filename_match.exact => "exact_filename",
-                    Some(_) => "fuzzy_filename",
-                    None if path_match.exact => "exact_path",
-                    None => "fuzzy_path",
+                exact_match: path_match.exact || filename_exact,
+                match_type: if filename_exact {
+                    "exact_filename"
+                } else if filename_matched {
+                    "fuzzy_filename"
+                } else if path_match.exact {
+                    "exact_path"
+                } else {
+                    "fuzzy_path"
                 },
             };
 
@@ -335,6 +290,104 @@ pub fn match_and_score_files<'a>(
     sort_and_paginate(results, context)
 }
 
+/// Greedy left-to-right fuzzy match of needle against filename.
+/// Returns (score, exact). Returns (0, false) if no match.
+/// Ported from frizbee's match_greedy but without index tracking (zero allocation).
+#[inline]
+fn score_filename_greedy(needle: &[u8], filename: &str, scoring: &Scoring) -> (u16, bool) {
+    let haystack = filename.as_bytes();
+
+    if needle.is_empty() {
+        return (0, false);
+    }
+
+    let mut score: u16 = 0;
+    let mut haystack_idx: usize = 0;
+
+    let mut delimiter_bonus_enabled = false;
+    let mut prev_is_lower = false;
+    let mut prev_is_delimiter = false;
+
+    for needle_idx in 0..needle.len() {
+        let needle_char = needle[needle_idx];
+        let needle_lower = if needle_char.is_ascii_uppercase() {
+            needle_char + 32
+        } else {
+            needle_char
+        };
+        let needle_upper = if needle_char.is_ascii_lowercase() {
+            needle_char - 32
+        } else {
+            needle_char
+        };
+
+        let start_idx = haystack_idx;
+        let max_idx = haystack.len().saturating_sub(needle.len() - needle_idx);
+        let mut found = false;
+
+        while haystack_idx <= max_idx {
+            let h = haystack[haystack_idx];
+            let h_is_delimiter = matches!(h, b' ' | b'/' | b'.' | b',' | b'_' | b'-' | b':');
+            let h_is_upper = h.is_ascii_uppercase();
+            let h_is_lower = h.is_ascii_lowercase();
+
+            if !h_is_delimiter {
+                delimiter_bonus_enabled = true;
+            }
+
+            if needle_lower != h && needle_upper != h {
+                prev_is_delimiter = delimiter_bonus_enabled && h_is_delimiter;
+                prev_is_lower = h_is_lower;
+                haystack_idx += 1;
+                continue;
+            }
+
+            // Match found
+            score += scoring.match_score;
+
+            // Gap penalty
+            if haystack_idx != start_idx && needle_idx != 0 {
+                score = score.saturating_sub(
+                    scoring.gap_open_penalty
+                        + scoring.gap_extend_penalty
+                            * (haystack_idx - start_idx).saturating_sub(1) as u16,
+                );
+            }
+
+            // Bonuses
+            if needle_char == h {
+                score += scoring.matching_case_bonus;
+            }
+            if h_is_upper && prev_is_lower {
+                score += scoring.capitalization_bonus;
+            }
+            if haystack_idx == 0 {
+                score += scoring.prefix_bonus;
+            }
+            if prev_is_delimiter && !h_is_delimiter {
+                score += scoring.delimiter_bonus;
+            }
+
+            prev_is_delimiter = delimiter_bonus_enabled && h_is_delimiter;
+            prev_is_lower = h_is_lower;
+            haystack_idx += 1;
+            found = true;
+            break;
+        }
+
+        if !found {
+            return (0, false);
+        }
+    }
+
+    let exact = haystack == needle;
+    if exact {
+        score += scoring.exact_match_bonus;
+    }
+
+    (score, exact)
+}
+
 /// Check if a filename is a special entry point file that deserves bonus scoring
 /// These are typically files that serve as module exports or entry points
 fn is_special_entry_point_file(filename: &str) -> bool {