@@ -285,6 +285,8 @@ pub struct GrepResult<'a> {
285285 pub total_files : usize ,
286286 /// Total number of searchable files (after filtering out binary, too-large, etc.).
287287 pub filtered_file_count : usize ,
288+ /// Number of files that contained at least one match.
289+ pub files_with_matches : usize ,
288290 /// The file offset to pass for the next page. `0` if there are no more files.
289291 /// Callers should store this and pass it as `file_offset` in the next call.
290292 pub next_file_offset : usize ,
@@ -1044,7 +1046,12 @@ fn char_indices_to_byte_offsets(line: &str, char_indices: &[usize]) -> SmallVec<
10441046 result
10451047}
10461048
1047- #[ tracing:: instrument( skip_all, level = Level :: DEBUG ) ]
1049+ /// Minimum chunk size for paginated search. Must be large enough for good
1050+ /// thread utilization across rayon's pool (~28 threads on modern hardware)
1051+ /// but small enough to allow early termination after few chunks.
1052+ const PAGINATED_CHUNK_SIZE : usize = 512 ;
1053+
1054+ #[ tracing:: instrument( skip_all, level = Level :: DEBUG , fields( prefiltered_count = files_to_search. len( ) ) ) ]
10481055fn run_file_search < ' a , F > (
10491056 files_to_search : & [ & ' a FileItem ] ,
10501057 options : & GrepSearchOptions ,
@@ -1064,50 +1071,109 @@ where
10641071 } ;
10651072
10661073 let search_start = std:: time:: Instant :: now ( ) ;
1074+ let page_limit = options. page_limit ;
10671075 let budget_exceeded = AtomicBool :: new ( false ) ;
10681076
1069- // Parallel phase: search all files concurrently using rayon.
1070- // Every file is visited (no early-exit gaps), so per_file_results is a
1071- // contiguous, order-preserving subset — pagination offsets stay correct .
1072- // The time budget acts as the work bound; there is no separate file cap.
1073- let per_file_results : Vec < ( usize , & ' a FileItem , Vec < GrepMatch > ) > = files_to_search
1074- . par_iter ( )
1075- . enumerate ( )
1076- . filter_map ( | ( idx , file ) | {
1077- // Time budget check (relaxed — checked once per file, not per line).
1078- if let Some ( budget ) = time_budget
1079- && search_start . elapsed ( ) > budget
1080- {
1081- budget_exceeded . store ( true , Ordering :: Relaxed ) ;
1082- return None ;
1083- }
1077+ // For paginated searches, process files in chunks to enable early
1078+ // termination. Each chunk is searched in parallel with rayon; between
1079+ // chunks we check whether enough matches have been collected .
1080+ //
1081+ // For full searches (page_limit = MAX), one chunk = all files — same
1082+ // throughput as before, no overhead from the chunking loop.
1083+ //
1084+ // For common queries ("x", "if") with ~99% hit rate: the first 512-file
1085+ // chunk yields ~500 matches, far exceeding page_limit=50. We stop after
1086+ // one chunk (~1ms) instead of searching all 93K files (~175ms).
1087+ let chunk_size = if page_limit < usize :: MAX {
1088+ PAGINATED_CHUNK_SIZE
1089+ } else {
1090+ files_to_search . len ( ) . max ( 1 )
1091+ } ;
10841092
1085- let content = file. get_content_for_search ( budget) ?;
1093+ let mut result_files: Vec < & ' a FileItem > = Vec :: new ( ) ;
1094+ let mut all_matches: Vec < GrepMatch > = Vec :: new ( ) ;
1095+ let mut files_consumed: usize = 0 ;
1096+ let mut page_filled = false ;
1097+
1098+ for chunk in files_to_search. chunks ( chunk_size) {
1099+ let chunk_offset = files_consumed;
1100+
1101+ // Parallel phase: search all files in this chunk concurrently.
1102+ // Within a chunk every file is visited (no gaps), so pagination
1103+ // offsets remain correct across chunk boundaries.
1104+ let chunk_results: Vec < ( usize , & ' a FileItem , Vec < GrepMatch > ) > = chunk
1105+ . par_iter ( )
1106+ . enumerate ( )
1107+ . filter_map ( |( local_idx, file) | {
1108+ if let Some ( budget) = time_budget
1109+ && search_start. elapsed ( ) > budget
1110+ {
1111+ budget_exceeded. store ( true , Ordering :: Relaxed ) ;
1112+ return None ;
1113+ }
10861114
1087- // Skip files that are binary but weren't caught by extension heuristic
1088- if crate :: file_picker:: detect_binary_content ( & content) {
1089- return None ;
1090- }
1115+ let content = file. get_content_for_search ( budget) ?;
1116+ let file_matches = search_file ( & content, options. max_matches_per_file ) ;
1117+
1118+ if file_matches. is_empty ( ) {
1119+ return None ;
1120+ }
1121+
1122+ Some ( ( chunk_offset + local_idx, * file, file_matches) )
1123+ } )
1124+ . collect ( ) ;
1125+
1126+ // Every file in the chunk was visited by rayon (matched or not).
1127+ files_consumed = chunk_offset + chunk. len ( ) ;
10911128
1092- let file_matches = search_file ( & content, options. max_matches_per_file ) ;
1129+ // Flatten this chunk's results into the accumulator.
1130+ for ( batch_idx, file, file_matches) in chunk_results {
1131+ let file_result_idx = result_files. len ( ) ;
1132+ result_files. push ( file) ;
10931133
1094- if file_matches. is_empty ( ) {
1095- return None ;
1134+ for mut m in file_matches {
1135+ m. file_index = file_result_idx;
1136+ all_matches. push ( m) ;
10961137 }
10971138
1098- Some ( ( idx, * file, file_matches) )
1099- } )
1100- . collect ( ) ;
1139+ if all_matches. len ( ) >= page_limit {
1140+ // Tighten files_consumed to the file that tipped us over so
1141+ // the next page resumes right after it.
1142+ files_consumed = batch_idx + 1 ;
1143+ page_filled = true ;
1144+ break ;
1145+ }
1146+ }
11011147
1102- collect_grep_results (
1103- per_file_results,
1104- files_to_search. len ( ) ,
1105- options,
1148+ if page_filled || budget_exceeded. load ( Ordering :: Relaxed ) {
1149+ break ;
1150+ }
1151+ }
1152+
1153+ // If no file had any match, we searched the entire slice.
1154+ if result_files. is_empty ( ) {
1155+ files_consumed = files_to_search. len ( ) ;
1156+ }
1157+
1158+ let has_more = budget_exceeded. load ( Ordering :: Relaxed )
1159+ || ( page_filled && files_consumed < files_to_search. len ( ) ) ;
1160+
1161+ let next_file_offset = if has_more {
1162+ options. file_offset + files_consumed
1163+ } else {
1164+ 0
1165+ } ;
1166+
1167+ GrepResult {
1168+ matches : all_matches,
1169+ files_with_matches : result_files. len ( ) ,
1170+ files : result_files,
1171+ total_files_searched : files_consumed,
11061172 total_files,
11071173 filtered_file_count,
1174+ next_file_offset,
11081175 regex_fallback_error,
1109- budget_exceeded. load ( Ordering :: Relaxed ) ,
1110- )
1176+ }
11111177}
11121178
11131179/// Flatten per-file results into the final `GrepResult`.
@@ -1175,6 +1241,7 @@ fn collect_grep_results<'a>(
11751241
11761242 GrepResult {
11771243 matches : all_matches,
1244+ files_with_matches : result_files. len ( ) ,
11781245 files : result_files,
11791246 total_files_searched : files_consumed,
11801247 total_files,
@@ -1243,7 +1310,7 @@ fn prepare_files_to_search<'a>(
12431310
12441311/// Fuzzy grep search using SIMD-accelerated `neo_frizbee::match_list`.
12451312///
1246- /// # Why this doesn't use `grep-searcher` / `GrepSink`
1313+ /// Why this doesn't use `grep-searcher` / `GrepSink`
12471314///
12481315/// PlainText and Regex modes use the `grep-searcher` pipeline: a `Matcher`
12491316/// finds candidate lines, and a `Sink` collects them one at a time. This
@@ -1294,9 +1361,7 @@ fn fuzzy_grep_search<'a>(
12941361 // Use default gap penalties. Higher values (e.g. 20) cause
12951362 // smith-waterman to prefer *dropping needle chars* over paying
12961363 // gap costs, which inflates the typo count and breaks
1297- // transposition matching ("shcema" → "schema" becomes 3 typos
1298- // instead of 1). Scattered matches are filtered by max_typos
1299- // and the match span check below instead.
1364+ // transposition matching ("shcema" → "schema" becomes 3 typos instead of 1)
13001365 exact_match_bonus : 100 ,
13011366 // gap_open_penalty: 4,
13021367 // gap_extend_penalty: 2,
@@ -1321,9 +1386,6 @@ fn fuzzy_grep_search<'a>(
13211386 let perfect_score = ( grep_text. len ( ) as u16 ) * 16 ;
13221387 let min_score = ( perfect_score * 50 ) / 100 ;
13231388
1324- // Maximum allowed span of matched characters in the haystack, relative
1325- // to needle length.
1326- //
13271389 // We allow up to needle_len * 2 to accommodate fuzzy subsequence
13281390 // matches in longer identifiers (e.g. "SortedMap" → "SortedArrayMap"
13291391 // has span 13 for needle 9). Quality is enforced by the density and
@@ -1349,6 +1411,7 @@ fn fuzzy_grep_search<'a>(
13491411 unique_needle_chars. push ( hi) ;
13501412 }
13511413 }
1414+
13521415 // How many distinct needle chars must appear in the file.
13531416 // With max_typos allowed, we need at least (unique_count - max_typos).
13541417 let unique_count = {
@@ -1360,12 +1423,12 @@ fn fuzzy_grep_search<'a>(
13601423 } ;
13611424 let min_chars_required = unique_count. saturating_sub ( max_typos) ;
13621425
1363- let _time_budget = if options. time_budget_ms > 0 {
1426+ let time_budget = if options. time_budget_ms > 0 {
13641427 Some ( std:: time:: Duration :: from_millis ( options. time_budget_ms ) )
13651428 } else {
13661429 None
13671430 } ;
1368- let _search_start = std:: time:: Instant :: now ( ) ;
1431+ let search_start = std:: time:: Instant :: now ( ) ;
13691432 let budget_exceeded = AtomicBool :: new ( false ) ;
13701433 let max_matches_per_file = options. max_matches_per_file ;
13711434
@@ -1378,17 +1441,14 @@ fn fuzzy_grep_search<'a>(
13781441 . map_init (
13791442 || matcher. clone ( ) ,
13801443 |matcher, ( idx, file) | {
1381- // if let Some(budget) = time_budget
1382- // && search_start.elapsed() > budget
1383- // {
1384- // budget_exceeded.store(true, Ordering::Relaxed);
1385- // return None;
1386- // }
1387-
1388- let file_content = file. get_content_for_search ( budget) ?;
1389- if crate :: file_picker:: detect_binary_content ( & file_content) {
1444+ if let Some ( budget) = time_budget
1445+ && search_start. elapsed ( ) > budget
1446+ {
1447+ budget_exceeded. store ( true , Ordering :: Relaxed ) ;
13901448 return None ;
13911449 }
1450+
1451+ let file_content = file. get_content_for_search ( budget) ?;
13921452 let file_bytes: & [ u8 ] = & file_content;
13931453
13941454 // File-level prefilter: check if enough distinct needle chars
@@ -1579,6 +1639,7 @@ fn fuzzy_grep_search<'a>(
15791639///
15801640/// When `query` is empty, returns git-modified/untracked files sorted by
15811641/// frecency for the "welcome state" UI.
1642+ #[ tracing:: instrument( skip( files, options, budget, bigram_index, bigram_overlay) , fields( file_count = files. len( ) ) ) ]
15821643pub fn grep_search < ' a > (
15831644 files : & ' a [ FileItem ] ,
15841645 query : & FFFQuery < ' _ > ,
0 commit comments