@@ -37,7 +37,7 @@ use crate::git::GitStatusCache;
3737use crate :: grep:: { GrepResult , GrepSearchOptions , grep_search} ;
3838use crate :: query_tracker:: QueryTracker ;
3939use crate :: score:: match_and_score_files;
40- use crate :: types:: { FileItem , PaginationArgs , ScoringContext , SearchResult } ;
40+ use crate :: types:: { ContentCacheBudget , FileItem , PaginationArgs , ScoringContext , SearchResult } ;
4141use crate :: { SharedFrecency , SharedPicker } ;
4242use fff_query_parser:: FFFQuery ;
4343use git2:: { Repository , Status , StatusOptions } ;
@@ -47,7 +47,7 @@ use std::io::Read;
4747use std:: path:: { Path , PathBuf } ;
4848use std:: sync:: {
4949 Arc ,
50- atomic:: { AtomicBool , AtomicUsize , Ordering } ,
50+ atomic:: { AtomicBool , AtomicU64 , AtomicUsize , Ordering } ,
5151} ;
5252use std:: time:: SystemTime ;
5353use tracing:: { Level , debug, error, info, warn} ;
@@ -226,6 +226,7 @@ pub struct FilePicker {
226226 warmup_mmap_cache : bool ,
227227 cancelled : Arc < AtomicBool > ,
228228 mode : FFFMode ,
229+ pub cache_budget : Arc < ContentCacheBudget > ,
229230}
230231
231232impl std:: fmt:: Debug for FilePicker {
@@ -247,14 +248,18 @@ impl FilePicker {
247248 & self . base_path
248249 }
249250
250- pub fn warmup_mmap_cache ( & self ) -> bool {
251+ pub fn need_warmup_mmap_cache ( & self ) -> bool {
251252 self . warmup_mmap_cache
252253 }
253254
254255 pub fn mode ( & self ) -> FFFMode {
255256 self . mode
256257 }
257258
259+ pub fn cache_budget ( & self ) -> & ContentCacheBudget {
260+ & self . cache_budget
261+ }
262+
258263 pub fn git_root ( & self ) -> Option < & Path > {
259264 self . sync_data . git_workdir . as_deref ( )
260265 }
@@ -306,6 +311,7 @@ impl FilePicker {
306311 warmup_mmap_cache,
307312 cancelled : Arc :: clone ( & cancelled) ,
308313 mode,
314+ cache_budget : Arc :: new ( ContentCacheBudget :: default ( ) ) ,
309315 } ;
310316
311317 // Place the picker into the shared handle before spawning the
@@ -423,8 +429,9 @@ impl FilePicker {
423429 files : & ' a [ FileItem ] ,
424430 query : & FFFQuery < ' _ > ,
425431 options : & GrepSearchOptions ,
432+ budget : & ContentCacheBudget ,
426433 ) -> GrepResult < ' a > {
427- grep_search ( files, query, options)
434+ grep_search ( files, query, options, budget )
428435 }
429436
430437 // Returns an ongoing or finished scan progress
@@ -594,7 +601,7 @@ impl FilePicker {
594601 // mapping here because on linux and macos with the shared map opening it
595602 // should be available everywhere automatically, which saves
596603 // some time from doing extra remapping on every search
597- file. invalidate_mmap ( ) ;
604+ file. invalidate_mmap ( & self . cache_budget ) ;
598605 }
599606 }
600607
@@ -680,12 +687,15 @@ impl FilePicker {
680687 ) ;
681688
682689 self . sync_data = sync;
690+ // Old FileItems (and their mmaps) were dropped — reset the budget.
691+ self . cache_budget . reset ( ) ;
683692
684693 if self . warmup_mmap_cache {
685694 // Warmup in background to avoid blocking
686695 let files = self . sync_data . files ( ) . to_vec ( ) ; // Clone all files
696+ let budget = Arc :: clone ( & self . cache_budget ) ;
687697 std:: thread:: spawn ( move || {
688- warmup_mmaps ( & files) ;
698+ warmup_mmaps ( & files, & budget ) ;
689699 } ) ;
690700 }
691701 }
@@ -778,6 +788,8 @@ fn spawn_scan_and_watcher(
778788 let write_result = shared_picker. write ( ) . ok ( ) . map ( |mut guard| {
779789 if let Some ( ref mut picker) = * guard {
780790 picker. sync_data = sync;
791+ // Old FileItems (and their mmaps) were dropped — reset the budget.
792+ picker. cache_budget . reset ( ) ;
781793 }
782794 } ) ;
783795
@@ -791,7 +803,7 @@ fn spawn_scan_and_watcher(
791803 && let Ok ( guard) = shared_picker. read ( )
792804 && let Some ( ref picker) = * guard
793805 {
794- warmup_mmaps ( picker. sync_data . files ( ) ) ;
806+ warmup_mmaps ( picker. sync_data . files ( ) , & picker . cache_budget ) ;
795807 }
796808 }
797809 Err ( e) => {
@@ -844,26 +856,64 @@ fn spawn_scan_and_watcher(
844856 } ) ;
845857}
846858
847- /// Pre-populate mmap caches for all eligible files so the first grep search
848- /// doesn't pay the mmap creation + page fault cost.
859+ /// Pre-populate mmap caches for the most valuable files so the first grep
860+ /// search doesn't pay the mmap creation + page fault cost.
849861///
850- /// Each file is mmap'd and a single byte is read to trigger the page fault.
851- /// This runs in parallel using rayon.
862+ /// All files are collected once, then an O(n) `select_nth_unstable_by`
863+ /// partitions the top `budget.max_files` highest-frecency eligible
864+ /// files to the front (binary / empty files are pushed to the end by the
865+ /// comparator). The selected prefix is warmed in parallel via rayon.
866+ ///
867+ /// Files beyond the budget are still available via temporary mmaps on first
868+ /// grep access, so correctness is unaffected.
852869#[ tracing:: instrument( skip( files) , name = "warmup_mmaps" , level = Level :: DEBUG ) ]
853- fn warmup_mmaps ( files : & [ FileItem ] ) {
854- let warmed = std:: sync:: atomic:: AtomicUsize :: new ( 0 ) ;
870+ fn warmup_mmaps ( files : & [ FileItem ] , budget : & ContentCacheBudget ) {
871+ let max_files = budget. max_files ;
872+ let max_bytes: u64 = 512 * 1024 * 1024 ;
873+
874+ // Single collect — no pre-filter. The comparator in select_nth pushes
875+ // ineligible files (binary, empty) to the tail automatically.
876+ let mut all: Vec < & FileItem > = files. iter ( ) . collect ( ) ;
877+
878+ // O(n) partial sort: top max_files eligible-by-frecency files land in
879+ // all[..max_files]. Ineligible files compare as "lowest priority" so
880+ // they naturally sink past the partition boundary.
881+ if all. len ( ) > max_files {
882+ all. select_nth_unstable_by ( max_files, |a, b| {
883+ let a_ok = !a. is_binary && a. size > 0 ;
884+ let b_ok = !b. is_binary && b. size > 0 ;
885+ match ( a_ok, b_ok) {
886+ ( true , false ) => std:: cmp:: Ordering :: Less ,
887+ ( false , true ) => std:: cmp:: Ordering :: Greater ,
888+ ( false , false ) => std:: cmp:: Ordering :: Equal ,
889+ ( true , true ) => b. total_frecency_score . cmp ( & a. total_frecency_score ) ,
890+ }
891+ } ) ;
892+ }
893+
894+ let to_warm = & all[ ..all. len ( ) . min ( max_files) ] ;
895+
896+ let warmed_bytes = AtomicU64 :: new ( 0 ) ;
897+ let budget_exhausted = AtomicBool :: new ( false ) ;
855898
856- files . par_iter ( ) . for_each ( |file| {
857- if file . is_binary || file . size == 0 {
899+ to_warm . par_iter ( ) . for_each ( |file| {
900+ if budget_exhausted . load ( Ordering :: Relaxed ) {
858901 return ;
859902 }
860903
861- if let Some ( content) = file. get_mmap ( ) {
862- // Read the first byte to trigger the initial page fault (mmap)
863- // or ensure the content is cached (Windows buffer).
864- let _ = std:: hint:: black_box ( content. first ( ) ) ;
904+ if file. is_binary || file. size == 0 || file. size > 5 * 1024 * 1024 {
905+ return ;
906+ }
865907
866- warmed. fetch_add ( 1 , Ordering :: Relaxed ) ;
908+ // Byte budget.
909+ let prev_bytes = warmed_bytes. fetch_add ( file. size , Ordering :: Relaxed ) ;
910+ if prev_bytes + file. size > max_bytes {
911+ budget_exhausted. store ( true , Ordering :: Relaxed ) ;
912+ return ;
913+ }
914+
915+ if let Some ( content) = file. get_mmap ( budget) {
916+ let _ = std:: hint:: black_box ( content. first ( ) ) ;
867917 }
868918 } ) ;
869919}
0 commit comments