@@ -388,21 +388,25 @@ impl FileItem {
388388/// Options for creating a [`FilePicker`].
389389pub struct FilePickerOptions {
390390 pub base_path : String ,
391- pub warmup_mmap_cache : bool ,
391+ /// Pre-populate mmap caches for top-frecency files after the initial scan and in the post-rescan rebuild.
392+ pub enable_mmap_cache : bool ,
393+ /// Build a bigram content index after the initial scan and in the post-rescan rebuild, for faster
394+ /// content-aware filtering. Independent of `enable_mmap_cache`.
395+ pub enable_content_indexing : bool ,
392396 pub mode : FFFMode ,
393397 /// Explicit cache budget. When `None`, the budget is auto-computed from
394398 /// the repo size after the initial scan completes.
395399 pub cache_budget : Option < ContentCacheBudget > ,
396400 /// When `false`, `new_with_shared_state` skips the background file watcher.
397- /// Files are still scanned, warmed up, and bigram-indexed.
398401 pub watch : bool ,
399402}
400403
401404impl Default for FilePickerOptions {
402405 fn default ( ) -> Self {
403406 Self {
404407 base_path : "." . into ( ) ,
405- warmup_mmap_cache : false ,
408+ enable_mmap_cache : false ,
409+ enable_content_indexing : false ,
406410 mode : FFFMode :: default ( ) ,
407411 cache_budget : None ,
408412 watch : true ,
@@ -420,7 +424,8 @@ pub struct FilePicker {
420424 watcher_ready : Arc < AtomicBool > ,
421425 scanned_files_count : Arc < AtomicUsize > ,
422426 background_watcher : Option < BackgroundWatcher > ,
423- warmup_mmap_cache : bool ,
427+ enable_mmap_cache : bool ,
428+ enable_content_indexing : bool ,
424429 watch : bool ,
425430 cancelled : Arc < AtomicBool > ,
426431 // This is a soft lock that we use to prevent a rescan from being triggered while the
@@ -478,8 +483,16 @@ impl FilePicker {
478483 . and_then ( |p| p. to_str ( ) )
479484 }
480485
481- pub fn need_warmup_mmap_cache ( & self ) -> bool {
482- self . warmup_mmap_cache
486+ pub fn need_enable_mmap_cache ( & self ) -> bool {
487+ self . enable_mmap_cache
488+ }
489+
490+ pub fn need_enable_content_indexing ( & self ) -> bool {
491+ self . enable_content_indexing
492+ }
493+
494+ pub fn need_watch ( & self ) -> bool {
495+ self . watch
483496 }
484497
485498 pub fn mode ( & self ) -> FFFMode {
@@ -631,7 +644,8 @@ impl FilePicker {
631644 post_scan_busy : Arc :: new ( AtomicBool :: new ( false ) ) ,
632645 scanned_files_count : Arc :: new ( AtomicUsize :: new ( 0 ) ) ,
633646 sync_data : FileSync :: new ( ) ,
634- warmup_mmap_cache : options. warmup_mmap_cache ,
647+ enable_mmap_cache : options. enable_mmap_cache ,
648+ enable_content_indexing : options. enable_content_indexing ,
635649 watch : options. watch ,
636650 watcher_ready : Arc :: new ( AtomicBool :: new ( false ) ) ,
637651 } )
@@ -647,13 +661,15 @@ impl FilePicker {
647661 let picker = Self :: new ( options) ?;
648662
649663 info ! (
650- "Spawning background threads: base_path={}, warmup={}, mode={:?}" ,
664+ "Spawning background threads: base_path={}, warmup={}, content_indexing={}, mode={:?}" ,
651665 picker. base_path. display( ) ,
652- picker. warmup_mmap_cache,
666+ picker. enable_mmap_cache,
667+ picker. enable_content_indexing,
653668 picker. mode,
654669 ) ;
655670
656- let warmup = picker. warmup_mmap_cache ;
671+ let warmup = picker. enable_mmap_cache ;
672+ let content_indexing = picker. enable_content_indexing ;
657673 let watch = picker. watch ;
658674 let mode = picker. mode ;
659675
@@ -677,6 +693,7 @@ impl FilePicker {
677693 watcher_ready,
678694 synced_files_count,
679695 warmup,
696+ content_indexing,
680697 watch,
681698 mode,
682699 shared_picker,
@@ -1199,13 +1216,15 @@ impl FilePicker {
11991216
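12001217 /// Spawn a background thread that, after a rescan, warms up mmap caches and/or rebuilds the bigram index, as selected by `enable_mmap_cache` and `enable_content_indexing`.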
12011218 pub ( crate ) fn spawn_post_rescan_rebuild ( & self , shared_picker : SharedPicker ) -> bool {
1202- if ! self . warmup_mmap_cache || self . cancelled . load ( Ordering :: Relaxed ) {
1219+ if self . cancelled . load ( Ordering :: Relaxed ) {
12031220 return false ;
12041221 }
12051222
12061223 let post_scan_busy = Arc :: clone ( & self . post_scan_busy ) ;
12071224 let cancelled = Arc :: clone ( & self . cancelled ) ;
12081225 let auto_budget = !self . has_explicit_cache_budget ;
1226+ let do_warmup = self . enable_mmap_cache ;
1227+ let do_content_indexing = self . enable_content_indexing ;
12091228
12101229 post_scan_busy. store ( true , Ordering :: Release ) ;
12111230
@@ -1249,7 +1268,7 @@ impl FilePicker {
12491268
12501269 if let Some ( ( files, budget, bp, arena) ) = files_snapshot {
12511270 // Warmup mmap caches.
1252- if !cancelled. load ( Ordering :: Acquire ) {
1271+ if do_warmup && !cancelled. load ( Ordering :: Acquire ) {
12531272 let t = std:: time:: Instant :: now ( ) ;
12541273 warmup_mmaps ( files, & budget, & bp, arena) ;
12551274 info ! (
@@ -1261,7 +1280,7 @@ impl FilePicker {
12611280 }
12621281
12631282 // Build bigram index (lock-free).
1264- if !cancelled. load ( Ordering :: Acquire ) {
1283+ if do_content_indexing && !cancelled. load ( Ordering :: Acquire ) {
12651284 let t = std:: time:: Instant :: now ( ) ;
12661285 info ! (
12671286 "Rescan: starting bigram index build for {} files..." ,
@@ -1294,8 +1313,10 @@ impl FilePicker {
12941313
12951314 post_scan_busy. store ( false , Ordering :: Release ) ;
12961315 info ! (
1297- "Rescan post-scan warmup + bigram total: {:.2}s" ,
1316+ "Rescan post-scan phase total: {:.2}s (warmup={}, content_indexing={}) " ,
12981317 phase_start. elapsed( ) . as_secs_f64( ) ,
1318+ do_warmup,
1319+ do_content_indexing,
12991320 ) ;
13001321 } ) ;
13011322
@@ -1401,7 +1422,8 @@ fn spawn_scan_and_watcher(
14011422 scan_signal : Arc < AtomicBool > ,
14021423 watcher_ready : Arc < AtomicBool > ,
14031424 synced_files_count : Arc < AtomicUsize > ,
1404- warmup_mmap_cache : bool ,
1425+ enable_mmap_cache : bool ,
1426+ enable_content_indexing : bool ,
14051427 watch : bool ,
14061428 mode : FFFMode ,
14071429 shared_picker : SharedPicker ,
@@ -1505,7 +1527,10 @@ fn spawn_scan_and_watcher(
15051527
15061528 watcher_ready. store ( true , Ordering :: Release ) ;
15071529
1508- if warmup_mmap_cache && !cancelled. load ( Ordering :: Acquire ) {
1530+ let need_post_scan =
1531+ ( enable_mmap_cache || enable_content_indexing) && !cancelled. load ( Ordering :: Acquire ) ;
1532+
1533+ if need_post_scan {
15091534 post_scan_busy. store ( true , Ordering :: Release ) ;
15101535 let phase_start = std:: time:: Instant :: now ( ) ;
15111536
@@ -1548,9 +1573,11 @@ fn spawn_scan_and_watcher(
15481573 None
15491574 } ;
15501575
1576+ // Both steps below rely on a custom soft lock that is not enforced by the compiler.
1577+ // This is required to keep the picker functioning if someone opens a really huge
1578+ // directory (e.g. 10M files), but it is potentially unsafe.
15511579 if let Some ( ( files, budget, arena) ) = files_snapshot {
1552- // Warmup: populate mmap caches for top-frecency files.
1553- if !cancelled. load ( Ordering :: Acquire ) {
1580+ if enable_mmap_cache && !cancelled. load ( Ordering :: Acquire ) {
15541581 let warmup_start = std:: time:: Instant :: now ( ) ;
15551582 warmup_mmaps ( files, & budget, & base_path, arena) ;
15561583 info ! (
@@ -1561,16 +1588,9 @@ fn spawn_scan_and_watcher(
15611588 ) ;
15621589 }
15631590
1564- // Build bigram index — entirely lock-free.
1565- if !cancelled. load ( Ordering :: Acquire ) {
1566- let bigram_start = std:: time:: Instant :: now ( ) ;
1567- info ! ( "Starting bigram index build for {} files..." , files. len( ) ) ;
1591+ if enable_content_indexing && !cancelled. load ( Ordering :: Acquire ) {
15681592 let ( index, content_binary) =
15691593 build_bigram_index ( files, & budget, & base_path, arena) ;
1570- info ! (
1571- "Bigram index ready in {:.2}s" ,
1572- bigram_start. elapsed( ) . as_secs_f64( ) ,
1573- ) ;
15741594
15751595 if let Ok ( mut guard) = shared_picker. write ( )
15761596 && let Some ( ref mut picker) = * guard
@@ -1593,8 +1613,10 @@ fn spawn_scan_and_watcher(
15931613 post_scan_busy. store ( false , Ordering :: Release ) ;
15941614
15951615 info ! (
1596- "Post-scan warmup + bigram total: {:.2}s" ,
1616+ "Post-scan phase total: {:.2}s (warmup={}, content_indexing={}) " ,
15971617 phase_start. elapsed( ) . as_secs_f64( ) ,
1618+ enable_mmap_cache,
1619+ enable_content_indexing,
15981620 ) ;
15991621 }
16001622
@@ -1677,6 +1699,7 @@ pub(crate) fn warmup_mmaps(
16771699/// so reading further adds no new information to the index.
16781700pub const BIGRAM_CONTENT_CAP : usize = 64 * 1024 ;
16791701
1702+ #[ tracing:: instrument( skip_all, name = "Building Bigram Index" , level = Level :: DEBUG ) ]
16801703pub ( crate ) fn build_bigram_index (
16811704 files : & [ FileItem ] ,
16821705 budget : & ContentCacheBudget ,
0 commit comments