@@ -388,21 +388,25 @@ impl FileItem {
388388/// Options for creating a [`FilePicker`].
389389pub struct FilePickerOptions {
390390 pub base_path : String ,
391- pub warmup_mmap_cache : bool ,
391+ /// Pre-populate mmap caches for top-frecency files after the initial scan.
392+ pub enable_mmap_cache : bool ,
393+ /// Build a bigram content index after the initial scan for faster
394+ /// content-aware filtering. Independent of `enable_mmap_cache`.
395+ pub enable_content_indexing : bool ,
392396 pub mode : FFFMode ,
393397 /// Explicit cache budget. When `None`, the budget is auto-computed from
394398 /// the repo size after the initial scan completes.
395399 pub cache_budget : Option < ContentCacheBudget > ,
396400 /// When `false`, `new_with_shared_state` skips the background file watcher.
397- /// Files are still scanned, warmed up, and bigram-indexed.
398401 pub watch : bool ,
399402}
400403
401404impl Default for FilePickerOptions {
402405 fn default ( ) -> Self {
403406 Self {
404407 base_path : "." . into ( ) ,
405- warmup_mmap_cache : false ,
408+ enable_mmap_cache : false ,
409+ enable_content_indexing : false ,
406410 mode : FFFMode :: default ( ) ,
407411 cache_budget : None ,
408412 watch : true ,
@@ -420,7 +424,8 @@ pub struct FilePicker {
420424 watcher_ready : Arc < AtomicBool > ,
421425 scanned_files_count : Arc < AtomicUsize > ,
422426 background_watcher : Option < BackgroundWatcher > ,
423- warmup_mmap_cache : bool ,
427+ enable_mmap_cache : bool ,
428+ enable_content_indexing : bool ,
424429 watch : bool ,
425430 cancelled : Arc < AtomicBool > ,
426431 // This is a soft lock that we use to prevent a rescan from being triggered while the
@@ -478,8 +483,12 @@ impl FilePicker {
478483 . and_then ( |p| p. to_str ( ) )
479484 }
480485
481- pub fn need_warmup_mmap_cache ( & self ) -> bool {
482- self . warmup_mmap_cache
486+ pub fn need_enable_mmap_cache ( & self ) -> bool {
487+ self . enable_mmap_cache
488+ }
489+
490+ pub fn need_enable_content_indexing ( & self ) -> bool {
491+ self . enable_content_indexing
483492 }
484493
485494 pub fn mode ( & self ) -> FFFMode {
@@ -631,7 +640,8 @@ impl FilePicker {
631640 post_scan_busy : Arc :: new ( AtomicBool :: new ( false ) ) ,
632641 scanned_files_count : Arc :: new ( AtomicUsize :: new ( 0 ) ) ,
633642 sync_data : FileSync :: new ( ) ,
634- warmup_mmap_cache : options. warmup_mmap_cache ,
643+ enable_mmap_cache : options. enable_mmap_cache ,
644+ enable_content_indexing : options. enable_content_indexing ,
635645 watch : options. watch ,
636646 watcher_ready : Arc :: new ( AtomicBool :: new ( false ) ) ,
637647 } )
@@ -647,13 +657,15 @@ impl FilePicker {
647657 let picker = Self :: new ( options) ?;
648658
649659 info ! (
650- "Spawning background threads: base_path={}, warmup={}, mode={:?}" ,
660+ "Spawning background threads: base_path={}, warmup={}, content_indexing={}, mode={:?}" ,
651661 picker. base_path. display( ) ,
652- picker. warmup_mmap_cache,
662+ picker. enable_mmap_cache,
663+ picker. enable_content_indexing,
653664 picker. mode,
654665 ) ;
655666
656- let warmup = picker. warmup_mmap_cache ;
667+ let warmup = picker. enable_mmap_cache ;
668+ let content_indexing = picker. enable_content_indexing ;
657669 let watch = picker. watch ;
658670 let mode = picker. mode ;
659671
@@ -677,6 +689,7 @@ impl FilePicker {
677689 watcher_ready,
678690 synced_files_count,
679691 warmup,
692+ content_indexing,
680693 watch,
681694 mode,
682695 shared_picker,
@@ -1199,13 +1212,15 @@ impl FilePicker {
11991212
12001213 /// Spawn a background thread to re-run the post-scan phase (mmap warmup and/or bigram index rebuild) after a rescan.
12011214 pub ( crate ) fn spawn_post_rescan_rebuild ( & self , shared_picker : SharedPicker ) -> bool {
1202- if ! self . warmup_mmap_cache || self . cancelled . load ( Ordering :: Relaxed ) {
1215+ if self . cancelled . load ( Ordering :: Relaxed ) {
12031216 return false ;
12041217 }
12051218
12061219 let post_scan_busy = Arc :: clone ( & self . post_scan_busy ) ;
12071220 let cancelled = Arc :: clone ( & self . cancelled ) ;
12081221 let auto_budget = !self . has_explicit_cache_budget ;
1222+ let do_warmup = self . enable_mmap_cache ;
1223+ let do_content_indexing = self . enable_content_indexing ;
12091224
12101225 post_scan_busy. store ( true , Ordering :: Release ) ;
12111226
@@ -1249,7 +1264,7 @@ impl FilePicker {
12491264
12501265 if let Some ( ( files, budget, bp, arena) ) = files_snapshot {
12511266 // Warmup mmap caches.
1252- if !cancelled. load ( Ordering :: Acquire ) {
1267+ if do_warmup && !cancelled. load ( Ordering :: Acquire ) {
12531268 let t = std:: time:: Instant :: now ( ) ;
12541269 warmup_mmaps ( files, & budget, & bp, arena) ;
12551270 info ! (
@@ -1261,7 +1276,7 @@ impl FilePicker {
12611276 }
12621277
12631278 // Build bigram index (lock-free).
1264- if !cancelled. load ( Ordering :: Acquire ) {
1279+ if do_content_indexing && !cancelled. load ( Ordering :: Acquire ) {
12651280 let t = std:: time:: Instant :: now ( ) ;
12661281 info ! (
12671282 "Rescan: starting bigram index build for {} files..." ,
@@ -1294,8 +1309,10 @@ impl FilePicker {
12941309
12951310 post_scan_busy. store ( false , Ordering :: Release ) ;
12961311 info ! (
1297- "Rescan post-scan warmup + bigram total: {:.2}s" ,
1312+ "Rescan post-scan phase total: {:.2}s (warmup={}, content_indexing={}) " ,
12981313 phase_start. elapsed( ) . as_secs_f64( ) ,
1314+ do_warmup,
1315+ do_content_indexing,
12991316 ) ;
13001317 } ) ;
13011318
@@ -1401,7 +1418,8 @@ fn spawn_scan_and_watcher(
14011418 scan_signal : Arc < AtomicBool > ,
14021419 watcher_ready : Arc < AtomicBool > ,
14031420 synced_files_count : Arc < AtomicUsize > ,
1404- warmup_mmap_cache : bool ,
1421+ enable_mmap_cache : bool ,
1422+ enable_content_indexing : bool ,
14051423 watch : bool ,
14061424 mode : FFFMode ,
14071425 shared_picker : SharedPicker ,
@@ -1505,7 +1523,10 @@ fn spawn_scan_and_watcher(
15051523
15061524 watcher_ready. store ( true , Ordering :: Release ) ;
15071525
1508- if warmup_mmap_cache && !cancelled. load ( Ordering :: Acquire ) {
1526+ let need_post_scan =
1527+ ( enable_mmap_cache || enable_content_indexing) && !cancelled. load ( Ordering :: Acquire ) ;
1528+
1529+ if need_post_scan {
15091530 post_scan_busy. store ( true , Ordering :: Release ) ;
15101531 let phase_start = std:: time:: Instant :: now ( ) ;
15111532
@@ -1548,9 +1569,11 @@ fn spawn_scan_and_watcher(
15481569 None
15491570 } ;
15501571
1572+ // Both of the steps below rely on a custom soft lock that is not guaranteed by the compiler.
1573+ // This is required to keep the picker functioning if someone opens a really huge
1574+ // directory (e.g. 10M files), but it is potentially unsafe.
15511575 if let Some ( ( files, budget, arena) ) = files_snapshot {
1552- // Warmup: populate mmap caches for top-frecency files.
1553- if !cancelled. load ( Ordering :: Acquire ) {
1576+ if enable_mmap_cache && !cancelled. load ( Ordering :: Acquire ) {
15541577 let warmup_start = std:: time:: Instant :: now ( ) ;
15551578 warmup_mmaps ( files, & budget, & base_path, arena) ;
15561579 info ! (
@@ -1561,16 +1584,9 @@ fn spawn_scan_and_watcher(
15611584 ) ;
15621585 }
15631586
1564- // Build bigram index — entirely lock-free.
1565- if !cancelled. load ( Ordering :: Acquire ) {
1566- let bigram_start = std:: time:: Instant :: now ( ) ;
1567- info ! ( "Starting bigram index build for {} files..." , files. len( ) ) ;
1587+ if enable_content_indexing && !cancelled. load ( Ordering :: Acquire ) {
15681588 let ( index, content_binary) =
15691589 build_bigram_index ( files, & budget, & base_path, arena) ;
1570- info ! (
1571- "Bigram index ready in {:.2}s" ,
1572- bigram_start. elapsed( ) . as_secs_f64( ) ,
1573- ) ;
15741590
15751591 if let Ok ( mut guard) = shared_picker. write ( )
15761592 && let Some ( ref mut picker) = * guard
@@ -1593,8 +1609,10 @@ fn spawn_scan_and_watcher(
15931609 post_scan_busy. store ( false , Ordering :: Release ) ;
15941610
15951611 info ! (
1596- "Post-scan warmup + bigram total: {:.2}s" ,
1612+ "Post-scan phase total: {:.2}s (warmup={}, content_indexing={}) " ,
15971613 phase_start. elapsed( ) . as_secs_f64( ) ,
1614+ enable_mmap_cache,
1615+ enable_content_indexing,
15981616 ) ;
15991617 }
16001618
@@ -1677,6 +1695,7 @@ pub(crate) fn warmup_mmaps(
16771695/// so reading further adds no new information to the index.
16781696pub const BIGRAM_CONTENT_CAP : usize = 64 * 1024 ;
16791697
1698+ #[ tracing:: instrument( skip_all, name = "Building Bigram Index" , level = Level :: DEBUG ) ]
16801699pub ( crate ) fn build_bigram_index (
16811700 files : & [ FileItem ] ,
16821701 budget : & ContentCacheBudget ,
0 commit comments