@@ -56,7 +56,7 @@ use std::ops::ControlFlow;
5656use std:: path:: { Path , PathBuf } ;
5757use std:: sync:: {
5858 Arc , LazyLock ,
59- atomic:: { AtomicBool , AtomicU64 , AtomicUsize , Ordering } ,
59+ atomic:: { AtomicBool , AtomicUsize , Ordering } ,
6060} ;
6161use std:: thread:: JoinHandle ;
6262use std:: time:: SystemTime ;
@@ -137,7 +137,7 @@ pub(crate) struct FileSync {
137137 indexable_count : usize ,
138138 base_count : usize ,
139139 /// Number of active present files that exists in the file system
140- live_count : usize ,
140+ pub ( crate ) live_count : usize ,
141141 /// Sorted directory table. `StableVec` so post-scan snapshots can keep
142142 /// the allocation alive across a picker drop without copying, and so
143143 /// concurrent readers observe a consistent view via the same shared
@@ -752,24 +752,12 @@ impl FilePicker {
752752
753753 {
754754 let mut guard = shared_picker. write ( ) ?;
755- // If the old picker has a post-scan in flight, wait for it to
756- // finish. cancel() was already called so the rayon loop exits
757- // within microseconds (each worker checks cancelled per item).
758- if let Some ( ref old_picker) = * guard {
759- let flag = Arc :: clone ( & old_picker. signals . post_scan_indexing_active ) ;
760- drop ( guard) ;
761- while flag. load ( Ordering :: Acquire ) {
762- std:: thread:: sleep ( std:: time:: Duration :: from_millis ( 1 ) ) ;
763- }
764- guard = shared_picker. write ( ) ?;
765- }
766755 * guard = Some ( picker) ;
756+ // Replacing the old picker (if it exists) drops it, which flips its
757+ // internal `cancelled` flag; that automatically cleans up any thread
758+ // that might still be capturing the reference, safely or unsafely.
767759 }
768760
769- // `ScanJob::spawn` flips `scanning=true` synchronously before handing
770- // off to the worker thread, so callers that invoke `wait_for_scan`
771- // immediately after `new_with_shared_state` are guaranteed to see
772- // the scan in progress.
773761 ScanJob :: new_initial (
774762 shared_picker,
775763 shared_frecency,
@@ -1283,18 +1271,15 @@ impl FilePicker {
12831271 if self
12841272 . signals
12851273 . post_scan_indexing_active
1286- . load ( Ordering :: Acquire )
1274+ . compare_exchange ( false , true , Ordering :: AcqRel , Ordering :: Acquire )
1275+ . is_err ( )
12871276 {
12881277 tracing:: error!(
12891278 "Can not acquire post scan unsafe snapshot, someone already acquired it"
12901279 ) ;
12911280 return None ;
12921281 }
12931282
1294- self . signals
1295- . post_scan_indexing_active
1296- . store ( true , Ordering :: Release ) ;
1297-
12981283 Some ( PostScanUnsafeSnapshot {
12991284 files : self . sync_data . files . clone ( ) ,
13001285 dirs : self . sync_data . dirs . clone ( ) ,
@@ -1654,6 +1639,14 @@ fn canonical_relative_path(path: &Path, base: &Path) -> Option<String> {
16541639 rel. to_str ( ) . map ( str:: to_owned)
16551640}
16561641
1642+ impl Drop for FilePicker {
1643+ fn drop ( & mut self ) {
1644+ // Raise this picker's `cancelled` signal so any in-flight ScanJob still bound
1645+ // to these signals cannot mutate the replacement picker after a swap.
1646+ self . signals . cancelled . store ( true , Ordering :: Release ) ; // NOTE(review): presumably paired with Acquire loads of `cancelled` in the workers — confirm
1647+ }
1648+ }
1649+
16571650#[ derive( Debug , Clone , Copy ) ]
16581651enum FileSlot {
16591652 Base ( usize ) ,
@@ -1715,76 +1708,6 @@ pub struct ScanProgress {
17151708 pub is_warmup_complete : bool ,
17161709}
17171710
1718- /// Pre-populate mmap caches for the most valuable files so the first grep
1719- /// search doesn't pay the mmap creation + page fault cost.
1720- ///
1721- /// All files are collected once, then an O(n) `select_nth_unstable_by`
1722- /// partitions the top [`MAX_CACHED_CONTENT_FILES`] highest-frecency eligible
1723- /// files to the front (binary / empty files are pushed to the end by the
1724- /// comparator). The selected prefix is warmed in parallel via rayon.
1725- ///
1726- /// Files beyond the budget are still available via temporary mmaps on first
1727- /// grep access, so correctness is unaffected.
1728- #[ tracing:: instrument( skip( files) , name = "warmup_mmaps" , level = Level :: DEBUG ) ]
1729- pub ( crate ) fn warmup_mmaps (
1730- files : & [ FileItem ] ,
1731- budget : & ContentCacheBudget ,
1732- base_path : & Path ,
1733- arena : ArenaPtr ,
1734- ) {
1735- let max_files = budget. max_files ;
1736- let max_bytes = budget. max_bytes ;
1737- let max_file_size = budget. max_file_size ;
1738-
1739- // Single collect — no pre-filter. The comparator in select_nth pushes
1740- // ineligible files (binary, empty) to the tail automatically.
1741- let mut all: Vec < & FileItem > = files. iter ( ) . collect ( ) ;
1742-
1743- // O(n) partial sort: top max_files eligible-by-frecency files land in
1744- // all[..max_files]. Ineligible files compare as "lowest priority" so
1745- // they naturally sink past the partition boundary.
1746- if all. len ( ) > max_files {
1747- all. select_nth_unstable_by ( max_files, |a, b| {
1748- let a_ok = !a. is_binary ( ) && a. size > 0 ;
1749- let b_ok = !b. is_binary ( ) && b. size > 0 ;
1750- match ( a_ok, b_ok) {
1751- ( true , false ) => std:: cmp:: Ordering :: Less ,
1752- ( false , true ) => std:: cmp:: Ordering :: Greater ,
1753- ( false , false ) => std:: cmp:: Ordering :: Equal ,
1754- ( true , true ) => b. total_frecency_score ( ) . cmp ( & a. total_frecency_score ( ) ) ,
1755- }
1756- } ) ;
1757- }
1758-
1759- let to_warm = & all[ ..all. len ( ) . min ( max_files) ] ;
1760-
1761- let warmed_bytes = AtomicU64 :: new ( 0 ) ;
1762- let budget_exhausted = AtomicBool :: new ( false ) ;
1763-
1764- BACKGROUND_THREAD_POOL . install ( || {
1765- to_warm. par_iter ( ) . for_each ( |file| {
1766- if budget_exhausted. load ( Ordering :: Relaxed ) {
1767- return ;
1768- }
1769-
1770- if file. is_binary ( ) || file. size == 0 || file. size > max_file_size {
1771- return ;
1772- }
1773-
1774- // Byte budget.
1775- let prev_bytes = warmed_bytes. fetch_add ( file. size , Ordering :: Relaxed ) ;
1776- if prev_bytes + file. size > max_bytes {
1777- budget_exhausted. store ( true , Ordering :: Relaxed ) ;
1778- return ;
1779- }
1780-
1781- if let Some ( content) = file. get_content ( arena, base_path, budget) {
1782- let _ = std:: hint:: black_box ( content. first ( ) ) ;
1783- }
1784- } ) ;
1785- } ) ;
1786- }
1787-
17881711impl FileSync {
17891712 pub ( crate ) fn discover_git_workdir ( base_path : & Path ) -> Option < PathBuf > {
17901713 let git_workdir = Repository :: discover ( base_path)
@@ -1941,12 +1864,15 @@ impl FileSync {
19411864 // (one per partition) to preserve O(log n) lookups.
19421865 //
19431866 // "Indexable" = can possibly contribute bigrams: not binary-by-extension,
1944- // non-zero size, not larger than the bigram/mmap cap. The cap matches
1945- // `ContentCacheBudget::max_file_size` default (10 MB) — any file above
1946- // that is skipped by `build_bigram_index` anyway.
1947- const BIGRAM_ELIGIBLE_MAX_SIZE : u64 = 10 * 1024 * 1024 ;
1948- let is_indexable =
1949- |f : & FileItem | !f. is_binary ( ) && f. size > 0 && f. size <= BIGRAM_ELIGIBLE_MAX_SIZE ;
1867+ // non-zero size, not larger than `BIGRAM_CONTENT_CAP`. Capping indexable
1868+ // size at the bigram scan window means every indexed file is fully
1869+ // covered — no partial-content false negatives. Files above the cap
1870+ // land past `indexable_count` and are always scanned at grep time.
1871+ let is_indexable = |f : & FileItem | {
1872+ !f. is_binary ( )
1873+ && f. size > 0
1874+ && f. size <= crate :: bigram_filter:: BIGRAM_CONTENT_CAP as u64
1875+ } ;
19501876 BACKGROUND_THREAD_POOL . install ( || {
19511877 files. par_sort_unstable_by ( |a, b| {
19521878 // Sort indexables first (true < false when we invert with !).
@@ -1999,6 +1925,33 @@ impl FileSync {
19991925 }
20001926}
20011927
1928+ /// Pre-populates mmap caches for cold tail files so the first grep search
1929+ /// doesn't pay the mmap creation + page-fault cost; stops once `budget` reports exhaustion.
1930+ #[ tracing:: instrument( skip( files) , name = "warmup_mmaps" , level = Level :: DEBUG ) ]
1931+ pub ( crate ) fn warmup_mmaps (
1932+ files : & [ FileItem ] ,
1933+ budget : & ContentCacheBudget ,
1934+ base_path : & Path ,
1935+ arena : ArenaPtr ,
1936+ ) {
1937+ // In most use cases the mmap limit is significantly smaller than the repo, so this loop is expected to end early at the budget check below.
1938+ for file in files. iter ( ) {
1939+ if file. is_likely_hot ( ) // skip files that are likely hot, binary, empty, or above the per-file size cap
1940+ || file. is_binary ( )
1941+ || file. size == 0
1942+ || file. size > budget. max_file_size
1943+ {
1944+ continue ;
1945+ }
1946+
1947+ let _ = file. get_cached_content ( arena, base_path, budget) ; // result discarded; presumably called only to populate the cache — verify against `get_cached_content`
1948+
1949+ if budget. is_exhausted ( ) { // checked after each warmed file so warming stops as soon as the budget is used up
1950+ break ;
1951+ }
1952+ }
1953+ }
1954+
20021955/// This does both thing (yes sorry all the OOP morons)
20031956/// in one go: populates files chunked storage and creates new directories
20041957fn populates_dirs_files_chunked_storage < ' a > (
0 commit comments