@@ -802,8 +802,8 @@ func (e *Engine) popPendingIndex(workspaceID string) (files []string, overflow b
802802func (e * Engine ) tryStartPendingIndex (root , workspaceID string ) {
803803 files , overflow := e .popPendingIndex (workspaceID )
804804 if overflow {
805- logger .Instance .Info ("[IDX] ♻️ Pending changes exceeded limit for ws=%s — triggering full scan " , filepath .Base (root ))
806- e .StartIndexingAsync (root , workspaceID , nil , false )
805+ logger .Instance .Info ("[IDX] ♻️ Pending changes exceeded limit for ws=%s — triggering full re-index " , filepath .Base (root ))
806+ e .StartIndexingAsync (root , workspaceID , nil , true )
807807 return
808808 }
809809 if len (files ) == 0 {
@@ -817,6 +817,11 @@ func (e *Engine) tryStartPendingIndex(root, workspaceID string) {
817817// If recreate=true and a job is already running, the recreate is queued and
818818// will start immediately after the current job finishes.
819819func (e * Engine ) StartIndexingAsync (root , id string , changedFiles []string , recreate bool ) {
820+ if watch .IsInvalidRoot (root ) {
821+ logger .Instance .Error ("[IDX] ⛔ Refusing to index invalid/dangerous root: %s" , root )
822+ return
823+ }
824+
820825 if _ , loaded := e .indexingJobs .LoadOrStore (id , time .Now ()); loaded {
821826 // A job is already running. If recreate=true, queue it so it fires
822827 // after the current job finishes (via tryStartPendingIndex/defer).
@@ -838,7 +843,17 @@ func (e *Engine) StartIndexingAsync(root, id string, changedFiles []string, recr
838843 logger .Instance .Warn ("[IDX] ⚠️ %d workspaces indexing simultaneously — Ollama requests will serialize implicitly (ws=%s)" , activeCount , filepath .Base (root ))
839844 }
840845
841- indexer .SaveIndexStatus (root , & indexer.IndexStatus {StartedAt : time .Now ().UTC ().Format (time .RFC3339 )})
846+ // Preserve existing Languages data on restart so incremental indexing
847+ // doesn't show processed=0 to AI consumers. Reset only lifecycle fields.
848+ s := indexer .LoadIndexStatus (root )
849+ if s == nil {
850+ s = & indexer.IndexStatus {}
851+ }
852+ s .StartedAt = time .Now ().UTC ().Format (time .RFC3339 )
853+ s .EndedAt = ""
854+ s .Elapsed = ""
855+ s .Error = ""
856+ indexer .SaveIndexStatus (root , s )
842857
843858 go func () {
844859 defer func () {
@@ -858,33 +873,33 @@ func (e *Engine) StartIndexingAsync(root, id string, changedFiles []string, recr
858873 err = e .IndexWorkspace (ctx , root , recreate )
859874 }
860875
876+ finalizeIndexStatus (root , err )
861877 if err != nil {
862878 logger .Instance .Error ("[IDX] ws=%s Background indexing failed: %v" , filepath .Base (root ), err )
863- s := indexer .LoadIndexStatus (root )
864- if s == nil {
865- s = & indexer.IndexStatus {}
866- }
867- s .Error = err .Error ()
868- s .EndedAt = time .Now ().UTC ().Format (time .RFC3339 )
869- if started , pErr := time .Parse (time .RFC3339 , s .StartedAt ); pErr == nil {
870- s .Elapsed = time .Since (started ).Round (time .Second ).String ()
871- }
872- indexer .SaveIndexStatus (root , s )
873879 } else {
874880 logger .Instance .Info ("[IDX] ✅ ws=%s Background indexing completed" , filepath .Base (root ))
875- s := indexer .LoadIndexStatus (root )
876- if s == nil {
877- s = & indexer.IndexStatus {}
878- }
879- s .EndedAt = time .Now ().UTC ().Format (time .RFC3339 )
880- if started , pErr := time .Parse (time .RFC3339 , s .StartedAt ); pErr == nil {
881- s .Elapsed = time .Since (started ).Round (time .Second ).String ()
882- }
883- indexer .SaveIndexStatus (root , s )
884881 }
885882 }()
886883}
887884
885+ // finalizeIndexStatus loads the current status, stamps EndedAt/Elapsed/Error,
886+ // and saves it atomically. Centralises the logic that was duplicated in the
887+ // success and error branches of StartIndexingAsync.
888+ func finalizeIndexStatus (root string , indexErr error ) {
889+ s := indexer .LoadIndexStatus (root )
890+ if s == nil {
891+ s = & indexer.IndexStatus {}
892+ }
893+ s .EndedAt = time .Now ().UTC ().Format (time .RFC3339 )
894+ if started , pErr := time .Parse (time .RFC3339 , s .StartedAt ); pErr == nil {
895+ s .Elapsed = time .Since (started ).Round (time .Second ).String ()
896+ }
897+ if indexErr != nil {
898+ s .Error = indexErr .Error ()
899+ }
900+ indexer .SaveIndexStatus (root , s )
901+ }
902+
888903// IndexFiles indexes specific files in a workspace.
889904func (e * Engine ) IndexFiles (ctx context.Context , root string , files []string ) error {
890905 wctx , err := e .DetectContext (ctx , root )
@@ -953,27 +968,53 @@ func (e *Engine) IndexWorkspace(ctx context.Context, path string, recreate bool)
953968 fileCounts := e .indexer .CountAllFiles (wctx .Root , excludePatterns )
954969 logger .Instance .Info ("[IDX] ws=%s file counts: %v" , wsName , fileCounts )
955970
956- // Pre-populate index_status.json with the real disk totals so that
957- // even languages with 0 changed files still show correct on_disk counts.
958- {
959- s := indexer .LoadIndexStatus (wctx .Root )
960- if s == nil {
961- s = & indexer.IndexStatus {StartedAt : time .Now ().UTC ().Format (time .RFC3339 )}
962- }
963- if s .Languages == nil {
964- s .Languages = make (map [string ]indexer.LangStatus )
965- }
966- for _ , lang := range languages {
967- s .Languages [lang ] = indexer.LangStatus {OnDisk : fileCounts [lang ]}
968- }
969- indexer .SaveIndexStatus (wctx .Root , s )
971+ // Sort languages by file count descending so the dominant language is indexed
972+ // first and AI search works immediately for the most relevant code.
973+ sort .Slice (languages , func (i , j int ) bool {
974+ return fileCounts [languages [i ]] > fileCounts [languages [j ]]
975+ })
976+ logger .Instance .Info ("[IDX] ws=%s indexing order: %v" , wsName , languages )
977+
978+ // Load or create a shared in-memory IndexStatus for the entire indexing run.
979+ // This avoids calling LoadIndexStatus (JSON read + parse) on every Progress tick.
980+ // The single *IndexStatus is updated in-place; only SaveIndexStatus (atomic write)
981+ // hits the disk, and only every 10 files.
982+ s := indexer .LoadIndexStatus (wctx .Root )
983+ if s == nil {
984+ s = & indexer.IndexStatus {StartedAt : time .Now ().UTC ().Format (time .RFC3339 )}
985+ }
986+ if s .Languages == nil {
987+ s .Languages = make (map [string ]indexer.LangStatus )
970988 }
989+ // Pre-populate real on_disk counts so languages with 0 changed files still appear.
990+ for _ , l := range languages {
991+ entry := s .Languages [l ]
992+ entry .OnDisk = fileCounts [l ]
993+ s .Languages [l ] = entry
994+ }
995+ indexer .SaveIndexStatus (wctx .Root , s )
971996
972997 var indexErrors []string
973998 for _ , lang := range languages {
974999 diskTotal := fileCounts [lang ]
9751000 collection := wctx .CollectionName (lang )
9761001 logger .Instance .Info ("[IDX] ws=%s lang=%s ▶ starting (on_disk=%d)" , wsName , lang , diskTotal )
1002+
1003+ // Capture the already-processed count before this run starts.
1004+ // For incremental runs (only changed files), we accumulate on top of
1005+ // whatever was already indexed in Qdrant. For a full re-index
1006+ // (recreate=true, or all files changed), we reset to 0.
1007+ // baseProcessed is captured once per language, before the Progress
1008+ // callback fires, so it's safe to close over it.
1009+ baseProcessed := s .Languages [lang ].Processed
1010+ if recreate {
1011+ baseProcessed = 0
1012+ }
1013+ // firstTick is used to detect on the first Progress callback whether
1014+ // this is a full re-index (totalFiles >= diskTotal) so we can reset
1015+ // baseProcessed to 0 and avoid double-counting.
1016+ firstTick := true
1017+
9771018 err := e .indexer .IndexWorkspace (ctx , wctx .Root , collection , indexer.Options {
9781019 Language : lang ,
9791020 WorkspaceName : wsName ,
@@ -984,18 +1025,23 @@ func (e *Engine) IndexWorkspace(ctx context.Context, path string, recreate bool)
9841025 if doneFiles % 10 != 0 && doneFiles != totalFiles {
9851026 return
9861027 }
987- if s := indexer .LoadIndexStatus (wctx .Root ); s != nil {
988-
989- if s .Languages == nil {
990- s .Languages = make (map [string ]indexer.LangStatus )
1028+ // On the first tick, decide if this is a full re-index.
1029+ // If totalFiles covers all on-disk files, reset base to 0
1030+ // so we don't double-count the existing Processed value.
1031+ if firstTick {
1032+ firstTick = false
1033+ if diskTotal > 0 && totalFiles >= diskTotal {
1034+ baseProcessed = 0
9911035 }
992- ls := s .Languages [lang ]
993- ls .OnDisk = diskTotal // real total files on disk for this language
994- ls .Changed = totalFiles // files that needed re-indexing (changedFiles)
995- ls .Processed = doneFiles
996- s .Languages [lang ] = ls
997- indexer .SaveIndexStatus (wctx .Root , s )
9981036 }
1037+ ls := s .Languages [lang ]
1038+ ls .OnDisk = diskTotal // real total files on disk
1039+ ls .Changed = totalFiles // files that needed re-indexing this run
1040+ // Cumulative total: for incremental runs add to the existing
1041+ // DB count; for full re-indexes (base=0) start from scratch.
1042+ ls .Processed = baseProcessed + doneFiles
1043+ s .Languages [lang ] = ls
1044+ indexer .SaveIndexStatus (wctx .Root , s )
9991045 },
10001046 })
10011047 if err != nil {
0 commit comments