Skip to content

Commit 5198fe5

Browse files
author
razvan
committed
refactor: replace old progressStore with simple IndexStatus file
- Remove progressStore (preRegister, update, carry-over, flusher) - Remove IndexingProgressSummary, BuildIndexingProgress, formatAge, buildIndexingMessage - Remove auto-resume from SearchCode/HybridSearchCode (redundant with DetectContext) - Remove resumeAttempts field from Engine - Add IndexStatus/LangStatus/SaveIndexStatus/LoadIndexStatus in pkg/indexer/ - Indexer writes OnDisk/Changed/Processed via Progress callback - Tools read status directly from .ragcode/index_status.json - Fix TestDetectNoMarkers with AllowedRoots isolation
1 parent 522a68f commit 5198fe5

22 files changed

Lines changed: 208 additions & 1182 deletions

cmd/rag-code-mcp/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import (
1717
)
1818

1919
var (
20-
Version = "2.1.62"
20+
Version = "2.1.63"
2121
Commit = "none"
2222
Date = "24.10.2025"
2323
)

internal/service/engine/engine.go

Lines changed: 33 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,13 @@ type Engine struct {
4545
pendingFiles map[string]map[string]struct{} // workspaceID -> set(filePath)
4646
pendingOverflow map[string]bool // workspaceID -> too many pending changes, fallback to full scan
4747

48-
progress *progressStore
48+
4949

5050
// detectionCache stores resolved WorkspaceContext with TTL to avoid
5151
// repeated full resolver cascades for the same path.
5252
detectionCache sync.Map // map[string]*detectionCacheEntry
5353

54-
// resumeAttempts throttles auto-resume of interrupted indexing.
55-
// Key: workspace ID, Value: time.Time of last resume attempt.
56-
// Prevents CPU/log churn when indexing keeps failing (e.g. Ollama down).
57-
resumeAttempts sync.Map
54+
5855

5956
// connectTriggered tracks whether background indexing was automatically
6057
// triggered for a workspace ID upon initial daemon resolution.
@@ -116,19 +113,16 @@ func NewEngine(idx *indexer.Service, srv *search.Service, registryPath string, c
116113
resolver: res,
117114
config: cfg,
118115
watchers: watcherMgr,
119-
progress: newProgressStore(),
116+
120117
pendingFiles: make(map[string]map[string]struct{}),
121118
pendingOverflow: make(map[string]bool),
122119
}
123120
}
124121

125-
// GetIndexProgress returns the last known indexing progress for a workspace.
126-
// workspaceRoot is used as a hint to load persisted status from disc if not in memory.
127-
func (e *Engine) GetIndexProgress(workspaceID, workspaceRoot string) *IndexProgress {
128-
if e.progress == nil {
129-
return nil
130-
}
131-
return e.progress.get(workspaceID, workspaceRoot)
122+
// GetIndexStatus returns the last known indexing status for a workspace.
123+
// Reads directly from {workspaceRoot}/.ragcode/index_status.json.
124+
func (e *Engine) GetIndexStatus(workspaceRoot string) *indexer.IndexStatus {
125+
return indexer.LoadIndexStatus(workspaceRoot)
132126
}
133127

134128
// ActiveIndexingJobs returns the IDs of workspaces currently being indexed.
@@ -413,22 +407,7 @@ func (e *Engine) SearchCode(ctx context.Context, filePath, queryText string, lim
413407
primaryColl := wctx.CollectionName(primaryLang)
414408
t1 := time.Now()
415409

416-
// Auto-resume: if GlobalPercent is between 1-99 and state is "running",
417-
// the indexer was interrupted — trigger a background re-index.
418-
if idxStatus := loadIndexStatus(wctx.Root); idxStatus != nil {
419-
if idxStatus.WorkspaceID == wctx.ID && idxStatus.GlobalPercent > 0 && idxStatus.GlobalPercent < 100 && idxStatus.State == "running" {
420-
if _, ok := e.indexingJobs.Load(wctx.ID); !ok {
421-
const resumeCooldown = 5 * time.Minute
422-
now := time.Now()
423-
if last, loaded := e.resumeAttempts.Load(wctx.ID); !loaded || now.Sub(last.(time.Time)) > resumeCooldown {
424-
e.resumeAttempts.Store(wctx.ID, now)
425-
logger.Instance.Info("[IDX] ws=%s Indexing interrupted at %d%% — auto-resuming", filepath.Base(wctx.Root), idxStatus.GlobalPercent)
426-
e.StartIndexingAsync(wctx.Root, wctx.ID, nil, false)
427-
}
428-
}
429-
logger.Instance.Info("[IDX] ws=%s Indexing in progress (%d%%) — will search available collections", filepath.Base(wctx.Root), idxStatus.GlobalPercent)
430-
}
431-
}
410+
432411

433412
// Check if the primary collection exists.
434413
// If not, trigger background indexing but do NOT block — the fan-out below
@@ -622,21 +601,7 @@ func (e *Engine) HybridSearchCode(ctx context.Context, filePath, queryText strin
622601

623602
collection := wctx.CollectionName(lang)
624603

625-
// Auto-resume from index_status.json: if indexing was interrupted, resume it.
626-
if idxStatus := loadIndexStatus(wctx.Root); idxStatus != nil {
627-
if idxStatus.WorkspaceID == wctx.ID && idxStatus.GlobalPercent > 0 && idxStatus.GlobalPercent < 100 && idxStatus.State == "running" {
628-
if _, ok := e.indexingJobs.Load(wctx.ID); !ok {
629-
const resumeCooldown = 5 * time.Minute
630-
now := time.Now()
631-
if last, loaded := e.resumeAttempts.Load(wctx.ID); !loaded || now.Sub(last.(time.Time)) > resumeCooldown {
632-
e.resumeAttempts.Store(wctx.ID, now)
633-
logger.Instance.Info("[IDX] ws=%s Indexing interrupted at %d%% — auto-resuming", filepath.Base(wctx.Root), idxStatus.GlobalPercent)
634-
e.StartIndexingAsync(wctx.Root, wctx.ID, nil, false)
635-
}
636-
}
637-
logger.Instance.Info("[IDX] ws=%s Indexing in progress (%d%%) — will search available collections", filepath.Base(wctx.Root), idxStatus.GlobalPercent)
638-
}
639-
}
604+
640605

641606
exists, err := e.search.CollectionExists(ctx, collection)
642607
if err != nil {
@@ -822,10 +787,7 @@ func (e *Engine) StartIndexingAsync(root, id string, changedFiles []string, recr
822787
logger.Instance.Warn("[IDX] ⚠️ %d workspaces indexing simultaneously — Ollama requests will serialize implicitly (ws=%s)", activeCount, filepath.Base(root))
823788
}
824789

825-
jobID := fmt.Sprintf("%s-%d", id, time.Now().UnixNano())
826-
if e.progress != nil {
827-
e.progress.start(id, root, jobID, time.Now())
828-
}
790+
indexer.SaveIndexStatus(root, &indexer.IndexStatus{State: "starting", StartedAt: time.Now().UTC().Format(time.RFC3339)})
829791

830792
go func() {
831793
defer func() {
@@ -847,13 +809,18 @@ func (e *Engine) StartIndexingAsync(root, id string, changedFiles []string, recr
847809

848810
if err != nil {
849811
logger.Instance.Error("[IDX] ws=%s Background indexing failed: %v", filepath.Base(root), err)
850-
if e.progress != nil {
851-
e.progress.fail(id, root, time.Now(), err.Error())
812+
if s := indexer.LoadIndexStatus(root); s != nil {
813+
s.State = "failed"
814+
s.Error = err.Error()
815+
s.EndedAt = time.Now().UTC().Format(time.RFC3339)
816+
indexer.SaveIndexStatus(root, s)
852817
}
853818
} else {
854819
logger.Instance.Info("[IDX] ✅ ws=%s Background indexing completed", filepath.Base(root))
855-
if e.progress != nil {
856-
e.progress.complete(id, root, time.Now())
820+
if s := indexer.LoadIndexStatus(root); s != nil {
821+
s.State = "completed"
822+
s.EndedAt = time.Now().UTC().Format(time.RFC3339)
823+
indexer.SaveIndexStatus(root, s)
857824
}
858825
}
859826
}()
@@ -916,39 +883,34 @@ func (e *Engine) IndexWorkspace(ctx context.Context, path string, recreate bool)
916883

917884
wsName := filepath.Base(wctx.Root)
918885

919-
// Pre-scan: count files for every language before starting indexing.
920-
// This gives progressStore an accurate denominator from the start, so
921-
// GlobalPercent increases monotonically instead of resetting per language.
922886
var excludePatterns []string
923887
if e.config != nil {
924888
excludePatterns = e.config.Workspace.ExcludePatterns
925889
}
926-
// Pre-scan: single WalkDir counting files per language before indexing begins.
927-
// This gives progressStore an accurate denominator from the start, so
928-
// GlobalPercent increases monotonically. One combined walk avoids
929-
// O(languages x files) traversals that per-language scans would incur.
930-
if e.progress != nil {
931-
fileCounts := e.indexer.CountAllFiles(wctx.Root, excludePatterns)
932-
for _, lang := range languages {
933-
e.progress.preRegister(wctx.ID, lang, fileCounts[lang], time.Now())
934-
}
935-
}
936890

937891
var indexErrors []string
938892
for _, lang := range languages {
939893
collection := wctx.CollectionName(lang)
940-
progressCb := func(doneFiles, totalFiles int) {
941-
if e.progress != nil {
942-
e.progress.update(wctx.ID, lang, doneFiles, totalFiles, time.Now())
943-
}
944-
}
945894
logger.Instance.Info("[IDX] ws=%s lang=%s ▶ starting", wsName, lang)
946895
err := e.indexer.IndexWorkspace(ctx, wctx.Root, collection, indexer.Options{
947896
Language: lang,
948897
WorkspaceName: wsName,
949898
ExcludePatterns: excludePatterns,
950899
Recreate: recreate,
951-
Progress: progressCb,
900+
Progress: func(doneFiles, totalFiles int) {
901+
if s := indexer.LoadIndexStatus(wctx.Root); s != nil {
902+
s.State = "running"
903+
if s.Languages == nil {
904+
s.Languages = make(map[string]indexer.LangStatus)
905+
}
906+
ls := s.Languages[lang]
907+
ls.OnDisk = totalFiles
908+
ls.Changed = totalFiles
909+
ls.Processed = doneFiles
910+
s.Languages[lang] = ls
911+
indexer.SaveIndexStatus(wctx.Root, s)
912+
}
913+
},
952914
})
953915
if err != nil {
954916
logger.Instance.Error("[IDX] ws=%s lang=%s ❌ failed: %v", wsName, lang, err)

internal/service/engine/engine_fallback_search_test.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,7 @@ func ValidateEmail(email string) bool {
9595
eng.SetResolver(resolver.New(resolver.Dependencies{Detector: &mockDirDetector{root: root}}))
9696

9797
t.Cleanup(func() {
98-
if eng.progress != nil {
99-
eng.progress.stop()
100-
}
98+
10199
})
102100

103101
return root, eng

internal/service/engine/engine_nonblocking_search_test.go

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,7 @@ func TestSearchCodeReturnsResultsFromOtherLangsWhenPrimaryMissing(t *testing.T)
7878

7979
// Cleanup: stop progress flusher
8080
t.Cleanup(func() {
81-
if eng2.progress != nil {
82-
eng2.progress.stop()
83-
}
81+
8482
})
8583
}
8684

@@ -129,9 +127,7 @@ func TestSearchCodeBlocksWhenZeroCollectionsExist(t *testing.T) {
129127

130128
// Cleanup
131129
t.Cleanup(func() {
132-
if eng2.progress != nil {
133-
eng2.progress.stop()
134-
}
130+
135131
// Wait for bg indexing to drain
136132
for i := 0; i < 100; i++ {
137133
if len(eng2.ActiveIndexingJobs()) == 0 {
@@ -183,9 +179,7 @@ func TestSearchCodeIndexingInProgressStillSearches(t *testing.T) {
183179
// Cleanup the fake job
184180
eng2.indexingJobs.Delete(wctx.ID)
185181
t.Cleanup(func() {
186-
if eng2.progress != nil {
187-
eng2.progress.stop()
188-
}
182+
189183
})
190184
}
191185

@@ -234,9 +228,7 @@ func TestHybridSearchCodeReturnsNilWhenCollectionMissing(t *testing.T) {
234228

235229
// Cleanup
236230
t.Cleanup(func() {
237-
if eng2.progress != nil {
238-
eng2.progress.stop()
239-
}
231+
240232
// Wait for bg indexing to drain
241233
for i := 0; i < 100; i++ {
242234
if len(eng2.ActiveIndexingJobs()) == 0 {
@@ -289,9 +281,7 @@ func TestHybridSearchCodeStillWorksWhenCollectionExists(t *testing.T) {
289281
}
290282

291283
t.Cleanup(func() {
292-
if eng2.progress != nil {
293-
eng2.progress.stop()
294-
}
284+
295285
})
296286
}
297287

internal/service/engine/engine_searchcode_test.go

Lines changed: 0 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"context"
55
"sync/atomic"
66
"testing"
7-
"time"
87

98
"github.com/doITmagic/rag-code-mcp/internal/config"
109
"github.com/doITmagic/rag-code-mcp/internal/service/search"
@@ -316,97 +315,4 @@ func (m *mockDirDetector) DetectFromFilePath(_ context.Context, path string) (*c
316315
}, nil
317316
}
318317

319-
// TestSearchCodeAutoResumesInterruptedIndexing verifies that SearchCode triggers
320-
// a background re-index when index_status.json shows an interrupted indexing
321-
// session (state="running", 1 <= GlobalPercent <= 99).
322-
// It also verifies the 5-minute cooldown prevents repeated resume triggers.
323-
func TestSearchCodeAutoResumesInterruptedIndexing(t *testing.T) {
324-
llmProvider := &countingLLM{}
325-
eng := newEngineCountingLLM(&testStore{existing: map[string]bool{}}, llmProvider)
326-
327-
wsRoot := t.TempDir()
328-
eng.SetResolver(resolver.New(resolver.Dependencies{Detector: &mockDirDetector{root: wsRoot}}))
329-
330-
// Pre-fill the connect trigger so we test the search-based auto-resume,
331-
// not the daemon's on-connect auto-warmup.
332-
wsID := contract.DeriveWorkspaceID(wsRoot, "", "")
333-
eng.connectTriggered.Store(wsID, true)
334-
335-
// Detect context to learn the workspace ID (needed for index_status.json)
336-
wctx, err := eng.DetectContext(context.Background(), "dummy.go")
337-
if err != nil || wctx == nil {
338-
t.Fatal("Failed to detect workspace context")
339-
}
340-
341-
// Write index_status.json that simulates an interrupted indexing session at 50%
342-
now := time.Now()
343-
status := IndexProgress{
344-
JobID: "interrupted-test-job",
345-
WorkspaceID: wctx.ID,
346-
WorkspaceRoot: wsRoot,
347-
State: "running",
348-
GlobalPercent: 50,
349-
StartedAt: now.Add(-10 * time.Minute),
350-
UpdatedAt: now,
351-
Languages: map[string]IndexLanguageProgress{},
352-
}
353-
saveIndexStatus(wsRoot, &status)
354318

355-
// Pre-emptively set connectTriggered so that DetectContext doesn't
356-
// automatically kick off indexing directly when it is called entirely
357-
// bypassing the auto-resume retry mechanism we're trying to test here.
358-
eng.connectTriggered.Store(wctx.ID, true)
359-
360-
// Make the go collection exist so SearchCode gets past the fast-fail check
361-
goColl := CollectionNameFor(wctx.ID, "go")
362-
store := &multiLangStore{
363-
testStore: testStore{
364-
existing: map[string]bool{goColl: true},
365-
},
366-
}
367-
eng.SetSearchService(search.NewService(llmProvider, store))
368-
369-
// First call — auto-resume should fire
370-
_, _ = eng.SearchCode(context.Background(), "dummy.go", "test", 10, false)
371-
372-
// resumeAttempts is updated synchronously before the goroutine starts,
373-
// so it is readable immediately after SearchCode returns.
374-
if _, ok := eng.resumeAttempts.Load(wctx.ID); !ok {
375-
t.Error("expected auto-resume to be triggered: resumeAttempts has no entry for workspace")
376-
}
377-
378-
// Second call within cooldown — should NOT trigger another resume attempt.
379-
// We verify this by checking that resumeAttempts still holds the same timestamp.
380-
first, okFirst := eng.resumeAttempts.Load(wctx.ID)
381-
_, _ = eng.SearchCode(context.Background(), "dummy.go", "test", 10, false)
382-
second, okSecond := eng.resumeAttempts.Load(wctx.ID)
383-
384-
if okFirst && okSecond {
385-
if first.(time.Time) != second.(time.Time) {
386-
t.Error("cooldown violated: auto-resume was triggered again within the 5-minute window")
387-
}
388-
} else {
389-
t.Error("expected resumeAttempts to be populated for both calls")
390-
}
391-
392-
// Stop the progress flusher goroutine before the test returns.
393-
// Without this the goroutine can still write to wsRoot/.ragcode/ while
394-
// t.TempDir() cleanup removes the directory, triggering a race under -race.
395-
t.Cleanup(func() {
396-
if eng.progress != nil {
397-
eng.progress.stop()
398-
}
399-
})
400-
401-
// Wait for the background indexing goroutine to drain before test returns.
402-
deadline := time.Now().Add(5 * time.Second)
403-
for time.Now().Before(deadline) {
404-
if len(eng.ActiveIndexingJobs()) == 0 {
405-
break
406-
}
407-
time.Sleep(10 * time.Millisecond)
408-
}
409-
if jobs := eng.ActiveIndexingJobs(); len(jobs) != 0 {
410-
t.Fatalf("background indexing jobs still active after 5s wait: %v", jobs)
411-
}
412-
}

internal/service/engine/engine_sticky_test.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,7 @@ func TestCheckAndReindexOnConnect_TriggersReindex(t *testing.T) {
8888

8989
// Register cleanup immediately — before any Fatal that could skip it.
9090
t.Cleanup(func() {
91-
if eng.progress != nil {
92-
eng.progress.stop()
93-
}
91+
9492
deadline := time.Now().Add(5 * time.Second)
9593
for time.Now().Before(deadline) {
9694
if len(eng.ActiveIndexingJobs()) == 0 {

0 commit comments

Comments
 (0)