Skip to content

Commit 2e5cc25

Browse files
committed
Merge remote-tracking branch 'origin/refactor/indexing-progress' into refactor/indexing-progress
2 parents 0f5d3ba + b7d82e8 commit 2e5cc25

21 files changed

Lines changed: 474 additions & 150 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ Thumbs.db
5454
.env
5555
.env.local
5656

57+
# Local project config (not for VCS)
58+
.trello.json
59+
5760
# Temporary files
5861
tmp/
5962
temp/

BUGS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ In reality, the index may be completely stale — all 232 files could be unindex
155155

156156
## BUG-003: Top-level Go functions with no AST relations are missing from the vector index
157157

158-
**Status:** Open
158+
**Status:** ✅ Fixed (2026-03-10, PR #40)
159159
**Date confirmed:** 2026-03-09
160160
**Affected component:** Go parser / indexer (`pkg/indexer`, `internal/parser`)
161161
**Severity:** Medium — `rag_list_package_exports` and `rag_search` silently omit exported constructor/loader functions

SUGGESTIONS.md

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,16 @@
11
# Suggestions
22

3-
## Incremental indexing resets status to "starting"
3+
## Incremental indexing resets status to "starting" — ⚠️ Partially addressed
44

5-
Când se re-indexează incremental un singur fișier, `StartIndexingAsync` suprascrie statusul la `state: "starting"` cu totul de la zero, ștergând informația că 99% din index e deja acolo și funcțional. AI-ul vede `"starting"` + `"processed": 0` și crede că nu are date.
5+
When a single file is re-indexed incrementally, `StartIndexingAsync` previously
6+
overwrote the status file with a brand-new object, discarding `Languages` data.
67

7-
Fix-ul corect ar fi: la indexare incrementală, nu reseta starea la `"starting"` — folosește ceva gen `"updating"` sau păstrează `"completed"` cu un sub-status. Dar asta e un issue separat, nu din PR review-ul curent.
8+
**Current state (PR #40):**
9+
- The `State` field is now hidden from external JSON output (`json:"-"`), so AI
10+
consumers no longer see `"starting"` / `"completed"` strings.
11+
- The `Languages` map is now preserved during incremental re-indexing (not wiped).
12+
13+
**Remaining open item:** during incremental re-indexing, `processed` counters reset
14+
to whatever the incremental run reports. The overall `Languages` snapshot from the
15+
last full indexing run is kept, but live progress during the incremental pass may
16+
temporarily show lower counts.

cmd/rag-code-mcp/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import (
1717
)
1818

1919
var (
20-
Version = "2.1.65"
20+
Version = "2.1.67"
2121
Commit = "none"
2222
Date = "24.10.2025"
2323
)

internal/daemon/run.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
_ "github.com/doITmagic/rag-code-mcp/pkg/parser/docs"
2626
_ "github.com/doITmagic/rag-code-mcp/pkg/parser/go"
2727
_ "github.com/doITmagic/rag-code-mcp/pkg/parser/html"
28+
_ "github.com/doITmagic/rag-code-mcp/pkg/parser/javascript"
2829
_ "github.com/doITmagic/rag-code-mcp/pkg/parser/php"
2930
_ "github.com/doITmagic/rag-code-mcp/pkg/parser/python"
3031
"github.com/doITmagic/rag-code-mcp/pkg/storage"

internal/service/engine/engine.go

Lines changed: 92 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -802,8 +802,8 @@ func (e *Engine) popPendingIndex(workspaceID string) (files []string, overflow b
802802
func (e *Engine) tryStartPendingIndex(root, workspaceID string) {
803803
files, overflow := e.popPendingIndex(workspaceID)
804804
if overflow {
805-
logger.Instance.Info("[IDX] ♻️ Pending changes exceeded limit for ws=%s — triggering full scan", filepath.Base(root))
806-
e.StartIndexingAsync(root, workspaceID, nil, false)
805+
logger.Instance.Info("[IDX] ♻️ Pending changes exceeded limit for ws=%s — triggering full re-index", filepath.Base(root))
806+
e.StartIndexingAsync(root, workspaceID, nil, true)
807807
return
808808
}
809809
if len(files) == 0 {
@@ -817,6 +817,11 @@ func (e *Engine) tryStartPendingIndex(root, workspaceID string) {
817817
// If recreate=true and a job is already running, the recreate is queued and
818818
// will start immediately after the current job finishes.
819819
func (e *Engine) StartIndexingAsync(root, id string, changedFiles []string, recreate bool) {
820+
if watch.IsInvalidRoot(root) {
821+
logger.Instance.Error("[IDX] ⛔ Refusing to index invalid/dangerous root: %s", root)
822+
return
823+
}
824+
820825
if _, loaded := e.indexingJobs.LoadOrStore(id, time.Now()); loaded {
821826
// A job is already running. If recreate=true, queue it so it fires
822827
// after the current job finishes (via tryStartPendingIndex/defer).
@@ -838,7 +843,17 @@ func (e *Engine) StartIndexingAsync(root, id string, changedFiles []string, recr
838843
logger.Instance.Warn("[IDX] ⚠️ %d workspaces indexing simultaneously — Ollama requests will serialize implicitly (ws=%s)", activeCount, filepath.Base(root))
839844
}
840845

841-
indexer.SaveIndexStatus(root, &indexer.IndexStatus{StartedAt: time.Now().UTC().Format(time.RFC3339)})
846+
// Preserve existing Languages data on restart so incremental indexing
847+
// doesn't show processed=0 to AI consumers. Reset only lifecycle fields.
848+
s := indexer.LoadIndexStatus(root)
849+
if s == nil {
850+
s = &indexer.IndexStatus{}
851+
}
852+
s.StartedAt = time.Now().UTC().Format(time.RFC3339)
853+
s.EndedAt = ""
854+
s.Elapsed = ""
855+
s.Error = ""
856+
indexer.SaveIndexStatus(root, s)
842857

843858
go func() {
844859
defer func() {
@@ -858,33 +873,33 @@ func (e *Engine) StartIndexingAsync(root, id string, changedFiles []string, recr
858873
err = e.IndexWorkspace(ctx, root, recreate)
859874
}
860875

876+
finalizeIndexStatus(root, err)
861877
if err != nil {
862878
logger.Instance.Error("[IDX] ws=%s Background indexing failed: %v", filepath.Base(root), err)
863-
s := indexer.LoadIndexStatus(root)
864-
if s == nil {
865-
s = &indexer.IndexStatus{}
866-
}
867-
s.Error = err.Error()
868-
s.EndedAt = time.Now().UTC().Format(time.RFC3339)
869-
if started, pErr := time.Parse(time.RFC3339, s.StartedAt); pErr == nil {
870-
s.Elapsed = time.Since(started).Round(time.Second).String()
871-
}
872-
indexer.SaveIndexStatus(root, s)
873879
} else {
874880
logger.Instance.Info("[IDX] ✅ ws=%s Background indexing completed", filepath.Base(root))
875-
s := indexer.LoadIndexStatus(root)
876-
if s == nil {
877-
s = &indexer.IndexStatus{}
878-
}
879-
s.EndedAt = time.Now().UTC().Format(time.RFC3339)
880-
if started, pErr := time.Parse(time.RFC3339, s.StartedAt); pErr == nil {
881-
s.Elapsed = time.Since(started).Round(time.Second).String()
882-
}
883-
indexer.SaveIndexStatus(root, s)
884881
}
885882
}()
886883
}
887884

885+
// finalizeIndexStatus loads the current status, stamps EndedAt/Elapsed/Error,
886+
// and saves it atomically. Centralises the logic that was duplicated in the
887+
// success and error branches of StartIndexingAsync.
888+
func finalizeIndexStatus(root string, indexErr error) {
889+
s := indexer.LoadIndexStatus(root)
890+
if s == nil {
891+
s = &indexer.IndexStatus{}
892+
}
893+
s.EndedAt = time.Now().UTC().Format(time.RFC3339)
894+
if started, pErr := time.Parse(time.RFC3339, s.StartedAt); pErr == nil {
895+
s.Elapsed = time.Since(started).Round(time.Second).String()
896+
}
897+
if indexErr != nil {
898+
s.Error = indexErr.Error()
899+
}
900+
indexer.SaveIndexStatus(root, s)
901+
}
902+
888903
// IndexFiles indexes specific files in a workspace.
889904
func (e *Engine) IndexFiles(ctx context.Context, root string, files []string) error {
890905
wctx, err := e.DetectContext(ctx, root)
@@ -953,27 +968,53 @@ func (e *Engine) IndexWorkspace(ctx context.Context, path string, recreate bool)
953968
fileCounts := e.indexer.CountAllFiles(wctx.Root, excludePatterns)
954969
logger.Instance.Info("[IDX] ws=%s file counts: %v", wsName, fileCounts)
955970

956-
// Pre-populate index_status.json with the real disk totals so that
957-
// even languages with 0 changed files still show correct on_disk counts.
958-
{
959-
s := indexer.LoadIndexStatus(wctx.Root)
960-
if s == nil {
961-
s = &indexer.IndexStatus{StartedAt: time.Now().UTC().Format(time.RFC3339)}
962-
}
963-
if s.Languages == nil {
964-
s.Languages = make(map[string]indexer.LangStatus)
965-
}
966-
for _, lang := range languages {
967-
s.Languages[lang] = indexer.LangStatus{OnDisk: fileCounts[lang]}
968-
}
969-
indexer.SaveIndexStatus(wctx.Root, s)
971+
// Sort languages by file count descending so the dominant language is indexed
972+
// first and AI search works immediately for the most relevant code.
973+
sort.Slice(languages, func(i, j int) bool {
974+
return fileCounts[languages[i]] > fileCounts[languages[j]]
975+
})
976+
logger.Instance.Info("[IDX] ws=%s indexing order: %v", wsName, languages)
977+
978+
// Load or create a shared in-memory IndexStatus for the entire indexing run.
979+
// This avoids calling LoadIndexStatus (JSON read + parse) on every Progress tick.
980+
// The single *IndexStatus is updated in-place; only SaveIndexStatus (atomic write)
981+
// hits the disk, and only every 10 files (plus once on the final file).
982+
s := indexer.LoadIndexStatus(wctx.Root)
983+
if s == nil {
984+
s = &indexer.IndexStatus{StartedAt: time.Now().UTC().Format(time.RFC3339)}
985+
}
986+
if s.Languages == nil {
987+
s.Languages = make(map[string]indexer.LangStatus)
970988
}
989+
// Pre-populate real on_disk counts so languages with 0 changed files still appear.
990+
for _, l := range languages {
991+
entry := s.Languages[l]
992+
entry.OnDisk = fileCounts[l]
993+
s.Languages[l] = entry
994+
}
995+
indexer.SaveIndexStatus(wctx.Root, s)
971996

972997
var indexErrors []string
973998
for _, lang := range languages {
974999
diskTotal := fileCounts[lang]
9751000
collection := wctx.CollectionName(lang)
9761001
logger.Instance.Info("[IDX] ws=%s lang=%s ▶ starting (on_disk=%d)", wsName, lang, diskTotal)
1002+
1003+
// Capture the already-processed count before this run starts.
1004+
// For incremental runs (only changed files), we accumulate on top of
1005+
// the Processed count already recorded in the status. For a full re-index
1006+
// (recreate=true, or all files changed), we reset to 0.
1007+
// baseProcessed is captured once per language, before the Progress
1008+
// callback fires, so it's safe to close over it.
1009+
baseProcessed := s.Languages[lang].Processed
1010+
if recreate {
1011+
baseProcessed = 0
1012+
}
1013+
// firstTick marks the first Progress callback that passes the throttle
1014+
// check. On that tick we detect a full re-index (totalFiles >= diskTotal)
1015+
// and reset baseProcessed to 0 to avoid double-counting.
1016+
firstTick := true
1017+
9771018
err := e.indexer.IndexWorkspace(ctx, wctx.Root, collection, indexer.Options{
9781019
Language: lang,
9791020
WorkspaceName: wsName,
@@ -984,18 +1025,23 @@ func (e *Engine) IndexWorkspace(ctx context.Context, path string, recreate bool)
9841025
if doneFiles%10 != 0 && doneFiles != totalFiles {
9851026
return
9861027
}
987-
if s := indexer.LoadIndexStatus(wctx.Root); s != nil {
988-
989-
if s.Languages == nil {
990-
s.Languages = make(map[string]indexer.LangStatus)
1028+
// On the first tick, decide if this is a full re-index.
1029+
// If totalFiles covers all on-disk files, reset base to 0
1030+
// so we don't double-count the existing Processed value.
1031+
if firstTick {
1032+
firstTick = false
1033+
if diskTotal > 0 && totalFiles >= diskTotal {
1034+
baseProcessed = 0
9911035
}
992-
ls := s.Languages[lang]
993-
ls.OnDisk = diskTotal // real total files on disk for this language
994-
ls.Changed = totalFiles // files that needed re-indexing (changedFiles)
995-
ls.Processed = doneFiles
996-
s.Languages[lang] = ls
997-
indexer.SaveIndexStatus(wctx.Root, s)
9981036
}
1037+
ls := s.Languages[lang]
1038+
ls.OnDisk = diskTotal // real total files on disk
1039+
ls.Changed = totalFiles // files that needed re-indexing this run
1040+
// Cumulative total: for incremental runs add to the existing
1041+
// DB count; for full re-indexes (base=0) start from scratch.
1042+
ls.Processed = baseProcessed + doneFiles
1043+
s.Languages[lang] = ls
1044+
indexer.SaveIndexStatus(wctx.Root, s)
9991045
},
10001046
})
10011047
if err != nil {

internal/service/engine/engine_fallback_search_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,7 @@ func ValidateEmail(email string) bool {
9494
eng := NewEngine(idxSvc, searchSvc, "", &config.Config{})
9595
eng.SetResolver(resolver.New(resolver.Dependencies{Detector: &mockDirDetector{root: root}}))
9696

97-
t.Cleanup(func() {
9897

99-
})
10098

10199
return root, eng
102100
}

internal/service/tools/evaluate_ragcode.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -126,17 +126,11 @@ func (t *EvaluateRagCodeTool) Execute(ctx context.Context, args map[string]inter
126126
}
127127
}
128128

129-
130-
131129
response := ToolResponse{
132130
Status: "success",
133131
Message: b.String(),
134132
Data: data,
135-
Context: ContextMetadata{
136-
WorkspaceRoot: workspaceRoot,
137-
DetectionSource: source,
138-
IndexingStatus: nil,
139-
},
133+
Context: ContextFromWorkspaceWithStatus(wctx, t.engine),
140134
}
141135

142136
return response.JSON()

internal/service/tools/list_package_exports.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,10 @@ func (t *ListPackageExportsTool) Execute(ctx context.Context, args map[string]in
255255
Message: "Found package exports\n\n" + response.String(),
256256
Data: exports,
257257
Context: ContextMetadata{
258-
WorkspaceRoot: wctx.Root,
259-
DetectionSource: wctx.DetectionSource,
260-
Telemetry: telemetry.CalculateSavings(baselineBytes, actualBytes),
258+
WorkspaceRoot: wctx.Root,
259+
DetectionSource: wctx.DetectionSource,
260+
Telemetry: telemetry.CalculateSavings(baselineBytes, actualBytes),
261+
IndexingStatus: t.engine.GetIndexStatus(wctx.Root),
261262
},
262263
}
263264

internal/service/tools/read_file_context.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -311,11 +311,7 @@ func (t *ReadFileContextTool) buildResponse(wctx *engine.WorkspaceContext, res C
311311
Message: fmt.Sprintf("Extracted %s context for lines %d-%d from %s", res.ContextType, res.StartLine, res.EndLine, res.FilePath),
312312
}
313313
if wctx != nil {
314-
resp.Context = ContextMetadata{
315-
WorkspaceRoot: wctx.Root,
316-
DetectionSource: wctx.DetectionSource,
317-
IndexingStatus: nil,
318-
}
314+
resp.Context = ContextFromWorkspaceWithStatus(wctx, t.engine)
319315
}
320316

321317
baselineBytes := int64(0)

0 commit comments

Comments
 (0)