44 "fmt"
55 "os"
66 "path/filepath"
7+ "runtime"
8+ "sort"
9+ "sync"
710 "time"
811
912 "github.com/SimplyLiz/CodeMCP/internal/errors"
@@ -26,6 +29,9 @@ type SCIPIndex struct {
2629 // Documents are all indexed documents
2730 Documents []* Document
2831
32+ // DocumentsByPath is an O(1) lookup map from relative path to document
33+ DocumentsByPath map [string ]* Document
34+
2935 // Symbols maps symbol IDs to symbol information
3036 Symbols map [string ]* SymbolInformation
3137
@@ -44,9 +50,6 @@ type SCIPIndex struct {
4450
4551 // IndexedCommit is the git commit the index was built from
4652 IndexedCommit string
47-
48- // raw is the raw protobuf index
49- raw * scippb.Index
5053}
5154
5255// LoadSCIPIndex loads a SCIP index from the specified path
@@ -93,93 +96,185 @@ func LoadSCIPIndex(path string) (*SCIPIndex, error) {
9396 )
9497 }
9598
96- // Convert to internal representation
97- scipIndex := & SCIPIndex {
98- Metadata : convertMetadata (index .Metadata ),
99- Documents : convertDocuments (index .Documents ),
100- Symbols : make (map [string ]* SymbolInformation ),
101- RefIndex : make (map [string ][]* OccurrenceRef ),
102- ConvertedSymbols : make (map [string ]* SCIPSymbol ),
103- ContainerIndex : make (map [string ]string ),
104- LoadedAt : time .Now (),
105- raw : & index ,
106- }
107-
108- // Build symbol map and reference index in a single pass
109- for _ , doc := range scipIndex .Documents {
110- // Index symbols
111- for _ , sym := range doc .Symbols {
112- scipIndex .Symbols [sym .Symbol ] = sym
113- }
99+ // Convert to internal representation using parallel document processing.
100+ nWorkers := runtime .GOMAXPROCS (0 )
101+
102+ // Phase 1: convert documents and build per-doc indexes in parallel.
103+ type docResult struct {
104+ doc * Document
105+ symbols map [string ]* SymbolInformation
106+ refEntries map [string ][]* OccurrenceRef
107+ containerEntries map [string ]string
108+ }
114109
115- // Build inverted reference index for O(1) lookups
116- for _ , occ := range doc .Occurrences {
117- if occ .Symbol != "" {
118- scipIndex .RefIndex [occ .Symbol ] = append (
119- scipIndex .RefIndex [occ .Symbol ],
120- & OccurrenceRef {Doc : doc , Occ : occ },
121- )
110+ results := make ([]docResult , len (index .Documents ))
111+
112+ var wg sync.WaitGroup
113+ sem := make (chan struct {}, nWorkers )
114+
115+ for i , pbDoc := range index .Documents {
116+ wg .Add (1 )
117+ sem <- struct {}{}
118+ go func (i int , pbDoc * scippb.Document ) {
119+ defer wg .Done ()
120+ defer func () { <- sem }()
121+
122+ doc := convertDocument (pbDoc )
123+ r := docResult {
124+ doc : doc ,
125+ symbols : make (map [string ]* SymbolInformation , len (doc .Symbols )),
126+ refEntries : make (map [string ][]* OccurrenceRef ),
127+ containerEntries : make (map [string ]string ),
122128 }
123- }
124129
125- // Build container index for O(1) containment lookup
126- // First collect all definition occurrences with enclosing ranges
127- type defScope struct {
128- symbol string
129- startLine int32
130- endLine int32
131- }
132- var defScopes []defScope
133- for _ , occ := range doc .Occurrences {
134- if occ .SymbolRoles & SymbolRoleDefinition != 0 && len (occ .EnclosingRange ) >= 3 {
135- startLine := occ .EnclosingRange [0 ]
136- var endLine int32
137- if len (occ .EnclosingRange ) >= 4 {
138- endLine = occ .EnclosingRange [2 ]
139- } else {
140- endLine = startLine
130+ // Index symbols
131+ for _ , sym := range doc .Symbols {
132+ r .symbols [sym .Symbol ] = sym
133+ }
134+
135+ // Build inverted reference index for O(1) lookups
136+ for _ , occ := range doc .Occurrences {
137+ if occ .Symbol != "" {
138+ r .refEntries [occ .Symbol ] = append (
139+ r .refEntries [occ .Symbol ],
140+ & OccurrenceRef {Doc : doc , Occ : occ },
141+ )
141142 }
142- defScopes = append (defScopes , defScope {
143- symbol : occ .Symbol ,
144- startLine : startLine ,
145- endLine : endLine ,
146- })
147143 }
148- }
149144
150- // For each occurrence, find its innermost containing scope
151- for _ , occ := range doc .Occurrences {
152- if len (occ .Range ) < 2 {
153- continue
145+ // Build container index.
146+ // Collect definition occurrences that have enclosing ranges.
147+ type defScope struct {
148+ symbol string
149+ startLine int32
150+ endLine int32
154151 }
155- occLine := occ .Range [0 ]
156-
157- // Find the smallest (innermost) scope containing this occurrence
158- var bestScope * defScope
159- var bestSize int32 = - 1
160- for i := range defScopes {
161- ds := & defScopes [i ]
162- if occLine >= ds .startLine && occLine <= ds .endLine {
163- size := ds .endLine - ds .startLine
164- if bestScope == nil || size < bestSize {
165- bestScope = ds
166- bestSize = size
152+ var defScopes []defScope
153+ for _ , occ := range doc .Occurrences {
154+ if occ .SymbolRoles & SymbolRoleDefinition != 0 && len (occ .EnclosingRange ) >= 3 {
155+ startLine := occ .EnclosingRange [0 ]
156+ var endLine int32
157+ if len (occ .EnclosingRange ) >= 4 {
158+ endLine = occ .EnclosingRange [2 ]
159+ } else {
160+ endLine = startLine
167161 }
162+ defScopes = append (defScopes , defScope {
163+ symbol : occ .Symbol ,
164+ startLine : startLine ,
165+ endLine : endLine ,
166+ })
168167 }
169168 }
170169
171- if bestScope != nil {
172- key := fmt .Sprintf ("%s:%d:%d" , doc .RelativePath , occ .Range [0 ], occ .Range [1 ])
173- scipIndex .ContainerIndex [key ] = bestScope .symbol
170+ if len (defScopes ) > 0 {
171+ // Sort by scope size ascending so the first match is the innermost.
172+ sort .Slice (defScopes , func (a , b int ) bool {
173+ return (defScopes [a ].endLine - defScopes [a ].startLine ) <
174+ (defScopes [b ].endLine - defScopes [b ].startLine )
175+ })
176+
177+ for _ , occ := range doc .Occurrences {
178+ if len (occ .Range ) < 2 {
179+ continue
180+ }
181+ occLine := occ .Range [0 ]
182+ for idx := range defScopes {
183+ ds := & defScopes [idx ]
184+ if occLine >= ds .startLine && occLine <= ds .endLine {
185+ key := fmt .Sprintf ("%s:%d:%d" , doc .RelativePath , occ .Range [0 ], occ .Range [1 ])
186+ r .containerEntries [key ] = ds .symbol
187+ break // first match is innermost (sorted by size asc)
188+ }
189+ }
190+ }
174191 }
192+
193+ results [i ] = r
194+ }(i , pbDoc )
195+ }
196+ wg .Wait ()
197+
198+ // Merge per-doc results into the main index (serial, fast map assignment).
199+ // Pre-size maps based on doc count to reduce rehashing.
200+ totalSyms := 0
201+ totalRefs := 0
202+ totalContainer := 0
203+ docs := make ([]* Document , len (results ))
204+ for i , r := range results {
205+ docs [i ] = r .doc
206+ totalSyms += len (r .symbols )
207+ totalRefs += len (r .refEntries )
208+ totalContainer += len (r .containerEntries )
209+ }
210+
211+ scipIndex := & SCIPIndex {
212+ Metadata : convertMetadata (index .Metadata ),
213+ Documents : docs ,
214+ DocumentsByPath : make (map [string ]* Document , len (docs )),
215+ Symbols : make (map [string ]* SymbolInformation , totalSyms ),
216+ RefIndex : make (map [string ][]* OccurrenceRef , totalRefs ),
217+ ConvertedSymbols : make (map [string ]* SCIPSymbol , totalSyms ),
218+ ContainerIndex : make (map [string ]string , totalContainer ),
219+ LoadedAt : time .Now (),
220+ }
221+
222+ for _ , doc := range docs {
223+ scipIndex .DocumentsByPath [doc .RelativePath ] = doc
224+ }
225+ for _ , r := range results {
226+ for k , v := range r .symbols {
227+ scipIndex .Symbols [k ] = v
228+ }
229+ for k , v := range r .refEntries {
230+ scipIndex .RefIndex [k ] = append (scipIndex .RefIndex [k ], v ... )
175231 }
232+ for k , v := range r .containerEntries {
233+ scipIndex .ContainerIndex [k ] = v
234+ }
235+ }
236+
237+ // Phase 2: pre-convert all symbols in parallel.
238+ // RefIndex and Symbols are fully built at this point (read-only from here).
239+ type symResult struct {
240+ id string
241+ sym * SCIPSymbol
242+ }
243+
244+ symIDs := make ([]string , 0 , len (scipIndex .Symbols ))
245+ for id := range scipIndex .Symbols {
246+ symIDs = append (symIDs , id )
247+ }
248+
249+ symCh := make (chan symResult , len (symIDs ))
250+ batchSize := (len (symIDs ) + nWorkers - 1 ) / nWorkers
251+ if batchSize < 1 {
252+ batchSize = 1
176253 }
177254
178- // Pre-convert all symbols to avoid repeated conversion during queries
179- for symbolId , symInfo := range scipIndex .Symbols {
180- if converted , err := convertToSCIPSymbol (symInfo , scipIndex ); err == nil {
181- scipIndex .ConvertedSymbols [symbolId ] = converted
255+ var wg2 sync.WaitGroup
256+ for b := 0 ; b * batchSize < len (symIDs ); b ++ {
257+ start := b * batchSize
258+ end := start + batchSize
259+ if end > len (symIDs ) {
260+ end = len (symIDs )
182261 }
262+ wg2 .Add (1 )
263+ go func (ids []string ) {
264+ defer wg2 .Done ()
265+ for _ , id := range ids {
266+ if converted , err := convertToSCIPSymbol (scipIndex .Symbols [id ], scipIndex ); err == nil {
267+ symCh <- symResult {id : id , sym : converted }
268+ }
269+ }
270+ }(symIDs [start :end ])
271+ }
272+ go func () {
273+ wg2 .Wait ()
274+ close (symCh )
275+ }()
276+ for r := range symCh {
277+ scipIndex .ConvertedSymbols [r .id ] = r .sym
183278 }
184279
185280 // Extract indexed commit from metadata if available
@@ -203,12 +298,7 @@ func (i *SCIPIndex) IsStale(headCommit string) bool {
203298
204299// GetDocument retrieves a document by its relative path
205300func (i * SCIPIndex ) GetDocument (relativePath string ) * Document {
206- for _ , doc := range i .Documents {
207- if doc .RelativePath == relativePath {
208- return doc
209- }
210- }
211- return nil
301+ return i .DocumentsByPath [relativePath ]
212302}
213303
214304// GetSymbol retrieves symbol information by ID
0 commit comments