@@ -3,6 +3,8 @@ import path from 'path';
33import { CODEBASE_CONTEXT_DIRNAME , KEYWORD_INDEX_FILENAME } from '../constants/codebase-context.js' ;
44import { IndexCorruptedError } from '../errors/index.js' ;
55import type { UsageLocation } from '../types/index.js' ;
6+ import { detectLanguage } from '../utils/language-detection.js' ;
7+ import { findIdentifierOccurrences } from '../utils/tree-sitter.js' ;
68
79interface IndexedChunk {
810 content ?: unknown ;
@@ -59,6 +61,37 @@ function buildPreview(content: string, lineOffset: number): string {
5961 return previewLines . join ( '\n' ) . trim ( ) ;
6062}
6163
/**
 * Builds a short, trimmed preview of the file text surrounding `line`.
 *
 * The window is `lines.slice(max(0, line - 2), min(lines.length, line + 1))`, so for a
 * 1-based `line` it covers the preceding line, the line itself and the following line.
 * NOTE(review): assumes `line` is 1-based — confirm against the occurrence provider.
 *
 * @param lines - File content already split into individual lines (never mutated here).
 * @param line - Line number of the match the preview is centred on.
 * @returns The window joined with `\n` and trimmed; `''` when the window is empty
 *          (for example when `line` lies beyond the end of `lines`).
 */
function buildPreviewFromFileLines(lines: readonly string[], line: number): string {
  // Clamp both ends so matches on the first or last line still yield a valid slice.
  const start = Math.max(0, line - 2);
  const end = Math.min(lines.length, line + 1);
  return lines.slice(start, end).join('\n').trim();
}
69+
/**
 * Resolves the absolute filesystem path an indexed chunk refers to.
 *
 * Lookup order:
 *  1. `chunk.filePath` — returned as-is (trimmed) when already absolute, otherwise
 *     resolved against `rootPath`.
 *  2. `chunk.relativePath` — always resolved against `rootPath`.
 *
 * A field is used only when it is a string that is non-empty after trimming.
 *
 * @param rootPath - Base directory that relative paths are resolved against.
 * @param chunk - Indexed chunk whose `filePath` / `relativePath` fields are inspected.
 * @returns The absolute path, or `null` when neither field holds a usable string.
 */
function resolveAbsoluteChunkPath(rootPath: string, chunk: IndexedChunk): string | null {
  // Narrow and trim each candidate once; '' means "not usable".
  const filePath = typeof chunk.filePath === 'string' ? chunk.filePath.trim() : '';
  if (filePath) {
    return path.isAbsolute(filePath) ? filePath : path.resolve(rootPath, filePath);
  }

  const relativePath = typeof chunk.relativePath === 'string' ? chunk.relativePath.trim() : '';
  if (relativePath) {
    return path.resolve(rootPath, relativePath);
  }

  return null;
}
85+
/**
 * Reports whether `targetPath` currently refers to a regular file.
 *
 * This is a best-effort probe: any failure of `fs.stat` (missing path, permission
 * error, …) is deliberately reported as `false` rather than thrown, so callers can
 * fall back to another strategy.
 *
 * @param targetPath - Path to probe.
 * @returns `true` only when `fs.stat` succeeds and `isFile()` is true; `false` for
 *          directories, other entry types, and every stat error.
 */
async function fileExists(targetPath: string): Promise<boolean> {
  try {
    const stat = await fs.stat(targetPath);
    return stat.isFile();
  } catch {
    // Intentionally swallowed: an unreadable or missing path counts as "not a file".
    return false;
  }
}
94+
6295export async function findSymbolReferences (
6396 rootPath : string ,
6497 symbol : string ,
@@ -110,34 +143,95 @@ export async function findSymbolReferences(
110143 let usageCount = 0 ;
111144
112145 const escapedSymbol = escapeRegex ( normalizedSymbol ) ;
146+ const prefilter = new RegExp ( `\\b${ escapedSymbol } \\b` ) ;
113147 const matcher = new RegExp ( `\\b${ escapedSymbol } \\b` , 'g' ) ;
114148
149+ // Prefilter candidate files from the keyword index. We do not trust chunk contents for
150+ // exact reference counting when Tree-sitter is available; chunks only guide which files to scan.
151+ const chunksByFile = new Map <
152+ string ,
153+ { relPath : string ; absPath : string | null ; chunks : IndexedChunk [ ] }
154+ > ( ) ;
155+
115156 for ( const chunkRaw of chunks ) {
116157 const chunk = chunkRaw as IndexedChunk ;
117- if ( typeof chunk . content !== 'string' ) {
118- continue ;
158+ if ( typeof chunk . content !== 'string' ) continue ;
159+ if ( ! prefilter . test ( chunk . content ) ) continue ;
160+
161+ const relPath = getUsageFile ( rootPath , chunk ) ;
162+ const absPath = resolveAbsoluteChunkPath ( rootPath , chunk ) ;
163+
164+ const entry = chunksByFile . get ( relPath ) ;
165+ if ( entry ) {
166+ entry . chunks . push ( chunk ) ;
167+ // Prefer a real absolute path when available
168+ if ( ! entry . absPath && absPath ) {
169+ entry . absPath = absPath ;
170+ }
171+ } else {
172+ chunksByFile . set ( relPath , { relPath, absPath, chunks : [ chunk ] } ) ;
173+ }
174+ }
175+
176+ for ( const entry of chunksByFile . values ( ) ) {
177+ const relPath = entry . relPath ;
178+ const absPath = entry . absPath ;
179+
180+ // Preferred: Tree-sitter identifier walk on the real file content.
181+ if ( absPath && ( await fileExists ( absPath ) ) ) {
182+ try {
183+ const raw = await fs . readFile ( absPath , 'utf-8' ) ;
184+ const content = raw . replace ( / \r \n / g, '\n' ) ;
185+ const language = detectLanguage ( absPath ) ;
186+ const occurrences = await findIdentifierOccurrences ( content , language , normalizedSymbol ) ;
187+
188+ if ( occurrences ) {
189+ usageCount += occurrences . length ;
190+
191+ if ( usages . length < normalizedLimit && occurrences . length > 0 ) {
192+ const lines = content . split ( '\n' ) ;
193+ for ( const occ of occurrences ) {
194+ if ( usages . length >= normalizedLimit ) break ;
195+ usages . push ( {
196+ file : relPath ,
197+ line : occ . line ,
198+ preview : buildPreviewFromFileLines ( lines , occ . line )
199+ } ) ;
200+ }
201+ }
202+
203+ continue ;
204+ }
205+ } catch {
206+ // Fall through to chunk-regex fallback (missing grammar, parse failure, etc.)
207+ }
119208 }
120209
121- const chunkContent = chunk . content ;
122- const startLine = typeof chunk . startLine === 'number' ? chunk . startLine : 1 ;
123- matcher . lastIndex = 0 ;
210+ // Fallback: regex scan inside the matched chunks (legacy behavior).
211+ for ( const chunk of entry . chunks ) {
212+ if ( typeof chunk . content !== 'string' ) continue ;
124213
125- let match : RegExpExecArray | null ;
126- while ( ( match = matcher . exec ( chunkContent ) ) !== null ) {
127- usageCount += 1 ;
214+ const chunkContent = chunk . content ;
215+ const startLine = typeof chunk . startLine === 'number' ? chunk . startLine : 1 ;
216+ matcher . lastIndex = 0 ;
128217
129- if ( usages . length >= normalizedLimit ) {
130- continue ;
131- }
218+ let match : RegExpExecArray | null ;
219+ while ( ( match = matcher . exec ( chunkContent ) ) !== null ) {
220+ usageCount += 1 ;
221+
222+ if ( usages . length >= normalizedLimit ) {
223+ continue ;
224+ }
132225
133- const prefix = chunkContent . slice ( 0 , match . index ) ;
134- const lineOffset = prefix . split ( '\n' ) . length - 1 ;
226+ const prefix = chunkContent . slice ( 0 , match . index ) ;
227+ const lineOffset = prefix . split ( '\n' ) . length - 1 ;
135228
136- usages . push ( {
137- file : getUsageFile ( rootPath , chunk ) ,
138- line : startLine + lineOffset ,
139- preview : buildPreview ( chunkContent , lineOffset )
140- } ) ;
229+ usages . push ( {
230+ file : relPath ,
231+ line : startLine + lineOffset ,
232+ preview : buildPreview ( chunkContent , lineOffset )
233+ } ) ;
234+ }
141235 }
142236 }
143237
0 commit comments