@@ -3,6 +3,8 @@ import path from 'path';
33import { CODEBASE_CONTEXT_DIRNAME , KEYWORD_INDEX_FILENAME } from '../constants/codebase-context.js' ;
44import { IndexCorruptedError } from '../errors/index.js' ;
55import type { UsageLocation } from '../types/index.js' ;
6+ import { detectLanguage } from '../utils/language-detection.js' ;
7+ import { findIdentifierOccurrences } from '../utils/tree-sitter.js' ;
68
79interface IndexedChunk {
810 content ?: unknown ;
@@ -59,6 +61,46 @@ function buildPreview(content: string, lineOffset: number): string {
5961 return previewLines . join ( '\n' ) . trim ( ) ;
6062}
6163
/**
 * Builds a short, trimmed preview from a file that has already been split into lines.
 *
 * The preview is the slice `[max(0, line - 2), min(lines.length, line + 1))` of `lines`,
 * joined with `\n` and trimmed at both ends. If `line` is a 1-based line number
 * (assumption — confirm against callers), that is the target line plus one line of
 * context on each side.
 *
 * @param lines - File content split on `\n`.
 * @param line - Line number the preview is centred on.
 * @returns The joined window with leading/trailing whitespace removed.
 */
function buildPreviewFromFileLines(lines: string[], line: number): string {
  const windowStart = Math.max(0, line - 2);
  const windowEnd = Math.min(lines.length, line + 1);
  const windowLines = lines.slice(windowStart, windowEnd);
  const joined = windowLines.join('\n');
  return joined.trim();
}
69+
/**
 * Resolves the on-disk path of an indexed chunk, refusing anything outside `rootPath`.
 *
 * `chunk.filePath` is consulted first (absolute, or relative to the root); only when it
 * is missing or blank is `chunk.relativePath` used. A path that resolves to the root
 * itself or to a location outside the root yields `null`.
 *
 * @param rootPath - Project root that every accepted path must live under.
 * @param chunk - Index entry whose `filePath` / `relativePath` fields are inspected.
 * @returns An absolute path strictly inside the root, or `null` when none can be derived.
 */
function resolveAbsoluteChunkPath(rootPath: string, chunk: IndexedChunk): string | null {
  const resolvedRoot = path.resolve(rootPath);

  const isWithinRoot = (candidate: string): boolean => {
    const relative = path.relative(resolvedRoot, path.resolve(candidate));
    // An empty result means the candidate IS the root; an absolute result means the
    // candidate cannot be expressed relative to the root (e.g. another Windows drive).
    if (!relative || path.isAbsolute(relative)) {
      return false;
    }
    // Only a leading `..` path segment escapes the root. A plain `startsWith('..')`
    // would also reject legitimate in-root entries such as `..cache/file.ts`.
    return relative !== '..' && !relative.startsWith(`..${path.sep}`);
  };

  const { filePath, relativePath } = chunk;

  if (typeof filePath === 'string' && filePath.trim()) {
    const raw = filePath.trim();
    if (path.isAbsolute(raw)) {
      return isWithinRoot(raw) ? raw : null;
    }
    const resolved = path.resolve(resolvedRoot, raw);
    return isWithinRoot(resolved) ? resolved : null;
  }

  if (typeof relativePath === 'string' && relativePath.trim()) {
    const resolved = path.resolve(resolvedRoot, relativePath.trim());
    return isWithinRoot(resolved) ? resolved : null;
  }

  return null;
}
94+
/**
 * Reports whether `targetPath` refers to an existing regular file.
 *
 * Any failure while stat-ing the path (missing entry, permission error, …) is treated
 * as "does not exist" rather than propagated; directories and other non-file entries
 * also yield `false`.
 *
 * @param targetPath - Path to probe.
 * @returns `true` only when `fs.stat` succeeds and the entry is a regular file.
 */
async function fileExists(targetPath: string): Promise<boolean> {
  return fs
    .stat(targetPath)
    .then((stats) => stats.isFile())
    .catch(() => false);
}
103+
62104export async function findSymbolReferences (
63105 rootPath : string ,
64106 symbol : string ,
@@ -110,34 +152,95 @@ export async function findSymbolReferences(
110152 let usageCount = 0 ;
111153
112154 const escapedSymbol = escapeRegex ( normalizedSymbol ) ;
155+ const prefilter = new RegExp ( `\\b${ escapedSymbol } \\b` ) ;
113156 const matcher = new RegExp ( `\\b${ escapedSymbol } \\b` , 'g' ) ;
114157
158+ // Prefilter candidate files from the keyword index. We do not trust chunk contents for
159+ // exact reference counting when Tree-sitter is available; chunks only guide which files to scan.
160+ const chunksByFile = new Map <
161+ string ,
162+ { relPath : string ; absPath : string | null ; chunks : IndexedChunk [ ] }
163+ > ( ) ;
164+
115165 for ( const chunkRaw of chunks ) {
116166 const chunk = chunkRaw as IndexedChunk ;
117- if ( typeof chunk . content !== 'string' ) {
118- continue ;
167+ if ( typeof chunk . content !== 'string' ) continue ;
168+ if ( ! prefilter . test ( chunk . content ) ) continue ;
169+
170+ const relPath = getUsageFile ( rootPath , chunk ) ;
171+ const absPath = resolveAbsoluteChunkPath ( rootPath , chunk ) ;
172+
173+ const entry = chunksByFile . get ( relPath ) ;
174+ if ( entry ) {
175+ entry . chunks . push ( chunk ) ;
176+ // Prefer a real absolute path when available
177+ if ( ! entry . absPath && absPath ) {
178+ entry . absPath = absPath ;
179+ }
180+ } else {
181+ chunksByFile . set ( relPath , { relPath, absPath, chunks : [ chunk ] } ) ;
119182 }
183+ }
120184
121- const chunkContent = chunk . content ;
122- const startLine = typeof chunk . startLine === 'number' ? chunk . startLine : 1 ;
123- matcher . lastIndex = 0 ;
185+ for ( const entry of chunksByFile . values ( ) ) {
186+ const relPath = entry . relPath ;
187+ const absPath = entry . absPath ;
188+
189+ // Preferred: Tree-sitter identifier walk on the real file content.
190+ if ( absPath && ( await fileExists ( absPath ) ) ) {
191+ try {
192+ const raw = await fs . readFile ( absPath , 'utf-8' ) ;
193+ const content = raw . replace ( / \r \n / g, '\n' ) ;
194+ const language = detectLanguage ( absPath ) ;
195+ const occurrences = await findIdentifierOccurrences ( content , language , normalizedSymbol ) ;
196+
197+ if ( occurrences ) {
198+ usageCount += occurrences . length ;
199+
200+ if ( usages . length < normalizedLimit && occurrences . length > 0 ) {
201+ const lines = content . split ( '\n' ) ;
202+ for ( const occ of occurrences ) {
203+ if ( usages . length >= normalizedLimit ) break ;
204+ usages . push ( {
205+ file : relPath ,
206+ line : occ . line ,
207+ preview : buildPreviewFromFileLines ( lines , occ . line )
208+ } ) ;
209+ }
210+ }
211+
212+ continue ;
213+ }
214+ } catch {
215+ // Fall through to chunk-regex fallback (missing grammar, parse failure, etc.)
216+ }
217+ }
124218
125- let match : RegExpExecArray | null ;
126- while ( ( match = matcher . exec ( chunkContent ) ) !== null ) {
127- usageCount += 1 ;
219+ // Fallback: regex scan inside the matched chunks (legacy behavior).
220+ for ( const chunk of entry . chunks ) {
221+ if ( typeof chunk . content !== 'string' ) continue ;
128222
129- if ( usages . length >= normalizedLimit ) {
130- continue ;
131- }
223+ const chunkContent = chunk . content ;
224+ const startLine = typeof chunk . startLine === 'number' ? chunk . startLine : 1 ;
225+ matcher . lastIndex = 0 ;
132226
133- const prefix = chunkContent . slice ( 0 , match . index ) ;
134- const lineOffset = prefix . split ( '\n' ) . length - 1 ;
227+ let match : RegExpExecArray | null ;
228+ while ( ( match = matcher . exec ( chunkContent ) ) !== null ) {
229+ usageCount += 1 ;
135230
136- usages . push ( {
137- file : getUsageFile ( rootPath , chunk ) ,
138- line : startLine + lineOffset ,
139- preview : buildPreview ( chunkContent , lineOffset )
140- } ) ;
231+ if ( usages . length >= normalizedLimit ) {
232+ continue ;
233+ }
234+
235+ const prefix = chunkContent . slice ( 0 , match . index ) ;
236+ const lineOffset = prefix . split ( '\n' ) . length - 1 ;
237+
238+ usages . push ( {
239+ file : relPath ,
240+ line : startLine + lineOffset ,
241+ preview : buildPreview ( chunkContent , lineOffset )
242+ } ) ;
243+ }
141244 }
142245 }
143246
0 commit comments