@@ -83,17 +83,49 @@ function handleModuleDef(node: TreeSitterNode, ctx: ExtractorOutput): string | n
8383 return nameNode . text ;
8484}
8585
/**
 * Build the recorded name for a definition that may live inside a module.
 *
 * A bare name is prefixed with the enclosing module, so `show` inside
 * `module Foo` becomes `Foo.show`. A name that already contains a `.`
 * (e.g. `function Base.show ... end` inside `module Foo`, or the short form
 * `Foo.bar(x, y) = x + y` inside `module Outer`) is returned unchanged:
 * it already carries its own qualifier, and prefixing it again would produce
 * `Foo.Base.show` / `Outer.Foo.bar`.
 *
 * @param base - Name text exactly as written at the definition site.
 * @param currentModule - Enclosing module name, or `null` at top level. An
 *   empty string is treated the same as `null` (truthiness check).
 * @returns `currentModule.base` for unqualified names inside a module,
 *   otherwise `base` unchanged.
 */
function qualifyName(base: string, currentModule: string | null): string {
  // No padding inside the template literal: the result is used verbatim as
  // the definition name, so `Foo.show` must not come out as `Foo .show `.
  if (currentModule && !base.includes('.')) return `${currentModule}.${base}`;
  return base;
}
94+
/**
 * Return the `call_expression` that forms the signature of a function or
 * macro definition node.
 *
 * The lookup prefers a direct `signature` child and searches inside it,
 * because `findChild` only inspects direct children: asking the definition
 * node itself for a `call_expression` could match the first call in the
 * *body* (e.g. `println(...)`) rather than the signature.
 *
 * When no `signature` child exists, the definition node itself is searched.
 * That path is a defensive fallback for grammar drift only. The current
 * tree-sitter-julia grammar is expected to emit `signature` for every
 * `function_definition` / `macro_definition`. If the fallback ever fires on
 * a real definition it may pick up a body call and mis-record the name, so
 * treat a missing `signature` as a parser/grammar mismatch to investigate.
 *
 * @param node - A `function_definition` or `macro_definition` node.
 * @returns The signature's `call_expression`, or `null` when none is found.
 */
function signatureCall(node: TreeSitterNode): TreeSitterNode | null {
  const signature = findChild(node, 'signature');
  // Search within the signature wrapper when present, else the node itself.
  const searchRoot = signature ? signature : node;
  return findChild(searchRoot, 'call_expression');
}
118+
86119function handleFunctionDef (
87120 node : TreeSitterNode ,
88121 ctx : ExtractorOutput ,
89122 currentModule : string | null ,
90123) : void {
91- // function_definition may have a call_expression child as the signature
92- const callSig = findChild ( node , 'call_expression' ) ;
124+ const callSig = signatureCall ( node ) ;
93125 if ( callSig ) {
94126 const funcNameNode = callSig . child ( 0 ) ;
95127 if ( funcNameNode ) {
96- const name = currentModule ? ` ${ currentModule } . ${ funcNameNode . text } ` : funcNameNode . text ;
128+ const name = qualifyName ( funcNameNode . text , currentModule ) ;
97129 const params = extractJuliaParams ( callSig ) ;
98130 ctx . definitions . push ( {
99131 name,
@@ -110,9 +142,8 @@ function handleFunctionDef(
110142 const nameNode = node . childForFieldName ( 'name' ) || findChild ( node , 'identifier' ) ;
111143 if ( ! nameNode ) return ;
112144
113- const name = currentModule ? `${ currentModule } .${ nameNode . text } ` : nameNode . text ;
114145 ctx . definitions . push ( {
115- name,
146+ name : qualifyName ( nameNode . text , currentModule ) ,
116147 kind : 'function' ,
117148 line : node . startPosition . row + 1 ,
118149 endLine : nodeEndLine ( node ) ,
@@ -133,11 +164,10 @@ function handleAssignment(
133164 const funcNameNode = lhs . child ( 0 ) ;
134165 if ( ! funcNameNode ) return ;
135166
136- const name = currentModule ? `${ currentModule } .${ funcNameNode . text } ` : funcNameNode . text ;
137167 const params = extractJuliaParams ( lhs ) ;
138168
139169 ctx . definitions . push ( {
140- name,
170+ name : qualifyName ( funcNameNode . text , currentModule ) ,
141171 kind : 'function' ,
142172 line : node . startPosition . row + 1 ,
143173 endLine : nodeEndLine ( node ) ,
@@ -146,16 +176,74 @@ function handleAssignment(
146176 }
147177}
148178
/**
 * Node kinds that `findBaseName` is allowed to descend into when looking for
 * the name identifier of a type head.
 *
 * `type_parameter_list` / `type_argument_list` are intentionally absent: the
 * Julia grammar is noted to use `curly_expression` for `{T}` constructs, and
 * descending into a parameter list could return a type variable (e.g. `T`)
 * instead of the declared type name.
 */
const TYPE_HEAD_WRAPPERS: ReadonlySet<string> = new Set([
  'binary_expression',
  'parametrized_type_expression',
  'parameterized_identifier',
]);

/**
 * Locate the base-name identifier within a `type_head` node (or any node
 * nested inside one).
 *
 * Handles plain identifiers, `Name <: Super` binary expressions, and
 * parameterized forms such as `Name{T}` / `Name{T} <: Super{T,1}`.
 *
 * Children are scanned in document order. A direct `identifier` child is
 * returned as soon as it is reached, and a child whose kind is listed in
 * `TYPE_HEAD_WRAPPERS` is searched recursively before moving on. Scanning in
 * order matters for `Name{T} <: Super`: the only direct identifier of that
 * binary expression is the supertype, so checking direct identifiers first
 * would return `Super` instead of `Name`.
 *
 * @param node - The `type_head` node, or a node nested inside it.
 * @returns The leftmost base-name identifier, or `null` when none can be
 *   located. Callers should skip emitting a definition in that case.
 */
function findBaseName(node: TreeSitterNode): TreeSitterNode | null {
  if (node.type === 'identifier') return node;
  for (let i = 0; i < node.childCount; i++) {
    const child = node.child(i);
    if (!child) continue;
    if (child.type === 'identifier') return child;
    if (TYPE_HEAD_WRAPPERS.has(child.type)) {
      const found = findBaseName(child);
      if (found) return found;
    }
  }
  return null;
}
216+
149217function handleStructDef ( node : TreeSitterNode , ctx : ExtractorOutput ) : void {
150218 // struct_definition: struct type_head fields... end
219+ // type_head wraps the name and optional supertype. The name may be a
220+ // bare `identifier`, a parameterized form (e.g. `Vec{T}`), or either
221+ // of those nested inside a `binary_expression` (`Name <: Super`).
151222 const typeHead = findChild ( node , 'type_head' ) ;
152- const nameNode = typeHead
153- ? ( findChild ( typeHead , 'identifier' ) ?? typeHead )
154- : findChild ( node , 'identifier' ) ;
223+ if ( ! typeHead ) return ;
224+
225+ let nameNode : TreeSitterNode | null ;
226+ let supertypeNode : TreeSitterNode | null = null ;
227+
228+ const binary = findChild ( typeHead , 'binary_expression' ) ;
229+ if ( binary ) {
230+ // Walk into each side of the binary expression to find the base-name
231+ // identifier — handles parameterized forms like `Vec{T} <: AbstractArray{T,1}`.
232+ const sides : TreeSitterNode [ ] = [ ] ;
233+ for ( let i = 0 ; i < binary . childCount ; i ++ ) {
234+ const c = binary . child ( i ) ;
235+ if ( c && c . type !== 'operator' ) sides . push ( c ) ;
236+ }
237+ nameNode = sides [ 0 ] ? findBaseName ( sides [ 0 ] ) : null ;
238+ supertypeNode = sides [ 1 ] ? findBaseName ( sides [ 1 ] ) : null ;
239+ } else {
240+ nameNode = findBaseName ( typeHead ) ;
241+ }
242+
155243 if ( ! nameNode ) return ;
244+ const structName = nameNode . text ;
156245
157246 const children : SubDeclaration [ ] = [ ] ;
158- // Fields are typed_expression children of struct_definition
159247 for ( let i = 0 ; i < node . childCount ; i ++ ) {
160248 const child = node . child ( i ) ;
161249 if ( ! child ) continue ;
@@ -168,33 +256,24 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
168256 line : child . startPosition . row + 1 ,
169257 } ) ;
170258 }
171- }
172- // Plain identifier fields (no type annotation)
173- if ( child . type === 'identifier' && child !== nameNode && typeHead && child !== typeHead ) {
259+ } else if ( child . type === 'identifier' ) {
260+ // Plain identifier fields (no type annotation) appear as direct
261+ // identifier children of struct_definition. The type_head is a
262+ // separate node so there is nothing to filter out here.
174263 children . push ( { name : child . text , kind : 'property' , line : child . startPosition . row + 1 } ) ;
175264 }
176265 }
177266
178- // Check for supertype in type_head (Point <: AbstractPoint)
179- if ( typeHead ) {
180- const subtypeExpr = findChild ( typeHead , 'subtype_expression' ) ;
181- if ( subtypeExpr ) {
182- // Find the supertype identifier
183- for ( let i = 0 ; i < subtypeExpr . childCount ; i ++ ) {
184- const child = subtypeExpr . child ( i ) ;
185- if ( child ?. type === 'identifier' && i > 0 ) {
186- ctx . classes . push ( {
187- name : nameNode . text ,
188- extends : child . text ,
189- line : node . startPosition . row + 1 ,
190- } ) ;
191- }
192- }
193- }
267+ if ( supertypeNode ) {
268+ ctx . classes . push ( {
269+ name : structName ,
270+ extends : supertypeNode . text ,
271+ line : node . startPosition . row + 1 ,
272+ } ) ;
194273 }
195274
196275 ctx . definitions . push ( {
197- name : nameNode . text ,
276+ name : structName ,
198277 kind : 'struct' ,
199278 line : node . startPosition . row + 1 ,
200279 endLine : nodeEndLine ( node ) ,
@@ -203,7 +282,14 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
203282}
204283
205284function handleAbstractDef ( node : TreeSitterNode , ctx : ExtractorOutput ) : void {
206- const nameNode = node . childForFieldName ( 'name' ) || findChild ( node , 'identifier' ) ;
285+ // abstract_definition: `abstract type` type_head `end`
286+ // The identifier is nested inside `type_head` — possibly wrapped in a
287+ // `Name <: Super` binary_expression or a `Name{T,...}` parameterized form.
288+ // Mirror handleStructDef and skip rather than emit a garbled name when no
289+ // base identifier can be located.
290+ const typeHead = findChild ( node , 'type_head' ) ;
291+ if ( ! typeHead ) return ;
292+ const nameNode = findBaseName ( typeHead ) ;
207293 if ( ! nameNode ) return ;
208294
209295 ctx . definitions . push ( {
@@ -219,10 +305,17 @@ function handleMacroDef(
219305 ctx : ExtractorOutput ,
220306 currentModule : string | null ,
221307) : void {
222- const nameNode = node . childForFieldName ( 'name' ) || findChild ( node , 'identifier' ) ;
308+ // macro_definition: `macro` signature/call_expression body `end`.
309+ // The name lives in the same shape as a function signature — unwrap via
310+ // signatureCall so we don't pick up an identifier from the body (e.g.
311+ // `macro mymac(x) x end` would otherwise resolve to `@x`).
312+ const callSig = signatureCall ( node ) ;
313+ const nameNode =
314+ callSig ?. child ( 0 ) ?? node . childForFieldName ( 'name' ) ?? findChild ( node , 'identifier' ) ;
223315 if ( ! nameNode ) return ;
224316
225- const name = currentModule ? `${ currentModule } .@${ nameNode . text } ` : `@${ nameNode . text } ` ;
317+ const base = nameNode . text ;
318+ const name = currentModule ? `${ currentModule } .@${ base } ` : `@${ base } ` ;
226319 ctx . definitions . push ( {
227320 name,
228321 kind : 'function' ,
@@ -232,19 +325,40 @@ function handleMacroDef(
232325}
233326
234327function handleImport ( node : TreeSitterNode , ctx : ExtractorOutput ) : void {
328+ // tree-sitter-julia shapes:
329+ // `using LinearAlgebra` → using_statement [ using, identifier ]
330+ // `import Foo.Bar` → import_statement [ import, scoped_identifier ]
331+ // `import Base: show` → import_statement [ import, selected_import[Base, show] ]
332+ // `import Foo.Bar: baz` → import_statement [ import, selected_import[scoped_identifier, baz] ]
235333 const names : string [ ] = [ ] ;
236334 let source = '' ;
237335
238336 for ( let i = 0 ; i < node . childCount ; i ++ ) {
239337 const child = node . child ( i ) ;
240338 if ( ! child ) continue ;
241- if (
242- child . type === 'identifier' ||
243- child . type === 'scoped_identifier' ||
244- child . type === 'selected_import'
245- ) {
246- if ( ! source ) source = child . text ;
247- names . push ( child . text . split ( '.' ) . pop ( ) || child . text ) ;
339+ if ( child . type === 'identifier' || child . type === 'scoped_identifier' ) {
340+ const txt = child . text ;
341+ if ( ! source ) source = txt ;
342+ names . push ( txt . split ( '.' ) . pop ( ) || txt ) ;
343+ } else if ( child . type === 'selected_import' ) {
344+ // First identifier-bearing node is the source module; the rest are
345+ // imported names. The module may itself be a `scoped_identifier`
346+ // (e.g. `import Foo.Bar: baz`) — handle it alongside bare
347+ // `identifier` and use the trailing segment as the display name,
348+ // mirroring the outer loop.
349+ let first = true ;
350+ for ( let j = 0 ; j < child . childCount ; j ++ ) {
351+ const part = child . child ( j ) ;
352+ if ( ! part ) continue ;
353+ if ( part . type !== 'identifier' && part . type !== 'scoped_identifier' ) continue ;
354+ const txt = part . text ;
355+ if ( first ) {
356+ if ( ! source ) source = txt ;
357+ first = false ;
358+ } else {
359+ names . push ( txt . split ( '.' ) . pop ( ) || txt ) ;
360+ }
361+ }
248362 }
249363 }
250364
@@ -260,8 +374,15 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
260374function handleCall ( node : TreeSitterNode , ctx : ExtractorOutput ) : void {
261375 // Don't record if parent is assignment LHS (that's a function definition)
262376 if ( node . parent ?. type === 'assignment' && node === node . parent . child ( 0 ) ) return ;
263- // Don't record if parent is function_definition (that's a signature)
264- if ( node . parent ?. type === 'function_definition' ) return ;
377+ // Skip when this call is the signature of a function/macro definition.
378+ // tree-sitter-julia wraps the signature in a `signature` node whose parent
379+ // is `function_definition` or `macro_definition`. Body calls (e.g.
380+ // `println(name)` inside `function greet ... end`) appear as descendants of
381+ // the body, not as direct children of `signature`, so they are unaffected.
382+ if ( node . parent ?. type === 'signature' ) {
383+ const grand = node . parent . parent ;
384+ if ( grand ?. type === 'function_definition' || grand ?. type === 'macro_definition' ) return ;
385+ }
265386
266387 const funcNode = node . child ( 0 ) ;
267388 if ( ! funcNode ) return ;
0 commit comments