Skip to content

Commit 796352b

Browse files
committed
fix(julia): port parameterized-type / qualified-def / qualified-import fixes to WASM
The native Julia extractor was fixed in #1098 for three issues that were already latent in the WASM extractor but not surfaced by the existing fixtures. Per the dual-engine policy, port the fixes so both engines produce identical results. 1. Parameterized struct names (`struct Vec{T} <: AbstractArray{T,1}`) no longer silently emit the raw type-head text as the definition name — `findBaseName` recurses through `binary_expression`, `parametrized_type_expression`, and related wrappers to locate the base identifier. 2. Qualified function defs / short-form methods inside a module no longer get double-prefixed: `function Base.show ... end` inside `module Foo` now records `Base.show` (not `Foo.Base.show`); same for short-form `Foo.bar(x, y) = x + y` inside `module Outer`. 3. `selected_import` with a qualified module (`import LinearAlgebra.BLAS: gemm`) now correctly records `LinearAlgebra.BLAS` as the import source and `gemm` as the imported name. Also fixes a related latent bug: `findChild(node, 'call_expression')` on a `function_definition` was matching the body's first call (e.g. `println(...)`) instead of the signature, because the signature is wrapped in a `signature` node. Added a `signatureCall` helper mirroring the native code. Closes #1111
1 parent 2ba882a commit 796352b

2 files changed

Lines changed: 175 additions & 38 deletions

File tree

src/extractors/julia.ts

Lines changed: 125 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -83,17 +83,41 @@ function handleModuleDef(node: TreeSitterNode, ctx: ExtractorOutput): string | n
8383
return nameNode.text;
8484
}
8585

86+
/**
* Build the name recorded for a definition that may sit inside a module.
*
* @param base - Name text taken from the definition's signature or LHS.
* @param currentModule - Name of the enclosing module, or `null` when there
* is none. An empty string behaves like `null` (the check is truthiness).
* @returns `${currentModule}.${base}` when a module is present and `base`
* contains no `.`; otherwise `base` unchanged.
*/
function qualifyName(base: string, currentModule: string | null): string {
87+
// For qualified names (`function Base.show ... end` inside `module Foo`,
88+
// or short-form `Foo.bar(x, y) = x + y` inside `module Outer`), the LHS
89+
// is a `scoped_identifier` already containing the qualifier — skip the
90+
// module prefix to avoid producing `Foo.Base.show` / `Outer.Foo.bar`.
// The test below is a plain substring check on the text: any `.` anywhere
// in `base` suppresses the prefix, whatever node kind produced that text.
91+
if (currentModule && !base.includes('.')) return `${currentModule}.${base}`;
92+
return base;
93+
}
94+
95+
/**
96+
* Extract the call_expression from a function/macro definition's signature.
97+
*
98+
* tree-sitter-julia wraps the signature in a `signature` node whose direct
99+
* children include the `call_expression` for the function name and parameters.
100+
* `findChild` only inspects direct children, so we unwrap one level explicitly.
101+
* Without this step, `findChild(node, 'call_expression')` on a
102+
* `function_definition` would match the *body's* first call_expression
103+
* (e.g. `println(...)` inside the body) instead of the signature.
*
* @param node - The definition node to inspect.
* @returns The `call_expression` found under the `signature` child when a
* `signature` child exists (this may be `null` if the signature holds none);
* otherwise whatever `findChild(node, 'call_expression')` yields.
104+
*/
105+
function signatureCall(node: TreeSitterNode): TreeSitterNode | null {
106+
const sig = findChild(node, 'signature');
107+
// When a `signature` wrapper exists its result is final: there is no
// fall-through to the node's own children, even if the lookup is null.
if (sig) return findChild(sig, 'call_expression');
108+
// No `signature` wrapper: look for a call_expression directly on the node.
return findChild(node, 'call_expression');
109+
}
110+
86111
function handleFunctionDef(
87112
node: TreeSitterNode,
88113
ctx: ExtractorOutput,
89114
currentModule: string | null,
90115
): void {
91-
// function_definition may have a call_expression child as the signature
92-
const callSig = findChild(node, 'call_expression');
116+
const callSig = signatureCall(node);
93117
if (callSig) {
94118
const funcNameNode = callSig.child(0);
95119
if (funcNameNode) {
96-
const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text;
120+
const name = qualifyName(funcNameNode.text, currentModule);
97121
const params = extractJuliaParams(callSig);
98122
ctx.definitions.push({
99123
name,
@@ -110,9 +134,8 @@ function handleFunctionDef(
110134
const nameNode = node.childForFieldName('name') || findChild(node, 'identifier');
111135
if (!nameNode) return;
112136

113-
const name = currentModule ? `${currentModule}.${nameNode.text}` : nameNode.text;
114137
ctx.definitions.push({
115-
name,
138+
name: qualifyName(nameNode.text, currentModule),
116139
kind: 'function',
117140
line: node.startPosition.row + 1,
118141
endLine: nodeEndLine(node),
@@ -133,11 +156,10 @@ function handleAssignment(
133156
const funcNameNode = lhs.child(0);
134157
if (!funcNameNode) return;
135158

136-
const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text;
137159
const params = extractJuliaParams(lhs);
138160

139161
ctx.definitions.push({
140-
name,
162+
name: qualifyName(funcNameNode.text, currentModule),
141163
kind: 'function',
142164
line: node.startPosition.row + 1,
143165
endLine: nodeEndLine(node),
@@ -146,16 +168,69 @@ function handleAssignment(
146168
}
147169
}
148170

171+
/**
* Node kinds that `findBaseName` is allowed to descend into while searching
* for a base-name identifier. Children of any other kind are not searched.
*
* NOTE(review): because `type_parameter_list` and `type_argument_list` are
* listed here, an identifier found inside one of them can be returned as the
* "base name" when nothing earlier matches. Presumably those lists hold type
* parameters rather than the name itself — confirm these two entries are
* intended.
*/
181+
const TYPE_HEAD_WRAPPERS: ReadonlySet<string> = new Set([
182+
'binary_expression',
183+
'parametrized_type_expression',
184+
'parameterized_identifier',
185+
'type_parameter_list',
186+
'type_argument_list',
187+
]);
188+
189+
/**
* Locate the base-name identifier within a `type_head` node.
*
* Handles plain identifiers, `Name <: Super` binary expressions, and
* parameterized forms like `Name{T}` / `Name{T} <: Super{T,1}` by recursing
* into common wrapper kinds (binary expressions, parametrized type
* expressions, parameterized identifiers, type-parameter / type-argument
* lists).
*
* Search order: the node itself if it is an `identifier`; then a direct
* `identifier` child; then, in child order, a depth-first search of each
* child whose type is in `TYPE_HEAD_WRAPPERS`.
*
* @param node - The node to search (a `type_head` or one of its descendants).
* @returns The identifier node, or `null` when no identifier can be
* located — callers should skip emitting a definition in that case.
*/
function findBaseName(node: TreeSitterNode): TreeSitterNode | null {
190+
if (node.type === 'identifier') return node;
191+
// A direct identifier child takes priority over anything nested deeper.
const direct = findChild(node, 'identifier');
192+
if (direct) return direct;
193+
for (let i = 0; i < node.childCount; i++) {
194+
const child = node.child(i);
195+
if (!child) continue;
196+
// Only known wrapper kinds are searched; other children are ignored.
if (TYPE_HEAD_WRAPPERS.has(child.type)) {
197+
const found = findBaseName(child);
198+
if (found) return found;
199+
}
200+
}
201+
return null;
202+
}
203+
149204
function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
150205
// struct_definition: struct type_head fields... end
206+
// type_head wraps the name and optional supertype. The name may be a
207+
// bare `identifier`, a parameterized form (e.g. `Vec{T}`), or either
208+
// of those nested inside a `binary_expression` (`Name <: Super`).
151209
const typeHead = findChild(node, 'type_head');
152-
const nameNode = typeHead
153-
? (findChild(typeHead, 'identifier') ?? typeHead)
154-
: findChild(node, 'identifier');
210+
if (!typeHead) return;
211+
212+
let nameNode: TreeSitterNode | null;
213+
let supertypeNode: TreeSitterNode | null = null;
214+
215+
const binary = findChild(typeHead, 'binary_expression');
216+
if (binary) {
217+
// Walk into each side of the binary expression to find the base-name
218+
// identifier — handles parameterized forms like `Vec{T} <: AbstractArray{T,1}`.
219+
const sides: TreeSitterNode[] = [];
220+
for (let i = 0; i < binary.childCount; i++) {
221+
const c = binary.child(i);
222+
if (c && c.type !== 'operator') sides.push(c);
223+
}
224+
nameNode = sides[0] ? findBaseName(sides[0]) : null;
225+
supertypeNode = sides[1] ? findBaseName(sides[1]) : null;
226+
} else {
227+
nameNode = findBaseName(typeHead);
228+
}
229+
155230
if (!nameNode) return;
231+
const structName = nameNode.text;
156232

157233
const children: SubDeclaration[] = [];
158-
// Fields are typed_expression children of struct_definition
159234
for (let i = 0; i < node.childCount; i++) {
160235
const child = node.child(i);
161236
if (!child) continue;
@@ -168,33 +243,24 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
168243
line: child.startPosition.row + 1,
169244
});
170245
}
171-
}
172-
// Plain identifier fields (no type annotation)
173-
if (child.type === 'identifier' && child !== nameNode && typeHead && child !== typeHead) {
246+
} else if (child.type === 'identifier') {
247+
// Plain identifier fields (no type annotation) appear as direct
248+
// identifier children of struct_definition. The type_head is a
249+
// separate node so there is nothing to filter out here.
174250
children.push({ name: child.text, kind: 'property', line: child.startPosition.row + 1 });
175251
}
176252
}
177253

178-
// Check for supertype in type_head (Point <: AbstractPoint)
179-
if (typeHead) {
180-
const subtypeExpr = findChild(typeHead, 'subtype_expression');
181-
if (subtypeExpr) {
182-
// Find the supertype identifier
183-
for (let i = 0; i < subtypeExpr.childCount; i++) {
184-
const child = subtypeExpr.child(i);
185-
if (child?.type === 'identifier' && i > 0) {
186-
ctx.classes.push({
187-
name: nameNode.text,
188-
extends: child.text,
189-
line: node.startPosition.row + 1,
190-
});
191-
}
192-
}
193-
}
254+
if (supertypeNode) {
255+
ctx.classes.push({
256+
name: structName,
257+
extends: supertypeNode.text,
258+
line: node.startPosition.row + 1,
259+
});
194260
}
195261

196262
ctx.definitions.push({
197-
name: nameNode.text,
263+
name: structName,
198264
kind: 'struct',
199265
line: node.startPosition.row + 1,
200266
endLine: nodeEndLine(node),
@@ -232,19 +298,40 @@ function handleMacroDef(
232298
}
233299

234300
function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
301+
// tree-sitter-julia shapes:
302+
// `using LinearAlgebra` → using_statement [ using, identifier ]
303+
// `import Foo.Bar` → import_statement [ import, scoped_identifier ]
304+
// `import Base: show` → import_statement [ import, selected_import[Base, show] ]
305+
// `import Foo.Bar: baz` → import_statement [ import, selected_import[scoped_identifier, baz] ]
235306
const names: string[] = [];
236307
let source = '';
237308

238309
for (let i = 0; i < node.childCount; i++) {
239310
const child = node.child(i);
240311
if (!child) continue;
241-
if (
242-
child.type === 'identifier' ||
243-
child.type === 'scoped_identifier' ||
244-
child.type === 'selected_import'
245-
) {
246-
if (!source) source = child.text;
247-
names.push(child.text.split('.').pop() || child.text);
312+
if (child.type === 'identifier' || child.type === 'scoped_identifier') {
313+
const txt = child.text;
314+
if (!source) source = txt;
315+
names.push(txt.split('.').pop() || txt);
316+
} else if (child.type === 'selected_import') {
317+
// First identifier-bearing node is the source module; the rest are
318+
// imported names. The module may itself be a `scoped_identifier`
319+
// (e.g. `import Foo.Bar: baz`) — handle it alongside bare
320+
// `identifier` and use the trailing segment as the display name,
321+
// mirroring the outer loop.
322+
let first = true;
323+
for (let j = 0; j < child.childCount; j++) {
324+
const part = child.child(j);
325+
if (!part) continue;
326+
if (part.type !== 'identifier' && part.type !== 'scoped_identifier') continue;
327+
const txt = part.text;
328+
if (first) {
329+
if (!source) source = txt;
330+
first = false;
331+
} else {
332+
names.push(txt.split('.').pop() || txt);
333+
}
334+
}
248335
}
249336
}
250337

tests/parsers/julia.test.ts

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,54 @@ import Base: show`);
5555
push!(arr, 1)`);
5656
expect(symbols.calls.length).toBeGreaterThanOrEqual(1);
5757
});
58+
59+
it('extracts parameterized struct base name', () => {
60+
// Parameterized struct names (e.g. `Vec{T}`) must record the base
61+
// identifier — not be silently dropped or include type-parameter text.
// The supertype is checked the same way: `AbstractArray{T,1}` is expected
// to be recorded as the bare `AbstractArray` in the single `classes` entry.
62+
const symbols = parseJulia(`struct Vec{T} <: AbstractArray{T,1}
63+
data::Vector{T}
64+
end`);
65+
const names = symbols.definitions.map((d) => d.name);
66+
expect(names).toContain('Vec');
67+
// No definition name may carry `{` or `<`, i.e. raw type-head text.
expect(names.every((n) => !n.includes('{') && !n.includes('<'))).toBe(true);
68+
expect(symbols.classes).toHaveLength(1);
69+
expect(symbols.classes[0]).toMatchObject({ name: 'Vec', extends: 'AbstractArray' });
70+
});
71+
72+
it('qualified short-form method does not double-prefix', () => {
73+
// `Foo.bar(x, y) = x + y` inside `module Outer` must record `Foo.bar`,
74+
// not `Outer.Foo.bar` — the scoped_identifier already carries the qualifier.
75+
const symbols = parseJulia(`module Outer
76+
Foo.bar(x, y) = x + y
77+
end`);
78+
const names = symbols.definitions.map((d) => d.name);
79+
// Assert both directions: the qualified name is present and the
// module-prefixed variant is absent.
expect(names).toContain('Foo.bar');
80+
expect(names).not.toContain('Outer.Foo.bar');
81+
});
82+
83+
it('qualified function def does not double-prefix', () => {
84+
// `function Base.show(io, x) ... end` inside `module Foo` must record
85+
// `Base.show`, not `Foo.Base.show`.
// The body contains a call (`println`), so this case would also fail if
// the name were taken from the body's first call instead of the signature.
86+
const symbols = parseJulia(`module Foo
87+
function Base.show(io, x)
88+
println(io, x)
89+
end
90+
end`);
91+
const names = symbols.definitions.map((d) => d.name);
92+
expect(names).toContain('Base.show');
93+
expect(names).not.toContain('Foo.Base.show');
94+
});
95+
96+
it('selected_import handles qualified module', () => {
97+
// `import Foo.Bar: baz` — module is a scoped_identifier. The import
98+
// must record `Foo.Bar` as the source and `baz` as the imported name,
99+
// not the malformed `source="baz", names=["baz"]`.
// NOTE(review): the malformed shape quoted above looks like the result of
// skipping the scoped module and treating `baz` as the source; presumably
// it describes the native extractor's earlier behaviour. The logic this
// commit removes from the WASM extractor used the whole `selected_import`
// text as the source instead — confirm which failure the comment should cite.
100+
const symbols = parseJulia(`import LinearAlgebra.BLAS: gemm`);
101+
expect(symbols.imports).toHaveLength(1);
102+
expect(symbols.imports[0]).toMatchObject({
103+
source: 'LinearAlgebra.BLAS',
104+
});
105+
// The full module path must not leak into the imported-name list.
expect(symbols.imports[0].names).toContain('gemm');
106+
expect(symbols.imports[0].names).not.toContain('LinearAlgebra.BLAS');
107+
});
58108
});

0 commit comments

Comments
 (0)