Skip to content

Commit 919af21

Browse files
authored
Merge branch 'main' into fix/1136-cargo-rustup-init-shim
2 parents 0824785 + 513edf3 commit 919af21

3 files changed

Lines changed: 292 additions & 48 deletions

File tree

crates/codegraph-core/src/extractors/julia.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ fn handle_module_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) -> O
7777
/// `function_definition` / `macro_definition` in a `signature` node whose
7878
/// first child is the `call_expression` — `find_child` only inspects direct
7979
/// children, so we unwrap one level explicitly.
80+
///
81+
/// Grammar assumption: every `function_definition` / `macro_definition` emits
82+
/// a `signature` child in the current tree-sitter-julia grammar. The fallback
83+
/// to `find_child(node, "call_expression")` exists only as a defensive measure
84+
/// for grammar drift — if it ever fires on a real definition, that fallback
85+
/// would silently match the first body call_expression and mis-record the
86+
/// function name. Callers must therefore treat a missing `signature` as a
87+
/// parser/grammar mismatch worth investigating, not as a routine code path.
8088
fn signature_call<'a>(node: &Node<'a>) -> Option<Node<'a>> {
8189
if let Some(sig) = find_child(node, "signature") {
8290
return find_child(&sig, "call_expression");
@@ -297,9 +305,17 @@ fn handle_abstract_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
297305
///
298306
/// Handles plain identifiers, `Name <: Super` binary expressions, and
299307
/// parameterized forms like `Name{T}` / `Name{T} <: Super{T,1}` by recursing
300-
/// into common wrapper kinds (binary expressions, parametrized identifiers,
301-
/// type-parameter lists). Returns `None` when no identifier can be located —
308+
/// into wrapper kinds the Julia grammar actually emits for type heads
309+
/// (binary expressions, parametrized type expressions, parameterized
310+
/// identifiers). Returns `None` when no identifier can be located —
302311
/// callers should skip emitting a definition in that case.
312+
///
313+
/// Note: `type_parameter_list` / `type_argument_list` are intentionally
314+
/// excluded — Julia's grammar uses `curly_expression` for `{T}` constructs,
315+
/// not those node kinds. Including them would risk recursing into a
316+
/// type-parameter list and returning a type variable (e.g. `T`) instead of
317+
/// the struct name if `find_base_name` were ever called on a node lacking a
318+
/// direct `identifier` child.
303319
fn find_base_name<'a>(node: &Node<'a>) -> Option<Node<'a>> {
304320
// The node itself may already be the identifier (e.g. when called on a
305321
// direct side of a binary_expression like `Point <: AbstractPoint`).
@@ -317,9 +333,7 @@ fn find_base_name<'a>(node: &Node<'a>) -> Option<Node<'a>> {
317333
match child.kind() {
318334
"binary_expression"
319335
| "parametrized_type_expression"
320-
| "parameterized_identifier"
321-
| "type_parameter_list"
322-
| "type_argument_list" => {
336+
| "parameterized_identifier" => {
323337
if let Some(found) = find_base_name(&child) {
324338
return Some(found);
325339
}

src/extractors/julia.ts

Lines changed: 164 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -83,17 +83,49 @@ function handleModuleDef(node: TreeSitterNode, ctx: ExtractorOutput): string | n
8383
return nameNode.text;
8484
}
8585

86+
/**
 * Prefix `base` with the enclosing module name unless it is already qualified.
 *
 * For qualified names (`function Base.show ... end` inside `module Foo`, or
 * short-form `Foo.bar(x, y) = x + y` inside `module Outer`), the LHS is a
 * `scoped_identifier` whose text already contains the qualifier, so the module
 * prefix is skipped to avoid producing `Foo.Base.show` / `Outer.Foo.bar`.
 *
 * @param base - Name text exactly as written at the definition site.
 * @param currentModule - Name of the enclosing module, or `null` at top level.
 * @returns `${currentModule}.${base}` when a module is given and `base`
 *   contains no `.`; otherwise `base` unchanged.
 */
function qualifyName(base: string, currentModule: string | null): string {
  // A '.' anywhere in the text is treated as "already qualified".
  if (currentModule && !base.includes('.')) return `${currentModule}.${base}`;
  return base;
}
94+
95+
/**
 * Extract the call_expression from a function/macro definition's signature.
 *
 * tree-sitter-julia wraps the signature in a `signature` node whose direct
 * children include the `call_expression` for the function name and parameters.
 * `findChild` only inspects direct children, so we unwrap one level explicitly.
 * Without this step, `findChild(node, 'call_expression')` on a
 * `function_definition` would match the *body's* first call_expression
 * (e.g. `println(...)` inside the body) instead of the signature.
 *
 * Grammar assumption: every `function_definition` / `macro_definition` emits a
 * `signature` child in the current tree-sitter-julia grammar. Callers must
 * therefore treat a missing `signature` as a parser/grammar mismatch worth
 * investigating, not as a routine code path.
 *
 * @param node - A `function_definition` or `macro_definition` node.
 * @returns The signature's `call_expression`, or `null` when none is found.
 */
function signatureCall(node: TreeSitterNode): TreeSitterNode | null {
  const sig = findChild(node, 'signature');
  if (sig) return findChild(sig, 'call_expression');
  // Defensive fallback for grammar drift only: if this fires on a real
  // definition it silently matches the first body call_expression and
  // mis-records the function name.
  return findChild(node, 'call_expression');
}
118+
86119
function handleFunctionDef(
87120
node: TreeSitterNode,
88121
ctx: ExtractorOutput,
89122
currentModule: string | null,
90123
): void {
91-
// function_definition may have a call_expression child as the signature
92-
const callSig = findChild(node, 'call_expression');
124+
const callSig = signatureCall(node);
93125
if (callSig) {
94126
const funcNameNode = callSig.child(0);
95127
if (funcNameNode) {
96-
const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text;
128+
const name = qualifyName(funcNameNode.text, currentModule);
97129
const params = extractJuliaParams(callSig);
98130
ctx.definitions.push({
99131
name,
@@ -110,9 +142,8 @@ function handleFunctionDef(
110142
const nameNode = node.childForFieldName('name') || findChild(node, 'identifier');
111143
if (!nameNode) return;
112144

113-
const name = currentModule ? `${currentModule}.${nameNode.text}` : nameNode.text;
114145
ctx.definitions.push({
115-
name,
146+
name: qualifyName(nameNode.text, currentModule),
116147
kind: 'function',
117148
line: node.startPosition.row + 1,
118149
endLine: nodeEndLine(node),
@@ -133,11 +164,10 @@ function handleAssignment(
133164
const funcNameNode = lhs.child(0);
134165
if (!funcNameNode) return;
135166

136-
const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text;
137167
const params = extractJuliaParams(lhs);
138168

139169
ctx.definitions.push({
140-
name,
170+
name: qualifyName(funcNameNode.text, currentModule),
141171
kind: 'function',
142172
line: node.startPosition.row + 1,
143173
endLine: nodeEndLine(node),
@@ -146,16 +176,74 @@ function handleAssignment(
146176
}
147177
}
148178

179+
/**
 * Node kinds that `findBaseName` recurses into when the base-name identifier
 * is not a direct child: binary expressions, parametrized type expressions,
 * and parameterized identifiers.
 *
 * Note: `type_parameter_list` / `type_argument_list` are intentionally
 * excluded — Julia's grammar uses `curly_expression` for `{T}` constructs,
 * not those node kinds. Including them would risk recursing into a
 * type-parameter list and returning a type variable (e.g. `T`) instead of
 * the struct name if `findBaseName` were ever called on a node lacking a
 * direct `identifier` child.
 */
const TYPE_HEAD_WRAPPERS: ReadonlySet<string> = new Set([
  'binary_expression',
  'parametrized_type_expression',
  'parameterized_identifier',
]);

/**
 * Locate the base-name identifier within a `type_head` node.
 *
 * Handles plain identifiers, `Name <: Super` binary expressions, and
 * parameterized forms like `Name{T}` / `Name{T} <: Super{T,1}` by recursing
 * into the wrapper kinds listed in `TYPE_HEAD_WRAPPERS`.
 *
 * @param node - The `type_head` node, or one side of a binary expression.
 * @returns The identifier node, or `null` when no identifier can be located —
 *   callers should skip emitting a definition in that case.
 */
function findBaseName(node: TreeSitterNode): TreeSitterNode | null {
  // The node itself may already be the identifier (e.g. one side of
  // `Point <: AbstractPoint`).
  if (node.type === 'identifier') return node;

  // A direct identifier child wins over anything nested deeper.
  const direct = findChild(node, 'identifier');
  if (direct) return direct;

  // Otherwise descend, in child order, through known wrapper kinds only.
  for (let i = 0; i < node.childCount; i++) {
    const child = node.child(i);
    if (!child) continue;
    if (TYPE_HEAD_WRAPPERS.has(child.type)) {
      const found = findBaseName(child);
      if (found) return found;
    }
  }
  return null;
}
216+
149217
function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
150218
// struct_definition: struct type_head fields... end
219+
// type_head wraps the name and optional supertype. The name may be a
220+
// bare `identifier`, a parameterized form (e.g. `Vec{T}`), or either
221+
// of those nested inside a `binary_expression` (`Name <: Super`).
151222
const typeHead = findChild(node, 'type_head');
152-
const nameNode = typeHead
153-
? (findChild(typeHead, 'identifier') ?? typeHead)
154-
: findChild(node, 'identifier');
223+
if (!typeHead) return;
224+
225+
let nameNode: TreeSitterNode | null;
226+
let supertypeNode: TreeSitterNode | null = null;
227+
228+
const binary = findChild(typeHead, 'binary_expression');
229+
if (binary) {
230+
// Walk into each side of the binary expression to find the base-name
231+
// identifier — handles parameterized forms like `Vec{T} <: AbstractArray{T,1}`.
232+
const sides: TreeSitterNode[] = [];
233+
for (let i = 0; i < binary.childCount; i++) {
234+
const c = binary.child(i);
235+
if (c && c.type !== 'operator') sides.push(c);
236+
}
237+
nameNode = sides[0] ? findBaseName(sides[0]) : null;
238+
supertypeNode = sides[1] ? findBaseName(sides[1]) : null;
239+
} else {
240+
nameNode = findBaseName(typeHead);
241+
}
242+
155243
if (!nameNode) return;
244+
const structName = nameNode.text;
156245

157246
const children: SubDeclaration[] = [];
158-
// Fields are typed_expression children of struct_definition
159247
for (let i = 0; i < node.childCount; i++) {
160248
const child = node.child(i);
161249
if (!child) continue;
@@ -168,33 +256,24 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
168256
line: child.startPosition.row + 1,
169257
});
170258
}
171-
}
172-
// Plain identifier fields (no type annotation)
173-
if (child.type === 'identifier' && child !== nameNode && typeHead && child !== typeHead) {
259+
} else if (child.type === 'identifier') {
260+
// Plain identifier fields (no type annotation) appear as direct
261+
// identifier children of struct_definition. The type_head is a
262+
// separate node so there is nothing to filter out here.
174263
children.push({ name: child.text, kind: 'property', line: child.startPosition.row + 1 });
175264
}
176265
}
177266

178-
// Check for supertype in type_head (Point <: AbstractPoint)
179-
if (typeHead) {
180-
const subtypeExpr = findChild(typeHead, 'subtype_expression');
181-
if (subtypeExpr) {
182-
// Find the supertype identifier
183-
for (let i = 0; i < subtypeExpr.childCount; i++) {
184-
const child = subtypeExpr.child(i);
185-
if (child?.type === 'identifier' && i > 0) {
186-
ctx.classes.push({
187-
name: nameNode.text,
188-
extends: child.text,
189-
line: node.startPosition.row + 1,
190-
});
191-
}
192-
}
193-
}
267+
if (supertypeNode) {
268+
ctx.classes.push({
269+
name: structName,
270+
extends: supertypeNode.text,
271+
line: node.startPosition.row + 1,
272+
});
194273
}
195274

196275
ctx.definitions.push({
197-
name: nameNode.text,
276+
name: structName,
198277
kind: 'struct',
199278
line: node.startPosition.row + 1,
200279
endLine: nodeEndLine(node),
@@ -203,7 +282,14 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
203282
}
204283

205284
function handleAbstractDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
206-
const nameNode = node.childForFieldName('name') || findChild(node, 'identifier');
285+
// abstract_definition: `abstract type` type_head `end`
286+
// The identifier is nested inside `type_head` — possibly wrapped in a
287+
// `Name <: Super` binary_expression or a `Name{T,...}` parameterized form.
288+
// Mirror handleStructDef and skip rather than emit a garbled name when no
289+
// base identifier can be located.
290+
const typeHead = findChild(node, 'type_head');
291+
if (!typeHead) return;
292+
const nameNode = findBaseName(typeHead);
207293
if (!nameNode) return;
208294

209295
ctx.definitions.push({
@@ -219,10 +305,17 @@ function handleMacroDef(
219305
ctx: ExtractorOutput,
220306
currentModule: string | null,
221307
): void {
222-
const nameNode = node.childForFieldName('name') || findChild(node, 'identifier');
308+
// macro_definition: `macro` signature/call_expression body `end`.
309+
// The name lives in the same shape as a function signature — unwrap via
310+
// signatureCall so we don't pick up an identifier from the body (e.g.
311+
// `macro mymac(x) x end` would otherwise resolve to `@x`).
312+
const callSig = signatureCall(node);
313+
const nameNode =
314+
callSig?.child(0) ?? node.childForFieldName('name') ?? findChild(node, 'identifier');
223315
if (!nameNode) return;
224316

225-
const name = currentModule ? `${currentModule}.@${nameNode.text}` : `@${nameNode.text}`;
317+
const base = nameNode.text;
318+
const name = currentModule ? `${currentModule}.@${base}` : `@${base}`;
226319
ctx.definitions.push({
227320
name,
228321
kind: 'function',
@@ -232,19 +325,40 @@ function handleMacroDef(
232325
}
233326

234327
function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
328+
// tree-sitter-julia shapes:
329+
// `using LinearAlgebra` → using_statement [ using, identifier ]
330+
// `import Foo.Bar` → import_statement [ import, scoped_identifier ]
331+
// `import Base: show` → import_statement [ import, selected_import[Base, show] ]
332+
// `import Foo.Bar: baz` → import_statement [ import, selected_import[scoped_identifier, baz] ]
235333
const names: string[] = [];
236334
let source = '';
237335

238336
for (let i = 0; i < node.childCount; i++) {
239337
const child = node.child(i);
240338
if (!child) continue;
241-
if (
242-
child.type === 'identifier' ||
243-
child.type === 'scoped_identifier' ||
244-
child.type === 'selected_import'
245-
) {
246-
if (!source) source = child.text;
247-
names.push(child.text.split('.').pop() || child.text);
339+
if (child.type === 'identifier' || child.type === 'scoped_identifier') {
340+
const txt = child.text;
341+
if (!source) source = txt;
342+
names.push(txt.split('.').pop() || txt);
343+
} else if (child.type === 'selected_import') {
344+
// First identifier-bearing node is the source module; the rest are
345+
// imported names. The module may itself be a `scoped_identifier`
346+
// (e.g. `import Foo.Bar: baz`) — handle it alongside bare
347+
// `identifier` and use the trailing segment as the display name,
348+
// mirroring the outer loop.
349+
let first = true;
350+
for (let j = 0; j < child.childCount; j++) {
351+
const part = child.child(j);
352+
if (!part) continue;
353+
if (part.type !== 'identifier' && part.type !== 'scoped_identifier') continue;
354+
const txt = part.text;
355+
if (first) {
356+
if (!source) source = txt;
357+
first = false;
358+
} else {
359+
names.push(txt.split('.').pop() || txt);
360+
}
361+
}
248362
}
249363
}
250364

@@ -260,8 +374,15 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
260374
function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
261375
// Don't record if parent is assignment LHS (that's a function definition)
262376
if (node.parent?.type === 'assignment' && node === node.parent.child(0)) return;
263-
// Don't record if parent is function_definition (that's a signature)
264-
if (node.parent?.type === 'function_definition') return;
377+
// Skip when this call is the signature of a function/macro definition.
378+
// tree-sitter-julia wraps the signature in a `signature` node whose parent
379+
// is `function_definition` or `macro_definition`. Body calls (e.g.
380+
// `println(name)` inside `function greet ... end`) appear as descendants of
381+
// the body, not as direct children of `signature`, so they are unaffected.
382+
if (node.parent?.type === 'signature') {
383+
const grand = node.parent.parent;
384+
if (grand?.type === 'function_definition' || grand?.type === 'macro_definition') return;
385+
}
265386

266387
const funcNode = node.child(0);
267388
if (!funcNode) return;

0 commit comments

Comments
 (0)