Skip to content

Commit aee042b

Browse files
authored
fix(r): setMethod emits call edge, not duplicate definition (#1109) (#1125)
In idiomatic S4 code, `setGeneric("greet", ...)` followed by `setMethod("greet", "Person", ...)` produced two `function` definition nodes for the same name. Split the handler: `setGeneric` still emits a function definition (the generic), while `setMethod` emits a call edge to the generic — modelling the implementation registration without duplicating the symbol. The method body's calls are still picked up by the recursive walk of the anonymous function argument. While auditing the WASM extractor, found three sibling handlers that silently produced no output because they didn't unwrap the `argument` node that tree-sitter-r places around each positional string literal: setClass, setGeneric, and source(). The native (Rust) extractor handled this correctly via `first_argument_value`. Introduced a parity helper `firstStringArgument` mirroring the Rust logic and routed all three handlers through it. Closes #1109.
1 parent 02a581c commit aee042b

3 files changed

Lines changed: 187 additions & 45 deletions

File tree

crates/codegraph-core/src/extractors/r_lang.rs

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,14 @@ fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
163163
handle_set_class(node, source, symbols);
164164
return;
165165
}
166-
"setGeneric" | "setMethod" => {
166+
"setGeneric" => {
167167
handle_set_generic(node, source, symbols);
168168
return;
169169
}
170+
"setMethod" => {
171+
handle_set_method(node, source, symbols);
172+
return;
173+
}
170174
_ => {}
171175
}
172176
}
@@ -332,6 +336,23 @@ fn handle_set_generic(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
332336
}
333337
}
334338

339+
// `setMethod("greet", "Person", function(x) ...)` registers an implementation
340+
// of the generic `greet` — it is not a new top-level definition. Emitting a
341+
// definition here produced two `function` nodes with the same name (one from
342+
// setGeneric, one from setMethod) and broke resolution. Emit a call edge to
343+
// the generic instead; the method body's calls are still picked up by the
344+
// recursive walk of the anonymous function argument.
345+
fn handle_set_method(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
346+
if let Some(name) = first_argument_value(node, source, false) {
347+
symbols.calls.push(Call {
348+
name,
349+
line: start_line(node),
350+
dynamic: None,
351+
receiver: None,
352+
});
353+
}
354+
}
355+
335356
#[cfg(test)]
336357
mod tests {
337358
use super::*;
@@ -439,6 +460,51 @@ mod tests {
439460
assert_eq!(d.kind, "function");
440461
}
441462

463+
#[test]
464+
fn set_method_does_not_duplicate_generic_definition() {
465+
// Idiomatic S4: a setGeneric followed by setMethod implementations.
466+
// Only the setGeneric should emit a definition — setMethod registers
467+
// an implementation of the generic, which we model as a call edge.
468+
let code = r#"
469+
setGeneric("greet", function(x) standardGeneric("greet"))
470+
setMethod("greet", "Person", function(x) paste("Hello", x@name))
471+
setMethod("greet", "Animal", function(x) paste("Hi", x@species))
472+
"#;
473+
let s = parse_r(code);
474+
let greet_defs: Vec<&Definition> =
475+
s.definitions.iter().filter(|d| d.name == "greet").collect();
476+
assert_eq!(
477+
greet_defs.len(),
478+
1,
479+
"expected exactly one `greet` definition, got {greet_defs:#?}",
480+
);
481+
assert_eq!(greet_defs[0].kind, "function");
482+
}
483+
484+
#[test]
485+
fn set_method_emits_call_to_generic() {
486+
// setMethod registers an implementation of the generic. The fix emits
487+
// a call edge to the generic so the dispatch relationship is visible
488+
// in the graph.
489+
let s = parse_r(
490+
"setMethod(\"greet\", \"Person\", function(x) paste(\"Hello\", x@name))\n",
491+
);
492+
let calls: Vec<&Call> = s.calls.iter().filter(|c| c.name == "greet").collect();
493+
assert_eq!(calls.len(), 1, "expected setMethod to emit one call to `greet`");
494+
}
495+
496+
#[test]
497+
fn set_method_body_calls_are_still_captured() {
498+
// The recursive walk visits the anonymous function passed to
499+
// setMethod, so calls inside the method body must still appear.
500+
let s = parse_r(
501+
"setMethod(\"greet\", \"Person\", function(x) { helper(x); validate(x) })\n",
502+
);
503+
let names: Vec<&str> = s.calls.iter().map(|c| c.name.as_str()).collect();
504+
assert!(names.contains(&"helper"), "method body call `helper` not captured");
505+
assert!(names.contains(&"validate"), "method body call `validate` not captured");
506+
}
507+
442508
#[test]
443509
fn function_with_double_arrow_assignment() {
444510
// `<<-` is super-assignment in R; the JS extractor accepts it too.

src/extractors/r.ts

Lines changed: 63 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,16 @@ function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
125125
return;
126126
}
127127

128-
if (funcName === 'setGeneric' || funcName === 'setMethod') {
128+
if (funcName === 'setGeneric') {
129129
handleSetGeneric(node, ctx);
130130
return;
131131
}
132132

133+
if (funcName === 'setMethod') {
134+
handleSetMethod(node, ctx);
135+
return;
136+
}
137+
133138
// Regular call
134139
if (funcNode.type === 'identifier') {
135140
ctx.calls.push({ name: funcName, line: node.startPosition.row + 1 });
@@ -212,63 +217,77 @@ function handleLibraryCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
212217
}
213218

214219
function handleSourceCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
215-
for (let i = 0; i < node.childCount; i++) {
216-
const child = node.child(i);
217-
if (!child || child.type !== 'arguments') continue;
218-
for (let j = 0; j < child.childCount; j++) {
219-
const arg = child.child(j);
220-
if (!arg) continue;
221-
if (arg.type === 'string') {
222-
const text = arg.text.replace(/^["']|["']$/g, '');
223-
ctx.imports.push({
224-
source: text,
225-
names: ['source'],
226-
line: node.startPosition.row + 1,
227-
});
228-
return;
229-
}
230-
}
231-
}
220+
// source() only accepts string literals — `source(varname)` is not an import.
221+
const path = firstStringArgument(node);
222+
if (path === null) return;
223+
ctx.imports.push({
224+
source: path,
225+
names: ['source'],
226+
line: node.startPosition.row + 1,
227+
});
232228
}
233229

234230
function handleSetClass(node: TreeSitterNode, ctx: ExtractorOutput): void {
235-
for (let i = 0; i < node.childCount; i++) {
236-
const child = node.child(i);
237-
if (!child || child.type !== 'arguments') continue;
238-
for (let j = 0; j < child.childCount; j++) {
239-
const arg = child.child(j);
240-
if (!arg) continue;
241-
if (arg.type === 'string') {
242-
const name = arg.text.replace(/^["']|["']$/g, '');
243-
ctx.definitions.push({
244-
name,
245-
kind: 'class',
246-
line: node.startPosition.row + 1,
247-
endLine: nodeEndLine(node),
248-
});
249-
return;
250-
}
251-
}
252-
}
231+
const name = firstStringArgument(node);
232+
if (name === null) return;
233+
ctx.definitions.push({
234+
name,
235+
kind: 'class',
236+
line: node.startPosition.row + 1,
237+
endLine: nodeEndLine(node),
238+
});
253239
}
254240

255241
function handleSetGeneric(node: TreeSitterNode, ctx: ExtractorOutput): void {
242+
const name = firstStringArgument(node);
243+
if (name === null) return;
244+
ctx.definitions.push({
245+
name,
246+
kind: 'function',
247+
line: node.startPosition.row + 1,
248+
endLine: nodeEndLine(node),
249+
});
250+
}
251+
252+
// setMethod("greet", "Person", function(x) ...) registers an implementation of
253+
// the generic `greet` — it is not a new top-level definition. Emitting a
254+
// definition here produced two `function` nodes with the same name (one from
255+
// setGeneric, one from setMethod) and broke resolution. Emit a call edge to
256+
// the generic instead; the method body's calls are still picked up by the
257+
// recursive walk of the anonymous function argument.
258+
function handleSetMethod(node: TreeSitterNode, ctx: ExtractorOutput): void {
259+
const name = firstStringArgument(node);
260+
if (name === null) return;
261+
ctx.calls.push({ name, line: node.startPosition.row + 1 });
262+
}
263+
264+
// tree-sitter-r wraps each positional argument in an `argument` node that
265+
// contains the actual `string` (or `identifier`) child, so the inner string
266+
// must be unwrapped — checking `child.type === 'string'` directly misses it.
267+
// Mirrors `first_argument_value` in the Rust extractor for parity.
268+
function firstStringArgument(node: TreeSitterNode): string | null {
256269
for (let i = 0; i < node.childCount; i++) {
257270
const child = node.child(i);
258271
if (!child || child.type !== 'arguments') continue;
259272
for (let j = 0; j < child.childCount; j++) {
260273
const arg = child.child(j);
261274
if (!arg) continue;
262275
if (arg.type === 'string') {
263-
const name = arg.text.replace(/^["']|["']$/g, '');
264-
ctx.definitions.push({
265-
name,
266-
kind: 'function',
267-
line: node.startPosition.row + 1,
268-
endLine: nodeEndLine(node),
269-
});
270-
return;
276+
return stripQuotes(arg.text);
277+
}
278+
if (arg.type === 'argument') {
279+
const valueNode = arg.childForFieldName('value');
280+
if (valueNode && valueNode.type === 'string') return stripQuotes(valueNode.text);
281+
for (let k = 0; k < arg.childCount; k++) {
282+
const inner = arg.child(k);
283+
if (inner && inner.type === 'string') return stripQuotes(inner.text);
284+
}
271285
}
272286
}
273287
}
288+
return null;
289+
}
290+
291+
function stripQuotes(text: string): string {
292+
return text.replace(/^["']|["']$/g, '');
274293
}

tests/parsers/r.test.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,61 @@ mean(c(1, 2, 3))`);
5353
expect(symbols.imports).toContainEqual(expect.objectContaining({ source: 'dplyr' }));
5454
expect(symbols.imports.some((i) => i.source === 'package')).toBe(false);
5555
});
56+
57+
it('extracts source() imports', () => {
58+
// Parity guard: native produces an import with `source: "service.R"`.
59+
// The WASM extractor previously failed silently for the same reason as
60+
// setClass/setGeneric — it didn't unwrap the `argument` node.
61+
const symbols = parseR(`source("service.R")`);
62+
expect(symbols.imports).toContainEqual(
63+
expect.objectContaining({ source: 'service.R', names: ['source'] }),
64+
);
65+
});
66+
67+
it('extracts a class definition from setClass', () => {
68+
// Parity guard: the native extractor produces a `class` definition for
69+
// `setClass(...)`; the WASM extractor previously failed silently because
70+
// it did not unwrap the `argument` node around the string literal.
71+
const symbols = parseR(`setClass("Person", representation(name = "character"))`);
72+
expect(symbols.definitions).toContainEqual(
73+
expect.objectContaining({ name: 'Person', kind: 'class' }),
74+
);
75+
});
76+
77+
it('extracts a function definition from setGeneric', () => {
78+
// Same parity guard for setGeneric — was silently broken in WASM.
79+
const symbols = parseR(`setGeneric("doIt", function(x) standardGeneric("doIt"))`);
80+
expect(symbols.definitions).toContainEqual(
81+
expect.objectContaining({ name: 'doIt', kind: 'function' }),
82+
);
83+
});
84+
85+
it('does not duplicate the generic definition when setMethod is present', () => {
86+
// Idiomatic S4: a setGeneric followed by setMethod implementations.
87+
// Only setGeneric should emit a function definition — setMethod registers
88+
// an implementation, which we model as a call edge to the generic.
89+
const symbols = parseR(`
90+
setGeneric("greet", function(x) standardGeneric("greet"))
91+
setMethod("greet", "Person", function(x) paste("Hello", x@name))
92+
setMethod("greet", "Animal", function(x) paste("Hi", x@species))
93+
`);
94+
const greetDefs = symbols.definitions.filter((d) => d.name === 'greet');
95+
expect(greetDefs).toHaveLength(1);
96+
expect(greetDefs[0]).toMatchObject({ kind: 'function' });
97+
});
98+
99+
it('emits a call to the generic for setMethod', () => {
100+
const symbols = parseR(`setMethod("greet", "Person", function(x) paste("Hello", x@name))`);
101+
const greetCalls = symbols.calls.filter((c) => c.name === 'greet');
102+
expect(greetCalls).toHaveLength(1);
103+
});
104+
105+
it('still captures calls from inside the setMethod body', () => {
106+
// The recursive walk visits the anonymous function passed to setMethod,
107+
// so calls inside the method body must still appear in ctx.calls.
108+
const symbols = parseR(`setMethod("greet", "Person", function(x) { helper(x); validate(x) })`);
109+
const names = symbols.calls.map((c) => c.name);
110+
expect(names).toContain('helper');
111+
expect(names).toContain('validate');
112+
});
56113
});

0 commit comments

Comments
 (0)