Skip to content

Commit a7f4e6d

Browse files
authored
Merge branch 'main' into fix/version-aware-strip-types
2 parents 4a6787c + 4fe7e2d commit a7f4e6d

5 files changed

Lines changed: 370 additions & 5 deletions

File tree

crates/codegraph-core/src/extractors/helpers.rs

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ pub const AST_TEXT_MAX: usize = 200;
100100

101101
/// Language-specific AST node type configuration.
102102
pub struct LangAstConfig {
103+
/// Node types mapping to `"call"` kind (e.g. `call_expression`, `method_invocation`)
104+
pub call_types: &'static [&'static str],
103105
/// Node types mapping to `"new"` kind (e.g. `new_expression`, `object_creation_expression`)
104106
pub new_types: &'static [&'static str],
105107
/// Node types mapping to `"throw"` kind (e.g. `throw_statement`, `raise_statement`)
@@ -120,6 +122,7 @@ pub struct LangAstConfig {
120122
// ── Per-language configs ─────────────────────────────────────────────────────
121123

122124
pub const PYTHON_AST_CONFIG: LangAstConfig = LangAstConfig {
125+
call_types: &["call"],
123126
new_types: &[],
124127
throw_types: &["raise_statement"],
125128
await_types: &["await"],
@@ -130,6 +133,7 @@ pub const PYTHON_AST_CONFIG: LangAstConfig = LangAstConfig {
130133
};
131134

132135
pub const GO_AST_CONFIG: LangAstConfig = LangAstConfig {
136+
call_types: &["call_expression"],
133137
new_types: &[],
134138
throw_types: &[],
135139
await_types: &[],
@@ -140,6 +144,7 @@ pub const GO_AST_CONFIG: LangAstConfig = LangAstConfig {
140144
};
141145

142146
pub const RUST_AST_CONFIG: LangAstConfig = LangAstConfig {
147+
call_types: &["call_expression", "method_call_expression"],
143148
new_types: &[],
144149
throw_types: &[],
145150
await_types: &["await_expression"],
@@ -150,6 +155,7 @@ pub const RUST_AST_CONFIG: LangAstConfig = LangAstConfig {
150155
};
151156

152157
pub const JAVA_AST_CONFIG: LangAstConfig = LangAstConfig {
158+
call_types: &["method_invocation"],
153159
new_types: &["object_creation_expression"],
154160
throw_types: &["throw_statement"],
155161
await_types: &[],
@@ -160,6 +166,7 @@ pub const JAVA_AST_CONFIG: LangAstConfig = LangAstConfig {
160166
};
161167

162168
pub const CSHARP_AST_CONFIG: LangAstConfig = LangAstConfig {
169+
call_types: &["invocation_expression"],
163170
new_types: &["object_creation_expression"],
164171
throw_types: &["throw_statement", "throw_expression"],
165172
await_types: &["await_expression"],
@@ -170,6 +177,7 @@ pub const CSHARP_AST_CONFIG: LangAstConfig = LangAstConfig {
170177
};
171178

172179
pub const RUBY_AST_CONFIG: LangAstConfig = LangAstConfig {
180+
call_types: &["call", "method_call"],
173181
new_types: &[],
174182
throw_types: &[],
175183
await_types: &[],
@@ -180,6 +188,7 @@ pub const RUBY_AST_CONFIG: LangAstConfig = LangAstConfig {
180188
};
181189

182190
pub const PHP_AST_CONFIG: LangAstConfig = LangAstConfig {
191+
call_types: &["function_call_expression", "member_call_expression", "scoped_call_expression"],
183192
new_types: &["object_creation_expression"],
184193
throw_types: &["throw_expression"],
185194
await_types: &[],
@@ -229,6 +238,43 @@ fn walk_ast_nodes_with_config_depth(
229238
}
230239
let kind = node.kind();
231240

241+
// Call extraction — checked first since calls are the most common AST node kind.
242+
// Do NOT recurse into every child — only the argument list is walked below, so nested calls like `a(b())` are recorded once each.
243+
if config.call_types.contains(&kind) {
244+
let name = extract_call_name(node, source);
245+
let receiver = extract_call_receiver(node, source);
246+
let text = truncate(node_text(node, source), AST_TEXT_MAX);
247+
ast_nodes.push(AstNode {
248+
kind: "call".to_string(),
249+
name,
250+
line: start_line(node),
251+
text: Some(text),
252+
receiver,
253+
});
254+
// Recurse into arguments only — nested calls in args should be captured.
255+
// Use child_by_field_name("arguments") — immune to kind-name variation across grammars.
256+
// Falls back to kind-based matching for grammars that don't expose a field name.
257+
let args_node = node.child_by_field_name("arguments").or_else(|| {
258+
for i in 0..node.child_count() {
259+
if let Some(child) = node.child(i) {
260+
let ck = child.kind();
261+
if ck == "arguments" || ck == "argument_list" || ck == "method_arguments" {
262+
return Some(child);
263+
}
264+
}
265+
}
266+
None
267+
});
268+
if let Some(args) = args_node {
269+
for j in 0..args.child_count() {
270+
if let Some(arg) = args.child(j) {
271+
walk_ast_nodes_with_config_depth(&arg, source, ast_nodes, config, depth + 1);
272+
}
273+
}
274+
}
275+
return;
276+
}
277+
232278
if config.new_types.contains(&kind) {
233279
let name = extract_constructor_name(node, source);
234280
let text = truncate(node_text(node, source), AST_TEXT_MAX);
@@ -261,7 +307,9 @@ fn walk_ast_nodes_with_config_depth(
261307
text,
262308
receiver: None,
263309
});
264-
// Fall through to recurse children
310+
// Fall through to recurse children — captures strings, calls, etc. inside await expr.
311+
// The call_types guard at the top of the function already handles `call_expression`
312+
// nodes correctly (recurse-into-args-only), so there is no double-counting risk here.
265313
} else if config.string_types.contains(&kind) {
266314
let raw = node_text(node, source);
267315
let is_raw_string = kind.contains("raw_string");
@@ -400,6 +448,42 @@ fn extract_call_name(node: &Node, source: &[u8]) -> String {
400448
text.split('(').next().unwrap_or("?").to_string()
401449
}
402450

451+
/// Extract receiver from a call node (e.g. `obj` from `obj.method()`).
452+
/// Looks for a member-expression-like function child and extracts the object part.
453+
fn extract_call_receiver(node: &Node, source: &[u8]) -> Option<String> {
454+
// PHP: scoped_call_expression — receiver is the "scope" field (e.g. MyClass in MyClass::method())
455+
if let Some(scope) = node.child_by_field_name("scope") {
456+
return Some(node_text(&scope, source).to_string());
457+
}
458+
// Try "function" field first (JS/TS: call_expression -> member_expression)
459+
// Then "object" (Go, Python), then "receiver" (Ruby)
460+
for field in &["function", "object", "receiver"] {
461+
if let Some(fn_node) = node.child_by_field_name(field) {
462+
// JS/TS/Python: member_expression / attribute with "object" field
463+
if let Some(obj) = fn_node.child_by_field_name("object") {
464+
return Some(node_text(&obj, source).to_string());
465+
}
466+
// Go: selector_expression uses "operand" not "object"
467+
if fn_node.kind() == "selector_expression" {
468+
if let Some(operand) = fn_node.child_by_field_name("operand") {
469+
return Some(node_text(&operand, source).to_string());
470+
}
471+
}
472+
// C#: member_access_expression uses "expression" not "object"
473+
if fn_node.kind() == "member_access_expression" {
474+
if let Some(expr) = fn_node.child_by_field_name("expression") {
475+
return Some(node_text(&expr, source).to_string());
476+
}
477+
}
478+
// For Ruby/Go where the receiver is directly a field
479+
if *field == "object" || *field == "receiver" {
480+
return Some(node_text(&fn_node, source).to_string());
481+
}
482+
}
483+
}
484+
None
485+
}
486+
403487
/// Extract expression text from throw/await — skip the keyword child.
404488
fn extract_child_expression_text(node: &Node, source: &[u8]) -> Option<String> {
405489
const KEYWORDS: &[&str] = &["throw", "raise", "await", "new"];

crates/codegraph-core/src/extractors/javascript.rs

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ fn walk_node_depth(node: &Node, source: &[u8], symbols: &mut FileSymbols, depth:
518518

519519
const TEXT_MAX: usize = 200;
520520

521-
/// Walk the tree collecting new/throw/await/string/regex AST nodes.
521+
/// Walk the tree collecting call/new/throw/await/string/regex AST nodes.
522522
/// Mirrors `walkAst()` in `ast.js:216-276`.
523523
fn walk_ast_nodes(node: &Node, source: &[u8], ast_nodes: &mut Vec<AstNode>) {
524524
walk_ast_nodes_depth(node, source, ast_nodes, 0);
@@ -529,6 +529,28 @@ fn walk_ast_nodes_depth(node: &Node, source: &[u8], ast_nodes: &mut Vec<AstNode>
529529
return;
530530
}
531531
match node.kind() {
532+
"call_expression" => {
533+
let (name, receiver) = extract_js_call_ast(node, source);
534+
let text = truncate(node_text(node, source), TEXT_MAX);
535+
ast_nodes.push(AstNode {
536+
kind: "call".to_string(),
537+
name,
538+
line: start_line(node),
539+
text: Some(text),
540+
receiver,
541+
});
542+
// Recurse into arguments only — nested calls in args should be captured.
543+
if let Some(args) = node.child_by_field_name("arguments")
544+
.or_else(|| find_child(node, "arguments"))
545+
{
546+
for i in 0..args.child_count() {
547+
if let Some(arg) = args.child(i) {
548+
walk_ast_nodes_depth(&arg, source, ast_nodes, depth + 1);
549+
}
550+
}
551+
}
552+
return;
553+
}
532554
"new_expression" => {
533555
let name = extract_new_name(node, source);
534556
let text = truncate(node_text(node, source), TEXT_MAX);
@@ -698,6 +720,34 @@ fn extract_expression_text(node: &Node, source: &[u8]) -> Option<String> {
698720
Some(truncate(node_text(node, source), TEXT_MAX))
699721
}
700722

723+
/// Extract call name and optional receiver from a JS/TS `call_expression`.
724+
/// `fetch()` → ("fetch", None); `obj.method()` → ("obj.method", Some("obj"))
725+
fn extract_js_call_ast(node: &Node, source: &[u8]) -> (String, Option<String>) {
726+
if let Some(fn_node) = node.child_by_field_name("function") {
727+
match fn_node.kind() {
728+
"member_expression" => {
729+
let name = node_text(&fn_node, source).to_string();
730+
let receiver = fn_node.child_by_field_name("object")
731+
.map(|obj| node_text(&obj, source).to_string());
732+
(name, receiver)
733+
}
734+
"identifier" => {
735+
(node_text(&fn_node, source).to_string(), None)
736+
}
737+
_ => {
738+
// Computed call like `fn[key]()` — use full text before `(`
739+
let text = node_text(node, source);
740+
let name = text.split('(').next().unwrap_or("?").to_string();
741+
(name, None)
742+
}
743+
}
744+
} else {
745+
let text = node_text(node, source);
746+
let name = text.split('(').next().unwrap_or("?").to_string();
747+
(name, None)
748+
}
749+
}
750+
701751
// ── Extended kinds helpers ──────────────────────────────────────────────────
702752

703753
fn extract_js_parameters(node: &Node, source: &[u8]) -> Vec<Definition> {

src/ast-analysis/engine.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ function setupVisitors(
172172
// AST-store visitor
173173
let astVisitor: Visitor | null = null;
174174
const astTypeMap = AST_TYPE_MAPS.get(langId);
175-
if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !symbols.astNodes?.length) {
175+
if (doAst && astTypeMap && WALK_EXTENSIONS.has(ext) && !Array.isArray(symbols.astNodes)) {
176176
const nodeIdMap = new Map<string, number>();
177177
for (const row of bulkNodeIdsByFile(db, relPath)) {
178178
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);

src/features/ast.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,10 @@ export async function buildAstNodes(
9393
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
9494
}
9595

96-
if (symbols.calls) {
96+
// When native astNodes includes call entries, skip separate symbols.calls processing
97+
// to avoid duplication. Fall back to symbols.calls for WASM or older native binaries.
98+
const nativeProvidedAstNodes = Array.isArray(symbols.astNodes);
99+
if (symbols.calls && !nativeProvidedAstNodes) {
97100
for (const call of symbols.calls) {
98101
const parentDef = findParentDef(defs, call.line);
99102
let parentNodeId: number | null = null;
@@ -113,7 +116,8 @@ export async function buildAstNodes(
113116
}
114117
}
115118

116-
if (symbols.astNodes?.length) {
119+
if (Array.isArray(symbols.astNodes)) {
120+
// Native engine provided AST nodes (may be empty for files with no AST content)
117121
for (const n of symbols.astNodes) {
118122
const parentDef = findParentDef(defs, n.line);
119123
let parentNodeId: number | null = null;
@@ -132,6 +136,7 @@ export async function buildAstNodes(
132136
});
133137
}
134138
} else {
139+
// WASM fallback — walk tree if available
135140
const ext = path.extname(relPath).toLowerCase();
136141
if (WALK_EXTENSIONS.has(ext) && symbols._tree) {
137142
const astRows: AstRow[] = [];

0 commit comments

Comments
 (0)