@@ -100,6 +100,8 @@ pub const AST_TEXT_MAX: usize = 200;
100100
101101/// Language-specific AST node type configuration.
102102pub struct LangAstConfig {
103+ /// Node types mapping to `"call"` kind (e.g. `call_expression`, `method_invocation`)
104+ pub call_types : & ' static [ & ' static str ] ,
103105 /// Node types mapping to `"new"` kind (e.g. `new_expression`, `object_creation_expression`)
104106 pub new_types : & ' static [ & ' static str ] ,
105107 /// Node types mapping to `"throw"` kind (e.g. `throw_statement`, `raise_statement`)
@@ -120,6 +122,7 @@ pub struct LangAstConfig {
120122// ── Per-language configs ─────────────────────────────────────────────────────
121123
122124pub const PYTHON_AST_CONFIG : LangAstConfig = LangAstConfig {
125+ call_types : & [ "call" ] ,
123126 new_types : & [ ] ,
124127 throw_types : & [ "raise_statement" ] ,
125128 await_types : & [ "await" ] ,
@@ -130,6 +133,7 @@ pub const PYTHON_AST_CONFIG: LangAstConfig = LangAstConfig {
130133} ;
131134
132135pub const GO_AST_CONFIG : LangAstConfig = LangAstConfig {
136+ call_types : & [ "call_expression" ] ,
133137 new_types : & [ ] ,
134138 throw_types : & [ ] ,
135139 await_types : & [ ] ,
@@ -140,6 +144,7 @@ pub const GO_AST_CONFIG: LangAstConfig = LangAstConfig {
140144} ;
141145
142146pub const RUST_AST_CONFIG : LangAstConfig = LangAstConfig {
147+ call_types : & [ "call_expression" , "method_call_expression" ] ,
143148 new_types : & [ ] ,
144149 throw_types : & [ ] ,
145150 await_types : & [ "await_expression" ] ,
@@ -150,6 +155,7 @@ pub const RUST_AST_CONFIG: LangAstConfig = LangAstConfig {
150155} ;
151156
152157pub const JAVA_AST_CONFIG : LangAstConfig = LangAstConfig {
158+ call_types : & [ "method_invocation" ] ,
153159 new_types : & [ "object_creation_expression" ] ,
154160 throw_types : & [ "throw_statement" ] ,
155161 await_types : & [ ] ,
@@ -160,6 +166,7 @@ pub const JAVA_AST_CONFIG: LangAstConfig = LangAstConfig {
160166} ;
161167
162168pub const CSHARP_AST_CONFIG : LangAstConfig = LangAstConfig {
169+ call_types : & [ "invocation_expression" ] ,
163170 new_types : & [ "object_creation_expression" ] ,
164171 throw_types : & [ "throw_statement" , "throw_expression" ] ,
165172 await_types : & [ "await_expression" ] ,
@@ -170,6 +177,7 @@ pub const CSHARP_AST_CONFIG: LangAstConfig = LangAstConfig {
170177} ;
171178
172179pub const RUBY_AST_CONFIG : LangAstConfig = LangAstConfig {
180+ call_types : & [ "call" , "method_call" ] ,
173181 new_types : & [ ] ,
174182 throw_types : & [ ] ,
175183 await_types : & [ ] ,
@@ -180,6 +188,7 @@ pub const RUBY_AST_CONFIG: LangAstConfig = LangAstConfig {
180188} ;
181189
182190pub const PHP_AST_CONFIG : LangAstConfig = LangAstConfig {
191+ call_types : & [ "function_call_expression" , "member_call_expression" , "scoped_call_expression" ] ,
183192 new_types : & [ "object_creation_expression" ] ,
184193 throw_types : & [ "throw_expression" ] ,
185194 await_types : & [ ] ,
@@ -229,6 +238,43 @@ fn walk_ast_nodes_with_config_depth(
229238 }
230239 let kind = node. kind ( ) ;
231240
241+ // Call extraction — checked first since calls are the most common AST node kind.
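242+ // Do NOT recurse into every child: only the argument list is walked below (the callee expression is skipped), so a nested call like the `b()` in `a(b())` is recorded exactly once.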
243+ if config. call_types . contains ( & kind) {
244+ let name = extract_call_name ( node, source) ;
245+ let receiver = extract_call_receiver ( node, source) ;
246+ let text = truncate ( node_text ( node, source) , AST_TEXT_MAX ) ;
247+ ast_nodes. push ( AstNode {
248+ kind : "call" . to_string ( ) ,
249+ name,
250+ line : start_line ( node) ,
251+ text : Some ( text) ,
252+ receiver,
253+ } ) ;
254+ // Recurse into arguments only — nested calls in args should be captured.
255+ // Use child_by_field_name("arguments") — immune to kind-name variation across grammars.
256+ // Falls back to kind-based matching for grammars that don't expose a field name.
257+ let args_node = node. child_by_field_name ( "arguments" ) . or_else ( || {
258+ for i in 0 ..node. child_count ( ) {
259+ if let Some ( child) = node. child ( i) {
260+ let ck = child. kind ( ) ;
261+ if ck == "arguments" || ck == "argument_list" || ck == "method_arguments" {
262+ return Some ( child) ;
263+ }
264+ }
265+ }
266+ None
267+ } ) ;
268+ if let Some ( args) = args_node {
269+ for j in 0 ..args. child_count ( ) {
270+ if let Some ( arg) = args. child ( j) {
271+ walk_ast_nodes_with_config_depth ( & arg, source, ast_nodes, config, depth + 1 ) ;
272+ }
273+ }
274+ }
275+ return ;
276+ }
277+
232278 if config. new_types . contains ( & kind) {
233279 let name = extract_constructor_name ( node, source) ;
234280 let text = truncate ( node_text ( node, source) , AST_TEXT_MAX ) ;
@@ -261,7 +307,9 @@ fn walk_ast_nodes_with_config_depth(
261307 text,
262308 receiver : None ,
263309 } ) ;
264- // Fall through to recurse children
310+ // Fall through to recurse children — captures strings, calls, etc. inside await expr.
311+ // The call_types guard at the top of the function already handles `call_expression`
312+ // nodes correctly (recurse-into-args-only), so there is no double-counting risk here.
265313 } else if config. string_types . contains ( & kind) {
266314 let raw = node_text ( node, source) ;
267315 let is_raw_string = kind. contains ( "raw_string" ) ;
@@ -400,6 +448,42 @@ fn extract_call_name(node: &Node, source: &[u8]) -> String {
400448 text. split ( '(' ) . next ( ) . unwrap_or ( "?" ) . to_string ( )
401449}
402450
451+ /// Extract receiver from a call node (e.g. `obj` from `obj.method()`).
452+ /// Looks for a member-expression-like function child and extracts the object part.
453+ fn extract_call_receiver ( node : & Node , source : & [ u8 ] ) -> Option < String > {
454+ // PHP: scoped_call_expression — receiver is the "scope" field (e.g. MyClass in MyClass::method())
455+ if let Some ( scope) = node. child_by_field_name ( "scope" ) {
456+ return Some ( node_text ( & scope, source) . to_string ( ) ) ;
457+ }
458+ // Try "function" field first (JS/TS: call_expression -> member_expression)
459+ // Then "object" (Go, Python), then "receiver" (Ruby)
460+ for field in & [ "function" , "object" , "receiver" ] {
461+ if let Some ( fn_node) = node. child_by_field_name ( field) {
462+ // JS/TS/Python: member_expression / attribute with "object" field
463+ if let Some ( obj) = fn_node. child_by_field_name ( "object" ) {
464+ return Some ( node_text ( & obj, source) . to_string ( ) ) ;
465+ }
466+ // Go: selector_expression uses "operand" not "object"
467+ if fn_node. kind ( ) == "selector_expression" {
468+ if let Some ( operand) = fn_node. child_by_field_name ( "operand" ) {
469+ return Some ( node_text ( & operand, source) . to_string ( ) ) ;
470+ }
471+ }
472+ // C#: member_access_expression uses "expression" not "object"
473+ if fn_node. kind ( ) == "member_access_expression" {
474+ if let Some ( expr) = fn_node. child_by_field_name ( "expression" ) {
475+ return Some ( node_text ( & expr, source) . to_string ( ) ) ;
476+ }
477+ }
478+ // For Ruby/Go where the receiver is directly a field
479+ if * field == "object" || * field == "receiver" {
480+ return Some ( node_text ( & fn_node, source) . to_string ( ) ) ;
481+ }
482+ }
483+ }
484+ None
485+ }
486+
403487/// Extract expression text from throw/await — skip the keyword child.
404488fn extract_child_expression_text ( node : & Node , source : & [ u8 ] ) -> Option < String > {
405489 const KEYWORDS : & [ & str ] = & [ "throw" , "raise" , "await" , "new" ] ;
0 commit comments