@@ -19,20 +19,51 @@ impl SymbolExtractor for FSharpExtractor {
1919fn match_fsharp_node ( node : & Node , source : & [ u8 ] , symbols : & mut FileSymbols , _depth : usize ) {
2020 match node. kind ( ) {
2121 "named_module" => handle_named_module ( node, source, symbols) ,
22+ "module_defn" => handle_module_defn ( node, source, symbols) ,
2223 "function_declaration_left" => handle_function_decl ( node, source, symbols) ,
2324 "type_definition" => handle_type_def ( node, source, symbols) ,
2425 "import_decl" => handle_import_decl ( node, source, symbols) ,
2526 "application_expression" => handle_application ( node, source, symbols) ,
2627 "dot_expression" => handle_dot_expression ( node, source, symbols) ,
28+ "value_definition" => handle_value_definition ( node, source, symbols) ,
2729 _ => { }
2830 }
2931}
3032
31- /// Find the enclosing `named_module` and return its identifier text.
33+ /// Find the enclosing module name, walking up through any number of
34+ /// `module_defn` (nested signature modules) wrappers before reaching the
35+ /// top-level `named_module`. Returns the dotted path, e.g. `Outer.Inner`.
36+ ///
37+ /// Source files use `named_module` for the top-level `module M = …` and
38+ /// the signature grammar (cargo 0.3.0) wraps nested signature modules in
39+ /// `module_defn` nodes. The WASM signature grammar currently emits ERROR
40+ /// nodes for nested signature modules so we cannot recover qualification
41+ /// there — tracked under #1161.
3242fn enclosing_module_name ( node : & Node , source : & [ u8 ] ) -> Option < String > {
33- let module = find_parent_of_type ( node, "named_module" ) ?;
34- let id = find_child ( & module, "long_identifier" ) ?;
35- Some ( node_text ( & id, source) . to_string ( ) )
43+ let mut parts: Vec < String > = Vec :: new ( ) ;
44+ let mut current = node. parent ( ) ;
45+ while let Some ( p) = current {
46+ match p. kind ( ) {
47+ "module_defn" => {
48+ if let Some ( id) = find_child ( & p, "identifier" ) {
49+ parts. push ( node_text ( & id, source) . to_string ( ) ) ;
50+ }
51+ }
52+ "named_module" => {
53+ if let Some ( id) = find_child ( & p, "long_identifier" ) {
54+ parts. push ( node_text ( & id, source) . to_string ( ) ) ;
55+ }
56+ break ;
57+ }
58+ _ => { }
59+ }
60+ current = p. parent ( ) ;
61+ }
62+ if parts. is_empty ( ) {
63+ return None ;
64+ }
65+ parts. reverse ( ) ;
66+ Some ( parts. join ( "." ) )
3667}
3768
3869fn handle_named_module ( node : & Node , source : & [ u8 ] , symbols : & mut FileSymbols ) {
@@ -52,6 +83,36 @@ fn handle_named_module(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
5283 } ) ;
5384}
5485
86+ /// Handle nested signature modules (`module Foo = ...`) emitted by the
87+ /// cargo 0.3.0 grammar as `module_defn`. Emits a `module` definition with
88+ /// the dotted parent path (e.g. `Outer.Foo`) and lets the DFS walker
89+ /// continue into child `val` declarations, which pick up the same path via
90+ /// `enclosing_module_name`.
91+ fn handle_module_defn ( node : & Node , source : & [ u8 ] , symbols : & mut FileSymbols ) {
92+ let name_node = match find_child ( node, "identifier" ) {
93+ Some ( n) => n,
94+ None => return ,
95+ } ;
96+ let raw = node_text ( & name_node, source) . to_string ( ) ;
97+ // `enclosing_module_name` walks `node.parent()` upward, so calling it on
98+ // the `module_defn` itself yields the dotted prefix of its enclosing
99+ // module(s) without including this module's own name.
100+ let qualified = match enclosing_module_name ( node, source) {
101+ Some ( prefix) if !prefix. is_empty ( ) => format ! ( "{}.{}" , prefix, raw) ,
102+ _ => raw,
103+ } ;
104+ symbols. definitions . push ( Definition {
105+ name : qualified,
106+ kind : "module" . to_string ( ) ,
107+ line : start_line ( node) ,
108+ end_line : Some ( end_line ( node) ) ,
109+ decorators : None ,
110+ complexity : None ,
111+ cfg : None ,
112+ children : None ,
113+ } ) ;
114+ }
115+
55116fn handle_function_decl ( node : & Node , source : & [ u8 ] , symbols : & mut FileSymbols ) {
56117 // function_declaration_left: first child is the function name identifier,
57118 // followed by argument_patterns.
@@ -300,3 +361,171 @@ fn handle_dot_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols)
300361 } ) ;
301362 }
302363}
364+
365+ /// Handle `val name : type` declarations in `.fsi` signature files.
366+ ///
367+ /// The signature grammar reuses the `value_definition` node kind for `val`
368+ /// declarations, distinguished from the source grammar's `let` bindings by
369+ /// the first child being the literal `val` keyword. Source-file
370+ /// `value_definition` nodes (which start with `let`) are intentionally
371+ /// ignored here to preserve `.fs` extractor parity.
372+ fn handle_value_definition ( node : & Node , source : & [ u8 ] , symbols : & mut FileSymbols ) {
373+ let first = match node. child ( 0 ) {
374+ Some ( c) => c,
375+ None => return ,
376+ } ;
377+ if first. kind ( ) != "val" {
378+ return ;
379+ }
380+
381+ let decl_left = match find_child ( node, "value_declaration_left" ) {
382+ Some ( n) => n,
383+ None => return ,
384+ } ;
385+ let name = match extract_value_name ( & decl_left, source) {
386+ Some ( n) => n,
387+ None => return ,
388+ } ;
389+
390+ let kind = if has_function_type ( node) { "function" } else { "variable" } ;
391+ let module_name = enclosing_module_name ( node, source) ;
392+ let qualified = match module_name {
393+ Some ( m) => format ! ( "{}.{}" , m, name) ,
394+ None => name,
395+ } ;
396+
397+ symbols. definitions . push ( Definition {
398+ name : qualified,
399+ kind : kind. to_string ( ) ,
400+ line : start_line ( node) ,
401+ end_line : Some ( end_line ( node) ) ,
402+ decorators : None ,
403+ complexity : None ,
404+ cfg : None ,
405+ children : None ,
406+ } ) ;
407+ }
408+
409+ fn extract_value_name ( decl_left : & Node , source : & [ u8 ] ) -> Option < String > {
410+ let pattern = find_child ( decl_left, "identifier_pattern" ) ?;
411+ let ident = find_child ( & pattern, "long_identifier_or_op" )
412+ . and_then ( |n| find_child ( & n, "identifier" ) )
413+ . or_else ( || find_child ( & pattern, "identifier" ) ) ?;
414+ Some ( node_text ( & ident, source) . to_string ( ) )
415+ }
416+
417+ fn has_function_type ( node : & Node ) -> bool {
418+ // The grammar wraps every type signature in `curried_spec`. A function type
419+ // (e.g. `val add : int -> int -> int`) contains one or more `arguments_spec`
420+ // children; a plain value (e.g. `val pi : float`) wraps a single `simple_type`.
421+ let Some ( curried) = find_child ( node, "curried_spec" ) else { return false } ;
422+ for i in 0 ..curried. child_count ( ) {
423+ if let Some ( child) = curried. child ( i) {
424+ if child. kind ( ) == "arguments_spec" {
425+ return true ;
426+ }
427+ }
428+ }
429+ false
430+ }
431+
// Unit tests for the F# extractor. Each test parses a small snippet with one
// of the two tree-sitter-fsharp grammars and inspects the extracted
// definitions. The snippets are whitespace-sensitive fixtures; keep the
// string literals exactly as written.
432+ #[ cfg( test) ]
433+ mod tests {
434+ use super :: * ;
435+ use crate :: extractors:: SymbolExtractor ;
436+ use tree_sitter:: Parser ;
437+
// Parse `code` with the source grammar (`LANGUAGE_FSHARP`) and run the
// extractor over it under the file name "test.fs". Panics if the language
// cannot be set or the parser returns no tree.
438+ fn parse_source ( code : & str ) -> FileSymbols {
439+ let mut parser = Parser :: new ( ) ;
440+ parser
441+ . set_language ( & tree_sitter_fsharp:: LANGUAGE_FSHARP . into ( ) )
442+ . unwrap ( ) ;
443+ let tree = parser. parse ( code. as_bytes ( ) , None ) . unwrap ( ) ;
444+ FSharpExtractor . extract ( & tree, code. as_bytes ( ) , "test.fs" )
445+ }
446+
// Same as `parse_source`, but uses the signature grammar
// (`LANGUAGE_SIGNATURE`) and the file name "test.fsi".
447+ fn parse_signature ( code : & str ) -> FileSymbols {
448+ let mut parser = Parser :: new ( ) ;
449+ parser
450+ . set_language ( & tree_sitter_fsharp:: LANGUAGE_SIGNATURE . into ( ) )
451+ . unwrap ( ) ;
452+ let tree = parser. parse ( code. as_bytes ( ) , None ) . unwrap ( ) ;
453+ FSharpExtractor . extract ( & tree, code. as_bytes ( ) , "test.fsi" )
454+ }
455+
// `val` declarations under a namespace are extracted unqualified: one with an
// arrow type is a "function", one with a plain type is a "variable".
456+ #[ test]
457+ fn signature_extracts_val_declarations ( ) {
458+ let s = parse_signature ( "namespace MyApp.Domain\n \n val add : int -> int -> int\n val pi : float\n " ) ;
459+ let add = s
460+ . definitions
461+ . iter ( )
462+ . find ( |d| d. name == "add" )
463+ . expect ( "val add should be extracted" ) ;
464+ assert_eq ! ( add. kind, "function" ) ;
465+ let pi = s
466+ . definitions
467+ . iter ( )
468+ . find ( |d| d. name == "pi" )
469+ . expect ( "val pi should be extracted" ) ;
470+ assert_eq ! ( pi. kind, "variable" ) ;
471+ }
472+
// Same expectations as above, but with no namespace or module header at all.
473+ #[ test]
474+ fn signature_extracts_bare_val_declarations ( ) {
475+ let s = parse_signature ( "val negate : int -> int\n val count : int\n " ) ;
476+ assert ! ( s
477+ . definitions
478+ . iter( )
479+ . any( |d| d. name == "negate" && d. kind == "function" ) ) ;
480+ assert ! ( s
481+ . definitions
482+ . iter( )
483+ . any( |d| d. name == "count" && d. kind == "variable" ) ) ;
484+ }
485+
486+ #[ test]
487+ fn source_grammar_does_not_extract_let_bindings_as_val ( ) {
488+ // `let x = 5` is a value_definition in the source grammar but its
489+ // first child is `let`, not `val`. Our handler must not extract it
490+ // (preserves prior `.fs` extraction parity — only function_declaration_left
491+ // produces definitions in source files).
492+ let s = parse_source ( "module M\n \n let x = 5\n " ) ;
493+ assert ! (
494+ s. definitions. iter( ) . all( |d| d. name != "x" ) ,
495+ "let bindings in .fs files must not be extracted as val definitions"
496+ ) ;
497+ }
498+
499+ #[ test]
500+ fn signature_qualifies_val_inside_nested_module_defn ( ) {
501+ // The cargo 0.3.0 signature grammar wraps `module Foo = ...` as a
502+ // `module_defn` node (the WASM 0.1.0 grammar emits ERROR for this
503+ // construct — tracked under #1161). The `val` declarations inside
504+ // must be qualified with the module path.
// Note that the expected names carry no `X.` prefix: the namespace is not
// part of the qualified name.
505+ let s = parse_signature ( "namespace X\n \n module Foo =\n val add : int -> int\n " ) ;
506+ assert ! (
507+ s. definitions. iter( ) . any( |d| d. name == "Foo.add" && d. kind == "function" ) ,
508+ "val add nested under `module Foo =` must be indexed as `Foo.add`, got: {:?}" ,
509+ s. definitions. iter( ) . map( |d| & d. name) . collect:: <Vec <_>>( ) ,
510+ ) ;
511+ assert ! (
512+ s. definitions. iter( ) . any( |d| d. name == "Foo" && d. kind == "module" ) ,
513+ "module Foo must be indexed as a module definition"
514+ ) ;
515+ }
516+
517+ #[ test]
518+ fn source_grammar_does_not_extract_val_mutable_class_fields ( ) {
519+ // `val mutable count: int = 0` inside a class is parsed as a `member_defn`
520+ // node in the source grammar — NOT a `value_definition` — so our
521+ // `value_definition`/`val`-first-child handler does not see it.
522+ // This regression guard makes that empirical fact explicit.
523+ let s = parse_source (
524+ "module M\n \n type C() =\n val mutable count: int = 0\n " ,
525+ ) ;
526+ assert ! (
527+ s. definitions. iter( ) . all( |d| d. name != "count" ) ,
528+ "val mutable class fields must not be extracted by the signature value_definition handler"
529+ ) ;
530+ }
531+ }
0 commit comments