Skip to content

Commit e3701bb

Browse files
authored
Merge branch 'main' into fix/1168-libc-verifier-derive
2 parents 260d1ef + 1a6ee7b commit e3701bb

13 files changed

Lines changed: 462 additions & 19 deletions

File tree

crates/codegraph-core/src/extractors/fsharp.rs

Lines changed: 233 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,51 @@ impl SymbolExtractor for FSharpExtractor {
1919
fn match_fsharp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) {
2020
match node.kind() {
2121
"named_module" => handle_named_module(node, source, symbols),
22+
"module_defn" => handle_module_defn(node, source, symbols),
2223
"function_declaration_left" => handle_function_decl(node, source, symbols),
2324
"type_definition" => handle_type_def(node, source, symbols),
2425
"import_decl" => handle_import_decl(node, source, symbols),
2526
"application_expression" => handle_application(node, source, symbols),
2627
"dot_expression" => handle_dot_expression(node, source, symbols),
28+
"value_definition" => handle_value_definition(node, source, symbols),
2729
_ => {}
2830
}
2931
}
3032

31-
/// Find the enclosing `named_module` and return its identifier text.
33+
/// Find the enclosing module name, walking up through any number of
34+
/// `module_defn` (nested signature modules) wrappers before reaching the
35+
/// top-level `named_module`. Returns the dotted path, e.g. `Outer.Inner`.
36+
///
37+
/// Source files use `named_module` for the top-level `module M = …` and
38+
/// the signature grammar (cargo 0.3.0) wraps nested signature modules in
39+
/// `module_defn` nodes. The WASM signature grammar currently emits ERROR
40+
/// nodes for nested signature modules so we cannot recover qualification
41+
/// there — tracked under #1161.
3242
fn enclosing_module_name(node: &Node, source: &[u8]) -> Option<String> {
33-
let module = find_parent_of_type(node, "named_module")?;
34-
let id = find_child(&module, "long_identifier")?;
35-
Some(node_text(&id, source).to_string())
43+
let mut parts: Vec<String> = Vec::new();
44+
let mut current = node.parent();
45+
while let Some(p) = current {
46+
match p.kind() {
47+
"module_defn" => {
48+
if let Some(id) = find_child(&p, "identifier") {
49+
parts.push(node_text(&id, source).to_string());
50+
}
51+
}
52+
"named_module" => {
53+
if let Some(id) = find_child(&p, "long_identifier") {
54+
parts.push(node_text(&id, source).to_string());
55+
}
56+
break;
57+
}
58+
_ => {}
59+
}
60+
current = p.parent();
61+
}
62+
if parts.is_empty() {
63+
return None;
64+
}
65+
parts.reverse();
66+
Some(parts.join("."))
3667
}
3768

3869
fn handle_named_module(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
@@ -52,6 +83,36 @@ fn handle_named_module(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
5283
});
5384
}
5485

86+
/// Handle nested signature modules (`module Foo = ...`) emitted by the
87+
/// cargo 0.3.0 grammar as `module_defn`. Emits a `module` definition with
88+
/// the dotted parent path (e.g. `Outer.Foo`) and lets the DFS walker
89+
/// continue into child `val` declarations, which pick up the same path via
90+
/// `enclosing_module_name`.
91+
fn handle_module_defn(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
92+
let name_node = match find_child(node, "identifier") {
93+
Some(n) => n,
94+
None => return,
95+
};
96+
let raw = node_text(&name_node, source).to_string();
97+
// `enclosing_module_name` walks `node.parent()` upward, so calling it on
98+
// the `module_defn` itself yields the dotted prefix of its enclosing
99+
// module(s) without including this module's own name.
100+
let qualified = match enclosing_module_name(node, source) {
101+
Some(prefix) if !prefix.is_empty() => format!("{}.{}", prefix, raw),
102+
_ => raw,
103+
};
104+
symbols.definitions.push(Definition {
105+
name: qualified,
106+
kind: "module".to_string(),
107+
line: start_line(node),
108+
end_line: Some(end_line(node)),
109+
decorators: None,
110+
complexity: None,
111+
cfg: None,
112+
children: None,
113+
});
114+
}
115+
55116
fn handle_function_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
56117
// function_declaration_left: first child is the function name identifier,
57118
// followed by argument_patterns.
@@ -300,3 +361,171 @@ fn handle_dot_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols)
300361
});
301362
}
302363
}
364+
365+
/// Handle `val name : type` declarations in `.fsi` signature files.
366+
///
367+
/// The signature grammar reuses the `value_definition` node kind for `val`
368+
/// declarations, distinguished from the source grammar's `let` bindings by
369+
/// the first child being the literal `val` keyword. Source-file
370+
/// `value_definition` nodes (which start with `let`) are intentionally
371+
/// ignored here to preserve `.fs` extractor parity.
372+
fn handle_value_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
373+
let first = match node.child(0) {
374+
Some(c) => c,
375+
None => return,
376+
};
377+
if first.kind() != "val" {
378+
return;
379+
}
380+
381+
let decl_left = match find_child(node, "value_declaration_left") {
382+
Some(n) => n,
383+
None => return,
384+
};
385+
let name = match extract_value_name(&decl_left, source) {
386+
Some(n) => n,
387+
None => return,
388+
};
389+
390+
let kind = if has_function_type(node) { "function" } else { "variable" };
391+
let module_name = enclosing_module_name(node, source);
392+
let qualified = match module_name {
393+
Some(m) => format!("{}.{}", m, name),
394+
None => name,
395+
};
396+
397+
symbols.definitions.push(Definition {
398+
name: qualified,
399+
kind: kind.to_string(),
400+
line: start_line(node),
401+
end_line: Some(end_line(node)),
402+
decorators: None,
403+
complexity: None,
404+
cfg: None,
405+
children: None,
406+
});
407+
}
408+
409+
fn extract_value_name(decl_left: &Node, source: &[u8]) -> Option<String> {
410+
let pattern = find_child(decl_left, "identifier_pattern")?;
411+
let ident = find_child(&pattern, "long_identifier_or_op")
412+
.and_then(|n| find_child(&n, "identifier"))
413+
.or_else(|| find_child(&pattern, "identifier"))?;
414+
Some(node_text(&ident, source).to_string())
415+
}
416+
417+
fn has_function_type(node: &Node) -> bool {
418+
// The grammar wraps every type signature in `curried_spec`. A function type
419+
// (e.g. `val add : int -> int -> int`) contains one or more `arguments_spec`
420+
// children; a plain value (e.g. `val pi : float`) wraps a single `simple_type`.
421+
let Some(curried) = find_child(node, "curried_spec") else { return false };
422+
for i in 0..curried.child_count() {
423+
if let Some(child) = curried.child(i) {
424+
if child.kind() == "arguments_spec" {
425+
return true;
426+
}
427+
}
428+
}
429+
false
430+
}
431+
432+
#[cfg(test)]
433+
mod tests {
434+
use super::*;
435+
use crate::extractors::SymbolExtractor;
436+
use tree_sitter::Parser;
437+
438+
fn parse_source(code: &str) -> FileSymbols {
439+
let mut parser = Parser::new();
440+
parser
441+
.set_language(&tree_sitter_fsharp::LANGUAGE_FSHARP.into())
442+
.unwrap();
443+
let tree = parser.parse(code.as_bytes(), None).unwrap();
444+
FSharpExtractor.extract(&tree, code.as_bytes(), "test.fs")
445+
}
446+
447+
fn parse_signature(code: &str) -> FileSymbols {
448+
let mut parser = Parser::new();
449+
parser
450+
.set_language(&tree_sitter_fsharp::LANGUAGE_SIGNATURE.into())
451+
.unwrap();
452+
let tree = parser.parse(code.as_bytes(), None).unwrap();
453+
FSharpExtractor.extract(&tree, code.as_bytes(), "test.fsi")
454+
}
455+
456+
#[test]
457+
fn signature_extracts_val_declarations() {
458+
let s = parse_signature("namespace MyApp.Domain\n\nval add : int -> int -> int\nval pi : float\n");
459+
let add = s
460+
.definitions
461+
.iter()
462+
.find(|d| d.name == "add")
463+
.expect("val add should be extracted");
464+
assert_eq!(add.kind, "function");
465+
let pi = s
466+
.definitions
467+
.iter()
468+
.find(|d| d.name == "pi")
469+
.expect("val pi should be extracted");
470+
assert_eq!(pi.kind, "variable");
471+
}
472+
473+
#[test]
474+
fn signature_extracts_bare_val_declarations() {
475+
let s = parse_signature("val negate : int -> int\nval count : int\n");
476+
assert!(s
477+
.definitions
478+
.iter()
479+
.any(|d| d.name == "negate" && d.kind == "function"));
480+
assert!(s
481+
.definitions
482+
.iter()
483+
.any(|d| d.name == "count" && d.kind == "variable"));
484+
}
485+
486+
#[test]
487+
fn source_grammar_does_not_extract_let_bindings_as_val() {
488+
// `let x = 5` is a value_definition in the source grammar but its
489+
// first child is `let`, not `val`. Our handler must not extract it
490+
// (preserves prior `.fs` extraction parity — only function_declaration_left
491+
// produces definitions in source files).
492+
let s = parse_source("module M\n\nlet x = 5\n");
493+
assert!(
494+
s.definitions.iter().all(|d| d.name != "x"),
495+
"let bindings in .fs files must not be extracted as val definitions"
496+
);
497+
}
498+
499+
#[test]
500+
fn signature_qualifies_val_inside_nested_module_defn() {
501+
// The cargo 0.3.0 signature grammar wraps `module Foo = ...` as a
502+
// `module_defn` node (the WASM 0.1.0 grammar emits ERROR for this
503+
// construct — tracked under #1161). The `val` declarations inside
504+
// must be qualified with the module path.
505+
let s = parse_signature("namespace X\n\nmodule Foo =\n val add : int -> int\n");
506+
assert!(
507+
s.definitions.iter().any(|d| d.name == "Foo.add" && d.kind == "function"),
508+
"val add nested under `module Foo =` must be indexed as `Foo.add`, got: {:?}",
509+
s.definitions.iter().map(|d| &d.name).collect::<Vec<_>>(),
510+
);
511+
assert!(
512+
s.definitions.iter().any(|d| d.name == "Foo" && d.kind == "module"),
513+
"module Foo must be indexed as a module definition"
514+
);
515+
}
516+
517+
#[test]
518+
fn source_grammar_does_not_extract_val_mutable_class_fields() {
519+
// `val mutable count: int = 0` inside a class is parsed as a `member_defn`
520+
// node in the source grammar — NOT a `value_definition` — so our
521+
// `value_definition`/`val`-first-child handler does not see it.
522+
// This regression guard makes that empirical fact explicit.
523+
let s = parse_source(
524+
"module M\n\ntype C() =\n val mutable count: int = 0\n",
525+
);
526+
assert!(
527+
s.definitions.iter().all(|d| d.name != "count"),
528+
"val mutable class fields must not be extracted by the signature value_definition handler"
529+
);
530+
}
531+
}

crates/codegraph-core/src/extractors/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ pub fn extract_symbols_with_opts(
140140
LanguageKind::Ocaml | LanguageKind::OcamlInterface => {
141141
ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes)
142142
}
143-
LanguageKind::FSharp => {
143+
LanguageKind::FSharp | LanguageKind::FSharpSignature => {
144144
fsharp::FSharpExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes)
145145
}
146146
LanguageKind::ObjC => {

crates/codegraph-core/src/parser_registry.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub enum LanguageKind {
2828
Ocaml,
2929
OcamlInterface,
3030
FSharp,
31+
FSharpSignature,
3132
ObjC,
3233
Gleam,
3334
Julia,
@@ -70,6 +71,7 @@ impl LanguageKind {
7071
Self::Ocaml => "ocaml",
7172
Self::OcamlInterface => "ocaml-interface",
7273
Self::FSharp => "fsharp",
74+
Self::FSharpSignature => "fsharp-signature",
7375
Self::ObjC => "objc",
7476
Self::Gleam => "gleam",
7577
Self::Julia => "julia",
@@ -120,7 +122,8 @@ impl LanguageKind {
120122
"hs" => Some(Self::Haskell),
121123
"ml" => Some(Self::Ocaml),
122124
"mli" => Some(Self::OcamlInterface),
123-
"fs" | "fsx" | "fsi" => Some(Self::FSharp),
125+
"fs" | "fsx" => Some(Self::FSharp),
126+
"fsi" => Some(Self::FSharpSignature),
124127
"m" => Some(Self::ObjC),
125128
"gleam" => Some(Self::Gleam),
126129
"jl" => Some(Self::Julia),
@@ -165,6 +168,7 @@ impl LanguageKind {
165168
"ocaml" => Some(Self::Ocaml),
166169
"ocaml-interface" => Some(Self::OcamlInterface),
167170
"fsharp" => Some(Self::FSharp),
171+
"fsharp-signature" => Some(Self::FSharpSignature),
168172
"objc" => Some(Self::ObjC),
169173
"gleam" => Some(Self::Gleam),
170174
"julia" => Some(Self::Julia),
@@ -207,6 +211,7 @@ impl LanguageKind {
207211
Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
208212
Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(),
209213
Self::FSharp => tree_sitter_fsharp::LANGUAGE_FSHARP.into(),
214+
Self::FSharpSignature => tree_sitter_fsharp::LANGUAGE_SIGNATURE.into(),
210215
Self::ObjC => tree_sitter_objc::LANGUAGE.into(),
211216
Self::Gleam => tree_sitter_gleam::LANGUAGE.into(),
212217
Self::Julia => tree_sitter_julia::LANGUAGE.into(),
@@ -232,8 +237,8 @@ impl LanguageKind {
232237
&[
233238
JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C,
234239
Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml,
235-
OcamlInterface, FSharp, ObjC, Gleam, Julia, Cuda, Clojure, Erlang, Groovy, R, Solidity,
236-
Verilog,
240+
OcamlInterface, FSharp, FSharpSignature, ObjC, Gleam, Julia, Cuda, Clojure, Erlang,
241+
Groovy, R, Solidity, Verilog,
237242
]
238243
}
239244
}
@@ -304,6 +309,7 @@ mod tests {
304309
| LanguageKind::Ocaml
305310
| LanguageKind::OcamlInterface
306311
| LanguageKind::FSharp
312+
| LanguageKind::FSharpSignature
307313
| LanguageKind::ObjC
308314
| LanguageKind::Gleam
309315
| LanguageKind::Julia
@@ -320,7 +326,7 @@ mod tests {
320326
// Because both checks require the same manual update, they reinforce
321327
// each other: a developer who updates the match is reminded to also
322328
// update `all()` and this count.
323-
const EXPECTED_LEN: usize = 35;
329+
const EXPECTED_LEN: usize = 36;
324330
assert_eq!(
325331
LanguageKind::all().len(),
326332
EXPECTED_LEN,

package-lock.json

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@
162162
"tree-sitter-dart": "^1.0.0",
163163
"tree-sitter-elixir": "^0.3.5",
164164
"tree-sitter-erlang": "github:WhatsApp/tree-sitter-erlang#semver:*",
165-
"tree-sitter-fsharp": "^0.1.0",
165+
"tree-sitter-fsharp": "https://github.com/ionide/tree-sitter-fsharp/archive/refs/tags/0.3.0.tar.gz",
166166
"tree-sitter-gleam": "github:gleam-lang/tree-sitter-gleam",
167167
"tree-sitter-go": "^0.25.0",
168168
"tree-sitter-groovy": "^0.1.2",

scripts/build-wasm.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ const grammars = [
206206
{ name: 'tree-sitter-ocaml', pkg: 'tree-sitter-ocaml', sub: 'grammars/ocaml' },
207207
{ name: 'tree-sitter-ocaml_interface', pkg: 'tree-sitter-ocaml', sub: 'grammars/interface' },
208208
{ name: 'tree-sitter-fsharp', pkg: 'tree-sitter-fsharp', sub: 'fsharp' },
209+
{ name: 'tree-sitter-fsharp_signature', pkg: 'tree-sitter-fsharp', sub: 'fsharp_signature' },
209210
{ name: 'tree-sitter-gleam', pkg: 'tree-sitter-gleam', sub: null },
210211
{ name: 'tree-sitter-clojure', pkg: 'tree-sitter-clojure', sub: null },
211212
{ name: 'tree-sitter-julia', pkg: 'tree-sitter-julia', sub: null },

0 commit comments

Comments
 (0)