From b47d387652e4dd6bb406df254b0c278d56a4c28f Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 03:38:28 -0600 Subject: [PATCH 1/4] feat(native): port Julia extractor to Rust Adds tree-sitter-julia dependency and native extractor matching the WASM-side behavior for Julia symbol, import, and call extraction. Part of #1071 --- Cargo.lock | 11 + crates/codegraph-core/Cargo.toml | 1 + crates/codegraph-core/src/change_detection.rs | 3 +- .../codegraph-core/src/extractors/helpers.rs | 10 + crates/codegraph-core/src/extractors/julia.rs | 618 ++++++++++++++++++ crates/codegraph-core/src/extractors/mod.rs | 4 + crates/codegraph-core/src/file_collector.rs | 3 +- crates/codegraph-core/src/parser_registry.rs | 12 +- src/ast-analysis/rules/index.ts | 8 + 9 files changed, 664 insertions(+), 6 deletions(-) create mode 100644 crates/codegraph-core/src/extractors/julia.rs diff --git a/Cargo.lock b/Cargo.lock index 413504b0d..62752b9fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,6 +91,7 @@ dependencies = [ "tree-sitter-hcl", "tree-sitter-java", "tree-sitter-javascript", + "tree-sitter-julia", "tree-sitter-kotlin-sg", "tree-sitter-lua", "tree-sitter-ocaml", @@ -839,6 +840,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-julia" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4144731a178812ee867619b1e98b3b91e54c1652304b26e5ebe3175b701de323" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-kotlin-sg" version = "0.4.0" diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index df4361e17..dc33bfb9f 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -35,6 +35,7 @@ tree-sitter-dart = "0.0.4" tree-sitter-zig = "1" tree-sitter-haskell = "0.23" tree-sitter-ocaml = "0.24" +tree-sitter-julia = "0.23" rayon = "1" ignore = "0.4" globset = "0.4" diff --git 
a/crates/codegraph-core/src/change_detection.rs b/crates/codegraph-core/src/change_detection.rs index 08e4b7419..db5da3e55 100644 --- a/crates/codegraph-core/src/change_detection.rs +++ b/crates/codegraph-core/src/change_detection.rs @@ -774,7 +774,7 @@ mod tests { #[test] fn detect_removed_skips_unsupported_extensions() { - // Files in WASM-only languages (Clojure, Gleam, Julia, F#) live in + // Files in WASM-only languages (Clojure, Gleam, F#) live in // `file_hashes` because the JS-side WASM backfill writes them, but // Rust's narrower file_collector never collects them. Without this // skip, every incremental rebuild would flag them as removed and @@ -783,7 +783,6 @@ mod tests { for path in [ "tests/fixtures/clojure/main.clj", "tests/fixtures/gleam/main.gleam", - "tests/fixtures/julia/main.jl", "tests/fixtures/fsharp/Main.fs", ] { existing.insert( diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index b02531896..a1e800f63 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -360,6 +360,16 @@ pub const OCAML_AST_CONFIG: LangAstConfig = LangAstConfig { string_prefixes: &[], }; +pub const JULIA_AST_CONFIG: LangAstConfig = LangAstConfig { + new_types: &[], + throw_types: &[], + await_types: &[], + string_types: &["string_literal", "prefixed_string_literal"], + regex_types: &[], + quote_chars: &['"'], + string_prefixes: &[], +}; + // ── Generic AST node walker ────────────────────────────────────────────────── /// Node types that represent identifiers across languages. 
diff --git a/crates/codegraph-core/src/extractors/julia.rs b/crates/codegraph-core/src/extractors/julia.rs
new file mode 100644
index 000000000..67e82d13e
--- /dev/null
+++ b/crates/codegraph-core/src/extractors/julia.rs
@@ -0,0 +1,618 @@
+use tree_sitter::{Node, Tree};
+use crate::cfg::build_function_cfg;
+use crate::complexity::compute_all_metrics;
+use crate::types::*;
+use super::helpers::*;
+use super::SymbolExtractor;
+
+/// Symbol extractor for Julia sources parsed with tree-sitter.
+pub struct JuliaExtractor;
+
+impl SymbolExtractor for JuliaExtractor {
+    /// Build the `FileSymbols` for `file_path` from an already-parsed `tree`.
+    ///
+    /// Two passes run over the same root node: `walk_julia` records
+    /// definitions, class relations, imports and calls, then
+    /// `walk_ast_nodes_with_config` fills `ast_nodes` using `JULIA_AST_CONFIG`.
+    fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols {
+        let root = tree.root_node();
+        let mut collected = FileSymbols::new(file_path.to_string());
+        // The walk starts at top level, i.e. outside any enclosing module.
+        walk_julia(&root, source, &mut collected, None);
+        walk_ast_nodes_with_config(&root, source, &mut collected.ast_nodes, &JULIA_AST_CONFIG);
+        collected
+    }
+}
+
+/// Walk Julia tree threading the current enclosing module name. The JS
+/// extractor (`src/extractors/julia.ts`) tracks `currentModule` so that
+/// definitions inside `module Foo ... end` are prefixed `Foo.bar`. The
+/// generic `walk_tree` helper cannot pass extra state, so we open-code a
+/// recursive walker here.
+fn walk_julia(
+    node: &Node,
+    source: &[u8],
+    symbols: &mut FileSymbols,
+    current_module: Option<&str>,
+) {
+    // Dispatch on the node kind. Only `module_definition` yields a value: the
+    // name of the module it opens, which becomes the prefix for everything
+    // nested inside it. Every other handler only records into `symbols`.
+    let opened_module: Option<String> = match node.kind() {
+        "module_definition" => handle_module_def(node, source, symbols),
+        "function_definition" => {
+            handle_function_def(node, source, symbols, current_module);
+            None
+        }
+        "assignment" => {
+            handle_assignment(node, source, symbols, current_module);
+            None
+        }
+        "struct_definition" => {
+            handle_struct_def(node, source, symbols);
+            None
+        }
+        "abstract_definition" => {
+            handle_abstract_def(node, source, symbols);
+            None
+        }
+        "macro_definition" => {
+            handle_macro_def(node, source, symbols, current_module);
+            None
+        }
+        "import_statement" | "using_statement" => {
+            handle_import(node, source, symbols);
+            None
+        }
+        "call_expression" => {
+            handle_call(node, source, symbols);
+            None
+        }
+        _ => None,
+    };
+
+    // Children see the module opened by this node, or else the enclosing one
+    // (this also covers a module node for which no name could be found).
+    // Borrowing the inherited name avoids allocating a fresh `String` for
+    // every node visited inside a module.
+    let child_module = opened_module.as_deref().or(current_module);
+
+    for i in 0..node.child_count() {
+        if let Some(child) = node.child(i) {
+            walk_julia(&child, source, symbols, child_module);
+        }
+    }
+}
+
+/// Record a `module` definition for `node` and return the module's name.
+///
+/// The name is read from the node's `name` field, falling back to a direct
+/// `identifier` child. When neither exists nothing is recorded and `None` is
+/// returned, so the caller keeps using the enclosing module name.
+fn handle_module_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) -> Option<String> {
+    let name_node = node
+        .child_by_field_name("name")
+        .or_else(|| find_child(node, "identifier"))?;
+    let name = node_text(&name_node, source).to_string();
+
+    symbols.definitions.push(Definition {
+        name: name.clone(),
+        kind: "module".to_string(),
+        line: start_line(node),
+        end_line: Some(end_line(node)),
+        decorators: None,
+        complexity: None,
+        cfg: None,
+        children: None,
+    });
+
+    Some(name)
+}
+
+/// Locate the `call_expression` that carries a definition's name and
+/// arguments. tree-sitter-julia wraps the call signature of a
+/// `function_definition` / `macro_definition` in a `signature` node whose
+/// first child is the `call_expression` — `find_child` only inspects direct
+/// children, so we unwrap one level explicitly, and otherwise fall back to a
+/// `call_expression` that is a direct child of the definition node.
+fn signature_call<'a>(node: &Node<'a>) -> Option> { + if let Some(sig) = find_child(node, "signature") { + return find_child(&sig, "call_expression"); + } + find_child(node, "call_expression") +} + +fn handle_function_def( + node: &Node, + source: &[u8], + symbols: &mut FileSymbols, + current_module: Option<&str>, +) { + if let Some(call_sig) = signature_call(node) { + if let Some(func_name_node) = call_sig.child(0) { + let base = node_text(&func_name_node, source); + let name = match current_module { + Some(m) => format!("{}.{}", m, base), + None => base.to_string(), + }; + let params = extract_julia_params(&call_sig, source); + symbols.definitions.push(Definition { + name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "julia"), + cfg: build_function_cfg(node, "julia", source), + children: opt_children(params), + }); + return; + } + } + + // Fallback: look for identifier directly + let name_node = match node + .child_by_field_name("name") + .or_else(|| find_child(node, "identifier")) + { + Some(n) => n, + None => return, + }; + let base = node_text(&name_node, source); + let name = match current_module { + Some(m) => format!("{}.{}", m, base), + None => base.to_string(), + }; + symbols.definitions.push(Definition { + name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "julia"), + cfg: build_function_cfg(node, "julia", source), + children: None, + }); +} + +fn handle_assignment( + node: &Node, + source: &[u8], + symbols: &mut FileSymbols, + current_module: Option<&str>, +) { + // Short function form: `add(x, y) = x + y` → LHS is a call_expression. 
+ let lhs = match node.child(0) { + Some(c) => c, + None => return, + }; + if lhs.kind() != "call_expression" { + return; + } + let func_name_node = match lhs.child(0) { + Some(c) => c, + None => return, + }; + let base = node_text(&func_name_node, source); + let name = match current_module { + Some(m) => format!("{}.{}", m, base), + None => base.to_string(), + }; + let params = extract_julia_params(&lhs, source); + + symbols.definitions.push(Definition { + name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "julia"), + cfg: build_function_cfg(node, "julia", source), + children: opt_children(params), + }); +} + +fn handle_struct_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // struct_definition: `struct` type_head `end` + // type_head is either a bare `identifier` (no supertype) or a + // `binary_expression` of the form `Name <: Super`. + let type_head = match find_child(node, "type_head") { + Some(th) => th, + None => return, + }; + + let (name_node, supertype): (Node, Option) = if let Some(bin) = + find_child(&type_head, "binary_expression") + { + // First identifier is the struct name, last identifier (after `<:`) is the supertype. 
+ let mut name_id: Option = None; + let mut super_id: Option = None; + for i in 0..bin.child_count() { + if let Some(c) = bin.child(i) { + if c.kind() == "identifier" { + if name_id.is_none() { + name_id = Some(c); + } else { + super_id = Some(c); + } + } + } + } + match name_id { + Some(n) => (n, super_id), + None => return, + } + } else if let Some(id) = find_child(&type_head, "identifier") { + (id, None) + } else { + return; + }; + + let struct_name = node_text(&name_node, source).to_string(); + + let mut children: Vec = Vec::new(); + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + if child.kind() == "typed_expression" { + if let Some(field_name) = find_child(&child, "identifier") { + children.push(child_def( + node_text(&field_name, source).to_string(), + "property", + start_line(&child), + )); + } + } else if child.kind() == "identifier" { + // Plain identifier fields (no type annotation) appear as direct + // identifier children of struct_definition. The type_head is a + // separate node so there is nothing to filter out here. + children.push(child_def( + node_text(&child, source).to_string(), + "property", + start_line(&child), + )); + } + } + + if let Some(sup) = supertype { + symbols.classes.push(ClassRelation { + name: struct_name.clone(), + extends: Some(node_text(&sup, source).to_string()), + implements: None, + line: start_line(node), + }); + } + + symbols.definitions.push(Definition { + name: struct_name, + kind: "struct".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); +} + +fn handle_abstract_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // abstract_definition: `abstract type` type_head `end` + // type_head wraps the name identifier (or a `Name <: Super` binary_expr). 
+ let name_node = match find_child(node, "type_head") { + Some(th) => find_child(&th, "identifier") + .or_else(|| { + find_child(&th, "binary_expression") + .and_then(|bin| find_child(&bin, "identifier")) + }) + .unwrap_or(th), + None => match find_child(node, "identifier") { + Some(n) => n, + None => return, + }, + }; + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_macro_def( + node: &Node, + source: &[u8], + symbols: &mut FileSymbols, + current_module: Option<&str>, +) { + // macro_definition: `macro` signature/call_expression body `end`. + // The name lives in the same shape as a function signature. + let name_node = if let Some(call_sig) = signature_call(node) { + call_sig.child(0) + } else { + node.child_by_field_name("name") + .or_else(|| find_child(node, "identifier")) + }; + let name_node = match name_node { + Some(n) => n, + None => return, + }; + let base = node_text(&name_node, source); + let name = match current_module { + Some(m) => format!("{}.@{}", m, base), + None => format!("@{}", base), + }; + symbols.definitions.push(Definition { + name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // tree-sitter-julia shapes: + // `using LinearAlgebra` → using_statement [ using, identifier ] + // `using ..Repository` → using_statement [ using, import_path[..] ] + // `import Foo.Bar` → import_statement [ import, scoped_identifier ] + // `import .X` → import_statement [ import, import_path[.] 
] + // `import Base: show` → import_statement [ import, selected_import[Base, show] ] + // + // We collect every meaningful sub-node and derive `source` from the first. + let mut names: Vec = Vec::new(); + let mut source_str = String::new(); + + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + match child.kind() { + "identifier" | "scoped_identifier" => { + let txt = node_text(&child, source); + if source_str.is_empty() { + source_str = txt.to_string(); + } + let last = txt.rsplit('.').next().unwrap_or(txt); + names.push(last.to_string()); + } + "import_path" => { + // Use the trailing identifier as the module reference. + let txt = node_text(&child, source); + let stripped = txt.trim_start_matches('.'); + if source_str.is_empty() { + source_str = stripped.to_string(); + } + let last = stripped.rsplit('.').next().unwrap_or(stripped); + if !last.is_empty() { + names.push(last.to_string()); + } + } + "selected_import" => { + // First identifier is the source module; the rest are imported names. + let mut first = true; + for j in 0..child.child_count() { + let Some(part) = child.child(j) else { continue }; + if part.kind() == "identifier" { + let txt = node_text(&part, source).to_string(); + if first { + if source_str.is_empty() { + source_str = txt.clone(); + } + first = false; + } else { + names.push(txt); + } + } + } + } + _ => {} + } + } + + if !source_str.is_empty() { + let names = if names.is_empty() { vec![source_str.clone()] } else { names }; + symbols + .imports + .push(Import::new(source_str, names, start_line(node))); + } +} + +fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // Skip when this call is the LHS of an assignment (it's a short-form + // function definition signature). 
+ if let Some(parent) = node.parent() { + if parent.kind() == "assignment" { + if let Some(first) = parent.child(0) { + if first.id() == node.id() { + return; + } + } + } + // Skip when this call is the signature of a function/macro definition. + // In tree-sitter-julia the signature lives inside a `signature` node + // whose parent is `function_definition` or `macro_definition`. Body + // calls (e.g. `println(name)` inside `function greet ... end`) appear + // as direct children of `function_definition` and MUST be recorded — + // do not blanket-skip on that parent kind. + if parent.kind() == "signature" { + if let Some(grand) = parent.parent() { + if matches!(grand.kind(), "function_definition" | "macro_definition") { + return; + } + } + } + } + + let func_node = match node.child(0) { + Some(n) => n, + None => return, + }; + + match func_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&func_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + "field_expression" | "scoped_identifier" => { + let raw = node_text(&func_node, source); + let parts: Vec<&str> = raw.split('.').collect(); + if parts.len() >= 2 { + let last = parts.last().copied().unwrap_or(""); + let receiver = parts[..parts.len() - 1].join("."); + symbols.calls.push(Call { + name: last.to_string(), + line: start_line(node), + dynamic: None, + receiver: Some(receiver), + }); + } else { + symbols.calls.push(Call { + name: raw.to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + } + _ => {} + } +} + +fn extract_julia_params(call_expr: &Node, source: &[u8]) -> Vec { + let mut params: Vec = Vec::new(); + let arg_list = match find_child(call_expr, "argument_list") + .or_else(|| find_child(call_expr, "tuple_expression")) + { + Some(a) => a, + None => return params, + }; + + for i in 0..arg_list.child_count() { + let Some(child) = arg_list.child(i) else { continue }; + match child.kind() { + 
"identifier" => { + params.push(child_def( + node_text(&child, source).to_string(), + "parameter", + start_line(&child), + )); + } + "typed_parameter" | "typed_expression" | "optional_parameter" | "default_parameter" => { + if let Some(name_node) = find_child(&child, "identifier") { + params.push(child_def( + node_text(&name_node, source).to_string(), + "parameter", + start_line(&child), + )); + } + } + _ => {} + } + } + params +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_jl(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_julia::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + JuliaExtractor.extract(&tree, code.as_bytes(), "test.jl") + } + + #[test] + fn finds_function() { + let s = parse_jl("function greet(name)\n println(name)\nend\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"greet")); + let g = s.definitions.iter().find(|d| d.name == "greet").unwrap(); + assert_eq!(g.kind, "function"); + } + + #[test] + fn finds_short_form_function() { + let s = parse_jl("add(x, y) = x + y\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"add")); + let add = s.definitions.iter().find(|d| d.name == "add").unwrap(); + assert_eq!(add.kind, "function"); + let params: Vec<&str> = add + .children + .as_ref() + .map(|c| c.iter().map(|p| p.name.as_str()).collect()) + .unwrap_or_default(); + assert!(params.contains(&"x")); + assert!(params.contains(&"y")); + } + + #[test] + fn module_prefixes_inner_functions() { + let s = parse_jl("module Foo\n function bar()\n end\nend\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Foo")); + assert!(names.contains(&"Foo.bar")); + } + + #[test] + fn extracts_struct_with_fields_and_supertype() { + let s = parse_jl("struct 
Point <: AbstractPoint\n x::Int\n y::Int\nend\n"); + let point = s + .definitions + .iter() + .find(|d| d.name == "Point") + .expect("struct should be found"); + assert_eq!(point.kind, "struct"); + let fields: Vec<&str> = point + .children + .as_ref() + .map(|c| c.iter().map(|p| p.name.as_str()).collect()) + .unwrap_or_default(); + assert!(fields.contains(&"x")); + assert!(fields.contains(&"y")); + assert_eq!(s.classes.len(), 1); + assert_eq!(s.classes[0].name, "Point"); + assert_eq!(s.classes[0].extends, Some("AbstractPoint".to_string())); + } + + #[test] + fn extracts_struct_without_supertype() { + let s = parse_jl("struct Point\n x::Float64\n y::Float64\nend\n"); + let point = s + .definitions + .iter() + .find(|d| d.name == "Point") + .expect("struct should be found"); + assert_eq!(point.kind, "struct"); + assert!(s.classes.is_empty()); + } + + #[test] + fn extracts_abstract_type() { + let s = parse_jl("abstract type AbstractShape end\n"); + let t = s + .definitions + .iter() + .find(|d| d.name == "AbstractShape") + .expect("abstract should be found"); + assert_eq!(t.kind, "type"); + } + + #[test] + fn extracts_macro_def() { + let s = parse_jl("macro mymac(x)\n x\nend\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"@mymac")); + } + + #[test] + fn extracts_qualified_calls() { + let s = parse_jl("function main()\n Repository.save(repo, 1)\n println(\"x\")\nend\n"); + let calls: Vec<(&str, Option<&str>)> = s + .calls + .iter() + .map(|c| (c.name.as_str(), c.receiver.as_deref())) + .collect(); + assert!(calls.iter().any(|(n, r)| *n == "save" && *r == Some("Repository"))); + assert!(calls.iter().any(|(n, r)| *n == "println" && r.is_none())); + } + + #[test] + fn handles_using_import() { + let s = parse_jl("using ..Repository\n"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "Repository"); + } + + #[test] + fn handles_selected_import() { + let s = parse_jl("import Base: show\n"); 
+ assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "Base"); + assert!(s.imports[0].names.contains(&"show".to_string())); + } + + #[test] + fn does_not_record_function_signature_as_call() { + let s = parse_jl("function greet(name)\n println(name)\nend\n"); + // `greet` itself must not appear as a call — only println. + let call_names: Vec<&str> = s.calls.iter().map(|c| c.name.as_str()).collect(); + assert!(!call_names.contains(&"greet")); + assert!(call_names.contains(&"println")); + } +} diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index 642f29f98..7dbf60311 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -10,6 +10,7 @@ pub mod hcl; pub mod helpers; pub mod java; pub mod javascript; +pub mod julia; pub mod kotlin; pub mod lua; pub mod ocaml; @@ -114,6 +115,9 @@ pub fn extract_symbols_with_opts( LanguageKind::Lua => { lua::LuaExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } + LanguageKind::Julia => { + julia::JuliaExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } LanguageKind::Dart => { dart::DartExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } diff --git a/crates/codegraph-core/src/file_collector.rs b/crates/codegraph-core/src/file_collector.rs index 0cb157814..4b57449eb 100644 --- a/crates/codegraph-core/src/file_collector.rs +++ b/crates/codegraph-core/src/file_collector.rs @@ -36,6 +36,7 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ "js", "jsx", "mjs", "cjs", "ts", "tsx", "d.ts", "py", "pyi", "go", "rs", "java", "cs", "rb", "rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "kt", "kts", "swift", "scala", "sh", "bash", "ex", "exs", "lua", "dart", "zig", "hs", "ml", "mli", + "jl", ]; /// Returns whether `path` has an extension the Rust file_collector would accept. 
@@ -44,7 +45,7 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ /// if `LanguageKind::from_extension` recognizes it OR its raw extension is in /// `SUPPORTED_EXTENSIONS`. Exposed for `change_detection::detect_removed_files` /// so that files outside Rust's capability (e.g. WASM-only `.clj`, `.gleam`, -/// `.jl`) are not flagged as "removed" merely because the orchestrator's +/// `.fs`) are not flagged as "removed" merely because the orchestrator's /// narrower collector never sees them. pub fn is_supported_extension(path: &str) -> bool { if LanguageKind::from_extension(path).is_some() { diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index c87957f29..84badff2a 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -27,6 +27,7 @@ pub enum LanguageKind { Haskell, Ocaml, OcamlInterface, + Julia, } impl LanguageKind { @@ -58,6 +59,7 @@ impl LanguageKind { Self::Haskell => "haskell", Self::Ocaml => "ocaml", Self::OcamlInterface => "ocaml-interface", + Self::Julia => "julia", } } @@ -97,6 +99,7 @@ impl LanguageKind { "hs" => Some(Self::Haskell), "ml" => Some(Self::Ocaml), "mli" => Some(Self::OcamlInterface), + "jl" => Some(Self::Julia), _ => None, } } @@ -129,6 +132,7 @@ impl LanguageKind { "haskell" => Some(Self::Haskell), "ocaml" => Some(Self::Ocaml), "ocaml-interface" => Some(Self::OcamlInterface), + "julia" => Some(Self::Julia), _ => None, } } @@ -160,6 +164,7 @@ impl LanguageKind { Self::Haskell => tree_sitter_haskell::LANGUAGE.into(), Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(), Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), + Self::Julia => tree_sitter_julia::LANGUAGE.into(), } } @@ -175,7 +180,7 @@ impl LanguageKind { &[ JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C, Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml, - OcamlInterface, + OcamlInterface, Julia, ] 
} } @@ -244,14 +249,15 @@ mod tests { | LanguageKind::Zig | LanguageKind::Haskell | LanguageKind::Ocaml - | LanguageKind::OcamlInterface => (), + | LanguageKind::OcamlInterface + | LanguageKind::Julia => (), }; // IMPORTANT: this constant must equal the number of arms in the match // above AND the length of the slice returned by `LanguageKind::all()`. // Because both checks require the same manual update, they reinforce // each other: a developer who updates the match is reminded to also // update `all()` and this count. - const EXPECTED_LEN: usize = 24; + const EXPECTED_LEN: usize = 25; assert_eq!( LanguageKind::all().len(), EXPECTED_LEN, diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts index 653cbd59b..3d43d8fbe 100644 --- a/src/ast-analysis/rules/index.ts +++ b/src/ast-analysis/rules/index.ts @@ -153,6 +153,11 @@ const OCAML_AST_TYPES: Record = { string: 'string', }; +const JULIA_AST_TYPES: Record = { + string_literal: 'string', + prefixed_string_literal: 'string', +}; + export const AST_TYPE_MAPS: Map> = new Map([ ['javascript', JS_AST_TYPES], ['typescript', JS_AST_TYPES], @@ -177,6 +182,7 @@ export const AST_TYPE_MAPS: Map> = new Map([ ['haskell', HASKELL_AST_TYPES], ['ocaml', OCAML_AST_TYPES], ['ocaml-interface', OCAML_AST_TYPES], + ['julia', JULIA_AST_TYPES], ]); // ─── Per-language string-extraction config ─────────────────────────────── @@ -211,6 +217,7 @@ const DART_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: const ZIG_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const HASKELL_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' }; const OCAML_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; +const JULIA_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; export const AST_STRING_CONFIGS: Map = new Map([ ['javascript', JS_STRING_CONFIG], @@ -236,6 +243,7 @@ export const AST_STRING_CONFIGS: Map = new Map([ 
['haskell', HASKELL_STRING_CONFIG], ['ocaml', OCAML_STRING_CONFIG], ['ocaml-interface', OCAML_STRING_CONFIG], + ['julia', JULIA_STRING_CONFIG], ]); // ─── Per-language "stop-after-collect" kinds ───────────────────────────── From 3393ce754a5b187d23f183b02dd38b2f10b22c60 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 20:08:18 -0600 Subject: [PATCH 2/4] fix(parser): register .jl in NATIVE_SUPPORTED_EXTENSIONS Native Julia support landed in this PR but the JS-side mirror of the Rust LanguageKind enum was not updated, so the drift guard in tests/parsers/native-drop-classification.test.ts (and the WASM-only bucket in classifyNativeDrops) flagged .jl as missing. Add .jl to the set and drop it from the WASM-only test fixture. --- src/domain/parser.ts | 1 + tests/parsers/native-drop-classification.test.ts | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/domain/parser.ts b/src/domain/parser.ts index f1c7dd809..5bd73452d 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -471,6 +471,7 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet = new Set([ '.hs', '.ml', '.mli', + '.jl', ]); /** diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index 24aee1d53..efa67b6b6 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@ -18,7 +18,6 @@ describe('classifyNativeDrops', () => { 'src/a.fs', 'src/b.gleam', 'src/c.clj', - 'src/d.jl', 'src/e.R', 'src/f.erl', 'src/g.sol', @@ -27,7 +26,7 @@ describe('classifyNativeDrops', () => { 'src/j.v', 'src/k.m', ]); - expect(totals['unsupported-by-native']).toBe(11); + expect(totals['unsupported-by-native']).toBe(10); expect(totals['native-extractor-failure']).toBe(0); expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/a.fs']); expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/b.gleam']); From 
a4a8b5c197e6623725797eac7dde0c77f631a2c6 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 20:08:25 -0600 Subject: [PATCH 3/4] fix(julia): emit base name for parameterized abstract types handle_abstract_def previously fell back to the type_head node itself when no plain identifier was found, dumping the full raw text "Name{T} <: Super{T,1}" as a definition name for any parameterized generic abstract type. The TS counterpart returns early on no-name; the native port should match. Recurse into wrapper shapes (binary_expression, parameterized identifier, type_parameter_list, type_argument_list) to locate the base identifier, and skip emission when none is found. Adds a regression test asserting the base name "AbstractVector" for "abstract type AbstractVector{T} <: AbstractArray{T,1} end". --- crates/codegraph-core/src/extractors/julia.rs | 74 ++++++++++++++++--- 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/crates/codegraph-core/src/extractors/julia.rs b/crates/codegraph-core/src/extractors/julia.rs index 67e82d13e..e535295c8 100644 --- a/crates/codegraph-core/src/extractors/julia.rs +++ b/crates/codegraph-core/src/extractors/julia.rs @@ -258,16 +258,21 @@ fn handle_struct_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { fn handle_abstract_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { // abstract_definition: `abstract type` type_head `end` - // type_head wraps the name identifier (or a `Name <: Super` binary_expr). - let name_node = match find_child(node, "type_head") { - Some(th) => find_child(&th, "identifier") - .or_else(|| { - find_child(&th, "binary_expression") - .and_then(|bin| find_child(&bin, "identifier")) - }) - .unwrap_or(th), - None => match find_child(node, "identifier") { - Some(n) => n, + // type_head wraps the name identifier — possibly nested in a + // `Name <: Super` binary_expression or a `Name{T,...}` parametrized form + // (`parameterized_identifier` / `type_parameter_list`). 
+ let name_node = match node + .child_by_field_name("name") + .or_else(|| find_child(node, "identifier")) + { + Some(n) => n, + None => match find_child(node, "type_head") { + Some(th) => match find_abstract_name(&th) { + Some(n) => n, + // Mirror the TS extractor: skip rather than emit a garbled + // definition name (e.g. raw `Name{T} <: Super{T,1}` text). + None => return, + }, None => return, }, }; @@ -283,6 +288,38 @@ fn handle_abstract_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { }); } +/// Locate the base-name identifier within a `type_head` node. +/// +/// Handles plain identifiers, `Name <: Super` binary expressions, and +/// parameterized forms like `Name{T}` / `Name{T} <: Super{T,1}` by recursing +/// into common wrapper kinds (binary expressions, parametrized identifiers, +/// type-parameter lists). Returns `None` when no identifier can be located — +/// callers should skip emitting a definition in that case. +fn find_abstract_name<'a>(node: &Node<'a>) -> Option> { + // Direct identifier child wins. + if let Some(id) = find_child(node, "identifier") { + return Some(id); + } + // Otherwise recurse through wrapper shapes that may contain the + // base-name identifier (parameterized or supertyped forms). + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + match child.kind() { + "binary_expression" + | "parametrized_type_expression" + | "parameterized_identifier" + | "type_parameter_list" + | "type_argument_list" => { + if let Some(found) = find_abstract_name(&child) { + return Some(found); + } + } + _ => {} + } + } + None +} + fn handle_macro_def( node: &Node, source: &[u8], @@ -573,6 +610,23 @@ mod tests { assert_eq!(t.kind, "type"); } + #[test] + fn extracts_parameterized_abstract_type_base_name() { + // Parameterized generics with a supertype must record only the base + // identifier — never the raw `Name{T} <: Super{T,1}` text. 
+ let s = parse_jl("abstract type AbstractVector{T} <: AbstractArray{T,1} end\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!( + names.contains(&"AbstractVector"), + "expected base name `AbstractVector`, got {names:?}" + ); + // Guard against the previous garbled-name regression. + assert!( + !names.iter().any(|n| n.contains('{') || n.contains('<')), + "definition name leaked raw type-head text: {names:?}" + ); + } + #[test] fn extracts_macro_def() { let s = parse_jl("macro mymac(x)\n x\nend\n"); From 47b9c1f368f986ac31eb0ad10a771620ac48ffac Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Tue, 12 May 2026 00:00:39 -0600 Subject: [PATCH 4/4] fix(native/julia): handle parameterized structs, qualified defs, qualified selected imports (#1098) --- Cargo.lock | 2 +- crates/codegraph-core/src/extractors/julia.rs | 134 ++++++++++++++---- 2 files changed, 110 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 43fed7922..4aebcd77f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -66,7 +66,7 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "codegraph-core" -version = "3.9.6" +version = "3.10.0" dependencies = [ "globset", "ignore", diff --git a/crates/codegraph-core/src/extractors/julia.rs b/crates/codegraph-core/src/extractors/julia.rs index e535295c8..0f0e7e593 100644 --- a/crates/codegraph-core/src/extractors/julia.rs +++ b/crates/codegraph-core/src/extractors/julia.rs @@ -93,9 +93,12 @@ fn handle_function_def( if let Some(call_sig) = signature_call(node) { if let Some(func_name_node) = call_sig.child(0) { let base = node_text(&func_name_node, source); + // For qualified names (`function Base.show ... end` inside a module), + // the LHS is a `scoped_identifier` already containing the qualifier — + // skip the module prefix to avoid producing `Outer.Base.show`. 
let name = match current_module { - Some(m) => format!("{}.{}", m, base), - None => base.to_string(), + Some(m) if !base.contains('.') => format!("{}.{}", m, base), + _ => base.to_string(), }; let params = extract_julia_params(&call_sig, source); symbols.definitions.push(Definition { @@ -122,8 +125,8 @@ fn handle_function_def( }; let base = node_text(&name_node, source); let name = match current_module { - Some(m) => format!("{}.{}", m, base), - None => base.to_string(), + Some(m) if !base.contains('.') => format!("{}.{}", m, base), + _ => base.to_string(), }; symbols.definitions.push(Definition { name, @@ -156,9 +159,12 @@ fn handle_assignment( None => return, }; let base = node_text(&func_name_node, source); + // For qualified short-form definitions like `Foo.bar(x, y) = x + y`, + // `func_name_node` is a `scoped_identifier` already containing the + // qualifier — skip the module prefix to avoid producing `Outer.Foo.bar`. let name = match current_module { - Some(m) => format!("{}.{}", m, base), - None => base.to_string(), + Some(m) if !base.contains('.') => format!("{}.{}", m, base), + _ => base.to_string(), }; let params = extract_julia_params(&lhs, source); @@ -176,8 +182,9 @@ fn handle_assignment( fn handle_struct_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { // struct_definition: `struct` type_head `end` - // type_head is either a bare `identifier` (no supertype) or a - // `binary_expression` of the form `Name <: Super`. + // type_head wraps the name and optional supertype. The name may be a + // bare `identifier`, a `parameterized_identifier` (e.g. `Vec{T}`), or + // either of those nested inside a `binary_expression` (`Name <: Super`). 
let type_head = match find_child(node, "type_head") { Some(th) => th, None => return, @@ -186,26 +193,24 @@ fn handle_struct_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { let (name_node, supertype): (Node, Option<Node>) = if let Some(bin) = find_child(&type_head, "binary_expression") { - // First identifier is the struct name, last identifier (after `<:`) is the supertype. - let mut name_id: Option<Node> = None; - let mut super_id: Option<Node> = None; + // Walk into each side of the binary expression to find the base-name + // identifier — handles parameterized forms like `Vec{T} <: AbstractArray{T,1}`. + let mut sides: Vec<Node> = Vec::new(); for i in 0..bin.child_count() { if let Some(c) = bin.child(i) { - if c.kind() == "identifier" { - if name_id.is_none() { - name_id = Some(c); - } else { - super_id = Some(c); - } + if c.kind() != "operator" { + sides.push(c); } } } + let name_id = sides.first().and_then(|n| find_base_name(n)); + let super_id = sides.get(1).and_then(|n| find_base_name(n)); match name_id { Some(n) => (n, super_id), None => return, } - } else if let Some(id) = find_child(&type_head, "identifier") { - (id, None) + } else if let Some(n) = find_base_name(&type_head) { + (n, None) } else { return; }; @@ -267,7 +272,7 @@ fn handle_abstract_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { { Some(n) => n, None => match find_child(node, "type_head") { - Some(th) => match find_abstract_name(&th) { + Some(th) => match find_base_name(&th) { Some(n) => n, // Mirror the TS extractor: skip rather than emit a garbled // definition name (e.g. raw `Name{T} <: Super{T,1}` text). None => return, @@ -295,7 +300,12 @@ fn handle_abstract_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { /// into common wrapper kinds (binary expressions, parametrized identifiers, /// type-parameter lists). Returns `None` when no identifier can be located — /// callers should skip emitting a definition in that case.
-fn find_abstract_name<'a>(node: &Node<'a>) -> Option<Node<'a>> { +fn find_base_name<'a>(node: &Node<'a>) -> Option<Node<'a>> { + // The node itself may already be the identifier (e.g. when called on a + // direct side of a binary_expression like `Point <: AbstractPoint`). + if node.kind() == "identifier" { + return Some(*node); + } // Direct identifier child wins. if let Some(id) = find_child(node, "identifier") { return Some(id); @@ -310,7 +320,7 @@ fn find_abstract_name<'a>(node: &Node<'a>) -> Option<Node<'a>> { | "parameterized_identifier" | "type_parameter_list" | "type_argument_list" => { - if let Some(found) = find_abstract_name(&child) { + if let Some(found) = find_base_name(&child) { return Some(found); } } @@ -391,11 +401,15 @@ fn handle_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { } } "selected_import" => { - // First identifier is the source module; the rest are imported names. + // First identifier-bearing node is the source module; the rest + // are imported names. The module may itself be a + // `scoped_identifier` (e.g. `import Foo.Bar: baz`) — handle it + // alongside bare `identifier` and use the trailing segment as + // the display name, mirroring the outer loop. let mut first = true; for j in 0..child.child_count() { let Some(part) = child.child(j) else { continue }; - if part.kind() == "identifier" { + if part.kind() == "identifier" || part.kind() == "scoped_identifier" { let txt = node_text(&part, source).to_string(); if first { if source_str.is_empty() { @@ -403,7 +417,8 @@ fn handle_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { } first = false; } else { - names.push(txt); + let last = txt.rsplit('.').next().unwrap_or(&txt).to_string(); + names.push(last); } } } @@ -669,4 +684,73 @@ mod tests { assert!(!call_names.contains(&"greet")); assert!(call_names.contains(&"println")); } + + #[test] + fn extracts_parameterized_struct_base_name() { + // Parameterized struct names (e.g.
`Vec{T}`) must record the base + // identifier — not be silently dropped or include type-parameter text. + let s = parse_jl("struct Vec{T} <: AbstractArray{T,1}\n data::Vector{T}\nend\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!( + names.contains(&"Vec"), + "expected base name `Vec`, got {names:?}" + ); + assert!( + !names.iter().any(|n| n.contains('{') || n.contains('<')), + "definition name leaked raw type-head text: {names:?}" + ); + // Supertype should still resolve to the base identifier `AbstractArray`. + assert_eq!(s.classes.len(), 1); + assert_eq!(s.classes[0].name, "Vec"); + assert_eq!(s.classes[0].extends.as_deref(), Some("AbstractArray")); + } + + #[test] + fn qualified_short_form_method_does_not_double_prefix() { + // `Foo.bar(x, y) = x + y` inside `module Outer` must record `Foo.bar`, + // not `Outer.Foo.bar` — the scoped_identifier already carries the + // qualifier. + let s = parse_jl("module Outer\n Foo.bar(x, y) = x + y\nend\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Foo.bar"), "got {names:?}"); + assert!( + !names.iter().any(|n| *n == "Outer.Foo.bar"), + "qualified method got double-prefixed: {names:?}" + ); + } + + #[test] + fn qualified_function_def_does_not_double_prefix() { + // `function Base.show(io, x) ... end` inside `module Foo` must record + // `Base.show`, not `Foo.Base.show`. + let s = parse_jl( + "module Foo\n function Base.show(io, x)\n println(io, x)\n end\nend\n", + ); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Base.show"), "got {names:?}"); + assert!( + !names.iter().any(|n| *n == "Foo.Base.show"), + "qualified function def got double-prefixed: {names:?}" + ); + } + + #[test] + fn selected_import_handles_qualified_module() { + // `import Foo.Bar: baz` — module is a scoped_identifier. 
The import + // must record `Foo.Bar` as the source and `baz` as the imported name, + // not the malformed `source="baz", names=["baz"]`. + let s = parse_jl("import LinearAlgebra.BLAS: gemm\n"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "LinearAlgebra.BLAS"); + assert!( + s.imports[0].names.contains(&"gemm".to_string()), + "expected `gemm` in imported names, got {:?}", + s.imports[0].names + ); + assert!( + !s.imports[0].names.contains(&"LinearAlgebra.BLAS".to_string()), + "source module leaked into names: {:?}", + s.imports[0].names + ); + } }