Skip to content

Commit d14295e

Browse files
committed
feat(engine): Julia language extractor (.jl files)
Adds tree-sitter-julia grammar (tree-sitter/tree-sitter-julia v0.25.0) wired through the standard LanguageExtractor pattern. Extracts function definitions (both 'function ... end' and short 'f() = ...' forms), call edges, and 'using'/'import' statements. Key AST quirk: Julia's grammar uses no named fields — the extractor uses a visitNode hook throughout. Short-form functions are assignment nodes with a call_expression LHS, not a dedicated node type. const_statement wraps an inner assignment. 10 new tests all pass.
1 parent 6b3c5ad commit d14295e

6 files changed

Lines changed: 423 additions & 1 deletion

File tree

engine/PATCHES.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,16 @@ Perl — each documented as a numbered entry below when it lands.
2323
to intercept `binary_operator` nodes: rhs=`function_definition` → function
2424
node; plain rhs → variable node. No class support (S3/S4/R5/R6 are runtime
2525
constructs). `library()`/`require()` are extracted as plain function calls.
26+
27+
### Julia (added 2026-05-30)
28+
- WASM grammar: `engine/src/extraction/wasm/tree-sitter-julia.wasm` from tree-sitter/tree-sitter-julia v0.25.0
29+
- Extension map: `.jl` → `julia`
30+
- Extractor: `engine/src/extraction/languages/julia.ts`
31+
- Tests: added `Julia Extraction` describe block in `engine/__tests__/extraction.test.ts`
32+
- Notes: Julia's tree-sitter grammar uses **no named fields** (all `childForFieldName` calls
33+
return null). The extractor uses `visitNode` throughout. Key AST quirks:
34+
- `function_definition`: children by index — `[0]=function`, `[1]=signature(call_expression(identifier, argument_list))`, `[2]=block`, `[3]=end`
35+
- Short-form functions `f() = expr` are parsed as `assignment` with a `call_expression` on the LHS (not a dedicated `short_function_definition` node)
36+
- `const_statement` wraps an inner `assignment` node
37+
- `macro_definition` has the same structure as `function_definition`
38+
- Call edges work via the standard `call_expression` in `callTypes`; `visitFunctionBody` recursively finds them

engine/__tests__/extraction.test.ts

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4434,3 +4434,140 @@ lapply(1:10, function(x) x * 2)
44344434
expect(functions).toHaveLength(0);
44354435
});
44364436
});
4437+
4438+
describe('Julia Extraction', () => {
4439+
it('should detect Julia files', () => {
4440+
expect(detectLanguage('analysis.jl')).toBe('julia');
4441+
expect(detectLanguage('src/utils.jl')).toBe('julia');
4442+
});
4443+
4444+
it('should report Julia as supported', () => {
4445+
expect(isLanguageSupported('julia')).toBe(true);
4446+
expect(getSupportedLanguages()).toContain('julia');
4447+
});
4448+
4449+
it('should create a file node for Julia files', () => {
4450+
const code = `
4451+
function hello(name)
4452+
"Hello, \${name}"
4453+
end
4454+
`;
4455+
const result = extractFromSource('hello.jl', code);
4456+
4457+
const fileNode = result.nodes.find((n) => n.kind === 'file');
4458+
expect(fileNode).toBeDefined();
4459+
expect(fileNode?.name).toBe('hello.jl');
4460+
expect(fileNode?.language).toBe('julia');
4461+
});
4462+
4463+
it('should extract long-form function definitions (function ... end)', () => {
4464+
const code = `
4465+
function hello(name)
4466+
"Hello, \${name}"
4467+
end
4468+
4469+
function greet()
4470+
hello("world")
4471+
end
4472+
4473+
function main()
4474+
println(greet())
4475+
end
4476+
`;
4477+
const result = extractFromSource('funcs.jl', code);
4478+
4479+
const functions = result.nodes.filter((n) => n.kind === 'function');
4480+
const names = functions.map((f) => f.name).sort();
4481+
expect(names).toContain('hello');
4482+
expect(names).toContain('greet');
4483+
expect(names).toContain('main');
4484+
});
4485+
4486+
it('should extract short-form function definitions (f() = ...)', () => {
4487+
const code = `
4488+
greet() = hello("world")
4489+
add(a, b) = a + b
4490+
`;
4491+
const result = extractFromSource('short.jl', code);
4492+
4493+
const functions = result.nodes.filter((n) => n.kind === 'function');
4494+
const names = functions.map((f) => f.name).sort();
4495+
expect(names).toContain('greet');
4496+
expect(names).toContain('add');
4497+
});
4498+
4499+
it('should extract call edges from function bodies', () => {
4500+
const code = `
4501+
function hello(name)
4502+
"Hello, \${name}"
4503+
end
4504+
4505+
greet() = hello("world")
4506+
4507+
function main()
4508+
println(greet())
4509+
end
4510+
`;
4511+
const result = extractFromSource('calls.jl', code);
4512+
4513+
const calls = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls');
4514+
const callNames = calls.map((c) => c.referenceName);
4515+
4516+
// greet calls hello; main calls println and greet
4517+
expect(callNames).toContain('hello');
4518+
expect(callNames).toContain('greet');
4519+
expect(callNames).toContain('println');
4520+
});
4521+
4522+
it('should extract using statements as imports', () => {
4523+
const code = `
4524+
using Pkg
4525+
using Statistics: mean, std
4526+
`;
4527+
const result = extractFromSource('imports.jl', code);
4528+
4529+
const imports = result.nodes.filter((n) => n.kind === 'import');
4530+
const names = imports.map((i) => i.name);
4531+
expect(names).toContain('Pkg');
4532+
expect(names).toContain('Statistics');
4533+
});
4534+
4535+
it('should extract import statements', () => {
4536+
const code = `
4537+
import Statistics
4538+
import Base: show, print
4539+
`;
4540+
const result = extractFromSource('imports2.jl', code);
4541+
4542+
const imports = result.nodes.filter((n) => n.kind === 'import');
4543+
const names = imports.map((i) => i.name);
4544+
expect(names).toContain('Statistics');
4545+
expect(names).toContain('Base');
4546+
});
4547+
4548+
it('should extract plain variable assignments', () => {
4549+
const code = `
4550+
x = 42
4551+
name = "Alice"
4552+
`;
4553+
const result = extractFromSource('vars.jl', code);
4554+
4555+
const variables = result.nodes.filter((n) => n.kind === 'variable');
4556+
const names = variables.map((v) => v.name);
4557+
expect(names).toContain('x');
4558+
expect(names).toContain('name');
4559+
});
4560+
4561+
it('should extract const declarations as constants', () => {
4562+
const code = `
4563+
const PI_VAL = 3.14159
4564+
const MAX_SIZE = 1000
4565+
`;
4566+
const result = extractFromSource('consts.jl', code);
4567+
4568+
const constants = result.nodes.filter((n) => n.kind === 'constant');
4569+
const names = constants.map((c) => c.name);
4570+
expect(names).toContain('PI_VAL');
4571+
expect(names).toContain('MAX_SIZE');
4572+
});
4573+
});

engine/src/extraction/grammars.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ const WASM_GRAMMAR_FILES: Record<GrammarLanguage, string> = {
3939
luau: 'tree-sitter-luau.wasm',
4040
objc: 'tree-sitter-objc.wasm',
4141
r: 'tree-sitter-r.wasm',
42+
julia: 'tree-sitter-julia.wasm',
4243
};
4344

4445
/**
@@ -98,6 +99,7 @@ export const EXTENSION_MAP: Record<string, Language> = {
9899
'.mm': 'objc',
99100
'.R': 'r',
100101
'.r': 'r',
102+
'.jl': 'julia',
101103
// XML: file-level tracking; the MyBatis extractor matches `<mapper namespace="...">`
102104
// shape and emits SQL-statement nodes (other XML returns empty).
103105
'.xml': 'xml',
@@ -182,7 +184,7 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise<v
182184
// ABI-13 build that corrupts the shared WASM heap under web-tree-sitter
183185
// 0.25 (drops nested calls/imports on every file after the first); we
184186
// vendor the upstream ABI-15 wasm instead.
185-
const wasmPath = (lang === 'pascal' || lang === 'scala' || lang === 'lua' || lang === 'luau' || lang === 'r')
187+
const wasmPath = (lang === 'pascal' || lang === 'scala' || lang === 'lua' || lang === 'luau' || lang === 'r' || lang === 'julia')
186188
? path.join(__dirname, 'wasm', wasmFile)
187189
: require.resolve(`tree-sitter-wasms/out/${wasmFile}`);
188190
const language = await WasmLanguage.load(wasmPath);
@@ -369,6 +371,7 @@ export function getLanguageDisplayName(language: Language): string {
369371
luau: 'Luau',
370372
objc: 'Objective-C',
371373
r: 'R',
374+
julia: 'Julia',
372375
yaml: 'YAML',
373376
twig: 'Twig',
374377
xml: 'XML',

engine/src/extraction/languages/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import { luaExtractor } from './lua';
2727
import { luauExtractor } from './luau';
2828
import { objcExtractor } from './objc';
2929
import { rExtractor } from './r';
30+
import { juliaExtractor } from './julia';
3031

3132
export const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
3233
typescript: typescriptExtractor,
@@ -51,4 +52,5 @@ export const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
5152
luau: luauExtractor,
5253
objc: objcExtractor,
5354
r: rExtractor,
55+
julia: juliaExtractor,
5456
};

0 commit comments

Comments
 (0)