diff --git a/package-lock.json b/package-lock.json index b84a46004..c842c8317 100644 --- a/package-lock.json +++ b/package-lock.json @@ -20,6 +20,7 @@ "@biomejs/biome": "^2.4.4", "@commitlint/cli": "^20.4", "@commitlint/config-conventional": "^20.0", + "@eagleoutice/tree-sitter-r": "^1.1.2", "@huggingface/transformers": "^3.8.1", "@tree-sitter-grammars/tree-sitter-hcl": "^1.2.0", "@tree-sitter-grammars/tree-sitter-lua": "^0.4.1", @@ -32,13 +33,18 @@ "tree-sitter-c": "^0.24.1", "tree-sitter-c-sharp": "^0.23.1", "tree-sitter-cli": "^0.26.5", + "tree-sitter-clojure": "github:sogaiu/tree-sitter-clojure", "tree-sitter-cpp": "^0.23.4", "tree-sitter-dart": "^1.0.0", "tree-sitter-elixir": "^0.3.5", + "tree-sitter-erlang": "github:WhatsApp/tree-sitter-erlang#semver:*", + "tree-sitter-fsharp": "^0.1.0", + "tree-sitter-gleam": "github:gleam-lang/tree-sitter-gleam", "tree-sitter-go": "^0.25.0", "tree-sitter-haskell": "^0.23.1", "tree-sitter-java": "^0.23.5", "tree-sitter-javascript": "^0.25.0", + "tree-sitter-julia": "^0.23.1", "tree-sitter-kotlin": "^0.3.8", "tree-sitter-ocaml": "^0.24.2", "tree-sitter-php": "^0.24.2", @@ -586,6 +592,26 @@ "node": ">=v18" } }, + "node_modules/@eagleoutice/tree-sitter-r": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@eagleoutice/tree-sitter-r/-/tree-sitter-r-1.1.2.tgz", + "integrity": "sha512-HR0RyoI5uxcfsdZvTMqSM8GJyGo6DQTkfdgqygQ6N+D0KQObRH4RxYgBZ6ePsGq/36RBAqv8y0NeQvAFL2N4dQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.0.0", + "node-gyp-build": "^4.8.0" + }, + "peerDependencies": { + "tree-sitter": "^0.21.0" + }, + "peerDependenciesMeta": { + "tree_sitter": { + "optional": true + } + } + }, "node_modules/@emnapi/core": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.9.1.tgz", @@ -1288,9 +1314,6 @@ "cpu": [ "arm64" ], - "libc": [ - "glibc" - ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1304,9 +1327,6 @@ 
"cpu": [ "x64" ], - "libc": [ - "glibc" - ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1320,9 +1340,6 @@ "cpu": [ "x64" ], - "libc": [ - "musl" - ], "license": "Apache-2.0", "optional": true, "os": [ @@ -5969,6 +5986,22 @@ "node": ">=10" } }, + "node_modules/prettier": { + "version": "2.8.8", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-2.8.8.tgz", + "integrity": "sha512-tdN8qQGvNjw4CHbY+XXk0JgCXn9QiF21a55rBe5LJAU+kDyC4WQn4+awm2Xfk2lQMk5fKup9XgzTZtGkjBdP9Q==", + "dev": true, + "license": "MIT", + "bin": { + "prettier": "bin-prettier.js" + }, + "engines": { + "node": ">=10.13.0" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" + } + }, "node_modules/process-nextick-args": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", @@ -7122,6 +7155,11 @@ "node": ">=12.0.0" } }, + "node_modules/tree-sitter-clojure": { + "version": "0.0.13", + "resolved": "git+ssh://git@github.com/sogaiu/tree-sitter-clojure.git#e43eff80d17cf34852dcd92ca5e6986d23a7040f", + "dev": true + }, "node_modules/tree-sitter-cpp": { "version": "0.23.4", "resolved": "https://registry.npmjs.org/tree-sitter-cpp/-/tree-sitter-cpp-0.23.4.tgz", @@ -7196,6 +7234,60 @@ "dev": true, "license": "MIT" }, + "node_modules/tree-sitter-erlang": { + "version": "0.0.0", + "resolved": "git+ssh://git@github.com/WhatsApp/tree-sitter-erlang.git#5dba13dcd531c19bf99829e2e0bb31f2e08e16fe", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "nan": "^2.14.1", + "prettier": "^2.2.1", + "tree-sitter-cli": "^0.20.8" + } + }, + "node_modules/tree-sitter-erlang/node_modules/tree-sitter-cli": { + "version": "0.20.8", + "resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.20.8.tgz", + "integrity": "sha512-XjTcS3wdTy/2cc/ptMLc/WRyOLECRYcMTrSWyhZnj1oGSOWbHLTklgsgRICU3cPfb0vy+oZCC33M43u6R1HSCA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + 
"tree-sitter": "cli.js" + } + }, + "node_modules/tree-sitter-fsharp": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/tree-sitter-fsharp/-/tree-sitter-fsharp-0.1.0.tgz", + "integrity": "sha512-TCK+Jkg3qvEe4o6JxqUlE+vUc9lWhHhD7Toglu5Y04/PKa9DgACzqU5Jp9BYZnyhgGLEe+30kVOyFTY/iC/n1Q==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.1.0", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.0" + }, + "peerDependenciesMeta": { + "tree_sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-gleam": { + "version": "1.0.0", + "resolved": "git+ssh://git@github.com/gleam-lang/tree-sitter-gleam.git#0bb1b0ae1a3555180ae7b0004851da747fc230d1", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "nan": "^2.18.0" + }, + "peerDependencies": { + "tree-sitter": "^0.21.0" + } + }, "node_modules/tree-sitter-go": { "version": "0.25.0", "resolved": "https://registry.npmjs.org/tree-sitter-go/-/tree-sitter-go-0.25.0.tgz", @@ -7276,6 +7368,26 @@ } } }, + "node_modules/tree-sitter-julia": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/tree-sitter-julia/-/tree-sitter-julia-0.23.1.tgz", + "integrity": "sha512-3vShY0GIu8ajR6hXzE0pyUk6kkfg4pGx3Bfzm6lGmR9aC3fe+LgoBMlaFJ7JY+t0fNFccc77J8HVP67ukuDMxQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, "node_modules/tree-sitter-kotlin": { "version": "0.3.8", "resolved": "https://registry.npmjs.org/tree-sitter-kotlin/-/tree-sitter-kotlin-0.3.8.tgz", diff --git a/package.json b/package.json index 71487ac5b..c7fd32263 100644 --- a/package.json +++ b/package.json @@ -146,21 +146,27 @@ "@tree-sitter-grammars/tree-sitter-hcl": "^1.2.0", 
"@tree-sitter-grammars/tree-sitter-lua": "^0.4.1", "@tree-sitter-grammars/tree-sitter-zig": "^1.1.2", + "@eagleoutice/tree-sitter-r": "^1.1.2", "@types/better-sqlite3": "^7.6.13", "@vitest/coverage-v8": "^4.0.18", "commit-and-tag-version": "^12.5", "husky": "^9.1", "tree-sitter-bash": "^0.25.1", + "tree-sitter-dart": "^1.0.0", + "tree-sitter-elixir": "^0.3.5", + "tree-sitter-erlang": "github:WhatsApp/tree-sitter-erlang#semver:*", "tree-sitter-c": "^0.24.1", + "tree-sitter-clojure": "github:sogaiu/tree-sitter-clojure", "tree-sitter-c-sharp": "^0.23.1", "tree-sitter-cli": "^0.26.5", "tree-sitter-cpp": "^0.23.4", - "tree-sitter-dart": "^1.0.0", - "tree-sitter-elixir": "^0.3.5", + "tree-sitter-fsharp": "^0.1.0", + "tree-sitter-gleam": "github:gleam-lang/tree-sitter-gleam", "tree-sitter-go": "^0.25.0", "tree-sitter-haskell": "^0.23.1", "tree-sitter-java": "^0.23.5", "tree-sitter-javascript": "^0.25.0", + "tree-sitter-julia": "^0.23.1", "tree-sitter-kotlin": "^0.3.8", "tree-sitter-ocaml": "^0.24.2", "tree-sitter-php": "^0.24.2", diff --git a/scripts/build-wasm.ts b/scripts/build-wasm.ts index da10d30be..1f6d88678 100644 --- a/scripts/build-wasm.ts +++ b/scripts/build-wasm.ts @@ -46,6 +46,12 @@ const grammars = [ { name: 'tree-sitter-zig', pkg: '@tree-sitter-grammars/tree-sitter-zig', sub: null }, { name: 'tree-sitter-haskell', pkg: 'tree-sitter-haskell', sub: null }, { name: 'tree-sitter-ocaml', pkg: 'tree-sitter-ocaml', sub: 'grammars/ocaml' }, + { name: 'tree-sitter-fsharp', pkg: 'tree-sitter-fsharp', sub: 'fsharp' }, + { name: 'tree-sitter-gleam', pkg: 'tree-sitter-gleam', sub: null }, + { name: 'tree-sitter-clojure', pkg: 'tree-sitter-clojure', sub: null }, + { name: 'tree-sitter-julia', pkg: 'tree-sitter-julia', sub: null }, + { name: 'tree-sitter-r', pkg: '@eagleoutice/tree-sitter-r', sub: null }, + { name: 'tree-sitter-erlang', pkg: 'tree-sitter-erlang', sub: null }, ]; let failed = 0; diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 
bc7c4543e..2dcaf8e14 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -17,20 +17,26 @@ import type { // Re-export all extractors for backward compatibility export { extractBashSymbols, + extractClojureSymbols, extractCppSymbols, extractCSharpSymbols, extractCSymbols, extractDartSymbols, extractElixirSymbols, + extractErlangSymbols, + extractFSharpSymbols, + extractGleamSymbols, extractGoSymbols, extractHaskellSymbols, extractHCLSymbols, extractJavaSymbols, + extractJuliaSymbols, extractKotlinSymbols, extractLuaSymbols, extractOCamlSymbols, extractPHPSymbols, extractPythonSymbols, + extractRSymbols, extractRubySymbols, extractRustSymbols, extractScalaSymbols, @@ -41,20 +47,26 @@ export { import { extractBashSymbols, + extractClojureSymbols, extractCppSymbols, extractCSharpSymbols, extractCSymbols, extractDartSymbols, extractElixirSymbols, + extractErlangSymbols, + extractFSharpSymbols, + extractGleamSymbols, extractGoSymbols, extractHaskellSymbols, extractHCLSymbols, extractJavaSymbols, + extractJuliaSymbols, extractKotlinSymbols, extractLuaSymbols, extractOCamlSymbols, extractPHPSymbols, extractPythonSymbols, + extractRSymbols, extractRubySymbols, extractRustSymbols, extractScalaSymbols, @@ -534,6 +546,48 @@ export const LANGUAGE_REGISTRY: LanguageRegistryEntry[] = [ extractor: extractOCamlSymbols, required: false, }, + { + id: 'fsharp', + extensions: ['.fs', '.fsx', '.fsi'], + grammarFile: 'tree-sitter-fsharp.wasm', + extractor: extractFSharpSymbols, + required: false, + }, + { + id: 'gleam', + extensions: ['.gleam'], + grammarFile: 'tree-sitter-gleam.wasm', + extractor: extractGleamSymbols, + required: false, + }, + { + id: 'clojure', + extensions: ['.clj', '.cljs', '.cljc'], + grammarFile: 'tree-sitter-clojure.wasm', + extractor: extractClojureSymbols, + required: false, + }, + { + id: 'julia', + extensions: ['.jl'], + grammarFile: 'tree-sitter-julia.wasm', + extractor: extractJuliaSymbols, + required: false, + }, + { + id: 'r', + extensions: 
/**
 * Extract symbols from a parsed Clojure source file.
 *
 * The Clojure grammar is purely structural (lists, vectors, maps, symbols), so
 * definitions are recognised from the head symbol of each list form
 * (`defn`, `def`, `defprotocol`, ...) rather than from dedicated node types.
 *
 * @param tree - Parsed tree; only `tree.rootNode` is read.
 * @param _filePath - Unused.
 * @returns A fresh `ExtractorOutput` filled in by the tree walk.
 */
export function extractClojureSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput {
  const ctx: ExtractorOutput = {
    definitions: [],
    calls: [],
    imports: [],
    classes: [],
    exports: [],
    typeMap: new Map(),
  };

  walkClojureNode(tree.rootNode, ctx, null);
  return ctx;
}

/**
 * Pre-order walk that dispatches every `list_lit` to `handleListForm`.
 *
 * An `(ns ...)` form is a sibling of the definitions that follow it, not their
 * parent. The namespace is therefore threaded from one child to the next, so
 * that forms after `(ns my.app)` are handled with `currentNs === 'my.app'`.
 * (Previously the namespace was only handed to the children of the `ns` form
 * itself, so top-level definitions were never qualified.)
 *
 * @param node - Node to visit.
 * @param ctx - Output accumulator, mutated in place.
 * @param currentNs - Namespace in effect when `node` is reached, or `null`.
 * @returns The namespace in effect for the siblings that follow `node`:
 *   the new name if `node` is an `ns` form, otherwise `currentNs`.
 */
function walkClojureNode(
  node: TreeSitterNode,
  ctx: ExtractorOutput,
  currentNs: string | null,
): string | null {
  let activeNs = currentNs;
  if (node.type === 'list_lit') {
    activeNs = handleListForm(node, ctx, currentNs) ?? currentNs;
  }

  // Each child sees the namespace left behind by its preceding siblings.
  let childNs = activeNs;
  for (let i = 0; i < node.childCount; i++) {
    const child = node.child(i);
    if (child) childNs = walkClojureNode(child, ctx, childNs);
  }

  // Only an `ns` form at this level changes the namespace for later siblings;
  // one nested deeper stays scoped to its own parent.
  return activeNs;
}
/** Token types that delimit a form; they are skipped when looking for symbols. */
const CLOJURE_DELIMITER_TYPES: ReadonlySet<string> = new Set(['(', ')', '[', ']', '{', '}', '#']);

/** True for children ignored during symbol lookup: delimiters and `meta_lit`. */
function isSkippableClojureChild(child: TreeSitterNode): boolean {
  // Exact set membership; a substring test against '()[]{}#' would also
  // accept '' and multi-character fragments such as '()' or '[]'.
  return CLOJURE_DELIMITER_TYPES.has(child.type) || child.type === 'meta_lit';
}

/** True for `sym_lit` and `kwd_lit` nodes. */
function isClojureSymbolLike(child: TreeSitterNode): boolean {
  return child.type === 'sym_lit' || child.type === 'kwd_lit';
}

/**
 * Dispatch a list form on its head symbol.
 *
 * Definition forms (`def`, `defn`, `defprotocol`, ...) and import forms are
 * forwarded to their handlers; any other head that does not start with `:` or
 * `(` is recorded as a call.
 *
 * @param node - The `list_lit` node.
 * @param ctx - Output accumulator, mutated in place.
 * @param currentNs - Namespace used to qualify definitions, or `null`.
 * @returns The new namespace name if this is an `ns` form, otherwise `null`.
 */
function handleListForm(
  node: TreeSitterNode,
  ctx: ExtractorOutput,
  currentNs: string | null,
): string | null {
  const head = findFirstSymbol(node);
  if (!head) return null;

  const name = head.text;
  switch (name) {
    case 'ns':
      return handleNsForm(node, ctx);
    case 'def':
    case 'defonce':
      handleDefForm(node, ctx, currentNs, 'variable');
      break;
    case 'defmulti':
      handleDefForm(node, ctx, currentNs, 'function');
      break;
    case 'defn-':
      handleDefnForm(node, ctx, currentNs, 'private');
      break;
    case 'defn':
    case 'defmacro':
    case 'defmethod':
      handleDefnForm(node, ctx, currentNs, 'public');
      break;
    case 'defprotocol':
      handleDefprotocol(node, ctx);
      break;
    case 'defrecord':
    case 'deftype':
      handleDefrecord(node, ctx, name);
      break;
    case 'require':
    case 'use':
    case 'import':
      handleImportForm(node, ctx, name);
      break;
    default:
      // Anything else in head position is treated as a regular call.
      if (!name.startsWith(':') && !name.startsWith('(')) {
        ctx.calls.push({ name, line: node.startPosition.row + 1 });
      }
  }
  return null;
}

/**
 * Return the head of a form: the first child that is not a delimiter or
 * metadata, provided it is a symbol or keyword. Any other kind of first
 * element yields `null`.
 */
function findFirstSymbol(listNode: TreeSitterNode): TreeSitterNode | null {
  for (let i = 0; i < listNode.childCount; i++) {
    const child = listNode.child(i);
    if (!child || isSkippableClojureChild(child)) continue;
    return isClojureSymbolLike(child) ? child : null;
  }
  return null;
}

/**
 * Return the second symbol-or-keyword among the direct children of a form.
 * Unlike `findFirstSymbol`, other kinds of children in between are skipped
 * rather than ending the search.
 */
function findSecondSymbol(listNode: TreeSitterNode): TreeSitterNode | null {
  let seen = 0;
  for (let i = 0; i < listNode.childCount; i++) {
    const child = listNode.child(i);
    if (!child || isSkippableClojureChild(child)) continue;
    if (isClojureSymbolLike(child) && ++seen === 2) return child;
  }
  return null;
}

/**
 * Record an `(ns name ...)` form as a `module` definition and collect the
 * imports from its `(:require ...)`, `(:import ...)` and `(:use ...)` clauses.
 *
 * @returns The namespace name, or `null` when the form has no name symbol.
 */
function handleNsForm(node: TreeSitterNode, ctx: ExtractorOutput): string | null {
  const nameNode = findSecondSymbol(node);
  if (!nameNode) return null;

  const nsName = nameNode.text;
  ctx.definitions.push({
    name: nsName,
    kind: 'module',
    line: node.startPosition.row + 1,
    endLine: nodeEndLine(node),
  });

  for (let i = 0; i < node.childCount; i++) {
    const clause = node.child(i);
    if (!clause || clause.type !== 'list_lit') continue;
    const clauseHead = findFirstSymbol(clause)?.text;
    if (clauseHead === ':require' || clauseHead === ':import' || clauseHead === ':use') {
      extractNsRequires(clause, ctx);
    }
  }

  return nsName;
}

/**
 * Collect imports from one `ns` clause. Handles the vector form
 * (`[some.ns :as alias]`, first symbol is the source) and the bare symbol form
 * (`some.ns`). The imported name is the last dot-separated segment.
 */
function extractNsRequires(requireForm: TreeSitterNode, ctx: ExtractorOutput): void {
  const addImport = (source: string, line: number): void => {
    ctx.imports.push({ source, names: [source.split('.').pop() || source], line });
  };

  for (let i = 0; i < requireForm.childCount; i++) {
    const child = requireForm.child(i);
    if (!child) continue;

    if (child.type === 'vec_lit') {
      const sym = findFirstSymbol(child);
      if (sym) addImport(sym.text, child.startPosition.row + 1);
    }
    // i > 0 keeps the form's first child out of the import list.
    if (child.type === 'sym_lit' && i > 0 && !child.text.startsWith(':')) {
      addImport(child.text, child.startPosition.row + 1);
    }
  }
}
/**
 * Record a `defn`-style form (`defn`, `defn-`, `defmacro`, `defmethod`) as a
 * `function` definition, qualified as `ns/name` when a namespace is known.
 *
 * @param node - The list form.
 * @param ctx - Output accumulator, mutated in place.
 * @param currentNs - Namespace used to qualify the name, or `null`.
 * @param visibility - Visibility stored on the definition.
 */
function handleDefnForm(
  node: TreeSitterNode,
  ctx: ExtractorOutput,
  currentNs: string | null,
  visibility: 'public' | 'private',
): void {
  const nameNode = findSecondSymbol(node);
  if (!nameNode) return;

  const rawName = nameNode.text;
  const params = extractClojureParams(node);

  ctx.definitions.push({
    name: currentNs ? `${currentNs}/${rawName}` : rawName,
    kind: 'function',
    line: node.startPosition.row + 1,
    endLine: nodeEndLine(node),
    visibility,
    children: params.length > 0 ? params : undefined,
  });
}

/**
 * Collect parameters from the first vector that is a direct child of the
 * definition form. Only plain symbols directly inside that vector are
 * reported; the varargs marker `&` is syntax, not a parameter, and is skipped.
 *
 * @returns One `parameter` entry per symbol, or an empty array when the form
 *   has no direct vector child.
 */
function extractClojureParams(defnNode: TreeSitterNode): SubDeclaration[] {
  const params: SubDeclaration[] = [];
  for (let i = 0; i < defnNode.childCount; i++) {
    const vec = defnNode.child(i);
    if (!vec || vec.type !== 'vec_lit') continue;

    for (let j = 0; j < vec.childCount; j++) {
      const param = vec.child(j);
      if (!param || param.type !== 'sym_lit' || param.text === '&') continue;
      params.push({ name: param.text, kind: 'parameter', line: param.startPosition.row + 1 });
    }
    break; // only the first vector is the parameter list
  }
  return params;
}

/** Record a `(defprotocol Name ...)` form as an `interface` definition. */
function handleDefprotocol(node: TreeSitterNode, ctx: ExtractorOutput): void {
  const nameNode = findSecondSymbol(node);
  if (!nameNode) return;

  ctx.definitions.push({
    name: nameNode.text,
    kind: 'interface',
    line: node.startPosition.row + 1,
    endLine: nodeEndLine(node),
  });
}
/**
 * Record a standalone `(require ...)`, `(use ...)` or `(import ...)` form.
 *
 * Two argument shapes are recognised:
 * - an unquoted symbol directly in the form, e.g. `(import java.util.Date)`;
 * - quoted arguments, e.g. `(require 'clojure.string)` or
 *   `(require '[clojure.set :as s] :reload)`, where one import is recorded per
 *   quoted argument. Keyword flags such as `:reload` are not import sources.
 *
 * Every import is stored with `names: [keyword]`.
 *
 * NOTE(review): quoted arguments are assumed to be `quoting_lit` nodes wrapping
 * a `sym_lit`, `vec_lit` or `list_lit` — confirm against the grammar in use.
 *
 * @param node - The list form.
 * @param ctx - Output accumulator, mutated in place.
 * @param keyword - The head symbol (`require`, `use` or `import`).
 */
function handleImportForm(node: TreeSitterNode, ctx: ExtractorOutput, keyword: string): void {
  const line = node.startPosition.row + 1;

  const direct = findSecondSymbol(node);
  if (direct && direct.type === 'sym_lit') {
    ctx.imports.push({ source: direct.text, names: [keyword], line });
    return;
  }

  for (let i = 0; i < node.childCount; i++) {
    const arg = node.child(i);
    if (!arg || arg.type !== 'quoting_lit') continue;
    const target = quotedImportTarget(arg);
    if (target) ctx.imports.push({ source: target.text, names: [keyword], line });
  }
}

/**
 * Find the symbol naming the library inside a quoted argument: the symbol
 * itself for `'some.ns`, or the head symbol for `'[some.ns ...]` / `'(pkg Cls)`.
 */
function quotedImportTarget(quoted: TreeSitterNode): TreeSitterNode | null {
  for (let i = 0; i < quoted.childCount; i++) {
    const inner = quoted.child(i);
    if (!inner) continue;
    if (inner.type === 'sym_lit') return inner;
    if (inner.type === 'vec_lit' || inner.type === 'list_lit') {
      const head = findFirstSymbol(inner);
      return head && head.type === 'sym_lit' ? head : null;
    }
  }
  return null;
}
/**
 * Record a `-module(name).` attribute as a `module` definition.
 * The name is taken from the first `atom` child; nothing is recorded when
 * there is none.
 */
function handleModuleAttr(node: TreeSitterNode, ctx: ExtractorOutput): void {
  const moduleName = findChild(node, 'atom')?.text;
  if (moduleName === undefined) return;

  const line = node.startPosition.row + 1;
  const endLine = nodeEndLine(node);
  ctx.definitions.push({ name: moduleName, kind: 'module', line, endLine });
}
/** Record a `type_alias` / `opaque` declaration as a `type` definition named by its first atom. */
function handleTypeAlias(node: TreeSitterNode, ctx: ExtractorOutput): void {
  const nameNode = findChild(node, 'atom');
  if (!nameNode) return;

  ctx.definitions.push({
    name: nameNode.text,
    kind: 'type',
    line: node.startPosition.row + 1,
    endLine: nodeEndLine(node),
  });
}

/** Record the function declared by a `fun_decl`, using its first `function_clause`. */
function handleFunDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
  const firstClause = findChild(node, 'function_clause');
  if (firstClause) handleFunctionClause(firstClause, ctx);
}

/**
 * Record a function clause (`atom expr_args clause_body`) as a public
 * `function` definition, unless a function with the same name was already
 * recorded.
 *
 * NOTE(review): the duplicate check is by name only, so functions that share a
 * name but differ in arity are reported once — confirm this is intended.
 */
function handleFunctionClause(node: TreeSitterNode, ctx: ExtractorOutput): void {
  const nameNode = findChild(node, 'atom');
  if (!nameNode) return;

  const name = nameNode.text;
  if (ctx.definitions.some((d) => d.name === name && d.kind === 'function')) return;

  const params = extractErlangParams(node);
  // When the clause sits in a fun_decl, the declaration's end is used as the end line.
  const extent = node.parent?.type === 'fun_decl' ? node.parent : node;

  ctx.definitions.push({
    name,
    kind: 'function',
    line: node.startPosition.row + 1,
    endLine: nodeEndLine(extent),
    children: params.length > 0 ? params : undefined,
    visibility: 'public',
  });
}

/**
 * Collect the `var` and `atom` nodes that are direct children of the clause's
 * `expr_args` as parameters. Other argument patterns are not reported.
 */
function extractErlangParams(clauseNode: TreeSitterNode): SubDeclaration[] {
  const argsNode = findChild(clauseNode, 'expr_args');
  if (!argsNode) return [];

  const params: SubDeclaration[] = [];
  for (let i = 0; i < argsNode.childCount; i++) {
    const arg = argsNode.child(i);
    if (arg && (arg.type === 'var' || arg.type === 'atom')) {
      params.push({ name: arg.text, kind: 'parameter', line: arg.startPosition.row + 1 });
    }
  }
  return params;
}

/**
 * Record a `-define(NAME, value).` directive as a `variable` definition.
 *
 * The name is read from the `macro_lhs` child when there is one (its `var` or
 * `atom`, falling back to the whole left-hand-side text). Looking at
 * `macro_lhs` first keeps a direct `atom`/`var` child of the directive — such
 * as a replacement value — from being taken as the macro name. Without a
 * `macro_lhs`, the first direct `var` or `atom` child is used as before.
 */
function handleDefine(node: TreeSitterNode, ctx: ExtractorOutput): void {
  const lhs = findChild(node, 'macro_lhs');

  let name: string;
  if (lhs) {
    name = (findChild(lhs, 'var') || findChild(lhs, 'atom'))?.text ?? lhs.text;
  } else {
    const nameNode = findChild(node, 'var') || findChild(node, 'atom');
    if (!nameNode) return;
    name = nameNode.text;
  }

  ctx.definitions.push({
    name,
    kind: 'variable',
    line: node.startPosition.row + 1,
    endLine: nodeEndLine(node),
  });
}

/** Record an include directive as an import of the quoted path, stored with `names: ['include']`. */
function handleInclude(node: TreeSitterNode, ctx: ExtractorOutput): void {
  const strNode = findChild(node, 'string');
  if (!strNode) return;

  // Drop one leading and one trailing double quote.
  const source = strNode.text.replace(/^"|"$/g, '');
  ctx.imports.push({
    source,
    names: ['include'],
    line: node.startPosition.row + 1,
  });
}
/**
 * Record a `call` node. The callee is the node's first child:
 * - `atom` / `identifier`: a local call, recorded by name;
 * - `remote`: the `atom`/`var` nodes directly under it are collected; the last
 *   one is the function name and the preceding ones, joined with `:`, form the
 *   receiver. A single collected name is recorded without a receiver.
 * Any other callee type is ignored.
 */
function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
  const callee = node.child(0);
  if (!callee) return;

  const line = node.startPosition.row + 1;

  if (callee.type === 'atom' || callee.type === 'identifier') {
    ctx.calls.push({ name: callee.text, line });
    return;
  }
  if (callee.type !== 'remote') return;

  const parts: string[] = [];
  for (let i = 0; i < callee.childCount; i++) {
    const part = callee.child(i);
    if (part && (part.type === 'atom' || part.type === 'var')) parts.push(part.text);
  }
  if (parts.length === 0) return;

  const name = parts[parts.length - 1]!;
  if (parts.length === 1) {
    ctx.calls.push({ name, line });
  } else {
    ctx.calls.push({ name, receiver: parts.slice(0, -1).join(':'), line });
  }
}
/**
 * Extract symbols from a parsed F# source file.
 *
 * Node types handled by the walk: `named_module`, `function_declaration_left`,
 * `type_definition`, `import_decl`, `application_expression`, `dot_expression`.
 *
 * @param tree - Parsed tree; only `tree.rootNode` is read.
 * @param _filePath - Unused.
 * @returns A fresh `ExtractorOutput` filled in by the tree walk.
 */
export function extractFSharpSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput {
  const ctx: ExtractorOutput = {
    definitions: [],
    calls: [],
    imports: [],
    classes: [],
    exports: [],
    typeMap: new Map(),
  };

  walkFSharpNode(tree.rootNode, ctx, null);
  return ctx;
}

/**
 * Pre-order walk dispatching on node type.
 *
 * @param node - Node to visit.
 * @param ctx - Output accumulator, mutated in place.
 * @param currentModule - Name of the enclosing module, or `null`.
 */
function walkFSharpNode(
  node: TreeSitterNode,
  ctx: ExtractorOutput,
  currentModule: string | null,
): void {
  // Children of a named_module are walked with that module's name.
  let moduleForChildren = currentModule;

  switch (node.type) {
    case 'named_module':
      moduleForChildren = handleNamedModule(node, ctx);
      break;
    case 'function_declaration_left':
      handleFunctionDecl(node, ctx, currentModule);
      break;
    case 'type_definition':
      handleTypeDef(node, ctx);
      break;
    case 'import_decl':
      handleImportDecl(node, ctx);
      break;
    case 'application_expression':
      handleApplication(node, ctx);
      break;
    case 'dot_expression':
      handleDotExpression(node, ctx);
      break;
  }

  for (let i = 0; i < node.childCount; i++) {
    const child = node.child(i);
    if (child) walkFSharpNode(child, ctx, moduleForChildren);
  }
}

/**
 * Record a `named_module` as a `module` definition.
 *
 * @returns The module name, or `null` when no `long_identifier` child exists.
 */
function handleNamedModule(node: TreeSitterNode, ctx: ExtractorOutput): string | null {
  const nameNode = findChild(node, 'long_identifier');
  if (!nameNode) return null;

  ctx.definitions.push({
    name: nameNode.text,
    kind: 'module',
    line: node.startPosition.row + 1,
    endLine: nodeEndLine(node),
  });

  return nameNode.text;
}

/**
 * Record the left-hand side of `let name params = ...` as a `function`
 * definition, qualified as `Module.name` when inside a module.
 *
 * A definition with the same recorded name on the same line is not added
 * twice. The check uses the qualified name, because that is what gets stored;
 * comparing the bare identifier would never match inside a module.
 */
function handleFunctionDecl(
  node: TreeSitterNode,
  ctx: ExtractorOutput,
  currentModule: string | null,
): void {
  const nameNode = findChild(node, 'identifier');
  if (!nameNode) return;

  const line = node.startPosition.row + 1;
  const name = currentModule ? `${currentModule}.${nameNode.text}` : nameNode.text;
  if (ctx.definitions.some((d) => d.name === name && d.line === line)) return;

  const params = extractFSharpParams(node);

  ctx.definitions.push({
    name,
    kind: 'function',
    line,
    // The parent node is used for the end line when there is one.
    endLine: nodeEndLine(node.parent ?? node),
    children: params.length > 0 ? params : undefined,
  });
}

/** Collect every identifier under the declaration's `argument_patterns` child as a parameter. */
function extractFSharpParams(declLeft: TreeSitterNode): SubDeclaration[] {
  const params: SubDeclaration[] = [];
  const argPatterns = findChild(declLeft, 'argument_patterns');
  if (argPatterns) collectParamIdentifiers(argPatterns, params);
  return params;
}

/** Depth-first collection of `identifier` nodes into `params`; identifiers are not descended into. */
function collectParamIdentifiers(node: TreeSitterNode, params: SubDeclaration[]): void {
  if (node.type === 'identifier') {
    params.push({ name: node.text, kind: 'parameter', line: node.startPosition.row + 1 });
    return;
  }
  for (let i = 0; i < node.childCount; i++) {
    const child = node.child(i);
    if (child) collectParamIdentifiers(child, params);
  }
}
/**
 * Map a type-definition node to the definition kind recorded for it.
 * Node types other than the four listed here map to `'type'`.
 */
function determineFSharpTypeKind(
  typeDefn: TreeSitterNode,
): 'class' | 'type' | 'record' | 'enum' | 'interface' {
  const nodeType = typeDefn.type;
  if (nodeType === 'union_type_defn') return 'enum';
  if (nodeType === 'record_type_defn') return 'record';
  if (nodeType === 'class_type_defn') return 'class';
  if (nodeType === 'interface_type_defn') return 'interface';
  return 'type';
}

/**
 * Append union cases and record fields found under `typeDefn` to `children`
 * as `property` entries. `union_type_cases` and `record_fields` containers are
 * descended into; other children are ignored.
 */
function extractFSharpTypeMembers(typeDefn: TreeSitterNode, children: SubDeclaration[]): void {
  for (let i = 0; i < typeDefn.childCount; i++) {
    const member = typeDefn.child(i);
    if (!member) continue;

    switch (member.type) {
      case 'union_type_case': {
        const caseName = findChild(member, 'identifier');
        if (caseName) {
          children.push({
            name: caseName.text,
            kind: 'property',
            line: member.startPosition.row + 1,
          });
        }
        break;
      }
      case 'record_field': {
        // Prefer the `name` field; fall back to the first identifier child.
        const fieldName = member.childForFieldName('name') || findChild(member, 'identifier');
        if (fieldName) {
          children.push({
            name: fieldName.text,
            kind: 'property',
            line: member.startPosition.row + 1,
          });
        }
        break;
      }
      case 'union_type_cases':
      case 'record_fields':
        extractFSharpTypeMembers(member, children);
        break;
    }
  }
}

/**
 * Record an `import_decl` (`open Namespace`) as an import whose single name is
 * the last dot-separated segment of the opened path.
 */
function handleImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
  const moduleNode = findChild(node, 'long_identifier');
  if (!moduleNode) return;

  const source = moduleNode.text;
  const lastSegment = source.split('.').pop() || source;
  ctx.imports.push({
    source,
    names: [lastSegment],
    line: node.startPosition.row + 1,
  });
}
/**
 * Record a `dot_expression` as a call with a receiver.
 *
 * The texts of the `identifier` / `long_identifier` nodes directly under the
 * expression are collected in order. With at least two, the last is the call
 * name and the earlier ones, joined with `.`, form the receiver. With fewer
 * than two, nothing is recorded.
 */
function handleDotExpression(node: TreeSitterNode, ctx: ExtractorOutput): void {
  const segments: string[] = [];
  for (let i = 0; i < node.childCount; i++) {
    const part = node.child(i);
    if (!part) continue;
    if (part.type === 'identifier' || part.type === 'long_identifier') {
      segments.push(part.text);
    }
  }
  if (segments.length < 2) return;

  const name = segments[segments.length - 1]!;
  const receiver = segments.slice(0, -1).join('.');
  const call: Call = { name, receiver, line: node.startPosition.row + 1 };
  ctx.calls.push(call);
}
+ * + * Gleam tree-sitter grammar (gleam-lang/tree-sitter-gleam) notes: + * - Functions: function with name, parameters, body fields + * - Types: type_definition with name, constructors + * - Type aliases: type_alias + * - Imports: import with module, unqualified_imports + * - External functions: external_function + * - Constants: constant + */ +export function extractGleamSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput { + const ctx: ExtractorOutput = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + + walkGleamNode(tree.rootNode, ctx); + return ctx; +} + +function walkGleamNode(node: TreeSitterNode, ctx: ExtractorOutput): void { + switch (node.type) { + case 'function': + handleFunction(node, ctx); + break; + case 'type_definition': + handleTypeDef(node, ctx); + break; + case 'type_alias': + handleTypeAlias(node, ctx); + break; + case 'import': + handleImport(node, ctx); + break; + case 'external_function': + handleExternalFunction(node, ctx); + break; + case 'constant': + handleConstant(node, ctx); + break; + case 'function_call': + case 'call': + handleCall(node, ctx); + break; + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walkGleamNode(child, ctx); + } +} + +function handleFunction(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name') || findChild(node, 'identifier'); + if (!nameNode) return; + + const visibility = isPublic(node) ? 'public' : 'private'; + const params = extractParams(node); + + ctx.definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + visibility, + children: params.length > 0 ? 
params : undefined, + }); +} + +function handleExternalFunction(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name') || findChild(node, 'identifier'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + visibility: isPublic(node) ? 'public' : 'private', + }); +} + +function handleTypeDef(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name') || findChild(node, 'type_name'); + if (!nameNode) return; + + const children: SubDeclaration[] = []; + // Extract constructors + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'data_constructor' || child.type === 'type_constructor') { + const ctorName = child.childForFieldName('name') || findChild(child, 'constructor_name'); + if (ctorName) { + children.push({ name: ctorName.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + // Recurse into constructors block + if (child.type === 'data_constructors' || child.type === 'type_constructors') { + for (let j = 0; j < child.childCount; j++) { + const ctor = child.child(j); + if (!ctor) continue; + if (ctor.type === 'data_constructor' || ctor.type === 'type_constructor') { + const ctorName = ctor.childForFieldName('name') || findChild(ctor, 'constructor_name'); + if (ctorName) { + children.push({ + name: ctorName.text, + kind: 'property', + line: ctor.startPosition.row + 1, + }); + } + } + } + } + } + + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + visibility: isPublic(node) ? 'public' : 'private', + children: children.length > 0 ? 
children : undefined, + }); +} + +function handleTypeAlias(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name') || findChild(node, 'type_name'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + visibility: isPublic(node) ? 'public' : 'private', + }); +} + +function handleConstant(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name') || findChild(node, 'identifier'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'variable', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + visibility: isPublic(node) ? 'public' : 'private', + }); +} + +function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void { + const moduleNode = + node.childForFieldName('module') || findChild(node, 'module') || findChild(node, 'string'); + if (!moduleNode) return; + + const source = stripQuotes(moduleNode.text); + const names: string[] = []; + + // Check for unqualified imports + const unqualified = findChild(node, 'unqualified_imports'); + if (unqualified) { + for (let i = 0; i < unqualified.childCount; i++) { + const item = unqualified.child(i); + if (item && (item.type === 'unqualified_import' || item.type === 'identifier')) { + const nameNode = item.childForFieldName('name') || item; + if (nameNode.type !== ',') names.push(nameNode.text); + } + } + } + + // Check for alias (as) + const alias = node.childForFieldName('alias') || findChild(node, 'identifier'); + if (alias && alias !== moduleNode) { + names.push(alias.text); + } + + ctx.imports.push({ + source, + names: names.length > 0 ? 
names : [source.split('/').pop() || source], + line: node.startPosition.row + 1, + }); +} + +function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void { + const funcNode = node.childForFieldName('function') || node.child(0); + if (!funcNode) return; + + if (funcNode.type === 'identifier' || funcNode.type === 'variable') { + ctx.calls.push({ name: funcNode.text, line: node.startPosition.row + 1 }); + } else if (funcNode.type === 'field_access' || funcNode.type === 'module_select') { + const field = funcNode.childForFieldName('field') || funcNode.childForFieldName('label'); + const record = funcNode.child(0); + if (field) { + const call: Call = { name: field.text, line: node.startPosition.row + 1 }; + if (record && record !== field) call.receiver = record.text; + ctx.calls.push(call); + } + } +} + +function extractParams(funcNode: TreeSitterNode): SubDeclaration[] { + const params: SubDeclaration[] = []; + const paramsNode = + funcNode.childForFieldName('parameters') || findChild(funcNode, 'function_parameters'); + if (!paramsNode) return params; + + for (let i = 0; i < paramsNode.childCount; i++) { + const param = paramsNode.child(i); + if (!param) continue; + if (param.type === 'function_parameter' || param.type === 'parameter') { + const nameNode = param.childForFieldName('name') || findChild(param, 'identifier'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + if (param.type === 'identifier') { + params.push({ name: param.text, kind: 'parameter', line: param.startPosition.row + 1 }); + } + } + return params; +} + +function isPublic(node: TreeSitterNode): boolean { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'visibility_modifier' || child.text === 'pub') return true; + } + return false; +} diff --git a/src/extractors/index.ts b/src/extractors/index.ts index 65fd3087c..537994aa4 100644 --- 
a/src/extractors/index.ts +++ b/src/extractors/index.ts @@ -1,19 +1,25 @@ export { extractBashSymbols } from './bash.js'; export { extractCSymbols } from './c.js'; +export { extractClojureSymbols } from './clojure.js'; export { extractCppSymbols } from './cpp.js'; export { extractCSharpSymbols } from './csharp.js'; export { extractDartSymbols } from './dart.js'; export { extractElixirSymbols } from './elixir.js'; +export { extractErlangSymbols } from './erlang.js'; +export { extractFSharpSymbols } from './fsharp.js'; +export { extractGleamSymbols } from './gleam.js'; export { extractGoSymbols } from './go.js'; export { extractHaskellSymbols } from './haskell.js'; export { extractHCLSymbols } from './hcl.js'; export { extractJavaSymbols } from './java.js'; export { extractSymbols } from './javascript.js'; +export { extractJuliaSymbols } from './julia.js'; export { extractKotlinSymbols } from './kotlin.js'; export { extractLuaSymbols } from './lua.js'; export { extractOCamlSymbols } from './ocaml.js'; export { extractPHPSymbols } from './php.js'; export { extractPythonSymbols } from './python.js'; +export { extractRSymbols } from './r.js'; export { extractRubySymbols } from './ruby.js'; export { extractRustSymbols } from './rust.js'; export { extractScalaSymbols } from './scala.js'; diff --git a/src/extractors/julia.ts b/src/extractors/julia.ts new file mode 100644 index 000000000..9a0681c9c --- /dev/null +++ b/src/extractors/julia.ts @@ -0,0 +1,318 @@ +import type { ExtractorOutput, SubDeclaration, TreeSitterNode, TreeSitterTree } from '../types.js'; +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from Julia files. 
+ * + * tree-sitter-julia grammar notes: + * - function_definition: `function name(params)...end` + * - assignment: `name(params) = expr` (short form), LHS is call_expression + * - struct_definition: `struct TypeHead...end`, name is in type_head + * - module_definition: `module Name...end` + * - import_statement / using_statement + * - macro_definition: `macro name(params)...end` + * - abstract_definition: `abstract type Name end` + * - call_expression: function calls + */ +export function extractJuliaSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput { + const ctx: ExtractorOutput = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + + walkJuliaNode(tree.rootNode, ctx, null); + return ctx; +} + +function walkJuliaNode( + node: TreeSitterNode, + ctx: ExtractorOutput, + currentModule: string | null, +): void { + let nextModule = currentModule; + + switch (node.type) { + case 'module_definition': + nextModule = handleModuleDef(node, ctx); + break; + case 'function_definition': + handleFunctionDef(node, ctx, currentModule); + break; + case 'assignment': + handleAssignment(node, ctx, currentModule); + break; + case 'struct_definition': + handleStructDef(node, ctx); + break; + case 'abstract_definition': + handleAbstractDef(node, ctx); + break; + case 'macro_definition': + handleMacroDef(node, ctx, currentModule); + break; + case 'import_statement': + case 'using_statement': + handleImport(node, ctx); + break; + case 'call_expression': + handleCall(node, ctx); + break; + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walkJuliaNode(child, ctx, nextModule); + } +} + +function handleModuleDef(node: TreeSitterNode, ctx: ExtractorOutput): string | null { + const nameNode = node.childForFieldName('name') || findChild(node, 'identifier'); + if (!nameNode) return null; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'module', + line: 
node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + + return nameNode.text; +} + +function handleFunctionDef( + node: TreeSitterNode, + ctx: ExtractorOutput, + currentModule: string | null, +): void { + // function_definition may have a call_expression child as the signature + const callSig = findChild(node, 'call_expression'); + if (callSig) { + const funcNameNode = callSig.child(0); + if (funcNameNode) { + const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text; + const params = extractJuliaParams(callSig); + ctx.definitions.push({ + name, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + }); + return; + } + } + + // Fallback: look for identifier directly + const nameNode = node.childForFieldName('name') || findChild(node, 'identifier'); + if (!nameNode) return; + + const name = currentModule ? `${currentModule}.${nameNode.text}` : nameNode.text; + ctx.definitions.push({ + name, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleAssignment( + node: TreeSitterNode, + ctx: ExtractorOutput, + currentModule: string | null, +): void { + // assignment: LHS operator RHS + // Short function form: add(x, y) = x + y → LHS is call_expression + const lhs = node.child(0); + if (!lhs) return; + + if (lhs.type === 'call_expression') { + const funcNameNode = lhs.child(0); + if (!funcNameNode) return; + + const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text; + const params = extractJuliaParams(lhs); + + ctx.definitions.push({ + name, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: params.length > 0 ? params : undefined, + }); + } +} + +function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void { + // struct_definition: struct type_head fields... 
end + const typeHead = findChild(node, 'type_head'); + const nameNode = typeHead + ? (findChild(typeHead, 'identifier') ?? typeHead) + : findChild(node, 'identifier'); + if (!nameNode) return; + + const children: SubDeclaration[] = []; + // Fields are typed_expression children of struct_definition + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'typed_expression') { + const fieldName = findChild(child, 'identifier'); + if (fieldName) { + children.push({ + name: fieldName.text, + kind: 'property', + line: child.startPosition.row + 1, + }); + } + } + // Plain identifier fields (no type annotation) + if (child.type === 'identifier' && child !== nameNode && typeHead && child !== typeHead) { + children.push({ name: child.text, kind: 'property', line: child.startPosition.row + 1 }); + } + } + + // Check for supertype in type_head (Point <: AbstractPoint) + if (typeHead) { + const subtypeExpr = findChild(typeHead, 'subtype_expression'); + if (subtypeExpr) { + // Find the supertype identifier + for (let i = 0; i < subtypeExpr.childCount; i++) { + const child = subtypeExpr.child(i); + if (child?.type === 'identifier' && i > 0) { + ctx.classes.push({ + name: nameNode.text, + extends: child.text, + line: node.startPosition.row + 1, + }); + } + } + } + } + + ctx.definitions.push({ + name: nameNode.text, + kind: 'struct', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + children: children.length > 0 ? 
children : undefined, + }); +} + +function handleAbstractDef(node: TreeSitterNode, ctx: ExtractorOutput): void { + const nameNode = node.childForFieldName('name') || findChild(node, 'identifier'); + if (!nameNode) return; + + ctx.definitions.push({ + name: nameNode.text, + kind: 'type', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleMacroDef( + node: TreeSitterNode, + ctx: ExtractorOutput, + currentModule: string | null, +): void { + const nameNode = node.childForFieldName('name') || findChild(node, 'identifier'); + if (!nameNode) return; + + const name = currentModule ? `${currentModule}.@${nameNode.text}` : `@${nameNode.text}`; + ctx.definitions.push({ + name, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); +} + +function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void { + const names: string[] = []; + let source = ''; + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if ( + child.type === 'identifier' || + child.type === 'scoped_identifier' || + child.type === 'selected_import' + ) { + if (!source) source = child.text; + names.push(child.text.split('.').pop() || child.text); + } + } + + if (source) { + ctx.imports.push({ + source, + names: names.length > 0 ? 
names : [source], + line: node.startPosition.row + 1, + }); + } +} + +function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void { + // Don't record if parent is assignment LHS (that's a function definition) + if (node.parent?.type === 'assignment' && node === node.parent.child(0)) return; + // Don't record if parent is function_definition (that's a signature) + if (node.parent?.type === 'function_definition') return; + + const funcNode = node.child(0); + if (!funcNode) return; + + if (funcNode.type === 'identifier') { + ctx.calls.push({ name: funcNode.text, line: node.startPosition.row + 1 }); + } else if (funcNode.type === 'field_expression' || funcNode.type === 'scoped_identifier') { + const parts = funcNode.text.split('.'); + if (parts.length >= 2) { + ctx.calls.push({ + name: parts[parts.length - 1]!, + receiver: parts.slice(0, -1).join('.'), + line: node.startPosition.row + 1, + }); + } else { + ctx.calls.push({ name: funcNode.text, line: node.startPosition.row + 1 }); + } + } +} + +function extractJuliaParams(callExpr: TreeSitterNode): SubDeclaration[] { + const params: SubDeclaration[] = []; + const argList = findChild(callExpr, 'argument_list') || findChild(callExpr, 'tuple_expression'); + if (!argList) return params; + + for (let i = 0; i < argList.childCount; i++) { + const child = argList.child(i); + if (!child) continue; + if (child.type === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + if (child.type === 'typed_parameter' || child.type === 'typed_expression') { + const nameNode = findChild(child, 'identifier'); + if (nameNode) { + params.push({ + name: nameNode.text, + kind: 'parameter', + line: child.startPosition.row + 1, + }); + } + } + if (child.type === 'optional_parameter' || child.type === 'default_parameter') { + const nameNode = findChild(child, 'identifier'); + if (nameNode) { + params.push({ + name: nameNode.text, + kind: 'parameter', + line: 
child.startPosition.row + 1, + }); + } + } + } + return params; +} diff --git a/src/extractors/r.ts b/src/extractors/r.ts new file mode 100644 index 000000000..19cf0e723 --- /dev/null +++ b/src/extractors/r.ts @@ -0,0 +1,253 @@ +import type { ExtractorOutput, SubDeclaration, TreeSitterNode, TreeSitterTree } from '../types.js'; +import { findChild, nodeEndLine } from './helpers.js'; + +/** + * Extract symbols from R files. + * + * tree-sitter-r grammar (r-lib/tree-sitter-r) notes: + * - Assignments: binary_operator with `<-` or `=` operator + * - Functions: function_definition as RHS of assignment + * - Calls: call node with function/arguments fields + * - Imports: library() and require() calls + * - S4 classes: setClass(), setRefClass() + */ +export function extractRSymbols(tree: TreeSitterTree, _filePath: string): ExtractorOutput { + const ctx: ExtractorOutput = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + + walkRNode(tree.rootNode, ctx); + return ctx; +} + +function walkRNode(node: TreeSitterNode, ctx: ExtractorOutput): void { + switch (node.type) { + case 'binary_operator': + handleBinaryOp(node, ctx); + break; + case 'call': + handleCall(node, ctx); + break; + } + + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child) walkRNode(child, ctx); + } +} + +function handleBinaryOp(node: TreeSitterNode, ctx: ExtractorOutput): void { + // binary_operator: child[0]=LHS, child[1]=operator (<- or =), child[2]=RHS + if (node.childCount < 3) return; + + const lhs = node.child(0); + const op = node.child(1); + const rhs = node.child(2); + + if (!lhs || !op || !rhs) return; + if (op.text !== '<-' && op.text !== '=' && op.text !== '<<-') return; + if (lhs.type !== 'identifier') return; + + if (rhs.type === 'function_definition') { + const params = extractRParams(rhs); + ctx.definitions.push({ + name: lhs.text, + kind: 'function', + line: node.startPosition.row + 1, + endLine: 
nodeEndLine(node), + children: params.length > 0 ? params : undefined, + }); + } else { + // Variable assignment — only record top-level + if (node.parent?.type === 'program') { + ctx.definitions.push({ + name: lhs.text, + kind: 'variable', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + } + } +} + +function extractRParams(funcDef: TreeSitterNode): SubDeclaration[] { + const params: SubDeclaration[] = []; + const paramsNode = findChild(funcDef, 'parameters'); + if (!paramsNode) return params; + + for (let i = 0; i < paramsNode.childCount; i++) { + const child = paramsNode.child(i); + if (!child) continue; + if (child.type === 'parameter') { + // parameter node has name and possibly default value + const nameNode = child.childForFieldName('name') || findChild(child, 'identifier'); + if (nameNode) { + params.push({ name: nameNode.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } else if (child.text && child.text !== ',' && child.text !== '(' && child.text !== ')') { + // Some grammars have the param as plain text + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } + if (child.type === 'identifier') { + params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 }); + } + } + return params; +} + +function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void { + // call: child[0]=function, then arguments + const funcNode = node.child(0); + if (!funcNode) return; + + const funcName = funcNode.text; + + // library() and require() are imports + if (funcName === 'library' || funcName === 'require') { + handleLibraryCall(node, ctx); + return; + } + + // source() is a file import + if (funcName === 'source') { + handleSourceCall(node, ctx); + return; + } + + // setClass / setRefClass for S4 + if (funcName === 'setClass' || funcName === 'setRefClass') { + handleSetClass(node, ctx); + return; + } + + if (funcName === 'setGeneric' || funcName === 
'setMethod') { + handleSetGeneric(node, ctx); + return; + } + + // Regular call + if (funcNode.type === 'identifier') { + ctx.calls.push({ name: funcName, line: node.startPosition.row + 1 }); + } else if (funcNode.type === 'namespace_operator') { + // pkg::func + const parts = funcName.split('::'); + if (parts.length >= 2) { + ctx.calls.push({ + name: parts[parts.length - 1]!, + receiver: parts.slice(0, -1).join('::'), + line: node.startPosition.row + 1, + }); + } + } +} + +function handleLibraryCall(node: TreeSitterNode, ctx: ExtractorOutput): void { + // Find the package name in arguments + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + if (child.type === 'arguments') { + for (let j = 0; j < child.childCount; j++) { + const arg = child.child(j); + if (!arg) continue; + if (arg.type === 'identifier') { + ctx.imports.push({ + source: arg.text, + names: [arg.text], + line: node.startPosition.row + 1, + }); + return; + } + if (arg.type === 'string' || arg.type === 'string_content') { + const text = arg.text.replace(/^["']|["']$/g, ''); + ctx.imports.push({ + source: text, + names: [text], + line: node.startPosition.row + 1, + }); + return; + } + // Argument might be wrapped + if (arg.type === 'argument') { + const id = findChild(arg, 'identifier') || findChild(arg, 'string'); + if (id) { + const text = id.text.replace(/^["']|["']$/g, ''); + ctx.imports.push({ + source: text, + names: [text], + line: node.startPosition.row + 1, + }); + return; + } + } + } + } + } +} + +function handleSourceCall(node: TreeSitterNode, ctx: ExtractorOutput): void { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child || child.type !== 'arguments') continue; + for (let j = 0; j < child.childCount; j++) { + const arg = child.child(j); + if (!arg) continue; + if (arg.type === 'string') { + const text = arg.text.replace(/^["']|["']$/g, ''); + ctx.imports.push({ + source: text, + names: ['source'], + 
line: node.startPosition.row + 1, + }); + return; + } + } + } +} + +function handleSetClass(node: TreeSitterNode, ctx: ExtractorOutput): void { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child || child.type !== 'arguments') continue; + for (let j = 0; j < child.childCount; j++) { + const arg = child.child(j); + if (!arg) continue; + if (arg.type === 'string') { + const name = arg.text.replace(/^["']|["']$/g, ''); + ctx.definitions.push({ + name, + kind: 'class', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + return; + } + } + } +} + +function handleSetGeneric(node: TreeSitterNode, ctx: ExtractorOutput): void { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child || child.type !== 'arguments') continue; + for (let j = 0; j < child.childCount; j++) { + const arg = child.child(j); + if (!arg) continue; + if (arg.type === 'string') { + const name = arg.text.replace(/^["']|["']$/g, ''); + ctx.definitions.push({ + name, + kind: 'function', + line: node.startPosition.row + 1, + endLine: nodeEndLine(node), + }); + return; + } + } + } +} diff --git a/src/types.ts b/src/types.ts index 200685b1a..0491ec8a5 100644 --- a/src/types.ts +++ b/src/types.ts @@ -96,7 +96,13 @@ export type LanguageId = | 'dart' | 'zig' | 'haskell' - | 'ocaml'; + | 'ocaml' + | 'fsharp' + | 'gleam' + | 'clojure' + | 'julia' + | 'r' + | 'erlang'; /** Engine mode selector. 
*/ export type EngineMode = 'native' | 'wasm' | 'auto'; diff --git a/tests/parsers/clojure.test.ts b/tests/parsers/clojure.test.ts new file mode 100644 index 000000000..e4c8176ab --- /dev/null +++ b/tests/parsers/clojure.test.ts @@ -0,0 +1,52 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractClojureSymbols } from '../../src/domain/parser.js'; + +describe('Clojure parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseClojure(code) { + const parser = parsers.get('clojure'); + if (!parser) throw new Error('Clojure parser not available'); + const tree = parser.parse(code); + return extractClojureSymbols(tree, 'test.clj'); + } + + it('extracts namespace definitions', () => { + const symbols = parseClojure(`(ns myapp.core + (:require [clojure.string :as str]))`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'myapp.core', kind: 'module' }), + ); + }); + + it('extracts function definitions', () => { + const symbols = parseClojure(`(defn greet [name] + (str "Hello " name))`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'function' })); + }); + + it('extracts protocol definitions', () => { + const symbols = parseClojure(`(defprotocol Printable + (print-it [this]))`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'Printable', kind: 'interface' }), + ); + }); + + it('extracts imports from ns form', () => { + const symbols = parseClojure(`(ns myapp.core + (:require [clojure.string :as str] + [clojure.set]))`); + expect(symbols.imports.length).toBeGreaterThanOrEqual(1); + }); + + it('extracts function calls', () => { + const symbols = parseClojure(`(println "Hello") +(map inc [1 2 3])`); + expect(symbols.calls.length).toBeGreaterThanOrEqual(1); + }); +}); diff --git a/tests/parsers/erlang.test.ts b/tests/parsers/erlang.test.ts new file mode 100644 index 
000000000..cde7e4220 --- /dev/null +++ b/tests/parsers/erlang.test.ts @@ -0,0 +1,46 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractErlangSymbols } from '../../src/domain/parser.js'; + +describe('Erlang parser', () => { + let parsers: any; + + beforeAll(async () => { + parsers = await createParsers(); + }); + + function parseErlang(code) { + const parser = parsers.get('erlang'); + if (!parser) throw new Error('Erlang parser not available'); + const tree = parser.parse(code); + return extractErlangSymbols(tree, 'test.erl'); + } + + it('extracts module declarations', () => { + const symbols = parseErlang(`-module(mymodule).`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'mymodule', kind: 'module' }), + ); + }); + + it('extracts function definitions', () => { + const symbols = parseErlang(`greet(Name) -> + io:format("Hello ~s~n", [Name]).`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'function' })); + }); + + it('extracts record definitions', () => { + const symbols = parseErlang(`-record(person, {name, age}).`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'record' })); + }); + + it('extracts import attributes', () => { + const symbols = parseErlang(`-import(lists, [map/2, filter/2]).`); + expect(symbols.imports.length).toBeGreaterThanOrEqual(1); + }); + + it('extracts function calls', () => { + const symbols = parseErlang(`start() -> + io:format("Hello~n").`); + expect(symbols.calls.length).toBeGreaterThanOrEqual(1); + }); +}); diff --git a/tests/parsers/fsharp.test.ts b/tests/parsers/fsharp.test.ts new file mode 100644 index 000000000..4957617b8 --- /dev/null +++ b/tests/parsers/fsharp.test.ts @@ -0,0 +1,52 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractFSharpSymbols } from '../../src/domain/parser.js'; + +describe('F# parser', () => { + let parsers: any; + + 
beforeAll(async () => { + parsers = await createParsers(); + }); + + /** Parse an F# snippet with the fsharp parser and return what extractFSharpSymbols yields for test.fs; throws when no such parser is available. */ function parseFSharp(code) { + const parser = parsers.get('fsharp'); + if (!parser) throw new Error('F# parser not available'); + const tree = parser.parse(code); + return extractFSharpSymbols(tree, 'test.fs'); + } + + it('extracts module definitions', () => { + const symbols = parseFSharp(`module MyApp.Utils + +let add x y = x + y`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MyApp.Utils', kind: 'module' }), + ); + }); + + it('extracts function definitions', () => { + const symbols = parseFSharp(`let add x y = x + y`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'function' })); + }); + + it('extracts type definitions', () => { + const symbols = parseFSharp(`type Color = + | Red + | Green + | Blue`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ kind: expect.stringMatching(/type|enum/) }), + ); + }); + + it('extracts open directives as imports', () => { + const symbols = parseFSharp(`open System +open System.IO`); + expect(symbols.imports.length).toBeGreaterThanOrEqual(1); /* NOTE(review): the fixture has two open directives but only a minimum of one import is asserted */ + }); + + it('extracts function calls', () => { + const symbols = parseFSharp(`let result = List.map (fun x -> x + 1) [1; 2; 3]`); + expect(symbols.calls.length).toBeGreaterThanOrEqual(1); + }); +}); diff --git a/tests/parsers/gleam.test.ts b/tests/parsers/gleam.test.ts new file mode 100644 index 000000000..c634fe5aa --- /dev/null +++ b/tests/parsers/gleam.test.ts @@ -0,0 +1,48 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractGleamSymbols } from '../../src/domain/parser.js'; + +/** Checks the definitions, imports and calls that extractGleamSymbols reports for small Gleam snippets. */ describe('Gleam parser', () => { + let parsers: any; /* assigned in beforeAll from createParsers() */ + + beforeAll(async () => { + parsers = await createParsers(); + }); + + /** Parse a Gleam snippet with the gleam parser and return what extractGleamSymbols yields for test.gleam; throws when no such parser is available. */ function parseGleam(code) { + const parser = parsers.get('gleam'); + if (!parser) throw new Error('Gleam parser not available'); + const tree = parser.parse(code); + return
extractGleamSymbols(tree, 'test.gleam'); + } + + it('extracts public function definitions', () => { + const symbols = parseGleam(`pub fn greet(name: String) -> String { + "Hello " <> name +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'greet', kind: 'function' }), + ); + }); + + it('extracts type definitions', () => { + const symbols = parseGleam(`pub type Color { + Red + Green + Blue +}`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'type' })); /* NOTE(review): only the kind is checked, not the name Color */ + }); + + it('extracts imports', () => { + const symbols = parseGleam(`import gleam/io +import gleam/string`); + expect(symbols.imports.length).toBeGreaterThanOrEqual(1); /* NOTE(review): the fixture has two imports but only a minimum of one is asserted */ + }); + + it('extracts function calls', () => { + const symbols = parseGleam(`pub fn main() { + io.println("Hello") +}`); + expect(symbols.calls.length).toBeGreaterThanOrEqual(1); + }); +}); diff --git a/tests/parsers/julia.test.ts b/tests/parsers/julia.test.ts new file mode 100644 index 000000000..b1fc3bb35 --- /dev/null +++ b/tests/parsers/julia.test.ts @@ -0,0 +1,58 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractJuliaSymbols } from '../../src/domain/parser.js'; + +/** Checks the definitions, imports and calls that extractJuliaSymbols reports for small Julia snippets. */ describe('Julia parser', () => { + let parsers: any; /* assigned in beforeAll from createParsers() */ + + beforeAll(async () => { + parsers = await createParsers(); + }); + + /** Parse a Julia snippet with the julia parser and return what extractJuliaSymbols yields for test.jl; throws when no such parser is available. */ function parseJulia(code) { + const parser = parsers.get('julia'); + if (!parser) throw new Error('Julia parser not available'); + const tree = parser.parse(code); + return extractJuliaSymbols(tree, 'test.jl'); + } + + it('extracts function definitions', () => { + const symbols = parseJulia(`function greet(name) + println("Hello $name") +end`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'function' })); /* NOTE(review): only the kind is checked, not the name greet */ + }); + + it('extracts short function definitions', () => { + const symbols = parseJulia(`add(x, y) = x + y`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'function' })); /* NOTE(review): only the kind is checked, not the name add */ + }); + +
it('extracts struct definitions', () => { + const symbols = parseJulia(`struct Point + x::Float64 + y::Float64 +end`); + expect(symbols.definitions).toContainEqual(expect.objectContaining({ kind: 'struct' })); /* NOTE(review): only the kind is checked, not the name Point */ + }); + + it('extracts module definitions', () => { + const symbols = parseJulia(`module MyModule + export greet +end`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'MyModule', kind: 'module' }), + ); + }); + + it('extracts import/using statements', () => { + const symbols = parseJulia(`using LinearAlgebra +import Base: show`); + expect(symbols.imports.length).toBeGreaterThanOrEqual(1); /* NOTE(review): the fixture has one using and one import statement but only a minimum of one entry is asserted */ + }); + + it('extracts function calls', () => { + const symbols = parseJulia(`println("Hello") +push!(arr, 1)`); + expect(symbols.calls.length).toBeGreaterThanOrEqual(1); + }); +}); diff --git a/tests/parsers/r.test.ts b/tests/parsers/r.test.ts new file mode 100644 index 000000000..85380c590 --- /dev/null +++ b/tests/parsers/r.test.ts @@ -0,0 +1,47 @@ +import { beforeAll, describe, expect, it } from 'vitest'; +import { createParsers, extractRSymbols } from '../../src/domain/parser.js'; + +/** Checks the definitions, imports and calls that extractRSymbols reports for small R snippets. */ describe('R parser', () => { + let parsers: any; /* assigned in beforeAll from createParsers() */ + + beforeAll(async () => { + parsers = await createParsers(); + }); + + /** Parse an R snippet with the r parser and return what extractRSymbols yields for test.R; throws when no such parser is available. */ function parseR(code) { + const parser = parsers.get('r'); + if (!parser) throw new Error('R parser not available'); + const tree = parser.parse(code); + return extractRSymbols(tree, 'test.R'); + } + + it('extracts function definitions', () => { + const symbols = parseR(`greet <- function(name) { + paste("Hello", name) +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'greet', kind: 'function' }), + ); + }); + + it('extracts function definitions with = assignment', () => { + const symbols = parseR(`add = function(x, y) { + x + y +}`); + expect(symbols.definitions).toContainEqual( + expect.objectContaining({ name: 'add', kind: 'function' }), + ); + }); + + it('extracts library imports', () => { + const
symbols = parseR(`library(dplyr) +require(ggplot2)`); + expect(symbols.imports.length).toBeGreaterThanOrEqual(1); /* NOTE(review): the fixture has a library call and a require call but only a minimum of one import is asserted */ + }); + + it('extracts function calls', () => { + const symbols = parseR(`print("Hello") +mean(c(1, 2, 3))`); + expect(symbols.calls.length).toBeGreaterThanOrEqual(1); + }); +});