diff --git a/.github/workflows/lsp-tests.yml b/.github/workflows/lsp-tests.yml index c7db091e..30ccd4d8 100644 --- a/.github/workflows/lsp-tests.yml +++ b/.github/workflows/lsp-tests.yml @@ -66,7 +66,7 @@ jobs: - name: Setup PHP for phpactor uses: shivammathur/setup-php@v2 with: - php-version: '8.1' + php-version: '8.2' tools: composer - name: Install phpactor (PHP language server) diff --git a/Cargo.toml b/Cargo.toml index cbbbdd8a..61a76df1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,6 +44,7 @@ tree-sitter-html = "0.23.2" tree-sitter-md = "0.3.2" tree-sitter-yaml = "0.6.1" tree-sitter-solidity = "=1.2.10" +tree-sitter-crystal = { git = "https://github.com/crystal-lang-tools/tree-sitter-crystal", rev = "f71f4ca62ac0" } ast-grep-core = "0.36.1" ast-grep-language = "0.36.1" clap = { version = "4.3", features = ["derive"] } diff --git a/README.md b/README.md index 8fbfec03..1d25cb59 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ npx -y @probelabs/probe@latest agent "Refactor the login function" --allow-edit - **Smart Ranking**: BM25, TF-IDF, and hybrid algorithms with optional BERT reranking - **Token-Aware**: `--max-tokens` budget, session-based dedup to avoid repeating context - **Built-in Agent**: Multi-provider (Anthropic, OpenAI, Google, Bedrock) with retry, fallback, and context compaction -- **Multi-Language**: Rust, Python, JavaScript, TypeScript, Go, C/C++, Java, Ruby, PHP, Swift, Solidity, C#, and more +- **Multi-Language**: Rust, Python, JavaScript, TypeScript, Go, C/C++, Java, Ruby, PHP, Swift, Solidity, Crystal, C#, and more --- @@ -469,6 +469,7 @@ cargo install --path . 
| PHP | `.php` | | Swift | `.swift` | | Solidity | `.sol` | +| Crystal | `.cr` | | C# | `.cs` | | Markdown | `.md` | diff --git a/docs/reference/adding-languages.md b/docs/reference/adding-languages.md index 1b3e1398..84c85789 100644 --- a/docs/reference/adding-languages.md +++ b/docs/reference/adding-languages.md @@ -1,10 +1,13 @@ # Adding Support for New Languages -This guide lists the exact files you must modify to add a new language to Probe. +This guide lists the main files you must audit to add a new language to Probe. ## Required File Modifications -Adding a new language requires modifying exactly **11 files**. Missing any of these will result in incomplete language support. +Adding a new language requires auditing every language map that affects parsing, +search, extraction, query, CLI validation, docs, and LSP/indexing behavior. +The list below covers the core Probe CLI path; LSP daemon support adds +additional maps under `lsp-daemon/src`. ### Step 1: Core Language Implementation (4 files) ### Step 2: Search & Extract Integration (4 files) diff --git a/docs/reference/crystal-language-support-plan.md b/docs/reference/crystal-language-support-plan.md new file mode 100644 index 00000000..e805fca2 --- /dev/null +++ b/docs/reference/crystal-language-support-plan.md @@ -0,0 +1,426 @@ +# Crystal Language Support Plan + +This plan describes how to add first-class Crystal support to Probe. The goal is +parity with existing tree-sitter-backed languages: `search`, `extract`, +`symbols`, `query`, language filtering, source context, documentation, and +best-effort LSP daemon integration. + +## Current Feasibility + +- Crystal source files use the `.cr` extension. +- The Crystal language repository is `crystal-lang/crystal`. +- The current Crystal release observed during planning was `1.20.2`, published + on 2026-05-15. +- The likely tree-sitter grammar is + `https://github.com/crystal-lang-tools/tree-sitter-crystal`. 
+- `tree-sitter-crystal` has Rust bindings and tree-sitter metadata for `.cr` + files, but no GitHub release was available during planning. +- `cargo search tree-sitter-crystal --limit 5` returned no crates.io package in + this environment, so implementation should assume a pinned git dependency + unless crates.io availability changes. +- Probe currently uses `tree-sitter = "0.24.5"`, so the first implementation + task must verify grammar/runtime compatibility before broader wiring. + +## Branching + +Start from latest `main`, not from an active feature branch: + +```bash +git fetch origin +git checkout main +git pull --ff-only origin main +git checkout -b add-crystal-language-support +``` + +If the work starts while another PR branch is checked out, keep those changes +separate and do not commit Crystal work on top of the old branch. + +## Dependency Plan + +Add `tree-sitter-crystal` to both Rust crates that directly use tree-sitter +grammars: + +- `Cargo.toml` +- `lsp-daemon/Cargo.toml` + +Expected shape if no crates.io release exists: + +```toml +tree-sitter-crystal = { git = "https://github.com/crystal-lang-tools/tree-sitter-crystal", rev = "<pinned-commit-sha>" } +``` + +Use a specific commit rather than a floating branch. After adding the dependency, +run at least: + +```bash +cargo check -p probe-code +cargo check -p lsp-daemon +``` + +If the grammar fails with a tree-sitter ABI/runtime mismatch, try an older +grammar commit before considering vendoring. Do not proceed with the full +integration until a stable dependency path is proven. + +Implementation note: `50ca9e6fcfb16a2cbcad59203cfd8ad650e25c49` built but +failed at runtime with tree-sitter language ABI 15. Pinning `f71f4ca62ac0` +keeps the Rust `LANGUAGE` binding and uses language ABI 14, which is compatible +with Probe's current tree-sitter runtime. 
+ +## Core Language Integration + +Add a Crystal implementation: + +- `src/language/crystal.rs` +- `src/language/mod.rs` +- `src/language/factory.rs` + +The language module should follow the style of `src/language/solidity.rs` and +`src/language/ruby.rs`, because Crystal is syntactically Ruby-like but needs +explicit symbol support beyond Ruby's current minimal implementation. + +`CrystalLanguage::get_tree_sitter_language()` should return: + +```rust +tree_sitter_crystal::LANGUAGE.into() +``` + +Verify the exact exported symbol from the dependency. The upstream Rust binding +observed during planning exposes `LANGUAGE`. + +Candidate symbol node kinds from `tree-sitter-crystal`: + +- `class_def` +- `module_def` +- `struct_def` +- `enum_def` +- `method_def` +- `abstract_method_def` +- `macro_def` +- `lib_def` +- `fun_def` +- `alias` +- `annotation_def` +- `type_def` +- `union_def` + +Implement: + +- `is_acceptable_parent()` +- `is_symbol_node()` +- `is_test_node()` +- `find_parent_function()` +- `get_symbol_signature()` + +Recommended parent/function handling: + +- Treat `method_def`, `abstract_method_def`, `macro_def`, and `fun_def` as + function-like parents. +- Treat `class_def`, `module_def`, `struct_def`, `enum_def`, and `lib_def` as + containers. +- Include `alias`, `annotation_def`, `type_def`, and `union_def` as symbol + nodes even if they are not large extraction parents. + +Recommended test detection: + +- Test files: rely on existing file-level test directory and naming filters + where possible. +- Crystal spec files commonly use `_spec.cr`; ensure existing test-file + detection catches that extension pattern. +- Node-level tests should detect common spec DSL calls such as `describe`, + `context`, `it`, and `pending` where the grammar exposes call nodes clearly. + +Recommended signature handling: + +- For container nodes, return the declaration header and replace the body with + a compact form such as `class User ... end` or `module API ... end`. 
+- For method-like nodes, return the `def` or `macro` signature without the body. +- For aliases, annotations, type defs, and union defs, return the full one-line + declaration when possible. + +## Query Support + +Update `src/query.rs`. + +`ast-grep-language` may not include Crystal. If not, extend the existing local +wrapper pattern: + +```rust +enum ProbeQueryLang { + Builtin(SupportLang), + Solidity, + Crystal, +} +``` + +Then map: + +- `crystal` +- `cr` + +to `ProbeQueryLang::Crystal`, and return +`tree_sitter_crystal::LANGUAGE.into()` from `get_ts_language()`. + +Add `.cr` to query file extension matching and auto-detection. Verify both: + +```bash +cargo run -- query 'def active? : Bool' tests/fixtures/crystal/project1 --language crystal +cargo run -- query 'class User < Serializable' tests/fixtures/crystal/project1 +``` + +## Search and Extraction Wiring + +Add Crystal mappings wherever Probe maps languages to extensions or display +names: + +- `src/cli.rs` +- `src/main.rs` +- `src/semantic_context.rs` +- `src/search/filters.rs` +- `src/search/file_list_cache.rs` +- `src/search/results_formatter.rs` +- `src/search/search_output.rs` +- `src/extract/formatter.rs` + +Expected mappings: + +- Language names: `crystal`, `cr` +- Extension: `.cr` +- Syntax label: `crystal` +- Comment prefix: `#` + +Search language filters must work through both CLI option and query hints: + +```bash +cargo run -- search "HTTP::Server" tests/fixtures/crystal/project1 --language crystal --no-gitignore +cargo run -- search "HTTP::Server AND lang:crystal" tests/fixtures/crystal/project1 --no-gitignore +``` + +## LSP Daemon Integration + +Crystal LSP support should be best-effort and must not block tree-sitter support +unless the ticket specifically requires LSP behavior. 
+ +Update: + +- `lsp-daemon/src/language_detector.rs` +- `lsp-daemon/src/lsp_registry.rs` +- `lsp-daemon/src/lsp_server.rs` +- `lsp-daemon/src/workspace_resolver.rs` +- `lsp-daemon/src/indexing/pipelines.rs` +- `lsp-daemon/src/indexing/lsp_enrichment_worker.rs` +- `lsp-daemon/src/lsp_database_adapter.rs` + +Add: + +- `Language::Crystal` +- `Language::Crystal.as_str() == "crystal"` +- `.cr` extension detection +- LSP `languageId = "crystal"` +- Workspace markers: `shard.yml`, `shard.lock` +- Pipeline extension list: `["cr"]` +- Tree-sitter parser map for `crystal` and `cr` + +LSP server candidates to evaluate: + +- `crystalline` +- `crystal-language-server` + +`crystalline` was observed as passively maintained and explicitly limited. Pick +the default only after checking installability and basic initialize/open +behavior. If neither server is reliable in this environment, document Crystal +as tree-sitter supported and LSP configurable by user override. + +## Documentation + +Update public docs after implementation: + +- `README.md` +- `docs/reference/supported-languages.md` +- `docs/reference/adding-languages.md` if the generic checklist changes +- `lsp-daemon/README.md` if a default Crystal LSP is added +- npm MCP/tool descriptions if they enumerate supported languages + +Documentation should state Crystal support covers `.cr` files and tree-sitter +AST extraction. Only claim LSP support if an LSP server was configured and +smoke-tested. 
+ +## Test Fixtures + +Create a realistic Crystal fixture: + +```text +tests/fixtures/crystal/project1/ + shard.yml + src/ + server.cr + calculator.cr + models/user.cr + spec/ + calculator_spec.cr +``` + +Fixture should include: + +- `module` +- `class` +- `struct` +- `enum` +- instance method +- class method +- abstract method +- macro +- alias +- annotation or annotation definition if the grammar handles it cleanly +- `lib`/`fun` declaration if practical +- spec DSL calls in `_spec.cr` + +Add a test file such as `tests/crystal_language_tests.rs`, mirroring +`tests/solidity_language_tests.rs`. + +Minimum regression tests: + +- `extract_symbols()` returns top-level modules/classes and nested methods. +- `process_file_for_extraction(..., Some("symbol_name"), ...)` extracts a + Crystal method without pulling unrelated methods. +- `perform_query()` supports `--language crystal`. +- `perform_probe()` with `language: Some("crystal")` returns only `.cr` files. +- Test exclusion skips `_spec.cr` when `allow_tests` is false. +- Source context reports `"language": "crystal"` for `.cr` files. +- Language aliases normalize `cr` to `crystal`. + +## Required Real-Repository Dogfood + +Testing on a real Crystal project is required before raising the PR. 
Use the +official Crystal compiler repository as the primary dogfood target: + +- `https://github.com/crystal-lang/crystal` + +Run this after fixture and focused unit tests pass: + +```bash +tmpdir=$(mktemp -d) +git clone --depth 1 https://github.com/crystal-lang/crystal "$tmpdir/crystal" +``` + +Run representative commands against that checkout: + +```bash +cargo run -- symbols "$tmpdir/crystal/src/compiler/crystal/compiler.cr" +cargo run -- query 'def run' "$tmpdir/crystal/src" --language crystal --max-results 20 +cargo run -- search "SemanticVisitor" "$tmpdir/crystal/src" --language crystal --max-results 20 --no-gitignore +cargo run -- extract "$tmpdir/crystal/src/compiler/crystal/compiler.cr#compile" +``` + +Adjust exact files and symbol names based on the current upstream tree, but do +not replace this with only synthetic fixtures. At minimum, verify `symbols`, +`extract`, `query`, and `search --language crystal` on files from +`crystal-lang/crystal`. Save the successful command output summaries for the PR +body. + +Current branch verification on an up-to-date local `crystal-lang/crystal` +checkout: + +- `probe symbols src/compiler/crystal/compiler.cr` extracted `module Crystal`, + `class Compiler`, enums, nested `CompilationUnit`, and `compile` methods. +- `probe query 'def compile' src/compiler/crystal --language crystal --format json` + returned 4 method matches across `command.cr`, `compiler.cr`, and + `interpreter/compiler.cr`. +- `probe query 'def compile' src/compiler/crystal --language cr --max-results 3 --with-context --format json` + accepted the `cr` alias and returned Crystal query context metadata. +- `probe query 'class Compiler' src/compiler/crystal/compiler.cr --format json` + auto-detected `.cr` and returned the full `class Compiler` block. +- `probe search 'Crystal::System::Dir AND lang:crystal' . --no-gitignore --max-results 5` + parsed `Crystal::System::Dir` as a namespaced term and returned only `.cr` + files. 
+- `probe search '"Crystal::System::Dir" AND lang:crystal' . --strict-elastic-syntax --max-results 3 --no-gitignore` + verified strict syntax works for quoted Crystal namespaced constants. +- `probe search 'describe AND lang:crystal' spec --max-results 5 --no-gitignore --format json` + returned zero results by default, while adding `--allow-tests --max-bytes 700 --max-tokens 250` + returned Crystal spec blocks within the requested limits. +- `probe extract src/compiler/crystal/compiler.cr#compile --format plain` + found both `compile` method definitions in `compiler.cr`. +- `probe extract src/compiler/crystal/compiler.cr:228 --format plain` + extracted the enclosing `compile` method from a line target. +- `probe extract src/compiler/crystal/compiler.cr#compile --dry-run --format plain` + reported both matching method ranges without returning code. + +Current branch LSP verification: + +- `cargo test -p lsp-daemon test_crystal_parser_pool_and_node_mapping` + verified Crystal parser pool creation for `crystal` and `cr`. +- `cargo test -p lsp-daemon test_crystal_symbol_extraction_uses_parser_pool` + verified the tree-sitter analyzer extracts Crystal module, class, and method + names instead of keyword tokens. +- `cargo test -p lsp-daemon test_find_symbol_at_position_uses_crystal_tree_sitter` + verified Crystal `find_symbol_at_position()` resolution for both `crystal` + and `cr`. +- Crystal LSP tool version checks could not run in this environment because the + tools are not installed: `crystalline --version`, + `crystal-language-server --version`, and `crystal --version` all failed with + `command not found`. 
+ +## Verification Checklist + +Run focused checks first: + +```bash +cargo fmt --all -- --check +cargo test --test crystal_language_tests +cargo test query::tests::test_crystal_query_support +cargo test search::filters::tests::test_normalize_language_names +``` + +Then run broader checks appropriate to the touched surfaces: + +```bash +cargo check --workspace +cargo test --test integration_tests +cargo test --test symbols_tests +cargo test --test query_command_tests +cargo test --test query_command_json_tests +``` + +If LSP daemon files are changed, add: + +```bash +cargo test -p lsp-daemon language_detector +cargo test -p lsp-daemon lsp_registry +cargo test -p lsp-daemon test_crystal_parser_pool_and_node_mapping +cargo test -p lsp-daemon test_crystal_symbol_extraction_uses_parser_pool +cargo test -p lsp-daemon test_find_symbol_at_position_uses_crystal_tree_sitter +crystalline --version +crystal-language-server --version +crystal --version +``` + +Use exact test names after implementation, because existing test module names +may differ. + +## PR Criteria + +The PR should not be raised until all of these are true: + +- Crystal dependency is pinned and compatible with Probe's tree-sitter runtime. +- `.cr` files are parsed through the normal parser pool. +- `probe symbols` works on fixture and real Crystal files. +- `probe extract` works by symbol name and line target. +- `probe query --language crystal` works through ast-grep. +- `probe search --language crystal` and `lang:crystal` filtering work. +- Test exclusion handles Crystal spec files. +- LSP daemon maps recognize Crystal, or the PR explicitly states LSP is + configurable/out of scope. +- Docs and supported-language lists are updated. +- Real dogfood on `https://github.com/crystal-lang/crystal` has been run and is + included in the PR description. + +## Known Risks + +- `tree-sitter-crystal` may require pinning an exact git commit because it has + no release in the observed repository state. 
+- The grammar may use a `tree-sitter-language` binding style that must be + checked against Probe's existing `LANGUAGE.into()` pattern. +- Crystal LSP options may not provide full call hierarchy or reference + behavior. Avoid claiming full LSP feature parity without a live smoke test. +- Crystal macros and Ruby-like DSL calls may produce broad AST nodes. Keep + extraction tests realistic so Probe returns useful blocks rather than entire + files. diff --git a/docs/reference/supported-languages.md b/docs/reference/supported-languages.md index b76d8783..5cc09ba3 100644 --- a/docs/reference/supported-languages.md +++ b/docs/reference/supported-languages.md @@ -19,6 +19,7 @@ Probe provides language-aware code search and extraction for a wide range of pro | PHP | `.php` | ✅ | ✅ | | Swift | `.swift` | ✅ | ✅ | | Solidity | `.sol` | ✅ | ✅ | +| Crystal | `.cr` | ✅ | ✅ | | C# | `.cs` | ✅ | ✅ | | Markdown | `.md`, `.markdown` | ✅ | ✅ | | YAML | `.yaml`, `.yml` | ✅ | ✅ | @@ -137,6 +138,14 @@ Go also implements special handling for nested struct types. 
- **Comment Handling**: Associates NatSpec and regular comments with Solidity code blocks - **Test Detection**: Identifies Foundry-style `.t.sol` files, `*Test.sol` contracts, `setUp`, `test*`, and `invariant_*` functions +### Crystal + +- **Module/Class Extraction**: Extracts modules, classes, structs, enums, libraries, and unions +- **Method Extraction**: Extracts instance methods, class methods, abstract methods, macros, and C binding functions +- **Type Extraction**: Extracts aliases, type definitions, and annotation definitions +- **Comment Handling**: Uses Crystal's `#` comments when formatting contextual output +- **Test Detection**: Identifies Crystal spec files such as `*_spec.cr` and common spec DSL blocks + ### Markdown - **Section Extraction**: Extracts complete sections based on headings diff --git a/lsp-daemon/Cargo.toml b/lsp-daemon/Cargo.toml index 1f4880d2..6665b1b3 100644 --- a/lsp-daemon/Cargo.toml +++ b/lsp-daemon/Cargo.toml @@ -82,6 +82,7 @@ tree-sitter-ruby = "0.23.1" tree-sitter-swift = "0.7.0" tree-sitter-php = "0.23.11" tree-sitter-solidity = "=1.2.10" +tree-sitter-crystal = { git = "https://github.com/crystal-lang-tools/tree-sitter-crystal", rev = "f71f4ca62ac0" } [target.'cfg(unix)'.dependencies] libc = "0.2" diff --git a/lsp-daemon/README.md b/lsp-daemon/README.md index 5dd621ed..2f31b71a 100644 --- a/lsp-daemon/README.md +++ b/lsp-daemon/README.md @@ -230,6 +230,7 @@ class LspDaemonClient: | Ruby | solargraph | ✅ Configured | | PHP | intelephense | ✅ Configured | | Swift | sourcekit-lsp | ✅ Configured | +| Crystal | crystalline | ⚠️ Configured; install required | | Kotlin | kotlin-language-server | ✅ Configured | | Scala | metals | ✅ Configured | | Haskell | haskell-language-server | ✅ Configured | @@ -318,4 +319,4 @@ Contributions are welcome! 
Please see the main probe repository for contribution ## 📄 License -MIT - See LICENSE file in the repository root \ No newline at end of file +MIT - See LICENSE file in the repository root diff --git a/lsp-daemon/src/analyzer/tree_sitter_analyzer.rs b/lsp-daemon/src/analyzer/tree_sitter_analyzer.rs index 03bbfd80..a6d6d9e1 100644 --- a/lsp-daemon/src/analyzer/tree_sitter_analyzer.rs +++ b/lsp-daemon/src/analyzer/tree_sitter_analyzer.rs @@ -32,6 +32,7 @@ fn extension_to_language_name(extension: &str) -> Option<&'static str> { "swift" => Some("swift"), "cs" => Some("csharp"), "sol" => Some("solidity"), + "cr" => Some("crystal"), _ => None, } } @@ -100,6 +101,7 @@ impl ParserPool { "c" => Some(tree_sitter_c::LANGUAGE), "cpp" | "c++" | "cxx" => Some(tree_sitter_cpp::LANGUAGE), "solidity" | "sol" => Some(tree_sitter_solidity::LANGUAGE), + "crystal" | "cr" => Some(tree_sitter_crystal::LANGUAGE), _ => None, }; @@ -447,6 +449,7 @@ impl TreeSitterAnalyzer { "go" => self.map_go_node_to_symbol(node_kind), "java" => self.map_java_node_to_symbol(node_kind), "c" | "cpp" | "c++" => self.map_c_node_to_symbol(node_kind), + "crystal" | "cr" => self.map_crystal_node_to_symbol(node_kind), _ => self.map_generic_node_to_symbol(node_kind), }; @@ -574,6 +577,21 @@ impl TreeSitterAnalyzer { } } + /// Map Crystal node kinds to symbol kinds + fn map_crystal_node_to_symbol(&self, node_kind: &str) -> Option { + match node_kind { + "method_def" | "abstract_method_def" | "fun_def" => Some(SymbolKind::Function), + "macro_def" => Some(SymbolKind::Macro), + "class_def" => Some(SymbolKind::Class), + "module_def" => Some(SymbolKind::Module), + "struct_def" => Some(SymbolKind::Struct), + "enum_def" => Some(SymbolKind::Enum), + "lib_def" => Some(SymbolKind::Interface), + "alias" | "annotation_def" | "type_def" | "union_def" => Some(SymbolKind::Type), + _ => None, + } + } + /// Generic node mapping for unknown languages fn map_generic_node_to_symbol(&self, node_kind: &str) -> Option { if 
node_kind.contains("function") { @@ -616,6 +634,7 @@ impl TreeSitterAnalyzer { | "class_name" | "module_name" | "parameter_name" + | "constant" ) { let start_byte = child.start_byte(); let end_byte = child.end_byte(); @@ -627,14 +646,19 @@ impl TreeSitterAnalyzer { message: format!("Invalid UTF-8 in symbol name: {}", e), } })?; - return Ok(name.to_string()); + if !self.is_keyword_or_invalid(name) { + return Ok(name.to_string()); + } } } // Recursively search in nested nodes for complex patterns if let Ok(nested_name) = self.extract_symbol_name(child, content) { if !nested_name.is_empty() - && nested_name.chars().all(|c| c.is_alphanumeric() || c == '_') + && !self.is_keyword_or_invalid(&nested_name) + && nested_name + .chars() + .all(|c| c.is_alphanumeric() || c == '_' || c == '?' || c == '!') { return Ok(nested_name); } @@ -676,11 +700,14 @@ impl TreeSitterAnalyzer { text.split_whitespace() .find(|word| { !word.is_empty() + && !self.is_keyword_or_invalid(word) && word .chars() .next() .map_or(false, |c| c.is_alphabetic() || c == '_') - && word.chars().all(|c| c.is_alphanumeric() || c == '_') + && word.chars().all(|c| { + c.is_alphanumeric() || c == '_' || c == '?' || c == '!' + }) }) .unwrap_or("") .to_string() @@ -695,6 +722,52 @@ impl TreeSitterAnalyzer { Ok(String::new()) } + /// Check if text is a keyword or invalid identifier. 
+ fn is_keyword_or_invalid(&self, text: &str) -> bool { + matches!( + text, + "function" + | "fn" + | "def" + | "class" + | "struct" + | "enum" + | "trait" + | "interface" + | "impl" + | "mod" + | "module" + | "namespace" + | "package" + | "import" + | "export" + | "const" + | "let" + | "var" + | "static" + | "async" + | "await" + | "return" + | "if" + | "else" + | "for" + | "while" + | "match" + | "switch" + | "case" + | "default" + | "break" + | "continue" + | "pub" + | "private" + | "protected" + | "public" + | "override" + | "virtual" + | "abstract" + ) || text.is_empty() + } + /// Extract function signature from AST node fn extract_function_signature( &self, @@ -798,6 +871,19 @@ impl TreeSitterAnalyzer { | "function_definition" | "compound_statement" ), + "crystal" | "cr" => matches!( + node_kind, + "class_def" + | "module_def" + | "struct_def" + | "enum_def" + | "lib_def" + | "union_def" + | "method_def" + | "abstract_method_def" + | "macro_def" + | "fun_def" + ), _ => false, } } @@ -942,6 +1028,7 @@ impl CodeAnalyzer for TreeSitterAnalyzer { "java".to_string(), "c".to_string(), "cpp".to_string(), + "crystal".to_string(), ] } @@ -1009,7 +1096,7 @@ impl CodeAnalyzer for TreeSitterAnalyzer { mod tests { use super::*; use crate::symbol::SymbolUIDGenerator; - use std::path::PathBuf; + use std::path::{Path, PathBuf}; fn create_test_analyzer() -> TreeSitterAnalyzer { let uid_generator = Arc::new(SymbolUIDGenerator::new()); @@ -1092,6 +1179,95 @@ mod tests { assert_eq!(analyzer.map_typescript_node_to_symbol("unknown_node"), None); } + #[test] + fn test_crystal_parser_pool_and_node_mapping() { + let analyzer = create_test_analyzer(); + let mut pool = ParserPool::new(); + + assert!( + pool.get_parser("crystal").is_some(), + "Crystal parser should be available by language name" + ); + assert!( + pool.get_parser("cr").is_some(), + "Crystal parser should be available by extension alias" + ); + assert_eq!( + analyzer.map_crystal_node_to_symbol("class_def"), + 
Some(SymbolKind::Class) + ); + assert_eq!( + analyzer.map_crystal_node_to_symbol("module_def"), + Some(SymbolKind::Module) + ); + assert_eq!( + analyzer.map_crystal_node_to_symbol("method_def"), + Some(SymbolKind::Function) + ); + assert_eq!( + analyzer.map_crystal_node_to_symbol("macro_def"), + Some(SymbolKind::Macro) + ); + assert!(analyzer.creates_scope("class_def", "crystal")); + assert!(analyzer.creates_scope("method_def", "cr")); + } + + #[tokio::test] + async fn test_crystal_symbol_extraction_uses_parser_pool() { + let analyzer = create_test_analyzer(); + let uid_generator = Arc::new(SymbolUIDGenerator::new()); + let context = AnalysisContext::new( + 1, + 2, + "crystal".to_string(), + PathBuf::from("."), + PathBuf::from("sample.cr"), + uid_generator, + ); + let crystal_code = r#" +module Demo + class User + def active? : Bool + true + end + end +end +"#; + + let result = analyzer + .analyze_file(crystal_code, Path::new("sample.cr"), "crystal", &context) + .await + .expect("Crystal analysis should use the parser pool"); + + let symbols = result + .symbols + .iter() + .map(|symbol| format!("{}:{:?}", symbol.name, symbol.kind)) + .collect::>(); + + assert!( + result + .symbols + .iter() + .any(|symbol| symbol.name == "Demo" && symbol.kind == SymbolKind::Module), + "expected Demo module in symbols: {symbols:?}" + ); + assert!( + result + .symbols + .iter() + .any(|symbol| symbol.name == "User" && symbol.kind == SymbolKind::Class), + "expected User class in symbols: {symbols:?}" + ); + assert!( + result + .symbols + .iter() + .any(|symbol| symbol.name == "active?" && symbol.kind == SymbolKind::Function), + "expected active? 
method in symbols: {symbols:?}" + ); + } + #[test] fn test_function_signature_cleaning() { let analyzer = create_test_analyzer(); diff --git a/lsp-daemon/src/daemon.rs b/lsp-daemon/src/daemon.rs index 85e6bba5..4bb02851 100644 --- a/lsp-daemon/src/daemon.rs +++ b/lsp-daemon/src/daemon.rs @@ -7223,6 +7223,12 @@ impl LspDaemon { .ok()?; Some(()) } + "cr" => { + parser + .set_language(&tree_sitter_crystal::LANGUAGE.into()) + .ok()?; + Some(()) + } _ => None, }?; diff --git a/lsp-daemon/src/fqn.rs b/lsp-daemon/src/fqn.rs index 4a0eef0e..13c2a28c 100644 --- a/lsp-daemon/src/fqn.rs +++ b/lsp-daemon/src/fqn.rs @@ -45,6 +45,7 @@ pub fn get_fqn_from_ast_with_content( "c" => Some(tree_sitter_c::LANGUAGE), "cpp" | "cc" | "cxx" => Some(tree_sitter_cpp::LANGUAGE), "sol" => Some(tree_sitter_solidity::LANGUAGE), + "cr" => Some(tree_sitter_crystal::LANGUAGE), _ => None, }; @@ -102,6 +103,8 @@ fn language_to_extension(language: &str) -> Option<&'static str> { "go" => Some("go"), "c" => Some("c"), "cpp" | "c++" | "cxx" => Some("cpp"), + "solidity" | "sol" => Some("sol"), + "crystal" | "cr" => Some("cr"), _ => None, } } @@ -362,6 +365,7 @@ fn get_language_separator(extension: &str) -> &str { match extension { "rs" | "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "rb" => "::", "py" | "js" | "ts" | "jsx" | "tsx" | "java" | "go" | "cs" | "sol" => ".", + "cr" => "::", "php" => "\\", _ => "::", // Default to Rust-style for unknown languages } @@ -389,6 +393,10 @@ fn is_method_node(node: &tree_sitter::Node, extension: &str) -> bool { | "modifier_definition" | "fallback_receive_definition" ), + "cr" => matches!( + kind, + "method_def" | "abstract_method_def" | "macro_def" | "fun_def" + ), _ => kind.contains("function") || kind.contains("method"), } } @@ -419,6 +427,10 @@ fn is_namespace_node(node: &tree_sitter::Node, extension: &str) -> bool { | "struct_declaration" | "enum_declaration" ), + "cr" => matches!( + kind, + "class_def" | "module_def" | "struct_def" | "enum_def" | "lib_def" | "union_def" 
+ ), _ => { // Fallback for unknown languages: try to detect common node types kind.contains("class") || kind.contains("struct") || kind.contains("namespace") diff --git a/lsp-daemon/src/indexing/ast_extractor.rs b/lsp-daemon/src/indexing/ast_extractor.rs index 24a50abb..75d8e26f 100644 --- a/lsp-daemon/src/indexing/ast_extractor.rs +++ b/lsp-daemon/src/indexing/ast_extractor.rs @@ -700,6 +700,7 @@ impl AstSymbolExtractor { crate::language_detector::Language::Solidity => { Ok(tree_sitter_solidity::LANGUAGE.into()) } + crate::language_detector::Language::Crystal => Ok(tree_sitter_crystal::LANGUAGE.into()), _ => Err(anyhow::anyhow!("Unsupported language: {:?}", language)), } } @@ -851,6 +852,18 @@ impl AstSymbolExtractor { "user_defined_type_definition" => (SymbolKind::Type, true), _ => (SymbolKind::Function, false), }, + crate::language_detector::Language::Crystal => match node_kind { + "method_def" | "abstract_method_def" | "macro_def" | "fun_def" => { + (SymbolKind::Function, true) + } + "class_def" => (SymbolKind::Class, true), + "module_def" => (SymbolKind::Module, true), + "struct_def" => (SymbolKind::Struct, true), + "enum_def" => (SymbolKind::Enum, true), + "lib_def" => (SymbolKind::Interface, true), + "alias" | "annotation_def" | "type_def" | "union_def" => (SymbolKind::Type, true), + _ => (SymbolKind::Function, false), + }, _ => { // For other languages, try some common patterns match node_kind { @@ -935,12 +948,21 @@ impl AstSymbolExtractor { /// Extract symbol name from a tree-sitter node fn extract_symbol_name(&self, node: tree_sitter::Node, content: &[u8]) -> Option { + if let Some(name_node) = node.child_by_field_name("name") { + if let Ok(text) = name_node.utf8_text(content) { + let trimmed = text.trim(); + if !trimmed.is_empty() { + return Some(trimmed.to_string()); + } + } + } + let mut cursor = node.walk(); // Look for identifier nodes in the children for child in node.children(&mut cursor) { match child.kind() { - "identifier" | "type_identifier" 
| "field_identifier" => { + "identifier" | "type_identifier" | "field_identifier" | "constant" => { let name = child.utf8_text(content).unwrap_or(""); if !name.is_empty() { return Some(name.to_string()); diff --git a/lsp-daemon/src/indexing/config.rs b/lsp-daemon/src/indexing/config.rs index ebed2a15..52f8b5d4 100644 --- a/lsp-daemon/src/indexing/config.rs +++ b/lsp-daemon/src/indexing/config.rs @@ -1402,6 +1402,7 @@ fn load_language_configs_from_env() -> Result Result { + features.set_language_feature("extract_macros".to_string(), true); + features.set_language_feature("extract_modules".to_string(), true); + } _ => {} } @@ -1547,6 +1552,7 @@ fn default_extensions_for_language(language: Language) -> Vec { "hxx".to_string(), ], Language::Solidity => vec!["sol".to_string()], + Language::Crystal => vec!["cr".to_string()], _ => vec![], } } @@ -1565,6 +1571,7 @@ impl FromStr for Language { "c" => Ok(Language::C), "cpp" | "c++" => Ok(Language::Cpp), "solidity" | "sol" => Ok(Language::Solidity), + "crystal" | "cr" => Ok(Language::Crystal), _ => Err(anyhow!("Unknown language: {}", s)), } } diff --git a/lsp-daemon/src/indexing/file_detector.rs b/lsp-daemon/src/indexing/file_detector.rs index 446aaf17..fe8dd4a0 100644 --- a/lsp-daemon/src/indexing/file_detector.rs +++ b/lsp-daemon/src/indexing/file_detector.rs @@ -123,6 +123,7 @@ impl Default for DetectionConfig { "php", "swift", "sol", + "cr", "cs", "kt", "scala", @@ -385,7 +386,7 @@ impl FileChangeDetector { // Check if this extension is supported based on our known languages let supported_languages = [ "rs", "js", "jsx", "ts", "tsx", "py", "go", "c", "h", "cpp", "cc", "cxx", "hpp", - "hxx", "java", "rb", "php", "swift", "sol", "cs", "kt", "scala", "clj", "ex", + "hxx", "java", "rb", "php", "swift", "sol", "cr", "cs", "kt", "scala", "clj", "ex", "exs", "erl", "hrl", "hs", "lhs", "ml", "mli", "fs", "fsx", "fsi", "dart", "jl", "r", "R", "m", "mm", "pl", "pm", "sh", "bash", "zsh", "fish", "lua", "vim", "sql", ]; diff --git 
a/lsp-daemon/src/indexing/lsp_enrichment_worker.rs b/lsp-daemon/src/indexing/lsp_enrichment_worker.rs index 16a1ab38..d1dae439 100644 --- a/lsp-daemon/src/indexing/lsp_enrichment_worker.rs +++ b/lsp-daemon/src/indexing/lsp_enrichment_worker.rs @@ -2614,6 +2614,7 @@ impl LspEnrichmentWorkerPool { "rb" => Language::Ruby, "php" => Language::Php, "swift" => Language::Swift, + "cr" => Language::Crystal, "kt" => Language::Kotlin, "scala" => Language::Scala, "hs" => Language::Haskell, diff --git a/lsp-daemon/src/indexing/pipelines.rs b/lsp-daemon/src/indexing/pipelines.rs index b7440540..891aef93 100644 --- a/lsp-daemon/src/indexing/pipelines.rs +++ b/lsp-daemon/src/indexing/pipelines.rs @@ -81,6 +81,12 @@ impl PipelineConfig { features.set_language_feature("extract_receivers".to_string(), true); (vec!["go".to_string()], features) } + Language::Crystal => { + let mut features = IndexingFeatures::default(); + features.set_language_feature("extract_macros".to_string(), true); + features.set_language_feature("extract_modules".to_string(), true); + (vec!["cr".to_string()], features) + } Language::Java => { let mut features = IndexingFeatures::default(); features.set_language_feature("extract_annotations".to_string(), true); diff --git a/lsp-daemon/src/language_detector.rs b/lsp-daemon/src/language_detector.rs index 7497c66e..d9c3f95e 100644 --- a/lsp-daemon/src/language_detector.rs +++ b/lsp-daemon/src/language_detector.rs @@ -21,6 +21,7 @@ pub enum Language { Php, Swift, Solidity, + Crystal, Kotlin, Scala, Haskell, @@ -47,6 +48,7 @@ impl Language { Language::Php => "php", Language::Swift => "swift", Language::Solidity => "solidity", + Language::Crystal => "crystal", Language::Kotlin => "kotlin", Language::Scala => "scala", Language::Haskell => "haskell", @@ -75,6 +77,7 @@ impl Language { "php" => Some(Language::Php), "swift" => Some(Language::Swift), "solidity" | "sol" => Some(Language::Solidity), + "crystal" | "cr" => Some(Language::Crystal), "kotlin" => 
Some(Language::Kotlin), "scala" => Some(Language::Scala), "haskell" => Some(Language::Haskell), @@ -152,6 +155,9 @@ impl LanguageDetector { // Solidity extension_map.insert("sol".to_string(), Language::Solidity); + // Crystal + extension_map.insert("cr".to_string(), Language::Crystal); + // Kotlin extension_map.insert("kt".to_string(), Language::Kotlin); extension_map.insert("kts".to_string(), Language::Kotlin); diff --git a/lsp-daemon/src/lsp_database_adapter.rs b/lsp-daemon/src/lsp_database_adapter.rs index c0f29871..9bf22bcb 100644 --- a/lsp-daemon/src/lsp_database_adapter.rs +++ b/lsp-daemon/src/lsp_database_adapter.rs @@ -826,6 +826,10 @@ impl LspDatabaseAdapter { debug!("[TREE_SITTER] Using tree-sitter-solidity"); Some(tree_sitter_solidity::LANGUAGE.into()) } + "crystal" | "cr" => { + debug!("[TREE_SITTER] Using tree-sitter-crystal"); + Some(tree_sitter_crystal::LANGUAGE.into()) + } "php" => { debug!("[TREE_SITTER] Using tree-sitter-php"); Some(tree_sitter_php::LANGUAGE_PHP.into()) @@ -954,6 +958,10 @@ impl LspDatabaseAdapter { "constructor_declaration" => true, // C/C++ symbols (function_declarator is unique to C/C++) "function_declarator" | "struct_specifier" | "enum_specifier" => true, + // Crystal symbols + "class_def" | "module_def" | "struct_def" | "enum_def" | "lib_def" | "union_def" + | "method_def" | "abstract_method_def" | "macro_def" | "fun_def" | "alias" + | "annotation_def" | "type_def" => true, _ => false, } } @@ -1173,7 +1181,11 @@ impl LspDatabaseAdapter { fn is_identifier_node(&self, node: &tree_sitter::Node) -> bool { matches!( node.kind(), - "identifier" | "type_identifier" | "field_identifier" | "property_identifier" + "identifier" + | "type_identifier" + | "field_identifier" + | "property_identifier" + | "constant" ) } @@ -1230,16 +1242,25 @@ impl LspDatabaseAdapter { | "function_declaration" | "function_definition" | "func_declaration" => SymbolKind::Function, - "method_definition" | "method_declaration" => SymbolKind::Method, + 
"method_definition" | "method_declaration" | "method_def" | "abstract_method_def" => { + SymbolKind::Method + } + "macro_def" => SymbolKind::Macro, + "fun_def" => SymbolKind::Function, "constructor_declaration" => SymbolKind::Constructor, - "class_declaration" | "class_definition" => SymbolKind::Class, - "struct_item" | "struct_specifier" => SymbolKind::Struct, - "enum_item" | "enum_specifier" | "enum_declaration" => SymbolKind::Enum, + "class_declaration" | "class_definition" | "class_def" => SymbolKind::Class, + "struct_item" | "struct_specifier" | "struct_def" => SymbolKind::Struct, + "enum_item" | "enum_specifier" | "enum_declaration" | "enum_def" => SymbolKind::Enum, "trait_item" => SymbolKind::Trait, - "interface_declaration" => SymbolKind::Interface, + "interface_declaration" | "lib_def" => SymbolKind::Interface, "impl_item" => SymbolKind::Impl, - "mod_item" | "namespace" => SymbolKind::Module, - "type_declaration" | "type_alias_declaration" => SymbolKind::Type, + "mod_item" | "namespace" | "module_def" => SymbolKind::Module, + "type_declaration" + | "type_alias_declaration" + | "alias" + | "annotation_def" + | "type_def" + | "union_def" => SymbolKind::Type, "variable_declarator" | "variable_declaration" => SymbolKind::Variable, "field_declaration" => SymbolKind::Field, _ => SymbolKind::Function, // Default fallback @@ -2304,6 +2325,7 @@ impl LspDatabaseAdapter { "go" => "go", "c++" | "cpp" => "cpp", "c" => "c", + "crystal" => "cr", _ => language, // Fallback to original if no mapping } } @@ -2405,6 +2427,7 @@ impl LspDatabaseAdapter { match extension { "rs" | "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "rb" => "::", "py" | "js" | "ts" | "jsx" | "tsx" | "java" | "go" | "cs" | "sol" => ".", + "cr" => "::", "php" => "\\", _ => "::", // Default to Rust-style for unknown languages } @@ -2430,6 +2453,10 @@ impl LspDatabaseAdapter { | "modifier_definition" | "fallback_receive_definition" ), + "cr" => matches!( + kind, + "method_def" | "abstract_method_def" | 
"macro_def" | "fun_def" + ), _ => kind.contains("function") || kind.contains("method"), } } @@ -2461,6 +2488,10 @@ impl LspDatabaseAdapter { | "struct_declaration" | "enum_declaration" ), + "cr" => matches!( + kind, + "class_def" | "module_def" | "struct_def" | "enum_def" | "lib_def" | "union_def" + ), _ => { kind.contains("class") || kind.contains("struct") @@ -2472,10 +2503,19 @@ impl LspDatabaseAdapter { /// Extract name from a tree-sitter node fn extract_node_name(node: tree_sitter::Node, content: &[u8]) -> Option { + if let Some(name_node) = node.child_by_field_name("name") { + if let Ok(text) = name_node.utf8_text(content) { + let trimmed = text.trim(); + if !trimmed.is_empty() { + return Some(trimmed.to_string()); + } + } + } + // Try to find identifier child node let mut cursor = node.walk(); for child in node.children(&mut cursor) { - if child.kind() == "identifier" || child.kind() == "name" { + if matches!(child.kind(), "identifier" | "name" | "constant") { return Some(child.utf8_text(content).unwrap_or("").to_string()); } } @@ -2769,6 +2809,35 @@ mod tests { path } + #[test] + fn test_find_symbol_at_position_uses_crystal_tree_sitter() { + let adapter = create_test_adapter(); + let crystal_code = r#" +module Demo + class User + def active? 
: Bool + true + end + end +end +"#; + let file_path = PathBuf::from("sample.cr"); + + let class_symbol = adapter + .find_symbol_at_position(crystal_code, &file_path, 2, 10, "crystal") + .expect("Crystal tree-sitter symbol lookup should parse") + .expect("class position should resolve to a Crystal symbol"); + assert_eq!(class_symbol.name, "User"); + assert_eq!(class_symbol.kind, SymbolKind::Class); + + let method_symbol = adapter + .find_symbol_at_position(crystal_code, &file_path, 4, 6, "cr") + .expect("Crystal alias should select the tree-sitter parser") + .expect("method body position should resolve to enclosing method"); + assert_eq!(method_symbol.name, "active?"); + assert_eq!(method_symbol.kind, SymbolKind::Method); + } + #[tokio::test] async fn test_resolve_symbol_at_location_rust_function() { let adapter = LspDatabaseAdapter::new(); diff --git a/lsp-daemon/src/lsp_registry.rs b/lsp-daemon/src/lsp_registry.rs index ab4b854b..d113542b 100644 --- a/lsp-daemon/src/lsp_registry.rs +++ b/lsp-daemon/src/lsp_registry.rs @@ -295,6 +295,21 @@ impl LspRegistry { }, }); + // Crystal + self.register(LspServerConfig { + language: Language::Crystal, + command: "crystalline".to_string(), + args: vec![], + initialization_options: None, + root_markers: vec!["shard.yml".to_string(), "shard.lock".to_string()], + initialization_timeout_secs: 30, + capabilities: LspServerCapabilities { + call_hierarchy: false, + references: true, + implementations: false, + }, + }); + // Kotlin self.register(LspServerConfig { language: Language::Kotlin, @@ -504,6 +519,7 @@ impl LspRegistry { "php" => Language::Php, "swift" => Language::Swift, "solidity" => Language::Solidity, + "crystal" => Language::Crystal, "kotlin" => Language::Kotlin, "scala" => Language::Scala, "haskell" => Language::Haskell, diff --git a/lsp-daemon/src/lsp_server.rs b/lsp-daemon/src/lsp_server.rs index 95865d73..7fe81071 100644 --- a/lsp-daemon/src/lsp_server.rs +++ b/lsp-daemon/src/lsp_server.rs @@ -2346,6 +2346,7 @@ impl 
LspServer { Some("php") => "php", Some("swift") => "swift", Some("sol") => "solidity", + Some("cr") => "crystal", Some("kt") | Some("kts") => "kotlin", Some("scala") | Some("sc") => "scala", Some("hs") => "haskell", diff --git a/lsp-daemon/src/relationship/tree_sitter_extractor.rs b/lsp-daemon/src/relationship/tree_sitter_extractor.rs index 3893749d..918f9070 100644 --- a/lsp-daemon/src/relationship/tree_sitter_extractor.rs +++ b/lsp-daemon/src/relationship/tree_sitter_extractor.rs @@ -68,6 +68,7 @@ impl RelationshipParserPool { "c" => Some(tree_sitter_c::LANGUAGE), "cpp" | "c++" | "cxx" => Some(tree_sitter_cpp::LANGUAGE), "solidity" | "sol" => Some(tree_sitter_solidity::LANGUAGE), + "crystal" | "cr" => Some(tree_sitter_crystal::LANGUAGE), _ => None, }; diff --git a/lsp-daemon/src/symbol/language_support.rs b/lsp-daemon/src/symbol/language_support.rs index 4b73faf9..55e7ab79 100644 --- a/lsp-daemon/src/symbol/language_support.rs +++ b/lsp-daemon/src/symbol/language_support.rs @@ -331,6 +331,36 @@ impl LanguageRules { } } + /// Create rules for Crystal + pub fn crystal() -> Self { + Self { + scope_separator: "::".to_string(), + anonymous_prefix: "anon".to_string(), + supports_overloading: false, + case_sensitive: true, + signature_normalization: SignatureNormalization::RemoveParameterNames, + visibility_affects_uid: false, + default_visibility: "public".to_string(), + file_extensions: vec!["cr".to_string()], + signature_keywords: vec![ + "module".to_string(), + "class".to_string(), + "struct".to_string(), + "enum".to_string(), + "def".to_string(), + "macro".to_string(), + "abstract".to_string(), + "private".to_string(), + "protected".to_string(), + "alias".to_string(), + "annotation".to_string(), + "lib".to_string(), + "fun".to_string(), + ], + type_aliases: vec![], + } + } + /// Check if this language supports a specific feature pub fn supports_feature(&self, feature: &str) -> bool { match feature { @@ -521,6 +551,7 @@ impl LanguageRulesFactory { "c" => 
Some(LanguageRules::c()), "cpp" | "c++" | "cxx" => Some(LanguageRules::cpp()), "solidity" | "sol" => Some(LanguageRules::solidity()), + "crystal" | "cr" => Some(LanguageRules::crystal()), _ => None, } } @@ -537,6 +568,7 @@ impl LanguageRulesFactory { "c".to_string(), "cpp".to_string(), "solidity".to_string(), + "crystal".to_string(), ] } diff --git a/lsp-daemon/src/symbol/uid_generator.rs b/lsp-daemon/src/symbol/uid_generator.rs index 8061afae..20f1bff4 100644 --- a/lsp-daemon/src/symbol/uid_generator.rs +++ b/lsp-daemon/src/symbol/uid_generator.rs @@ -28,6 +28,7 @@ fn extension_to_language_name(extension: &str) -> Option<&'static str> { "swift" => Some("swift"), "cs" => Some("csharp"), "sol" => Some("solidity"), + "cr" => Some("crystal"), _ => None, } } @@ -103,6 +104,7 @@ impl SymbolUIDGenerator { rules.insert("cpp".to_string(), LanguageRules::cpp()); rules.insert("c++".to_string(), LanguageRules::cpp()); rules.insert("solidity".to_string(), LanguageRules::solidity()); + rules.insert("crystal".to_string(), LanguageRules::crystal()); rules } diff --git a/lsp-daemon/src/workspace/config.rs b/lsp-daemon/src/workspace/config.rs index aa8f744f..3cb5e5a8 100644 --- a/lsp-daemon/src/workspace/config.rs +++ b/lsp-daemon/src/workspace/config.rs @@ -244,6 +244,7 @@ impl Default for WorkspaceConfig { "c".to_string(), "cpp".to_string(), "solidity".to_string(), + "crystal".to_string(), ], git_integration: true, incremental_indexing: true, diff --git a/lsp-daemon/src/workspace/project.rs b/lsp-daemon/src/workspace/project.rs index 2d2be6b7..8b1578a7 100644 --- a/lsp-daemon/src/workspace/project.rs +++ b/lsp-daemon/src/workspace/project.rs @@ -554,6 +554,7 @@ where "rb" => Some("ruby"), "swift" => Some("swift"), "sol" => Some("solidity"), + "cr" => Some("crystal"), "kt" => Some("kotlin"), "cs" => Some("csharp"), "scala" => Some("scala"), diff --git a/lsp-daemon/src/workspace_resolver.rs b/lsp-daemon/src/workspace_resolver.rs index a9cbdde1..0ac5b927 100644 --- 
a/lsp-daemon/src/workspace_resolver.rs +++ b/lsp-daemon/src/workspace_resolver.rs @@ -250,6 +250,7 @@ impl WorkspaceResolver { Language::Php => vec!["composer.json", "composer.lock"], Language::Swift => vec!["Package.swift", "*.xcodeproj"], Language::Solidity => vec!["foundry.toml", "hardhat.config.js", "hardhat.config.ts"], + Language::Crystal => vec!["shard.yml", "shard.lock"], Language::Kotlin => vec!["build.gradle.kts", "build.gradle"], Language::Scala => vec!["build.sbt", "build.sc"], Language::Haskell => vec!["stack.yaml", "*.cabal", "cabal.project"], diff --git a/npm/src/agent/acp/tools.js b/npm/src/agent/acp/tools.js index 5986a7ff..42447240 100644 --- a/npm/src/agent/acp/tools.js +++ b/npm/src/agent/acp/tools.js @@ -322,7 +322,7 @@ export class ACPToolManager { }, language: { type: 'string', - description: 'Programming language to search in (rust, javascript, python, go, etc.)' + description: 'Programming language to search in (rust, javascript, python, go, crystal, etc.)' }, max_results: { type: 'number', @@ -382,4 +382,4 @@ export class ACPToolManager { } ]; } -} \ No newline at end of file +} diff --git a/src/cli.rs b/src/cli.rs index ac57e953..e6ea41f0 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -169,6 +169,7 @@ pub enum Commands { "php", "swift", "solidity", "sol", + "crystal", "cr", "csharp", "cs", "yaml", "yml" ])] @@ -351,6 +352,7 @@ pub enum Commands { "php", "swift", "solidity", "sol", + "crystal", "cr", "csharp", "cs", "yaml", "yml" ])] diff --git a/src/debug_tree_sitter.rs b/src/debug_tree_sitter.rs index a92310d3..3eb2a60a 100644 --- a/src/debug_tree_sitter.rs +++ b/src/debug_tree_sitter.rs @@ -2,7 +2,7 @@ use anyhow::{Context, Result}; use clap::Parser; use colored::Colorize; use std::path::Path; -use tree_sitter::{Language as TSLanguage, Node, Parser as TSParser}; +use tree_sitter::{Node, Parser as TSParser}; use probe_code::language::factory::get_language_impl; @@ -79,7 +79,7 @@ fn main() -> Result<()> { "{}", format!( "Language: {} 
(extension: {})", - get_language_name(&language), + get_language_name(extension), extension ) .cyan() @@ -137,21 +137,22 @@ fn main() -> Result<()> { Ok(()) } -fn get_language_name(language: &TSLanguage) -> &str { - let version = language.version(); - match version { - _ if format!("{language:?}").contains("rust") => "Rust", - _ if format!("{language:?}").contains("javascript") => "JavaScript", - _ if format!("{language:?}").contains("typescript") => "TypeScript", - _ if format!("{language:?}").contains("python") => "Python", - _ if format!("{language:?}").contains("go") => "Go", - _ if format!("{language:?}").contains("java") => "Java", - _ if format!("{language:?}").contains("c") => "C/C++", - _ if format!("{language:?}").contains("ruby") => "Ruby", - _ if format!("{language:?}").contains("php") => "PHP", - _ if format!("{language:?}").contains("swift") => "Swift", - _ if format!("{language:?}").contains("solidity") => "Solidity", - _ if format!("{language:?}").contains("csharp") => "C#", +fn get_language_name(extension: &str) -> &str { + match extension { + "rs" => "Rust", + "js" | "jsx" => "JavaScript", + "ts" | "tsx" => "TypeScript", + "py" => "Python", + "go" => "Go", + "java" => "Java", + "c" | "h" => "C", + "cpp" | "cc" | "cxx" | "hpp" | "hxx" => "C++", + "rb" => "Ruby", + "php" => "PHP", + "swift" => "Swift", + "sol" => "Solidity", + "cr" => "Crystal", + "cs" => "C#", _ => "Unknown", } } @@ -232,6 +233,21 @@ fn extract_symbol_info( "constructor_declaration" => ("constructor", vec!["identifier"]), "field_declaration" => ("field", vec!["identifier"]), + // Crystal + "class_def" => ("class", vec!["constant", "identifier"]), + "module_def" => ("module", vec!["constant", "identifier"]), + "struct_def" => ("struct", vec!["constant", "identifier"]), + "enum_def" => ("enum", vec!["constant", "identifier"]), + "method_def" | "abstract_method_def" => { + ("method", vec!["identifier", "constant", "operator"]) + } + "macro_def" => ("macro", vec!["identifier", 
"constant"]), + "lib_def" => ("library", vec!["constant", "identifier"]), + "fun_def" => ("function", vec!["identifier", "constant"]), + "alias" => ("alias", vec!["constant", "identifier"]), + "annotation_def" => ("annotation", vec!["constant", "identifier"]), + "type_def" | "union_def" => ("type", vec!["constant", "identifier"]), + _ => return None, }; @@ -500,4 +516,36 @@ const arrow = (x, y) => x + y; .iter() .any(|s| s.name == "getValue" && s.symbol_kind == "method")); } + + #[test] + fn test_crystal_symbol_detection() { + let crystal_code = r#" +module ProbeFixture + class Calculator + def add(left : Int32, right : Int32) : Int32 + left + right + end + end +end +"#; + + let language_impl = get_language_impl("cr").unwrap(); + let language = language_impl.get_tree_sitter_language(); + let mut parser = TSParser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(crystal_code, None).unwrap(); + + let symbols = find_all_symbols(tree.root_node(), crystal_code.as_bytes(), false); + + assert!(!symbols.is_empty()); + assert!(symbols + .iter() + .any(|s| s.name == "ProbeFixture" && s.symbol_kind == "module")); + assert!(symbols + .iter() + .any(|s| s.name == "Calculator" && s.symbol_kind == "class")); + assert!(symbols + .iter() + .any(|s| s.name == "add" && s.symbol_kind == "method")); + } } diff --git a/src/extract/formatter.rs b/src/extract/formatter.rs index d1289857..52f39720 100644 --- a/src/extract/formatter.rs +++ b/src/extract/formatter.rs @@ -1053,6 +1053,7 @@ pub fn get_language_from_extension(extension: &str) -> &'static str { "swift" => "swift", "cs" => "csharp", "sol" => "solidity", + "cr" => "crystal", "scala" => "scala", "dart" => "dart", "ex" | "exs" => "elixir", diff --git a/src/extract/symbols.rs b/src/extract/symbols.rs index f3baf125..a20d255b 100644 --- a/src/extract/symbols.rs +++ b/src/extract/symbols.rs @@ -82,6 +82,12 @@ fn is_container_node(kind: &str) -> bool { | "module_declaration" | "contract_declaration" | 
"library_declaration" + | "class_def" + | "module_def" + | "struct_def" + | "enum_def" + | "lib_def" + | "union_def" | "enum_declaration" | "enum_item" | "struct_declaration" @@ -191,6 +197,7 @@ fn collect_children_symbols( | "enum_body" | "struct_body" | "contract_body" + | "expressions" | "object_type" | "interface_body" | "statement_block" @@ -281,15 +288,17 @@ fn normalize_kind(kind: &str) -> String { | "function_definition" | "function_expression" | "arrow_function" => "function", - "method_declaration" | "method_definition" => "method", - "struct_item" | "struct_type" | "struct_declaration" => "struct", + "method_declaration" | "method_definition" | "method_def" | "abstract_method_def" => { + "method" + } + "struct_item" | "struct_type" | "struct_declaration" | "struct_def" => "struct", "impl_item" => "impl", "trait_item" => "trait", - "enum_item" | "enum_declaration" => "enum", - "mod_item" | "module_declaration" | "namespace_declaration" => "module", + "enum_item" | "enum_declaration" | "enum_def" => "enum", + "mod_item" | "module_declaration" | "namespace_declaration" | "module_def" => "module", "contract_declaration" => "contract", "library_declaration" => "library", - "class_declaration" | "class_definition" => "class", + "class_declaration" | "class_definition" | "class_def" => "class", "interface_declaration" => "interface", "const_item" | "const_declaration" => "const", "state_variable_declaration" => "variable", @@ -298,8 +307,14 @@ fn normalize_kind(kind: &str) -> String { | "type_alias_declaration" | "type_declaration" | "type_spec" - | "user_defined_type_definition" => "type", - "macro_definition" => "macro", + | "user_defined_type_definition" + | "type_def" + | "union_def" => "type", + "macro_definition" | "macro_def" => "macro", + "lib_def" => "library", + "fun_def" => "function", + "alias" => "alias", + "annotation_def" => "annotation", "use_declaration" => "use", "variable_declarator" | "lexical_declaration" diff --git 
a/src/language/crystal.rs b/src/language/crystal.rs new file mode 100644 index 00000000..9b6fb6f8 --- /dev/null +++ b/src/language/crystal.rs @@ -0,0 +1,145 @@ +use super::language_trait::LanguageImpl; +use tree_sitter::{Language as TSLanguage, Node}; + +/// Implementation of LanguageImpl for Crystal. +pub struct CrystalLanguage; + +impl Default for CrystalLanguage { + fn default() -> Self { + Self::new() + } +} + +impl CrystalLanguage { + pub fn new() -> Self { + CrystalLanguage + } + + fn is_container(kind: &str) -> bool { + matches!( + kind, + "class_def" | "module_def" | "struct_def" | "enum_def" | "lib_def" | "union_def" + ) + } + + fn is_function_like(kind: &str) -> bool { + matches!( + kind, + "method_def" | "abstract_method_def" | "macro_def" | "fun_def" + ) + } + + fn body_signature(node: &Node, source: &[u8], container: bool) -> Option { + let end = node + .child_by_field_name("body") + .map(|body| body.start_byte()) + .unwrap_or_else(|| node.end_byte()); + let sig = String::from_utf8_lossy(&source[node.start_byte()..end]) + .trim() + .to_string(); + + if sig.is_empty() { + None + } else if container { + Some(format!("{sig} ... 
end")) + } else { + Some(sig) + } + } + + fn named_child_text(node: &Node, source: &[u8]) -> Option { + let name = node.child_by_field_name("name")?; + let text = name.utf8_text(source).ok()?.trim(); + if text.is_empty() { + None + } else { + Some(text.to_string()) + } + } +} + +impl LanguageImpl for CrystalLanguage { + fn get_tree_sitter_language(&self) -> TSLanguage { + tree_sitter_crystal::LANGUAGE.into() + } + + fn get_extension(&self) -> &'static str { + "cr" + } + + fn is_acceptable_parent(&self, node: &Node) -> bool { + matches!( + node.kind(), + "class_def" + | "module_def" + | "struct_def" + | "enum_def" + | "method_def" + | "abstract_method_def" + | "macro_def" + | "lib_def" + | "fun_def" + | "alias" + | "annotation_def" + | "type_def" + | "union_def" + ) + } + + fn is_symbol_node(&self, node: &Node) -> bool { + self.is_acceptable_parent(node) + } + + fn is_test_node(&self, node: &Node, source: &[u8]) -> bool { + match node.kind() { + "method_def" | "abstract_method_def" | "macro_def" => { + if let Some(name) = Self::named_child_text(node, source) { + return name.starts_with("test_"); + } + } + "call" | "implicit_object_call" | "assign_call" => { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if matches!(child.kind(), "identifier" | "constant") { + let name = child.utf8_text(source).unwrap_or(""); + if matches!(name, "describe" | "context" | "it" | "pending") { + return true; + } + } + } + } + _ => {} + } + + false + } + + fn find_parent_function<'a>(&self, node: Node<'a>) -> Option> { + let mut current = node; + while let Some(parent) = current.parent() { + if Self::is_function_like(parent.kind()) { + return Some(parent); + } + current = parent; + } + None + } + + fn get_symbol_signature(&self, node: &Node, source: &[u8]) -> Option { + match node.kind() { + kind if Self::is_container(kind) => Self::body_signature(node, source, true), + kind if Self::is_function_like(kind) => Self::body_signature(node, source, false), + 
"alias" | "annotation_def" | "type_def" => Some( + String::from_utf8_lossy(&source[node.start_byte()..node.end_byte()]) + .trim() + .lines() + .next() + .unwrap_or("") + .trim() + .to_string(), + ) + .filter(|sig| !sig.is_empty()), + _ => None, + } + } +} diff --git a/src/language/factory.rs b/src/language/factory.rs index 58b60eaf..e0d5b488 100644 --- a/src/language/factory.rs +++ b/src/language/factory.rs @@ -1,5 +1,6 @@ use probe_code::language::c::CLanguage; use probe_code::language::cpp::CppLanguage; +use probe_code::language::crystal::CrystalLanguage; use probe_code::language::csharp::CSharpLanguage; use probe_code::language::go::GoLanguage; use probe_code::language::html::HtmlLanguage; @@ -33,6 +34,7 @@ pub fn get_language_impl(extension: &str) -> Option> { "swift" => Some(Box::new(SwiftLanguage::new())), "cs" => Some(Box::new(CSharpLanguage::new())), "sol" => Some(Box::new(SolidityLanguage::new())), + "cr" => Some(Box::new(CrystalLanguage::new())), "html" | "htm" => Some(Box::new(HtmlLanguage::new())), "md" | "markdown" => Some(Box::new(MarkdownLanguage::new())), "yaml" | "yml" => Some(Box::new(YamlLanguage::new())), diff --git a/src/language/mod.rs b/src/language/mod.rs index 168dc973..d36478ca 100644 --- a/src/language/mod.rs +++ b/src/language/mod.rs @@ -4,6 +4,7 @@ // Import submodules pub mod block_handling; pub mod common; +pub mod crystal; pub mod factory; pub mod language_trait; pub mod parser; diff --git a/src/language/parser_pool.rs b/src/language/parser_pool.rs index 346563a0..77289944 100644 --- a/src/language/parser_pool.rs +++ b/src/language/parser_pool.rs @@ -44,7 +44,7 @@ lazy_static::lazy_static! 
{ let critical_languages = ["rs", "js", "ts", "py", "go", "java"]; // Tier 2: Common languages - warm with lower priority - let common_languages = ["cpp", "c", "jsx", "tsx", "rb", "php", "cs", "sol"]; + let common_languages = ["cpp", "c", "jsx", "tsx", "rb", "php", "cs", "sol", "cr"]; // Tier 3: Specialized languages - warm last let specialized_languages = ["swift", "h", "cc", "cxx", "hpp", "hxx"]; @@ -137,7 +137,7 @@ pub fn smart_warm_parser_pool_for_directory(path: &Path) { let priority_order = [ "rs", "js", "ts", "py", "go", "java", // Tier 1: Critical "cpp", "c", "jsx", "tsx", "rb", "php", "cs", // Tier 2: Common - "swift", "sol", "h", "cc", "cxx", "hpp", "hxx", // Tier 3: Specialized + "swift", "sol", "cr", "h", "cc", "cxx", "hpp", "hxx", // Tier 3: Specialized ]; // Warm detected languages in priority order diff --git a/src/language/test_detection.rs b/src/language/test_detection.rs index e1cebf23..18daf2d8 100644 --- a/src/language/test_detection.rs +++ b/src/language/test_detection.rs @@ -93,6 +93,14 @@ pub fn is_test_file(path: &Path) -> bool { return true; } + // Crystal: spec files conventionally use *_spec.cr + if file_name.ends_with("_spec.cr") { + if _debug_mode { + println!("DEBUG: Test file detected (Crystal pattern): {file_name}"); + } + return true; + } + // PHP: *Test.php, Test*.php if file_name.ends_with("Test.php") || file_name.starts_with("Test") && file_name.ends_with(".php") diff --git a/src/language/tests.rs b/src/language/tests.rs index ef995732..739fe4db 100644 --- a/src/language/tests.rs +++ b/src/language/tests.rs @@ -6,6 +6,7 @@ use tree_sitter::Language; extern crate tree_sitter_c; extern crate tree_sitter_c_sharp; extern crate tree_sitter_cpp; +extern crate tree_sitter_crystal; extern crate tree_sitter_go; extern crate tree_sitter_java; extern crate tree_sitter_javascript; @@ -33,6 +34,7 @@ fn get_language(extension: &str) -> Option { "swift" => Some(tree_sitter_swift::LANGUAGE.into()), "cs" => 
Some(tree_sitter_c_sharp::LANGUAGE.into()), "sol" => Some(tree_sitter_solidity::LANGUAGE.into()), + "cr" => Some(tree_sitter_crystal::LANGUAGE.into()), // It seems tree_sitter_php::LANGUAGE doesn't exist, so we'll return None for PHP "php" => None, _ => None, @@ -58,6 +60,7 @@ fn test_get_language() { assert!(get_language("swift").is_some()); // Swift assert!(get_language("cs").is_some()); // C# assert!(get_language("sol").is_some()); // Solidity + assert!(get_language("cr").is_some()); // Crystal assert!(get_language("php").is_none()); // PHP (not supported in current tree-sitter version) // Test unsupported language @@ -65,6 +68,21 @@ fn test_get_language() { assert!(get_language("").is_none()); } +#[test] +fn test_crystal_language_implementation() { + let crystal_impl = get_language_impl("cr"); + assert!( + crystal_impl.is_some(), + "Should be able to get Crystal language implementation" + ); + + let language = get_language("cr"); + assert!( + language.is_some(), + "Should be able to get Crystal tree-sitter language" + ); +} + #[test] fn test_solidity_language_implementation() { let solidity_impl = get_language_impl("sol"); diff --git a/src/lsp_integration/client.rs b/src/lsp_integration/client.rs index 267dabd3..e3ea737e 100644 --- a/src/lsp_integration/client.rs +++ b/src/lsp_integration/client.rs @@ -913,6 +913,7 @@ impl LspClient { "php" => Some(Language::Php), "swift" => Some(Language::Swift), "solidity" | "sol" => Some(Language::Solidity), + "crystal" | "cr" => Some(Language::Crystal), "kotlin" | "kt" => Some(Language::Kotlin), "scala" => Some(Language::Scala), "haskell" | "hs" => Some(Language::Haskell), diff --git a/src/lsp_integration/management.rs b/src/lsp_integration/management.rs index 1b4e11cb..03c5eb89 100644 --- a/src/lsp_integration/management.rs +++ b/src/lsp_integration/management.rs @@ -5757,6 +5757,7 @@ impl LspManager { "ruby" | "rb" => Ok(Language::Ruby), "swift" => Ok(Language::Swift), "solidity" | "sol" => Ok(Language::Solidity), + 
"crystal" | "cr" => Ok(Language::Crystal), "kotlin" | "kt" => Ok(Language::Kotlin), "scala" => Ok(Language::Scala), _ => Err(anyhow::anyhow!("Unsupported language: {}", lang_str)), diff --git a/src/lsp_integration/readiness.rs b/src/lsp_integration/readiness.rs index 93db9018..9b085a64 100644 --- a/src/lsp_integration/readiness.rs +++ b/src/lsp_integration/readiness.rs @@ -262,6 +262,7 @@ fn determine_language_from_path(file_path: &Path) -> Option { "php" => "php", "swift" => "swift", "sol" => "solidity", + "cr" => "crystal", _ => extension, // fallback to extension }; return Some(language_name.to_string()); diff --git a/src/main.rs b/src/main.rs index 83c387af..e92effdf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -864,6 +864,7 @@ async fn main() -> Result<()> { "rb" => "ruby", "cs" => "csharp", "sol" => "solidity", + "cr" => "crystal", _ => lang, // Return the original language if no alias is found } }), diff --git a/src/query.rs b/src/query.rs index 56125068..afb339db 100644 --- a/src/query.rs +++ b/src/query.rs @@ -40,6 +40,7 @@ pub struct QueryOptions<'a> { enum ProbeQueryLang { Builtin(SupportLang), Solidity, + Crystal, } impl Language for ProbeQueryLang { @@ -47,6 +48,7 @@ impl Language for ProbeQueryLang { match self { ProbeQueryLang::Builtin(lang) => lang.get_ts_language(), ProbeQueryLang::Solidity => tree_sitter_solidity::LANGUAGE.into(), + ProbeQueryLang::Crystal => tree_sitter_crystal::LANGUAGE.into(), } } } @@ -66,6 +68,7 @@ fn get_language(lang: &str) -> Option { "php" => Some(ProbeQueryLang::Builtin(SupportLang::Php)), "swift" => Some(ProbeQueryLang::Builtin(SupportLang::Swift)), "solidity" | "sol" => Some(ProbeQueryLang::Solidity), + "crystal" | "cr" => Some(ProbeQueryLang::Crystal), "csharp" => Some(ProbeQueryLang::Builtin(SupportLang::CSharp)), _ => None, } @@ -86,6 +89,7 @@ fn get_file_extension(lang: &str) -> Vec<&str> { "php" => vec![".php"], "swift" => vec![".swift"], "solidity" | "sol" => vec![".sol"], + "crystal" | "cr" => vec![".cr"], 
"csharp" => vec![".cs"], _ => vec![], } @@ -162,6 +166,7 @@ fn query_file(file_path: &Path, options: &QueryOptions) -> Result> "php" => Some(ProbeQueryLang::Builtin(SupportLang::Php)), "swift" => Some(ProbeQueryLang::Builtin(SupportLang::Swift)), "sol" => Some(ProbeQueryLang::Solidity), + "cr" => Some(ProbeQueryLang::Crystal), "cs" => Some(ProbeQueryLang::Builtin(SupportLang::CSharp)), _ => None, // Unsupported extension }; @@ -674,4 +679,38 @@ contract Counter { assert_eq!(matches[0].file_path, file); assert!(matches[0].matched_text.contains("function increment()")); } + + #[test] + fn test_crystal_query_support() { + let temp_dir = TempDir::new().unwrap(); + let file = temp_dir.path().join("counter.cr"); + fs::write( + &file, + r#" +class Counter + def increment : Int32 + 1 + end +end +"#, + ) + .unwrap(); + + let options = QueryOptions { + path: temp_dir.path(), + pattern: "def increment : Int32", + language: Some("crystal"), + ignore: &[], + allow_tests: true, + max_results: Some(10), + with_context: false, + format: "json", + no_gitignore: true, + }; + + let matches = perform_query(&options).expect("Crystal query should run"); + assert_eq!(matches.len(), 1); + assert_eq!(matches[0].file_path, file); + assert!(matches[0].matched_text.contains("def increment")); + } } diff --git a/src/search/elastic_query.rs b/src/search/elastic_query.rs index e6c6085e..c2f0d260 100644 --- a/src/search/elastic_query.rs +++ b/src/search/elastic_query.rs @@ -840,22 +840,45 @@ impl Parser { println!("DEBUG: Ident => {first}"); } if let Some(Token::Colon) = self.peek() { - // We have "field:" self.next(); // consume colon - // Next could be ident or quoted - match self.peek() { - Some(Token::Ident(ident2)) => { - let val2 = ident2.clone(); - self.next(); - Ok(make_term(vec![val2], Some(first), false, false, false)) + + if let Some(Token::Colon) = self.peek() { + self.next(); // consume the second colon in a namespace separator + let Some(Token::Ident(next_ident)) = self.next() else 
{ + return Ok(make_term(vec![first], None, false, false, false)); + }; + + let mut qualified = format!("{first}::{next_ident}"); + while matches!(self.peek(), Some(Token::Colon)) + && matches!(self.tokens.get(self.pos + 1), Some(Token::Colon)) + { + self.next(); // consume first colon + self.next(); // consume second colon + let Some(Token::Ident(part)) = self.next() else { + break; + }; + qualified.push_str("::"); + qualified.push_str(&part); } - Some(Token::QuotedString(qs)) => { - let qval = qs.clone(); - self.next(); - Ok(make_term(vec![qval], Some(first), false, false, true)) + + Ok(make_term(vec![qualified], None, false, false, false)) + } else { + // We have "field:". + // Next could be ident or quoted. + match self.peek() { + Some(Token::Ident(ident2)) => { + let val2 = ident2.clone(); + self.next(); + Ok(make_term(vec![val2], Some(first), false, false, false)) + } + Some(Token::QuotedString(qs)) => { + let qval = qs.clone(); + self.next(); + Ok(make_term(vec![qval], Some(first), false, false, true)) + } + // If nothing or other token => empty term + _ => Ok(make_term(vec![], Some(first), false, false, false)), } - // If nothing or other token => empty term - _ => Ok(make_term(vec![], Some(first), false, false, false)), } } else { // Just a plain ident diff --git a/src/search/elastic_query_tests.rs b/src/search/elastic_query_tests.rs index 6f605e0e..8aa2179a 100644 --- a/src/search/elastic_query_tests.rs +++ b/src/search/elastic_query_tests.rs @@ -697,6 +697,32 @@ fn test_quoted_strings() { } ); + let namespaced = parse_query_test("HTTP::Server AND lang:crystal").unwrap(); + match namespaced { + Expr::And(left, right) => { + match *left { + Expr::Term { + keywords, field, .. + } => { + assert_eq!(field, None); + assert_eq!(keywords, vec!["http".to_string(), "server".to_string()]); + } + other => panic!("expected namespaced term on left, got {other:?}"), + } + + match *right { + Expr::Term { + keywords, field, .. 
+ } => { + assert_eq!(field, Some("lang".to_string())); + assert_eq!(keywords, vec!["crystal".to_string()]); + } + other => panic!("expected lang field term on right, got {other:?}"), + } + } + other => panic!("expected AND expression for namespaced query, got {other:?}"), + } + // Quoted string with escaped quotes let keywords11 = vec!["function_with_\"quotes\"".to_string()]; assert_parse_eq( diff --git a/src/search/file_list_cache.rs b/src/search/file_list_cache.rs index c793c6b9..681d6ff2 100644 --- a/src/search/file_list_cache.rs +++ b/src/search/file_list_cache.rs @@ -268,6 +268,7 @@ fn build_file_list( "*_test.rb", "test_*.rb", "*_spec.rb", + "*_spec.cr", "*Test.php", "test_*.php", "**/tests/**", @@ -345,6 +346,13 @@ fn build_file_list( continue; } + if !allow_tests && is_test_path(path, entry.path()) { + if debug_mode { + println!("DEBUG: Skipping test file: {:?}", entry.path()); + } + continue; + } + files.push(entry.path().to_path_buf()); } @@ -378,6 +386,51 @@ fn build_file_list( }) } +fn is_test_path(search_root: &Path, file_path: &Path) -> bool { + if search_root.is_dir() + && search_root + .file_name() + .and_then(|name| name.to_str()) + .is_some_and(is_test_dir_name) + { + return true; + } + + let path_to_check = file_path + .strip_prefix(search_root) + .ok() + .filter(|relative| !relative.as_os_str().is_empty()) + .unwrap_or(file_path); + + let has_test_dir = path_to_check.components().any(|component| { + let name = component.as_os_str().to_string_lossy(); + is_test_dir_name(&name) + }); + + if has_test_dir { + return true; + } + + let Some(file_name) = path_to_check.file_name().and_then(|name| name.to_str()) else { + return false; + }; + + file_name.starts_with("test_") + || file_name.contains("_test.") + || file_name.contains("_spec.") + || file_name.contains(".test.") + || file_name.contains(".spec.") + || file_name.ends_with("Test.java") + || file_name.ends_with("Test.php") +} + +fn is_test_dir_name(name: &str) -> bool { + matches!( + name, + 
"test" | "tests" | "__test__" | "__tests__" | "spec" | "specs" + ) +} + /// Find files whose names match query words /// Returns a map of file paths to the term indices that matched the filename #[allow(clippy::too_many_arguments)] @@ -507,6 +560,7 @@ fn get_language_extensions(language: &str) -> Vec { "php" => vec![".php".to_string()], "swift" => vec![".swift".to_string()], "solidity" => vec![".sol".to_string()], + "crystal" => vec![".cr".to_string()], "csharp" => vec![".cs".to_string()], "markdown" => vec![".md".to_string(), ".markdown".to_string()], "yaml" => vec![".yaml".to_string(), ".yml".to_string()], diff --git a/src/search/filters.rs b/src/search/filters.rs index 8ac91ac2..9d5203d2 100644 --- a/src/search/filters.rs +++ b/src/search/filters.rs @@ -474,6 +474,7 @@ fn normalize_language_name(lang: &str) -> String { "rb" => "ruby".to_string(), "cs" => "csharp".to_string(), "sol" => "solidity".to_string(), + "cr" => "crystal".to_string(), "cpp" | "cc" | "cxx" => "cpp".to_string(), "h" | "hpp" | "hxx" => "c".to_string(), other => other.to_string(), @@ -533,6 +534,9 @@ fn get_extensions_for_type(file_type: &str) -> Option> { "solidity" | "sol" => { extensions.insert("sol".to_string()); } + "crystal" | "cr" => { + extensions.insert("cr".to_string()); + } "kotlin" => { extensions.insert("kt".to_string()); extensions.insert("kts".to_string()); diff --git a/src/search/results_formatter.rs b/src/search/results_formatter.rs index b011c5a2..b568bb4b 100644 --- a/src/search/results_formatter.rs +++ b/src/search/results_formatter.rs @@ -134,6 +134,7 @@ pub fn format_and_print_search_results(results: &[SearchResult], dry_run: bool) "rb" => "ruby", "php" => "php", "sol" => "solidity", + "cr" => "crystal", "sh" => "bash", "md" => "markdown", "json" => "json", diff --git a/src/search/search_output.rs b/src/search/search_output.rs index 30211d68..de153354 100644 --- a/src/search/search_output.rs +++ b/src/search/search_output.rs @@ -332,6 +332,7 @@ fn 
format_and_print_color_results( "rb" => "ruby", "php" => "php", "sol" => "solidity", + "cr" => "crystal", "sh" => "bash", "md" => "markdown", "json" => "json", @@ -2347,7 +2348,7 @@ fn get_comment_prefix(extension: &str) -> &'static str { | "tsx" | "cs" | "swift" | "go" | "php" | "sol" => "//", // Python-style comments - "py" | "rb" | "sh" | "bash" | "pl" | "r" | "yaml" | "yml" => "#", + "py" | "rb" | "cr" | "sh" | "bash" | "pl" | "r" | "yaml" | "yml" => "#", // HTML-style comments "md" | "markdown" => "