Skip to content

Commit 09c87ec

Browse files
committed
feat: implement codebase indexing, fuzzy file search, and case-insensitive grep
1 parent 1bc3e13 commit 09c87ec

3 files changed

Lines changed: 244 additions & 12 deletions

File tree

Cargo.lock

Lines changed: 40 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "rust_tui_coder"
3-
version = "0.2.3"
3+
version = "0.2.4"
44
edition = "2021"
55
description = "AI-powered terminal coding assistant with interactive TUI, supporting multiple LLMs and comprehensive development tools"
66
license = "MIT OR Apache-2.0"
@@ -34,3 +34,4 @@ serde_json = "1.0.111"
3434
toml = "0.8.8"
3535
futures-util = "0.3.30"
3636
chrono = "0.4"
37+
regex = "1.12.2"

src/agent.rs

Lines changed: 202 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
use crate::config::LlmConfig;
22
use crate::llm::Message;
33
use futures_util::StreamExt;
4+
use regex::Regex;
45
use serde::{Deserialize, Serialize};
6+
use std::collections::HashMap;
57
use std::env;
68
use std::fs;
79
use std::io;
@@ -72,6 +74,23 @@ impl ToolCall {
7274
let pattern = self.parameters.get("pattern")?.as_str()?.to_string();
7375
Some(Tool::GlobSearch { pattern })
7476
}
77+
"INDEX_CODEBASE" => {
78+
let path = self.parameters.get("path")?.as_str()?.to_string();
79+
Some(Tool::IndexCodebase { path })
80+
}
81+
"SEARCH_INDEX" => {
82+
let query = self.parameters.get("query")?.as_str()?.to_string();
83+
Some(Tool::SearchIndex { query })
84+
}
85+
"FUZZY_FIND" => {
86+
let pattern = self.parameters.get("pattern")?.as_str()?.to_string();
87+
let path = self
88+
.parameters
89+
.get("path")
90+
.and_then(|p| p.as_str())
91+
.map(|s| s.to_string());
92+
Some(Tool::FuzzyFind { pattern, path })
93+
}
7594
"EXECUTE_CODE" => {
7695
let language = self.parameters.get("language")?.as_str()?.to_string();
7796
let code = self.parameters.get("code")?.as_str()?.to_string();
@@ -203,6 +222,16 @@ pub enum Tool {
203222
GlobSearch {
204223
pattern: String,
205224
},
225+
IndexCodebase {
226+
path: String,
227+
},
228+
SearchIndex {
229+
query: String,
230+
},
231+
FuzzyFind {
232+
pattern: String,
233+
path: Option<String>,
234+
},
206235

207236
// Code Execution & Compilation
208237
ExecuteCode {
@@ -358,7 +387,7 @@ impl Tool {
358387
Tool::GrepSearch { pattern, path } => {
359388
let search_path = path.as_ref().map(|s| s.as_str()).unwrap_or(".");
360389
let mut cmd = Command::new("grep");
361-
cmd.arg("-r").arg("-n").arg(pattern).arg(search_path);
390+
cmd.arg("-r").arg("-n").arg("-i").arg(pattern).arg(search_path);
362391
let output = cmd.output()?;
363392

364393
if output.status.success() {
@@ -383,6 +412,141 @@ impl Tool {
383412
Ok(format!("No files found matching pattern '{}'", pattern))
384413
}
385414
}
415+
Tool::FuzzyFind { pattern, path } => {
416+
let search_path = path.as_ref().map(|s| s.as_str()).unwrap_or(".");
417+
let mut cmd = Command::new("find");
418+
cmd.arg(search_path).arg("-type").arg("f");
419+
let output = cmd.output()?;
420+
421+
if output.status.success() {
422+
let stdout_str = String::from_utf8_lossy(&output.stdout);
423+
let pattern_lower = pattern.to_lowercase();
424+
let files_vec: Vec<_> = stdout_str
425+
.lines()
426+
.filter(|line| !line.is_empty())
427+
.filter(|line| line.to_lowercase().contains(&pattern_lower))
428+
.collect();
429+
if files_vec.is_empty() {
430+
Ok(format!("No files found matching fuzzy pattern '{}' in '{}'", pattern, search_path))
431+
} else {
432+
Ok(format!("Files matching fuzzy pattern '{}':\n{}", pattern, files_vec.join("\n")))
433+
}
434+
} else {
435+
Ok(format!("Failed to search files in '{}'", search_path))
436+
}
437+
}
438+
439+
Tool::IndexCodebase { path } => {
440+
let root_path = Path::new(path);
441+
if !root_path.exists() {
442+
return Err(io::Error::new(io::ErrorKind::NotFound, format!("Path '{}' does not exist", path)));
443+
}
444+
445+
let mut index_data: HashMap<String, Vec<String>> = HashMap::new();
446+
let mut file_count = 0;
447+
let mut symbol_count = 0;
448+
449+
// Regex patterns for different languages
450+
let rust_fn = Regex::new(r"fn\s+([a-zA-Z_][a-zA-Z0-9_]*)").unwrap();
451+
let rust_struct = Regex::new(r"(struct|enum|trait)\s+([a-zA-Z_][a-zA-Z0-9_]*)").unwrap();
452+
let py_def = Regex::new(r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)").unwrap();
453+
let py_class = Regex::new(r"class\s+([a-zA-Z_][a-zA-Z0-9_]*)").unwrap();
454+
let js_func = Regex::new(r"(?:function\s+([a-zA-Z_][a-zA-Z0-9_]*)|([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*function)").unwrap();
455+
let js_class = Regex::new(r"class\s+([a-zA-Z_][a-zA-Z0-9_]*)").unwrap();
456+
let js_const = Regex::new(r"(?:const|let|var)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=").unwrap();
457+
458+
fn visit_dirs(dir: &Path, cb: &mut dyn FnMut(&Path)) -> io::Result<()> {
459+
if dir.is_dir() {
460+
for entry in fs::read_dir(dir)? {
461+
let entry = entry?;
462+
let path = entry.path();
463+
if path.is_dir() {
464+
if !path.file_name().unwrap().to_string_lossy().starts_with('.') {
465+
visit_dirs(&path, cb)?;
466+
}
467+
} else {
468+
cb(&path);
469+
}
470+
}
471+
}
472+
Ok(())
473+
}
474+
visit_dirs(root_path, &mut |file_path| {
475+
if let Ok(content) = fs::read_to_string(file_path) {
476+
let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
477+
let file_str = file_path.to_string_lossy().to_string();
478+
let mut symbols = Vec::new();
479+
match ext {
480+
"rs" => {
481+
for cap in rust_fn.captures_iter(&content) {
482+
symbols.push(format!("Function: {}", &cap[1]));
483+
}
484+
for cap in rust_struct.captures_iter(&content) {
485+
symbols.push(format!("{}: {}", &cap[1], &cap[2]));
486+
}
487+
},
488+
"py" => {
489+
for cap in py_def.captures_iter(&content) {
490+
symbols.push(format!("Function: {}", &cap[1]));
491+
}
492+
for cap in py_class.captures_iter(&content) {
493+
symbols.push(format!("Class: {}", &cap[1]));
494+
}
495+
},
496+
"js" | "ts" | "jsx" | "tsx" => {
497+
for cap in js_func.captures_iter(&content) {
498+
if let Some(name) = cap.get(1).or(cap.get(2)) {
499+
symbols.push(format!("Function: {}", name.as_str()));
500+
}
501+
}
502+
for cap in js_class.captures_iter(&content) {
503+
symbols.push(format!("Class: {}", &cap[1]));
504+
}
505+
for cap in js_const.captures_iter(&content) {
506+
symbols.push(format!("Variable: {}", &cap[1]));
507+
}
508+
},
509+
_ => {}
510+
}
511+
512+
if !symbols.is_empty() {
513+
index_data.insert(file_str, symbols);
514+
file_count += 1;
515+
symbol_count += index_data.values().last().unwrap().len();
516+
}
517+
}
518+
})?;
519+
520+
// Save index to file
521+
let json = serde_json::to_string_pretty(&index_data).unwrap();
522+
fs::write(".agent_index.json", json)?;
523+
524+
Ok(format!("Indexed {} files and found {} symbols. Index saved to .agent_index.json", file_count, symbol_count))
525+
}
526+
527+
Tool::SearchIndex { query } => {
528+
if !Path::new(".agent_index.json").exists() {
529+
return Ok("Index not found. Please run INDEX_CODEBASE first.".to_string());
530+
}
531+
let content = fs::read_to_string(".agent_index.json")?;
532+
let index: HashMap<String, Vec<String>> = serde_json::from_str(&content).unwrap_or_default();
533+
let query_lower = query.to_lowercase();
534+
let mut results = Vec::new();
535+
for (file, symbols) in index {
536+
for symbol in symbols {
537+
if symbol.to_lowercase().contains(&query_lower) {
538+
results.push(format!("{} -> {}", symbol, file));
539+
}
540+
}
541+
}
542+
543+
if results.is_empty() {
544+
Ok(format!("No symbols found matching '{}'", query))
545+
} else {
546+
results.sort();
547+
Ok(format!("Found {} matches for '{}':\n{}", results.len(), query, results.join("\n")))
548+
}
549+
}
386550

387551
// Code Execution & Compilation
388552
Tool::ExecuteCode { language, code } => {
@@ -1022,11 +1186,14 @@ The tool will execute automatically and you will receive the result. Then you ca
10221186
11. **CREATE_DIRECTORY** `<path>` - Create directories (recursive)
10231187
10241188
### Search & Navigation
1025-
12. **GREP_SEARCH** `<pattern> [path]` - Search for text patterns using ripgrep (fast, regex support)
1189+
12. **GREP_SEARCH** `<pattern> [path]` - Search file contents recursively for text patterns using grep (regex support, case-insensitive, results include line numbers)
10261190
13. **GLOB_SEARCH** `<pattern>` - Find files matching glob patterns (*.rs, **/test/**, etc.)
1191+
14. **FUZZY_FIND** `<pattern> [path]` - Find files whose path contains the pattern, ignoring case (e.g. "user" matches "src/user_model.rs")
1192+
15. **INDEX_CODEBASE** `<path>` - Scan directory and build a symbol index (functions, classes)
1193+
16. **SEARCH_INDEX** `<query>` - Search the built index for symbols
10271194
10281195
### Code Execution & Compilation
1029-
14. **EXECUTE_CODE** `<language> <code>` - Execute code in multiple languages:
1196+
17. **EXECUTE_CODE** `<language> <code>` - Execute code in multiple languages:
10301197
- Python (python, py)
10311198
- JavaScript/Node.js (javascript, js, node)
10321199
- Bash/Shell (bash, sh)
@@ -1175,31 +1342,38 @@ TOOL: {{"name": "GET_TIME", "parameters": {{}}}}
11751342
### 2. Exploration Phase
11761343
- Always start with LIST_FILES to understand project structure
11771344
- Use GLOB_SEARCH to find relevant files (*.rs for Rust, *.py for Python, etc.)
1345+
- Use FUZZY_FIND to locate files by partial name (e.g. "user" -> "src/user_model.rs")
11781346
- READ_FILE key configuration files (Cargo.toml, package.json, requirements.txt, etc.)
11791347
1180-
### 3. Planning Phase (MANDATORY for complex tasks)
1348+
### 3. Code Understanding Phase
1349+
- Use GREP_SEARCH to find code definitions, references, or TODOs
1350+
- Use FUZZY_FIND to jump to specific files
1351+
- READ_FILE to examine the code context and logic
1352+
- Analyze the code structure before making changes
1353+
1354+
### 4. Planning Phase (MANDATORY for complex tasks)
11811355
- **FIRST STEP**: Use CREATE_PLAN to break down the task
11821356
- Break complex tasks into manageable steps
11831357
- Identify dependencies and prerequisites
11841358
- Plan file modifications before executing
11851359
1186-
### 4. Implementation Phase
1360+
### 5. Implementation Phase
11871361
- Use SEARCH_REPLACE for precise edits (prefer over WRITE_FILE for modifications)
11881362
- APPEND_FILE for adding to existing files
11891363
- Verify changes with READ_FILE
11901364
- Test modifications with EXECUTE_CODE or RUN_COMMAND
11911365
1192-
### 5. Verification Phase
1366+
### 6. Verification Phase
11931367
- Use RUN_LINT to check code quality
11941368
- Execute tests with RUN_TESTS
11951369
- Build/compile with RUN_COMMAND
11961370
- Verify functionality with EXECUTE_CODE
11971371
1198-
### 6. Completion Phase
1372+
### 7. Completion Phase
11991373
- Use UPDATE_PLAN to mark steps as completed
12001374
- Use CLEAR_PLAN when all steps are done
12011375
1202-
### 7. Error Recovery
1376+
### 8. Error Recovery
12031377
- If SEARCH_REPLACE fails, check exact string matching
12041378
- If EXECUTE_CODE fails, try RUN_COMMAND with compilation
12051379
- If RUN_COMMAND fails, simplify the command or check permissions
@@ -1220,6 +1394,7 @@ TOOL: {{"name": "SEARCH_REPLACE", "parameters": {{"path": "src/main.rs", "old_st
12201394
### Development Tasks:
12211395
TOOL: {{"name": "LIST_FILES", "parameters": {{"path": "."}}}}
12221396
TOOL: {{"name": "GREP_SEARCH", "parameters": {{"pattern": "TODO|FIXME", "path": "src/"}}}}
1397+
TOOL: {{"name": "FUZZY_FIND", "parameters": {{"pattern": "main", "path": "src/"}}}}
12231398
TOOL: {{"name": "EXECUTE_CODE", "parameters": {{"language": "rust", "code": "fn main() {{ println!(\"test\"); }}"}}}}
12241399
TOOL: {{"name": "RUN_LINT", "parameters": {{"language": "rust"}}}}
12251400
@@ -1270,6 +1445,7 @@ TOOL: {{"name": "GET_OS_INFO", "parameters": {{}}}}
12701445
- **Code quality matters** - Use linters and tests to maintain standards
12711446
- **Use ReAct pattern** - Always REASON before you ACT, then OBSERVE the results
12721447
- **Check OS compatibility** - Use GET_OS_INFO when executing OS-specific commands
1448+
- **AUTONOMOUS EXECUTION** - You must ALWAYS execute the tool. Never ask the user for permission to run a tool unless explicitly told to do so.
12731449
- **Leverage time awareness** - Use GET_TIME when timestamps or scheduling matters
12741450
12751451
**REMEMBER: For complex tasks, your FIRST response MUST contain CREATE_PLAN. For simple tasks, use tools directly. Your responses should contain actual tool calls that will be executed, not descriptions of tool usage.**{}
@@ -1281,8 +1457,9 @@ TOOL: {{"name": "GET_OS_INFO", "parameters": {{}}}}
12811457
fn parse_tool_call(&self, response: &str) -> Option<Tool> {
12821458
let lines: Vec<&str> = response.lines().collect();
12831459
for line in lines {
1284-
if line.starts_with("TOOL:") {
1285-
let tool_part = line[6..].trim();
1460+
let trimmed_line = line.trim();
1461+
if trimmed_line.starts_with("TOOL:") {
1462+
let tool_part = trimmed_line.strip_prefix("TOOL:").unwrap().trim();
12861463

12871464
// Try JSON format first
12881465
if tool_part.starts_with('{') {
@@ -1387,6 +1564,18 @@ TOOL: {{"name": "GET_OS_INFO", "parameters": {{}}}}
13871564
"GLOB_SEARCH" => Some(Tool::GlobSearch {
13881565
pattern: params.to_string(),
13891566
}),
1567+
"FUZZY_FIND" => {
1568+
let parts: Vec<&str> = params.splitn(2, ' ').collect();
1569+
let pattern = parts[0].to_string();
1570+
let path = parts.get(1).map(|s| s.to_string());
1571+
Some(Tool::FuzzyFind { pattern, path })
1572+
}
1573+
"INDEX_CODEBASE" => Some(Tool::IndexCodebase {
1574+
path: params.to_string(),
1575+
}),
1576+
"SEARCH_INDEX" => Some(Tool::SearchIndex {
1577+
query: params.to_string(),
1578+
}),
13901579
"GIT_STATUS" => Some(Tool::GitStatus),
13911580
"GIT_DIFF" => Some(Tool::GitDiff),
13921581
"GIT_COMMIT" => Some(Tool::GitCommit {
@@ -1601,6 +1790,9 @@ TOOL: {{"name": "GET_OS_INFO", "parameters": {{}}}}
16011790
Tool::CreateDirectory { path } => format!("CREATE_DIRECTORY {}", path),
16021791
Tool::GrepSearch { pattern, path: _ } => format!("GREP_SEARCH {}", pattern),
16031792
Tool::GlobSearch { pattern } => format!("GLOB_SEARCH {}", pattern),
1793+
Tool::FuzzyFind { pattern, path: _ } => format!("FUZZY_FIND {}", pattern),
1794+
Tool::IndexCodebase { path } => format!("INDEX_CODEBASE {}", path),
1795+
Tool::SearchIndex { query } => format!("SEARCH_INDEX {}", query),
16041796
Tool::ExecuteCode { language, code: _ } => format!("EXECUTE_CODE {}", language),
16051797
Tool::RunCommand { command } => format!("RUN_COMMAND {}", command),
16061798
Tool::GitStatus => "GIT_STATUS".to_string(),

0 commit comments

Comments
 (0)