Skip to content

Commit 000cfe3

Browse files
haasonsaasclaude
andcommitted
feat: add file triage, dynamic context boundaries, and robust LLM parsing
Phase 1 improvements to the review pipeline, informed by competitive analysis of Greptile, CodeRabbit, and Qodo Merge architectures: - File triage (#29): Heuristic classifier skips lock files, generated files, deletion-only, whitespace-only, and comment-only changes before expensive LLM review. 45 unit tests. - Dynamic context boundaries (#25): Context fetcher now searches upward for enclosing function/class definitions instead of using fixed-window context. Uses asymmetric padding (more before, less after) matching Qodo's approach. 6 unit tests. - Robust LLM output parsing (#28): 5-strategy fallback chain for parsing LLM responses — primary format, numbered lists, markdown bullets, file:line format, and JSON extraction. Prevents silent comment loss when models deviate from the expected format. 11 unit tests. - Fix rust-embed v8 derive macro (Embed -> RustEmbed) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent aa2f3c4 commit 000cfe3

File tree

7 files changed

+1169
-5
lines changed

7 files changed

+1169
-5
lines changed

src/core/context.rs

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::collections::HashSet;
55
use std::path::Path;
66
use std::path::PathBuf;
77

8+
use crate::core::function_chunker::find_enclosing_boundary_line;
89
use crate::core::SymbolIndex;
910
#[derive(Debug, Clone, Serialize, Deserialize)]
1011
pub struct LLMContextChunk {
@@ -50,8 +51,19 @@ impl ContextFetcher {
5051
}
5152
let start = start.max(1);
5253
let end = end.max(start);
53-
let start_idx = start.saturating_sub(1);
54-
let end_idx = end.min(file_lines.len());
54+
55+
// Dynamic context: expand start to enclosing function boundary
56+
let expanded_start =
57+
find_enclosing_boundary_line(&content, file_path, start, 10)
58+
.filter(|&boundary| boundary >= start.saturating_sub(10))
59+
.unwrap_or_else(|| start.saturating_sub(5)); // fallback: 5 lines before
60+
let expanded_start = expanded_start.max(1);
61+
62+
// Asymmetric: less context after (1 extra line)
63+
let expanded_end = (end + 1).min(file_lines.len());
64+
65+
let start_idx = expanded_start.saturating_sub(1);
66+
let end_idx = expanded_end.min(file_lines.len());
5567

5668
if start_idx < file_lines.len() {
5769
let chunk_content = truncate_with_notice(
@@ -62,7 +74,7 @@ impl ContextFetcher {
6274
file_path: file_path.clone(),
6375
content: chunk_content,
6476
context_type: ContextType::FileContent,
65-
line_range: Some((start, end)),
77+
line_range: Some((expanded_start, expanded_end)),
6678
});
6779
}
6880
}
@@ -330,6 +342,28 @@ mod tests {
330342
let merged = merge_ranges(&ranges);
331343
assert_eq!(merged, vec![(1, 10)]);
332344
}
345+
346+
#[tokio::test]
347+
async fn test_fetch_context_expands_to_function_boundary() {
348+
// Create a temp file with a function
349+
let dir = tempfile::tempdir().unwrap();
350+
let file_path = dir.path().join("test.rs");
351+
let content = "use std::io;\n\npub fn process(x: i32) -> bool {\n let y = x + 1;\n y > 0\n}\n\npub fn other() {\n println!(\"hi\");\n}\n";
352+
std::fs::write(&file_path, content).unwrap();
353+
354+
let fetcher = ContextFetcher::new(dir.path().to_path_buf());
355+
let relative = PathBuf::from("test.rs");
356+
// Request context for line 4-5 (inside process function)
357+
let chunks = fetcher
358+
.fetch_context_for_file(&relative, &[(4, 5)])
359+
.await
360+
.unwrap();
361+
362+
assert!(!chunks.is_empty());
363+
// Should expand to include the function signature (line 3)
364+
let chunk = &chunks[0];
365+
assert!(chunk.content.contains("pub fn process"));
366+
}
333367
}
334368

335369
async fn read_file_lossy(path: &Path) -> Result<String> {

src/core/function_chunker.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,21 @@ fn find_function_end(lines: &[&str], start: usize, language: &str) -> usize {
282282
}
283283
}
284284

285+
/// Given file content and a line number, find the start line of the enclosing
286+
/// function/class boundary. Searches upward from `line` up to `max_search_lines`.
287+
/// Returns the boundary start line, or None if no boundary found.
288+
pub fn find_enclosing_boundary_line(
289+
content: &str,
290+
file_path: &Path,
291+
line: usize,
292+
_max_search_lines: usize,
293+
) -> Option<usize> {
294+
let language = detect_language(file_path);
295+
let boundaries = detect_function_boundaries(content, &language);
296+
// Find the innermost function containing this line
297+
find_enclosing_function(&boundaries, line).map(|b| b.start_line)
298+
}
299+
285300
fn detect_language(file_path: &Path) -> String {
286301
file_path
287302
.extension()
@@ -851,6 +866,51 @@ fn next_func() {
851866
assert!(result.is_some());
852867
}
853868

869+
#[test]
870+
fn test_find_enclosing_boundary_line_rust() {
871+
let content =
872+
"use std::io;\n\npub fn process(x: i32) -> bool {\n let y = x + 1;\n y > 0\n}\n";
873+
let path = Path::new("test.rs");
874+
// Line 4 is inside process() which starts at line 3
875+
let result = find_enclosing_boundary_line(content, path, 4, 10);
876+
assert_eq!(result, Some(3));
877+
}
878+
879+
#[test]
880+
fn test_find_enclosing_boundary_line_no_function() {
881+
let content = "let x = 1;\nlet y = 2;\n";
882+
let path = Path::new("test.rs");
883+
let result = find_enclosing_boundary_line(content, path, 1, 10);
884+
assert!(result.is_none());
885+
}
886+
887+
#[test]
888+
fn test_find_enclosing_boundary_line_python() {
889+
let content =
890+
"import os\n\ndef handle_request(req):\n data = req.json()\n return data\n";
891+
let path = Path::new("handler.py");
892+
// Line 4 is inside handle_request() which starts at line 3
893+
let result = find_enclosing_boundary_line(content, path, 4, 10);
894+
assert_eq!(result, Some(3));
895+
}
896+
897+
#[test]
898+
fn test_find_enclosing_boundary_line_unsupported_language() {
899+
let content = "some text\nmore text\n";
900+
let path = Path::new("readme.txt");
901+
let result = find_enclosing_boundary_line(content, path, 1, 10);
902+
assert!(result.is_none());
903+
}
904+
905+
#[test]
906+
fn test_find_enclosing_boundary_nested_functions() {
907+
let content = "pub fn outer() {\n fn inner() {\n let x = 1;\n }\n}\n";
908+
let path = Path::new("test.rs");
909+
// Line 3 is inside inner() which starts at line 2
910+
let result = find_enclosing_boundary_line(content, path, 3, 10);
911+
assert_eq!(result, Some(2));
912+
}
913+
854914
#[test]
855915
fn test_find_function_end_escaped_backslash_in_string() {
856916
// A string containing "\\" (escaped backslash) on the same line as braces.

0 commit comments

Comments
 (0)