Skip to content

Commit 7e18de2

Browse files
haasonsaasclaude
andcommitted
fix: 2 TDD bugs — JSON array bracket mismatch, hunk false file-header
- llm_response: replace find/rfind bracket approach with depth-counting bracket matcher that correctly handles multiple separate JSON arrays and validates content even when it starts with '[' - diff_parser: defer is_file_header check until expected hunk lines are consumed, preventing false positive when removed/added lines start with "--"/"++ " (e.g. SQL comments) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 5286d2e commit 7e18de2

File tree

2 files changed

+124
-16
lines changed

2 files changed

+124
-16
lines changed

src/core/diff_parser.rs

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,8 +368,15 @@ impl DiffParser {
368368
while *i < lines.len()
369369
&& !lines[*i].starts_with("@@")
370370
&& !lines[*i].starts_with("diff --git")
371-
&& !is_file_header(lines, *i)
372371
{
372+
// Only check for file headers once we've consumed all expected
373+
// hunk lines. Inside the hunk, "--- " / "+++ " lines are just
374+
// removals/additions whose content happens to start with "-- "/"++ ".
375+
let consumed_old = old_line.saturating_sub(old_start);
376+
let consumed_new = new_line.saturating_sub(new_start);
377+
if consumed_old >= old_lines && consumed_new >= new_lines && is_file_header(lines, *i) {
378+
break;
379+
}
373380
let line = lines[*i];
374381
if line.starts_with("\\ No newline at end of file") {
375382
*i += 1;
@@ -661,6 +668,48 @@ diff --git a/test.txt b/test.txt
661668
);
662669
}
663670

671+
// ── Bug: is_file_header false positive terminates hunk early ────────
672+
//
673+
// When a removed line starts with "-- " (raw "--- ") AND the next line
674+
// is an addition starting with "++ " (raw "+++ "), the is_file_header
675+
// closure returns true and the hunk loop exits prematurely, losing
676+
// the remaining changes.
677+
//
678+
// The fix is to track consumed old/new line counts and only check
679+
// is_file_header after the expected hunk lines have been consumed.
680+
681+
#[test]
682+
fn test_parse_hunk_false_file_header_from_dashes_and_pluses() {
683+
// Both conditions for is_file_header triggered inside a hunk:
684+
// - removed line whose content is "-- SQL comment" → raw "--- SQL comment"
685+
// - added line whose content is "++ new comment" → raw "+++ new comment"
686+
let diff_text = "\
687+
diff --git a/test.sql b/test.sql
688+
--- a/test.sql
689+
+++ b/test.sql
690+
@@ -1,3 +1,3 @@
691+
first line
692+
--- SQL comment
693+
+++ new SQL comment
694+
third line
695+
";
696+
let diffs = DiffParser::parse_unified_diff(diff_text).unwrap();
697+
assert_eq!(diffs.len(), 1);
698+
let hunk = &diffs[0].hunks[0];
699+
// Should have 4 changes: context, removed, added, context
700+
assert_eq!(
701+
hunk.changes.len(),
702+
4,
703+
"Hunk should contain all 4 lines (context, removed, added, context), \
704+
not stop at the false file header. Got {} changes: {:?}",
705+
hunk.changes.len(),
706+
hunk.changes
707+
.iter()
708+
.map(|c| format!("{:?}: {}", c.change_type, c.content))
709+
.collect::<Vec<_>>()
710+
);
711+
}
712+
664713
#[test]
665714
fn test_extract_file_path_file_in_a_subdirectory() {
666715
// Verify the regex path correctly preserves a/ subdirectory

src/parsing/llm_response.rs

Lines changed: 74 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -287,22 +287,43 @@ fn extract_json_from_code_block(content: &str) -> Option<String> {
287287
}
288288

289289
/// Find a bare JSON array in the content (not in a code block).
290+
///
291+
/// Uses bracket-depth counting to find the matching `]` for each `[`,
292+
/// then validates with serde. This correctly handles multiple separate
293+
/// arrays and nested brackets inside JSON strings.
290294
fn find_json_array(content: &str) -> Option<String> {
291-
// Find the first '[' and try to parse from there
292-
let trimmed = content.trim();
293-
if trimmed.starts_with('[') {
294-
// The whole content might be a JSON array
295-
return Some(trimmed.to_string());
296-
}
297-
298-
// Look for a JSON array somewhere in the content
299-
if let Some(start) = content.find('[') {
300-
if let Some(end) = content.rfind(']') {
301-
if end > start {
302-
let candidate = &content[start..=end];
303-
// Quick validation: try to parse it
304-
if serde_json::from_str::<Vec<serde_json::Value>>(candidate).is_ok() {
305-
return Some(candidate.to_string());
295+
// Try each '[' as a potential array start
296+
for (start, _) in content.char_indices().filter(|&(_, ch)| ch == '[') {
297+
let mut depth = 0i32;
298+
let mut in_string = false;
299+
let mut escape_next = false;
300+
301+
for (offset, ch) in content[start..].char_indices() {
302+
if escape_next {
303+
escape_next = false;
304+
continue;
305+
}
306+
if ch == '\\' && in_string {
307+
escape_next = true;
308+
continue;
309+
}
310+
if ch == '"' {
311+
in_string = !in_string;
312+
continue;
313+
}
314+
if !in_string {
315+
if ch == '[' {
316+
depth += 1;
317+
} else if ch == ']' {
318+
depth -= 1;
319+
if depth == 0 {
320+
let end = start + offset;
321+
let candidate = &content[start..=end];
322+
if serde_json::from_str::<Vec<serde_json::Value>>(candidate).is_ok() {
323+
return Some(candidate.to_string());
324+
}
325+
break; // this '[' didn't lead to valid JSON, try next
326+
}
306327
}
307328
}
308329
}
@@ -877,6 +898,44 @@ let data = &input;
877898
assert_eq!(comments[0].line_number, 5);
878899
}
879900

901+
// ── Bug: find_json_array uses mismatched brackets ──────────────────
902+
//
903+
// `find_json_array` uses `find('[')` (first) + `rfind(']')` (last).
904+
// When two separate JSON arrays appear in the text, this grabs from
905+
// the first `[` to the last `]`, including non-JSON text between them.
906+
// The serde validation rejects the invalid combined string, causing
907+
// BOTH arrays to be silently lost.
908+
909+
#[test]
910+
fn find_json_array_two_separate_arrays() {
911+
// Two valid JSON arrays separated by text — should extract the first one
912+
let input =
913+
"First: [{\"line\": 1, \"issue\": \"bug1\"}] and second: [{\"line\": 2, \"issue\": \"bug2\"}]";
914+
let result = find_json_array(input);
915+
assert!(
916+
result.is_some(),
917+
"Should find at least the first valid JSON array, not fail on mismatched brackets"
918+
);
919+
let json_str = result.unwrap();
920+
let parsed: Vec<serde_json::Value> = serde_json::from_str(&json_str).unwrap();
921+
assert_eq!(parsed.len(), 1);
922+
}
923+
924+
#[test]
925+
fn find_json_array_validates_when_content_starts_with_bracket() {
926+
// Content starts with '[' but isn't valid JSON — should try to find
927+
// a valid array elsewhere, not return the invalid trimmed content
928+
let input = "[not json] here is the real one: [{\"line\": 5, \"issue\": \"Bug\"}]";
929+
let result = find_json_array(input);
930+
assert!(
931+
result.is_some(),
932+
"Should find the valid JSON array even when content starts with '['",
933+
);
934+
let parsed: Vec<serde_json::Value> =
935+
serde_json::from_str(&result.unwrap()).expect("Should be valid JSON");
936+
assert_eq!(parsed.len(), 1);
937+
}
938+
880939
#[test]
881940
fn parse_file_line_format_does_not_match_urls() {
882941
// URLs with port numbers like http://localhost:8080 should not be parsed as file:line

0 commit comments

Comments
 (0)