fix: 2 TDD bugs — JSON array bracket mismatch, hunk false file-header

haasonsaas · claude · haasonsaas · commit 7e18de2b4c86 · 2026-03-11T15:21:21.000-07:00
- llm_response: replace find/rfind bracket approach with depth-counting
  bracket matcher that correctly handles multiple separate JSON arrays
  and validates content even when it starts with '['
- diff_parser: defer is_file_header check until expected hunk lines are
  consumed, preventing a false positive when removed/added lines start
  with "-- "/"++ " (e.g. SQL comments)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/src/core/diff_parser.rs b/src/core/diff_parser.rs
@@ -368,8 +368,15 @@ impl DiffParser {
         while *i < lines.len()
             && !lines[*i].starts_with("@@")
             && !lines[*i].starts_with("diff --git")
-            && !is_file_header(lines, *i)
         {
+            // Only check for file headers once we've consumed all expected
+            // hunk lines. Inside the hunk, "--- " / "+++ " lines are just
+            // removals/additions whose content happens to start with "-- "/"++ ".
+            let consumed_old = old_line.saturating_sub(old_start);
+            let consumed_new = new_line.saturating_sub(new_start);
+            if consumed_old >= old_lines && consumed_new >= new_lines && is_file_header(lines, *i) {
+                break;
+            }
             let line = lines[*i];
             if line.starts_with("\\ No newline at end of file") {
                 *i += 1;
@@ -661,6 +668,48 @@ diff --git a/test.txt b/test.txt
         );
     }
 
+    // ── Bug: is_file_header false positive terminates hunk early ────────
+    //
+    // When a removed line starts with "-- " (raw "--- ") AND the next line
+    // is an addition starting with "++ " (raw "+++ "), the is_file_header
+    // closure returns true and the hunk loop exits prematurely, losing
+    // the remaining changes.
+    //
+    // The fix is to track consumed old/new line counts and only check
+    // is_file_header after the expected hunk lines have been consumed.
+
+    #[test]
+    fn test_parse_hunk_false_file_header_from_dashes_and_pluses() {
+        // Both conditions for is_file_header triggered inside a hunk:
+        //   - removed line whose content is "-- SQL comment" → raw "--- SQL comment"
+        //   - added line whose content is "++ new comment"  → raw "+++ new comment"
+        let diff_text = "\
+diff --git a/test.sql b/test.sql
+--- a/test.sql
++++ b/test.sql
+@@ -1,3 +1,3 @@
+ first line
+--- SQL comment
++++ new SQL comment
+ third line
+";
+        let diffs = DiffParser::parse_unified_diff(diff_text).unwrap();
+        assert_eq!(diffs.len(), 1);
+        let hunk = &diffs[0].hunks[0];
+        // Should have 4 changes: context, removed, added, context
+        assert_eq!(
+            hunk.changes.len(),
+            4,
+            "Hunk should contain all 4 lines (context, removed, added, context), \
+             not stop at the false file header. Got {} changes: {:?}",
+            hunk.changes.len(),
+            hunk.changes
+                .iter()
+                .map(|c| format!("{:?}: {}", c.change_type, c.content))
+                .collect::<Vec<_>>()
+        );
+    }
+
     #[test]
     fn test_extract_file_path_file_in_a_subdirectory() {
         // Verify the regex path correctly preserves a/ subdirectory
diff --git a/src/parsing/llm_response.rs b/src/parsing/llm_response.rs
@@ -287,22 +287,43 @@ fn extract_json_from_code_block(content: &str) -> Option<String> {
 }
 
 /// Find a bare JSON array in the content (not in a code block).
+///
+/// Uses bracket-depth counting to find the matching `]` for each `[`,
+/// then validates with serde. This correctly handles multiple separate
+/// arrays and nested brackets inside JSON strings.
 fn find_json_array(content: &str) -> Option<String> {
-    // Find the first '[' and try to parse from there
-    let trimmed = content.trim();
-    if trimmed.starts_with('[') {
-        // The whole content might be a JSON array
-        return Some(trimmed.to_string());
-    }
-
-    // Look for a JSON array somewhere in the content
-    if let Some(start) = content.find('[') {
-        if let Some(end) = content.rfind(']') {
-            if end > start {
-                let candidate = &content[start..=end];
-                // Quick validation: try to parse it
-                if serde_json::from_str::<Vec<serde_json::Value>>(candidate).is_ok() {
-                    return Some(candidate.to_string());
+    // Try each '[' as a potential array start
+    for (start, _) in content.char_indices().filter(|&(_, ch)| ch == '[') {
+        let mut depth = 0i32;
+        let mut in_string = false;
+        let mut escape_next = false;
+
+        for (offset, ch) in content[start..].char_indices() {
+            if escape_next {
+                escape_next = false;
+                continue;
+            }
+            if ch == '\\' && in_string {
+                escape_next = true;
+                continue;
+            }
+            if ch == '"' {
+                in_string = !in_string;
+                continue;
+            }
+            if !in_string {
+                if ch == '[' {
+                    depth += 1;
+                } else if ch == ']' {
+                    depth -= 1;
+                    if depth == 0 {
+                        let end = start + offset;
+                        let candidate = &content[start..=end];
+                        if serde_json::from_str::<Vec<serde_json::Value>>(candidate).is_ok() {
+                            return Some(candidate.to_string());
+                        }
+                        break; // this '[' didn't lead to valid JSON, try next
+                    }
                 }
             }
         }
@@ -877,6 +898,44 @@ let data = &input;
         assert_eq!(comments[0].line_number, 5);
     }
 
+    // ── Bug: find_json_array uses mismatched brackets ──────────────────
+    //
+    // `find_json_array` uses `find('[')` (first) + `rfind(']')` (last).
+    // When two separate JSON arrays appear in the text, this grabs from
+    // the first `[` to the last `]`, including non-JSON text between them.
+    // The serde validation rejects the invalid combined string, causing
+    // BOTH arrays to be silently lost.
+
+    #[test]
+    fn find_json_array_two_separate_arrays() {
+        // Two valid JSON arrays separated by text — should extract the first one
+        let input =
+            "First: [{\"line\": 1, \"issue\": \"bug1\"}] and second: [{\"line\": 2, \"issue\": \"bug2\"}]";
+        let result = find_json_array(input);
+        assert!(
+            result.is_some(),
+            "Should find at least the first valid JSON array, not fail on mismatched brackets"
+        );
+        let json_str = result.unwrap();
+        let parsed: Vec<serde_json::Value> = serde_json::from_str(&json_str).unwrap();
+        assert_eq!(parsed.len(), 1);
+    }
+
+    #[test]
+    fn find_json_array_validates_when_content_starts_with_bracket() {
+        // Content starts with '[' but isn't valid JSON — should try to find
+        // a valid array elsewhere, not return the invalid trimmed content
+        let input = "[not json] here is the real one: [{\"line\": 5, \"issue\": \"Bug\"}]";
+        let result = find_json_array(input);
+        assert!(
+            result.is_some(),
+            "Should find the valid JSON array even when content starts with '['",
+        );
+        let parsed: Vec<serde_json::Value> =
+            serde_json::from_str(&result.unwrap()).expect("Should be valid JSON");
+        assert_eq!(parsed.len(), 1);
+    }
+
     #[test]
     fn parse_file_line_format_does_not_match_urls() {
         // URLs with port numbers like http://localhost:8080 should not be parsed as file:line