Improve diff parsing and output formatting

haasonsaas · haasonsaas · commit 769463e790e1 · 2025-12-24T23:44:05.000-08:00
diff --git a/src/core/diff_parser.rs b/src/core/diff_parser.rs
@@ -49,6 +49,12 @@ impl DiffParser {
             if lines[i].starts_with("diff --git") {
                 let diff = Self::parse_single_file_diff(&lines, &mut i)?;
                 diffs.push(diff);
+            } else if lines[i].starts_with("--- ")
+                && i + 1 < lines.len()
+                && lines[i + 1].starts_with("+++ ")
+            {
+                let diff = Self::parse_simple_file_diff(&lines, &mut i)?;
+                diffs.push(diff);
             } else {
                 i += 1;
             }
@@ -173,7 +179,11 @@ impl DiffParser {
         let file_path = Self::extract_file_path(file_line)?;
         *i += 1;
 
+        let mut is_binary = false;
         while *i < lines.len() && !lines[*i].starts_with("@@") && !lines[*i].starts_with("diff --git") {
+            if lines[*i].starts_with("Binary files") || lines[*i].starts_with("GIT binary patch") {
+                is_binary = true;
+            }
             *i += 1;
         }
 
@@ -189,7 +199,51 @@ impl DiffParser {
             old_content: None,
             new_content: None,
             hunks,
-            is_binary: false,
+            is_binary,
+        })
+    }
+
+    /// Parses one file's diff that starts directly at a `--- ` / `+++ ` header pair,
+    /// i.e. without a preceding `diff --git` line.
+    ///
+    /// On entry `lines[*i]` must be the `--- ` line. On return `*i` indexes the first line
+    /// that was not consumed: the next file's header, or one past the last line.
+    ///
+    /// Fails when either header line lacks its prefix or when a hunk cannot be parsed.
+    fn parse_simple_file_diff(lines: &[&str], i: &mut usize) -> Result<UnifiedDiff> {
+        let old_path = Self::extract_path_from_header(lines[*i], "--- ")?;
+        let new_header = lines.get(*i + 1).copied().unwrap_or("");
+        let new_path = Self::extract_path_from_header(new_header, "+++ ")?;
+
+        // When the new side is `/dev/null`, the old path is used as the file path.
+        let file_path = if new_path == "/dev/null" { old_path } else { new_path };
+        *i += 2;
+
+        let mut hunks = Vec::new();
+        let mut is_binary = false;
+
+        while *i < lines.len() {
+            let line = lines[*i];
+            // Stop at the next file: a `diff --git` line or another `--- `/`+++ ` pair.
+            let next_is_new_header = lines.get(*i + 1).map_or(false, |l| l.starts_with("+++ "));
+            if line.starts_with("diff --git") || (line.starts_with("--- ") && next_is_new_header) {
+                break;
+            }
+
+            is_binary |= line.starts_with("Binary files") || line.starts_with("GIT binary patch");
+            if line.starts_with("@@") {
+                hunks.push(Self::parse_hunk(lines, i)?);
+            } else {
+                *i += 1;
+            }
+        }
+
+        Ok(UnifiedDiff {
+            file_path: PathBuf::from(file_path),
+            old_content: None,
+            new_content: None,
+            hunks,
+            is_binary,
         })
     }
 
@@ -202,6 +256,15 @@ impl DiffParser {
         }
     }
 
+    /// Path token after `prefix`, minus one leading `a/` or `b/`; errors when `prefix` is missing.
+    fn extract_path_from_header(line: &str, prefix: &str) -> Result<String> {
+        let raw = line.strip_prefix(prefix).map(str::trim);
+        let raw = raw.ok_or_else(|| anyhow::anyhow!("Invalid file header: {}", line))?;
+        let path = raw.split_whitespace().next().unwrap_or(raw);
+        // Strip a single prefix: `trim_start_matches` repeats and turned `a/b/x` into `x`.
+        Ok(path.strip_prefix("a/").or_else(|| path.strip_prefix("b/")).unwrap_or(path).to_string())
+    }
+
     fn parse_hunk(lines: &[&str], i: &mut usize) -> Result<DiffHunk> {
         let header = lines[*i];
         let (old_start, old_lines, new_start, new_lines) = Self::parse_hunk_header(header)?;
@@ -211,7 +274,12 @@ impl DiffParser {
         let mut old_line = old_start;
         let mut new_line = new_start;
 
-        while *i < lines.len() && !lines[*i].starts_with("@@") && !lines[*i].starts_with("diff --git") {
+        while *i < lines.len()
+            && !lines[*i].starts_with("@@")
+            && !lines[*i].starts_with("diff --git")
+            && !lines[*i].starts_with("--- ")
+            && !lines[*i].starts_with("+++ ")
+        {
             let line = lines[*i];
             if line.is_empty() {
                 *i += 1;
@@ -302,4 +370,19 @@ mod tests {
         assert_eq!(diff.file_path, PathBuf::from("test.txt"));
         assert!(!diff.hunks.is_empty());
     }
-}
+
+    // A bare `---`/`+++` patch (no `diff --git` line) must yield one file with one hunk.
+    #[test]
+    fn test_parse_unified_diff_without_git_header() {
+        let patch = concat!(
+            "--- a/foo.txt\n",
+            "+++ b/foo.txt\n",
+            "@@ -1,1 +1,1 @@\n",
+            "-hello\n+world\n",
+        );
+        let parsed = DiffParser::parse_unified_diff(patch).unwrap();
+        assert_eq!(parsed.len(), 1);
+        assert_eq!(parsed[0].file_path, PathBuf::from("foo.txt"));
+        assert_eq!(parsed[0].hunks.len(), 1);
+    }
+}
diff --git a/src/main.rs b/src/main.rs
@@ -223,6 +223,10 @@ async fn review_command(
             info!("Skipping excluded file: {}", diff.file_path.display());
             continue;
         }
+        if diff.is_binary || diff.hunks.is_empty() {
+            info!("Skipping non-text diff: {}", diff.file_path.display());
+            continue;
+        }
         
         let mut context_chunks = context_fetcher.fetch_context_for_file(
             &diff.file_path,
@@ -692,6 +696,10 @@ async fn review_diff_content_raw(
             info!("Skipping excluded file: {}", diff.file_path.display());
             continue;
         }
+        if diff.is_binary || diff.hunks.is_empty() {
+            info!("Skipping non-text diff: {}", diff.file_path.display());
+            continue;
+        }
 
         let mut context_chunks = context_fetcher.fetch_context_for_file(
             &diff.file_path,
@@ -785,7 +793,9 @@ async fn review_diff_content_raw(
 
 fn parse_llm_response(content: &str, file_path: &PathBuf) -> Result<Vec<core::comment::RawComment>> {
     let mut comments = Vec::new();
-    let line_pattern = regex::Regex::new(r"(?i)line\s+(\d+):\s*(.+)")?;
+    static LINE_PATTERN: Lazy<Regex> = Lazy::new(|| {
+        Regex::new(r"(?i)line\s+(\d+):\s*(.+)").unwrap()
+    });
     
     for line in content.lines() {
         let trimmed = line.trim();
@@ -801,7 +811,7 @@ fn parse_llm_response(content: &str, file_path: &PathBuf) -> Result<Vec<core::co
             continue;
         }
         
-        if let Some(caps) = line_pattern.captures(line) {
+        if let Some(caps) = LINE_PATTERN.captures(line) {
             let line_number: usize = caps.get(1).unwrap().as_str().parse()?;
             let comment_text = caps.get(2).unwrap().as_str().trim();
             
@@ -867,6 +877,9 @@ fn format_as_patch(comments: &[core::Comment]) -> String {
             comment.severity,
             comment.content
         ));
+        if let Some(suggestion) = &comment.suggestion {
+            output.push_str(&format!("# Suggestion: {}\n", suggestion));
+        }
     }
     output
 }
@@ -886,8 +899,13 @@ fn format_as_markdown(comments: &[core::Comment]) -> String {
     
     // Severity breakdown
     output.push_str("### Issues by Severity\n\n");
-    for (severity, count) in &summary.by_severity {
-        let emoji = match severity.as_str() {
+    let severity_order = ["Error", "Warning", "Info", "Suggestion"];
+    for severity in severity_order {
+        let count = summary.by_severity.get(severity).copied().unwrap_or(0);
+        if count == 0 {
+            continue;
+        }
+        let emoji = match severity {
             "Error" => "🔴",
             "Warning" => "🟡", 
             "Info" => "🔵",
@@ -900,8 +918,23 @@ fn format_as_markdown(comments: &[core::Comment]) -> String {
     
     // Category breakdown  
     output.push_str("### Issues by Category\n\n");
-    for (category, count) in &summary.by_category {
-        let emoji = match category.as_str() {
+    let category_order = [
+        "Security",
+        "Performance",
+        "Bug",
+        "Maintainability",
+        "Testing",
+        "Style",
+        "Documentation",
+        "Architecture",
+        "BestPractice",
+    ];
+    for category in category_order {
+        let count = summary.by_category.get(category).copied().unwrap_or(0);
+        if count == 0 {
+            continue;
+        }
+        let emoji = match category {
             "Security" => "🔒",
             "Performance" => "⚡",
             "Bug" => "🐛",
@@ -1036,6 +1069,10 @@ async fn smart_review_command(
             info!("Skipping excluded file: {}", diff.file_path.display());
             continue;
         }
+        if diff.is_binary || diff.hunks.is_empty() {
+            info!("Skipping non-text diff: {}", diff.file_path.display());
+            continue;
+        }
         
         let mut context_chunks = context_fetcher.fetch_context_for_file(
             &diff.file_path,
@@ -1443,15 +1480,16 @@ fn format_detailed_comment(comment: &core::Comment) -> String {
         comment.category
     ));
     
-    output.push_str(&format!("**Confidence:** {:.0}% | ", comment.confidence * 100.0));
-    if !comment.tags.is_empty() {
-        output.push_str("**Tags:** ");
+    if comment.tags.is_empty() {
+        output.push_str(&format!("**Confidence:** {:.0}%\n\n", comment.confidence * 100.0));
+    } else {
+        output.push_str(&format!("**Confidence:** {:.0}% | **Tags:** ", comment.confidence * 100.0));
         for (i, tag) in comment.tags.iter().enumerate() {
             if i > 0 { output.push_str(", "); }
             output.push_str(&format!("`{}`", tag));
         }
+        output.push_str("\n\n");
     }
-    output.push_str("\n\n");
     
     output.push_str(&format!("{}\n\n", comment.content));