Skip to content

Commit 769463e

Browse files
committed
Improve diff parsing and output formatting
1 parent d864510 commit 769463e

File tree

2 files changed

+134
-13
lines changed

2 files changed

+134
-13
lines changed

src/core/diff_parser.rs

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ impl DiffParser {
4949
if lines[i].starts_with("diff --git") {
5050
let diff = Self::parse_single_file_diff(&lines, &mut i)?;
5151
diffs.push(diff);
52+
} else if lines[i].starts_with("--- ")
53+
&& i + 1 < lines.len()
54+
&& lines[i + 1].starts_with("+++ ")
55+
{
56+
let diff = Self::parse_simple_file_diff(&lines, &mut i)?;
57+
diffs.push(diff);
5258
} else {
5359
i += 1;
5460
}
@@ -173,7 +179,11 @@ impl DiffParser {
173179
let file_path = Self::extract_file_path(file_line)?;
174180
*i += 1;
175181

182+
let mut is_binary = false;
176183
while *i < lines.len() && !lines[*i].starts_with("@@") && !lines[*i].starts_with("diff --git") {
184+
if lines[*i].starts_with("Binary files") || lines[*i].starts_with("GIT binary patch") {
185+
is_binary = true;
186+
}
177187
*i += 1;
178188
}
179189

@@ -189,7 +199,51 @@ impl DiffParser {
189199
old_content: None,
190200
new_content: None,
191201
hunks,
192-
is_binary: false,
202+
is_binary,
203+
})
204+
}
205+
206+
fn parse_simple_file_diff(lines: &[&str], i: &mut usize) -> Result<UnifiedDiff> {
207+
let old_line = lines[*i];
208+
let new_line = lines.get(*i + 1).unwrap_or(&"");
209+
210+
let old_path = Self::extract_path_from_header(old_line, "--- ")?;
211+
let new_path = Self::extract_path_from_header(new_line, "+++ ")?;
212+
213+
let file_path = if new_path != "/dev/null" {
214+
new_path
215+
} else {
216+
old_path
217+
};
218+
219+
*i += 2;
220+
221+
let mut hunks = Vec::new();
222+
let mut is_binary = false;
223+
224+
while *i < lines.len()
225+
&& !lines[*i].starts_with("diff --git")
226+
&& !(lines[*i].starts_with("--- ")
227+
&& *i + 1 < lines.len()
228+
&& lines[*i + 1].starts_with("+++ "))
229+
{
230+
if lines[*i].starts_with("Binary files") || lines[*i].starts_with("GIT binary patch") {
231+
is_binary = true;
232+
}
233+
if lines[*i].starts_with("@@") {
234+
let hunk = Self::parse_hunk(lines, i)?;
235+
hunks.push(hunk);
236+
} else {
237+
*i += 1;
238+
}
239+
}
240+
241+
Ok(UnifiedDiff {
242+
file_path: PathBuf::from(file_path),
243+
old_content: None,
244+
new_content: None,
245+
hunks,
246+
is_binary,
193247
})
194248
}
195249

@@ -202,6 +256,15 @@ impl DiffParser {
202256
}
203257
}
204258

259+
fn extract_path_from_header(line: &str, prefix: &str) -> Result<String> {
260+
let raw = line
261+
.strip_prefix(prefix)
262+
.ok_or_else(|| anyhow::anyhow!("Invalid file header: {}", line))?
263+
.trim();
264+
let path = raw.split_whitespace().next().unwrap_or(raw);
265+
Ok(path.trim_start_matches("a/").trim_start_matches("b/").to_string())
266+
}
267+
205268
fn parse_hunk(lines: &[&str], i: &mut usize) -> Result<DiffHunk> {
206269
let header = lines[*i];
207270
let (old_start, old_lines, new_start, new_lines) = Self::parse_hunk_header(header)?;
@@ -211,7 +274,12 @@ impl DiffParser {
211274
let mut old_line = old_start;
212275
let mut new_line = new_start;
213276

214-
while *i < lines.len() && !lines[*i].starts_with("@@") && !lines[*i].starts_with("diff --git") {
277+
while *i < lines.len()
278+
&& !lines[*i].starts_with("@@")
279+
&& !lines[*i].starts_with("diff --git")
280+
&& !lines[*i].starts_with("--- ")
281+
&& !lines[*i].starts_with("+++ ")
282+
{
215283
let line = lines[*i];
216284
if line.is_empty() {
217285
*i += 1;
@@ -302,4 +370,19 @@ mod tests {
302370
assert_eq!(diff.file_path, PathBuf::from("test.txt"));
303371
assert!(!diff.hunks.is_empty());
304372
}
305-
}
373+
374+
#[test]
375+
fn test_parse_unified_diff_without_git_header() {
376+
let diff_text = "\
377+
--- a/foo.txt\n\
378+
+++ b/foo.txt\n\
379+
@@ -1,1 +1,1 @@\n\
380+
-hello\n\
381+
+world\n";
382+
383+
let diffs = DiffParser::parse_unified_diff(diff_text).unwrap();
384+
assert_eq!(diffs.len(), 1);
385+
assert_eq!(diffs[0].file_path, PathBuf::from("foo.txt"));
386+
assert_eq!(diffs[0].hunks.len(), 1);
387+
}
388+
}

src/main.rs

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,10 @@ async fn review_command(
223223
info!("Skipping excluded file: {}", diff.file_path.display());
224224
continue;
225225
}
226+
if diff.is_binary || diff.hunks.is_empty() {
227+
info!("Skipping non-text diff: {}", diff.file_path.display());
228+
continue;
229+
}
226230

227231
let mut context_chunks = context_fetcher.fetch_context_for_file(
228232
&diff.file_path,
@@ -692,6 +696,10 @@ async fn review_diff_content_raw(
692696
info!("Skipping excluded file: {}", diff.file_path.display());
693697
continue;
694698
}
699+
if diff.is_binary || diff.hunks.is_empty() {
700+
info!("Skipping non-text diff: {}", diff.file_path.display());
701+
continue;
702+
}
695703

696704
let mut context_chunks = context_fetcher.fetch_context_for_file(
697705
&diff.file_path,
@@ -785,7 +793,9 @@ async fn review_diff_content_raw(
785793

786794
fn parse_llm_response(content: &str, file_path: &PathBuf) -> Result<Vec<core::comment::RawComment>> {
787795
let mut comments = Vec::new();
788-
let line_pattern = regex::Regex::new(r"(?i)line\s+(\d+):\s*(.+)")?;
796+
static LINE_PATTERN: Lazy<Regex> = Lazy::new(|| {
797+
Regex::new(r"(?i)line\s+(\d+):\s*(.+)").unwrap()
798+
});
789799

790800
for line in content.lines() {
791801
let trimmed = line.trim();
@@ -801,7 +811,7 @@ fn parse_llm_response(content: &str, file_path: &PathBuf) -> Result<Vec<core::co
801811
continue;
802812
}
803813

804-
if let Some(caps) = line_pattern.captures(line) {
814+
if let Some(caps) = LINE_PATTERN.captures(line) {
805815
let line_number: usize = caps.get(1).unwrap().as_str().parse()?;
806816
let comment_text = caps.get(2).unwrap().as_str().trim();
807817

@@ -867,6 +877,9 @@ fn format_as_patch(comments: &[core::Comment]) -> String {
867877
comment.severity,
868878
comment.content
869879
));
880+
if let Some(suggestion) = &comment.suggestion {
881+
output.push_str(&format!("# Suggestion: {}\n", suggestion));
882+
}
870883
}
871884
output
872885
}
@@ -886,8 +899,13 @@ fn format_as_markdown(comments: &[core::Comment]) -> String {
886899

887900
// Severity breakdown
888901
output.push_str("### Issues by Severity\n\n");
889-
for (severity, count) in &summary.by_severity {
890-
let emoji = match severity.as_str() {
902+
let severity_order = ["Error", "Warning", "Info", "Suggestion"];
903+
for severity in severity_order {
904+
let count = summary.by_severity.get(severity).copied().unwrap_or(0);
905+
if count == 0 {
906+
continue;
907+
}
908+
let emoji = match severity {
891909
"Error" => "🔴",
892910
"Warning" => "🟡",
893911
"Info" => "🔵",
@@ -900,8 +918,23 @@ fn format_as_markdown(comments: &[core::Comment]) -> String {
900918

901919
// Category breakdown
902920
output.push_str("### Issues by Category\n\n");
903-
for (category, count) in &summary.by_category {
904-
let emoji = match category.as_str() {
921+
let category_order = [
922+
"Security",
923+
"Performance",
924+
"Bug",
925+
"Maintainability",
926+
"Testing",
927+
"Style",
928+
"Documentation",
929+
"Architecture",
930+
"BestPractice",
931+
];
932+
for category in category_order {
933+
let count = summary.by_category.get(category).copied().unwrap_or(0);
934+
if count == 0 {
935+
continue;
936+
}
937+
let emoji = match category {
905938
"Security" => "🔒",
906939
"Performance" => "⚡",
907940
"Bug" => "🐛",
@@ -1036,6 +1069,10 @@ async fn smart_review_command(
10361069
info!("Skipping excluded file: {}", diff.file_path.display());
10371070
continue;
10381071
}
1072+
if diff.is_binary || diff.hunks.is_empty() {
1073+
info!("Skipping non-text diff: {}", diff.file_path.display());
1074+
continue;
1075+
}
10391076

10401077
let mut context_chunks = context_fetcher.fetch_context_for_file(
10411078
&diff.file_path,
@@ -1443,15 +1480,16 @@ fn format_detailed_comment(comment: &core::Comment) -> String {
14431480
comment.category
14441481
));
14451482

1446-
output.push_str(&format!("**Confidence:** {:.0}% | ", comment.confidence * 100.0));
1447-
if !comment.tags.is_empty() {
1448-
output.push_str("**Tags:** ");
1483+
if comment.tags.is_empty() {
1484+
output.push_str(&format!("**Confidence:** {:.0}%\n\n", comment.confidence * 100.0));
1485+
} else {
1486+
output.push_str(&format!("**Confidence:** {:.0}% | **Tags:** ", comment.confidence * 100.0));
14491487
for (i, tag) in comment.tags.iter().enumerate() {
14501488
if i > 0 { output.push_str(", "); }
14511489
output.push_str(&format!("`{}`", tag));
14521490
}
1491+
output.push_str("\n\n");
14531492
}
1454-
output.push_str("\n\n");
14551493

14561494
output.push_str(&format!("{}\n\n", comment.content));
14571495

0 commit comments

Comments
 (0)