Skip to content

Commit d4b8b30

Browse files
committed
fix: address review pipeline false skips and UTF-8 safety
Prevent compression analysis from skipping files in per-file review mode, make verification diff truncation UTF-8 safe, and tighten comment-only triage to avoid dereference false positives while improving compression packing order. Made-with: Cursor
1 parent 2158b46 commit d4b8b30

File tree

4 files changed

+42
-22
lines changed

4 files changed

+42
-22
lines changed

src/review/compression.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,14 +205,14 @@ pub fn compress_diffs(
205205
}
206206

207207
// Stage 2: Compressed — remove deletion-only hunks, drop tail files
208-
// Sort by token count (largest first) for greedy packing
209-
file_tokens.sort_by(|a, b| b.1.cmp(&a.1));
208+
// Sort by token count (smallest first) to maximize files reviewed.
209+
file_tokens.sort_by(|a, b| a.1.cmp(&b.1));
210210

211211
let mut included = Vec::new();
212212
let mut skipped = Vec::new();
213213
let mut budget_used = 0;
214214

215-
for &(idx, tokens) in &file_tokens {
215+
for &(idx, _tokens) in &file_tokens {
216216
// Re-estimate with compression
217217
let compressed_tokens = if let Some(compressed) = compress_diff(&diffs[idx]) {
218218
estimate_diff_tokens(&compressed)

src/review/pipeline.rs

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -188,10 +188,11 @@ async fn review_diff_content_raw_inner(
188188
compression_result.batches.len(),
189189
compression_result.skipped_indices.len(),
190190
);
191-
let compression_skipped: std::collections::HashSet<usize> =
192-
compression_result.skipped_indices.iter().copied().collect();
193191
if !compression_result.skipped_summary.is_empty() {
194-
info!("Compression skipped files:\n{}", compression_result.skipped_summary);
192+
info!(
193+
"Compression analysis skipped files (advisory in per-file mode):\n{}",
194+
compression_result.skipped_summary
195+
);
195196
}
196197

197198
// Gather git history for enhanced context
@@ -304,16 +305,6 @@ async fn review_diff_content_raw_inner(
304305
continue;
305306
}
306307

307-
// Compression: skip files that exceed the token budget
308-
if compression_skipped.contains(&diff_index) {
309-
info!(
310-
"Skipping {} (compression: exceeds token budget)",
311-
diff.file_path.display()
312-
);
313-
files_skipped += 1;
314-
continue;
315-
}
316-
317308
// Emit progress: preparing this file
318309
if let Some(ref cb) = on_progress {
319310
cb(ProgressUpdate {

src/review/triage.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ const LOCK_FILES: &[&str] = &[
4242

4343
/// Comment line prefixes (after trimming leading whitespace).
4444
const COMMENT_PREFIXES: &[&str] = &[
45-
"//", "#", "/*", "*/", "*", "--", "<!--", "\"\"\"", "'''",
45+
"//", "#", "/*", "*/", "* ", "--", "<!--", "\"\"\"", "'''",
4646
];
4747

4848
/// Classify a diff using fast heuristics (no LLM call).
@@ -631,6 +631,18 @@ mod tests {
631631
assert_eq!(triage_diff(&diff), TriageResult::NeedsReview);
632632
}
633633

634+
#[test]
635+
fn test_comment_only_does_not_match_pointer_dereference_code() {
636+
let diff = make_diff(
637+
"src/lib.rs",
638+
vec![
639+
make_line(1, ChangeType::Added, "*ptr = compute();"),
640+
make_line(2, ChangeType::Added, "*buffer = data;"),
641+
],
642+
);
643+
assert_eq!(triage_diff(&diff), TriageResult::NeedsReview);
644+
}
645+
634646
// ── Edge cases ───────────────────────────────────────────────────────
635647

636648
#[test]

src/review/verification.rs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,7 @@ pub fn is_auto_zero(content: &str) -> bool {
116116
fn build_verification_prompt(comments: &[Comment], diff_content: &str) -> String {
117117
let mut prompt = String::from("## Code Diff\n```\n");
118118
// Include a truncated version of the diff
119-
let truncated_diff = if diff_content.len() > 8000 {
120-
&diff_content[..8000]
121-
} else {
122-
diff_content
123-
};
119+
let truncated_diff = safe_utf8_prefix(diff_content, 8000);
124120
prompt.push_str(truncated_diff);
125121
prompt.push_str("\n```\n\n## Findings to Verify\n\n");
126122

@@ -142,6 +138,18 @@ fn build_verification_prompt(comments: &[Comment], diff_content: &str) -> String
142138
prompt
143139
}
144140

141+
fn safe_utf8_prefix(content: &str, max_bytes: usize) -> &str {
142+
if content.len() <= max_bytes {
143+
return content;
144+
}
145+
146+
let mut end = max_bytes;
147+
while end > 0 && !content.is_char_boundary(end) {
148+
end -= 1;
149+
}
150+
&content[..end]
151+
}
152+
145153
fn parse_verification_response(content: &str, comments: &[Comment]) -> Vec<VerificationResult> {
146154
static FINDING_PATTERN: Lazy<Regex> = Lazy::new(|| {
147155
Regex::new(r"(?i)FINDING\s+(\d+)\s*:\s*score\s*=\s*(\d+)\s+accurate\s*=\s*(true|false)\s+reason\s*=\s*(.+)")
@@ -261,6 +269,15 @@ mod tests {
261269
assert!(prompt.len() < long_diff.len() + 1000); // truncated
262270
}
263271

272+
#[test]
273+
fn test_build_verification_prompt_utf8_safe_truncation() {
274+
// 3000 emojis => 12k bytes, and byte 8000 is not a char boundary.
275+
let long_diff = "😀".repeat(3000);
276+
let comments = vec![make_comment("c1", "issue", 10)];
277+
let prompt = build_verification_prompt(&comments, &long_diff);
278+
assert!(prompt.contains("## Code Diff"));
279+
}
280+
264281
#[test]
265282
fn test_build_verification_prompt_includes_suggestion() {
266283
let mut comment = make_comment("c1", "Use parameterized queries", 10);

0 commit comments

Comments
 (0)