Skip to content

Commit 26ae42a

Browse files
haasonsaasclaude
andcommitted
fix: 5 TDD-discovered bugs — error rate, diff parsing, default model, file status
- Error rate now counts only explicit review.failed events, not all non-completed - Empty lines in diffs treated as context instead of being silently skipped - parse_text_diff sets is_new/is_deleted based on empty old/new content - Default model changed from Sonnet to Opus per project conventions - Added 5 new tests covering all discovered issues Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent fcfe977 commit 26ae42a

File tree

4 files changed

+128
-11
lines changed

4 files changed

+128
-11
lines changed

src/config.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,7 @@ impl Config {
10611061
}
10621062

10631063
fn default_model() -> String {
1064-
"claude-sonnet-4-6".to_string()
1064+
"claude-opus-4-6".to_string()
10651065
}
10661066

10671067
fn default_temperature() -> f32 {
@@ -1764,4 +1764,16 @@ temperature: 0.3
17641764
assert!(config.model_embedding.is_none());
17651765
assert!(config.fallback_models.is_empty());
17661766
}
1767+
1768+
#[test]
1769+
fn test_default_model_is_frontier() {
1770+
// Per CLAUDE.md: "Always use frontier models (Opus) for AI-powered features
1771+
// — never default to Sonnet or smaller"
1772+
let model = default_model();
1773+
assert!(
1774+
model.contains("opus"),
1775+
"Default model should be Opus (frontier), got: {}",
1776+
model
1777+
);
1778+
}
17671779
}

src/core/diff_parser.rs

Lines changed: 67 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,8 @@ impl DiffParser {
181181
new_content: Some(new_content.to_string()),
182182
hunks,
183183
is_binary: false,
184-
is_deleted: false,
185-
is_new: false,
184+
is_deleted: new_content.is_empty() && !old_content.is_empty(),
185+
is_new: old_content.is_empty() && !new_content.is_empty(),
186186
})
187187
}
188188

@@ -368,17 +368,16 @@ impl DiffParser {
368368
*i += 1;
369369
continue;
370370
}
371-
if line.is_empty() {
372-
*i += 1;
373-
continue;
374-
}
375-
376-
let (change_type, content) = match line.chars().next() {
371+
let (change_type, content) = if line.is_empty() {
372+
// Some diff tools emit truly empty lines for empty context lines
373+
// (omitting the leading space). Treat as context to keep line numbers in sync.
374+
(ChangeType::Context, "")
375+
} else { match line.chars().next() {
377376
Some('+') => (ChangeType::Added, &line[1..]),
378377
Some('-') => (ChangeType::Removed, &line[1..]),
379378
Some(' ') => (ChangeType::Context, &line[1..]),
380379
_ => (ChangeType::Context, line),
381-
};
380+
}};
382381

383382
let diff_line = match change_type {
384383
ChangeType::Added => {
@@ -540,4 +539,63 @@ index 0000000..f735c20\n\
540539
assert!(diffs[0].is_new);
541540
assert!(!diffs[0].is_deleted);
542541
}
542+
543+
#[test]
544+
fn test_parse_hunk_empty_lines_not_skipped() {
545+
// BUG: empty lines in diff body are skipped, causing line number desync.
546+
// An empty line (no leading space) in some diff tools represents an empty
547+
// context line. The parser should treat it as context, not skip it.
548+
let diff_text = "\
549+
diff --git a/test.txt b/test.txt\n\
550+
index abc..def 100644\n\
551+
--- a/test.txt\n\
552+
+++ b/test.txt\n\
553+
@@ -1,4 +1,4 @@\n\
554+
line1\n\
555+
\n\
556+
-old_line3\n\
557+
+new_line3\n";
558+
// The empty line (between "line1" and "-old_line3") should be treated as
559+
// context line 2. Without it, line numbers after the empty line are wrong.
560+
let diffs = DiffParser::parse_unified_diff(diff_text).unwrap();
561+
assert_eq!(diffs.len(), 1);
562+
let hunk = &diffs[0].hunks[0];
563+
564+
// Find the removed line — it should be on line 3, not line 2
565+
let removed = hunk
566+
.changes
567+
.iter()
568+
.find(|c| c.change_type == ChangeType::Removed)
569+
.expect("Should have a removed line");
570+
assert_eq!(
571+
removed.old_line_no,
572+
Some(3),
573+
"Removed line should be on old line 3 (after the empty context line 2), got {:?}",
574+
removed.old_line_no
575+
);
576+
}
577+
578+
#[test]
579+
fn test_parse_text_diff_sets_is_new_for_new_file() {
580+
// BUG: parse_text_diff always sets is_new=false even when old_content is empty
581+
let diff =
582+
DiffParser::parse_text_diff("", "new content\n", PathBuf::from("new_file.rs"))
583+
.unwrap();
584+
assert!(
585+
diff.is_new,
586+
"parse_text_diff with empty old content should set is_new=true"
587+
);
588+
}
589+
590+
#[test]
591+
fn test_parse_text_diff_sets_is_deleted_for_deleted_file() {
592+
// BUG: parse_text_diff always sets is_deleted=false even when new_content is empty
593+
let diff =
594+
DiffParser::parse_text_diff("old content\n", "", PathBuf::from("deleted_file.rs"))
595+
.unwrap();
596+
assert!(
597+
diff.is_deleted,
598+
"parse_text_diff with empty new content should set is_deleted=true"
599+
);
600+
}
543601
}

src/server/github.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1177,4 +1177,5 @@ mod tests {
11771177

11781178
assert!(verify_webhook_signature(secret, body, &signature).is_ok());
11791179
}
1180+
11801181
}

src/server/storage_json.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,10 @@ impl StorageBackend for JsonStorageBackend {
178178
.iter()
179179
.filter(|e| e.event_type == "review.completed")
180180
.count() as i64;
181-
let failed = total - completed;
181+
let failed = events
182+
.iter()
183+
.filter(|e| e.event_type == "review.failed")
184+
.count() as i64;
182185
let error_rate = if total > 0 {
183186
failed as f64 / total as f64
184187
} else {
@@ -1371,4 +1374,47 @@ mod tests {
13711374
let events = backend.list_events(&EventFilters::default()).await.unwrap();
13721375
assert!(events.is_empty());
13731376
}
1377+
1378+
#[tokio::test]
1379+
async fn test_error_rate_only_counts_explicit_failures() {
1380+
// BUG: `failed = total - completed` misclassifies non-failure events as failures.
1381+
// Events with type "review.started" or "review.timeout" are counted as failures.
1382+
let dir = tempfile::tempdir().unwrap();
1383+
let backend = JsonStorageBackend::new(&dir.path().join("reviews.json"));
1384+
let now = now_ts();
1385+
1386+
// 1 completed, 1 failed, 1 timeout (not a failure per se)
1387+
backend
1388+
.save_review(&make_session_with_event(
1389+
"r1", now, ReviewStatus::Complete, "review.completed", "gpt-4o", "github", 100,
1390+
))
1391+
.await
1392+
.unwrap();
1393+
backend
1394+
.save_review(&make_session_with_event(
1395+
"r2", now, ReviewStatus::Failed, "review.failed", "gpt-4o", "github", 200,
1396+
))
1397+
.await
1398+
.unwrap();
1399+
backend
1400+
.save_review(&make_session_with_event(
1401+
"r3", now, ReviewStatus::Failed, "review.timeout", "gpt-4o", "github", 300,
1402+
))
1403+
.await
1404+
.unwrap();
1405+
1406+
let stats = backend
1407+
.get_event_stats(&EventFilters::default())
1408+
.await
1409+
.unwrap();
1410+
// Only "review.failed" should count as a failure (1 out of 3)
1411+
// BUG: currently reports 2/3 because timeout is also counted as failed
1412+
let expected_error_rate = 1.0 / 3.0;
1413+
assert!(
1414+
(stats.error_rate - expected_error_rate).abs() < 0.01,
1415+
"Error rate should be {:.2} (only explicit failures), got {:.2}",
1416+
expected_error_rate,
1417+
stats.error_rate
1418+
);
1419+
}
13741420
}

0 commit comments

Comments
 (0)