Skip to content

Commit d36c276

Browse files
haasonsaasclaude
andcommitted
fix: 7 TDD-discovered bugs — path stripping, hunk parsing, storage, endpoint detection
- extract_path_from_header: use strip_prefix instead of trim_start_matches to avoid stripping nested a/ and b/ directory prefixes - extract_file_path fallback: same strip_prefix fix for the regex fallback - parse_hunk: only treat "--- " as file boundary when followed by "+++ ", fixing premature hunk termination on removed lines starting with "-- " - list_events: add missing github_repo filter (JSON backend was ignoring it) - prune: flush to disk after removing reviews (was only removing in memory) - get_event_stats: strip limit/offset before computing stats to avoid computing aggregates on a paginated subset - Config::is_local_endpoint: delegate to common::is_local_endpoint instead of naive string matching that treated all non-OpenAI/Anthropic URLs as local - is_known_public_api: add OpenRouter, Together, Groq, Fireworks, DeepInfra, Perplexity to the known cloud provider list - has_excessive_repetition: fix empty loop range at boundary text lengths Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9d9bee8 commit d36c276

File tree

5 files changed

+290
-44
lines changed

5 files changed

+290
-44
lines changed

src/adapters/common.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,12 @@ fn is_known_public_api(host: &str) -> bool {
151151
|| host.contains("mistral.ai")
152152
|| host.contains("cohere.ai")
153153
|| host.contains("cohere.com")
154+
|| host.contains("openrouter.ai")
155+
|| host.contains("together.ai")
156+
|| host.contains("groq.com")
157+
|| host.contains("fireworks.ai")
158+
|| host.contains("deepinfra.com")
159+
|| host.contains("perplexity.ai")
154160
}
155161

156162
#[cfg(test)]

src/config.rs

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,13 +1034,7 @@ impl Config {
10341034
/// Returns true if the configured base_url points to a local/self-hosted server.
10351035
pub fn is_local_endpoint(&self) -> bool {
10361036
match self.base_url.as_deref() {
1037-
Some(url) => {
1038-
url.contains("localhost")
1039-
|| url.contains("127.0.0.1")
1040-
|| url.contains("0.0.0.0")
1041-
|| url.contains("[::1]")
1042-
|| (!url.contains("openai.com") && !url.contains("anthropic.com"))
1043-
}
1037+
Some(url) => crate::adapters::common::is_local_endpoint(url),
10441038
None => false,
10451039
}
10461040
}
@@ -1776,4 +1770,30 @@ temperature: 0.3
17761770
model
17771771
);
17781772
}
1773+
1774+
#[test]
1775+
fn test_is_local_endpoint_openrouter_not_local() {
1776+
// BUG: Config::is_local_endpoint returns true for any URL that doesn't
1777+
// contain "openai.com" or "anthropic.com", including cloud providers
1778+
let config = Config {
1779+
base_url: Some("https://openrouter.ai/api/v1".to_string()),
1780+
..Default::default()
1781+
};
1782+
assert!(
1783+
!config.is_local_endpoint(),
1784+
"OpenRouter is a cloud provider, not a local endpoint"
1785+
);
1786+
}
1787+
1788+
#[test]
1789+
fn test_is_local_endpoint_azure_not_local() {
1790+
let config = Config {
1791+
base_url: Some("https://myinstance.openai.azure.com/v1".to_string()),
1792+
..Default::default()
1793+
};
1794+
assert!(
1795+
!config.is_local_endpoint(),
1796+
"Azure OpenAI is a cloud provider, not a local endpoint"
1797+
);
1798+
}
17791799
}

src/core/diff_parser.rs

Lines changed: 78 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -315,8 +315,8 @@ impl DiffParser {
315315

316316
let parts: Vec<&str> = line.split_whitespace().collect();
317317
if parts.len() >= 4 {
318-
let a_path = parts[2].trim_start_matches("a/");
319-
let b_path = parts[3].trim_start_matches("b/");
318+
let a_path = parts[2].strip_prefix("a/").unwrap_or(parts[2]);
319+
let b_path = parts[3].strip_prefix("b/").unwrap_or(parts[3]);
320320
let chosen = if b_path != "/dev/null" {
321321
b_path
322322
} else {
@@ -342,10 +342,11 @@ impl DiffParser {
342342
} else {
343343
raw.split_whitespace().next().unwrap_or(raw)
344344
};
345-
Ok(path
346-
.trim_start_matches("a/")
347-
.trim_start_matches("b/")
348-
.to_string())
345+
let path = path
346+
.strip_prefix("a/")
347+
.or_else(|| path.strip_prefix("b/"))
348+
.unwrap_or(path);
349+
Ok(path.to_string())
349350
}
350351

351352
fn parse_hunk(lines: &[&str], i: &mut usize) -> Result<DiffHunk> {
@@ -357,11 +358,17 @@ impl DiffParser {
357358
let mut old_line = old_start;
358359
let mut new_line = new_start;
359360

361+
let is_file_header = |lines: &[&str], idx: usize| -> bool {
362+
lines[idx].starts_with("--- ")
363+
&& lines
364+
.get(idx + 1)
365+
.is_some_and(|next| next.starts_with("+++ "))
366+
};
367+
360368
while *i < lines.len()
361369
&& !lines[*i].starts_with("@@")
362370
&& !lines[*i].starts_with("diff --git")
363-
&& !lines[*i].starts_with("--- ")
364-
&& !lines[*i].starts_with("+++ ")
371+
&& !is_file_header(lines, *i)
365372
{
366373
let line = lines[*i];
367374
if line.starts_with("\\ No newline at end of file") {
@@ -599,4 +606,67 @@ index abc..def 100644\n\
599606
"parse_text_diff with empty new content should set is_deleted=true"
600607
);
601608
}
609+
610+
#[test]
611+
fn test_extract_path_from_header_file_in_b_directory() {
612+
// BUG: trim_start_matches("a/").trim_start_matches("b/") strips both prefixes
613+
// from files inside a directory named "b", e.g. "a/b/config.yaml" -> "config.yaml"
614+
let path = DiffParser::extract_path_from_header("--- a/b/config.yaml", "--- ").unwrap();
615+
assert_eq!(
616+
path, "b/config.yaml",
617+
"File in b/ directory should keep the b/ prefix"
618+
);
619+
}
620+
621+
#[test]
622+
fn test_extract_path_from_header_file_in_a_directory() {
623+
// Same bug: "b/a/test.txt" -> "test.txt" via double strip
624+
let path = DiffParser::extract_path_from_header("+++ b/a/test.txt", "+++ ").unwrap();
625+
assert_eq!(
626+
path, "a/test.txt",
627+
"File in a/ directory should keep the a/ prefix"
628+
);
629+
}
630+
631+
#[test]
632+
fn test_parse_hunk_removed_line_starting_with_double_dash() {
633+
// BUG: a removed line whose content starts with "-- " produces a diff line
634+
// starting with "--- ", which the parser treats as a file header and stops.
635+
let diff_text = "\
636+
diff --git a/test.txt b/test.txt
637+
--- a/test.txt
638+
+++ b/test.txt
639+
@@ -1,3 +1,2 @@
640+
first line
641+
--- this is a separator
642+
third line
643+
";
644+
let diffs = DiffParser::parse_unified_diff(diff_text).unwrap();
645+
assert_eq!(diffs.len(), 1);
646+
let hunk = &diffs[0].hunks[0];
647+
// Should have 3 changes: context, removed, context
648+
assert_eq!(
649+
hunk.changes.len(),
650+
3,
651+
"Hunk should contain all 3 lines, not stop at the '--- ' line. Got {} changes.",
652+
hunk.changes.len()
653+
);
654+
let removed = hunk
655+
.changes
656+
.iter()
657+
.find(|c| c.change_type == ChangeType::Removed);
658+
assert!(
659+
removed.is_some(),
660+
"Should have a removed line with content '-- this is a separator'"
661+
);
662+
}
663+
664+
#[test]
665+
fn test_extract_file_path_file_in_a_subdirectory() {
666+
// Verify the regex path correctly preserves a/ subdirectory
667+
let path =
668+
DiffParser::extract_file_path("diff --git a/a/nested/file.rs b/a/nested/file.rs")
669+
.unwrap();
670+
assert_eq!(path, "a/nested/file.rs");
671+
}
602672
}

src/review/pipeline.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1148,7 +1148,8 @@ fn has_excessive_repetition(text: &str) -> bool {
11481148
return false;
11491149
}
11501150
let window = 20.min(text.len() / 5);
1151-
for start in 0..text.len().saturating_sub(window * 5) {
1151+
let search_end = text.len().saturating_sub(window);
1152+
for start in 0..search_end.max(1) {
11521153
if !text.is_char_boundary(start) || !text.is_char_boundary(start + window) {
11531154
continue;
11541155
}
@@ -1910,4 +1911,24 @@ mod tests {
19101911
let config = config::Config::default();
19111912
assert!(!config.multi_pass_specialized);
19121913
}
1914+
1915+
#[test]
1916+
fn test_has_excessive_repetition_boundary_120_chars() {
1917+
// BUG: when text.len() == 100, window=20, the old loop range was 0..0 (empty).
1918+
// Even after fixing the range, the check is count > 5, so we need 6 repetitions.
1919+
let pattern = "abcdefghij1234567890"; // 20 chars
1920+
let text = pattern.repeat(6); // 120 chars, 6 repetitions
1921+
assert_eq!(text.len(), 120);
1922+
assert!(
1923+
has_excessive_repetition(&text),
1924+
"120-char string with 6x repeated 20-char pattern should be detected"
1925+
);
1926+
}
1927+
1928+
#[test]
1929+
fn test_has_excessive_repetition_short_not_detected() {
1930+
// Strings under 100 chars should always return false
1931+
let text = "abc".repeat(30); // 90 chars
1932+
assert!(!has_excessive_repetition(&text));
1933+
}
19131934
}

0 commit comments

Comments
 (0)