Skip to content

Commit aabcdae

Browse files
committed
Add diff metadata and prompt truncation limits
1 parent 14175d3 commit aabcdae

File tree

7 files changed

+185
-15
lines changed

7 files changed

+185
-15
lines changed

.diffscope.yml.example

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
model: gpt-4o
66
temperature: 0.2
77
max_tokens: 4000
8+
max_context_chars: 20000 # 0 disables context truncation
9+
max_diff_chars: 40000 # 0 disables diff truncation
810

911
# API configuration (optional - can use environment variables)
1012
# api_key: your-api-key-here

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,10 @@ Create a `.diffscope.yml` file in your repository:
190190
model: gpt-4o
191191
temperature: 0.2
192192
max_tokens: 4000
193+
max_context_chars: 20000
194+
max_diff_chars: 40000
195+
max_context_chars: 20000 # 0 disables context truncation
196+
max_diff_chars: 40000 # 0 disables diff truncation
193197
system_prompt: "Focus on security vulnerabilities, performance issues, and best practices"
194198
openai_use_responses: true # Use OpenAI Responses API (recommended) instead of chat completions
195199
@@ -421,8 +425,8 @@ exclude_patterns:
421425
- "**/*.generated.*"
422426

423427
# Review configuration
424-
max_diff_size: 10000
425-
context_lines: 3
428+
max_context_chars: 20000
429+
max_diff_chars: 40000
426430
```
427431
428432
### Integration with Other CI Tools

src/config.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@ pub struct Config {
1414
#[serde(default = "default_max_tokens")]
1515
pub max_tokens: usize,
1616

17+
#[serde(default = "default_max_context_chars")]
18+
pub max_context_chars: usize,
19+
20+
#[serde(default = "default_max_diff_chars")]
21+
pub max_diff_chars: usize,
22+
1723
pub system_prompt: Option<String>,
1824
pub api_key: Option<String>,
1925
pub base_url: Option<String>,
@@ -78,6 +84,8 @@ impl Default for Config {
7884
model: default_model(),
7985
temperature: default_temperature(),
8086
max_tokens: default_max_tokens(),
87+
max_context_chars: default_max_context_chars(),
88+
max_diff_chars: default_max_diff_chars(),
8189
system_prompt: None,
8290
api_key: None,
8391
base_url: None,
@@ -230,6 +238,14 @@ fn default_max_tokens() -> usize {
230238
4000
231239
}
232240

241+
fn default_max_context_chars() -> usize {
242+
20000
243+
}
244+
245+
fn default_max_diff_chars() -> usize {
246+
40000
247+
}
248+
233249
fn default_true() -> bool {
234250
true
235251
}

src/core/diff_parser.rs

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ pub struct UnifiedDiff {
1010
pub new_content: Option<String>,
1111
pub hunks: Vec<DiffHunk>,
1212
pub is_binary: bool,
13+
pub is_deleted: bool,
14+
pub is_new: bool,
1315
}
1416

1517
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -179,6 +181,8 @@ impl DiffParser {
179181
new_content: Some(new_content.to_string()),
180182
hunks,
181183
is_binary: false,
184+
is_deleted: false,
185+
is_new: false,
182186
})
183187
}
184188

@@ -188,13 +192,36 @@ impl DiffParser {
188192
*i += 1;
189193

190194
let mut is_binary = false;
195+
let mut is_deleted = false;
196+
let mut is_new = false;
191197
while *i < lines.len()
192198
&& !lines[*i].starts_with("@@")
193199
&& !lines[*i].starts_with("diff --git")
194200
{
195-
if lines[*i].starts_with("Binary files") || lines[*i].starts_with("GIT binary patch") {
201+
let line = lines[*i];
202+
if line.starts_with("Binary files") || line.starts_with("GIT binary patch") {
196203
is_binary = true;
197204
}
205+
if line.starts_with("deleted file mode") {
206+
is_deleted = true;
207+
}
208+
if line.starts_with("new file mode") {
209+
is_new = true;
210+
}
211+
if line.starts_with("--- ") {
212+
if let Ok(path) = Self::extract_path_from_header(line, "--- ") {
213+
if path == "/dev/null" {
214+
is_new = true;
215+
}
216+
}
217+
}
218+
if line.starts_with("+++ ") {
219+
if let Ok(path) = Self::extract_path_from_header(line, "+++ ") {
220+
if path == "/dev/null" {
221+
is_deleted = true;
222+
}
223+
}
224+
}
198225
*i += 1;
199226
}
200227

@@ -211,6 +238,8 @@ impl DiffParser {
211238
new_content: None,
212239
hunks,
213240
is_binary,
241+
is_deleted,
242+
is_new,
214243
})
215244
}
216245

@@ -221,6 +250,8 @@ impl DiffParser {
221250
let old_path = Self::extract_path_from_header(old_line, "--- ")?;
222251
let new_path = Self::extract_path_from_header(new_line, "+++ ")?;
223252

253+
let is_new = old_path == "/dev/null";
254+
let is_deleted = new_path == "/dev/null";
224255
let file_path = if new_path != "/dev/null" {
225256
new_path
226257
} else {
@@ -255,6 +286,8 @@ impl DiffParser {
255286
new_content: None,
256287
hunks,
257288
is_binary,
289+
is_deleted,
290+
is_new,
258291
})
259292
}
260293

@@ -473,4 +506,38 @@ index 83db48f..f735c20 100644\n\
473506
assert_eq!(diffs.len(), 1);
474507
assert_eq!(diffs[0].hunks.len(), 1);
475508
}
509+
510+
#[test]
511+
fn test_parse_deleted_file() {
512+
let diff_text = "\
513+
diff --git a/foo.txt b/foo.txt\n\
514+
deleted file mode 100644\n\
515+
index 83db48f..0000000\n\
516+
--- a/foo.txt\n\
517+
+++ /dev/null\n\
518+
@@ -1,1 +0,0 @@\n\
519+
-hello\n";
520+
521+
let diffs = DiffParser::parse_unified_diff(diff_text).unwrap();
522+
assert_eq!(diffs.len(), 1);
523+
assert!(diffs[0].is_deleted);
524+
assert!(!diffs[0].is_new);
525+
}
526+
527+
#[test]
528+
fn test_parse_new_file() {
529+
let diff_text = "\
530+
diff --git a/foo.txt b/foo.txt\n\
531+
new file mode 100644\n\
532+
index 0000000..f735c20\n\
533+
--- /dev/null\n\
534+
+++ b/foo.txt\n\
535+
@@ -0,0 +1,1 @@\n\
536+
+hello\n";
537+
538+
let diffs = DiffParser::parse_unified_diff(diff_text).unwrap();
539+
assert_eq!(diffs.len(), 1);
540+
assert!(diffs[0].is_new);
541+
assert!(!diffs[0].is_deleted);
542+
}
476543
}

src/core/prompt.rs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub struct PromptConfig {
99
pub max_tokens: usize,
1010
pub include_context: bool,
1111
pub max_context_chars: usize,
12+
pub max_diff_chars: usize,
1213
}
1314

1415
impl Default for PromptConfig {
@@ -52,6 +53,7 @@ Line 28: Performance - O(n²) algorithm for large dataset. Will be slow with man
5253
max_tokens: 2000,
5354
include_context: true,
5455
max_context_chars: 20000,
56+
max_diff_chars: 40000,
5557
}
5658
}
5759
}
@@ -88,21 +90,40 @@ impl PromptBuilder {
8890

8991
fn format_diff(&self, diff: &UnifiedDiff) -> Result<String> {
9092
let mut output = String::new();
93+
let mut truncated = false;
9194
output.push_str(&format!("File: {}\n", diff.file_path.display()));
9295

93-
for hunk in &diff.hunks {
94-
output.push_str(&format!("{}\n", hunk.context));
96+
'hunks: for hunk in &diff.hunks {
97+
let header = format!("{}\n", hunk.context);
98+
if self.config.max_diff_chars > 0
99+
&& output.len().saturating_add(header.len()) > self.config.max_diff_chars
100+
{
101+
truncated = true;
102+
break;
103+
}
104+
output.push_str(&header);
95105

96106
for change in &hunk.changes {
97107
let prefix = match change.change_type {
98108
crate::core::diff_parser::ChangeType::Added => "+",
99109
crate::core::diff_parser::ChangeType::Removed => "-",
100110
crate::core::diff_parser::ChangeType::Context => " ",
101111
};
102-
output.push_str(&format!("{}{}\n", prefix, change.content));
112+
let line = format!("{}{}\n", prefix, change.content);
113+
if self.config.max_diff_chars > 0
114+
&& output.len().saturating_add(line.len()) > self.config.max_diff_chars
115+
{
116+
truncated = true;
117+
break 'hunks;
118+
}
119+
output.push_str(&line);
103120
}
104121
}
105122

123+
if truncated {
124+
output.push_str("[Diff truncated]\n");
125+
}
126+
106127
Ok(output)
107128
}
108129

src/core/smart_review_prompt.rs

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,16 @@ impl SmartReviewPromptBuilder {
77
pub fn build_enhanced_review_prompt(
88
diff: &UnifiedDiff,
99
context_chunks: &[LLMContextChunk],
10+
max_context_chars: usize,
11+
max_diff_chars: usize,
1012
) -> Result<(String, String)> {
1113
let system_prompt = Self::build_smart_review_system_prompt();
12-
let user_prompt = Self::build_smart_review_user_prompt(diff, context_chunks)?;
14+
let user_prompt = Self::build_smart_review_user_prompt(
15+
diff,
16+
context_chunks,
17+
max_context_chars,
18+
max_diff_chars,
19+
)?;
1320

1421
Ok((system_prompt, user_prompt))
1522
}
@@ -72,10 +79,13 @@ TAGS: [comma-separated relevant tags]
7279
fn build_smart_review_user_prompt(
7380
diff: &UnifiedDiff,
7481
context_chunks: &[LLMContextChunk],
82+
max_context_chars: usize,
83+
max_diff_chars: usize,
7584
) -> Result<String> {
7685
let mut prompt = String::new();
7786
let mut context_chars = 0usize;
78-
const MAX_CONTEXT_CHARS: usize = 20000;
87+
let mut diff_chars = 0usize;
88+
let mut diff_truncated = false;
7989

8090
prompt.push_str(&format!(
8191
"Please review the following code changes in file: {}\n\n",
@@ -104,7 +114,9 @@ TAGS: [comma-separated relevant tags]
104114
.collect::<Vec<_>>()
105115
.join("\n")
106116
);
107-
if context_chars.saturating_add(block.len()) > MAX_CONTEXT_CHARS {
117+
if max_context_chars > 0
118+
&& context_chars.saturating_add(block.len()) > max_context_chars
119+
{
108120
prompt.push_str("[Context truncated]\n\n");
109121
break;
110122
}
@@ -117,15 +129,26 @@ TAGS: [comma-separated relevant tags]
117129

118130
// Format the diff with line numbers and change indicators
119131
for hunk in &diff.hunks {
120-
prompt.push_str(&format!(
132+
let hunk_header = format!(
121133
"### Hunk: Lines {}-{} (was {}-{})\n\n",
122134
hunk.new_start,
123135
hunk.new_start + hunk.new_lines,
124136
hunk.old_start,
125137
hunk.old_start + hunk.old_lines
126-
));
138+
);
139+
if max_diff_chars > 0 && diff_chars.saturating_add(hunk_header.len()) > max_diff_chars {
140+
diff_truncated = true;
141+
break;
142+
}
143+
prompt.push_str(&hunk_header);
144+
diff_chars = diff_chars.saturating_add(hunk_header.len());
127145

146+
if max_diff_chars > 0 && diff_chars.saturating_add("```diff\n".len()) > max_diff_chars {
147+
diff_truncated = true;
148+
break;
149+
}
128150
prompt.push_str("```diff\n");
151+
diff_chars = diff_chars.saturating_add("```diff\n".len());
129152
let mut line_num = hunk.new_start;
130153

131154
for line in &hunk.changes {
@@ -135,7 +158,14 @@ TAGS: [comma-separated relevant tags]
135158
crate::core::diff_parser::ChangeType::Context => " ",
136159
};
137160

138-
prompt.push_str(&format!("{}{:4} {}\n", prefix, line_num, line.content));
161+
let rendered = format!("{}{:4} {}\n", prefix, line_num, line.content);
162+
if max_diff_chars > 0 && diff_chars.saturating_add(rendered.len()) > max_diff_chars
163+
{
164+
diff_truncated = true;
165+
break;
166+
}
167+
prompt.push_str(&rendered);
168+
diff_chars = diff_chars.saturating_add(rendered.len());
139169

140170
if !matches!(
141171
line.change_type,
@@ -146,6 +176,15 @@ TAGS: [comma-separated relevant tags]
146176
}
147177

148178
prompt.push_str("```\n\n");
179+
diff_chars = diff_chars.saturating_add("```\n\n".len());
180+
181+
if diff_truncated {
182+
break;
183+
}
184+
}
185+
186+
if diff_truncated {
187+
prompt.push_str("[Diff truncated]\n\n");
149188
}
150189

151190
prompt.push_str("## Review Instructions\n\n");

0 commit comments

Comments
 (0)