Skip to content

Commit d2b833a

Browse files
committed
refactor: split pipeline repo support helpers
Separate diff chunking, instruction discovery, git history, and convention store helpers so repository support code is easier to navigate without changing behavior. Made-with: Cursor
1 parent 33af269 commit d2b833a

File tree

5 files changed

+217
-193
lines changed

5 files changed

+217
-193
lines changed
Lines changed: 13 additions & 193 deletions
Original file line numberDiff line numberDiff line change
@@ -1,193 +1,13 @@
1-
use std::path::{Path, PathBuf};
2-
use tracing::{info, warn};
3-
4-
use crate::config;
5-
use crate::core;
6-
7-
/// Splits a git diff into chunks for context, cutting only between file sections.
///
/// A diff whose byte length is at most `max_chars` is returned as a single chunk.
/// Otherwise the text is split on `"\ndiff --git "` and the pieces are packed
/// greedily: a chunk is closed when appending the next piece would push it past
/// `max_chars`. A single piece longer than `max_chars` is kept whole.
///
/// NOTE(review): the newline consumed by the split is never put back, so two
/// pieces packed into the same chunk are joined without a line break between
/// the end of one file section and the next `diff --git ` header.
pub(super) fn chunk_diff_for_context(diff_content: &str, max_chars: usize) -> Vec<String> {
8-
// Fast path: the whole diff already fits (this also covers the empty string).
if diff_content.len() <= max_chars {
9-
return vec![diff_content.to_string()];
10-
}
11-
12-
let mut chunks = Vec::new();
13-
let mut current_chunk = String::new();
14-
15-
for section in diff_content.split("\ndiff --git ") {
16-
// While nothing has been accumulated yet the piece is taken verbatim;
// afterwards the header removed by the split is prepended again.
let section = if chunks.is_empty() && current_chunk.is_empty() {
17-
section.to_string()
18-
} else {
19-
format!("diff --git {}", section)
20-
};
21-
22-
// Close the current chunk only if it already holds something, so an
// oversized piece still ends up in a chunk of its own.
if current_chunk.len() + section.len() > max_chars && !current_chunk.is_empty() {
23-
chunks.push(current_chunk);
24-
current_chunk = section;
25-
} else {
26-
current_chunk.push_str(&section);
27-
}
28-
}
29-
30-
// Flush whatever is left after the last piece.
if !current_chunk.is_empty() {
31-
chunks.push(current_chunk);
32-
}
33-
34-
chunks
35-
}
36-
37-
/// Looks for a fixed set of instruction files under `repo_path`.
///
/// Returns `(file name, trimmed content)` pairs in the order of the
/// `INSTRUCTION_FILES` list. A candidate is skipped when it is not a regular
/// file, when it is larger than `MAX_INSTRUCTION_SIZE` bytes (a warning is
/// logged), when it cannot be read as a UTF-8 string, or when its content is
/// empty after trimming.
pub(super) fn detect_instruction_files(repo_path: &Path) -> Vec<(String, String)> {
38-
// Candidate paths, relative to the repository root.
const INSTRUCTION_FILES: &[&str] = &[
39-
".cursorrules",
40-
"CLAUDE.md",
41-
".claude/CLAUDE.md",
42-
"agents.md",
43-
".github/copilot-instructions.md",
44-
"GEMINI.md",
45-
".diffscope-instructions.md",
46-
];
47-
// Upper bound on the file size, in bytes, compared against the metadata length.
const MAX_INSTRUCTION_SIZE: u64 = 10_000;
48-
49-
let mut results = Vec::new();
50-
for filename in INSTRUCTION_FILES {
51-
let path = repo_path.join(filename);
52-
if path.is_file() {
53-
// If the metadata cannot be read the size check is skipped and the
// read below is still attempted.
if let Ok(meta) = std::fs::metadata(&path) {
54-
if meta.len() > MAX_INSTRUCTION_SIZE {
55-
warn!(
56-
"Skipping instruction file {} ({} bytes exceeds {})",
57-
filename,
58-
meta.len(),
59-
MAX_INSTRUCTION_SIZE
60-
);
61-
continue;
62-
}
63-
}
64-
// Read errors are ignored: the file is simply not reported.
if let Ok(content) = std::fs::read_to_string(&path) {
65-
let trimmed = content.trim().to_string();
66-
if !trimmed.is_empty() {
67-
info!("Auto-detected instruction file: {}", filename);
68-
results.push((filename.to_string(), trimmed));
69-
}
70-
}
71-
}
72-
}
73-
results
74-
}
75-
76-
/// Runs `git log` in `repo_path` and returns its standard output.
///
/// The command asks for `--numstat` output with a custom `--format` and the
/// `-100` limit. Returns `None` when the command cannot be started, exits
/// unsuccessfully, or prints only whitespace. Invalid UTF-8 in the output is
/// replaced lossily.
pub(super) fn gather_git_log(repo_path: &Path) -> Option<String> {
77-
let output = std::process::Command::new("git")
78-
.args([
79-
"log",
80-
"--numstat",
81-
"--format=commit %H%nAuthor: %an <%ae>%nDate: %ai%n%n %s%n",
82-
"-100",
83-
])
84-
.current_dir(repo_path)
85-
.output();
86-
match output {
87-
Ok(out) if out.status.success() => {
88-
let log_text = String::from_utf8_lossy(&out.stdout).to_string();
89-
// A successful run with blank output is treated like no log at all.
if log_text.trim().is_empty() {
90-
None
91-
} else {
92-
info!("Gathered git log ({} bytes)", log_text.len());
93-
Some(log_text)
94-
}
95-
}
96-
// Covers both a failure to run the command and a non-success exit status.
_ => {
97-
info!("Git log unavailable (not a git repo or git not found)");
98-
None
99-
}
100-
}
101-
}
102-
103-
/// Determines where the convention store file lives.
///
/// Returns the path from `config.convention_store_path` when it is set.
/// Otherwise falls back to `diffscope/conventions.json` inside the directory
/// reported by `dirs::data_local_dir()`, or `None` when that call yields no
/// directory.
pub(super) fn resolve_convention_store_path(config: &config::Config) -> Option<PathBuf> {
104-
// An explicitly configured path always wins over the default location.
if let Some(ref path) = config.convention_store_path {
105-
return Some(PathBuf::from(path));
106-
}
107-
dirs::data_local_dir().map(|dir| dir.join("diffscope").join("conventions.json"))
108-
}
109-
110-
/// Writes the convention store to `path` as JSON, on a best-effort basis.
///
/// Nothing is returned: a failed write is reported with a warning only.
///
/// NOTE(review): when `store.to_json()` fails the function returns without
/// writing or logging anything.
pub(super) fn save_convention_store(
111-
store: &core::convention_learner::ConventionStore,
112-
path: &PathBuf,
113-
) {
114-
if let Ok(json) = store.to_json() {
115-
if let Some(parent) = path.parent() {
116-
// The result is discarded on purpose; if the directory is missing
// the write below fails and that failure is logged.
let _ = std::fs::create_dir_all(parent);
117-
}
118-
if let Err(error) = std::fs::write(path, json) {
119-
warn!(
120-
"Failed to save convention store to {}: {}",
121-
path.display(),
122-
error
123-
);
124-
}
125-
}
126-
}
127-
128-
// Unit tests for instruction-file detection and diff chunking.
#[cfg(test)]
129-
mod tests {
130-
use super::*;
131-
132-
// A fresh temporary directory contains none of the known instruction files.
#[test]
133-
fn detect_instruction_files_empty_dir() {
134-
let dir = tempfile::tempdir().unwrap();
135-
let results = detect_instruction_files(dir.path());
136-
assert!(results.is_empty());
137-
}
138-
139-
// A single `.cursorrules` file is reported with its name and content.
#[test]
140-
fn detect_instruction_files_finds_cursorrules() {
141-
let dir = tempfile::tempdir().unwrap();
142-
std::fs::write(dir.path().join(".cursorrules"), "Use tabs not spaces").unwrap();
143-
let results = detect_instruction_files(dir.path());
144-
assert_eq!(results.len(), 1);
145-
assert_eq!(results[0].0, ".cursorrules");
146-
assert!(results[0].1.contains("Use tabs"));
147-
}
148-
149-
// A diff shorter than the limit comes back unchanged as one chunk.
#[test]
150-
fn chunk_diff_small_diff_returns_single_chunk() {
151-
let diff = "diff --git a/foo.rs b/foo.rs\n+hello\n";
152-
let chunks = chunk_diff_for_context(diff, 1000);
153-
assert_eq!(chunks.len(), 1);
154-
assert_eq!(chunks[0], diff);
155-
}
156-
157-
// Three file sections with a limit of 40 must yield several chunks, each
// containing a `diff --git` header. The fixture separates files with a
// blank line.
#[test]
158-
fn chunk_diff_splits_at_file_boundaries() {
159-
let diff = "diff --git a/a.rs b/a.rs\n+line1\n\ndiff --git a/b.rs b/b.rs\n+line2\n\ndiff --git a/c.rs b/c.rs\n+line3\n";
160-
let chunks = chunk_diff_for_context(diff, 40);
161-
assert!(chunks.len() >= 2);
162-
for chunk in &chunks {
163-
assert!(chunk.contains("diff --git"));
164-
}
165-
}
166-
167-
// The empty string takes the fast path and is returned as one empty chunk.
#[test]
168-
fn chunk_diff_empty_input() {
169-
let chunks = chunk_diff_for_context("", 100);
170-
assert_eq!(chunks.len(), 1);
171-
assert_eq!(chunks[0], "");
172-
}
173-
174-
// One file section larger than the limit stays in a single chunk.
#[test]
175-
fn chunk_diff_single_large_file_not_split_midfile() {
176-
let diff = format!("diff --git a/big.rs b/big.rs\n{}", "+line\n".repeat(100));
177-
let chunks = chunk_diff_for_context(&diff, 50);
178-
assert_eq!(chunks.len(), 1);
179-
}
180-
181-
// Checks that the payload lines of every file survive chunking; it does not
// compare the rejoined text with the input byte for byte.
#[test]
182-
fn chunk_diff_preserves_all_content() {
183-
let file_a = "diff --git a/a.rs b/a.rs\n+alpha\n";
184-
let file_b = "\ndiff --git a/b.rs b/b.rs\n+beta\n";
185-
let file_c = "\ndiff --git a/c.rs b/c.rs\n+gamma\n";
186-
let diff = format!("{}{}{}", file_a, file_b, file_c);
187-
let chunks = chunk_diff_for_context(&diff, 50);
188-
let rejoined = chunks.join("");
189-
assert!(rejoined.contains("+alpha"));
190-
assert!(rejoined.contains("+beta"));
191-
assert!(rejoined.contains("+gamma"));
192-
}
193-
}
1+
// Private submodules, one per group of helpers; each `#[path]` attribute points
// the module at its file under `repo_support/`.
#[path = "repo_support/conventions.rs"]
2+
mod conventions;
3+
#[path = "repo_support/diff.rs"]
4+
mod diff;
5+
#[path = "repo_support/git.rs"]
6+
mod git;
7+
#[path = "repo_support/instructions.rs"]
8+
mod instructions;
9+
10+
// Re-export the helpers with `pub(super)` visibility so the parent module can
// keep using them by name without knowing which submodule defines them.
pub(super) use conventions::{resolve_convention_store_path, save_convention_store};
11+
pub(super) use diff::chunk_diff_for_context;
12+
pub(super) use git::gather_git_log;
13+
pub(super) use instructions::detect_instruction_files;
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
use std::path::PathBuf;
2+
use tracing::warn;
3+
4+
use crate::config;
5+
use crate::core;
6+
7+
pub(in super::super) fn resolve_convention_store_path(config: &config::Config) -> Option<PathBuf> {
8+
if let Some(ref path) = config.convention_store_path {
9+
return Some(PathBuf::from(path));
10+
}
11+
dirs::data_local_dir().map(|dir| dir.join("diffscope").join("conventions.json"))
12+
}
13+
14+
pub(in super::super) fn save_convention_store(
15+
store: &core::convention_learner::ConventionStore,
16+
path: &PathBuf,
17+
) {
18+
if let Ok(json) = store.to_json() {
19+
if let Some(parent) = path.parent() {
20+
let _ = std::fs::create_dir_all(parent);
21+
}
22+
if let Err(error) = std::fs::write(path, json) {
23+
warn!(
24+
"Failed to save convention store to {}: {}",
25+
path.display(),
26+
error
27+
);
28+
}
29+
}
30+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
pub(in super::super) fn chunk_diff_for_context(
2+
diff_content: &str,
3+
max_chars: usize,
4+
) -> Vec<String> {
5+
if diff_content.len() <= max_chars {
6+
return vec![diff_content.to_string()];
7+
}
8+
9+
let mut chunks = Vec::new();
10+
let mut current_chunk = String::new();
11+
12+
for section in diff_content.split("\ndiff --git ") {
13+
let section = if chunks.is_empty() && current_chunk.is_empty() {
14+
section.to_string()
15+
} else {
16+
format!("diff --git {}", section)
17+
};
18+
19+
if current_chunk.len() + section.len() > max_chars && !current_chunk.is_empty() {
20+
chunks.push(current_chunk);
21+
current_chunk = section;
22+
} else {
23+
current_chunk.push_str(&section);
24+
}
25+
}
26+
27+
if !current_chunk.is_empty() {
28+
chunks.push(current_chunk);
29+
}
30+
31+
chunks
32+
}
33+
34+
// Unit tests for `chunk_diff_for_context`.
#[cfg(test)]
mod tests {
    use super::*;

    /// A diff below the limit is handed back untouched as the only chunk.
    #[test]
    fn chunk_diff_small_diff_returns_single_chunk() {
        let input = "diff --git a/foo.rs b/foo.rs\n+hello\n";
        let pieces = chunk_diff_for_context(input, 1000);
        assert_eq!(pieces, vec![input.to_string()]);
    }

    /// Several file sections that exceed the limit are spread over multiple
    /// chunks, each of which still carries a file header.
    #[test]
    fn chunk_diff_splits_at_file_boundaries() {
        let input = "diff --git a/a.rs b/a.rs\n+line1\n\ndiff --git a/b.rs b/b.rs\n+line2\n\ndiff --git a/c.rs b/c.rs\n+line3\n";
        let pieces = chunk_diff_for_context(input, 40);
        assert!(pieces.len() >= 2);
        assert!(pieces.iter().all(|piece| piece.contains("diff --git")));
    }

    /// Empty input yields exactly one empty chunk.
    #[test]
    fn chunk_diff_empty_input() {
        let pieces = chunk_diff_for_context("", 100);
        assert_eq!(pieces, vec![String::new()]);
    }

    /// A single file section is never cut, even when it exceeds the limit.
    #[test]
    fn chunk_diff_single_large_file_not_split_midfile() {
        let body = "+line\n".repeat(100);
        let input = format!("diff --git a/big.rs b/big.rs\n{}", body);
        let pieces = chunk_diff_for_context(&input, 50);
        assert_eq!(pieces.len(), 1);
    }

    /// The payload line of every file is still present after chunking.
    #[test]
    fn chunk_diff_preserves_all_content() {
        let input = [
            "diff --git a/a.rs b/a.rs\n+alpha\n",
            "\ndiff --git a/b.rs b/b.rs\n+beta\n",
            "\ndiff --git a/c.rs b/c.rs\n+gamma\n",
        ]
        .concat();
        let rejoined = chunk_diff_for_context(&input, 50).join("");
        for payload in ["+alpha", "+beta", "+gamma"] {
            assert!(rejoined.contains(payload));
        }
    }
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
use std::path::Path;
2+
use tracing::info;
3+
4+
pub(in super::super) fn gather_git_log(repo_path: &Path) -> Option<String> {
5+
let output = std::process::Command::new("git")
6+
.args([
7+
"log",
8+
"--numstat",
9+
"--format=commit %H%nAuthor: %an <%ae>%nDate: %ai%n%n %s%n",
10+
"-100",
11+
])
12+
.current_dir(repo_path)
13+
.output();
14+
match output {
15+
Ok(out) if out.status.success() => {
16+
let log_text = String::from_utf8_lossy(&out.stdout).to_string();
17+
if log_text.trim().is_empty() {
18+
None
19+
} else {
20+
info!("Gathered git log ({} bytes)", log_text.len());
21+
Some(log_text)
22+
}
23+
}
24+
_ => {
25+
info!("Git log unavailable (not a git repo or git not found)");
26+
None
27+
}
28+
}
29+
}

0 commit comments

Comments
 (0)