Skip to content

Commit 5585c06

Browse files
committed
refactor: split eval runner, feedback, and review helpers
Separate shared input, matching, and persistence helpers from the main command flows so the next behavioral changes stay easier to isolate and test. Made-with: Cursor
1 parent b42804c commit 5585c06

10 files changed

Lines changed: 522 additions & 438 deletions

File tree

src/commands/eval/runner.rs

Lines changed: 5 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -1,158 +1,6 @@
1-
use anyhow::Result;
2-
use std::collections::HashSet;
3-
use std::path::{Path, PathBuf};
1+
#[path = "runner/execute.rs"]
2+
mod execute;
3+
#[path = "runner/matching.rs"]
4+
mod matching;
45

5-
use crate::config;
6-
use crate::core::eval_benchmarks::FixtureResult as BenchmarkFixtureResult;
7-
use crate::review::review_diff_content_raw;
8-
9-
use super::metrics::{compute_rule_metrics, summarize_rule_metrics};
10-
use super::pattern::summarize_for_eval;
11-
use super::{EvalFixtureResult, LoadedEvalFixture};
12-
13-
pub(super) async fn run_eval_fixture(
14-
config: &config::Config,
15-
loaded_fixture: LoadedEvalFixture,
16-
) -> Result<EvalFixtureResult> {
17-
let LoadedEvalFixture {
18-
fixture_path,
19-
fixture,
20-
suite_name,
21-
suite_thresholds,
22-
difficulty,
23-
} = loaded_fixture;
24-
let fixture_name = fixture.name.unwrap_or_else(|| {
25-
fixture_path
26-
.file_name()
27-
.and_then(|value| value.to_str())
28-
.unwrap_or("fixture")
29-
.to_string()
30-
});
31-
let fixture_dir = fixture_path
32-
.parent()
33-
.map(Path::to_path_buf)
34-
.unwrap_or_else(|| PathBuf::from("."));
35-
36-
let diff_content = match (fixture.diff, fixture.diff_file) {
37-
(Some(diff), _) => diff,
38-
(None, Some(diff_file)) => {
39-
let path = if diff_file.is_absolute() {
40-
diff_file
41-
} else {
42-
fixture_dir.join(diff_file)
43-
};
44-
std::fs::read_to_string(path)?
45-
}
46-
(None, None) => anyhow::bail!(
47-
"Fixture '{}' must define either diff or diff_file",
48-
fixture_name
49-
),
50-
};
51-
52-
let repo_path = fixture
53-
.repo_path
54-
.map(|path| {
55-
if path.is_absolute() {
56-
path
57-
} else {
58-
fixture_dir.join(path)
59-
}
60-
})
61-
.unwrap_or_else(|| PathBuf::from("."));
62-
63-
let review_result = review_diff_content_raw(&diff_content, config.clone(), &repo_path).await?;
64-
let comments = review_result.comments;
65-
let total_comments = comments.len();
66-
let mut failures = Vec::new();
67-
let mut required_matches = 0usize;
68-
let required_total = fixture.expect.must_find.len();
69-
let mut used_comment_indices = HashSet::new();
70-
let mut unexpected_comment_indices = HashSet::new();
71-
let mut matched_pairs = Vec::new();
72-
73-
for (expected_idx, expected) in fixture.expect.must_find.iter().enumerate() {
74-
let found = comments
75-
.iter()
76-
.enumerate()
77-
.find(|(comment_idx, comment)| {
78-
!used_comment_indices.contains(comment_idx) && expected.matches(comment)
79-
})
80-
.map(|(comment_idx, _)| comment_idx);
81-
82-
if let Some(comment_idx) = found {
83-
used_comment_indices.insert(comment_idx);
84-
matched_pairs.push((expected_idx, comment_idx));
85-
required_matches = required_matches.saturating_add(1);
86-
} else {
87-
failures.push(format!("Missing expected finding: {}", expected.describe()));
88-
}
89-
}
90-
91-
for unexpected in &fixture.expect.must_not_find {
92-
if let Some((comment_idx, comment)) = comments
93-
.iter()
94-
.enumerate()
95-
.find(|(_, comment)| unexpected.matches(comment))
96-
{
97-
unexpected_comment_indices.insert(comment_idx);
98-
failures.push(format!(
99-
"Unexpected finding matched {}:{} '{}'",
100-
comment.file_path.display(),
101-
comment.line_number,
102-
summarize_for_eval(&comment.content)
103-
));
104-
}
105-
}
106-
107-
let rule_metrics = compute_rule_metrics(&fixture.expect.must_find, &comments, &matched_pairs);
108-
let rule_summary = summarize_rule_metrics(&rule_metrics);
109-
110-
if let Some(min_total) = fixture.expect.min_total {
111-
if total_comments < min_total {
112-
failures.push(format!(
113-
"Expected at least {} comments, got {}",
114-
min_total, total_comments
115-
));
116-
}
117-
}
118-
if let Some(max_total) = fixture.expect.max_total {
119-
if total_comments > max_total {
120-
failures.push(format!(
121-
"Expected at most {} comments, got {}",
122-
max_total, total_comments
123-
));
124-
}
125-
}
126-
127-
let benchmark_metrics = suite_name.as_ref().map(|_| {
128-
let accounted_for = used_comment_indices
129-
.union(&unexpected_comment_indices)
130-
.count();
131-
let extra_findings = total_comments.saturating_sub(accounted_for);
132-
let mut result = BenchmarkFixtureResult::compute(
133-
&fixture_name,
134-
fixture.expect.must_find.len(),
135-
fixture.expect.must_not_find.len(),
136-
required_matches,
137-
unexpected_comment_indices.len(),
138-
extra_findings,
139-
);
140-
result.details = failures.clone();
141-
result
142-
});
143-
144-
Ok(EvalFixtureResult {
145-
fixture: fixture_name,
146-
suite: suite_name,
147-
passed: failures.is_empty(),
148-
total_comments,
149-
required_matches,
150-
required_total,
151-
benchmark_metrics,
152-
suite_thresholds,
153-
difficulty,
154-
rule_metrics,
155-
rule_summary,
156-
failures,
157-
})
158-
}
6+
pub(super) use execute::run_eval_fixture;
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
use anyhow::Result;
2+
use std::path::{Path, PathBuf};
3+
4+
use crate::config;
5+
use crate::core::eval_benchmarks::FixtureResult as BenchmarkFixtureResult;
6+
use crate::review::review_diff_content_raw;
7+
8+
use super::super::{EvalFixtureResult, LoadedEvalFixture};
9+
use super::matching::evaluate_fixture_expectations;
10+
11+
pub(in super::super) async fn run_eval_fixture(
12+
config: &config::Config,
13+
loaded_fixture: LoadedEvalFixture,
14+
) -> Result<EvalFixtureResult> {
15+
let LoadedEvalFixture {
16+
fixture_path,
17+
fixture,
18+
suite_name,
19+
suite_thresholds,
20+
difficulty,
21+
} = loaded_fixture;
22+
let fixture_name = fixture.name.clone().unwrap_or_else(|| {
23+
fixture_path
24+
.file_name()
25+
.and_then(|value| value.to_str())
26+
.unwrap_or("fixture")
27+
.to_string()
28+
});
29+
let fixture_dir = fixture_path
30+
.parent()
31+
.map(Path::to_path_buf)
32+
.unwrap_or_else(|| PathBuf::from("."));
33+
34+
let diff_content = match (fixture.diff.clone(), fixture.diff_file.clone()) {
35+
(Some(diff), _) => diff,
36+
(None, Some(diff_file)) => {
37+
let path = if diff_file.is_absolute() {
38+
diff_file
39+
} else {
40+
fixture_dir.join(diff_file)
41+
};
42+
std::fs::read_to_string(path)?
43+
}
44+
(None, None) => anyhow::bail!(
45+
"Fixture '{}' must define either diff or diff_file",
46+
fixture_name
47+
),
48+
};
49+
50+
let repo_path = fixture
51+
.repo_path
52+
.clone()
53+
.map(|path| {
54+
if path.is_absolute() {
55+
path
56+
} else {
57+
fixture_dir.join(path)
58+
}
59+
})
60+
.unwrap_or_else(|| PathBuf::from("."));
61+
62+
let review_result = review_diff_content_raw(&diff_content, config.clone(), &repo_path).await?;
63+
let comments = review_result.comments;
64+
let total_comments = comments.len();
65+
let match_summary = evaluate_fixture_expectations(&fixture.expect, &comments);
66+
let mut failures = match_summary.failures;
67+
68+
if let Some(min_total) = fixture.expect.min_total {
69+
if total_comments < min_total {
70+
failures.push(format!(
71+
"Expected at least {} comments, got {}",
72+
min_total, total_comments
73+
));
74+
}
75+
}
76+
if let Some(max_total) = fixture.expect.max_total {
77+
if total_comments > max_total {
78+
failures.push(format!(
79+
"Expected at most {} comments, got {}",
80+
max_total, total_comments
81+
));
82+
}
83+
}
84+
85+
let benchmark_metrics = suite_name.as_ref().map(|_| {
86+
let accounted_for = match_summary
87+
.used_comment_indices
88+
.union(&match_summary.unexpected_comment_indices)
89+
.count();
90+
let extra_findings = total_comments.saturating_sub(accounted_for);
91+
let mut result = BenchmarkFixtureResult::compute(
92+
&fixture_name,
93+
fixture.expect.must_find.len(),
94+
fixture.expect.must_not_find.len(),
95+
match_summary.required_matches,
96+
match_summary.unexpected_comment_indices.len(),
97+
extra_findings,
98+
);
99+
result.details = failures.clone();
100+
result
101+
});
102+
103+
Ok(EvalFixtureResult {
104+
fixture: fixture_name,
105+
suite: suite_name,
106+
passed: failures.is_empty(),
107+
total_comments,
108+
required_matches: match_summary.required_matches,
109+
required_total: match_summary.required_total,
110+
benchmark_metrics,
111+
suite_thresholds,
112+
difficulty,
113+
rule_metrics: match_summary.rule_metrics,
114+
rule_summary: match_summary.rule_summary,
115+
failures,
116+
})
117+
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
use std::collections::HashSet;
2+
3+
use crate::core;
4+
5+
use super::super::metrics::{compute_rule_metrics, summarize_rule_metrics};
6+
use super::super::pattern::summarize_for_eval;
7+
use super::super::{EvalExpectations, EvalRuleMetrics, EvalRuleScoreSummary};
8+
9+
pub(super) struct FixtureMatchSummary {
10+
pub(super) failures: Vec<String>,
11+
pub(super) required_matches: usize,
12+
pub(super) required_total: usize,
13+
pub(super) rule_metrics: Vec<EvalRuleMetrics>,
14+
pub(super) rule_summary: Option<EvalRuleScoreSummary>,
15+
pub(super) used_comment_indices: HashSet<usize>,
16+
pub(super) unexpected_comment_indices: HashSet<usize>,
17+
}
18+
19+
pub(super) fn evaluate_fixture_expectations(
20+
expectations: &EvalExpectations,
21+
comments: &[core::Comment],
22+
) -> FixtureMatchSummary {
23+
let mut failures = Vec::new();
24+
let mut required_matches = 0usize;
25+
let required_total = expectations.must_find.len();
26+
let mut used_comment_indices = HashSet::new();
27+
let mut unexpected_comment_indices = HashSet::new();
28+
let mut matched_pairs = Vec::new();
29+
30+
for (expected_idx, expected) in expectations.must_find.iter().enumerate() {
31+
let found = comments
32+
.iter()
33+
.enumerate()
34+
.find(|(comment_idx, comment)| {
35+
!used_comment_indices.contains(comment_idx) && expected.matches(comment)
36+
})
37+
.map(|(comment_idx, _)| comment_idx);
38+
39+
if let Some(comment_idx) = found {
40+
used_comment_indices.insert(comment_idx);
41+
matched_pairs.push((expected_idx, comment_idx));
42+
required_matches = required_matches.saturating_add(1);
43+
} else {
44+
failures.push(format!("Missing expected finding: {}", expected.describe()));
45+
}
46+
}
47+
48+
for unexpected in &expectations.must_not_find {
49+
if let Some((comment_idx, comment)) = comments
50+
.iter()
51+
.enumerate()
52+
.find(|(_, comment)| unexpected.matches(comment))
53+
{
54+
unexpected_comment_indices.insert(comment_idx);
55+
failures.push(format!(
56+
"Unexpected finding matched {}:{} '{}'",
57+
comment.file_path.display(),
58+
comment.line_number,
59+
summarize_for_eval(&comment.content)
60+
));
61+
}
62+
}
63+
64+
let rule_metrics = compute_rule_metrics(&expectations.must_find, comments, &matched_pairs);
65+
let rule_summary = summarize_rule_metrics(&rule_metrics);
66+
67+
FixtureMatchSummary {
68+
failures,
69+
required_matches,
70+
required_total,
71+
rule_metrics,
72+
rule_summary,
73+
used_comment_indices,
74+
unexpected_comment_indices,
75+
}
76+
}

0 commit comments

Comments
 (0)