@@ -160,6 +160,29 @@ mod tests {
160160
161161 use super :: * ;
162162
163+ fn empty_report ( ) -> EvalReport {
164+ EvalReport {
165+ run : Default :: default ( ) ,
166+ fixtures_total : 0 ,
167+ fixtures_passed : 0 ,
168+ fixtures_failed : 0 ,
169+ rule_metrics : vec ! [ ] ,
170+ rule_summary : None ,
171+ benchmark_summary : None ,
172+ suite_results : vec ! [ ] ,
173+ benchmark_by_category : HashMap :: new ( ) ,
174+ benchmark_by_language : HashMap :: new ( ) ,
175+ benchmark_by_difficulty : HashMap :: new ( ) ,
176+ suite_comparisons : vec ! [ ] ,
177+ category_comparisons : vec ! [ ] ,
178+ language_comparisons : vec ! [ ] ,
179+ verification_health : None ,
180+ warnings : vec ! [ ] ,
181+ threshold_failures : vec ! [ ] ,
182+ results : vec ! [ ] ,
183+ }
184+ }
185+
163186 fn metrics ( micro_f1 : f32 , weighted_score : f32 , fixture_count : usize ) -> AggregateMetrics {
164187 AggregateMetrics {
165188 micro_f1,
@@ -169,6 +192,38 @@ mod tests {
169192 }
170193 }
171194
/// A suite that appears in both the current results and the baseline report
/// must yield exactly one comparison entry.
///
/// Current metrics are (0.8, 0.75) and baseline metrics are (0.9, 0.85), so the
/// assertions expect both deltas to be approximately -0.1 and both fixture
/// counts to be 2.
#[test]
fn build_suite_comparisons_intersects_current_and_baseline() {
    // Both sides share everything except the aggregate metrics.
    let suite_result = |aggregate: AggregateMetrics| EvalSuiteResult {
        suite: String::from("review-depth-infra"),
        fixture_count: 2,
        aggregate,
        thresholds_enforced: false,
        threshold_pass: true,
        threshold_failures: Vec::new(),
    };

    let current = vec![suite_result(metrics(0.8, 0.75, 2))];

    let mut baseline = empty_report();
    baseline.suite_results = vec![suite_result(metrics(0.9, 0.85, 2))];

    let comparisons = build_suite_comparisons(&current, Some(&baseline));

    assert_eq!(comparisons.len(), 1);

    let comparison = &comparisons[0];
    assert_eq!(comparison.name, "review-depth-infra");

    // Adding 0.1 to each delta should land within f32::EPSILON of zero.
    let micro_f1_error = (comparison.micro_f1_delta + 0.1).abs();
    let weighted_score_error = (comparison.weighted_score_delta + 0.1).abs();
    assert!(micro_f1_error < f32::EPSILON);
    assert!(weighted_score_error < f32::EPSILON);

    assert_eq!(comparison.current_fixture_count, 2);
    assert_eq!(comparison.baseline_fixture_count, 2);
}
226+
172227 #[ test]
173228 fn build_named_breakdown_comparisons_intersects_current_and_baseline ( ) {
174229 let current = HashMap :: from ( [
@@ -250,6 +305,108 @@ mod tests {
250305 assert_eq ! ( health. request_failure_count, 1 ) ;
251306 }
252307
/// A fixture whose only warning is "reproduction validator warning", and which
/// carries no verification report, must make `build_verification_health`
/// return `None`.
#[test]
fn build_verification_health_returns_none_for_non_verification_warnings_only() {
    let fixture = EvalFixtureResult {
        fixture: String::from("suite/a"),
        suite: Some(String::from("suite")),
        passed: true,

        total_comments: 1,
        required_matches: 1,
        required_total: 1,

        // The single warning under test.
        warnings: vec![String::from("reproduction validator warning")],

        // Everything optional stays unset.
        benchmark_metrics: None,
        suite_thresholds: None,
        difficulty: None,
        metadata: None,
        rule_summary: None,
        verification_report: None,
        agent_activity: None,
        reproduction_summary: None,
        artifact_path: None,

        rule_metrics: Vec::new(),
        failures: Vec::new(),
        dag_traces: Vec::new(),
    };
    let results = vec![fixture];

    let health = build_verification_health(&results);

    assert!(health.is_none());
}
334+
/// A fixture with no verification report but with the warning
/// "verifier request error: timeout" must still produce a health summary.
///
/// The assertions expect one warning, one fixture with warnings, one request
/// failure, and a total check count of one.
#[test]
fn build_verification_health_detects_verifier_only_warning_text() {
    let fixture = EvalFixtureResult {
        fixture: String::from("suite/a"),
        suite: Some(String::from("suite")),
        passed: true,

        total_comments: 1,
        required_matches: 1,
        required_total: 1,

        // The only verification signal is this warning text.
        warnings: vec![String::from("verifier request error: timeout")],

        // Everything optional stays unset.
        benchmark_metrics: None,
        suite_thresholds: None,
        difficulty: None,
        metadata: None,
        rule_summary: None,
        verification_report: None,
        agent_activity: None,
        reproduction_summary: None,
        artifact_path: None,

        rule_metrics: Vec::new(),
        failures: Vec::new(),
        dag_traces: Vec::new(),
    };
    let results = vec![fixture];

    let health = build_verification_health(&results).unwrap();

    assert_eq!(health.warnings_total, 1);
    assert_eq!(health.fixtures_with_warnings, 1);
    assert_eq!(health.request_failure_count, 1);
    assert_eq!(health.total_checks, 1);
}
366+
/// A fixture with a verification report whose single judge saw zero comments
/// must still produce a health summary.
///
/// The assertions expect zero total checks, zero verified checks, and a
/// verified percentage of exactly 0.0.
#[test]
fn build_verification_health_keeps_zero_percent_when_no_checks_ran() {
    // One judge that processed nothing.
    let idle_judge = EvalVerificationJudgeReport {
        model: String::from("judge"),
        total_comments: 0,
        passed_comments: 0,
        filtered_comments: 0,
        abstained_comments: 0,
        warnings: Vec::new(),
    };
    let report = EvalVerificationReport {
        consensus_mode: String::from("majority"),
        required_votes: 1,
        judge_count: 1,
        judges: vec![idle_judge],
    };

    let fixture = EvalFixtureResult {
        fixture: String::from("suite/a"),
        suite: Some(String::from("suite")),
        passed: true,

        total_comments: 0,
        required_matches: 0,
        required_total: 0,

        verification_report: Some(report),

        // Everything else optional stays unset.
        benchmark_metrics: None,
        suite_thresholds: None,
        difficulty: None,
        metadata: None,
        rule_summary: None,
        agent_activity: None,
        reproduction_summary: None,
        artifact_path: None,

        rule_metrics: Vec::new(),
        warnings: Vec::new(),
        failures: Vec::new(),
        dag_traces: Vec::new(),
    };
    let results = vec![fixture];

    let health = build_verification_health(&results).unwrap();

    assert_eq!(health.total_checks, 0);
    assert_eq!(health.verified_checks, 0);
    assert_eq!(health.verified_pct, 0.0);
}
409+
253410 #[ test]
254411 fn build_verification_health_uses_judge_reports_without_warnings ( ) {
255412 let results = vec ! [ EvalFixtureResult {
0 commit comments