@@ -61,22 +61,60 @@ pub(in super::super) fn build_verification_health(
6161) -> Option < EvalVerificationHealth > {
6262 let warnings_total = results
6363 . iter ( )
64- . map ( |result| result. warnings . len ( ) )
64+ . map ( |result| {
65+ result
66+ . warnings
67+ . iter ( )
68+ . filter ( |warning| is_verification_warning ( warning) )
69+ . count ( )
70+ } )
6571 . sum :: < usize > ( ) ;
66- if warnings_total == 0 {
67- return None ;
68- }
6972
7073 let mut health = EvalVerificationHealth {
7174 warnings_total,
7275 fixtures_with_warnings : results
7376 . iter ( )
74- . filter ( |result| !result. warnings . is_empty ( ) )
77+ . filter ( |result| {
78+ result
79+ . warnings
80+ . iter ( )
81+ . any ( |warning| is_verification_warning ( warning) )
82+ } )
7583 . count ( ) ,
7684 ..Default :: default ( )
7785 } ;
7886
87+ let mut observed_verification = false ;
88+ for result in results {
89+ if let Some ( report) = result. verification_report . as_ref ( ) {
90+ observed_verification = true ;
91+ for judge in & report. judges {
92+ health. total_checks += judge. total_comments ;
93+ health. verified_checks += judge. passed_comments + judge. filtered_comments ;
94+ }
95+ } else if result. total_comments > 0
96+ && result
97+ . warnings
98+ . iter ( )
99+ . any ( |warning| is_verification_warning ( warning) )
100+ {
101+ observed_verification = true ;
102+ health. total_checks += result. total_comments ;
103+ }
104+ }
105+
106+ if health. total_checks > 0 {
107+ health. verified_pct = health. verified_checks as f32 / health. total_checks as f32 ;
108+ }
109+
110+ if !observed_verification && warnings_total == 0 {
111+ return None ;
112+ }
113+
79114 for warning in results. iter ( ) . flat_map ( |result| & result. warnings ) {
115+ if !is_verification_warning ( warning) {
116+ continue ;
117+ }
80118 let lower = warning. to_ascii_lowercase ( ) ;
81119 if lower. contains ( "verification fail-open kept" ) {
82120 health. fail_open_warning_count += 1 ;
@@ -92,6 +130,11 @@ pub(in super::super) fn build_verification_health(
92130 Some ( health)
93131}
94132
/// Reports whether `warning` refers to the verification stage.
///
/// The warning text is lowercased (ASCII only) and then searched for the
/// substrings `"verification"` and `"verifier"`; a hit on either one makes
/// the warning count as verification-related.
fn is_verification_warning(warning: &str) -> bool {
    const MARKERS: [&str; 2] = ["verification", "verifier"];

    let normalized = warning.to_ascii_lowercase();
    MARKERS.iter().any(|marker| normalized.contains(*marker))
}
137+
95138fn build_comparison (
96139 name : String ,
97140 current : & BenchmarkAggregateMetrics ,
@@ -112,6 +155,7 @@ fn build_comparison(
112155
113156#[ cfg( test) ]
114157mod tests {
158+ use crate :: commands:: eval:: { EvalVerificationJudgeReport , EvalVerificationReport } ;
115159 use crate :: core:: eval_benchmarks:: AggregateMetrics ;
116160
117161 use super :: * ;
@@ -196,10 +240,58 @@ mod tests {
196240 ] ;
197241
198242 let health = build_verification_health ( & results) . unwrap ( ) ;
243+ assert_eq ! ( health. verified_checks, 0 ) ;
244+ assert_eq ! ( health. total_checks, 1 ) ;
245+ assert_eq ! ( health. verified_pct, 0.0 ) ;
199246 assert_eq ! ( health. warnings_total, 2 ) ;
200247 assert_eq ! ( health. fixtures_with_warnings, 1 ) ;
201248 assert_eq ! ( health. fail_open_warning_count, 2 ) ;
202249 assert_eq ! ( health. parse_failure_count, 1 ) ;
203250 assert_eq ! ( health. request_failure_count, 1 ) ;
204251 }
252+
/// A fixture that carries a verification report but no warnings must still
/// yield a health summary, with check counts taken from the judge report.
#[test]
fn build_verification_health_uses_judge_reports_without_warnings() {
    // One judge that saw five comments: three passed, one filtered, one abstained.
    let judge = EvalVerificationJudgeReport {
        model: String::from("judge"),
        total_comments: 5,
        passed_comments: 3,
        filtered_comments: 1,
        abstained_comments: 1,
        warnings: Vec::new(),
    };

    let report = EvalVerificationReport {
        consensus_mode: String::from("majority"),
        required_votes: 1,
        judge_count: 1,
        judges: vec![judge],
    };

    let fixture = EvalFixtureResult {
        fixture: String::from("suite/a"),
        suite: Some(String::from("suite")),
        passed: true,
        total_comments: 5,
        required_matches: 1,
        required_total: 1,
        benchmark_metrics: None,
        suite_thresholds: None,
        difficulty: None,
        metadata: None,
        rule_metrics: Vec::new(),
        rule_summary: None,
        // Deliberately empty: the summary must not depend on warnings being present.
        warnings: Vec::new(),
        verification_report: Some(report),
        agent_activity: None,
        reproduction_summary: None,
        artifact_path: None,
        failures: Vec::new(),
        dag_traces: Vec::new(),
    };
    let results = vec![fixture];

    let health = build_verification_health(&results).unwrap();

    // Expected: 4 verified checks out of 5 total, i.e. a ratio of 0.8.
    assert_eq!(health.verified_checks, 4);
    assert_eq!(health.total_checks, 5);
    assert!((health.verified_pct - 0.8).abs() < f32::EPSILON);
    // No warnings were supplied, so both warning counters stay at zero.
    assert_eq!(health.warnings_total, 0);
    assert_eq!(health.fixtures_with_warnings, 0);
}
205297}
0 commit comments