evalops
diff --git a/‎src/core/code_summary.rs‎
Lines changed: 59 additions & 2 deletions b/‎src/core/code_summary.rs‎
Lines changed: 59 additions & 2 deletions
diff --git a/‎src/core/composable_pipeline.rs‎
Lines changed: 55 additions & 0 deletions b/‎src/core/composable_pipeline.rs‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎src/core/convention_learner.rs‎
Lines changed: 109 additions & 3 deletions b/‎src/core/convention_learner.rs‎
Lines changed: 109 additions & 3 deletions
diff --git a/‎src/core/eval_benchmarks.rs‎
Lines changed: 46 additions & 0 deletions b/‎src/core/eval_benchmarks.rs‎
Lines changed: 46 additions & 0 deletions
@@ -397,13 +397,18 @@ fn extract_code_blocks(content: &str, language: &str) -> Vec<(String, String, us
         }
     }
 
+    if lines.is_empty() {
+        return blocks;
+    }
+
     for (i, (name, start)) in matches.iter().enumerate() {
         let end = if i + 1 < matches.len() {
             matches[i + 1].1.saturating_sub(1)
         } else {
-            lines.len().saturating_sub(1)
+            lines.len() - 1
         };
-        let code = lines[*start..=end.min(lines.len() - 1)].join("\n");
+        let end = end.min(lines.len() - 1);
+        let code = lines[*start..=end].join("\n");
         blocks.push((name.clone(), code, start + 1, end + 1));
     }
 
@@ -649,4 +654,56 @@ mod tests {
         assert!(removed.is_some());
         assert_eq!(cache.len(), 0);
     }
+
+    #[test]
+    fn test_summarize_empty_code() {
+        let summary = summarize_code_heuristic("empty_func", "", Path::new("test.rs"), (1, 1));
+        assert!(summary.summary.contains("empty_func"));
+    }
+
+    #[test]
+    fn test_extract_code_blocks_empty_content() {
+        let blocks = extract_code_blocks("", "rs");
+        assert!(blocks.is_empty());
+    }
+
+    #[test]
+    fn test_extract_code_blocks_no_functions() {
+        let blocks = extract_code_blocks("let x = 1;\nlet y = 2;\n", "rs");
+        assert!(blocks.is_empty());
+    }
+
+    #[test]
+    fn test_summarize_file_symbols_empty_content() {
+        let mut cache = SummaryCache::new();
+        let results = summarize_file_symbols(Path::new("empty.rs"), "", &mut cache);
+        assert!(results.is_empty());
+    }
+
+    #[test]
+    fn test_build_embedding_text_truncation() {
+        let long_code = "x".repeat(1000);
+        let embedding = build_embedding_text("long_func", "A function", &long_code);
+        assert!(embedding.len() <= 600); // summary + truncated code
+    }
+
+    #[test]
+    fn test_detect_symbol_kind_unknown_language() {
+        let kind = detect_symbol_kind("something()", "");
+        assert_eq!(kind, "Symbol");
+    }
+
+    #[test]
+    fn test_cache_remove_nonexistent() {
+        let mut cache = SummaryCache::new();
+        let removed = cache.remove(Path::new("nope.rs"), "missing");
+        assert!(removed.is_none());
+    }
+
+    #[test]
+    fn test_summarize_single_line_function() {
+        let code = "fn one_liner() -> bool { true }";
+        let summary = summarize_code_heuristic("one_liner", code, Path::new("test.rs"), (1, 1));
+        assert!(summary.summary.contains("one_liner"));
+    }
 }
@@ -619,4 +619,59 @@ mod tests {
         assert_eq!(ctx.diffs.len(), 1);
         assert!(ctx.comments.is_empty());
     }
+
+    #[test]
+    fn test_empty_pipeline_no_stages() {
+        let pipeline = Pipeline::new();
+        let mut ctx = PipelineContext::new();
+        let result = pipeline.execute(&mut ctx);
+        assert!(result.is_ok());
+        assert!(ctx.stage_results.is_empty());
+    }
+
+    #[test]
+    fn test_pipeline_abort_stops_later_stages() {
+        let pipeline = PipelineBuilder::new()
+            .add(Box::new(FnStage::new(
+                "aborter",
+                StageType::Custom("test".to_string()),
+                |ctx| {
+                    ctx.abort("test abort");
+                    Ok(())
+                },
+            )))
+            .add(Box::new(TaggingStage))
+            .build();
+
+        let mut ctx = PipelineContext::new();
+        let result = pipeline.execute(&mut ctx);
+        assert!(result.is_ok());
+        assert!(ctx.aborted);
+        assert_eq!(ctx.abort_reason.as_deref(), Some("test abort"));
+        // Tagging stage should not have run
+        assert_eq!(ctx.stage_results.len(), 1);
+    }
+
+    #[test]
+    fn test_max_comments_truncates() {
+        let mut ctx = PipelineContext::new();
+        for i in 0..10 {
+            ctx.comments.push(make_comment("test.rs", i + 1, &format!("comment {i}"), 0.8));
+        }
+
+        let stage = MaxCommentsStage::new(5);
+        stage.execute(&mut ctx).unwrap();
+        assert_eq!(ctx.comments.len(), 5);
+    }
+
+    #[test]
+    fn test_confidence_filter_removes_low() {
+        let mut ctx = PipelineContext::new();
+        ctx.comments.push(make_comment("test.rs", 1, "low confidence", 0.3));
+        ctx.comments.push(make_comment("test.rs", 2, "high confidence", 0.9));
+
+        let stage = ConfidenceFilterStage::new(0.5);
+        stage.execute(&mut ctx).unwrap();
+        assert_eq!(ctx.comments.len(), 1);
+    }
 }
@@ -36,7 +36,7 @@ impl ConventionPattern {
         let z = 1.96; // 95% confidence
         let denominator = 1.0 + z * z / n;
         let center = p + z * z / (2.0 * n);
-        let spread = z * ((p * (1.0 - p) + z * z / (4.0 * n)) / n).sqrt();
+        let spread = z * ((p * (1.0 - p) / n) + (z * z / (4.0 * n * n))).sqrt();
         ((center - spread) / denominator).clamp(0.0, 1.0)
     }
 
@@ -259,9 +259,10 @@ impl ConventionStore {
 /// Normalize comment text into a pattern key (lowercased, stopwords removed).
 fn normalize_pattern(text: &str) -> String {
     let lower = text.to_lowercase();
-    let tokens: Vec<&str> = lower
-        .split_whitespace()
+    let tokens: Vec<String> = lower
+        .split(|c: char| !c.is_alphanumeric() && c != '_')
         .filter(|w| w.len() > 2 && !STOPWORDS.contains(w))
+        .map(|w| w.to_string())
         .collect();
     tokens.join(" ")
 }
@@ -525,4 +526,109 @@ mod tests {
         let suppressed = store.suppression_patterns();
         assert_eq!(suppressed.len(), 1);
     }
+
+    #[test]
+    fn test_normalize_all_stopwords_returns_empty() {
+        let result = normalize_pattern("the and for are but not");
+        assert!(result.is_empty());
+    }
+
+    #[test]
+    fn test_normalize_short_words_filtered() {
+        let result = normalize_pattern("a b c do be");
+        assert!(result.is_empty());
+    }
+
+    #[test]
+    fn test_normalize_strips_punctuation() {
+        let a = normalize_pattern("Missing error-handling for API calls");
+        let b = normalize_pattern("Missing error handling for API calls");
+        // Hyphen should be treated as separator, matching split behavior of extract_tokens
+        assert!(a.contains("missing"));
+        assert!(a.contains("error"));
+        assert!(a.contains("handling"));
+        assert_eq!(a, b);
+    }
+
+    #[test]
+    fn test_confidence_single_observation() {
+        let pattern = ConventionPattern {
+            pattern_text: "test".to_string(),
+            category: "Bug".to_string(),
+            accepted_count: 1,
+            rejected_count: 0,
+            file_patterns: Vec::new(),
+            first_seen: "2024-01-01".to_string(),
+            last_seen: "2024-01-01".to_string(),
+        };
+        // Single observation should return 0 confidence (not enough data)
+        assert_eq!(pattern.confidence(), 0.0);
+    }
+
+    #[test]
+    fn test_confidence_all_accepted() {
+        let pattern = ConventionPattern {
+            pattern_text: "test".to_string(),
+            category: "Bug".to_string(),
+            accepted_count: 10,
+            rejected_count: 0,
+            file_patterns: Vec::new(),
+            first_seen: "2024-01-01".to_string(),
+            last_seen: "2024-01-01".to_string(),
+        };
+        let conf = pattern.confidence();
+        assert!(conf > 0.5, "High acceptance should yield high confidence: {conf}");
+    }
+
+    #[test]
+    fn test_confidence_all_rejected() {
+        let pattern = ConventionPattern {
+            pattern_text: "test".to_string(),
+            category: "Bug".to_string(),
+            accepted_count: 0,
+            rejected_count: 10,
+            file_patterns: Vec::new(),
+            first_seen: "2024-01-01".to_string(),
+            last_seen: "2024-01-01".to_string(),
+        };
+        let conf = pattern.confidence();
+        assert!(conf < 0.1, "All rejected should yield low confidence: {conf}");
+    }
+
+    #[test]
+    fn test_score_comment_pattern_fallthrough() {
+        // Pattern exists, category matches, but neither suppress nor boost threshold met
+        let mut store = ConventionStore::new();
+        // 2 accepted, 1 rejected = 66% acceptance (below boost 75%, above suppress 25%)
+        store.record_feedback("borderline pattern", "Bug", true, None, "2024-01-01");
+        store.record_feedback("borderline pattern", "Bug", true, None, "2024-01-02");
+        store.record_feedback("borderline pattern", "Bug", false, None, "2024-01-03");
+
+        let score = store.score_comment("borderline pattern", "Bug");
+        // Should fall through to token-based scoring, not hit suppress/boost early returns
+        assert!(score.abs() <= 0.3);
+    }
+
+    #[test]
+    fn test_record_feedback_empty_comment() {
+        let mut store = ConventionStore::new();
+        store.record_feedback("", "Bug", true, None, "2024-01-01");
+        // Empty comment normalizes to empty key, should be rejected
+        assert!(store.patterns.is_empty());
+    }
+
+    #[test]
+    fn test_extract_tokens_consistency() {
+        // Verify extract_tokens and normalize_pattern produce compatible results
+        let text = "Missing error handling for API call";
+        let normalized = normalize_pattern(text);
+        let tokens = extract_tokens(text);
+        // Every token should appear in the normalized pattern
+        for token in &tokens {
+            assert!(
+                normalized.contains(token.as_str()),
+                "Token '{token}' not found in normalized '{normalized}'"
+            );
+        }
+    }
 }
@@ -829,4 +829,50 @@ mod tests {
         assert!(t.min_precision > 0.0);
         assert!(t.max_false_positive_rate > 0.0);
     }
+
+    #[test]
+    fn test_fixture_result_all_zeros() {
+        let result = FixtureResult::compute("zero", 0, 0, 0, 0, 0);
+        // No TPs, no FPs, no FNs — precision and recall default to 1.0
+        assert!((result.precision - 1.0).abs() < f32::EPSILON);
+        assert!((result.recall - 1.0).abs() < f32::EPSILON);
+    }
+
+    #[test]
+    fn test_fixture_result_perfect_score() {
+        let result = FixtureResult::compute("perfect", 5, 0, 5, 0, 0);
+        assert!((result.precision - 1.0).abs() < f32::EPSILON);
+        assert!((result.recall - 1.0).abs() < f32::EPSILON);
+        assert!((result.f1 - 1.0).abs() < f32::EPSILON);
+    }
+
+    #[test]
+    fn test_fixture_result_no_true_positives() {
+        let result = FixtureResult::compute("bad", 5, 0, 0, 0, 5);
+        assert!((result.precision).abs() < f32::EPSILON);
+        assert!((result.recall).abs() < f32::EPSILON);
+    }
+
+    #[test]
+    fn test_aggregate_metrics_empty() {
+        let agg = AggregateMetrics::compute(&[], None);
+        assert_eq!(agg.fixture_count, 0);
+    }
+
+    #[test]
+    fn test_aggregate_metrics_single_fixture() {
+        let result = FixtureResult::compute("single", 10, 0, 8, 0, 2);
+        let agg = AggregateMetrics::compute(&[&result], None);
+        assert_eq!(agg.fixture_count, 1);
+        assert!(agg.micro_precision > 0.0);
+        assert!(agg.micro_recall > 0.0);
+    }
+
+    #[test]
+    fn test_empty_suite_evaluation() {
+        let suite = BenchmarkSuite::new("empty", "No fixtures");
+        assert_eq!(suite.fixture_count(), 0);
+        let by_cat = suite.fixtures_by_category();
+        assert!(by_cat.is_empty());
+    }
 }