evalops
diff --git a/TODO.md b/TODO.md
Lines changed: 143 additions & 0 deletions
diff --git a/src/commands/eval/pattern/matching.rs b/src/commands/eval/pattern/matching.rs
Lines changed: 6 additions & 228 deletions
@@ -42,3 +42,146 @@
 - [x] Keep `ReviewSession` focused on per-review state in `session.rs`.
 - [x] Update imports in `pipeline.rs`, `prepare.rs`, and `postprocess.rs`.
 - [x] Validate, commit, and push.
+
+## Wave 3+ — ongoing carving backlog
+
+### Working rules
+
+- [x] Keep refactors behavior-preserving.
+- [x] Validate every checkpoint with `cargo fmt --check`, `cargo clippy --all-targets -- -D warnings`, `cargo test`, and `bash scripts/check-workflows.sh`.
+- [x] Commit and push after each validated slice.
+- [ ] Prefer files that still mix orchestration + parsing/formatting + persistence.
+- [ ] Prefer files that remain large after the first round of carving or that keep attracting unrelated edits.
+
+### Recently completed checkpoints
+
+- [x] Split `src/commands/eval/report.rs`.
+- [x] Split `src/commands/misc/lsp_check.rs`.
+- [x] Split `src/commands/smart_review.rs`.
+- [x] Split `src/commands/eval/thresholds/evaluation.rs`.
+- [x] Split `src/commands/eval/runner/execute.rs`.
+- [x] Split `src/commands/feedback_eval/report/build/aggregate.rs`.
+
+### Immediate queue
+
+- [x] `src/commands/eval/pattern/matching.rs`: split normalized rule-id helpers, matcher predicates, and focused matcher tests.
+- [ ] `src/commands/eval/metrics/rules.rs`: separate aggregate math, rule counting, and summary reduction helpers.
+- [ ] `src/commands/doctor/endpoint/inference.rs`: split request building, HTTP execution/error handling, and response parsing.
+- [ ] `src/commands/feedback_eval/report/build/stats.rs`: split threshold confusion-matrix scoring from bucket primitives.
+- [ ] `src/commands/doctor/command/display.rs`: separate header/config output, endpoint listing, and inference result rendering.
+- [ ] `src/commands/doctor/command/run.rs`: separate endpoint discovery, recommendation flow, and test helpers.
+- [ ] `src/commands/eval/runner/matching.rs`: split required-match search, unexpected-match detection, and rule metric assembly.
+- [ ] `src/commands/eval/runner/execute/loading.rs`: separate diff resolution from repo-path resolution if it grows again.
+- [ ] `src/commands/feedback_eval/report/examples.rs`: split ranking helpers from example builders.
+- [ ] `src/commands/doctor/system.rs`: carve environment probes vs output helpers.
+
+### Commands backlog
+
+- [ ] `src/commands/eval/types.rs`: split fixture, pattern, report, and run-option types if churn keeps touching unrelated structs.
+- [ ] `src/commands/feedback_eval/types.rs`: separate input payload types from report/output types.
+- [ ] `src/commands/feedback_eval/input/loading.rs`: split format detection from JSON parsing/loading.
+- [ ] `src/commands/feedback_eval/input/conversion.rs`: split review-session conversion from label normalization helpers.
+- [ ] `src/commands/pr.rs`: separate summary-only flow, full review flow, and comment-posting orchestration.
+- [ ] `src/commands/pr/gh.rs`: carve PR resolution, diff fetching, and metadata fetching.
+- [ ] `src/commands/git/suggest.rs`: split commit-message prompting from PR-title prompting and response extraction.
+- [ ] `src/commands/review/command.rs`: split review/check/compare entrypoints if they keep diverging.
+- [ ] `src/commands/misc/feedback/command.rs`: separate file loading/ID normalization from store persistence.
+- [ ] `src/commands/misc/feedback/apply.rs`: split acceptance/rejection counters from store mutation helpers.
+- [ ] `src/commands/misc/discussion/command.rs`: separate the interactive loop from single-shot execution.
+- [ ] `src/commands/misc/discussion/selection.rs`: split file loading/ID repair from selection rules.
+- [ ] `src/commands/misc/changelog.rs`: evaluate splitting changelog collection from output formatting.
+- [ ] `src/commands/eval/command.rs`: separate CLI option prep, fixture execution, and report lifecycle.
+- [ ] `src/commands/feedback_eval/command.rs`: separate input loading from report/output orchestration.
+
+### Review pipeline backlog
+
+- [ ] `src/review/pipeline/execution/responses/processing.rs`: split raw response normalization, comment extraction, and merge logic.
+- [ ] `src/review/pipeline/execution/responses/validation.rs`: separate schema validation from per-comment sanitization.
+- [ ] `src/review/pipeline/prepare/runner.rs`: split per-diff orchestration, pre-analysis/triage decisions, and progress updates.
+- [ ] `src/review/pipeline/context/symbols.rs`: split symbol search, snippet selection, and fallback behavior.
+- [ ] `src/review/pipeline/context/related.rs`: separate related-file discovery from ranking/selection.
+- [ ] `src/review/pipeline/guidance.rs`: carve guidance assembly, repo-support guidance, and prompt-facing formatting.
+- [ ] `src/review/pipeline/session.rs`: split session construction from runtime state transitions.
+- [ ] `src/review/pipeline/services.rs`: separate service wiring from optional feature initialization.
+- [ ] `src/review/pipeline/file_context/sources.rs`: split repo sources, symbol sources, and supplemental context sources.
+- [ ] `src/review/pipeline/comments.rs`: separate comment assembly, filtering, and metadata stamping.
+- [ ] `src/review/pipeline/postprocess/dedup.rs`: split duplicate detection, scoring, and merge/rewrite behavior.
+- [ ] `src/review/pipeline/postprocess/feedback.rs`: separate store lookups from suppression/annotation decisions.
+- [ ] `src/review/pipeline/execution/dispatcher.rs`: carve request scheduling, concurrency control, and result collection.
+- [ ] `src/review/pipeline.rs`: keep trimming top-level orchestration as helpers mature.
+
+### Review helper backlog
+
+- [ ] `src/review/rule_helpers/reporting.rs`: separate rendering/formatting from score/rationale helpers.
+- [ ] `src/review/rule_helpers/runtime.rs`: split runtime state, caching, and dispatch helpers.
+- [ ] `src/review/context_helpers/ranking.rs`: separate scoring inputs from final ranking/selection.
+- [ ] `src/review/context_helpers/pattern_repositories.rs`: split pattern loading, matching, and repo fallback logic.
+- [ ] `src/review/filters.rs`: carve severity/category filters, suppression filters, and dedup-like passes.
+- [ ] `src/review/feedback.rs`: split persistence, semantic examples, and suppression statistics.
+- [ ] `src/review/triage.rs`: separate heuristics, explanations, and scoring/reporting.
+- [ ] `src/review/compression.rs`: split chunking, summarization, and token-budget planning.
+- [ ] `src/review/verification/parser.rs`: separate parser stages and error handling.
+- [ ] `src/review/verification/prompt.rs`: split prompt assembly from example selection.
+
+### Core backlog
+
+- [ ] `src/config.rs`: split defaulting, loading, validation, migration, and path-resolution logic.
+- [ ] `src/core/comment.rs`: separate model types, ID generation, formatting helpers, and feedback-related transforms.
+- [ ] `src/core/symbol_index.rs`: carve command detection, indexing, retrieval, and language-map handling.
+- [ ] `src/core/symbol_graph.rs`: separate graph construction, traversal, and serialization helpers.
+- [ ] `src/core/semantic.rs`: split semantic extraction, matching, and persistence boundaries.
+- [ ] `src/core/pr_summary.rs`: carve stats calculation, prompt generation, response parsing, and diagram support.
+- [ ] `src/core/enhanced_review.rs`: split orchestration, prompt building, and response handling.
+- [ ] `src/core/eval_benchmarks.rs`: separate benchmark fixtures, thresholds, scoring, and aggregation.
+- [ ] `src/core/prompt.rs`: split prompt fragments, model-specific tuning, and reusable builders.
+- [ ] `src/core/context.rs`: separate context assembly, provenance handling, and formatting.
+- [ ] `src/core/offline.rs`: split endpoint/model probing, metadata parsing, and recommendation helpers.
+- [ ] `src/core/function_chunker.rs`: separate parsing, chunk planning, and scoring heuristics.
+- [ ] `src/core/agent_tools.rs`: carve tool registry, schema building, and execution adapters.
+- [ ] `src/core/agent_loop.rs`: separate loop orchestration, state transitions, and tool/result handling.
+- [ ] `src/core/code_summary.rs`: split summary planning, extraction, and formatting.
+- [ ] `src/core/changelog.rs`: separate git/history ingestion from final changelog rendering.
+- [ ] `src/core/multi_pass.rs`: split pass planning, execution, and result merging.
+- [ ] `src/core/composable_pipeline.rs`: separate stage wiring from execution semantics.
+- [ ] `src/core/convention_learner.rs`: split store persistence, scoring, and feedback ingestion.
+- [ ] `src/core/git_history.rs`: carve log collection, parsing, and summarization.
+- [ ] `src/core/diff_parser.rs`: separate unified/text diff parsing, hunk tracking, and post-processing.
+- [ ] `src/core/interactive.rs`: split REPL/input loop, commands, and output formatting.
+
+### Server and storage backlog
+
+- [ ] `src/server/api.rs`: split route handlers by domain plus shared request/response helpers.
+- [ ] `src/server/state.rs`: separate session state, queueing, and persistence coordination.
+- [ ] `src/server/storage_json.rs`: carve file I/O, indexing, migrations, and query helpers.
+- [ ] `src/server/storage_pg.rs`: separate SQL-backed persistence domains and query grouping.
+- [ ] `src/server/github.rs`: split webhook parsing, API interactions, and review-session orchestration.
+- [ ] `src/server/metrics.rs`: separate metric registration from event emission helpers.
+- [ ] `src/server/mod.rs`: keep top-level wiring thin as submodules mature.
+
+### Adapters, parsing, and plugins backlog
+
+- [ ] `src/adapters/llm.rs`: split request shaping, retry/policy logic, and response normalization.
+- [ ] `src/adapters/openai.rs`: carve request builders, streaming handling, and schema/response parsing.
+- [ ] `src/adapters/anthropic.rs`: carve request conversion, retries, and response parsing.
+- [ ] `src/adapters/ollama.rs`: separate local model capabilities, request building, and response parsing.
+- [ ] `src/adapters/common.rs`: split shared retry/auth/http helpers.
+- [ ] `src/parsing/llm_response.rs`: separate fenced-block parsing, comment extraction, and validation.
+- [ ] `src/parsing/smart_response.rs`: split structured smart-review parsing from fallbacks.
+- [ ] `src/plugins/builtin/secret_scanner.rs`: carve rule loading, scanning, and finding shaping.
+- [ ] `src/plugins/builtin/supply_chain.rs`: separate manifest parsing, registry lookups, and finding generation.
+- [ ] `src/plugins/builtin/eslint.rs`: split command execution, parser helpers, and finding conversion.
+- [ ] `src/plugins/builtin/semgrep.rs`: split command assembly, result parsing, and finding mapping.
+- [ ] `src/plugins/builtin/duplicate_filter.rs`: separate fingerprinting from suppression heuristics.
+- [ ] `src/plugins/plugin.rs`: split plugin traits/types from execution helpers.
+
+### Output and entrypoint backlog
+
+- [ ] `src/output/format.rs`: separate smart review formatting, patch output, and walkthrough generation.
+- [ ] `src/main.rs`: carve CLI wiring by command group and shared config/bootstrap helpers.
+- [ ] `src/vault.rs`: split vault discovery, parsing, and maintenance operations.
+
+### Nice-to-have / monitor
+
+- [ ] Revisit freshly split files once they cross roughly 150 LOC again, especially `src/commands/eval/pattern/matching.rs`, `src/commands/eval/metrics/rules.rs`, `src/commands/doctor/endpoint/inference.rs`, and `src/commands/feedback_eval/report/build/stats.rs`.
+- [ ] Keep module roots thin: if a root file only re-exports helpers, leave it alone until child files grow again.
+- [ ] Favor extracting pure helpers and test-only builders before moving async orchestration.
@@ -1,228 +1,6 @@
-use regex::Regex;
-
-use crate::core;
-use crate::review::normalize_rule_id;
-
-use super::super::EvalPattern;
-
-impl EvalPattern {
-    pub(in super::super) fn matches(&self, comment: &core::Comment) -> bool {
-        if self.is_empty() {
-            return false;
-        }
-
-        let content_lower = comment.content.to_ascii_lowercase();
-
-        if let Some(file) = &self.file {
-            let file = file.trim();
-            if !file.is_empty() {
-                let candidate = comment.file_path.to_string_lossy();
-                if !(candidate == file || candidate.ends_with(file)) {
-                    return false;
-                }
-            }
-        }
-
-        if let Some(line) = self.line {
-            if comment.line_number != line {
-                return false;
-            }
-        }
-
-        if let Some(contains) = &self.contains {
-            let needle = contains.trim().to_ascii_lowercase();
-            if !needle.is_empty() && !content_lower.contains(&needle) {
-                return false;
-            }
-        }
-
-        let contains_any: Vec<String> = self
-            .contains_any
-            .iter()
-            .map(|value| value.trim().to_ascii_lowercase())
-            .filter(|value| !value.is_empty())
-            .collect();
-        if !contains_any.is_empty()
-            && !contains_any
-                .iter()
-                .any(|needle| content_lower.contains(needle))
-        {
-            return false;
-        }
-
-        let tags_any: Vec<&str> = self
-            .tags_any
-            .iter()
-            .map(String::as_str)
-            .map(str::trim)
-            .filter(|value| !value.is_empty())
-            .collect();
-        if !tags_any.is_empty()
-            && !tags_any.iter().any(|expected| {
-                comment
-                    .tags
-                    .iter()
-                    .any(|tag| tag.eq_ignore_ascii_case(expected))
-            })
-        {
-            return false;
-        }
-
-        if let Some(pattern) = self.matches_regex.as_deref().map(str::trim) {
-            if !pattern.is_empty()
-                && !Regex::new(pattern)
-                    .map(|regex| regex.is_match(&comment.content))
-                    .unwrap_or(false)
-            {
-                return false;
-            }
-        }
-
-        if let Some(severity) = &self.severity {
-            if !comment
-                .severity
-                .to_string()
-                .eq_ignore_ascii_case(severity.trim())
-            {
-                return false;
-            }
-        }
-
-        if let Some(category) = &self.category {
-            if !comment
-                .category
-                .to_string()
-                .eq_ignore_ascii_case(category.trim())
-            {
-                return false;
-            }
-        }
-
-        if let Some(min_confidence) = self.confidence_at_least {
-            if comment.confidence < min_confidence {
-                return false;
-            }
-        }
-
-        if let Some(max_confidence) = self.confidence_at_most {
-            if comment.confidence > max_confidence {
-                return false;
-            }
-        }
-
-        if let Some(fix_effort) = &self.fix_effort {
-            let expected = fix_effort.trim();
-            if !expected.is_empty()
-                && !format!("{:?}", comment.fix_effort).eq_ignore_ascii_case(expected)
-            {
-                return false;
-            }
-        }
-
-        if let Some(rule_id) = &self.rule_id {
-            if self.require_rule_id {
-                let expected = rule_id.trim().to_ascii_lowercase();
-                let actual = comment
-                    .rule_id
-                    .as_deref()
-                    .map(|value| value.trim().to_ascii_lowercase())
-                    .unwrap_or_default();
-                if expected != actual {
-                    return false;
-                }
-            }
-        }
-
-        true
-    }
-
-    pub(in super::super) fn normalized_rule_id(&self) -> Option<String> {
-        normalize_rule_id(self.rule_id.as_deref())
-    }
-
-    fn is_empty(&self) -> bool {
-        self.file.as_deref().map(str::trim).unwrap_or("").is_empty()
-            && self.line.is_none()
-            && self
-                .contains
-                .as_deref()
-                .map(str::trim)
-                .unwrap_or("")
-                .is_empty()
-            && self
-                .contains_any
-                .iter()
-                .all(|value| value.trim().is_empty())
-            && self
-                .matches_regex
-                .as_deref()
-                .map(str::trim)
-                .unwrap_or("")
-                .is_empty()
-            && self
-                .severity
-                .as_deref()
-                .map(str::trim)
-                .unwrap_or("")
-                .is_empty()
-            && self
-                .category
-                .as_deref()
-                .map(str::trim)
-                .unwrap_or("")
-                .is_empty()
-            && self.tags_any.iter().all(|value| value.trim().is_empty())
-            && self.confidence_at_least.is_none()
-            && self.confidence_at_most.is_none()
-            && self
-                .fix_effort
-                .as_deref()
-                .map(str::trim)
-                .unwrap_or("")
-                .is_empty()
-            && (!self.require_rule_id
-                || self
-                    .rule_id
-                    .as_deref()
-                    .map(str::trim)
-                    .unwrap_or("")
-                    .is_empty())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::core::comment::{Category, FixEffort, Severity};
-    use std::path::PathBuf;
-
-    #[test]
-    fn test_eval_pattern_matches_regex_tags_and_confidence() {
-        let comment = core::Comment {
-            id: "comment-1".to_string(),
-            file_path: PathBuf::from("src/lib.rs"),
-            line_number: 12,
-            content: "Calling panic!(user_input) here can crash the request path".to_string(),
-            rule_id: Some("panic.user-input".to_string()),
-            severity: Severity::Warning,
-            category: Category::Bug,
-            suggestion: Some("Return an error instead of panicking".to_string()),
-            confidence: 0.91,
-            code_suggestion: None,
-            tags: vec!["reliability".to_string(), "panic".to_string()],
-            fix_effort: FixEffort::Low,
-            feedback: None,
-        };
-
-        let pattern = EvalPattern {
-            contains_any: vec!["panic".to_string(), "unwrap".to_string()],
-            matches_regex: Some("panic!\\([^)]*user_input[^)]*\\)".to_string()),
-            tags_any: vec!["security".to_string(), "reliability".to_string()],
-            confidence_at_least: Some(0.9),
-            fix_effort: Some("low".to_string()),
-            ..Default::default()
-        };
-
-        assert!(pattern.matches(&comment));
-    }
-}
+#[path = "matching/predicates.rs"]
+mod predicates;
+#[path = "matching/rule_id.rs"]
+mod rule_id;
+#[path = "matching/run.rs"]
+mod run;