Add rule-priority ordering and eval regression quality gates

haasonsaas · haasonsaas · commit 8e23e1e368a7 · 2026-03-07T19:42:25.000-08:00
diff --git a/.diffscope.yml.example b/.diffscope.yml.example
@@ -69,6 +69,10 @@ rules_files:
   - ".diffscope-rules.yml"
   - "rules/**/*.yml"
 max_active_rules: 30
+rule_priority:
+  - "sec.shell.injection"
+  - "sec.auth.guard"
+  - "reliability.unwrap_panic"
 
 # API configuration (optional - can use environment variables)
 # api_key: your-api-key-here
diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
@@ -0,0 +1,101 @@
+name: Eval Quality
+
+on:
+  pull_request:
+    paths:
+      - 'src/**'
+      - 'eval/**'
+      - '.github/workflows/eval.yml'
+      - 'Cargo.toml'
+      - 'Cargo.lock'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  # The `secrets` context is not available in job-level `if:` expressions,
+  # so a small probe job turns "is the key configured?" into a job output
+  # that the downstream jobs can branch on via the `needs` context.
+  check-secret:
+    runs-on: ubuntu-latest
+    outputs:
+      has_key: ${{ steps.probe.outputs.has_key }}
+    steps:
+      - name: Detect OPENAI_API_KEY
+        id: probe
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: |
+          if [ -n "$OPENAI_API_KEY" ]; then
+            echo "has_key=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "has_key=false" >> "$GITHUB_OUTPUT"
+          fi
+
+  eval:
+    needs: check-secret
+    if: ${{ needs.check-secret.outputs.has_key == 'true' }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
+
+      - name: Build current branch binary
+        run: cargo build --release
+
+      - name: Build baseline report from origin/main
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: |
+          git fetch origin main --depth=1
+          git worktree add /tmp/diffscope-main origin/main
+          cd /tmp/diffscope-main
+          cargo build --release
+          ./target/release/diffscope eval \
+            --model gpt-4o-mini \
+            --temperature 0 \
+            --fixtures eval/fixtures \
+            --output /tmp/eval-baseline.json
+
+      - name: Run eval thresholds on current branch
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: |
+          ./target/release/diffscope eval \
+            --model gpt-4o-mini \
+            --temperature 0 \
+            --fixtures eval/fixtures \
+            --output eval-current.json \
+            --baseline /tmp/eval-baseline.json \
+            --max-micro-f1-drop 0.20 \
+            --min-micro-f1 0.20 \
+            --min-rule-f1 sec.shell.injection=0.10 \
+            --min-rule-f1 reliability.unwrap_panic=0.10 \
+            --max-rule-f1-drop sec.shell.injection=0.25 \
+            --max-rule-f1-drop reliability.unwrap_panic=0.25
+
+      - name: Upload eval reports
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-reports
+          path: |
+            eval-current.json
+            /tmp/eval-baseline.json
+
+  eval-skipped:
+    needs: check-secret
+    if: ${{ needs.check-secret.outputs.has_key != 'true' }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Skip message
+        run: echo "Skipping eval workflow because OPENAI_API_KEY secret is not configured."
diff --git a/README.md b/README.md
@@ -146,6 +146,18 @@ expect:
 Starter fixtures live in `eval/fixtures/repo_regressions`.
 Markdown and smart-review reports now include rule-level issue breakdown tables when rule ids are available.
 
+Threshold flags for CI gates:
+```bash
+diffscope eval \
+  --fixtures eval/fixtures \
+  --output eval-report.json \
+  --baseline eval-baseline.json \
+  --max-micro-f1-drop 0.15 \
+  --min-micro-f1 0.30 \
+  --min-rule-f1 sec.shell.injection=0.20 \
+  --max-rule-f1-drop sec.shell.injection=0.15
+```
+
 ### Smart Review (Enhanced Analysis)
 ```bash
 # Get professional-grade analysis with confidence scoring
@@ -284,6 +296,10 @@ rules_files:
   - ".diffscope-rules.yml"
   - "rules/**/*.yml"
 max_active_rules: 30
+rule_priority:
+  - "sec.shell.injection"
+  - "sec.auth.guard"
+  - "reliability.unwrap_panic"
 
 # Built-in plugins (enabled by default)
 plugins:
@@ -517,6 +533,8 @@ Jonathan Haas <jonathan@haas.holdings>
 
 ## Advanced CI/CD Integration
 
+See `.github/workflows/eval.yml` for a ready-to-run quality gate that compares PR eval metrics against `origin/main` and fails on micro-F1 or rule-level regressions.
+
 ### Enterprise GitHub Actions Workflow
 
 Here's an example of how large organizations use diffscope in production CI/CD pipelines:
diff --git a/src/config.rs b/src/config.rs
@@ -104,6 +104,9 @@ pub struct Config {
 
     #[serde(default = "default_max_active_rules")]
     pub max_active_rules: usize,
+
+    #[serde(default)]
+    pub rule_priority: Vec<String>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
@@ -210,6 +213,7 @@ impl Default for Config {
             pattern_repositories: Vec::new(),
             rules_files: Vec::new(),
             max_active_rules: default_max_active_rules(),
+            rule_priority: Vec::new(),
         }
     }
 }
@@ -420,6 +424,17 @@ impl Config {
         if self.max_active_rules == 0 {
             self.max_active_rules = default_max_active_rules();
         }
+        self.rule_priority = self
+            .rule_priority
+            .iter()
+            .map(|rule| rule.trim().to_ascii_lowercase())
+            .filter(|rule| !rule.is_empty())
+            .fold(Vec::new(), |mut acc, rule| {
+                if !acc.contains(&rule) {
+                    acc.push(rule);
+                }
+                acc
+            });
     }
 
     pub fn get_path_config(&self, file_path: &Path) -> Option<&PathConfig> {
diff --git a/src/main.rs b/src/main.rs

Original file line number	Diff line number	Diff line change
`@@ -104,6 +104,9 @@ pub struct Config {`
`104`	`104`
`105`	`105`	`#[serde(default = "default_max_active_rules")]`
`106`	`106`	`pub max_active_rules: usize,`
	`107`	`+`
	`108`	`+ #[serde(default)]`
	`109`	`+ pub rule_priority: Vec<String>,`
`107`	`110`	`}`
`108`	`111`
`109`	`112`	`#[derive(Debug, Clone, Serialize, Deserialize, Default)]`
`@@ -210,6 +213,7 @@ impl Default for Config {`
`210`	`213`	`pattern_repositories: Vec::new(),`
`211`	`214`	`rules_files: Vec::new(),`
`212`	`215`	`max_active_rules: default_max_active_rules(),`
	`216`	`+ rule_priority: Vec::new(),`
`213`	`217`	`}`
`214`	`218`	`}`
`215`	`219`	`}`
`@@ -420,6 +424,17 @@ impl Config {`
`420`	`424`	`if self.max_active_rules == 0 {`
`421`	`425`	`self.max_active_rules = default_max_active_rules();`
`422`	`426`	`}`
	`427`	`+ self.rule_priority = self`
	`428`	`+ .rule_priority`
	`429`	`+ .iter()`
	`430`	`+ .map(\|rule\| rule.trim().to_ascii_lowercase())`
	`431`	`+ .filter(\|rule\| !rule.is_empty())`
	`432`	`+ .fold(Vec::new(), \|mut acc, rule\| {`
	`433`	`+ if !acc.contains(&rule) {`
	`434`	`+ acc.push(rule);`
	`435`	`+ }`
	`436`	`+ acc`
	`437`	`+ });`
`423`	`438`	`}`
`424`	`439`
`425`	`440`	`pub fn get_path_config(&self, file_path: &Path) -> Option<&PathConfig> {`