Skip to content

Commit 8e23e1e

Browse files
committed
Add rule-priority ordering and eval regression quality gates
1 parent 44a5ccb commit 8e23e1e

File tree

5 files changed

+654
-47
lines changed

5 files changed

+654
-47
lines changed

.diffscope.yml.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ rules_files:
6969
- ".diffscope-rules.yml"
7070
- "rules/**/*.yml"
7171
max_active_rules: 30
72+
rule_priority:
73+
- "sec.shell.injection"
74+
- "sec.auth.guard"
75+
- "reliability.unwrap_panic"
7276

7377
# API configuration (optional - can use environment variables)
7478
# api_key: your-api-key-here

.github/workflows/eval.yml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
# Eval Quality: builds the PR branch and origin/main, runs `diffscope eval`
# on both, and fails the PR when the threshold flags passed below are violated.
name: Eval Quality

on:
  pull_request:
    paths:
      - 'src/**'
      - 'eval/**'
      - '.github/workflows/eval.yml'
      - 'Cargo.toml'
      - 'Cargo.lock'
  workflow_dispatch:

permissions:
  contents: read

env:
  CARGO_TERM_COLOR: always

jobs:
  # The `secrets` context cannot be referenced in a job-level `if:` expression
  # (GitHub rejects the workflow with "Unrecognized named-value: 'secrets'").
  # This gate job reads the secret through a step-level env var and publishes
  # only a boolean, which the downstream jobs consume via `needs`.
  check-secret:
    runs-on: ubuntu-latest
    outputs:
      has_key: ${{ steps.check.outputs.has_key }}
    steps:
      - name: Detect OPENAI_API_KEY
        id: check
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          if [ -n "$OPENAI_API_KEY" ]; then
            echo "has_key=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_key=false" >> "$GITHUB_OUTPUT"
          fi

  eval:
    needs: check-secret
    if: ${{ needs.check-secret.outputs.has_key == 'true' }}
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2

      - name: Build current branch binary
        run: cargo build --release

      # Baseline is produced from a separate worktree so the PR checkout and
      # its already-built binary are left untouched.
      - name: Build baseline report from origin/main
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          git fetch origin main --depth=1
          git worktree add /tmp/diffscope-main origin/main
          cd /tmp/diffscope-main
          cargo build --release
          ./target/release/diffscope eval \
            --model gpt-4o-mini \
            --temperature 0 \
            --fixtures eval/fixtures \
            --output /tmp/eval-baseline.json

      - name: Run eval thresholds on current branch
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          ./target/release/diffscope eval \
            --model gpt-4o-mini \
            --temperature 0 \
            --fixtures eval/fixtures \
            --output eval-current.json \
            --baseline /tmp/eval-baseline.json \
            --max-micro-f1-drop 0.20 \
            --min-micro-f1 0.20 \
            --min-rule-f1 sec.shell.injection=0.10 \
            --min-rule-f1 reliability.unwrap_panic=0.10 \
            --max-rule-f1-drop sec.shell.injection=0.25 \
            --max-rule-f1-drop reliability.unwrap_panic=0.25

      # `always()` keeps the reports available even when the threshold step fails.
      - name: Upload eval reports
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: eval-reports
          path: |
            eval-current.json
            /tmp/eval-baseline.json

  eval-skipped:
    needs: check-secret
    if: ${{ needs.check-secret.outputs.has_key != 'true' }}
    runs-on: ubuntu-latest
    steps:
      - name: Skip message
        run: echo "Skipping eval workflow because OPENAI_API_KEY secret is not configured."

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,18 @@ expect:
146146
Starter fixtures live in `eval/fixtures/repo_regressions`.
147147
Markdown and smart-review reports now include rule-level issue breakdown tables when rule ids are available.
148148

149+
Threshold flags for CI gates:
150+
```bash
151+
diffscope eval \
152+
--fixtures eval/fixtures \
153+
--output eval-report.json \
154+
--baseline eval-baseline.json \
155+
--max-micro-f1-drop 0.15 \
156+
--min-micro-f1 0.30 \
157+
--min-rule-f1 sec.shell.injection=0.20 \
158+
--max-rule-f1-drop sec.shell.injection=0.15
159+
```
160+
149161
### Smart Review (Enhanced Analysis)
150162
```bash
151163
# Get professional-grade analysis with confidence scoring
@@ -284,6 +296,10 @@ rules_files:
284296
- ".diffscope-rules.yml"
285297
- "rules/**/*.yml"
286298
max_active_rules: 30
299+
rule_priority:
300+
- "sec.shell.injection"
301+
- "sec.auth.guard"
302+
- "reliability.unwrap_panic"
287303
288304
# Built-in plugins (enabled by default)
289305
plugins:
@@ -517,6 +533,8 @@ Jonathan Haas <jonathan@haas.holdings>
517533
518534
## Advanced CI/CD Integration
519535
536+
See `.github/workflows/eval.yml` for a ready-to-run quality gate that compares PR eval metrics against `origin/main` and fails on micro-F1 or rule-level regressions.
537+
520538
### Enterprise GitHub Actions Workflow
521539
522540
Here's an example of how large organizations use diffscope in production CI/CD pipelines:

src/config.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ pub struct Config {
104104

105105
#[serde(default = "default_max_active_rules")]
106106
pub max_active_rules: usize,
107+
108+
#[serde(default)]
109+
pub rule_priority: Vec<String>,
107110
}
108111

109112
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
@@ -210,6 +213,7 @@ impl Default for Config {
210213
pattern_repositories: Vec::new(),
211214
rules_files: Vec::new(),
212215
max_active_rules: default_max_active_rules(),
216+
rule_priority: Vec::new(),
213217
}
214218
}
215219
}
@@ -420,6 +424,17 @@ impl Config {
420424
if self.max_active_rules == 0 {
421425
self.max_active_rules = default_max_active_rules();
422426
}
427+
self.rule_priority = self
428+
.rule_priority
429+
.iter()
430+
.map(|rule| rule.trim().to_ascii_lowercase())
431+
.filter(|rule| !rule.is_empty())
432+
.fold(Vec::new(), |mut acc, rule| {
433+
if !acc.contains(&rule) {
434+
acc.push(rule);
435+
}
436+
acc
437+
});
423438
}
424439

425440
pub fn get_path_config(&self, file_path: &Path) -> Option<&PathConfig> {

0 commit comments

Comments
 (0)