Skip to content

chore(deps): bump rustls-webpki from 0.103.9 to 0.103.10 #171

chore(deps): bump rustls-webpki from 0.103.9 to 0.103.10

chore(deps): bump rustls-webpki from 0.103.9 to 0.103.10 #171

Workflow file for this run

# Workflow identity, triggers, token permissions and shared environment.
name: Eval Quality

on:
  # Manual runs from the Actions tab.
  workflow_dispatch:
  # Pull requests, but only when a file matching one of these filters changed.
  pull_request:
    paths:
      - 'src/**'
      - 'eval/**'
      - '.github/workflows/eval.yml'
      - 'Cargo.toml'
      - 'Cargo.lock'

# The job only reads repository contents; no write scopes are granted.
permissions:
  contents: read

env:
  CARGO_TERM_COLOR: always
jobs:
  # Builds the checked-out ref and origin/main, runs `diffscope eval` on both,
  # and uploads the two JSON reports. Every real step is gated on the
  # OPENAI_API_KEY secret being present; otherwise only the skip message runs.
  eval:
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      # Secrets cannot be referenced directly in a step `if:` expression, so
      # publish "is the key set?" as a step output that later steps test.
      - name: Check eval secret
        id: secret-check
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          if [ -n "${OPENAI_API_KEY}" ]; then
            echo "configured=true" >> "$GITHUB_OUTPUT"
          else
            echo "configured=false" >> "$GITHUB_OUTPUT"
          fi

      - uses: actions/checkout@v5
        if: ${{ steps.secret-check.outputs.configured == 'true' }}
        with:
          # Full history, all branches (this is what makes origin/main
          # available to the baseline step below).
          fetch-depth: 0

      - uses: dtolnay/rust-toolchain@1.88.0
        if: ${{ steps.secret-check.outputs.configured == 'true' }}

      - uses: Swatinem/rust-cache@v2
        if: ${{ steps.secret-check.outputs.configured == 'true' }}

      # Produces ./target/release/diffscope, used by the threshold step.
      - name: Build current branch binary
        if: ${{ steps.secret-check.outputs.configured == 'true' }}
        run: cargo build --release

      # Builds origin/main in a separate worktree (so the branch binary above
      # is left untouched) and writes its report to /tmp/eval-baseline.json.
      - name: Build baseline report from origin/main
        if: ${{ steps.secret-check.outputs.configured == 'true' }}
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          # No --depth here: the checkout above fetched full history, and a
          # depth-limited fetch would mark that complete clone as shallow.
          git fetch origin main
          git worktree add /tmp/diffscope-main origin/main
          cd /tmp/diffscope-main
          cargo build --release
          ./target/release/diffscope eval \
            --model gpt-4o-mini \
            --temperature 0 \
            --fixtures eval/fixtures \
            --output /tmp/eval-baseline.json

      # Runs the branch binary with the baseline report and threshold flags;
      # a non-zero exit from `diffscope eval` fails the job.
      - name: Run eval thresholds on current branch
        if: ${{ steps.secret-check.outputs.configured == 'true' }}
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          ./target/release/diffscope eval \
            --model gpt-4o-mini \
            --temperature 0 \
            --fixtures eval/fixtures \
            --output eval-current.json \
            --baseline /tmp/eval-baseline.json \
            --max-micro-f1-drop 0.20 \
            --min-micro-f1 0.20 \
            --min-verification-health 0.80 \
            --min-rule-f1 sec.shell.injection=0.10 \
            --min-rule-f1 reliability.unwrap_panic=0.10 \
            --max-rule-f1-drop sec.shell.injection=0.25 \
            --max-rule-f1-drop reliability.unwrap_panic=0.25

      # always() keeps this step running after a failed earlier step, so
      # whatever reports were written are still uploaded.
      - name: Upload eval reports
        if: ${{ always() && steps.secret-check.outputs.configured == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          name: eval-reports
          path: |
            eval-current.json
            /tmp/eval-baseline.json

      # The only step that runs when the secret is absent.
      - name: Skip message
        if: ${{ steps.secret-check.outputs.configured != 'true' }}
        run: echo "Skipping eval workflow because OPENAI_API_KEY secret is not configured."