forked from Dicklesworthstone/pi_agent_rust
-
Notifications
You must be signed in to change notification settings - Fork 0
255 lines (225 loc) · 9.56 KB
/
bench.yml
File metadata and controls
255 lines (225 loc) · 9.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# Benchmark CI workflow.
# Triggered by pushes to main, by pull requests targeting main, and by manual
# dispatch, so performance regressions are visible both before and after merge.
name: Benchmarks

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  # Allow manual triggering from the Actions tab.
  workflow_dispatch:

# Applied to every step of every job in this workflow.
env:
  CARGO_TERM_COLOR: always
  # Passed to rustc by cargo: compiler warnings are treated as errors.
  RUSTFLAGS: '-D warnings'
jobs:
  # Builds the release binary, enforces the binary-size budget, runs the
  # Criterion benchmark suites, writes a step summary, runs the perf-budget
  # tests, and uploads the raw results as an artifact.
  benchmark:
    runs-on: ubuntu-latest
    defaults:
      run:
        # The repository is checked out into this subdirectory (see the
        # checkout step), so `run` steps execute from there unless overridden.
        working-directory: pi_agent_rust
        shell: bash
    steps:
      - name: Free disk space
        # Overrides the job default: this runs before checkout, when the
        # pi_agent_rust directory does not exist yet.
        working-directory: /
        run: |
          set -euxo pipefail
          df -h /
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc \
            /opt/hostedtoolcache/CodeQL /opt/hostedtoolcache/go \
            /opt/hostedtoolcache/Python /opt/hostedtoolcache/Ruby \
            /opt/hostedtoolcache/Java_Temurin-Hotspot_jdk \
            /usr/local/share/powershell /usr/share/swift \
            /usr/local/graalvm /usr/local/.ghcup /usr/local/julia*
          # Best-effort cleanups: failure here must not abort the job.
          sudo docker image prune --all --force || true
          sudo apt-get clean || true
          df -h /

      - name: Checkout pi_agent_rust
        uses: actions/checkout@v4
        with:
          path: pi_agent_rust

      - name: Install system deps (bc, fd, rg, xcb) [linux]
        run: |
          set -euxo pipefail
          sudo apt-get update
          sudo apt-get install -y bc fd-find ripgrep libxcb1-dev libxcb-render0-dev libxcb-shape0-dev libxcb-xfixes0-dev
          # The fd-find package installs the binary as `fdfind`; expose it as `fd`.
          sudo ln -sf "$(command -v fdfind)" /usr/local/bin/fd

      - name: Install Rust nightly
        uses: dtolnay/rust-toolchain@nightly
        with:
          components: clippy

      - name: Cache cargo registry
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            pi_agent_rust/target
          key: ${{ runner.os }}-cargo-bench-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-bench-
            ${{ runner.os }}-cargo-

      - name: Standardize benchmark environment
        run: |
          set -euxo pipefail
          # Apply benchmark-optimal OS settings (best-effort, some may fail in containers)
          sudo scripts/bench_env_setup.sh apply || echo "::warning::Could not fully apply bench env (expected in containers)"
          # Emit environment fingerprint for artifact tracking
          mkdir -p target/perf
          scripts/bench_env_setup.sh fingerprint | tee target/perf/bench_env_fingerprint.json || true
          # Validate and report (ANSI colour codes are stripped for the summary)
          echo "## Benchmark Environment" >> "$GITHUB_STEP_SUMMARY"
          echo '```' >> "$GITHUB_STEP_SUMMARY"
          scripts/bench_env_setup.sh validate 2>&1 | sed 's/\x1b\[[0-9;]*m//g' >> "$GITHUB_STEP_SUMMARY" || true
          echo '```' >> "$GITHUB_STEP_SUMMARY"

      - name: Build release binary
        run: cargo build --release

      - name: Check binary size
        run: |
          set -euo pipefail
          BUDGET_MB=20
          BUDGET_BYTES=$(( BUDGET_MB * 1024 * 1024 ))
          SIZE_BYTES=$(stat --printf="%s" target/release/pi)
          # bc is used only to render a human-readable figure for the report.
          SIZE_MB=$(echo "scale=2; $SIZE_BYTES / 1024 / 1024" | bc)
          {
            echo "## Binary Size"
            echo "- Size: ${SIZE_MB}MB"
            echo "- Budget: ${BUDGET_MB}MB"
          } >> "$GITHUB_STEP_SUMMARY"
          # Gate on exact byte counts. The two-decimal MB figure is truncated by
          # bc, so a binary slightly over budget would read 20.00 and slip past
          # a comparison made on the MB value.
          if (( SIZE_BYTES > BUDGET_BYTES )); then
            echo "- Status: ❌ Over budget" >> "$GITHUB_STEP_SUMMARY"
            echo "::error::Binary size ${SIZE_MB}MB (${SIZE_BYTES} bytes) exceeds ${BUDGET_MB}MB budget"
            exit 1
          fi
          echo "- Status: ✅ Within budget" >> "$GITHUB_STEP_SUMMARY"

      - name: Run micro-benchmarks (tools)
        run: cargo bench --bench tools -- --noplot

      - name: Run micro-benchmarks (extensions)
        run: cargo bench --bench extensions -- --noplot

      - name: Run system benchmarks
        run: cargo bench --bench system -- --noplot
        env:
          PI_BENCH_BINARY: target/release/pi

      - name: Run TUI performance benchmarks (PERF-8/9)
        run: cargo bench --bench tui_perf -- --noplot --save-baseline pr

      - name: Generate PiJS workload perf data (JSONL)
        run: |
          set -euxo pipefail
          PERF_PROFILE=perf
          mkdir -p "target/perf/${PERF_PROFILE}"
          # First run creates the JSONL file (1 tool call per iteration) ...
          PI_BENCH_BUILD_PROFILE="${PERF_PROFILE}" cargo run --profile "${PERF_PROFILE}" --bin pijs_workload -- --iterations 2000 --tool-calls 1 > "target/perf/${PERF_PROFILE}/pijs_workload_${PERF_PROFILE}.jsonl"
          # ... second run appends to it (10 tool calls per iteration).
          PI_BENCH_BUILD_PROFILE="${PERF_PROFILE}" cargo run --profile "${PERF_PROFILE}" --bin pijs_workload -- --iterations 2000 --tool-calls 10 >> "target/perf/${PERF_PROFILE}/pijs_workload_${PERF_PROFILE}.jsonl"

      - name: Generate benchmark summary
        run: |
          set -euo pipefail

          # Print one "- <bench>: <mean>ns" bullet for every benchmark under
          # target/criterion/<group> that has a new/estimates.json file.
          # Prints nothing when the group directory does not exist.
          summarize_group() {
            local group_dir="target/criterion/$1" dir name mean
            [ -d "$group_dir" ] || return 0
            for dir in "$group_dir"/*/; do
              [ -f "$dir/new/estimates.json" ] || continue
              name=$(basename "$dir")
              # jq -e fails on a missing/null estimate, so the unit suffix is
              # only attached to a real number (no "N/Ans" or "nullns").
              if mean=$(jq -er '.mean.point_estimate' "$dir/new/estimates.json" 2>/dev/null); then
                echo "- $name: ${mean}ns"
              else
                echo "- $name: N/A"
              fi
            done
          }

          {
            echo "## Benchmark Results"
            echo ""
            echo "### Startup Time"
            summarize_group startup
            echo ""
            echo "### Truncation"
            summarize_group truncation
            echo ""
            echo "### TUI Rendering (PERF-8)"
            for group in build_conversation_content view viewport_operations markdown_rendering; do
              if [ -d "target/criterion/$group" ]; then
                echo "**$group**"
                summarize_group "$group"
              fi
            done
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Perf budget gate
        run: cargo test --test perf_budgets -- --nocapture

      - name: Restore benchmark environment
        # Runs even when an earlier step failed; restore itself is best-effort.
        if: always()
        run: sudo scripts/bench_env_setup.sh restore || true

      - name: Upload benchmark results
        # Runs even when an earlier step failed so partial results are kept.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.sha }}
          path: |
            pi_agent_rust/target/criterion/
            pi_agent_rust/target/perf/
          retention-days: 30
# Compare with main branch baseline (only on PRs)
  # Benchmarks `main` and the PR head on the same runner, storing them as the
  # Criterion baselines "main" and "pr", then fails the job when any benchmark's
  # mean time regressed by more than PERF_REGRESSION_THRESHOLD percent.
  compare:
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request'
    needs: benchmark
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@v4
        with:
          ref: main
          path: main-branch

      - uses: actions/checkout@v4
        with:
          path: pr-branch

      - name: Install system deps (xcb) [linux]
        run: |
          set -euxo pipefail
          sudo apt-get update
          sudo apt-get install -y libxcb1-dev libxcb-render0-dev libxcb-shape0-dev libxcb-xfixes0-dev

      - name: Install Rust nightly
        uses: dtolnay/rust-toolchain@nightly

      - name: Cache cargo registry
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
          key: ${{ runner.os }}-cargo-compare-${{ hashFiles('**/Cargo.lock') }}

      - name: Install critcmp
        run: cargo install critcmp

      - name: Build and benchmark main
        working-directory: main-branch
        run: |
          cargo build --release
          # Best-effort: main may not contain these bench targets yet.
          cargo bench --bench tools -- --noplot --save-baseline main 2>/dev/null || true
          cargo bench --bench tui_perf -- --noplot --save-baseline main 2>/dev/null || true

      - name: Copy main baseline to PR target dir
        run: |
          set -euxo pipefail
          if [ -d "main-branch/target/criterion" ]; then
            mkdir -p pr-branch/target/criterion
            cp -R main-branch/target/criterion/. pr-branch/target/criterion/
          fi

      - name: Build and benchmark PR
        working-directory: pr-branch
        run: |
          cargo build --release
          # Save the PR measurements under the "pr" baseline name. The
          # comparison step reads <bench>/main and <bench>/pr side by side;
          # `--baseline main` alone would never create a "pr" baseline.
          cargo bench --bench tools -- --noplot --save-baseline pr 2>/dev/null || echo "::warning::tools benchmark failed; baseline comparison skipped"
          cargo bench --bench tui_perf -- --noplot --save-baseline pr 2>/dev/null || echo "::warning::tui_perf benchmark failed; TUI baseline comparison skipped"

      - name: Compare TUI performance (fail on >20% regression)
        working-directory: pr-branch
        env:
          # Repository variable override; defaults to 20 (percent).
          PERF_REGRESSION_THRESHOLD: ${{ vars.PERF_REGRESSION_THRESHOLD || '20' }}
        run: |
          set -euo pipefail

          if ! [[ "$PERF_REGRESSION_THRESHOLD" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
            echo "::error::PERF_REGRESSION_THRESHOLD must be a non-negative number, got '${PERF_REGRESSION_THRESHOLD}'"
            exit 1
          fi

          {
            echo "## TUI Performance Comparison"
            echo ""
          } >> "$GITHUB_STEP_SUMMARY"

          # The gate is computed from Criterion's own estimates rather than from
          # critcmp's exit status: critcmp documents --threshold as a display
          # filter only, so its exit status cannot be relied on to signal a
          # regression. Every benchmark that has both a "main" and a "pr"
          # baseline is compared (tools and tui_perf alike).
          compared=0
          regressed=()
          while IFS= read -r -d '' pr_estimates; do
            bench_dir=${pr_estimates%/pr/estimates.json}
            main_estimates="$bench_dir/main/estimates.json"
            # Benchmarks that only exist on the PR have nothing to compare against.
            [ -f "$main_estimates" ] || continue
            compared=$((compared + 1))
            # Emits the slowdown in percent (one decimal) when the PR mean
            # exceeds the main mean by more than the threshold, else nothing.
            slowdown=$(jq -rn \
              --slurpfile base "$main_estimates" \
              --slurpfile head "$pr_estimates" \
              --arg limit "$PERF_REGRESSION_THRESHOLD" \
              '(($head[0].mean.point_estimate / $base[0].mean.point_estimate - 1) * 100) as $delta
               | if $delta > ($limit | tonumber) then "\($delta * 10 | round / 10)" else empty end')
            if [ -n "$slowdown" ]; then
              regressed+=("${bench_dir#target/criterion/}: +${slowdown}%")
            fi
          done < <(find target/criterion -type f -path '*/pr/estimates.json' -print0 2>/dev/null | sort -z)

          # Nothing comparable (e.g. the benches are new in this PR, or a
          # best-effort bench run above was skipped): warn instead of failing.
          if [ "$compared" -eq 0 ]; then
            echo "::warning::No benchmark has both a 'main' and a 'pr' baseline; nothing was compared"
            echo "⚠️ No comparable benchmarks found; regression check skipped" >> "$GITHUB_STEP_SUMMARY"
            exit 0
          fi

          if [ "${#regressed[@]}" -eq 0 ]; then
            echo "✅ No TUI performance regressions detected (threshold: ${PERF_REGRESSION_THRESHOLD}%)" >> "$GITHUB_STEP_SUMMARY"
            exit 0
          fi

          {
            echo "❌ TUI performance regression detected (>${PERF_REGRESSION_THRESHOLD}%)"
            echo ""
            printf -- '- %s\n' "${regressed[@]}"
            echo ""
            # Show detailed comparison (informational; failure here is ignored).
            echo '```'
            critcmp main pr 2>&1 || true
            echo '```'
          } >> "$GITHUB_STEP_SUMMARY"
          echo "::error::${#regressed[@]} benchmark(s) regressed by more than ${PERF_REGRESSION_THRESHOLD}%"
          exit 1