Skip to content

Commit ea3b3ee

Browse files
authored
perf: benchmarks (#35)
1 parent 48a19ca commit ea3b3ee

22 files changed

+3453
-0
lines changed

.github/workflows/benchmarks.yml

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
---
# Manually-triggered benchmark workflow.
#
# Runs the realistic-workload and fixed-QPS latency benchmarks, emits a
# structured JSON summary plus a markdown report, uploads both as artifacts,
# optionally compares against a previous run, and flags regressions when the
# per-request SDK overhead exceeds a fixed threshold.
name: Benchmarks

on:
  workflow_dispatch:
    inputs:
      iterations:
        description: "Number of iterations for realistic workload benchmark"
        required: false
        default: "200"
      qps_duration:
        description: "Duration in seconds for each QPS level"
        required: false
        default: "10"
      compare_with:
        description: "Run ID to compare results against (optional)"
        required: false
        default: ""

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Setup Python
        run: uv python install 3.9

      - name: Cache uv + Python installs + venv
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/uv
            ~/.local/share/uv/python
            .venv
          key: ${{ runner.os }}-uv-benchmark-3.9-${{ hashFiles('uv.lock') }}

      - name: Install dependencies
        run: |
          uv sync --all-extras
          uv pip install flask requests psutil

      # Capture host details once so later steps can reference them via
      # steps.sysinfo.outputs.* instead of re-running the probes.
      - name: Get system info
        id: sysinfo
        run: |
          echo "python_version=$(python --version)" >> "$GITHUB_OUTPUT"
          echo "os=$(uname -s)" >> "$GITHUB_OUTPUT"
          echo "arch=$(uname -m)" >> "$GITHUB_OUTPUT"
          echo "cpu_count=$(nproc)" >> "$GITHUB_OUTPUT"
          echo "memory_gb=$(free -g | awk '/^Mem:/{print $2}')" >> "$GITHUB_OUTPUT"

      - name: Run realistic workload benchmark
        id: realistic
        env:
          BENCHMARK_ITERATIONS: ${{ inputs.iterations }}
        run: |
          uv run python benchmarks/bench/realistic_workload.py 2>&1 | tee realistic_output.txt
          # Surface the results JSON in the step log
          cat benchmarks/results/realistic-workload.json

      - name: Run fixed QPS latency benchmark
        id: fixed_qps
        env:
          BENCHMARK_QPS_DURATION: ${{ inputs.qps_duration }}
        run: |
          uv run python benchmarks/bench/fixed_qps_latency.py 2>&1 | tee fixed_qps_output.txt
          # Surface the results JSON in the step log
          cat benchmarks/results/fixed-qps-latency.json

      - name: Generate structured results
        id: results
        run: |
          # Build benchmark-summary.json with jq so every value is correctly
          # escaped and both result files are embedded as real JSON.
          # NOTE: a previous version first wrote this file via a heredoc with
          # a quoted 'EOF' delimiter — which suppresses $(...) expansion and
          # would have emitted a literal "$(date ...)" string — and the file
          # was immediately overwritten by this jq call anyway; that dead,
          # broken heredoc has been removed.
          jq -n \
            --slurpfile realistic benchmarks/results/realistic-workload.json \
            --slurpfile fixed_qps benchmarks/results/fixed-qps-latency.json \
            --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
            --arg run_id "${{ github.run_id }}" \
            --arg run_number "${{ github.run_number }}" \
            --arg commit_sha "${{ github.sha }}" \
            --arg branch "${{ github.ref_name }}" \
            --arg triggered_by "${{ github.actor }}" \
            --arg python_version "${{ steps.sysinfo.outputs.python_version }}" \
            --arg os "${{ steps.sysinfo.outputs.os }}" \
            --arg arch "${{ steps.sysinfo.outputs.arch }}" \
            --arg cpu_count "${{ steps.sysinfo.outputs.cpu_count }}" \
            --arg memory_gb "${{ steps.sysinfo.outputs.memory_gb }}" \
            '{
              metadata: {
                timestamp: $timestamp,
                run_id: $run_id,
                run_number: ($run_number | tonumber),
                commit_sha: $commit_sha,
                branch: $branch,
                triggered_by: $triggered_by,
                environment: {
                  python_version: $python_version,
                  os: $os,
                  arch: $arch,
                  cpu_count: ($cpu_count | tonumber),
                  memory_gb: ($memory_gb | tonumber)
                }
              },
              realistic_workload: $realistic[0],
              fixed_qps_latency: $fixed_qps[0]
            }' > benchmarks/results/benchmark-summary.json

      - name: Generate markdown summary
        run: |
          SUMMARY_FILE="benchmarks/results/benchmark-summary.md"

          # Unquoted EOF: $(date ...) and the Actions-substituted values below
          # are expanded into the header.
          cat > "$SUMMARY_FILE" << EOF
          # Benchmark Results

          **Date**: $(date -u +%Y-%m-%d)
          **Commit**: ${{ github.sha }}
          **Branch**: ${{ github.ref_name }}
          **Run ID**: ${{ github.run_id }}

          ## Environment
          - Python: ${{ steps.sysinfo.outputs.python_version }}
          - OS: ${{ steps.sysinfo.outputs.os }} (${{ steps.sysinfo.outputs.arch }})
          - CPUs: ${{ steps.sysinfo.outputs.cpu_count }}
          - Memory: ${{ steps.sysinfo.outputs.memory_gb }} GB

          ## Realistic Workload Results

          EOF

          # Parse and format realistic workload results
          jq -r '
            "| Endpoint | Baseline | SDK (100%) | Overhead | SDK (10%) | Overhead |",
            "|----------|----------|------------|----------|-----------|----------|",
            (.comparison_100 | to_entries[] |
              "| \(.key) | \(.value.baseline_mean_ms | . * 10 | round / 10)ms | \(.value.sdk_mean_ms | . * 10 | round / 10)ms | +\(.value.mean_overhead_ms | . * 10 | round / 10)ms (\(.value.mean_overhead_pct | round)%) | - | - |"
            )
          ' benchmarks/results/realistic-workload.json >> "$SUMMARY_FILE"

          cat >> "$SUMMARY_FILE" << 'EOF'

          ## Fixed QPS Latency Results

          ### Mean Latency

          EOF

          jq -r '
            "| QPS | Baseline | SDK (100%) | Overhead | SDK (10%) | Overhead |",
            "|-----|----------|------------|----------|-----------|----------|",
            (.baseline | to_entries[] |
              . as $b |
              ($b.key | tostring) as $qps |
              "| \($qps) | \($b.value.mean_ms | . * 10 | round / 10)ms | - | - | - | - |"
            )
          ' benchmarks/results/fixed-qps-latency.json >> "$SUMMARY_FILE"

          cat >> "$SUMMARY_FILE" << 'EOF'

          ---

          📊 **Full results available in artifacts**

          EOF

          # Also write to GitHub step summary for UI display
          cat "$SUMMARY_FILE" >> "$GITHUB_STEP_SUMMARY"

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_id }}
          path: |
            benchmarks/results/*.json
            benchmarks/results/*.md
            realistic_output.txt
            fixed_qps_output.txt
          retention-days: 90

      # continue-on-error keeps the job green when the referenced artifact
      # has expired or the run ID is wrong; the next step reports it.
      - name: Download comparison results (if specified)
        if: ${{ inputs.compare_with != '' }}
        uses: actions/download-artifact@v4
        with:
          name: benchmark-results-${{ inputs.compare_with }}
          path: benchmarks/results/comparison/
        continue-on-error: true

      - name: Compare with previous run
        if: ${{ inputs.compare_with != '' }}
        run: |
          if [ -f benchmarks/results/comparison/benchmark-summary.json ]; then
            echo "## Comparison with Run ${{ inputs.compare_with }}" >> "$GITHUB_STEP_SUMMARY"
            echo "" >> "$GITHUB_STEP_SUMMARY"

            # Compare realistic workload results
            PREV_READ=$(jq '.realistic_workload.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/comparison/benchmark-summary.json)
            CURR_READ=$(jq '.realistic_workload.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/benchmark-summary.json)

            PREV_WRITE=$(jq '.realistic_workload.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/comparison/benchmark-summary.json)
            CURR_WRITE=$(jq '.realistic_workload.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/benchmark-summary.json)

            echo "| Metric | Previous | Current | Delta |" >> "$GITHUB_STEP_SUMMARY"
            echo "|--------|----------|---------|-------|" >> "$GITHUB_STEP_SUMMARY"
            echo "| Read API overhead | ${PREV_READ}ms | ${CURR_READ}ms | $(echo "$CURR_READ - $PREV_READ" | bc)ms |" >> "$GITHUB_STEP_SUMMARY"
            echo "| Write API overhead | ${PREV_WRITE}ms | ${CURR_WRITE}ms | $(echo "$CURR_WRITE - $PREV_WRITE" | bc)ms |" >> "$GITHUB_STEP_SUMMARY"
          else
            echo "⚠️ Could not find comparison results for run ${{ inputs.compare_with }}" >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Check for performance regression
        id: regression
        run: |
          # Check if overhead exceeds threshold (3ms for 100% sampling).
          # bc -l is used because the overheads are floats and the shell's
          # arithmetic comparison is integer-only.
          THRESHOLD_MS=3.0

          READ_OVERHEAD=$(jq '.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/realistic-workload.json)
          WRITE_OVERHEAD=$(jq '.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/realistic-workload.json)
          MIXED_OVERHEAD=$(jq '.comparison_100.realistic_mixed.mean_overhead_ms' benchmarks/results/realistic-workload.json)

          REGRESSION=false

          if (( $(echo "$READ_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Read API overhead ($READ_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> "$GITHUB_STEP_SUMMARY"
            REGRESSION=true
          fi

          if (( $(echo "$WRITE_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Write API overhead ($WRITE_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> "$GITHUB_STEP_SUMMARY"
            REGRESSION=true
          fi

          if (( $(echo "$MIXED_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Mixed API overhead ($MIXED_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> "$GITHUB_STEP_SUMMARY"
            REGRESSION=true
          fi

          if [ "$REGRESSION" = true ]; then
            echo "" >> "$GITHUB_STEP_SUMMARY"
            echo "### ⚠️ Performance regression detected" >> "$GITHUB_STEP_SUMMARY"
            echo "regression=true" >> "$GITHUB_OUTPUT"
          else
            echo "" >> "$GITHUB_STEP_SUMMARY"
            echo "### ✅ No performance regression detected" >> "$GITHUB_STEP_SUMMARY"
            echo "regression=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Output JSON results
        run: |
          echo "### Structured Results (JSON)"
          echo ""
          echo '```json'
          cat benchmarks/results/benchmark-summary.json
          echo '```'

benchmarks/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
# Benchmark output is regenerated on every run — never commit it.
results/

# Scratch trace directories produced while the benchmarks execute.
.benchmark-traces*/

0 commit comments

Comments (0)