name: Benchmarks

# Manually-triggered benchmark suite: runs the realistic-workload and
# fixed-QPS latency benchmarks, publishes structured JSON + markdown
# results as artifacts, and optionally compares against a previous run.
on:
  workflow_dispatch:
    inputs:
      iterations:
        description: "Number of iterations for realistic workload benchmark"
        required: false
        default: "200"
      qps_duration:
        description: "Duration in seconds for each QPS level"
        required: false
        default: "10"
      compare_with:
        description: "Run ID to compare results against (optional)"
        required: false
        default: ""

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Setup Python
        # Pin the interpreter uv will use for the project environment.
        run: uv python install 3.9

      - name: Cache uv + Python installs + venv
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/uv
            ~/.local/share/uv/python
            .venv
          key: ${{ runner.os }}-uv-benchmark-3.9-${{ hashFiles('uv.lock') }}

      - name: Install dependencies
        run: |
          uv sync --all-extras
          uv pip install flask requests psutil

      - name: Get system info
        id: sysinfo
        # Capture environment details for the results metadata. Use the
        # uv-managed interpreter (`uv run python`), not the runner's system
        # python, so the reported version matches what the benchmarks run on.
        run: |
          echo "python_version=$(uv run python --version)" >> $GITHUB_OUTPUT
          echo "os=$(uname -s)" >> $GITHUB_OUTPUT
          echo "arch=$(uname -m)" >> $GITHUB_OUTPUT
          echo "cpu_count=$(nproc)" >> $GITHUB_OUTPUT
          echo "memory_gb=$(free -g | awk '/^Mem:/{print $2}')" >> $GITHUB_OUTPUT

      - name: Run realistic workload benchmark
        id: realistic
        env:
          BENCHMARK_ITERATIONS: ${{ inputs.iterations }}
        run: |
          uv run python benchmarks/bench/realistic_workload.py 2>&1 | tee realistic_output.txt
          # Echo the results JSON into the log for quick inspection.
          cat benchmarks/results/realistic-workload.json

      - name: Run fixed QPS latency benchmark
        id: fixed_qps
        env:
          BENCHMARK_QPS_DURATION: ${{ inputs.qps_duration }}
        run: |
          uv run python benchmarks/bench/fixed_qps_latency.py 2>&1 | tee fixed_qps_output.txt
          # Echo the results JSON into the log for quick inspection.
          cat benchmarks/results/fixed-qps-latency.json

      - name: Generate structured results
        id: results
        # Merge both benchmark outputs with run metadata into a single JSON
        # file via jq. (A previous version first wrote this file with a
        # quoted heredoc — which left $(date ...) unexpanded as a literal —
        # and then immediately overwrote it here; that dead heredoc has been
        # removed.)
        run: |
          jq -n \
            --slurpfile realistic benchmarks/results/realistic-workload.json \
            --slurpfile fixed_qps benchmarks/results/fixed-qps-latency.json \
            --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
            --arg run_id "${{ github.run_id }}" \
            --arg run_number "${{ github.run_number }}" \
            --arg commit_sha "${{ github.sha }}" \
            --arg branch "${{ github.ref_name }}" \
            --arg triggered_by "${{ github.actor }}" \
            --arg python_version "${{ steps.sysinfo.outputs.python_version }}" \
            --arg os "${{ steps.sysinfo.outputs.os }}" \
            --arg arch "${{ steps.sysinfo.outputs.arch }}" \
            --arg cpu_count "${{ steps.sysinfo.outputs.cpu_count }}" \
            --arg memory_gb "${{ steps.sysinfo.outputs.memory_gb }}" \
            '{
              metadata: {
                timestamp: $timestamp,
                run_id: $run_id,
                run_number: ($run_number | tonumber),
                commit_sha: $commit_sha,
                branch: $branch,
                triggered_by: $triggered_by,
                environment: {
                  python_version: $python_version,
                  os: $os,
                  arch: $arch,
                  cpu_count: ($cpu_count | tonumber),
                  memory_gb: ($memory_gb | tonumber)
                }
              },
              realistic_workload: $realistic[0],
              fixed_qps_latency: $fixed_qps[0]
            }' > benchmarks/results/benchmark-summary.json

      - name: Generate markdown summary
        # Heredoc terminators must be flush with the script's indentation
        # inside the block scalar so they sit at column 0 after YAML strips
        # the common indent; an indented EOF would never terminate the
        # heredoc. The first heredoc is unquoted on purpose so $(date ...)
        # expands; the static sections use a quoted 'EOF' (no expansion).
        run: |
          SUMMARY_FILE="benchmarks/results/benchmark-summary.md"

          cat > "$SUMMARY_FILE" << EOF
          # Benchmark Results

          **Date**: $(date -u +%Y-%m-%d)
          **Commit**: ${{ github.sha }}
          **Branch**: ${{ github.ref_name }}
          **Run ID**: ${{ github.run_id }}

          ## Environment
          - Python: ${{ steps.sysinfo.outputs.python_version }}
          - OS: ${{ steps.sysinfo.outputs.os }} (${{ steps.sysinfo.outputs.arch }})
          - CPUs: ${{ steps.sysinfo.outputs.cpu_count }}
          - Memory: ${{ steps.sysinfo.outputs.memory_gb }} GB

          ## Realistic Workload Results

          EOF

          # Parse and format realistic workload results
          jq -r '
            "| Endpoint | Baseline | SDK (100%) | Overhead | SDK (10%) | Overhead |",
            "|----------|----------|------------|----------|-----------|----------|",
            (.comparison_100 | to_entries[] |
              "| \(.key) | \(.value.baseline_mean_ms | . * 10 | round / 10)ms | \(.value.sdk_mean_ms | . * 10 | round / 10)ms | +\(.value.mean_overhead_ms | . * 10 | round / 10)ms (\(.value.mean_overhead_pct | round)%) | - | - |"
            )
          ' benchmarks/results/realistic-workload.json >> "$SUMMARY_FILE"

          cat >> "$SUMMARY_FILE" << 'EOF'

          ## Fixed QPS Latency Results

          ### Mean Latency

          EOF

          jq -r '
            "| QPS | Baseline | SDK (100%) | Overhead | SDK (10%) | Overhead |",
            "|-----|----------|------------|----------|-----------|----------|",
            (.baseline | to_entries[] |
              . as $b |
              ($b.key | tostring) as $qps |
              "| \($qps) | \($b.value.mean_ms | . * 10 | round / 10)ms | - | - | - | - |"
            )
          ' benchmarks/results/fixed-qps-latency.json >> "$SUMMARY_FILE"

          cat >> "$SUMMARY_FILE" << 'EOF'

          ---

          📊 **Full results available in artifacts**

          EOF

          # Also write to GitHub step summary for UI display
          cat "$SUMMARY_FILE" >> $GITHUB_STEP_SUMMARY

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_id }}
          path: |
            benchmarks/results/*.json
            benchmarks/results/*.md
            realistic_output.txt
            fixed_qps_output.txt
          retention-days: 90

      - name: Download comparison results (if specified)
        if: ${{ inputs.compare_with != '' }}
        uses: actions/download-artifact@v4
        with:
          name: benchmark-results-${{ inputs.compare_with }}
          path: benchmarks/results/comparison/
          # download-artifact@v4 only searches the *current* run unless
          # run-id is given; a token with actions:read is then required.
          # Without these the cross-run comparison artifact is never found.
          run-id: ${{ inputs.compare_with }}
          github-token: ${{ github.token }}
        continue-on-error: true

      - name: Compare with previous run
        if: ${{ inputs.compare_with != '' }}
        run: |
          if [ -f benchmarks/results/comparison/benchmark-summary.json ]; then
            echo "## Comparison with Run ${{ inputs.compare_with }}" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY

            # Compare realistic workload results
            PREV_READ=$(jq '.realistic_workload.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/comparison/benchmark-summary.json)
            CURR_READ=$(jq '.realistic_workload.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/benchmark-summary.json)

            PREV_WRITE=$(jq '.realistic_workload.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/comparison/benchmark-summary.json)
            CURR_WRITE=$(jq '.realistic_workload.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/benchmark-summary.json)

            echo "| Metric | Previous | Current | Delta |" >> $GITHUB_STEP_SUMMARY
            echo "|--------|----------|---------|-------|" >> $GITHUB_STEP_SUMMARY
            echo "| Read API overhead | ${PREV_READ}ms | ${CURR_READ}ms | $(echo "$CURR_READ - $PREV_READ" | bc)ms |" >> $GITHUB_STEP_SUMMARY
            echo "| Write API overhead | ${PREV_WRITE}ms | ${CURR_WRITE}ms | $(echo "$CURR_WRITE - $PREV_WRITE" | bc)ms |" >> $GITHUB_STEP_SUMMARY
          else
            echo "⚠️ Could not find comparison results for run ${{ inputs.compare_with }}" >> $GITHUB_STEP_SUMMARY
          fi

      - name: Check for performance regression
        id: regression
        # Flags (but does not fail on) overhead above the threshold; the
        # `regression` output lets a downstream consumer decide what to do.
        # NOTE(review): if a jq path is missing the value is `null` and bc
        # will error — assumes the benchmark JSON always has these keys.
        run: |
          # Check if overhead exceeds threshold (3ms for 100% sampling)
          THRESHOLD_MS=3.0

          READ_OVERHEAD=$(jq '.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/realistic-workload.json)
          WRITE_OVERHEAD=$(jq '.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/realistic-workload.json)
          MIXED_OVERHEAD=$(jq '.comparison_100.realistic_mixed.mean_overhead_ms' benchmarks/results/realistic-workload.json)

          REGRESSION=false

          if (( $(echo "$READ_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Read API overhead ($READ_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> $GITHUB_STEP_SUMMARY
            REGRESSION=true
          fi

          if (( $(echo "$WRITE_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Write API overhead ($WRITE_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> $GITHUB_STEP_SUMMARY
            REGRESSION=true
          fi

          if (( $(echo "$MIXED_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Mixed API overhead ($MIXED_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> $GITHUB_STEP_SUMMARY
            REGRESSION=true
          fi

          if [ "$REGRESSION" = true ]; then
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "### ⚠️ Performance regression detected" >> $GITHUB_STEP_SUMMARY
            echo "regression=true" >> $GITHUB_OUTPUT
          else
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "### ✅ No performance regression detected" >> $GITHUB_STEP_SUMMARY
            echo "regression=false" >> $GITHUB_OUTPUT
          fi

      - name: Output JSON results
        run: |
          echo "### Structured Results (JSON)"
          echo ""
          echo '```json'
          cat benchmarks/results/benchmark-summary.json
          echo '```'