Benchmarks #2
Workflow file for this run:
````yaml
name: Benchmarks

on:
  workflow_dispatch:
    inputs:
      iterations:
        description: "Number of iterations for realistic workload benchmark"
        required: false
        default: "200"
      qps_duration:
        description: "Duration in seconds for each QPS level"
        required: false
        default: "10"
      compare_with:
        description: "Run ID to compare results against (optional)"
        required: false
        default: ""
jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Setup Python
        run: uv python install 3.9
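
      # The cache key embeds the lockfile hash, so dependency changes
      # invalidate the cache automatically.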
      - name: Cache uv + Python installs + venv
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/uv
            ~/.local/share/uv/python
            .venv
          key: ${{ runner.os }}-uv-benchmark-3.9-${{ hashFiles('uv.lock') }}

      - name: Install dependencies
        run: |
          uv sync --all-extras
          uv pip install flask requests psutil
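
      # Recorded as step outputs so later steps can embed the environment
      # in the results metadata.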
      - name: Get system info
        id: sysinfo
        run: |
          echo "python_version=$(uv run python --version)" >> $GITHUB_OUTPUT
          echo "os=$(uname -s)" >> $GITHUB_OUTPUT
          echo "arch=$(uname -m)" >> $GITHUB_OUTPUT
          echo "cpu_count=$(nproc)" >> $GITHUB_OUTPUT
          echo "memory_gb=$(free -g | awk '/^Mem:/{print $2}')" >> $GITHUB_OUTPUT
      - name: Run realistic workload benchmark
        id: realistic
        env:
          BENCHMARK_ITERATIONS: ${{ inputs.iterations }}
        run: |
          uv run python benchmarks/bench/realistic_workload.py 2>&1 | tee realistic_output.txt
          # Print the results JSON to the log
          cat benchmarks/results/realistic-workload.json

      - name: Run fixed QPS latency benchmark
        id: fixed_qps
        env:
          BENCHMARK_QPS_DURATION: ${{ inputs.qps_duration }}
        run: |
          uv run python benchmarks/bench/fixed_qps_latency.py 2>&1 | tee fixed_qps_output.txt
          # Print the results JSON to the log
          cat benchmarks/results/fixed-qps-latency.json

      - name: Generate structured results
        id: results
        run: |
          # Combine both result files with the run metadata into a single
          # JSON document using jq
          jq -n \
            --slurpfile realistic benchmarks/results/realistic-workload.json \
            --slurpfile fixed_qps benchmarks/results/fixed-qps-latency.json \
            --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
            --arg run_id "${{ github.run_id }}" \
            --arg run_number "${{ github.run_number }}" \
            --arg commit_sha "${{ github.sha }}" \
            --arg branch "${{ github.ref_name }}" \
            --arg triggered_by "${{ github.actor }}" \
            --arg python_version "${{ steps.sysinfo.outputs.python_version }}" \
            --arg os "${{ steps.sysinfo.outputs.os }}" \
            --arg arch "${{ steps.sysinfo.outputs.arch }}" \
            --arg cpu_count "${{ steps.sysinfo.outputs.cpu_count }}" \
            --arg memory_gb "${{ steps.sysinfo.outputs.memory_gb }}" \
            '{
              metadata: {
                timestamp: $timestamp,
                run_id: $run_id,
                run_number: ($run_number | tonumber),
                commit_sha: $commit_sha,
                branch: $branch,
                triggered_by: $triggered_by,
                environment: {
                  python_version: $python_version,
                  os: $os,
                  arch: $arch,
                  cpu_count: ($cpu_count | tonumber),
                  memory_gb: ($memory_gb | tonumber)
                }
              },
              realistic_workload: $realistic[0],
              fixed_qps_latency: $fixed_qps[0]
            }' > benchmarks/results/benchmark-summary.json
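
      # Heredoc quoting matters below: the first EOF delimiter is unquoted
      # so $(date ...) expands, while the later 'EOF' delimiters are quoted
      # so their bodies are written literally.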
      - name: Generate markdown summary
        run: |
          SUMMARY_FILE="benchmarks/results/benchmark-summary.md"
          cat > "$SUMMARY_FILE" << EOF
          # Benchmark Results
          **Date**: $(date -u +%Y-%m-%d)
          **Commit**: ${{ github.sha }}
          **Branch**: ${{ github.ref_name }}
          **Run ID**: ${{ github.run_id }}
          ## Environment
          - Python: ${{ steps.sysinfo.outputs.python_version }}
          - OS: ${{ steps.sysinfo.outputs.os }} (${{ steps.sysinfo.outputs.arch }})
          - CPUs: ${{ steps.sysinfo.outputs.cpu_count }}
          - Memory: ${{ steps.sysinfo.outputs.memory_gb }} GB
          ## Realistic Workload Results
          EOF

          # Parse and format realistic workload results
          jq -r '
            "| Endpoint | Baseline | SDK (100%) | Overhead | SDK (10%) | Overhead |",
            "|----------|----------|------------|----------|-----------|----------|",
            (.comparison_100 | to_entries[] |
              "| \(.key) | \(.value.baseline_mean_ms | . * 10 | round / 10)ms | \(.value.sdk_mean_ms | . * 10 | round / 10)ms | +\(.value.mean_overhead_ms | . * 10 | round / 10)ms (\(.value.mean_overhead_pct | round)%) | - | - |"
            )
          ' benchmarks/results/realistic-workload.json >> "$SUMMARY_FILE"

          cat >> "$SUMMARY_FILE" << 'EOF'
          ## Fixed QPS Latency Results
          ### Mean Latency
          EOF

          jq -r '
            "| QPS | Baseline | SDK (100%) | Overhead | SDK (10%) | Overhead |",
            "|-----|----------|------------|----------|-----------|----------|",
            (.baseline | to_entries[] |
              . as $b |
              ($b.key | tostring) as $qps |
              "| \($qps) | \($b.value.mean_ms | . * 10 | round / 10)ms | - | - | - | - |"
            )
          ' benchmarks/results/fixed-qps-latency.json >> "$SUMMARY_FILE"

          cat >> "$SUMMARY_FILE" << 'EOF'
          ---
          📊 **Full results available in artifacts**
          EOF

          # Also write to the GitHub step summary for UI display
          cat "$SUMMARY_FILE" >> $GITHUB_STEP_SUMMARY
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_id }}
          path: |
            benchmarks/results/*.json
            benchmarks/results/*.md
            realistic_output.txt
            fixed_qps_output.txt
          retention-days: 90

      - name: Download comparison results (if specified)
        if: ${{ inputs.compare_with != '' }}
        uses: actions/download-artifact@v4
        with:
          name: benchmark-results-${{ inputs.compare_with }}
          # download-artifact@v4 needs run-id and github-token to fetch
          # artifacts from a different workflow run
          run-id: ${{ inputs.compare_with }}
          github-token: ${{ github.token }}
          path: benchmarks/results/comparison/
        continue-on-error: true
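
      # bc does the float subtraction; plain shell arithmetic is integer-only.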
      - name: Compare with previous run
        if: ${{ inputs.compare_with != '' }}
        run: |
          if [ -f benchmarks/results/comparison/benchmark-summary.json ]; then
            echo "## Comparison with Run ${{ inputs.compare_with }}" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            # Compare realistic workload results
            PREV_READ=$(jq '.realistic_workload.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/comparison/benchmark-summary.json)
            CURR_READ=$(jq '.realistic_workload.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/benchmark-summary.json)
            PREV_WRITE=$(jq '.realistic_workload.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/comparison/benchmark-summary.json)
            CURR_WRITE=$(jq '.realistic_workload.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/benchmark-summary.json)
            echo "| Metric | Previous | Current | Delta |" >> $GITHUB_STEP_SUMMARY
            echo "|--------|----------|---------|-------|" >> $GITHUB_STEP_SUMMARY
            echo "| Read API overhead | ${PREV_READ}ms | ${CURR_READ}ms | $(echo "$CURR_READ - $PREV_READ" | bc)ms |" >> $GITHUB_STEP_SUMMARY
            echo "| Write API overhead | ${PREV_WRITE}ms | ${CURR_WRITE}ms | $(echo "$CURR_WRITE - $PREV_WRITE" | bc)ms |" >> $GITHUB_STEP_SUMMARY
          else
            echo "⚠️ Could not find comparison results for run ${{ inputs.compare_with }}" >> $GITHUB_STEP_SUMMARY
          fi
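
      # Mean overhead above THRESHOLD_MS at 100% sampling counts as a
      # regression. The step records regression=true/false as an output
      # rather than failing the job.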
      - name: Check for performance regression
        id: regression
        run: |
          # Check if overhead exceeds threshold (3 ms for 100% sampling)
          THRESHOLD_MS=3.0
          READ_OVERHEAD=$(jq '.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/realistic-workload.json)
          WRITE_OVERHEAD=$(jq '.comparison_100.typical_write.mean_overhead_ms' benchmarks/results/realistic-workload.json)
          MIXED_OVERHEAD=$(jq '.comparison_100.realistic_mixed.mean_overhead_ms' benchmarks/results/realistic-workload.json)
          REGRESSION=false
          if (( $(echo "$READ_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Read API overhead ($READ_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> $GITHUB_STEP_SUMMARY
            REGRESSION=true
          fi
          if (( $(echo "$WRITE_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Write API overhead ($WRITE_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> $GITHUB_STEP_SUMMARY
            REGRESSION=true
          fi
          if (( $(echo "$MIXED_OVERHEAD > $THRESHOLD_MS" | bc -l) )); then
            echo "⚠️ Mixed API overhead ($MIXED_OVERHEAD ms) exceeds threshold ($THRESHOLD_MS ms)" >> $GITHUB_STEP_SUMMARY
            REGRESSION=true
          fi
          if [ "$REGRESSION" = true ]; then
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "### ⚠️ Performance regression detected" >> $GITHUB_STEP_SUMMARY
            echo "regression=true" >> $GITHUB_OUTPUT
          else
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "### ✅ No performance regression detected" >> $GITHUB_STEP_SUMMARY
            echo "regression=false" >> $GITHUB_OUTPUT
          fi

      - name: Output JSON results
        run: |
          echo "### Structured Results (JSON)"
          echo ""
          echo '```json'
          cat benchmarks/results/benchmark-summary.json
          echo '```'
````
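
Since the workflow only has a `workflow_dispatch` trigger, runs have to be started manually. A minimal sketch using the GitHub CLI, assuming the workflow file is on the default branch (the `compare_with` value below is a hypothetical earlier run ID):

```sh
gh workflow run Benchmarks \
  -f iterations=500 \
  -f qps_duration=15 \
  -f compare_with=1234567890  # hypothetical previous run ID

# Follow the run as it executes
gh run watch
```

The benchmarks can also be reproduced locally with the same commands the workflow runs; this sketch assumes the repository's `benchmarks/` layout and that the scripts honor the two environment variables:

```sh
uv sync --all-extras
uv pip install flask requests psutil

BENCHMARK_ITERATIONS=200 uv run python benchmarks/bench/realistic_workload.py
BENCHMARK_QPS_DURATION=10 uv run python benchmarks/bench/fixed_qps_latency.py

# Both scripts write their results under benchmarks/results/
jq '.comparison_100.typical_read.mean_overhead_ms' benchmarks/results/realistic-workload.json
```

The summary and regression steps assume `realistic-workload.json` exposes a `comparison_100` object whose entries carry `baseline_mean_ms`, `sdk_mean_ms`, `mean_overhead_ms`, and `mean_overhead_pct` fields; that shape is inferred from the jq queries above, not from the benchmark script itself. A quick local sanity check:

```sh
jq -e '
  .comparison_100
  | [.typical_read, .typical_write, .realistic_mixed]
  | all(has("baseline_mean_ms") and has("sdk_mean_ms")
        and has("mean_overhead_ms") and has("mean_overhead_pct"))
' benchmarks/results/realistic-workload.json && echo "shape OK"
```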