Merge pull request #255 from MDA2AV/multi/site-command

MDA2AV · web-flow · commit 900bd4e61202 · 2026-03-29T16:17:44.000Z
add --save to pr command and update site docs
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -13,3 +13,6 @@
 | `/benchmark` | Run all benchmark tests |
 | `/benchmark -t baseline` | Run a specific test |
 | `/benchmark -f <framework> -t <test>` | Run a specific framework and test |
+| `/benchmark --save` | Run and save results (updates leaderboard on merge) |
+
+Results are automatically compared against the current leaderboard.
diff --git a/.github/workflows/benchmark-pr.yml b/.github/workflows/benchmark-pr.yml
@@ -13,9 +13,13 @@ on:
         description: 'Profile (e.g. baseline, baseline-h2, leave empty for all)'
         required: false
         default: ''
+      save:
+        description: 'Save results (true/false)'
+        required: false
+        default: ''  # only the exact string 'true' enables saving; any other value (including empty) is a plain run
 
 permissions:
-  contents: read
+  contents: write
   pull-requests: write
 
 concurrency:
@@ -35,9 +39,28 @@ jobs:
         id: bench
         run: |
           log=$(mktemp)
-          ./scripts/benchmark.sh "${{ inputs.framework }}" ${{ inputs.profile }} 2>&1 | tee "$log"
+          SAVE_FLAG=""
+          if [ "${{ inputs.save }}" = "true" ]; then
+            SAVE_FLAG="--save"
+          fi
+          ./scripts/benchmark.sh "${{ inputs.framework }}" ${{ inputs.profile }} $SAVE_FLAG 2>&1 | tee "$log"
           echo "log_file=$log" >> "$GITHUB_OUTPUT"
 
+      - name: Commit saved results  # pushes to the PR's head branch; refs/pull/<n>/head is read-only on GitHub
+        if: inputs.save == 'true'
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add results/ site/data/ site/static/logs/ 2>/dev/null || true
+          git diff --cached --quiet && { echo "No results to commit"; exit 0; }
+          git commit -m "Benchmark results: $FRAMEWORK $PROFILE"
+          git push origin "HEAD:refs/heads/$(gh pr view "$PR" --json headRefName --jq .headRefName)"  # same-repo PRs only
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # authenticates the gh lookup above
+          FRAMEWORK: ${{ inputs.framework }}  # passed via env so input text is never spliced into the script
+          PROFILE: ${{ inputs.profile }}
+          PR: ${{ inputs.pr }}
+
       - name: Compare with main
         id: compare
         run: |
diff --git a/.github/workflows/pr-commands.yml b/.github/workflows/pr-commands.yml
@@ -40,11 +40,18 @@ jobs:
               EXPLICIT_TEST=$(echo "$COMMENT_BODY" | grep -oP '/benchmark\s+\K[^-]\S*' || echo "")
             fi
 
+            # Parse --save flag: match it only as a standalone word (the bare "--" ends grep's own option parsing)
+            SAVE_FLAG=""
+            if echo "$COMMENT_BODY" | grep -qE -- '(^|[[:space:]])--save([[:space:]]|$)'; then
+              SAVE_FLAG="true"
+            fi
+
             FRAMEWORK="${EXPLICIT_FW:-$AUTO_FRAMEWORK}"
             echo "framework=$FRAMEWORK" >> "$GITHUB_OUTPUT"
             echo "profile=$EXPLICIT_TEST" >> "$GITHUB_OUTPUT"
+            echo "save=$SAVE_FLAG" >> "$GITHUB_OUTPUT"
             echo "Detected framework: $FRAMEWORK (auto: $AUTO_FRAMEWORK, explicit: $EXPLICIT_FW)"
-            echo "Profile: $EXPLICIT_TEST"
+            echo "Profile: $EXPLICIT_TEST, Save: $SAVE_FLAG"
 
           elif echo "$COMMENT_BODY" | grep -q '/validate'; then
             echo "command=validate" >> "$GITHUB_OUTPUT"
@@ -89,9 +96,10 @@ jobs:
           gh workflow run benchmark-pr.yml \
             -f pr=${{ steps.parse.outputs.pr }} \
             -f framework=${{ steps.parse.outputs.framework }} \
-            -f profile="${{ steps.parse.outputs.profile }}"
+            -f profile="${{ steps.parse.outputs.profile }}" \
+            -f save="${{ steps.parse.outputs.save }}"
 
-          gh pr comment "${{ steps.parse.outputs.pr }}" --body "🚀 Benchmark run triggered for \`${{ steps.parse.outputs.framework }}\`${{ steps.parse.outputs.profile && format(' (test: {0})', steps.parse.outputs.profile) || ' (all tests)' }}. Results will be posted here when done."
+          gh pr comment "${{ steps.parse.outputs.pr }}" --body "🚀 Benchmark run triggered for \`${{ steps.parse.outputs.framework }}\`${{ steps.parse.outputs.profile && format(' (test: {0})', steps.parse.outputs.profile) || ' (all tests)' }}${{ steps.parse.outputs.save == 'true' && ' with --save' || '' }}. Results will be posted here when done."
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
diff --git a/README.md b/README.md
@@ -12,11 +12,17 @@ HTTP framework benchmark platform.
 
 Tag **@BennyFranciscus** on your PR for help with implementation or benchmark questions.
 
-| Command | Action |
-|---------|--------|
-| `/validate` | Run the 18-point validation suite |
-| `/benchmark` | Run all benchmark profiles |
-| `/benchmark baseline` | Run a specific profile |
+| Command | Description |
+|---------|-------------|
+| `/validate` | Run the 18-point validation suite (auto-detects framework from PR) |
+| `/validate -f <framework>` | Validate a specific framework |
+| `/benchmark` | Run all benchmark tests (auto-detects framework) |
+| `/benchmark -t <test>` | Run a specific test profile |
+| `/benchmark -f <framework> -t <test>` | Run a specific framework and test |
+| `/benchmark --save` | Run and save results (updates leaderboard on merge) |
+| `/benchmark -f <framework> -t <test> --save` | Run, compare with main, and save results |
+
+Results are automatically compared against the current leaderboard and deltas are posted in the PR comment.
 
 ---
 
diff --git a/site/content/docs/add-framework/ci.md b/site/content/docs/add-framework/ci.md
@@ -2,40 +2,66 @@
 title: CI & Runner
 ---
 
-## GitHub Actions
+## PR Commands
 
-HttpArena uses four GitHub Actions workflows to automate validation, benchmarking, and deployment.
+Comment on any PR to trigger validation or benchmarks. The framework is auto-detected from changed files, or you can specify it explicitly.
 
-### Validate Framework
+| Command | Description |
+|---------|-------------|
+| `/validate` | Run the 18-point validation suite |
+| `/validate -f <framework>` | Validate a specific framework |
+| `/benchmark` | Run all benchmark tests |
+| `/benchmark -t <test>` | Run a specific test profile |
+| `/benchmark -f <framework> -t <test>` | Run a specific framework and test |
+| `/benchmark --save` | Run and save results (updates leaderboard on merge) |
 
-**Trigger:** Automatically on every PR that modifies files under `frameworks/` or `scripts/validate.sh`.
+### Flags
 
-Detects which frameworks were changed in the PR and runs `./scripts/validate.sh` against each one. If validation fails, the PR check fails — you must fix the issues before merging.
+- **`-f <framework>`** — Override auto-detection. Use the directory name under `frameworks/` (e.g. `-f actix`, `-f go-fasthttp`).
+- **`-t <test>`** — Run a specific test profile (e.g. `-t baseline`, `-t mixed`, `-t async-db`).
+- **`--save`** — Save benchmark results to the PR branch. When the PR is merged, results are included in the next site deployment and appear on the leaderboard.
 
-### Benchmark
+### Comparison with main
 
-**Trigger:** Automatically when a push to `main` modifies files under `frameworks/`, or manually via workflow dispatch.
+After every benchmark run, results are automatically compared against the current published data on main. A delta table is posted in the PR comment showing changes in RPS, latency, CPU, and memory for each connection count. New frameworks with no prior results show "NEW" instead of deltas.
+
+## GitHub Actions
+
+HttpArena uses four GitHub Actions workflows.
+
+### PR Commands (`pr-commands.yml`)
 
-When triggered automatically, it detects which frameworks changed in the latest commit and benchmarks only those. When triggered manually, you can specify:
-- **Framework** — a specific framework name, or leave empty to benchmark all changed frameworks
-- **Profile** — a specific test profile (e.g. `baseline`, `baseline-h2`), or leave empty to run all profiles
+**Trigger:** Comment on a PR containing `/validate` or `/benchmark`.
 
-Results are committed and pushed to `main` automatically by the HttpArena Bot.
+Parses the command and flags from the comment, detects the framework from changed PR files (or uses the `-f` flag), and either runs validation directly or dispatches the benchmark workflow. Adds a rocket reaction to the comment and posts results when done.
 
-### Benchmark PR
+### Benchmark PR (`benchmark-pr.yml`)
 
-**Trigger:** Manual only (workflow dispatch). Requires a PR number and framework name.
+**Trigger:** Dispatched by the PR Commands workflow, or manually via workflow dispatch.
 
-Checks out the PR branch, runs the benchmark, and posts the results as a comment on the PR. This lets maintainers benchmark a new framework submission before merging, so contributors can see how their implementation performs on the hosted runner. An optional profile parameter lets you run a specific test instead of the full suite.
+Checks out the PR branch, runs the benchmark with optional `--save`, compares results against main using `scripts/compare.sh`, and posts a comment with raw results and a comparison table. If `--save` is used, results are committed and pushed to the PR branch.
 
-### Deploy Site
+### Benchmark (`benchmark.yml`)
+
+**Trigger:** Automatically when a push to `main` modifies files under `frameworks/`, or manually via workflow dispatch.
+
+Detects which frameworks changed and benchmarks them with `--save`. Results are committed to `main` and the site data is rebuilt.
+
+### Deploy Site (`deploy.yml`)
 
 **Trigger:** Automatically when a push to `main` modifies files under `site/`, or manually via workflow dispatch.
 
-Builds the Hugo site and deploys it to GitHub Pages. This runs on GitHub-hosted Ubuntu runners (not the self-hosted runner).
+Builds the Hugo site and deploys it to GitHub Pages. Runs on GitHub-hosted runners (not the self-hosted benchmark machine).
+
+## Hosted Runner
 
-## Hosted runner
+The PR Commands, Benchmark, and Benchmark PR workflows run on a **self-hosted runner** — a dedicated 64-core bare-metal machine configured for reproducible benchmarking:
 
-The Validate, Benchmark, and Benchmark PR workflows run on a **self-hosted runner** — a dedicated bare-metal machine configured for reproducible, low-noise benchmarking. This ensures all frameworks are tested on identical hardware under controlled conditions, with CPU governors locked, background services minimized, and no resource contention from other CI jobs.
+- CPU governor locked to `performance` mode during benchmarks
+- Kernel caches dropped between runs
+- Docker daemon restarted for clean state
+- `somaxconn` and TCP backlog tuned for high connection counts
+- `ulimit` set to maximum file descriptors
+- Host networking for minimal overhead
 
-Only the Deploy Site workflow uses GitHub-hosted runners, since it only builds static HTML and doesn't need controlled hardware.
+Only the Deploy Site workflow uses GitHub-hosted runners.
diff --git a/site/content/docs/add-framework/meta-json.md b/site/content/docs/add-framework/meta-json.md
@@ -41,8 +41,9 @@ Create a `meta.json` file in your framework directory:
 | `upload` | HTTP/1.1 | `/upload` |
 | `compression` | HTTP/1.1 | `/compression` |
 | `noisy` | HTTP/1.1 | `/baseline11` |
-| `mixed` | HTTP/1.1 | `/baseline11`, `/json`, `/db`, `/upload`, `/compression` |
+| `mixed` | HTTP/1.1 | `/baseline11`, `/json`, `/db`, `/upload`, `/compression`, `/static/*`, `/async-db` |
 | `static` | HTTP/1.1 | `/static/*` (port 8080) |
+| `async-db` | HTTP/1.1 | `/async-db` (requires `DATABASE_URL` env var) |
 | `baseline-h2` | HTTP/2 | `/baseline2` (TLS, port 8443) |
 | `static-h2` | HTTP/2 | `/static/*` (TLS, port 8443) |
 | `baseline-h3` | HTTP/3 | `/baseline2` (QUIC, port 8443) |
@@ -52,3 +53,13 @@ Create a `meta.json` file in your framework directory:
 | `echo-ws` | WebSocket | `/ws` echo (port 8080) |
 
 Only include profiles your framework supports. Frameworks missing a profile simply don't appear in that profile's leaderboard.
+
+### async-db
+
+The `async-db` profile requires an async PostgreSQL driver. The benchmark script starts a Postgres sidecar with 100K rows and passes `DATABASE_URL=postgres://bench:bench@localhost:5432/benchmark` to your container. Your framework must:
+
+1. Connect to Postgres using the `DATABASE_URL` environment variable
+2. Implement `GET /async-db?min=X&max=Y` that queries: `SELECT id, name, category, price, quantity, active, tags, rating_score, rating_count FROM items WHERE price BETWEEN $1 AND $2 LIMIT 50`
+3. Return JSON: `{"items": [...], "count": N}`, where each item carries a nested `rating: {score, count}` object and `tags` as a JSON array
+4. Return `{"items":[],"count":0}` if the database is unavailable
+5. Use lazy connection initialization — retry connecting if Postgres isn't ready at startup