From e6d2a5aa1ba97533d93eab61c2d784002cd97c46 Mon Sep 17 00:00:00 2001
From: Wolfgang Walther <walther@technowledgy.de>
Date: Fri, 8 May 2026 20:48:25 +0200
Subject: [PATCH 1/7] nix(loadtest): remove noise from report

Ultimately, we only look at the `rate` column, so we can just as well
remove all other columns.

This makes the next step, when we split results by request type, much
less noisy.
---
 nix/tools/loadtest.nix | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix
index 034fb63282..9831efe4c4 100644
--- a/nix/tools/loadtest.nix
+++ b/nix/tools/loadtest.nix
@@ -282,8 +282,7 @@ let
 
         pd.read_json(sys.stdin) \
           .set_index('param') \
-          .drop(['branch', 'earliest', 'end', 'latest']) \
-          .fillna("") \
+          .loc['rate'] \
           .convert_dtypes() \
           .to_markdown(sys.stdout, floatfmt='.0f')
       '';

From ab69a66c3fe340383a1557b4ac8dea90ee8468d0 Mon Sep 17 00:00:00 2001
From: Wolfgang Walther <walther@technowledgy.de>
Date: Fri, 8 May 2026 22:27:57 +0200
Subject: [PATCH 2/7] nix(loadtest): group results by URL

Different requests hit different code paths and perform very
differently. By looking at each request type separately, we should be
able to get a much better idea of what kind of change in performance
we're looking at and where the root cause might be.

It will hopefully also allow us to migrate some of the other test-cases
into the main loadtest.
---
 nix/tools/loadtest.nix | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix
index 9831efe4c4..e7a684294e 100644
--- a/nix/tools/loadtest.nix
+++ b/nix/tools/loadtest.nix
@@ -267,7 +267,8 @@ let
         workingDir = "/";
       }
       ''
-        ${vegeta}/bin/vegeta report -type=json "$_arg_file" \
+        ${vegeta}/bin/vegeta encode "$_arg_file" \
+          | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by(.url) | map({(.[0].url | sub("^http://postgrest";"")): add | .latency / length | 10e6 / . }) | .[]' \
           | ${jq}/bin/jq --arg branch "$(basename "$_arg_file" .bin)" '. + {branch: $branch}'
       '';
 
@@ -281,10 +282,10 @@ let
         import pandas as pd
 
         pd.read_json(sys.stdin) \
-          .set_index('param') \
-          .loc['rate'] \
+          .set_index('rate') \
+          .drop(['branch']) \
           .convert_dtypes() \
-          .to_markdown(sys.stdout, floatfmt='.0f')
+          .to_markdown(sys.stdout, floatfmt='.1f')
       '';
 
 
@@ -304,8 +305,8 @@ let
         echo -e "## Loadtest results $marker\n"
 
         find loadtest -type f -iname '*.bin' -exec ${reporter} {} \; \
-          | ${jq}/bin/jq '[paths(scalars) as $path | {param: $path | join("."), (.branch): getpath($path)}]' \
-          | ${jq}/bin/jq --slurp 'flatten | group_by(.param) | map(add)' \
+          | ${jq}/bin/jq '[paths(scalars) as $path | {rate: $path | join("."), (.branch): getpath($path)}]' \
+          | ${jq}/bin/jq --slurp 'flatten | group_by(.rate) | map(add)' \
           | ${toMarkdown}
 
         echo -e "\n\n## Loadtest elapsed seconds vs CPU/MEM usage $marker\n"

From 372785906275542256cd1876a395b863ad6f52e5 Mon Sep 17 00:00:00 2001
From: Wolfgang Walther <walther@technowledgy.de>
Date: Sat, 16 May 2026 00:08:34 +0200
Subject: [PATCH 3/7] nix(loadtest): report minimum latency instead of rate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We previously used "rate", i.e. number of requests per second, as the
primary metric to judge loadtest results. However, this has always been
varying from run to run quite a bit, especially in CI where other jobs
possibly run on the same VM host.

The run-to-run variance has massively increased after splitting the
results up per request. Example run in CI with rate on the PR
introducing this change (on which we would expect no change at all):

| rate [1/s]                         |   main |   head |    Δ |
|:-----------------------------------|-------:|-------:|-----:|
| /                                  |    9.4 |    9.5 |   1% |
| /actors                            |  870.4 | 1023.0 |  18% |
| /actors?actor=eq.1                 |  188.5 |  198.6 |   5% |
| /actors?actor=eq.1&columns=name    |  197.3 |  167.1 | -15% |
| /actors?select=*,roles(*,films(*)) |  153.9 |  144.9 |  -6% |
| /films?columns=id,title            |  157.9 |  182.6 |  16% |
| /films?columns=id,title,year,...   |   87.0 |   87.1 |   0% |
| /roles                             |  204.5 |  267.3 |  31% |
| /rpc/call_me                       |  231.3 |  208.8 | -10% |
| /rpc/call_me?name=John             |  212.2 |  201.7 |  -5% |

From the data we can easily tell that rate as a parameter has only
worked so far because the data was *heavily* dominated by the requests
on the root endpoint for OpenAPI. The longer duration makes that request
much less vulnerable to concurrent activity. For all other requests it's
essentially not possible to judge the effect of a PR this way.

One way to counter this would be to massively increase the time the
loadtest runs. More samples will result in a smoother average. However,
that's not practical for usability of CI. In the original PR #1812 I
already evaluated using the *minimum latency* as the most reliable
criterion, but this has never really caught on. The theory behind this
is: The variation in timings between requests is happening because of
concurrent activity, priority chosen by the scheduler, availability of
resources and such - all factors *outside* our control, and *irrelevant*
to the Haskell code we're writing.

Using the minimum latency is an estimation of how fast the code can run
*in the best case*. This might not be a number relevant for production,
but it's much more directly related to the code we write.

The following shows how the variation becomes *much* smaller with
minimum latency as the parameter:

| min latency [μs]                   |     main |   head |    Δ |
|:-----------------------------------|---------:|-------:|-----:|
| /                                  |   1275.3 | 1263.6 |  -1% |
| /actors                            |     10.0 |    9.9 |  -1% |
| /actors?actor=eq.1                 |     50.7 |   48.3 |  -5% |
| /actors?actor=eq.1&columns=name    |     54.1 |   54.0 |   0% |
| /actors?select=*,roles(*,films(*)) |     63.2 |   61.9 |  -2% |
| /films?columns=id,title            |     51.1 |   50.7 |  -1% |
| /films?columns=id,title,year,...   |    121.9 |  121.8 |   0% |
| /roles                             |     42.9 |   42.6 |  -1% |
| /rpc/call_me                       |     45.6 |   45.4 |   0% |
| /rpc/call_me?name=John             |     44.4 |   44.2 |   0% |

Since we're separating results per request now, we can only sensibly
focus on *one* parameter - otherwise this would get really clunky
UI-wise. Especially for automated CI failures, minimum latency is the
logical choice.
---
 nix/tools/loadtest.nix | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix
index e7a684294e..2c60f3bcb0 100644
--- a/nix/tools/loadtest.nix
+++ b/nix/tools/loadtest.nix
@@ -268,7 +268,7 @@ let
       }
       ''
         ${vegeta}/bin/vegeta encode "$_arg_file" \
-          | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by(.url) | map({(.[0].url | sub("^http://postgrest";"")): add | .latency / length | 10e6 / . }) | .[]' \
+          | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by(.url) | map({(.[0].url | sub("^http://postgrest";"")): map(.latency) | min / 10e3 }) | .[]' \
           | ${jq}/bin/jq --arg branch "$(basename "$_arg_file" .bin)" '. + {branch: $branch}'
       '';
 
@@ -282,7 +282,8 @@ let
         import pandas as pd
 
         pd.read_json(sys.stdin) \
-          .set_index('rate') \
+          .rename(columns={'min': 'min latency [μs]'}) \
+          .set_index('min latency [μs]') \
           .drop(['branch']) \
           .convert_dtypes() \
           .to_markdown(sys.stdout, floatfmt='.1f')
@@ -305,8 +306,8 @@ let
         echo -e "## Loadtest results $marker\n"
 
         find loadtest -type f -iname '*.bin' -exec ${reporter} {} \; \
-          | ${jq}/bin/jq '[paths(scalars) as $path | {rate: $path | join("."), (.branch): getpath($path)}]' \
-          | ${jq}/bin/jq --slurp 'flatten | group_by(.rate) | map(add)' \
+          | ${jq}/bin/jq '[paths(scalars) as $path | {min: $path | join("."), (.branch): getpath($path)}]' \
+          | ${jq}/bin/jq --slurp 'flatten | group_by(.min) | map(add)' \
           | ${toMarkdown}
 
         echo -e "\n\n## Loadtest elapsed seconds vs CPU/MEM usage $marker\n"

From 1d2e4297b1b73bbc0baf4e3565013b597e710a74 Mon Sep 17 00:00:00 2001
From: Wolfgang Walther <walther@technowledgy.de>
Date: Sun, 17 May 2026 16:24:51 +0200
Subject: [PATCH 4/7] nix(loadtest): move errors into mixed

Because we separate loadtest results per URL now, we can move the error
tests into the regular mixed bag of loadtests - we will be able to tell
from the misspelled URLs when we hit a regression in that area.

We should be able to do similar things for JWT tests, but we'll need
more infrastructure here.
---
 .github/workflows/test.yaml |  2 +-
 nix/tools/loadtest.nix      | 16 ++++------------
 test/load/errors.http       | 15 ---------------
 test/load/errors.sql        |  7 -------
 test/load/fixtures.sql      |  6 ++++++
 test/load/targets.http      | 16 ++++++++++++++++
 6 files changed, 27 insertions(+), 35 deletions(-)
 delete mode 100644 test/load/errors.http
 delete mode 100644 test/load/errors.sql

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index ad9e7d111d..ca91b3cb74 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -127,7 +127,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        kind: ['mixed', 'errors', 'jwt-hs', 'jwt-hs-cache', 'jwt-hs-cache-worst', 'jwt-rsa', 'jwt-rsa-cache', 'jwt-rsa-cache-worst']
+        kind: ['mixed', 'jwt-hs', 'jwt-hs-cache', 'jwt-hs-cache-worst', 'jwt-rsa', 'jwt-rsa-cache', 'jwt-rsa-cache-worst']
     name: Loadtest
     runs-on: ubuntu-24.04
     steps:
diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix
index 2c60f3bcb0..323583ebb7 100644
--- a/nix/tools/loadtest.nix
+++ b/nix/tools/loadtest.nix
@@ -46,7 +46,7 @@ let
           "ARG_OPTIONAL_SINGLE([testdir], [t], [Directory to load tests and fixtures from], [./test/load])"
           "ARG_OPTIONAL_SINGLE([kind], [k], [Kind of loadtest], [mixed])"
           "ARG_OPTIONAL_SINGLE([method],, [HTTP method used for the jwt loadtests], [OPTIONS])"
-          "ARG_TYPE_GROUP_SET([KIND], [KIND], [kind], [mixed,errors,jwt-hs,jwt-hs-cache,jwt-hs-cache-worst,jwt-rsa,jwt-rsa-cache,jwt-rsa-cache-worst])"
+          "ARG_TYPE_GROUP_SET([KIND], [KIND], [kind], [mixed,jwt-hs,jwt-hs-cache,jwt-hs-cache-worst,jwt-rsa,jwt-rsa-cache,jwt-rsa-cache-worst])"
           "ARG_TYPE_GROUP_SET([METHOD], [METHOD], [method], [OPTIONS,GET])"
           "ARG_OPTIONAL_SINGLE([monitor], [m], [Monitoring file], [./loadtest/result.csv])"
           "ARG_LEFTOVERS([additional vegeta arguments])"
@@ -139,27 +139,19 @@ let
             ${runner} -lazy -targets gen_targets.http -output \"$abs_output\" \"''${_arg_leftovers[@]}\""
             ;;
 
+          # here we sleep purposefully to check how much memory the schema cache consumes in the final report
           mixed)
             # shellcheck disable=SC2145
             ${withTools.withPg} -f "$_arg_testdir"/fixtures.sql \
-            ${withTools.withPgrst} -m "$_arg_monitor" \
-            sh -c "cd \"$_arg_testdir\" && \
-            ${runner} -targets targets.http -output \"$abs_output\" \"''${_arg_leftovers[@]}\""
-            ;;
-
-          # here we sleep purposefully to check how much memory does the schema cache consume in the final report
-          errors)
-            # shellcheck disable=SC2145
-            ${withTools.withPg} -f "$_arg_testdir"/errors.sql \
             ${withTools.withPgrst} --timeout 2 --sleep 5 -m "$_arg_monitor" \
             sh -c "cd \"$_arg_testdir\" && \
-            ${runner} -targets errors.http -output \"$abs_output\" \"''${_arg_leftovers[@]}\""
+            ${runner} -targets targets.http -output \"$abs_output\" \"''${_arg_leftovers[@]}\""
             ;;
         esac
 
         ${vegeta}/bin/vegeta report -type=text "$_arg_output"
 
-        if [ "$_arg_kind" != "errors" ]; then
+        if [ "$_arg_kind" != "mixed" ]; then
           # fail in case 401 happened on jwt loadtests
           unauthorized_count="$(${vegeta}/bin/vegeta report -type=json "$_arg_output" \
             | ${jq}/bin/jq -r '.status_codes["401"] // 0')"
diff --git a/test/load/errors.http b/test/load/errors.http
deleted file mode 100644
index ea5db90569..0000000000
--- a/test/load/errors.http
+++ /dev/null
@@ -1,15 +0,0 @@
-# Misspelled relations
-GET http://postgrest/actoxs?actor=eq.1
-Prefer: tx=commit
-
-# Misspelled relations on embeds
-GET http://postgrest/actors?select=*,rolws(*,films(*))
-Prefer: tx=commit
-
-# Misspelled function names
-GET http://postgrest/rpc/call_me_x?name=John
-Prefer: tx=commit
-
-# Permission denied errors
-GET http://postgrest/actors_1
-Prefer: tx=commit
diff --git a/test/load/errors.sql b/test/load/errors.sql
deleted file mode 100644
index 02abc37f3a..0000000000
--- a/test/load/errors.sql
+++ /dev/null
@@ -1,7 +0,0 @@
-\ir fixtures.sql
-
-SELECT format('CREATE TABLE test.actors_%s ();', n)
-FROM generate_series(1, 450) n -- 500 is the upper limit for table not found error hint generation
-\gexec
-
--- TODO add many function for fuzzy search (somehow this is making the loadtest start slow)
diff --git a/test/load/fixtures.sql b/test/load/fixtures.sql
index 75bd6a8bb2..8f844c4835 100644
--- a/test/load/fixtures.sql
+++ b/test/load/fixtures.sql
@@ -49,3 +49,9 @@ REVOKE ALL PRIVILEGES ON TABLE
 FROM postgrest_test_anonymous;
 
 GRANT ALL ON TABLE authors_only TO postgrest_test_author;
+
+SELECT format('CREATE TABLE test.actors_%s ();', n)
+FROM generate_series(1, 450) n -- 500 is the upper limit for table not found error hint generation
+\gexec
+
+-- TODO add many functions for fuzzy search (somehow this is making the loadtest start slow)
diff --git a/test/load/targets.http b/test/load/targets.http
index ce65472a70..7553ade018 100644
--- a/test/load/targets.http
+++ b/test/load/targets.http
@@ -33,3 +33,19 @@ POST http://postgrest/rpc/call_me
 @rpc.json
 
 OPTIONS http://postgrest/actors
+
+# Misspelled relations
+GET http://postgrest/actoxs?actor=eq.1
+Prefer: tx=commit
+
+# Misspelled relations on embeds
+GET http://postgrest/actors?select=*,rolws(*,films(*))
+Prefer: tx=commit
+
+# Misspelled function names
+GET http://postgrest/rpc/call_me_x?name=John
+Prefer: tx=commit
+
+# Permission denied errors
+GET http://postgrest/actors_1
+Prefer: tx=commit

From 3cd09340d8f838f52f5e4c6f00ae3a1287dbda71 Mon Sep 17 00:00:00 2001
From: Wolfgang Walther <walther@technowledgy.de>
Date: Mon, 18 May 2026 19:30:46 +0200
Subject: [PATCH 5/7] nix(loadtest): additionally group results by status code
 and method

This gives us a much better idea immediately when looking at the report.
It also allows us to differentiate between different return codes on the
same URI, which might come in handy when dealing with expired JWT and
such.

A nice side-effect: All the error-related requests are now grouped
together in the 4xx section.
---
 nix/tools/loadtest.nix | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix
index 323583ebb7..60158a5c17 100644
--- a/nix/tools/loadtest.nix
+++ b/nix/tools/loadtest.nix
@@ -260,7 +260,7 @@ let
       }
       ''
         ${vegeta}/bin/vegeta encode "$_arg_file" \
-          | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by(.url) | map({(.[0].url | sub("^http://postgrest";"")): map(.latency) | min / 10e3 }) | .[]' \
+          | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by("\(.code) \(.method) \(.url)") | map({("\(.[0].code) \(.[0].method) \(.[0].url)" | sub("http://postgrest";"")): map(.latency) | min / 10e3 }) | .[]' \
           | ${jq}/bin/jq --arg branch "$(basename "$_arg_file" .bin)" '. + {branch: $branch}'
       '';
 

From 3f8c06085a67ad1c5f7f3cd80f941abf757927be Mon Sep 17 00:00:00 2001
From: Wolfgang Walther <walther@technowledgy.de>
Date: Mon, 18 May 2026 22:42:42 +0200
Subject: [PATCH 6/7] nix(loadtest): report P95 latency instead of minimum

---
 nix/tools/loadtest.nix | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix
index 60158a5c17..cd4181990d 100644
--- a/nix/tools/loadtest.nix
+++ b/nix/tools/loadtest.nix
@@ -260,7 +260,7 @@ let
       }
       ''
         ${vegeta}/bin/vegeta encode "$_arg_file" \
-          | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by("\(.code) \(.method) \(.url)") | map({("\(.[0].code) \(.[0].method) \(.[0].url)" | sub("http://postgrest";"")): map(.latency) | min / 10e3 }) | .[]' \
+          | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by("\(.code) \(.method) \(.url)") | map({("\(.[0].code) \(.[0].method) \(.[0].url)" | sub("http://postgrest";"")): map(.latency) | sort | .[length * 0.95 | floor] / 10e3 }) | .[]' \
           | ${jq}/bin/jq --arg branch "$(basename "$_arg_file" .bin)" '. + {branch: $branch}'
       '';
 
@@ -274,8 +274,8 @@ let
         import pandas as pd
 
         pd.read_json(sys.stdin) \
-          .rename(columns={'min': 'min latency [μs]'}) \
-          .set_index('min latency [μs]') \
+          .rename(columns={'p95': 'P95 latency [μs]'}) \
+          .set_index('P95 latency [μs]') \
           .drop(['branch']) \
           .convert_dtypes() \
           .to_markdown(sys.stdout, floatfmt='.1f')
@@ -298,8 +298,8 @@ let
         echo -e "## Loadtest results $marker\n"
 
         find loadtest -type f -iname '*.bin' -exec ${reporter} {} \; \
-          | ${jq}/bin/jq '[paths(scalars) as $path | {min: $path | join("."), (.branch): getpath($path)}]' \
-          | ${jq}/bin/jq --slurp 'flatten | group_by(.min) | map(add)' \
+          | ${jq}/bin/jq '[paths(scalars) as $path | {p95: $path | join("."), (.branch): getpath($path)}]' \
+          | ${jq}/bin/jq --slurp 'flatten | group_by(.p95) | map(add)' \
           | ${toMarkdown}
 
         echo -e "\n\n## Loadtest elapsed seconds vs CPU/MEM usage $marker\n"

From 5f3b6b5851e76db78df2dc27c136d95c3aab675f Mon Sep 17 00:00:00 2001
From: Wolfgang Walther <walther@technowledgy.de>
Date: Tue, 19 May 2026 12:32:56 +0200
Subject: [PATCH 7/7] nix(loadtest): report P50 latency instead of P95

---
 nix/tools/loadtest.nix | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix
index cd4181990d..9cbc81c11c 100644
--- a/nix/tools/loadtest.nix
+++ b/nix/tools/loadtest.nix
@@ -260,7 +260,7 @@ let
       }
       ''
         ${vegeta}/bin/vegeta encode "$_arg_file" \
-          | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by("\(.code) \(.method) \(.url)") | map({("\(.[0].code) \(.[0].method) \(.[0].url)" | sub("http://postgrest";"")): map(.latency) | sort | .[length * 0.95 | floor] / 10e3 }) | .[]' \
+          | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by("\(.code) \(.method) \(.url)") | map({("\(.[0].code) \(.[0].method) \(.[0].url)" | sub("http://postgrest";"")): map(.latency) | sort | .[length * 0.50 | floor] / 10e3 }) | .[]' \
           | ${jq}/bin/jq --arg branch "$(basename "$_arg_file" .bin)" '. + {branch: $branch}'
       '';
 
@@ -274,8 +274,8 @@ let
         import pandas as pd
 
         pd.read_json(sys.stdin) \
-          .rename(columns={'p95': 'P95 latency [μs]'}) \
-          .set_index('P95 latency [μs]') \
+          .rename(columns={'p50': 'P50 latency [μs]'}) \
+          .set_index('P50 latency [μs]') \
           .drop(['branch']) \
           .convert_dtypes() \
           .to_markdown(sys.stdout, floatfmt='.1f')
@@ -298,8 +298,8 @@ let
         echo -e "## Loadtest results $marker\n"
 
         find loadtest -type f -iname '*.bin' -exec ${reporter} {} \; \
-          | ${jq}/bin/jq '[paths(scalars) as $path | {p95: $path | join("."), (.branch): getpath($path)}]' \
-          | ${jq}/bin/jq --slurp 'flatten | group_by(.p95) | map(add)' \
+          | ${jq}/bin/jq '[paths(scalars) as $path | {p50: $path | join("."), (.branch): getpath($path)}]' \
+          | ${jq}/bin/jq --slurp 'flatten | group_by(.p50) | map(add)' \
           | ${toMarkdown}
 
         echo -e "\n\n## Loadtest elapsed seconds vs CPU/MEM usage $marker\n"