From e6d2a5aa1ba97533d93eab61c2d784002cd97c46 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Fri, 8 May 2026 20:48:25 +0200 Subject: [PATCH 1/7] nix(loadtest): remove noise from report Ultimately, we only look at the `rate` column, so we can just as well remove all other columns. This makes the next step, when we split results by request type, much less noisy. --- nix/tools/loadtest.nix | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix index 034fb63282..9831efe4c4 100644 --- a/nix/tools/loadtest.nix +++ b/nix/tools/loadtest.nix @@ -282,8 +282,7 @@ let pd.read_json(sys.stdin) \ .set_index('param') \ - .drop(['branch', 'earliest', 'end', 'latest']) \ - .fillna("") \ + .loc['rate'] \ .convert_dtypes() \ .to_markdown(sys.stdout, floatfmt='.0f') ''; From ab69a66c3fe340383a1557b4ac8dea90ee8468d0 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Fri, 8 May 2026 22:27:57 +0200 Subject: [PATCH 2/7] nix(loadtest): group results by URL Different requests hit different code paths and perform very differently. By looking at each request type separately, we should be able to get a much better idea of what kind of change in performance we're looking at and where the root cause might be. It will hopefully also allow us to migrate some of the other test-cases into the main loadtest. --- nix/tools/loadtest.nix | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix index 9831efe4c4..e7a684294e 100644 --- a/nix/tools/loadtest.nix +++ b/nix/tools/loadtest.nix @@ -267,7 +267,8 @@ let workingDir = "/"; } '' - ${vegeta}/bin/vegeta report -type=json "$_arg_file" \ + ${vegeta}/bin/vegeta encode "$_arg_file" \ + | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by(.url) | map({(.[0].url | sub("^http://postgrest";"")): add | .latency / length | 10e6 / . }) | .[]' \ | ${jq}/bin/jq --arg branch "$(basename "$_arg_file" .bin)" '. 
+ {branch: $branch}' ''; @@ -281,10 +282,10 @@ let import pandas as pd pd.read_json(sys.stdin) \ - .set_index('param') \ - .loc['rate'] \ + .set_index('rate') \ + .drop(['branch']) \ .convert_dtypes() \ - .to_markdown(sys.stdout, floatfmt='.0f') + .to_markdown(sys.stdout, floatfmt='.1f') ''; @@ -304,8 +305,8 @@ let echo -e "## Loadtest results $marker\n" find loadtest -type f -iname '*.bin' -exec ${reporter} {} \; \ - | ${jq}/bin/jq '[paths(scalars) as $path | {param: $path | join("."), (.branch): getpath($path)}]' \ - | ${jq}/bin/jq --slurp 'flatten | group_by(.param) | map(add)' \ + | ${jq}/bin/jq '[paths(scalars) as $path | {rate: $path | join("."), (.branch): getpath($path)}]' \ + | ${jq}/bin/jq --slurp 'flatten | group_by(.rate) | map(add)' \ | ${toMarkdown} echo -e "\n\n## Loadtest elapsed seconds vs CPU/MEM usage $marker\n" From 372785906275542256cd1876a395b863ad6f52e5 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Sat, 16 May 2026 00:08:34 +0200 Subject: [PATCH 3/7] nix(loadtest): report minimum latency instead of rate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We previously used "rate", i.e. number of requests per second, as the primary metric to judge loadtest results. However, this has always been varying from run to run quite a bit, especially in CI where other jobs possibly run on the same VM host. The run-to-run variance has massively increased after splitting the results up per request. Example run in CI with rate on the PR introducing this change (on which we would expect no change at all): | rate [1/s] | main | head | Δ | |:-----------------------------------|-------:|-------:|-----:| | / | 9.4 | 9.5 | 1% | | /actors | 870.4 | 1023.0 | 18% | | /actors?actor=eq.1 | 188.5 | 198.6 | 5% | | /actors?actor=eq.1&columns=name | 197.3 | 167.1 | -15% | | /actors?select=*,roles(*,films(*)) | 153.9 | 144.9 | -6% | | /films?columns=id,title | 157.9 | 182.6 | 16% | | /films?columns=id,title,year,... 
| 87.0 | 87.1 | 0% | | /roles | 204.5 | 267.3 | 31% | | /rpc/call_me | 231.3 | 208.8 | -10% | | /rpc/call_me?name=John | 212.2 | 201.7 | -5% | From the data we can easily tell that the only reason rate has worked as a parameter so far is that the data was *heavily* dominated by the requests on the root endpoint for OpenAPI. The longer duration makes the request much less vulnerable to concurrent activity. For all other requests it's essentially not possible to judge the effect of a PR this way. One way to counter this would be to massively increase the time the loadtest runs. More samples will result in a smoother average. However, that's not practical for usability of CI. In the original PR #1812 I already evaluated using the *minimum latency* as the most reliable criterion, but this has never really caught on. The theory behind this is: The variation in timings between requests is happening because of concurrent activity, priority chosen by the scheduler, availability of resources and such - all factors *outside* our control, and *irrelevant* to the Haskell code we're writing. Using the minimum latency is an estimation of how fast the code can run *in the best case*. This might not be a number relevant for production, but it's much more directly related to the code we write. Here is how the variation becomes *much* smaller with minimum latency as the parameter: | min latency [μs] | main | head | Δ | |:-----------------------------------|---------:|-------:|-----:| | / | 1275.3 | 1263.6 | -1% | | /actors | 10.0 | 9.9 | -1% | | /actors?actor=eq.1 | 50.7 | 48.3 | -5% | | /actors?actor=eq.1&columns=name | 54.1 | 54.0 | 0% | | /actors?select=*,roles(*,films(*)) | 63.2 | 61.9 | -2% | | /films?columns=id,title | 51.1 | 50.7 | -1% | | /films?columns=id,title,year,... 
| 121.9 | 121.8 | 0% | | /roles | 42.9 | 42.6 | -1% | | /rpc/call_me | 45.6 | 45.4 | 0% | | /rpc/call_me?name=John | 44.4 | 44.2 | 0% | Since we're separating results per request now, we can only sensibly focus on *one* parameter - otherwise this would get really clunky UI-wise. Especially for automated CI failures, minimum latency is the logical choice. --- nix/tools/loadtest.nix | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix index e7a684294e..2c60f3bcb0 100644 --- a/nix/tools/loadtest.nix +++ b/nix/tools/loadtest.nix @@ -268,7 +268,7 @@ let } '' ${vegeta}/bin/vegeta encode "$_arg_file" \ - | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by(.url) | map({(.[0].url | sub("^http://postgrest";"")): add | .latency / length | 10e6 / . }) | .[]' \ + | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by(.url) | map({(.[0].url | sub("^http://postgrest";"")): map(.latency) | min / 10e3 }) | .[]' \ | ${jq}/bin/jq --arg branch "$(basename "$_arg_file" .bin)" '. 
+ {branch: $branch}' ''; @@ -282,7 +282,8 @@ let import pandas as pd pd.read_json(sys.stdin) \ - .set_index('rate') \ + .rename(columns={'min': 'min latency [μs]'}) \ + .set_index('min latency [μs]') \ .drop(['branch']) \ .convert_dtypes() \ .to_markdown(sys.stdout, floatfmt='.1f') @@ -305,8 +306,8 @@ let echo -e "## Loadtest results $marker\n" find loadtest -type f -iname '*.bin' -exec ${reporter} {} \; \ - | ${jq}/bin/jq '[paths(scalars) as $path | {rate: $path | join("."), (.branch): getpath($path)}]' \ - | ${jq}/bin/jq --slurp 'flatten | group_by(.rate) | map(add)' \ + | ${jq}/bin/jq '[paths(scalars) as $path | {min: $path | join("."), (.branch): getpath($path)}]' \ + | ${jq}/bin/jq --slurp 'flatten | group_by(.min) | map(add)' \ | ${toMarkdown} echo -e "\n\n## Loadtest elapsed seconds vs CPU/MEM usage $marker\n" From 1d2e4297b1b73bbc0baf4e3565013b597e710a74 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Sun, 17 May 2026 16:24:51 +0200 Subject: [PATCH 4/7] nix(loadtest): move errors into mixed Because we separate loadtest results per URL now, we can move the error tests into the regular mixed bag of loadtests - we will be able to tell from the misspelled URLs when we hit a regression in that area. We should be able to do similar things for JWT tests, but we'll need more infrastructure here. 
--- .github/workflows/test.yaml | 2 +- nix/tools/loadtest.nix | 16 ++++------------ test/load/errors.http | 15 --------------- test/load/errors.sql | 7 ------- test/load/fixtures.sql | 6 ++++++ test/load/targets.http | 16 ++++++++++++++++ 6 files changed, 27 insertions(+), 35 deletions(-) delete mode 100644 test/load/errors.http delete mode 100644 test/load/errors.sql diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index ad9e7d111d..ca91b3cb74 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -127,7 +127,7 @@ jobs: strategy: fail-fast: false matrix: - kind: ['mixed', 'errors', 'jwt-hs', 'jwt-hs-cache', 'jwt-hs-cache-worst', 'jwt-rsa', 'jwt-rsa-cache', 'jwt-rsa-cache-worst'] + kind: ['mixed', 'jwt-hs', 'jwt-hs-cache', 'jwt-hs-cache-worst', 'jwt-rsa', 'jwt-rsa-cache', 'jwt-rsa-cache-worst'] name: Loadtest runs-on: ubuntu-24.04 steps: diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix index 2c60f3bcb0..323583ebb7 100644 --- a/nix/tools/loadtest.nix +++ b/nix/tools/loadtest.nix @@ -46,7 +46,7 @@ let "ARG_OPTIONAL_SINGLE([testdir], [t], [Directory to load tests and fixtures from], [./test/load])" "ARG_OPTIONAL_SINGLE([kind], [k], [Kind of loadtest], [mixed])" "ARG_OPTIONAL_SINGLE([method],, [HTTP method used for the jwt loadtests], [OPTIONS])" - "ARG_TYPE_GROUP_SET([KIND], [KIND], [kind], [mixed,errors,jwt-hs,jwt-hs-cache,jwt-hs-cache-worst,jwt-rsa,jwt-rsa-cache,jwt-rsa-cache-worst])" + "ARG_TYPE_GROUP_SET([KIND], [KIND], [kind], [mixed,jwt-hs,jwt-hs-cache,jwt-hs-cache-worst,jwt-rsa,jwt-rsa-cache,jwt-rsa-cache-worst])" "ARG_TYPE_GROUP_SET([METHOD], [METHOD], [method], [OPTIONS,GET])" "ARG_OPTIONAL_SINGLE([monitor], [m], [Monitoring file], [./loadtest/result.csv])" "ARG_LEFTOVERS([additional vegeta arguments])" @@ -139,27 +139,19 @@ let ${runner} -lazy -targets gen_targets.http -output \"$abs_output\" \"''${_arg_leftovers[@]}\"" ;; + # here we sleep purposefully to check how much memory does the schema cache 
consume in the final report mixed) # shellcheck disable=SC2145 ${withTools.withPg} -f "$_arg_testdir"/fixtures.sql \ - ${withTools.withPgrst} -m "$_arg_monitor" \ - sh -c "cd \"$_arg_testdir\" && \ - ${runner} -targets targets.http -output \"$abs_output\" \"''${_arg_leftovers[@]}\"" - ;; - - # here we sleep purposefully to check how much memory does the schema cache consume in the final report - errors) - # shellcheck disable=SC2145 - ${withTools.withPg} -f "$_arg_testdir"/errors.sql \ ${withTools.withPgrst} --timeout 2 --sleep 5 -m "$_arg_monitor" \ sh -c "cd \"$_arg_testdir\" && \ - ${runner} -targets errors.http -output \"$abs_output\" \"''${_arg_leftovers[@]}\"" + ${runner} -targets targets.http -output \"$abs_output\" \"''${_arg_leftovers[@]}\"" ;; esac ${vegeta}/bin/vegeta report -type=text "$_arg_output" - if [ "$_arg_kind" != "errors" ]; then + if [ "$_arg_kind" != "mixed" ]; then # fail in case 401 happened on jwt loadtests unauthorized_count="$(${vegeta}/bin/vegeta report -type=json "$_arg_output" \ | ${jq}/bin/jq -r '.status_codes["401"] // 0')" diff --git a/test/load/errors.http b/test/load/errors.http deleted file mode 100644 index ea5db90569..0000000000 --- a/test/load/errors.http +++ /dev/null @@ -1,15 +0,0 @@ -# Misspelled relations -GET http://postgrest/actoxs?actor=eq.1 -Prefer: tx=commit - -# Misspelled relations on embeds -GET http://postgrest/actors?select=*,rolws(*,films(*)) -Prefer: tx=commit - -# Misspelled function names -GET http://postgrest/rpc/call_me_x?name=John -Prefer: tx=commit - -# Permission denied errors -GET http://postgrest/actors_1 -Prefer: tx=commit diff --git a/test/load/errors.sql b/test/load/errors.sql deleted file mode 100644 index 02abc37f3a..0000000000 --- a/test/load/errors.sql +++ /dev/null @@ -1,7 +0,0 @@ -\ir fixtures.sql - -SELECT format('CREATE TABLE test.actors_%s ();', n) -FROM generate_series(1, 450) n -- 500 is the upper limit for table not found error hint generation -\gexec - --- TODO add many function for 
fuzzy search (somehow this is making the loadtest start slow) diff --git a/test/load/fixtures.sql b/test/load/fixtures.sql index 75bd6a8bb2..8f844c4835 100644 --- a/test/load/fixtures.sql +++ b/test/load/fixtures.sql @@ -49,3 +49,9 @@ REVOKE ALL PRIVILEGES ON TABLE FROM postgrest_test_anonymous; GRANT ALL ON TABLE authors_only TO postgrest_test_author; + +SELECT format('CREATE TABLE test.actors_%s ();', n) +FROM generate_series(1, 450) n -- 500 is the upper limit for table not found error hint generation +\gexec + +-- TODO add many function for fuzzy search (somehow this is making the loadtest start slow) diff --git a/test/load/targets.http b/test/load/targets.http index ce65472a70..7553ade018 100644 --- a/test/load/targets.http +++ b/test/load/targets.http @@ -33,3 +33,19 @@ POST http://postgrest/rpc/call_me @rpc.json OPTIONS http://postgrest/actors + +# Misspelled relations +GET http://postgrest/actoxs?actor=eq.1 +Prefer: tx=commit + +# Misspelled relations on embeds +GET http://postgrest/actors?select=*,rolws(*,films(*)) +Prefer: tx=commit + +# Misspelled function names +GET http://postgrest/rpc/call_me_x?name=John +Prefer: tx=commit + +# Permission denied errors +GET http://postgrest/actors_1 +Prefer: tx=commit From 3cd09340d8f838f52f5e4c6f00ae3a1287dbda71 Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Mon, 18 May 2026 19:30:46 +0200 Subject: [PATCH 5/7] nix(loadtest): additionally group results by status code and method This gives us a much better idea immediately when looking at the report. It also allows us to differentiate between different return codes on the same URI, which might come in handy when dealing with expired JWT and such. A nice side-effect: All the error-related requests are now grouped together in the 4xx section. 
--- nix/tools/loadtest.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix index 323583ebb7..60158a5c17 100644 --- a/nix/tools/loadtest.nix +++ b/nix/tools/loadtest.nix @@ -260,7 +260,7 @@ let } '' ${vegeta}/bin/vegeta encode "$_arg_file" \ - | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by(.url) | map({(.[0].url | sub("^http://postgrest";"")): map(.latency) | min / 10e3 }) | .[]' \ + | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by("\(.code) \(.method) \(.url)") | map({("\(.[0].code) \(.[0].method) \(.[0].url)" | sub("http://postgrest";"")): map(.latency) | min / 10e3 }) | .[]' \ | ${jq}/bin/jq --arg branch "$(basename "$_arg_file" .bin)" '. + {branch: $branch}' ''; From 3f8c06085a67ad1c5f7f3cd80f941abf757927be Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Mon, 18 May 2026 22:42:42 +0200 Subject: [PATCH 6/7] p95 --- nix/tools/loadtest.nix | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix index 60158a5c17..cd4181990d 100644 --- a/nix/tools/loadtest.nix +++ b/nix/tools/loadtest.nix @@ -260,7 +260,7 @@ let } '' ${vegeta}/bin/vegeta encode "$_arg_file" \ - | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by("\(.code) \(.method) \(.url)") | map({("\(.[0].code) \(.[0].method) \(.[0].url)" | sub("http://postgrest";"")): map(.latency) | min / 10e3 }) | .[]' \ + | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by("\(.code) \(.method) \(.url)") | map({("\(.[0].code) \(.[0].method) \(.[0].url)" | sub("http://postgrest";"")): map(.latency) | sort | .[length * 0.95 | floor] / 10e3 }) | .[]' \ | ${jq}/bin/jq --arg branch "$(basename "$_arg_file" .bin)" '. 
+ {branch: $branch}' ''; @@ -274,8 +274,8 @@ let import pandas as pd pd.read_json(sys.stdin) \ - .rename(columns={'min': 'min latency [μs]'}) \ - .set_index('min latency [μs]') \ + .rename(columns={'p95': 'P95 latency [μs]'}) \ + .set_index('P95 latency [μs]') \ .drop(['branch']) \ .convert_dtypes() \ .to_markdown(sys.stdout, floatfmt='.1f') @@ -298,8 +298,8 @@ let echo -e "## Loadtest results $marker\n" find loadtest -type f -iname '*.bin' -exec ${reporter} {} \; \ - | ${jq}/bin/jq '[paths(scalars) as $path | {min: $path | join("."), (.branch): getpath($path)}]' \ - | ${jq}/bin/jq --slurp 'flatten | group_by(.min) | map(add)' \ + | ${jq}/bin/jq '[paths(scalars) as $path | {p95: $path | join("."), (.branch): getpath($path)}]' \ + | ${jq}/bin/jq --slurp 'flatten | group_by(.p95) | map(add)' \ | ${toMarkdown} echo -e "\n\n## Loadtest elapsed seconds vs CPU/MEM usage $marker\n" From 5f3b6b5851e76db78df2dc27c136d95c3aab675f Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Tue, 19 May 2026 12:32:56 +0200 Subject: [PATCH 7/7] nix(loadtest): report p50 latency in μs Report the median (P50) latency per request group instead of P95. Vegeta encodes each result's latency in nanoseconds, so divide by 1e3 to get the microseconds that the column label promises; the previous divisor 10e3 is 10000 in jq and made every reported value ten times too small. --- nix/tools/loadtest.nix | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nix/tools/loadtest.nix b/nix/tools/loadtest.nix index cd4181990d..9cbc81c11c 100644 --- a/nix/tools/loadtest.nix +++ b/nix/tools/loadtest.nix @@ -260,7 +260,7 @@ let } '' ${vegeta}/bin/vegeta encode "$_arg_file" \ - | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by("\(.code) \(.method) \(.url)") | map({("\(.[0].code) \(.[0].method) \(.[0].url)" | sub("http://postgrest";"")): map(.latency) | sort | .[length * 0.95 | floor] / 10e3 }) | .[]' \ + | ${jq}/bin/jq --slurp 'map(select(.url != "")) | group_by("\(.code) \(.method) \(.url)") | map({("\(.[0].code) \(.[0].method) \(.[0].url)" | sub("http://postgrest";"")): map(.latency) | sort | .[length * 0.50 | floor] / 1e3 }) | .[]' \ | ${jq}/bin/jq --arg branch "$(basename "$_arg_file" .bin)" '. 
+ {branch: $branch}' ''; @@ -274,8 +274,8 @@ let import pandas as pd pd.read_json(sys.stdin) \ - .rename(columns={'p95': 'P95 latency [μs]'}) \ - .set_index('P95 latency [μs]') \ + .rename(columns={'p50': 'P50 latency [μs]'}) \ + .set_index('P50 latency [μs]') \ .drop(['branch']) \ .convert_dtypes() \ .to_markdown(sys.stdout, floatfmt='.1f') @@ -298,8 +298,8 @@ let echo -e "## Loadtest results $marker\n" find loadtest -type f -iname '*.bin' -exec ${reporter} {} \; \ - | ${jq}/bin/jq '[paths(scalars) as $path | {p95: $path | join("."), (.branch): getpath($path)}]' \ - | ${jq}/bin/jq --slurp 'flatten | group_by(.p95) | map(add)' \ + | ${jq}/bin/jq '[paths(scalars) as $path | {p50: $path | join("."), (.branch): getpath($path)}]' \ + | ${jq}/bin/jq --slurp 'flatten | group_by(.p50) | map(add)' \ | ${toMarkdown} echo -e "\n\n## Loadtest elapsed seconds vs CPU/MEM usage $marker\n"