Skip to content

Commit cbe2b97

Browse files
committed
fix: gauntlet cycle 2 must-fixes for PR-5.1.5 (operator docs + deterministic fallback)
- requires-superuser operator docs un-drifted: migrations/README.md's bootstrap ordering section and migrate-schema.py's comment/docstring/error text no longer enumerate a stale 002/004/005 list (006/007 carry the marker too); the in-file marker is named as authoritative. README also documents 006/007 and the drift-repairing commit_timestamp re-stamp SQL. - The query-summary all-NULL fallback arm now joins commits and orders by c.timestamp DESC NULLS LAST, so a series ingested entirely inside the transient unstamped window deterministically returns its newest value (matching the replaced join-ordered query); LEFT join keeps orphan rows fail-visible. Regression test seeds two unstamped rows and asserts the newer commit's value wins. - The migrate loader's denormalization UPDATE is now drift-repairing (IS DISTINCT FROM instead of IS NULL), matching the documented repair form. - summary.ts module header reworded (IS NOT DISTINCT FROM is no longer the query-summary mechanism); 006/007 historical-rationale note added to the skip-scan docblock; orphaned no-Docker comment in groups.test.ts re-attached to its describe; conn type annotation; _UPDATE_VALUE_MUTATIONS header note. Deferred (nits, cycle-2 triage): mapWithConcurrency unit test, poolMax default pin, summary-path coverage of the remaining groupPred pin combinations. Signed-off-by: "Connor Tsui" <connor@spiraldb.com>
1 parent 572b3bd commit cbe2b97

7 files changed

Lines changed: 118 additions & 54 deletions

File tree

benchmarks-website/migrate/src/postgres.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,14 +257,17 @@ pub fn load(duckdb_path: &Path, dsn: &str, ca_cert: Option<&Path>) -> Result<Loa
257257
// 006, the read path's latest-per-series sort key). The v3 DuckDB source has
258258
// no such column, so it cannot ride the COPY's 1:1 column mapping; it is
259259
// derived here from the already-loaded `commits` dim (`TABLE_SPECS` loads
260-
// `commits` first), inside the same all-or-nothing transaction.
260+
// `commits` first), inside the same all-or-nothing transaction. `IS DISTINCT
261+
// FROM` (rather than `IS NULL`) makes the stamp drift-repairing as well as
262+
// NULL-filling, the same repair form migrations/README.md documents for
263+
// operational re-stamping.
261264
let stamped = tx
262265
.execute(
263266
"UPDATE query_measurements q
264267
SET commit_timestamp = c.timestamp
265268
FROM commits c
266269
WHERE c.commit_sha = q.commit_sha
267-
AND q.commit_timestamp IS NULL",
270+
AND q.commit_timestamp IS DISTINCT FROM c.timestamp",
268271
&[],
269272
)
270273
.context("denormalizing commit_timestamp onto query_measurements")?;

benchmarks-website/web/lib/groups.test.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -462,8 +462,6 @@ describe.skipIf(!dockerAvailable())('summary math fidelity (testcontainers Postg
462462
});
463463
});
464464

465-
// Slug-decode rejection short-circuits to a 400 before any DB call, so this
466-
// runs without Docker, matching the chart route's input-validation contract.
467465
// PR-5.1.5 read-path skip-scan fidelity. The canonical fixture above cannot
468466
// exercise these paths: it seeds a single query group with one format per
469467
// engine and stamps every commit_timestamp, so the summary skip scan's
@@ -506,6 +504,10 @@ describe.skipIf(!dockerAvailable())(
506504
[4, COMMIT_A, 1, 'e2', 'f1', 555, true],
507505
[5, COMMIT_A, 2, 'e1', 'f1', 666, true],
508506
[6, COMMIT_B, 1, 'e9', 'f9', 333, false],
507+
// A second, OLDER unstamped row for e9:f9: the all-NULL fallback must
508+
// deterministically pick COMMIT_B's 333 (newest via the commits join),
509+
// not an arbitrary row.
510+
[7, COMMIT_A, 1, 'e9', 'f9', 999, false],
509511
];
510512
for (const [mid, sha, queryIdx, engine, format, valueNs, stamped] of rows) {
511513
await pool.query(
@@ -571,8 +573,11 @@ describe.skipIf(!dockerAvailable())(
571573
const byName = new Map(summary.rankings.map((r) => [r.name, r.totalRuntime]));
572574
// e1:f1's latest for Q1 is the STAMPED 111, not the newer NULL-stamped 222
573575
// (plus its Q2 value 666); e9:f9 has only NULL-stamped rows and must still
574-
// appear via the fallback arm. Flipping the probe to a plain
575-
// `commit_timestamp DESC` (NULLS FIRST) order fails this test.
576+
// appear via the fallback arm, which must pick the NEWEST commit's 333
577+
// over the older 999 (the fallback orders by the joined
578+
// commits.timestamp). Flipping the probe to a plain `commit_timestamp
579+
// DESC` (NULLS FIRST) order, or dropping the fallback's ORDER BY, fails
580+
// this test.
576581
expect(byName.get('e1:f1')).toBeCloseTo(111 + 666, 6);
577582
expect(byName.get('e9:f9')).toBeCloseTo(333, 6);
578583
});
@@ -647,6 +652,8 @@ describe.skipIf(!dockerAvailable())(
647652
},
648653
);
649654

655+
// Slug-decode rejection short-circuits to a 400 before any DB call, so this
656+
// runs without Docker, matching the chart route's input-validation contract.
650657
describe('GET /api/group/[slug] input validation (no DB)', () => {
651658
it('returns 400 for a structurally malformed slug', async () => {
652659
const slug = 'not-a-slug';

benchmarks-website/web/lib/summary.ts

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,13 @@
1010
* gated on a v2 dataset allowlist via [`queryGroupHasV2Summary`].
1111
*
1212
* Behaviour-preservation notes (substrate migration, DuckDB -> Postgres):
13-
* - `IS NOT DISTINCT FROM` gives `NULL == NULL` equality on the nullable
14-
* `dataset_variant` / `scale_factor` dims, exactly as the DuckDB query did.
13+
* - Nullable-dim equality (`dataset_variant` / `scale_factor`) in the
14+
* query-group summary is rendered as `col IS NULL` / `col = $n` per the
15+
* concrete key's build-time value, the index-sargable form of the DuckDB
16+
* `NULL == NULL` semantics (the same rule as `sargableDimEq` in
17+
* `queries.ts`; PR-5.1.5). The compression summaries' self-joins below are
18+
* the only remaining `IS NOT DISTINCT FROM` users -- there the dim is a
19+
* join column, not a build-time constant, and the tables are small.
1520
* - value columns are read `::float8` so node-postgres returns a JS `number`
1621
* matching the Rust `CAST(... AS DOUBLE)`, rather than the bigint-as-string
1722
* default.
@@ -355,7 +360,11 @@ async function collectQuerySummary(
355360
// "Latest per series" is a recursive-CTE skip scan (loose index scan) over the
356361
// covering index `idx_query_measurements_summary` (dataset, dataset_variant,
357362
// scale_factor, storage, query_idx, engine, format, commit_timestamp DESC)
358-
// INCLUDE (value_ns) from migrations 006/007 (PR-5.1.5 fix c). The previous
363+
// INCLUDE (value_ns) from migrations 006/007 (PR-5.1.5 fix c). Those
364+
// migrations' in-file rationale describes the interim `DISTINCT ON` consumer
365+
// and predates the cold-render decision that shipped this skip scan; the
366+
// files are frozen post-apply, so this comment (and migrations/README.md) is
367+
// the current record. The previous
359368
// `DISTINCT ON (query_idx, engine, format) ... ORDER BY commit_timestamp DESC
360369
// NULLS LAST` form scanned the group's entire history (~1.8M index entries,
361370
// ~2.4s warm for tpcds at the prod seed); the skip scan instead does one
@@ -398,17 +407,18 @@ async function collectQuerySummary(
398407
// index is `commit_timestamp DESC`, i.e. NULLS FIRST, so that order is not
399408
// index-provided. The per-series latest probe therefore takes the newest
400409
// `commit_timestamp IS NOT NULL` row first (index-ordered descent past the
401-
// NULL block, ordinal 1) and falls back to an arbitrary NULL-timestamp row
402-
// (ordinal 2) only when the series has no timestamped rows, which is
403-
// exactly the NULLS LAST semantics; the two branches are selected via the
404-
// same `ORDER BY br LIMIT 1` guarantee as the successor probe. (Which row
405-
// wins among all-NULL ties is unspecified, as it already was under
406-
// `DISTINCT ON`. One deliberate divergence from the replaced query: the
407-
// old form's `JOIN commits` silently excluded an orphan row whose
408-
// commit_sha has no commits row, while the skip scan never joins commits,
409-
// so such an orphan -- impossible unless a writer violates the
410-
// commits-upsert-first invariant -- would now surface via the NULL
411-
// fallback instead of vanishing. Fail-visible is preferred.)
410+
// NULL block, ordinal 1) and falls back to the NULL-timestamp rows
411+
// (ordinal 2) only when the series has no timestamped rows; the two
412+
// branches are selected via the same `ORDER BY br LIMIT 1` guarantee as
413+
// the successor probe. The fallback branch joins `commits` and orders by
414+
// `c.timestamp DESC NULLS LAST` so an all-unstamped series (one ingested
415+
// entirely inside the transient window) still returns its NEWEST value,
416+
// matching the replaced join-ordered query, rather than an arbitrary row.
417+
// The join is LEFT so an orphan row whose commit_sha has no commits row
418+
// -- impossible unless a writer violates the commits-upsert-first
419+
// invariant -- still surfaces (last) instead of vanishing as it did under
420+
// the old INNER JOIN; fail-visible is preferred. The fallback's join cost
421+
// is irrelevant: it executes only for series with zero stamped rows.
412422
//
413423
// The `value_ns > 0` filter rides inside every probe: it is read from the
414424
// index leaf (INCLUDE), so the enumeration lands directly on series that have
@@ -491,12 +501,14 @@ async function collectQuerySummary(
491501
UNION ALL
492502
(SELECT 2 AS br, q.value_ns::float8 AS value_ns
493503
FROM query_measurements q
504+
LEFT JOIN commits c ON c.commit_sha = q.commit_sha
494505
WHERE ${groupPred}
495506
AND q.query_idx = s.query_idx
496507
AND q.engine = s.engine
497508
AND q.format = s.format
498509
AND q.value_ns > 0
499510
AND q.commit_timestamp IS NULL
511+
ORDER BY c.timestamp DESC NULLS LAST
500512
LIMIT 1)
501513
ORDER BY br
502514
LIMIT 1

migrations/README.md

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -54,29 +54,65 @@ lexicographic order, which equals numeric order under this convention.
5454
`rds_iam` membership: the read service authenticates with a static password
5555
(Vercel has no AWS credentials to mint IAM tokens), and on RDS `rds_iam`
5656
membership forces IAM-only auth. Added during the Phase-4 operator-gate work.
57+
- `006_read_path_perf.sql` — denormalizes `commits.timestamp` onto
58+
`query_measurements.commit_timestamp` (with a one-time backfill) and adds the
59+
read-path-perf indexes (PR-5.1.5). Applied by the RDS master (it ALTERs a
60+
master-owned table), hence its `requires-superuser` marker.
61+
- `007_summary_covering_index.sql` — rebuilds the summary index with
62+
`INCLUDE (value_ns)` so the latest-per-series read is index-only (PR-5.1.5).
63+
Same `requires-superuser` marker as 006. Note: 006/007's in-file rationale
64+
describes the `DISTINCT ON` read path that shipped first; the read service
65+
later moved to recursive-CTE skip scans (the PR-5.1.5 cold-render fix) that
66+
use the same indexes, and the files are frozen post-apply (see Authoring
67+
rules), so their prose is historical. The current consumer is documented in
68+
`benchmarks-website/web/lib/summary.ts` / `queries.ts`.
5769

5870
This README + the runner ship in PR-1.2; `001`/`002` land in PR-1.3, `003` in
59-
PR-1.4, `004` in PR-2.1, and `005` in the Phase-4 operator-gate work (the v4
60-
read-service identity).
71+
PR-1.4, `004` in PR-2.1, `005` in the Phase-4 operator-gate work (the v4
72+
read-service identity), and `006`/`007` in PR-5.1.5 (read-path perf).
6173

62-
## Bootstrap ordering — `requires-superuser` migrations (002 / 004 / 005)
74+
## Re-stamping `commit_timestamp` (operational note)
6375

64-
Migrations `002_iam_db_user.sql`, `004_ingest_role.sql`, and `005_read_role.sql`
65-
carry a `-- migrate-schema: requires-superuser` marker comment. Before applying a marked
66-
file, the runner ([`scripts/migrate-schema.py`](../scripts/migrate-schema.py))
67-
asserts the connected role is **master-capable** — the capability proxy is
68-
`rolsuper OR rolcreaterole` (a true superuser locally, or the RDS master, which
69-
has `CREATEROLE`). If the connected role has neither, the runner raises
76+
`query_measurements.commit_timestamp` is a denormalized copy of
77+
`commits.timestamp` (006). Writers stamp it at insert; 006 backfilled
78+
pre-existing rows. If rows are ever observed unstamped or drifted (a writer
79+
predating the stamp, or a commits re-ingest that changed a timestamp), the
80+
repair is the drift-tolerant form of the backfill, run as any role with UPDATE
81+
on the table:
82+
83+
```sql
84+
UPDATE query_measurements q
85+
SET commit_timestamp = c.timestamp
86+
FROM commits c
87+
WHERE c.commit_sha = q.commit_sha
88+
AND q.commit_timestamp IS DISTINCT FROM c.timestamp;
89+
```
90+
91+
(`IS DISTINCT FROM` also fills NULLs, so this strictly supersedes the
92+
NULL-only 006 form. Follow a large repair with `VACUUM ANALYZE
93+
query_measurements` — the 006 prod backfill showed the planner needs it.)
94+
95+
## Bootstrap ordering — `requires-superuser` migrations
96+
97+
Migrations whose header carries a `-- migrate-schema: requires-superuser`
98+
marker comment (currently `002`, `004`, `005`, `006`, and `007`; the marker in
99+
the file is authoritative, not this list) must be applied by a master-capable
100+
role. Before applying a marked file, the runner
101+
([`scripts/migrate-schema.py`](../scripts/migrate-schema.py)) asserts the
102+
connected role is **master-capable** — the capability proxy is `rolsuper OR
103+
rolcreaterole` (a true superuser locally, or the RDS master, which has
104+
`CREATEROLE`). If the connected role has neither, the runner raises
70105
`PermissionError` and refuses to apply the file, rather than failing partway
71-
through its privileged `CREATE ROLE` / `GRANT` / `ALTER DEFAULT PRIVILEGES`
72-
statements.
73-
74-
The ordering contract this enforces: **apply the bootstrap migrations (002/004/005)
75-
as the RDS master before any `migrator`-role deploy.** The `migrator` IAM user
76-
that `schema-deploy.yml` assumes into is itself created by `002` and is not
77-
master-capable, so it cannot apply `002`/`004`/`005`; those must land first under the
78-
master. `001` (plain DDL) and `003` (a ledger grant) carry no marker and apply
79-
under either role. A future migration that needs a master-capable role (another
80-
`CREATE ROLE`, `ALTER DEFAULT PRIVILEGES`, or other superuser-only DDL) should
81-
carry `-- migrate-schema: requires-superuser` on a comment line so the same
106+
through its privileged `CREATE ROLE` / `GRANT` / `ALTER DEFAULT PRIVILEGES` /
107+
master-owned-table DDL statements.
108+
109+
The ordering contract this enforces: **apply every marked migration as the RDS
110+
master before any `migrator`-role deploy.** The `migrator` IAM user that
111+
`schema-deploy.yml` assumes into is itself created by `002` and is not
112+
master-capable, so it cannot apply the marked files; those must land first
113+
under the master. `001` (plain DDL) and `003` (a ledger grant) carry no marker
114+
and apply under either role. A future migration that needs a master-capable
115+
role (another `CREATE ROLE`, `ALTER DEFAULT PRIVILEGES`, DDL on master-owned
116+
tables, or other superuser-only DDL) should carry
117+
`-- migrate-schema: requires-superuser` on a comment line so the same
82118
preflight guards it.

scripts/migrate-schema.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,11 @@
6060

6161
# A migration may require privileges a least-privilege deploy role (the
6262
# `migrator` IAM role) does not hold: creating login roles, self-granting role
63-
# membership, or `ALTER DEFAULT PRIVILEGES FOR ROLE`. The bootstrap migrations
64-
# `002_iam_db_user.sql`, `004_ingest_role.sql`, and `005_read_role.sql` are exactly
65-
# this class and MUST be applied by a master-capable role (the RDS master, or a
66-
# superuser locally) in the one-time bootstrap, BEFORE any `migrator`-connected
67-
# deploy. That ordering is
63+
# membership, `ALTER DEFAULT PRIVILEGES FOR ROLE`, or DDL on master-owned
64+
# tables. The role-bootstrap migrations (002/004/005) and the master-owned-table
65+
# migrations (006/007) are exactly this class and MUST be applied by a
66+
# master-capable role (the RDS master, or a superuser locally) BEFORE any
67+
# `migrator`-connected deploy reaches them. That ordering is
6868
# documented in each migration's header but was previously enforced nowhere: a
6969
# `migrator`-connected `apply` reaching `004` would fail deep inside a `DO` block
7070
# with an opaque `InsufficientPrivilege` and roll the migration back mid-statement.
@@ -113,10 +113,11 @@ def _role_is_master_capable(conn: psycopg.Connection) -> bool:
113113
def _assert_master_capable(conn: psycopg.Connection, filename: str) -> None:
114114
"""Raise `PermissionError` if the connected role cannot apply a marked migration.
115115
116-
Enforces the documented bootstrap ordering (the marked `requires-superuser`
117-
migrations -- currently `002`/`004`/`005` -- are applied by the master before
118-
any `migrator` run) with a clear, actionable message instead of
119-
an opaque mid-`DO`-block `InsufficientPrivilege` rollback.
116+
Enforces the documented bootstrap ordering (every marked `requires-superuser`
117+
migration is applied by the master before any `migrator` run; the marker in
118+
each file is authoritative -- see migrations/README.md) with a clear,
119+
actionable message instead of an opaque mid-`DO`-block
120+
`InsufficientPrivilege` rollback.
120121
"""
121122
if _role_is_master_capable(conn):
122123
return
@@ -126,9 +127,9 @@ def _assert_master_capable(conn: psycopg.Connection, filename: str) -> None:
126127
f"migration {filename} is marked `{_REQUIRES_SUPERUSER_DIRECTIVE}` and must be "
127128
f"applied by a master-capable role (a superuser, or the RDS master with "
128129
f"CREATEROLE), but the connected role `{current_user}` has neither rolsuper nor "
129-
f"rolcreaterole. Apply all marked `requires-superuser` bootstrap migrations "
130-
f"(currently 002/004/005) as the RDS master before any migrator deploy; see "
131-
f"migrations/README.md and the migration header."
130+
f"rolcreaterole. Apply all marked `requires-superuser` migrations as the RDS "
131+
f"master before any migrator deploy; see migrations/README.md and the "
132+
f"migration header."
132133
)
133134

134135

@@ -221,7 +222,7 @@ def apply(conn: psycopg.Connection, migrations_dir: Path) -> int:
221222
"DDL."
222223
)
223224
# Bootstrap ordering guard: a migration marked `requires-superuser`
224-
# (002/004/005) must be applied by a master-capable role. Check BEFORE
225+
# must be applied by a master-capable role. Check BEFORE
225226
# opening the migration's transaction so a least-privilege `migrator`
226227
# connection fails loud + early with no partial DDL, rather than rolling
227228
# back mid-`DO`-block with an opaque InsufficientPrivilege.

scripts/test_migrate_schema.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -832,7 +832,9 @@ def test_real_migrations_index_columns(conn: psycopg.Connection) -> None:
832832
)
833833

834834

835-
def test_006_backfills_preexisting_query_measurements(conn, tmp_path: Path) -> None:
835+
def test_006_backfills_preexisting_query_measurements(
836+
conn: psycopg.Connection, tmp_path: Path
837+
) -> None:
836838
"""006's one-time backfill UPDATE fills `commit_timestamp` on PRE-EXISTING rows.
837839
838840
This is the statement that stamped the 4.85M prod rows at apply time, and it is

scripts/test_post_ingest_postgres.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,9 @@ def test_query_measurement_insert_populates_commit_timestamp(schema_conn: psycop
330330
# CONFLICT DO UPDATE SET list owns (dim columns deliberately excluded). Mirrors
331331
# the SET lists in benchmarks-website/server/src/ingest.rs; a stale/incorrect SET
332332
# list in any table is caught by the parametrized update test below.
333+
# `commit_timestamp` is also in the query_measurements SET list but is DERIVED
334+
# from `commits` (not a record field), so it is pinned by
335+
# `test_query_measurement_insert_populates_commit_timestamp` instead of here.
333336
_UPDATE_VALUE_MUTATIONS = {
334337
"query_measurement": {
335338
"value_ns": 4242,

0 commit comments

Comments
 (0)