fix: gauntlet cycle 1 must-fixes for PR-5.1.5 (skip-scan hardening + coverage)

connortsui20 · connortsui20 · commit 572b3bdf6ee9 · 2026-06-11T15:34:11.000-04:00
Applies the four must-fix and three cheap should-fix findings from the PR-5.1.5
sub-phase gauntlet (preset pr-2, parallel executors):

- Branch-ordinal hardening: every skip-scan successor/latest probe now tags its
  UNION ALL arms with a constant ordinal and selects via ORDER BY br LIMIT 1,
  so the choice is SQL-guaranteed instead of relying on Append's undocumented
  syntactic arm order. Verified equivalent + still index-descent-shaped on the
  2.1M-row prototype container (21ms -> 31ms).
- NULLS-LAST fidelity tests: a stamped row beats a newer NULL-stamped row, and
  an all-NULL-stamped series still surfaces via the fallback arm.
- Summary successor-branch tests: multi-format engine, second engine, and
  second query_idx all survive enumeration.
- Discovery-parity test: collectQueryGroups equals the replaced GROUP BY oracle
  over a fixture spanning two datasets, every NULL/non-NULL variant and scale
  combination, and two storages.
- Migration 006's backfill UPDATE is now exercised against pre-existing rows
  (apply 001, seed, apply the rest, assert stamped + no drift).
- The index-shape test parses INCLUDE payloads instead of silently swallowing
  them, pins INCLUDE (value_ns) and commit_timestamp DESC on the summary index.
- Docs: collectGroups docblock re-attached, README pool default 4 -> 8, e2e
  schema init now includes 007 with an accurate comment, orphan-row divergence
  documented in summary.ts, stale DISTINCT ON comment reworded.

Deferred (per the synthesizer's should-fix/nit triage): deriving
SUMMARY_CONCURRENCY from the pool config, sharing sargableDimEq with
summary.ts.

Signed-off-by: "Connor Tsui" <connor@spiraldb.com>
diff --git a/benchmarks-website/migrate/tests/postgres_e2e.rs b/benchmarks-website/migrate/tests/postgres_e2e.rs
@@ -31,14 +31,18 @@ use vortex_bench_migrate::verify::run_postgres_value_verify;
 use vortex_bench_server::family;
 use vortex_bench_server::schema::COMMITS_DDL;
 
-/// The authoritative Postgres schema, applied to the container at init: the base
-/// DDL plus the 006 read-path migration, whose denormalized
-/// `query_measurements.commit_timestamp` column the loader's post-COPY
-/// denormalization UPDATE requires (matching the prod target, where every
-/// migration is applied before a load).
+/// The Postgres schema applied to the container at init: the schema-shape
+/// migrations the loader touches -- the 001 base DDL, the 006 read-path
+/// migration (whose denormalized `query_measurements.commit_timestamp` column
+/// the loader's post-COPY denormalization UPDATE requires), and the 007
+/// covering-index swap (index-only, but kept so the container's index set
+/// matches prod's). The 002-005 role/grant migrations are deliberately
+/// omitted: they configure RDS auth, which a throwaway container neither has
+/// nor needs.
 const SCHEMA_SQL: &str = concat!(
     include_str!("../../../migrations/001_initial_schema.sql"),
     include_str!("../../../migrations/006_read_path_perf.sql"),
+    include_str!("../../../migrations/007_summary_covering_index.sql"),
 );
 
 /// Per-table row counts the fixture loads. Drives the count assertions.
diff --git a/benchmarks-website/web/README.md b/benchmarks-website/web/README.md
@@ -35,7 +35,7 @@ Connection config is read by `lib/db.ts`:
 | `BENCH_DB_REGION` | for IAM | AWS region for the RDS IAM signer; required when no password is set. IAM token signing also needs AWS credentials in the runtime environment. |
 | `BENCH_DB_SSL` | no (`verify-full`) | `verify-full` validates the certificate chain and hostname; `disable` is for local non-TLS containers only. Any other value fails loudly. |
 | `BENCH_DB_CA` | prod | PEM contents of the Amazon RDS CA bundle; Node's trust store does not include the RDS roots, so `verify-full` against RDS fails without it. |
-| `BENCH_DB_POOL_MAX` | no (4) | Max pool connections per serverless instance. |
+| `BENCH_DB_POOL_MAX` | no (8) | Max pool connections per serverless instance; the per-render summary fan-out (`SUMMARY_CONCURRENCY`) is sized to this default. |
 
 ## CDN caching
 
diff --git a/benchmarks-website/web/lib/groups.test.ts b/benchmarks-website/web/lib/groups.test.ts
@@ -464,6 +464,189 @@ describe.skipIf(!dockerAvailable())('summary math fidelity (testcontainers Postg
 
 // Slug-decode rejection short-circuits to a 400 before any DB call, so this
 // runs without Docker, matching the chart route's input-validation contract.
+// PR-5.1.5 read-path skip-scan fidelity. The canonical fixture above cannot
+// exercise these paths: it seeds a single query group with one format per
+// engine and stamps every commit_timestamp, so the summary skip scan's
+// format-successor branch, the NULLS-LAST latest emulation (a transient
+// NULL-stamped row must not beat an older stamped row; an all-NULL series must
+// still appear via the fallback arm), and most of the discovery successor's
+// NULL-partition branches never execute. This block seeds exactly those shapes.
+describe.skipIf(!dockerAvailable())(
+  'read-path skip-scan fidelity (testcontainers Postgres)',
+  () => {
+    let container: StartedPostgreSqlContainer;
+
+    const COMMIT_A = 'a'.repeat(40);
+    const COMMIT_B = 'b'.repeat(40);
+
+    beforeAll(async () => {
+      container = await startBenchContainer();
+      const pool = getPool();
+      // COMMIT_B is NEWER than COMMIT_A: the NULLS-LAST probe must still prefer
+      // COMMIT_A's stamped row over COMMIT_B's unstamped one.
+      await pool.query(
+        `INSERT INTO commits (commit_sha, timestamp, message, tree_sha, url) VALUES
+         ($1, '2026-05-01T12:00:00Z', 'older', $3, $4),
+         ($2, '2026-05-02T12:00:00Z', 'newer', $3, $5)`,
+        [COMMIT_A, COMMIT_B, TREE_SHA, commitUrl(COMMIT_A), commitUrl(COMMIT_B)],
+      );
+      // One v2-allowlisted query group (tpch / NULL / '1' / nvme) shaped to fire
+      // every summary successor branch: e1 has TWO formats (format successor),
+      // e2 is a second engine (engine successor), q2 exists for e1:f1 (query_idx
+      // successor). e1:f1 additionally carries a NEWER NULL-stamped row that must
+      // lose to the stamped 111, and e9:f9 is an all-NULL-stamped series that
+      // must surface through the fallback arm.
+      const rows: ReadonlyArray<
+        readonly [number, string, number, string, string, number, boolean]
+      > = [
+        // [measurement_id, sha, query_idx, engine, format, value_ns, stamped]
+        [1, COMMIT_A, 1, 'e1', 'f1', 111, true],
+        [2, COMMIT_B, 1, 'e1', 'f1', 222, false],
+        [3, COMMIT_A, 1, 'e1', 'f2', 444, true],
+        [4, COMMIT_A, 1, 'e2', 'f1', 555, true],
+        [5, COMMIT_A, 2, 'e1', 'f1', 666, true],
+        [6, COMMIT_B, 1, 'e9', 'f9', 333, false],
+      ];
+      for (const [mid, sha, queryIdx, engine, format, valueNs, stamped] of rows) {
+        await pool.query(
+          `INSERT INTO query_measurements
+           (measurement_id, commit_sha, dataset, dataset_variant, scale_factor,
+            query_idx, storage, engine, format, value_ns, all_runtimes_ns,
+            commit_timestamp)
+         VALUES ($1, $2, 'tpch', NULL, '1', $3, 'nvme', $4, $5, $6, '{1}'::bigint[],
+                 CASE WHEN $7 THEN (SELECT timestamp FROM commits WHERE commit_sha = $2)
+                      ELSE NULL END)`,
+          [mid, sha, queryIdx, engine, format, valueNs, stamped],
+        );
+      }
+      // Discovery fan-out: every NULL/non-NULL combination of the two nullable
+      // dimensions across two storages and two query indices, plus a second
+      // sparse dataset, so all 15 successor branches and both NULL-partition
+      // steps execute against the GROUP BY oracle below.
+      let mid = 100;
+      for (const variant of ['v1', 'v2', null]) {
+        for (const scale of ['1', '10', null]) {
+          for (const storage of ['nvme', 's3']) {
+            for (const queryIdx of [1, 2]) {
+              mid += 1;
+              await pool.query(
+                `INSERT INTO query_measurements
+                 (measurement_id, commit_sha, dataset, dataset_variant, scale_factor,
+                  query_idx, storage, engine, format, value_ns, all_runtimes_ns)
+               VALUES ($1, $2, 'alpha', $3, $4, $5, $6, 'e1', 'f1', 1, '{1}'::bigint[])`,
+                [mid, COMMIT_A, variant, scale, queryIdx, storage],
+              );
+            }
+          }
+        }
+      }
+      await pool.query(
+        `INSERT INTO query_measurements
+         (measurement_id, commit_sha, dataset, dataset_variant, scale_factor,
+          query_idx, storage, engine, format, value_ns, all_runtimes_ns)
+       VALUES (200, $1, 'beta', NULL, NULL, 7, 's3', 'e1', 'f1', 1, '{1}'::bigint[])`,
+        [COMMIT_A],
+      );
+    });
+
+    afterAll(async () => {
+      await resetPool();
+      await container.stop();
+    });
+
+    it('prefers a stamped row over a newer NULL-stamped row and keeps all-NULL series', async () => {
+      const summary = await collectGroupSummary(
+        {
+          k: 'QueryGroup',
+          dataset: 'tpch',
+          dataset_variant: null,
+          scale_factor: '1',
+          storage: 'nvme',
+        },
+        [],
+      );
+      if (summary === null || summary.type !== 'queryBenchmark') {
+        throw new Error(`expected queryBenchmark summary, got ${summary?.type}`);
+      }
+      const byName = new Map(summary.rankings.map((r) => [r.name, r.totalRuntime]));
+      // e1:f1's latest for Q1 is the STAMPED 111, not the newer NULL-stamped 222
+      // (plus its Q2 value 666); e9:f9 has only NULL-stamped rows and must still
+      // appear via the fallback arm. Flipping the probe to a plain
+      // `commit_timestamp DESC` (NULLS FIRST) order fails this test.
+      expect(byName.get('e1:f1')).toBeCloseTo(111 + 666, 6);
+      expect(byName.get('e9:f9')).toBeCloseTo(333, 6);
+    });
+
+    it('enumerates the format, engine, and query_idx successor branches', async () => {
+      const summary = await collectGroupSummary(
+        {
+          k: 'QueryGroup',
+          dataset: 'tpch',
+          dataset_variant: null,
+          scale_factor: '1',
+          storage: 'nvme',
+        },
+        [],
+      );
+      if (summary === null || summary.type !== 'queryBenchmark') {
+        throw new Error(`expected queryBenchmark summary, got ${summary?.type}`);
+      }
+      const byName = new Map(summary.rankings.map((r) => [r.name, r.totalRuntime]));
+      // Four distinct series: e1's second format (format successor), e2 (engine
+      // successor), and e1:f1's q2 row (query_idx successor) all survive.
+      expect([...byName.keys()].sort()).toEqual(['e1:f1', 'e1:f2', 'e2:f1', 'e9:f9']);
+      expect(byName.get('e1:f2')).toBeCloseTo(444, 6);
+      expect(byName.get('e2:f1')).toBeCloseTo(555, 6);
+    });
+
+    it('discovery skip scan matches the GROUP BY oracle across NULL partitions', async () => {
+      // The replaced GROUP BY is the oracle: identical tuples, identical
+      // NULLS FIRST presentation order, computed over the same seeded data.
+      const oracle = await getPool().query<{
+        dataset: string;
+        dataset_variant: string | null;
+        scale_factor: string | null;
+        storage: string;
+        query_idx: number;
+      }>(
+        `SELECT dataset, dataset_variant, scale_factor, storage, query_idx
+         FROM query_measurements
+        GROUP BY dataset, dataset_variant, scale_factor, storage, query_idx
+        ORDER BY dataset, dataset_variant NULLS FIRST,
+                 scale_factor NULLS FIRST, storage, query_idx`,
+      );
+      const expected = new Map<string, string[]>();
+      for (const row of oracle.rows) {
+        const groupSlug = groupKeyToSlug({
+          k: 'QueryGroup',
+          dataset: row.dataset,
+          dataset_variant: row.dataset_variant,
+          scale_factor: row.scale_factor,
+          storage: row.storage,
+        });
+        const chartSlug = chartKeyToSlug({
+          k: 'QueryMeasurement',
+          dataset: row.dataset,
+          dataset_variant: row.dataset_variant,
+          scale_factor: row.scale_factor,
+          storage: row.storage,
+          query_idx: row.query_idx,
+        });
+        const charts = expected.get(groupSlug) ?? [];
+        charts.push(chartSlug);
+        expected.set(groupSlug, charts);
+      }
+      const groups = await collectGroups();
+      const actual = new Map<string, string[]>(
+        groups
+          .filter((g) => groupKeyFromSlug(g.slug).k === 'QueryGroup')
+          .map((g) => [g.slug, g.charts.map((c) => c.slug)]),
+      );
+      expect(actual).toEqual(expected);
+    });
+  },
+);
+
 describe('GET /api/group/[slug] input validation (no DB)', () => {
   it('returns 400 for a structurally malformed slug', async () => {
     const slug = 'not-a-slug';
diff --git a/benchmarks-website/web/lib/queries.ts b/benchmarks-website/web/lib/queries.ts
@@ -770,10 +770,16 @@ const DISCOVERY_COLS = 'q.dataset, q.dataset_variant, q.scale_factor, q.storage,
  * query_idx)` tuple in index order (ASC, NULLS LAST on the two nullable
  * columns). A single row comparison cannot express this (it would not be a
  * btree index qual past column 1, and NULL components poison it), so the
- * successor is a `UNION ALL` of mutually-ordered single-inequality branches,
- * evaluated lazily under the caller's `LIMIT 1` (Append stops at the first
- * row): deepest level first (next query_idx within the same group), then next
- * storage, scale_factor, dataset_variant, dataset.
+ * successor is a `UNION ALL` of single-inequality branches that partition the
+ * tuples greater than `s` -- deepest level first (next query_idx within the
+ * same group), then next storage, scale_factor, dataset_variant, dataset.
+ * Every qualifying row satisfies exactly one branch and all of branch N's rows
+ * precede branch N+1's in tuple order, so the successor is the row from the
+ * lowest-numbered non-empty branch: each branch carries a constant `br`
+ * ordinal and the caller selects via `ORDER BY br LIMIT 1`, a SQL-guaranteed
+ * choice rather than a reliance on Append's (undocumented) syntactic arm
+ * order. See `collectQuerySummary` for the same construction and its cost
+ * note.
  *
  * The nullable levels (scale_factor, dataset_variant) follow NULLS LAST order
  * with two branches each: `col > s.col` walks the non-NULL values (vacuously
@@ -827,7 +833,7 @@ function discoverySuccessorSql(): string {
   branches.push('q.dataset > s.dataset');
   return branches
     .map(
-      (branch) => `(SELECT ${DISCOVERY_COLS}
+      (branch, i) => `(SELECT ${i + 1} AS br, ${DISCOVERY_COLS}
              FROM query_measurements q
             WHERE ${branch}
             ORDER BY ${DISCOVERY_COLS}
@@ -862,6 +868,7 @@ async function collectQueryGroups(): Promise<Group[]> {
         FROM tuples s
         CROSS JOIN LATERAL (
           ${discoverySuccessorSql()}
+          ORDER BY br
           LIMIT 1
         ) nxt
     )
@@ -1042,12 +1049,6 @@ function discoverGroups(groupKind: GroupKind): Promise<Group[]> {
   }
 }
 
-/**
- * Collect every group + chart link derivable from the data, the shared
- * implementation behind `GET /api/groups`. Iterates the [`FAMILIES`] registry
- * in order, attaches each group's summary + description, then applies the
- * canonical [`GROUP_ORDER`].
- */
 /**
  * Bound on how many per-group summary queries run concurrently in
  * [`collectGroups`] (PR-5.1.5 fix e). Kept at the `BENCH_DB_POOL_MAX` default so
@@ -1086,8 +1087,14 @@ async function mapWithConcurrency<T, R>(
   return results;
 }
 
+/**
+ * Collect every group + chart link derivable from the data, the shared
+ * implementation behind `GET /api/groups`. Iterates the [`FAMILIES`] registry
+ * in order, attaches each group's summary + description, then applies the
+ * canonical [`GROUP_ORDER`].
+ */
 export async function collectGroups(): Promise<Group[]> {
-  // Discover families in parallel — each scans a different fact table, so they
+  // Discover families in parallel -- each scans a different fact table, so they
   // overlap rather than running one after another (PR-5.1.5 fix e). Concat
   // preserves the FAMILIES registry order the final sort expects.
   const perFamily = await Promise.all(FAMILIES.map((family) => discoverGroups(family.groupKind)));
diff --git a/benchmarks-website/web/lib/summary.ts b/benchmarks-website/web/lib/summary.ts
@@ -371,10 +371,17 @@ async function collectQuerySummary(
   //    are index columns 5-7 (the planner degrades the row form to a filter over
   //    a full scan). It is instead three single-column-inequality branches (next
   //    format within the series' query_idx+engine, then next engine within its
-  //    query_idx, then next query_idx), each fully index-sargable. `UNION ALL`
-  //    under `LIMIT 1` evaluates the branches lazily in order (Append stops at
-  //    the first row), and branch order equals tuple-successor order, so the
-  //    first non-empty branch IS the successor.
+  //    query_idx, then next query_idx), each fully index-sargable. The branches
+  //    partition the tuples greater than the current series (every qualifying
+  //    row satisfies exactly one branch, and all of branch N's rows precede all
+  //    of branch N+1's rows in tuple order), so the successor is the row from
+  //    the lowest-numbered non-empty branch. Each branch carries a constant
+  //    `br` ordinal and the union is selected via `ORDER BY br LIMIT 1`, which
+  //    makes that choice a SQL-guaranteed ordering rather than a reliance on
+  //    Append evaluating `UNION ALL` arms in syntactic order (which Postgres
+  //    does today but does not document). The ordinal sort costs at most one
+  //    extra single-row descent per branch per step (~1.5x measured), not the
+  //    full-scan cliff the skip scan exists to avoid.
   //
   //  - Every probe's ORDER BY spells out the full index prefix (dataset,
   //    dataset_variant, scale_factor, storage, ...) even though those columns
@@ -391,10 +398,17 @@ async function collectQuerySummary(
   //    index is `commit_timestamp DESC`, i.e. NULLS FIRST, so that order is not
   //    index-provided. The per-series latest probe therefore takes the newest
   //    `commit_timestamp IS NOT NULL` row first (index-ordered descent past the
-  //    NULL block) and falls back to an arbitrary NULL-timestamp row only when
-  //    the series has no timestamped rows, which is exactly the NULLS LAST
-  //    semantics. (Which row wins among all-NULL ties is unspecified, as it
-  //    already was under `DISTINCT ON`.)
+  //    NULL block, ordinal 1) and falls back to an arbitrary NULL-timestamp row
+  //    (ordinal 2) only when the series has no timestamped rows, which is
+  //    exactly the NULLS LAST semantics; the two branches are selected via the
+  //    same `ORDER BY br LIMIT 1` guarantee as the successor probe. (Which row
+  //    wins among all-NULL ties is unspecified, as it already was under
+  //    `DISTINCT ON`. One deliberate divergence from the replaced query: the
+  //    old form's `JOIN commits` silently excluded an orphan row whose
+  //    commit_sha has no commits row, while the skip scan never joins commits,
+  //    so such an orphan -- impossible unless a writer violates the
+  //    commits-upsert-first invariant -- would now surface via the NULL
+  //    fallback instead of vanishing. Fail-visible is preferred.)
   //
   // The `value_ns > 0` filter rides inside every probe: it is read from the
   // index leaf (INCLUDE), so the enumeration lands directly on series that have
@@ -429,7 +443,7 @@ async function collectQuerySummary(
       SELECT nxt.query_idx, nxt.engine, nxt.format
         FROM series s
         CROSS JOIN LATERAL (
-          (SELECT q.query_idx, q.engine, q.format
+          (SELECT 1 AS br, q.query_idx, q.engine, q.format
              FROM query_measurements q
             WHERE ${groupPred}
               AND q.query_idx = s.query_idx
@@ -439,7 +453,7 @@ async function collectQuerySummary(
             ORDER BY ${indexOrder}
             LIMIT 1)
           UNION ALL
-          (SELECT q.query_idx, q.engine, q.format
+          (SELECT 2 AS br, q.query_idx, q.engine, q.format
              FROM query_measurements q
             WHERE ${groupPred}
               AND q.query_idx = s.query_idx
@@ -448,13 +462,14 @@ async function collectQuerySummary(
             ORDER BY ${indexOrder}
             LIMIT 1)
           UNION ALL
-          (SELECT q.query_idx, q.engine, q.format
+          (SELECT 3 AS br, q.query_idx, q.engine, q.format
              FROM query_measurements q
             WHERE ${groupPred}
               AND q.query_idx > s.query_idx
               AND q.value_ns > 0
             ORDER BY ${indexOrder}
             LIMIT 1)
+          ORDER BY br
           LIMIT 1
         ) nxt
     )
@@ -463,7 +478,7 @@ async function collectQuerySummary(
            latest.value_ns AS value_ns
       FROM series s
       CROSS JOIN LATERAL (
-        (SELECT q.value_ns::float8 AS value_ns
+        (SELECT 1 AS br, q.value_ns::float8 AS value_ns
            FROM query_measurements q
           WHERE ${groupPred}
             AND q.query_idx = s.query_idx
@@ -474,7 +489,7 @@ async function collectQuerySummary(
           ORDER BY ${indexOrder}, q.commit_timestamp DESC
           LIMIT 1)
         UNION ALL
-        (SELECT q.value_ns::float8 AS value_ns
+        (SELECT 2 AS br, q.value_ns::float8 AS value_ns
            FROM query_measurements q
           WHERE ${groupPred}
             AND q.query_idx = s.query_idx
@@ -483,6 +498,7 @@ async function collectQuerySummary(
             AND q.value_ns > 0
             AND q.commit_timestamp IS NULL
           LIMIT 1)
+        ORDER BY br
         LIMIT 1
       ) latest
      ORDER BY s.query_idx, s.engine || ':' || s.format
diff --git a/scripts/test_migrate_schema.py b/scripts/test_migrate_schema.py