migrate+web: covering summary index + DISTINCT ON for index-only latest-per-series (PR-5.1.5 fix c)

connortsui20 · connortsui20 · commit a4834ba1fc94 · 2026-06-11T13:54:37.000-04:00
Measuring 006's idx_query_measurements_summary on prod showed the planner would not use it: value_ns (both the >0 filter and the projection) was off-index, so using the index meant a per-row heap fetch for every one of a group's ~870K rows -- the planner fell back to bitmap+sort (~8.6s warm for tpcds). Migration 007 replaces the index with the same key columns + INCLUDE (value_ns), and collectQuerySummary switches from a commits-join row_number() window to DISTINCT ON (query_idx, engine, format) ORDER BY commit_timestamp DESC. Measured on prod: Index Only Scan, Heap Fetches: 0, ~1.95s warm (down from 8.6s). 007 also drops the one-off idx_qm_summary_test created by hand to measure the design (no-op outside prod). Docker-verified: web vitest 211 + migrate pytest 24 (migration count 6->7) green; tsc/eslint/prettier/ruff clean.

Signed-off-by: "Connor Tsui" <connor@spiraldb.com>
diff --git a/benchmarks-website/web/lib/summary.ts b/benchmarks-website/web/lib/summary.ts
@@ -352,21 +352,23 @@ async function collectQuerySummary(
   // penalty model: each series scores the geomean of `(10 + value) / (10 +
   // best)` over every query, imputing a penalty where the series has no value.
   //
-  // "Latest" orders by the DENORMALIZED `commit_timestamp` (migration 006,
-  // PR-5.1.5 fix c) instead of joining `commits` and ordering by `c.timestamp`,
-  // so the whole window is backed by `idx_query_measurements_summary`
-  // (dataset, dataset_variant, scale_factor, storage, query_idx, engine, format,
-  // commit_timestamp DESC): the sargable group filter seeks the index prefix and
-  // the remaining (query_idx, engine, format, commit_timestamp DESC) order
-  // matches the PARTITION BY + ORDER BY, so the latest row per series is the
-  // first index entry per partition -- no `commits` join, no full-history scan,
-  // no work_mem window spill (was ~6.2s warm for tpcds at the prod seed).
-  // `NULLS LAST` keeps a transient NULL (a row inserted by a writer not yet
-  // populating `commit_timestamp`, before the post-deploy re-backfill) from
-  // winning "latest"; the timestamp value itself is the same `commits.timestamp`
-  // the join used, so the same-second-tie behavior (an accepted tradeoff) is
-  // unchanged. `$1` = dataset, `$2` = storage; variant/scale params append only
-  // when non-null.
+  // "Latest per series" is a `DISTINCT ON (query_idx, engine, format)` ordered by
+  // the DENORMALIZED `commit_timestamp DESC` (migrations 006/007, PR-5.1.5 fix c)
+  // instead of joining `commits` and `row_number()`-windowing the whole group.
+  // The covering index `idx_query_measurements_summary` (dataset, dataset_variant,
+  // scale_factor, storage, query_idx, engine, format, commit_timestamp DESC)
+  // INCLUDE (value_ns) makes this an Index Only Scan: the sargable group filter
+  // seeks the index prefix, the remaining (query_idx, engine, format,
+  // commit_timestamp DESC) order matches the DISTINCT ON + ORDER BY, and value_ns
+  // (both the `> 0` filter and the projection) is read from the index leaf so
+  // there is no heap fetch. ~1.95s warm for tpcds at the prod seed, vs ~6.2s for
+  // the original commits-join `row_number()` window (006's non-covering index was
+  // ignored because value_ns forced a per-row heap fetch -- see 007). `NULLS LAST`
+  // keeps a transient NULL (a row from a writer not yet populating
+  // `commit_timestamp`, before the post-deploy re-backfill) from winning "latest";
+  // the timestamp value is the same `commits.timestamp` the join used, so the
+  // same-second-tie behavior (an accepted tradeoff) is unchanged. `$1` = dataset,
+  // `$2` = storage; variant/scale params append only when non-null.
   const params: unknown[] = [dataset, storage];
   const variantPred =
     datasetVariant === null
@@ -377,24 +379,20 @@ async function collectQuerySummary(
       ? 'q.scale_factor IS NULL'
       : `q.scale_factor = $${params.push(scaleFactor)}`;
   const text = `
-    WITH latest AS (
-      SELECT q.query_idx,
-             q.engine || ':' || q.format AS series,
-             q.value_ns::float8 AS value_ns,
-             row_number() OVER (
-               PARTITION BY q.query_idx, q.engine, q.format
-               ORDER BY q.commit_timestamp DESC NULLS LAST
-             ) AS rn
-        FROM query_measurements q
-       WHERE q.dataset = $1
-         AND ${variantPred}
-         AND ${scalePred}
-         AND q.storage = $2
-         AND q.value_ns > 0
-    )
     SELECT query_idx, series, value_ns
-      FROM latest
-     WHERE rn = 1
+      FROM (
+        SELECT DISTINCT ON (q.query_idx, q.engine, q.format)
+               q.query_idx AS query_idx,
+               q.engine || ':' || q.format AS series,
+               q.value_ns::float8 AS value_ns
+          FROM query_measurements q
+         WHERE q.dataset = $1
+           AND ${variantPred}
+           AND ${scalePred}
+           AND q.storage = $2
+           AND q.value_ns > 0
+         ORDER BY q.query_idx, q.engine, q.format, q.commit_timestamp DESC NULLS LAST
+      ) latest
      ORDER BY query_idx, series
   `;
   const rows = (
diff --git a/migrations/007_summary_covering_index.sql b/migrations/007_summary_covering_index.sql
@@ -0,0 +1,31 @@
+-- SPDX-License-Identifier: Apache-2.0
+-- SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+-- migrate-schema: requires-superuser
+-- DROP/CREATE INDEX on `query_measurements` (owned by the RDS master), so this
+-- carries the same requires-superuser marker as 005/006 and is applied by the
+-- master, the operator path PR-5.0 used.
+
+-- Read-path performance follow-up to 006 (PR-5.1.5 fix c). Measuring the 006
+-- `idx_query_measurements_summary` against the prod seed showed the planner would
+-- not use it for the per-group "latest value per series" summary: `value_ns`
+-- (both the `value_ns > 0` filter and the projected value) was off the index, so
+-- using it meant a heap fetch for every one of a group's ~870K rows -- more
+-- expensive than the bitmap+sort the planner fell back to (~8.6s warm for tpcds).
+-- Adding `value_ns` as an INCLUDE (non-key) payload makes the index COVER the
+-- summary, so a `DISTINCT ON (query_idx, engine, format) ... ORDER BY
+-- commit_timestamp DESC` resolves to an Index Only Scan (Heap Fetches: 0) at
+-- ~1.95s warm. (A loose-index/recursive-CTE skip scan would reach ms but at a
+-- large SQL-complexity cost; ~2s under the bounded-concurrency summary fan-out is
+-- the pragmatic target for a CDN-fronted dashboard.)
+DROP INDEX IF EXISTS idx_query_measurements_summary;
+CREATE INDEX IF NOT EXISTS idx_query_measurements_summary
+    ON query_measurements (dataset, dataset_variant, scale_factor, storage,
+                           query_idx, engine, format, commit_timestamp DESC)
+    INCLUDE (value_ns);
+
+-- Cleanup: a one-off `idx_qm_summary_test` was created by hand on prod to measure
+-- the covering-index design before formalizing it here. It does not exist in
+-- testcontainers (this DROP is a no-op there) or any other environment; dropping
+-- it keeps prod in sync with the migration set.
+DROP INDEX IF EXISTS idx_qm_summary_test;
diff --git a/scripts/test_migrate_schema.py b/scripts/test_migrate_schema.py
@@ -682,11 +682,11 @@ def test_apply_uses_public_schema_under_custom_search_path(
 
 
 def test_real_migrations_apply_cleanly(conn: psycopg.Connection) -> None:
-    """The real migrations (`001` through `006`) apply against vanilla Postgres
+    """The real migrations (`001` through `007`) apply against vanilla Postgres
     and are recorded in the ledger in order."""
     applied = runner.apply(conn, REPO_MIGRATIONS_DIR)
 
-    assert applied == 6
+    assert applied == 7
     with conn.cursor() as cur:
         cur.execute("SELECT filename FROM public._applied_migrations ORDER BY filename")
         assert [row[0] for row in cur.fetchall()] == [
@@ -696,6 +696,7 @@ def test_real_migrations_apply_cleanly(conn: psycopg.Connection) -> None:
             "004_ingest_role.sql",
             "005_read_role.sql",
             "006_read_path_perf.sql",
+            "007_summary_covering_index.sql",
         ]
 
 
@@ -704,7 +705,7 @@ def test_real_migrations_idempotent(conn: psycopg.Connection) -> None:
     first = runner.apply(conn, REPO_MIGRATIONS_DIR)
     second = runner.apply(conn, REPO_MIGRATIONS_DIR)
 
-    assert first == 6
+    assert first == 7
     assert second == 0
 
 
@@ -1516,8 +1517,8 @@ def test_real_migrations_apply_as_non_superuser_createrole_master(
             assert is_super is False, "fidelity requires a non-superuser master login"
 
             applied = runner.apply(master, REPO_MIGRATIONS_DIR)
-            assert applied == 6, (
-                "all six real migrations must apply under the non-superuser master"
+            assert applied == 7, (
+                "all seven real migrations must apply under the non-superuser master"
             )
 
         # Verify, on the superuser connection, that the bootstrap produced a usable