perf(planner): port SQLite's covering-scan width cost model (B9h slice)

MagicalTux · claude · MagicalTux · commit e91ae0d35f71 · 2026-07-05T09:23:49.000+09:00
A no-`WHERE` bare projection (and `count(*)`) is answered from a covering
secondary index only when that index's estimated row is *strictly* narrower than
the table's — SQLite's `estimateTableWidth` / `estimateIndexWidth` cost model,
now ported exactly rather than approximated. The table width is the sum of each
column's size estimate (`+1` when there is no INTEGER PRIMARY KEY); an index's is
the sum of its key columns' estimates plus one for the trailing rowid; the
per-column estimate is 1 for an integer/real/numeric/untyped column, 5 for a
`TEXT`/`BLOB`, and `k/4+1` for a sized `VARCHAR(k)` (new `col_szest`), compared in
`LogEst` units (new `logest`). Among qualifying indexes the narrowest wins, ties
broken by the most-recently-created.

This fixes both directions of the previous heuristic: the over-use (a covering
index no narrower than the table — e.g. `SELECT b,c` over a two-column index on a
three-column table — now `SCAN`s the table) and the under-use (`count(*)` over
two or more indexes now counts the cheapest instead of falling back to a table
scan). `count_covering_index` delegates its choice to the shared model. The
covered scan's no-`ORDER BY` row order (index order) matches SQLite in lockstep.
The width test applies only to a pure projection — a `GROUP BY` / `DISTINCT` /
`ORDER BY` still reads a covering index to supply its ordering regardless of
width. Verified against a from-source build of sqlite3 3.50.4
(`tests/eqp_covering_index_cost.rs`; `tests/count_covering.rs` updated to the
cost-correct expectations).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
diff --git a/ROADMAP.md b/ROADMAP.md
@@ -1261,15 +1261,22 @@ With B9a-seek and `FOR IN-OPERATOR` shipped, the only open EQP-fidelity thread i
 - **B9h — cost-model single-table index *choice*.** SQLite prefers, among indexes
   sharing an equality prefix, the one whose walk does the most work (composite `(b,c)`
   over `(b)` for a trailing range / `GROUP BY`/`ORDER BY c`; a *covering* index over a
-  narrower one; the smallest covering index for `count(*)`/`IN`), and decides *whether*
-  a no-WHERE query covers at all (narrow index vs wide-row table scan). graphite picks
-  by longest-equality-prefix only. The covering-scan no-`ORDER BY` row-order parity
-  (investigated 2026-07-04 as B9i — graphite already walks index order, so it is *not* an
-  execution-order bug) and the secondary-index `SEARCH` + `GROUP BY`/`DISTINCT` b-tree
-  (left open by B9d) both ride here. **Deferred by design:** the pinned oracle has no
-  stat4, so its choices depend on row-width / index-width / index-count heuristics
-  graphite can't reproduce without diverging the EQP corpus — same class as B1b/B4;
-  needs a stat4-enabled oracle.
+  narrower one), and decides *whether* a no-WHERE query covers at all.
+  **The no-`WHERE` covering-scan choice is DONE (2026-07-05).** It turned out to be a
+  purely *structural* cost — not stat4-dependent — so it is exactly reproducible against
+  the pinned oracle: graphite now ports SQLite's `estimateTableWidth`/`estimateIndexWidth`
+  (`col_szest` + `logest` free fns) and uses a covering index for a no-`WHERE` scan /
+  `count(*)` only when its estimated row is *strictly narrower* than the table's, picking
+  the narrowest (ties → newest / highest rootpage). This fixed both the over-use
+  (`SELECT b,c` over a 2-col-index-on-a-3-col-table now `SCAN t`) and the under-use
+  (`count(*)` over ≥2 indexes now picks the cheapest instead of bailing), and the covered
+  scan's no-`ORDER BY` row order (index order) matches in lockstep (`covering_scan` +
+  `count_covering_index` delegate to the shared model; `tests/eqp_covering_index_cost.rs`,
+  `tests/count_covering.rs`). **Still open (rides here):** the composite-vs-narrow choice
+  *with* a WHERE equality prefix, covering-scan *with* a WHERE predicate on covered
+  columns (`SELECT b FROM t WHERE c>0` → covering `(b,c)`), and the secondary-index
+  `SEARCH` + `GROUP BY`/`DISTINCT` b-tree (left open by B9d). These are structural too and
+  now unblocked — a stat4 oracle is only needed for genuinely data-driven choices (B4).
 - **B9j — collation-aware index *selection* for a non-default-collation index.**
   `collect_eq_constraints` / `collect_range_constraints` compare an explicit `COLLATE`
   to the *column's* collation. When an index carries a *non-default* collation
diff --git a/src/exec/mod.rs b/src/exec/mod.rs
@@ -17916,20 +17916,78 @@ impl Connection {
         if self.group_by_is_rowid(sel, meta, label) {
             return None;
         }
-        let mut covering = self.indexes_of(&t.name).ok()?.into_iter().filter(|idx| {
-            idx.partial.is_none()
-                && idx.key_exprs.is_none()
-                && self.query_cols_covered(sel, meta, &idx.cols)
-        });
-        let chosen = covering.next()?;
-        // Ambiguous (two or more covering indexes): keep the plain scan rather
-        // than guess which one sqlite's cost model would pick.
-        if covering.next().is_some() {
-            return None;
-        }
+        let covering: Vec<_> = self
+            .indexes_of(&t.name)
+            .ok()?
+            .into_iter()
+            .filter(|idx| {
+                idx.partial.is_none()
+                    && idx.key_exprs.is_none()
+                    && self.query_cols_covered(sel, meta, &idx.cols)
+            })
+            .collect();
+        // A `GROUP BY` / `DISTINCT` / `ORDER BY` query walks the index to produce
+        // its keys in order (avoiding a full sort — for a partial sort the index
+        // still supplies the leading terms), so SQLite reads from a covering index
+        // there *regardless* of width; only a bare projection is a pure width
+        // choice. (A fully sort-satisfying scan already bailed above via
+        // `order_satisfied_by_scan`.) For these we keep the conservative
+        // single-candidate rule — which index an ambiguous walk picks is the
+        // ordered-scan path's job.
+        if !sel.group_by.is_empty() || sel.distinct || !sel.order_by.is_empty() {
+            if covering.len() != 1 {
+                return None;
+            }
+            let chosen = covering.into_iter().next()?;
+            return Some((chosen.name, chosen.root, chosen.cols));
+        }
+        // Plain no-`WHERE` projection: port SQLite's covering-scan cost choice
+        // (`estimateTableWidth` / `estimateIndexWidth`): the table's estimated row
+        // width is `Σ szEst(col) (+1 if no INTEGER PRIMARY KEY)`; an index's is
+        // `Σ szEst(key col) + 1` (the trailing rowid). A covering index is used only
+        // when its width (in `LogEst` units) is *strictly* less than the table's,
+        // and among the candidates the narrowest wins — ties broken by the
+        // most-recently-created index (highest rootpage; SQLite considers indexes
+        // newest-first and keeps the first of an equal cost). Verified against the
+        // sqlite3 3.50.4 planner.
+        let szests = self.table_col_szests(&t.name).unwrap_or_default();
+        let szest_of = |i: usize| szests.get(i).copied().unwrap_or(1);
+        let mut wtable: u32 = (0..meta.columns.len()).map(szest_of).sum();
+        if meta.ipk.is_none() {
+            wtable += 1;
+        }
+        let sz_tab = logest(u64::from(wtable) * 4);
+        let chosen = covering
+            .into_iter()
+            .map(|idx| {
+                let widx: u32 = idx.cols.iter().map(|&c| szest_of(c)).sum::<u32>() + 1;
+                (logest(u64::from(widx) * 4), idx)
+            })
+            .filter(|(sz_idx, _)| *sz_idx < sz_tab)
+            .min_by(|(sa, ia), (sb, ib)| sa.cmp(sb).then(ib.root.cmp(&ia.root)))?
+            .1;
         Some((chosen.name, chosen.root, chosen.cols))
     }
 
+    /// The per-column [`col_szest`] estimates for a rowid table, aligned with its
+    /// declared column order (which matches `TableMeta::columns` for a rowid
+    /// table). Parses the stored `CREATE TABLE` for the raw declared type of each
+    /// column (an untyped column is `1`, not the `BLOB` fallback other paths use).
+    /// Returns `None` when the table, its stored SQL, or a `CREATE TABLE` parse is
+    /// unavailable; the caller then falls back to a size of `1` per column.
+    fn table_col_szests(&self, table: &str) -> Option<Vec<u32>> {
+        let obj = self.schema.table(table)?;
+        let Ok(Statement::CreateTable(ct)) = sql::parse_one(obj.sql.as_deref()?) else {
+            return None;
+        };
+        Some(
+            ct.columns
+                .iter()
+                .map(|c| col_szest(c.type_name.as_deref()))
+                .collect(),
+        )
+    }
+
     /// SQLite's min/max optimization: a query whose only aggregate is a single
     /// `min(col)` / `max(col)` (no `GROUP BY`, no `HAVING`, no `WHERE`, no second
     /// aggregate; the call may be wrapped in scalar expressions and may be
@@ -18311,18 +18369,13 @@ impl Connection {
         if meta.without_rowid {
             return None;
         }
-        // Exactly one full (non-partial, non-expression) secondary index.
-        let mut chosen: Option<(String, u32)> = None;
-        for idx in self.indexes_of(&t.name).ok()? {
-            if idx.partial.is_some() || idx.key_exprs.is_some() {
-                continue;
-            }
-            if chosen.is_some() {
-                return None; // ambiguous: more than one candidate
-            }
-            chosen = Some((idx.name, idx.root));
-        }
-        chosen
+        // A `count(*)` needs no columns, so every full secondary index "covers" it.
+        // The choice — and whether a covering scan is cheaper than a plain table
+        // scan at all — is the shared cost model in `covering_scan` (which picks the
+        // narrowest index strictly narrower than the table, or `None` so the caller
+        // `SCAN`s the table).
+        let (name, root, _) = self.covering_scan(sel, &meta, &Params::default())?;
+        Some((name, root))
     }
 
     /// Whether a single-table scan already yields rows in the query's `ORDER BY`
@@ -27013,6 +27066,70 @@ fn column_resolves_scoped(
     })
 }
 
+/// SQLite's `sqlite3LogEst` — an integer approximation of `10*log2(x)`, the unit
+/// the query planner costs rows and row-widths in (`0` for `x < 2`). Ported from
+/// the C loop: scale `x` into `8..=15`, then correct by its low three bits via `A`.
+fn logest(mut x: u64) -> i16 {
+    const A: [i16; 8] = [0, 2, 3, 5, 6, 7, 8, 9];
+    let mut y: i16 = 40;
+    if x < 8 {
+        if x < 2 {
+            return 0;
+        }
+        while x < 8 {
+            y -= 10;
+            x <<= 1;
+        }
+    } else {
+        while x > 255 {
+            y += 40;
+            x >>= 4;
+        }
+        while x > 15 {
+            y += 10;
+            x >>= 1;
+        }
+    }
+    A[(x & 7) as usize] + y - 10
+}
+
+/// The estimated per-column size SQLite records (`sqlite3AddColumn` /
+/// `sqlite3AffinityType`), scaled so an integer/real/numeric or untyped column is
+/// `1`. A bare `TEXT`/`BLOB`/`CLOB` is `5`; a sized `VARCHAR(k)` / `CHAR(k)` /
+/// `BLOB(k)` is `k/4 + 1` (capped at 255). A `CHAR`-family type with *no* size
+/// (bare `VARCHAR`) is `1`: SQLite then scans for a digit, finds none, keeps 0.
+fn col_szest(type_name: Option<&str>) -> u32 {
+    let Some(t) = type_name else { return 1 };
+    if t.trim().is_empty() {
+        return 1;
+    }
+    let up = t.to_ascii_uppercase();
+    // The first unsigned integer literal in `s`, if any.
+    fn first_uint(s: &str) -> Option<u32> {
+        let start = s.find(|c: char| c.is_ascii_digit())?;
+        let end = s[start..]
+            .find(|c: char| !c.is_ascii_digit())
+            .map(|e| start + e)
+            .unwrap_or(s.len());
+        s[start..end].parse().ok()
+    }
+    let v: u32 = match eval::Affinity::from_type(Some(t)) {
+        // A size for a text column sits after the last "CHAR" token; with the
+        // token but no digits it is 0. Only `TEXT`/`CLOB` (no token) get 16.
+        eval::Affinity::Text => match up.rfind("CHAR") {
+            Some(p) => first_uint(&up[p + 4..]).unwrap_or(0),
+            None => 16,
+        },
+        // A `BLOB(k)` size sits after "BLOB("; digitless → 0; a bare `BLOB` → 16.
+        eval::Affinity::Blob => match up.find("BLOB") {
+            Some(p) if up[p + 4..].starts_with('(') => first_uint(&up[p + 4..]).unwrap_or(0),
+            _ => 16,
+        },
+        _ => 0,
+    };
+    (v / 4 + 1).min(255)
+}
+
 fn walk_shallow_columns(e: &Expr, f: &mut impl FnMut(Option<&str>, Option<&str>, &str, bool)) {
     match e {
         Expr::Column {
diff --git a/tests/count_covering.rs b/tests/count_covering.rs
@@ -1,9 +1,12 @@
 //! `SELECT count(*)` answered via a covering secondary index (roadmap B2b).
 //!
-//! When a single rowid table has exactly one full secondary index, sqlite (and
-//! graphitesql) counts that index's entries — `EXPLAIN QUERY PLAN` reports
-//! `SCAN t USING COVERING INDEX <name>`. With zero or multiple such indexes,
-//! graphitesql conservatively keeps the plain `SCAN t` plan (no guessing).
+//! `count(*)` is answered from a covering secondary index when that index's
+//! estimated row is *strictly narrower* than the table's (SQLite's
+//! `estimateTableWidth`/`estimateIndexWidth` cost model). `EXPLAIN QUERY PLAN`
+//! then reports `SCAN t USING COVERING INDEX <name>`, choosing the narrowest
+//! qualifying index (ties → the most-recently-created). An index no narrower than
+//! the table — e.g. the sole non-key column indexed on a two-column table — is
+//! not used, and the plan stays `SCAN t`.
 
 #![cfg(feature = "std")]
 
@@ -33,15 +36,35 @@ fn count(conn: &Connection, sql: &str) -> i64 {
 
 #[test]
 fn one_index_uses_covering_index_in_eqp() {
+    // The index (b + rowid = 2 units) is narrower than the 3-column table, so it
+    // is used to count. (On a two-column table it would tie the table and SCAN.)
+    let mut c = Connection::open_memory().unwrap();
+    c.execute("CREATE TABLE t(a INTEGER PRIMARY KEY, b, c)")
+        .unwrap();
+    c.execute("CREATE INDEX ib ON t(b)").unwrap();
+    c.execute("INSERT INTO t VALUES(1,10,100),(2,20,200)")
+        .unwrap();
+    assert_eq!(
+        detail(&c, "EXPLAIN QUERY PLAN SELECT count(*) FROM t"),
+        ["SCAN t USING COVERING INDEX ib"]
+    );
+}
+
+#[test]
+fn index_no_narrower_than_table_keeps_plain_scan() {
+    // On a two-column table, indexing the sole non-key column gives an index
+    // (b + rowid = 2) exactly as wide as the table (a + b = 2), so SQLite scans
+    // the table rather than the index.
     let mut c = Connection::open_memory().unwrap();
     c.execute("CREATE TABLE t(a INTEGER PRIMARY KEY, b)")
         .unwrap();
     c.execute("CREATE INDEX ib ON t(b)").unwrap();
     c.execute("INSERT INTO t VALUES(1,10),(2,20)").unwrap();
     assert_eq!(
         detail(&c, "EXPLAIN QUERY PLAN SELECT count(*) FROM t"),
-        ["SCAN t USING COVERING INDEX ib"]
+        ["SCAN t"]
     );
+    assert_eq!(count(&c, "SELECT count(*) FROM t"), 2);
 }
 
 #[test]
@@ -69,29 +92,30 @@ fn no_index_keeps_plain_scan() {
 }
 
 #[test]
-fn multiple_indexes_fall_back_to_plain_scan() {
+fn multiple_indexes_pick_cheapest_covering_index() {
     let mut c = Connection::open_memory().unwrap();
     c.execute("CREATE TABLE t(a INTEGER PRIMARY KEY, b, c)")
         .unwrap();
     c.execute("CREATE INDEX ib ON t(b)").unwrap();
     c.execute("CREATE INDEX ic ON t(c)").unwrap();
     c.execute("INSERT INTO t VALUES(1,10,100),(2,20,200)")
         .unwrap();
-    // Ambiguous index choice => keep the plain SCAN (no guessing).
+    // Both indexes are narrower than the table and equally wide, so SQLite counts
+    // the most-recently-created one (ic), matching its cost-model tie-break.
     assert_eq!(
         detail(&c, "EXPLAIN QUERY PLAN SELECT count(*) FROM t"),
-        ["SCAN t"]
+        ["SCAN t USING COVERING INDEX ic"]
     );
     assert_eq!(count(&c, "SELECT count(*) FROM t"), 2);
 }
 
 #[test]
 fn count_correct_after_delete() {
     let mut c = Connection::open_memory().unwrap();
-    c.execute("CREATE TABLE t(a INTEGER PRIMARY KEY, b)")
+    c.execute("CREATE TABLE t(a INTEGER PRIMARY KEY, b, c)")
         .unwrap();
     c.execute("CREATE INDEX ib ON t(b)").unwrap();
-    c.execute("INSERT INTO t VALUES(1,10),(2,20),(3,30),(4,40)")
+    c.execute("INSERT INTO t VALUES(1,10,1),(2,20,2),(3,30,3),(4,40,4)")
         .unwrap();
     assert_eq!(count(&c, "SELECT count(*) FROM t"), 4);
     c.execute("DELETE FROM t WHERE a IN (2,3)").unwrap();
diff --git a/tests/covering_scan.rs b/tests/covering_scan.rs
@@ -124,18 +124,18 @@ fn order_by_rowid_keeps_table_scan() {
 }
 
 #[test]
-fn ambiguous_covering_indexes_keep_plain_scan() {
+fn two_covering_indexes_pick_cheapest() {
     let mut c = Connection::open_memory().unwrap();
     c.execute("CREATE TABLE t(a INTEGER PRIMARY KEY, b, c)")
         .unwrap();
     c.execute("CREATE INDEX ib ON t(b)").unwrap();
     c.execute("CREATE INDEX ic ON t(c)").unwrap();
     c.execute("INSERT INTO t VALUES (1,10,100),(2,20,200)")
         .unwrap();
-    // count(*) is covered by both ib and ic; rather than guess sqlite's pick,
-    // graphite keeps the plain scan.
+    // count(*) is covered by both ib and ic; both are narrower than the table and
+    // equally wide, so SQLite's cost model counts the most-recently-created (ic).
     assert_eq!(
         plan(&c, "EXPLAIN QUERY PLAN SELECT count(*) FROM t"),
-        "SCAN t"
+        "SCAN t USING COVERING INDEX ic"
     );
 }
diff --git a/tests/eqp_covering_index_cost.rs b/tests/eqp_covering_index_cost.rs