@@ -17916,20 +17916,78 @@ impl Connection {
1791617916 if self.group_by_is_rowid(sel, meta, label) {
1791717917 return None;
1791817918 }
17919- let mut covering = self.indexes_of(&t.name).ok()?.into_iter().filter(|idx| {
17920- idx.partial.is_none()
17921- && idx.key_exprs.is_none()
17922- && self.query_cols_covered(sel, meta, &idx.cols)
17923- });
17924- let chosen = covering.next()?;
17925- // Ambiguous (two or more covering indexes): keep the plain scan rather
17926- // than guess which one sqlite's cost model would pick.
17927- if covering.next().is_some() {
17928- return None;
17929- }
17919+ let covering: Vec<_> = self
17920+ .indexes_of(&t.name)
17921+ .ok()?
17922+ .into_iter()
17923+ .filter(|idx| {
17924+ idx.partial.is_none()
17925+ && idx.key_exprs.is_none()
17926+ && self.query_cols_covered(sel, meta, &idx.cols)
17927+ })
17928+ .collect();
17929+ // A `GROUP BY` / `DISTINCT` / `ORDER BY` query walks the index to produce
17930+ // its keys in order (avoiding a full sort — for a partial sort the index
17931+ // still supplies the leading terms), so SQLite reads from a covering index
17932+ // there *regardless* of width; only a bare projection is a pure width
17933+ // choice. (A fully sort-satisfying scan already bailed above via
17934+ // `order_satisfied_by_scan`.) For these we keep the conservative
17935+ // single-candidate rule — which index an ambiguous walk picks is the
17936+ // ordered-scan path's job.
17937+ if !sel.group_by.is_empty() || sel.distinct || !sel.order_by.is_empty() {
17938+ if covering.len() != 1 {
17939+ return None;
17940+ }
17941+ let chosen = covering.into_iter().next()?;
17942+ return Some((chosen.name, chosen.root, chosen.cols));
17943+ }
17944+ // Plain no-`WHERE` projection: port SQLite's covering-scan cost choice
17945+ // (`estimateTableWidth` / `estimateIndexWidth`): the table's estimated row
17946+ // width is `Σ szEst(col) (+1 if no INTEGER PRIMARY KEY)`; an index's is
17947+ // `Σ szEst(key col) + 1` (the trailing rowid). A covering index is used only
17948+ // when its width (in `LogEst` units) is *strictly* less than the table's,
17949+ // and among the candidates the narrowest wins — ties broken by the
17950+ // most-recently-created index (highest rootpage; SQLite considers indexes
17951+ // newest-first and keeps the first of an equal cost). Verified against the
17952+ // sqlite3 3.50.4 planner.
17953+ let szests = self.table_col_szests(&t.name).unwrap_or_default();
17954+ let szest_of = |i: usize| szests.get(i).copied().unwrap_or(1);
17955+ let mut wtable: u32 = (0..meta.columns.len()).map(szest_of).sum();
17956+ if meta.ipk.is_none() {
17957+ wtable += 1;
17958+ }
17959+ let sz_tab = logest(u64::from(wtable) * 4);
17960+ let chosen = covering
17961+ .into_iter()
17962+ .map(|idx| {
17963+ let widx: u32 = idx.cols.iter().map(|&c| szest_of(c)).sum::<u32>() + 1;
17964+ (logest(u64::from(widx) * 4), idx)
17965+ })
17966+ .filter(|(sz_idx, _)| *sz_idx < sz_tab)
17967+ .min_by(|(sa, ia), (sb, ib)| sa.cmp(sb).then(ib.root.cmp(&ia.root)))?
17968+ .1;
1793017969 Some((chosen.name, chosen.root, chosen.cols))
1793117970 }
1793217971
17972+ /// The per-column [`col_szest`] estimates for a rowid table, aligned with its
17973+ /// declared column order (which matches `TableMeta::columns` for a rowid
17974+ /// table). Parses the stored `CREATE TABLE` for the raw declared type of each
17975+ /// column (an untyped column is `1`, not the `BLOB` fallback other paths use).
17976+ /// Returns an empty vector when the table can't be resolved, so callers fall
17977+ /// back to a size of `1` per column.
17978+ fn table_col_szests(&self, table: &str) -> Option<Vec<u32>> {
17979+ let obj = self.schema.table(table)?;
17980+ let Ok(Statement::CreateTable(ct)) = sql::parse_one(obj.sql.as_deref()?) else {
17981+ return None;
17982+ };
17983+ Some(
17984+ ct.columns
17985+ .iter()
17986+ .map(|c| col_szest(c.type_name.as_deref()))
17987+ .collect(),
17988+ )
17989+ }
17990+
1793317991 /// SQLite's min/max optimization: a query whose only aggregate is a single
1793417992 /// `min(col)` / `max(col)` (no `GROUP BY`, no `HAVING`, no `WHERE`, no second
1793517993 /// aggregate; the call may be wrapped in scalar expressions and may be
@@ -18311,18 +18369,13 @@ impl Connection {
1831118369 if meta.without_rowid {
1831218370 return None;
1831318371 }
18314- // Exactly one full (non-partial, non-expression) secondary index.
18315- let mut chosen: Option<(String, u32)> = None;
18316- for idx in self.indexes_of(&t.name).ok()? {
18317- if idx.partial.is_some() || idx.key_exprs.is_some() {
18318- continue;
18319- }
18320- if chosen.is_some() {
18321- return None; // ambiguous: more than one candidate
18322- }
18323- chosen = Some((idx.name, idx.root));
18324- }
18325- chosen
18372+ // A `count(*)` needs no columns, so every full secondary index "covers" it.
18373+ // The choice — and whether a covering scan is cheaper than a plain table
18374+ // scan at all — is the shared cost model in `covering_scan` (which picks the
18375+ // narrowest index strictly narrower than the table, or `None` so the caller
18376+ // `SCAN`s the table).
18377+ let (name, root, _) = self.covering_scan(sel, &meta, &Params::default())?;
18378+ Some((name, root))
1832618379 }
1832718380
1832818381 /// Whether a single-table scan already yields rows in the query's `ORDER BY`
@@ -27013,6 +27066,70 @@ fn column_resolves_scoped(
2701327066 })
2701427067}
2701527068
/// SQLite's `sqlite3LogEst`: an integer approximation of `10*log2(x)`, the unit
/// the query planner uses for row counts and row widths.
///
/// Inputs below `2` map to `0`. Otherwise the result is ten times the index of
/// the highest set bit, plus a table-driven correction taken from the three
/// bits immediately below that leading bit. This is the closed form of the
/// reference shift loops (the same shape as SQLite's `__builtin_clzll` branch)
/// and yields identical values for every `u64`.
fn logest(x: u64) -> i16 {
    /// Fractional part of `10*log2`, indexed by the three bits that follow the
    /// leading one.
    const FRACTION: [i16; 8] = [0, 2, 3, 5, 6, 7, 8, 9];

    if x < 2 {
        return 0;
    }
    // Index of the highest set bit; `x >= 2` so this is in `1..=63`.
    let top_bit = 63 - x.leading_zeros();
    // Normalise so the leading one sits at bit 3 (value in `8..=15`); the low
    // three bits then select the fractional correction.
    let normalised = if top_bit >= 3 {
        x >> (top_bit - 3)
    } else {
        x << (3 - top_bit)
    };
    // `top_bit <= 63`, so the cast to `i16` cannot truncate.
    FRACTION[(normalised & 7) as usize] + 10 * top_bit as i16
}
27095+
27096+ /// The estimated per-column size SQLite records (`estimateTableWidth` via
27097+ /// `sqlite3AffinityType`), scaled so an integer/real/numeric or untyped column is
27098+ /// `1`. A `TEXT`/`BLOB`/`CLOB`/`CHAR` with no size is `5`; a sized `VARCHAR(k)` /
27099+ /// `CHAR(k)` / `BLOB(k)` is `k/4 + 1` (capped at 255). Only TEXT/BLOB-affinity
27100+ /// columns carry a size; numeric affinities are always `1`.
27101+ fn col_szest(type_name: Option<&str>) -> u32 {
27102+ let Some(t) = type_name else { return 1 };
27103+ if t.trim().is_empty() {
27104+ return 1;
27105+ }
27106+ let up = t.to_ascii_uppercase();
27107+ // The first unsigned integer literal in `s`, if any.
27108+ fn first_uint(s: &str) -> Option<u32> {
27109+ let start = s.find(|c: char| c.is_ascii_digit())?;
27110+ let end = s[start..]
27111+ .find(|c: char| !c.is_ascii_digit())
27112+ .map(|e| start + e)
27113+ .unwrap_or(s.len());
27114+ s[start..end].parse().ok()
27115+ }
27116+ let v: u32 = match eval::Affinity::from_type(Some(t)) {
27117+ // A size for a text column sits after the "CHAR" token (`VARCHAR(k)`,
27118+ // `CHAR(k)`); a bare `TEXT`/`CLOB` carries none → 16 (→ szEst 5).
27119+ eval::Affinity::Text => up
27120+ .rfind("CHAR")
27121+ .and_then(|p| first_uint(&up[p + 4..]))
27122+ .unwrap_or(16),
27123+ // A `BLOB(k)` size sits immediately after "BLOB("; a bare `BLOB` → 16.
27124+ eval::Affinity::Blob => match up.find("BLOB") {
27125+ Some(p) if up[p + 4..].starts_with('(') => first_uint(&up[p + 4..]).unwrap_or(16),
27126+ _ => 16,
27127+ },
27128+ _ => 0,
27129+ };
27130+ (v / 4 + 1).min(255)
27131+ }
27132+
2701627133fn walk_shallow_columns(e: &Expr, f: &mut impl FnMut(Option<&str>, Option<&str>, &str, bool)) {
2701727134 match e {
2701827135 Expr::Column {
0 commit comments