Skip to content

Commit 94426fb

Browse files
MagicalTux and claude committed
feat(planner): seek col = (non-correlated scalar subquery) (B9e)
`WHERE b = (SELECT max(x) FROM u)` (and `<`/`>`/etc.) against a non-correlated scalar subquery now seeks the index instead of scanning, matching SQLite: `SEARCH t USING INDEX tb (b=?)`. Secondary index and INTEGER PRIMARY KEY, equality and range.

Two sides, kept consistent without evaluating during EXPLAIN:

- Executor: scan_source's single-table fast path folds the WHERE via the existing fold_subquery_expr (foldable = non-correlated computed) into a shadowed seek clause passed to the try_* seeks; run_core re-applies the original WHERE, so it stays a superset.
- EQP: eqp_access rewrites a structurally-foldable scalar-subquery comparison operand to a placeholder literal (placeholder_fold_seek_where) so the constraint collectors recognize the seek WITHOUT running the subquery — SQLite plans the seek without evaluating it (e.g. `b=(SELECT 1/0)` still plans a SEARCH; the query errors only at execution, as in SQLite). Gated to SELECT.

Residuals (executor and EQP agree — they only differ from SQLite): a bare-column subquery does not fold (dropping its affinity would be unsound) and stays a SCAN; a DELETE/UPDATE subquery WHERE stays a SCAN (SQLite's two-pass COVERING plan is not reproducible from the sel-less eqp_access). Correlated / EXISTS / IN (SELECT) correctly do not seek.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 79118bd commit 94426fb

3 files changed

Lines changed: 233 additions & 21 deletions

File tree

ROADMAP.md

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,10 +1196,19 @@ tie/representative order), so they are perf/EQP-fidelity work, not correctness:
11961196
grouping b-tree only over a bare `SCAN`; under a `WHERE` seek it omits the node.
11971197
Entangled with B9h (SQLite often picks a *different* index whose walk serves the
11981198
grouping), so it should land together with the index-choice work.
1199-
- **B9e — `col = (scalar subquery)` seek.** `WHERE b = (SELECT …)` seeks the once-
1200-
computed value in SQLite (`SEARCH … (b=?)`); graphite `SCAN`s because the eq-
1201-
collector requires a constant RHS. Needs the executor to evaluate the
1202-
non-correlated subquery once, then seek (superset-safe; `eqp_access` mirrors it).
1199+
- **B9e — `col = (scalar subquery)` seek. ✅ Done (SELECT).** `WHERE b = (SELECT …)`
1200+
(and `>`/`<`/etc.) against a non-correlated scalar subquery now seeks — the executor
1201+
folds the subquery to its value before the seek (`scan_source` single-table fast path),
1202+
and `eqp_access` recognizes the shape *structurally* (a placeholder-literal rewrite,
1203+
`placeholder_fold_seek_where`) so `EXPLAIN` renders the `SEARCH` without running the
1204+
subquery — matching SQLite, which plans the seek without evaluating it (so even
1205+
`b=(SELECT 1/0)` plans a `SEARCH`; the query still errors at execution as in SQLite).
1206+
Secondary index + INTEGER PRIMARY KEY, equality + range. Superset-safe. *Residuals:*
1207+
a **bare-column** subquery (`(SELECT x FROM u)`) does not fold (dropping its affinity
1208+
would be unsound), so it stays a SCAN (rows correct); and a **DELETE/UPDATE** with a
1209+
subquery `WHERE` stays a SCAN (SQLite renders a two-pass `USING COVERING INDEX` the
1210+
`sel`-less `eqp_access` can't reproduce). A **correlated** body / `EXISTS` /
1211+
`IN (SELECT)` correctly do not seek.
12031212
- **B9f — `GLOB 'prefix*'` prefix-range seek. ✅ Done.** A fixed-prefix `GLOB`
12041213
(always case-sensitive / byte-based) now seeks `col >= 'prefix' AND col < 'prefix⁺'`
12051214
on a BINARY index and reads `SEARCH … (b>? AND b<?)`. Implemented as a `BinaryOp::Glob`

src/exec/mod.rs

Lines changed: 102 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -776,24 +776,21 @@ impl Connection {
776776
/// reference — so the resulting literal has the same NONE affinity / BINARY
777777
/// collation the subquery operand would have had, making the substitution
778778
/// exact for the enclosing comparison.
779+
/// The *structural* half of [`Self::eval_foldable_scalar`]: whether a scalar
780+
/// subquery would fold to a literal — self-contained (non-correlated), a single
781+
/// *computed* result column, and (for a compound) every arm computed — WITHOUT
782+
/// running it. Used to recognize `col = (subquery)` as a seekable equality for
783+
/// EXPLAIN QUERY PLAN, which SQLite plans without evaluating the subquery.
784+
fn scalar_subquery_folds_structurally(&self, sel2: &Select) -> bool {
785+
self.vdbe_subquery_foldable(sel2)
786+
&& sel2.columns.len() == 1
787+
&& matches!(&sel2.columns[0],
788+
sql::ast::ResultColumn::Expr { expr, .. } if !is_bare_column_expr(expr))
789+
&& self.compound_arms_computed(sel2)
790+
}
791+
779792
fn eval_foldable_scalar(&self, sel2: &Select) -> Option<Value> {
780-
if !self.vdbe_subquery_foldable(sel2) {
781-
return None;
782-
}
783-
if sel2.columns.len() != 1 {
784-
return None;
785-
}
786-
let sql::ast::ResultColumn::Expr { expr, .. } = &sel2.columns[0] else {
787-
return None;
788-
};
789-
if is_bare_column_expr(expr) {
790-
return None;
791-
}
792-
// A compound body's value is folded to a literal, so every additional arm
793-
// must also project a COMPUTED column (NONE affinity / BINARY collation);
794-
// a bare-column arm would carry that column's affinity, which the literal
795-
// would drop — see `is_bare_column_expr` above for the base arm.
796-
if !self.compound_arms_computed(sel2) {
793+
if !self.scalar_subquery_folds_structurally(sel2) {
797794
return None;
798795
}
799796
let r = self.run_select(sel2, &Params::default()).ok()?;
@@ -806,6 +803,61 @@ impl Connection {
806803
)
807804
}
808805

806+
/// Rewrite a `WHERE` clause so that a *structurally-foldable* non-correlated
807+
/// scalar subquery used as a comparison operand (`col = (SELECT …)`, `col > (…)`)
808+
/// is replaced by a non-NULL placeholder literal — WITHOUT running it. The seek
809+
/// constraint collectors then recognize the comparison as seekable, so
810+
/// `eqp_access` renders the `SEARCH` SQLite plans (SQLite plans the seek without
811+
/// evaluating the subquery; the executor evaluates it via `fold_subquery_expr`).
812+
/// Descends only the `AND`/`(…)` spine and the seekable comparison operators, so a
813+
/// subquery elsewhere (an `OR`, a projection) never spuriously enables a seek.
814+
/// Returns `None` when nothing changed. The placeholder value is irrelevant — the
815+
/// plan renders `col=?`/`col>?` and only the constrained *column* is used.
816+
fn placeholder_fold_seek_where(&self, e: &Expr) -> Option<Expr> {
817+
let mut changed = false;
818+
let out = self.placeholder_fold_where_inner(e, &mut changed);
819+
changed.then_some(out)
820+
}
821+
822+
fn placeholder_fold_where_inner(&self, e: &Expr, changed: &mut bool) -> Expr {
823+
let subq_placeholder = |s: &Expr, changed: &mut bool| -> Expr {
824+
match s {
825+
Expr::Subquery(sel2) if self.scalar_subquery_folds_structurally(sel2) => {
826+
*changed = true;
827+
Expr::Literal(Literal::Integer(0))
828+
}
829+
other => other.clone(),
830+
}
831+
};
832+
match e {
833+
Expr::Binary {
834+
op: BinaryOp::And,
835+
left,
836+
right,
837+
} => Expr::Binary {
838+
op: BinaryOp::And,
839+
left: Box::new(self.placeholder_fold_where_inner(left, changed)),
840+
right: Box::new(self.placeholder_fold_where_inner(right, changed)),
841+
},
842+
Expr::Paren(inner) => {
843+
Expr::Paren(Box::new(self.placeholder_fold_where_inner(inner, changed)))
844+
}
845+
Expr::Binary { op, left, right }
846+
if matches!(
847+
op,
848+
BinaryOp::Eq | BinaryOp::Lt | BinaryOp::LtEq | BinaryOp::Gt | BinaryOp::GtEq
849+
) =>
850+
{
851+
Expr::Binary {
852+
op: *op,
853+
left: Box::new(subq_placeholder(left, changed)),
854+
right: Box::new(subq_placeholder(right, changed)),
855+
}
856+
}
857+
other => other.clone(),
858+
}
859+
}
860+
809861
/// True when every *compound arm* of `sel2` (the `UNION`/… operands after the
810862
/// base) projects a single computed (non-bare-column) expression — so the
811863
/// whole compound's result column carries NONE affinity, exactly like an
@@ -14640,6 +14692,17 @@ impl Connection {
1464014692
let Some(where_expr) = where_clause else {
1464114693
return Ok(alloc::format!("SCAN {label}"));
1464214694
};
14695+
// A non-correlated scalar subquery used as a comparison operand
14696+
// (`col = (SELECT …)`) seeks the same as a constant would: SQLite evaluates it
14697+
// once and plans a `SEARCH`. Replace it with a placeholder literal (structurally,
14698+
// without running it — matching SQLite, which plans the seek without evaluating
14699+
// the subquery) so the constraint collectors below recognize the seek. The
14700+
// executor mirrors this by folding the subquery to its value before its seek.
14701+
// Restricted to a `SELECT` (`sel` present): a DELETE/UPDATE with a subquery
14702+
// `WHERE` is a two-pass plan SQLite renders `USING COVERING INDEX`, which the
14703+
// `sel`-less `eqp_access` can't reproduce, so it is left to its prior SCAN.
14704+
let folded = sel.and_then(|_| self.placeholder_fold_seek_where(where_expr));
14705+
let where_expr = folded.as_ref().unwrap_or(where_expr);
1464314706
// `INDEX` vs `COVERING INDEX` for a seek through `idx_cols`: the same
1464414707
// decision the executor's seek paths make via `seek_index_covers`.
1464514708
let index_kw = |idx_cols: &[usize]| -> &'static str {
@@ -20988,6 +21051,28 @@ impl Connection {
2098821051
// full WHERE is still applied by run_core, so the index only needs to
2098921052
// return a superset of matching rows.
2099021053
if from.joins.is_empty() {
21054+
// Fold a non-correlated scalar subquery used as a seek operand
21055+
// (`col = (SELECT …)`) to its value so the seek can use it — the same
21056+
// seek `eqp_access` renders. Only the seek *decision* uses the folded
21057+
// WHERE; `run_core` re-applies the original (superset-safe). A subquery
21058+
// that fails to fold (correlated / bare-column / erroring) is left in
21059+
// place and the query scans, exactly as before.
21060+
let seek_where;
21061+
let sel = match &sel.where_clause {
21062+
Some(w) => {
21063+
let mut changed = false;
21064+
let fw = self.fold_subquery_expr(w, &mut changed);
21065+
if changed {
21066+
let mut s = sel.clone();
21067+
s.where_clause = Some(fw);
21068+
seek_where = s;
21069+
&seek_where
21070+
} else {
21071+
sel
21072+
}
21073+
}
21074+
None => sel,
21075+
};
2099121076
let mut first_meta = self
2099221077
.table_meta(&from.first.name, from.first.alias.as_deref())
2099321078
.map_err(|e| {

tests/seek_scalar_subquery.rs

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
//! B9e — a comparison against a *non-correlated scalar subquery* seeks the index,
2+
//! exactly as a constant would. `WHERE b = (SELECT max(x) FROM u)` reads
3+
//! `SEARCH t USING INDEX tb (b=?)` (plus the existing `SCALAR SUBQUERY N` child) in
4+
//! SQLite, which evaluates the subquery once and plans a seek; graphite used to SCAN
5+
//! because the eq-collector required a constant RHS. The executor folds the
6+
//! subquery to its value before the seek; `eqp_access` recognizes the shape
7+
//! *structurally* (without running the subquery, matching SQLite — `EXPLAIN` never
8+
//! evaluates it, so even `b = (SELECT 1/0)` plans a `SEARCH`). Equality and range,
9+
//! secondary index and INTEGER PRIMARY KEY.
10+
//!
11+
//! A *correlated* body, `EXISTS`, `IN (SELECT)`, and a bare-column subquery
12+
//! (`(SELECT x FROM u)` — folding it would drop the column's affinity) do NOT seek —
13+
//! the outer table stays a `SCAN`, and the full `WHERE` is re-applied so rows are
14+
//! exact regardless. Verified vs the sqlite3 3.50.4 CLI.
15+
16+
#![cfg(feature = "std")]
17+
18+
use std::process::Command;
19+
20+
/// Whether a `sqlite3` executable can be launched from `PATH`.
///
/// Only the ability to spawn `sqlite3 --version` is checked; its exit status and
/// output are not inspected.
fn sqlite3_available() -> bool {
    let probe = Command::new("sqlite3").arg("--version").output();
    matches!(probe, Ok(_))
}
23+
24+
/// Run `script` through the SQL shell `bin` against a `:memory:` database and
/// return its stdout, lossily decoded as UTF-8.
///
/// # Panics
/// Panics if the process cannot be spawned or exits unsuccessfully; the message
/// carries the exit status and stderr. Without this check a crashing binary would
/// read as empty output, and two failing binaries would compare equal.
fn run_sql(bin: &str, script: &str) -> String {
    let out = Command::new(bin)
        .arg(":memory:")
        .arg(script)
        .output()
        .unwrap_or_else(|e| panic!("failed to run {bin}: {e}"));
    assert!(
        out.status.success(),
        "{bin} exited with {} for {script:?}: {}",
        out.status,
        String::from_utf8_lossy(&out.stderr).trim_end()
    );
    String::from_utf8_lossy(&out.stdout).into_owned()
}

/// `EXPLAIN QUERY PLAN` for `sql` (after running the setup statements in `base`),
/// normalized for comparison between engines.
///
/// Blank lines and the `QUERY PLAN` header are dropped, the leading tree-drawing
/// characters (space, `|`, backtick, `*`, `+`, `_`, `-`) are stripped from each
/// remaining line, and the lines are joined with `#`.
///
/// # Panics
/// Panics if `bin` cannot be run or exits unsuccessfully.
fn plan(bin: &str, base: &str, sql: &str) -> String {
    let stdout = run_sql(bin, &format!("{base} EXPLAIN QUERY PLAN {sql}"));
    stdout
        .lines()
        .filter(|l| !l.trim().is_empty() && !l.starts_with("QUERY PLAN"))
        .map(|l| l.trim_start_matches(|c: char| " |`*+_-".contains(c)))
        .collect::<Vec<_>>()
        .join("#")
}

/// Output of `sql` (after running the setup statements in `base`) with trailing
/// whitespace removed.
///
/// # Panics
/// Panics if `bin` cannot be run or exits unsuccessfully.
fn rows(bin: &str, base: &str, sql: &str) -> String {
    run_sql(bin, &format!("{base} {sql}"))
        .trim_end()
        .to_string()
}
48+
49+
/// Fixture schema shared by every test: `t` with an INTEGER PRIMARY KEY `a` and a
/// secondary index `tb` on `b`, plus `u` with an index `ux` on `x`.
const SCHEMA: &str = concat!(
    "CREATE TABLE t(a INTEGER PRIMARY KEY, b, c); ",
    "CREATE INDEX tb ON t(b); ",
    "CREATE TABLE u(x,y); ",
    "CREATE INDEX ux ON u(x);",
);
51+
52+
/// Every seekable shape must render exactly the plan the reference `sqlite3` CLI
/// renders. Skipped (with a note on stderr) when `sqlite3` is not installed.
#[test]
fn scalar_subquery_operand_seeks_like_sqlite() {
    if !sqlite3_available() {
        eprintln!("sqlite3 CLI not found; skipping");
        return;
    }
    const SEEKABLE: [&str; 7] = [
        "SELECT * FROM t WHERE b=(SELECT max(x) FROM u)", // secondary index equality
        "SELECT * FROM t WHERE a=(SELECT max(x) FROM u)", // INTEGER PRIMARY KEY
        "SELECT * FROM t WHERE b>(SELECT max(x) FROM u)", // range
        "SELECT * FROM t WHERE b=(SELECT max(x) FROM u) AND c=1",
        "SELECT * FROM t WHERE b=(SELECT 5)", // constant subquery
        "SELECT c FROM t WHERE b=(SELECT max(x) FROM u)", // covering
        "SELECT * FROM t WHERE b=(SELECT 1/0)", // EXPLAIN never runs the subquery
    ];
    let graphite = env!("CARGO_BIN_EXE_graphitesql");
    for q in SEEKABLE {
        let reference = plan("sqlite3", SCHEMA, q);
        let actual = plan(graphite, SCHEMA, q);
        assert_eq!(reference, actual, "plan for {q}");
    }
}
75+
76+
/// A correlated / EXISTS / IN (SELECT) / bare-column subquery must not seek the
/// outer table — graphite keeps the SCAN (results still correct via the WHERE
/// re-apply).
///
/// SQLite renders extra CORRELATED / LIST SUBQUERY nodes graphite does not model,
/// so the plans are not compared with SQLite; only the outer access is checked.
#[test]
fn non_seekable_subquery_shapes_stay_scan() {
    let g = env!("CARGO_BIN_EXE_graphitesql");
    for q in [
        "SELECT * FROM t WHERE b=(SELECT y FROM u WHERE x=t.a)", // correlated
        "SELECT * FROM t WHERE b IN (SELECT x FROM u)",          // IN (SELECT)
        "SELECT * FROM t WHERE EXISTS(SELECT 1 FROM u WHERE x=b)", // correlated EXISTS
        "SELECT * FROM t WHERE b=(SELECT x FROM u)",             // bare-column projection
    ] {
        let got = plan(g, SCHEMA, q);
        // Require the SCAN positively: the absence of `SEARCH t` alone would also
        // hold for an empty plan (e.g. if the statement failed to plan at all).
        assert!(
            got.contains("SCAN t"),
            "{q} should scan the outer table t, got {got:?}"
        );
        assert!(
            !got.contains("SEARCH t "),
            "{q} should not seek the outer table t, got {got:?}"
        );
    }
}
96+
97+
/// The rows returned through the seek path must equal what the reference
/// `sqlite3` CLI returns for the same data. Skipped (with a note on stderr) when
/// `sqlite3` is not installed.
#[test]
fn scalar_subquery_seek_rows_match() {
    if !sqlite3_available() {
        eprintln!("sqlite3 CLI not found; skipping");
        return;
    }
    const QUERIES: [&str; 6] = [
        "SELECT a FROM t WHERE b=(SELECT max(x) FROM u) ORDER BY a",
        "SELECT a FROM t WHERE a=(SELECT max(x) FROM u) ORDER BY a",
        "SELECT a FROM t WHERE b>(SELECT min(x) FROM u) ORDER BY a",
        "SELECT count(*) FROM t WHERE b=(SELECT max(x) FROM u)",
        "SELECT a FROM t WHERE b=(SELECT max(x) FROM u WHERE x>999) ORDER BY a", // NULL → no rows
        "SELECT a FROM t WHERE b=(SELECT x FROM u ORDER BY x DESC LIMIT 1) ORDER BY a",
    ];
    let graphite = env!("CARGO_BIN_EXE_graphitesql");
    // Schema plus a small data set; `t.b` contains a duplicate value (20).
    let fixture = format!(
        "{SCHEMA} INSERT INTO t VALUES(1,10,1),(2,20,2),(3,30,3),(4,20,4); \
         INSERT INTO u VALUES(20,1),(10,2),(5,3);"
    );
    for q in QUERIES {
        let reference = rows("sqlite3", &fixture, q);
        let actual = rows(graphite, &fixture, q);
        assert_eq!(reference, actual, "rows for {q}");
    }
}

0 commit comments

Comments
 (0)