Skip to content

Commit 774e996

Browse files
MagicalTuxclaude
andcommitted
fix(exec): eagerly reject a bad column in a window OVER PARTITION BY / ORDER BY
A bad column reference in a window `OVER` clause's `PARTITION BY` / `ORDER BY` (or a named `WINDOW …` definition) is a prepare-time `no such column` in SQLite, raised even over an empty or fully-filtered table. graphite's eager column validators bailed on any window query, and the VDBE window execution path bypasses them too, so this was resolved only lazily, per row — missed when no row is reached. A window partition/order term binds strictly to a base column of the `FROM` (never an output alias — `PARTITION BY <alias>` is `no such column` in SQLite), so the new `validate_window_over_columns` resolves each term against the source schema (metadata only, no row scan) with no alias-scope subtlety. It gathers every window spec — the `WINDOW`-clause definitions plus each window function's inline `OVER` spec in the projection and top-level `ORDER BY` — and is wired into both `run_core`'s validator block and the `run_select_vdbe` window path (which otherwise bypasses it). Conservatively limited to plain base-table / view sources. Verified vs sqlite3 3.50.4 (`tests/eager_window_over_columns.rs`). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 692b0e2 commit 774e996

3 files changed

Lines changed: 171 additions & 2 deletions

File tree

ROADMAP.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,8 +1518,9 @@ reasonable order:
15181518
pass for lazy-validation gaps is **essentially done**: a 2026-07 exhaustive
15191519
differential sweep (subqueries, derived tables, CTEs, USING/NATURAL, GROUP BY /
15201520
HAVING, ORDER-BY subqueries, compound arms, DML/RETURNING/upsert, INDEX/CHECK
1521-
expr — all over empty/filtered input) found only one residual, the FROM-less
1522-
multi-row `VALUES`-in-`IN` case, now closed too. Only compound bodies with
1521+
expr — all over empty/filtered input) found three residuals, all now closed: the
1522+
FROM-less multi-row `VALUES`-in-`IN` case, the LHS of a nested `IN (SELECT)`, and
1523+
window `OVER` `PARTITION BY` / `ORDER BY` columns. Only compound bodies with
15231524
per-arm `FROM`s remain deliberately lazy.
15241525
6. **`EXPLAIN QUERY PLAN` fidelity (Track B) — essentially closed.** The whole B9
15251526
cluster shipped in 2026-07 (B9a incl. the seekable-`IN` render, B9c–B9g, the B9d

src/exec/mod.rs

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,6 +1434,11 @@ impl Connection {
14341434
for (_, spec) in &sel.window_defs {
14351435
reject_window_in_windowspec(spec)?;
14361436
}
1437+
// A bad column in a window `PARTITION BY` / `ORDER BY` is a
1438+
// prepare-time `no such column` in SQLite, but this path bypasses
1439+
// `run_core`'s eager validators, so re-check it here (else the query
1440+
// would run and silently return rows over an empty/filtered input).
1441+
self.validate_window_over_columns(sel)?;
14371442
}
14381443
return self.run_window_vdbe(sel);
14391444
}
@@ -20014,6 +20019,7 @@ impl Connection {
2001420019
if self.outer_scope.borrow().is_empty() {
2001520020
self.validate_nested_ambiguity(sel, &columns)?;
2001620021
self.validate_columns_exist(sel, &columns)?;
20022+
self.validate_window_over_columns(sel)?;
2001720023
self.validate_derived_columns(sel, &columns)?;
2001820024
self.validate_join_derived_columns(sel)?;
2001920025
// A column reference inside an expression-position subquery body that
@@ -21023,6 +21029,102 @@ impl Connection {
2102321029
}
2102421030
}
2102521031

21032+
/// Eager `no such column` check for the `PARTITION BY` / `ORDER BY` terms of a
21033+
/// window `OVER` clause (and named `WINDOW …` definition).
21034+
/// [`Self::validate_columns_exist`] bails on any window query, so its column
21035+
/// references were resolved only lazily and missed over an empty/filtered
21036+
/// input. A window partition/order term binds strictly to a base column of the
21037+
/// `FROM` (never an output alias — `PARTITION BY <alias>` is `no such column`
21038+
/// in SQLite), so it resolves against the scanned source `columns` exactly like
21039+
/// the base-column targets. Conservatively limited to plain base-table / view
21040+
/// sources (a subquery / TVF / vtab / `NATURAL`/`USING` source bails, never a
21041+
/// false positive).
21042+
fn validate_window_over_columns(&self, sel: &Select) -> Result<()> {
21043+
if !window::has_window(sel) {
21044+
return Ok(());
21045+
}
21046+
let Some(from) = &sel.from else { return Ok(()) };
21047+
let mut srcs = alloc::vec![&from.first];
21048+
for j in &from.joins {
21049+
if j.natural || !j.using.is_empty() {
21050+
return Ok(());
21051+
}
21052+
srcs.push(&j.table);
21053+
}
21054+
// Resolve the base column set from schema metadata (no row scan, so the
21055+
// check is cheap even when the VDBE window path calls it before executing).
21056+
// Any source that can't be resolved from metadata alone — a subquery, TVF,
21057+
// schema-qualified, or virtual table — bails the whole check (never a false
21058+
// positive).
21059+
let mut columns: Vec<ColumnInfo> = Vec::new();
21060+
for s in &srcs {
21061+
if s.subquery.is_some()
21062+
|| s.tvf_args.is_some()
21063+
|| s.schema.is_some()
21064+
|| s.name.is_empty()
21065+
{
21066+
return Ok(());
21067+
}
21068+
let Some(cols) = self.source_columns_of(s) else {
21069+
return Ok(());
21070+
};
21071+
let label = s.alias.clone().unwrap_or_else(|| s.name.clone());
21072+
for (name, _) in cols {
21073+
columns.push(ColumnInfo {
21074+
name,
21075+
table: label.clone(),
21076+
schema: None,
21077+
affinity: eval::Affinity::Blob,
21078+
collation: crate::value::Collation::Binary,
21079+
hidden: false,
21080+
});
21081+
}
21082+
}
21083+
let columns = &columns[..];
21084+
// Every window spec in play: the `WINDOW`-clause definitions plus each
21085+
// window function's inline `OVER (…)` spec found in the projection or the
21086+
// top-level `ORDER BY`. `window::visit` stops at nested subqueries (they
21087+
// validate their own specs), so only this query level is gathered.
21088+
let mut specs: Vec<WindowSpec> = sel.window_defs.iter().map(|(_, s)| s.clone()).collect();
21089+
let gather = |e: &Expr, specs: &mut Vec<WindowSpec>| {
21090+
window::visit(e, &mut |m| {
21091+
if let Expr::Function {
21092+
over: Some(spec), ..
21093+
} = m
21094+
{
21095+
specs.push(spec.clone());
21096+
}
21097+
});
21098+
};
21099+
for rc in &sel.columns {
21100+
if let ResultColumn::Expr { expr, .. } = rc {
21101+
gather(expr, &mut specs);
21102+
}
21103+
}
21104+
for t in &sel.order_by {
21105+
gather(&t.expr, &mut specs);
21106+
}
21107+
let mut parts: Vec<&Expr> = Vec::new();
21108+
for spec in &specs {
21109+
windowspec_parts(spec, &mut parts);
21110+
}
21111+
let mut missing: Option<Error> = None;
21112+
for e in parts {
21113+
if missing.is_some() {
21114+
break;
21115+
}
21116+
walk_shallow_columns(e, &mut |schema, table, column, quoted| {
21117+
if missing.is_none() && !column_resolves_scoped(columns, schema, table, column) {
21118+
missing = Some(eval::no_such_column(schema, table, column, quoted));
21119+
}
21120+
});
21121+
}
21122+
match missing {
21123+
Some(e) => Err(e),
21124+
None => Ok(()),
21125+
}
21126+
}
21127+
2102621128
/// Eager `no such column` check for a query whose *sole* `FROM` source is a
2102721129
/// derived table (a parenthesized subquery), the counterpart of
2102821130
/// [`Self::validate_columns_exist`] for a case that one bails on. SQLite

tests/eager_window_over_columns.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
//! A bad column reference in a window `OVER` clause's `PARTITION BY` / `ORDER BY`
2+
//! (or a named `WINDOW …` definition) is a prepare-time `no such column` in
3+
//! SQLite — raised even over an empty or fully-filtered table. graphite's eager
4+
//! column validators bailed on any window query, and the VDBE window path also
5+
//! bypasses them, so this was caught only lazily. A window partition/order term
6+
//! binds strictly to a base column of the `FROM` (never an output alias), so it
7+
//! resolves against the source schema. Verified against sqlite3 3.50.4.
8+
9+
#![cfg(feature = "std")]
10+
11+
use std::process::Command;
12+
13+
fn sqlite3_available() -> bool {
14+
Command::new("sqlite3").arg("--version").output().is_ok()
15+
}
16+
17+
fn rejects(bin: &str, base: &str, sql: &str) -> bool {
18+
let full = format!("{base} {sql}");
19+
let out = Command::new(bin)
20+
.arg(":memory:")
21+
.arg(&full)
22+
.output()
23+
.unwrap();
24+
let mut s = String::from_utf8_lossy(&out.stdout).into_owned();
25+
s.push_str(&String::from_utf8_lossy(&out.stderr));
26+
s.to_lowercase().contains("error")
27+
}
28+
29+
#[test]
30+
fn window_over_column_validation_matches_sqlite() {
31+
if !sqlite3_available() {
32+
eprintln!("sqlite3 CLI not found; skipping");
33+
return;
34+
}
35+
let g = env!("CARGO_BIN_EXE_graphitesql");
36+
let empty = "CREATE TABLE t(a, b);";
37+
let full = "CREATE TABLE t(a, b); INSERT INTO t VALUES(1, 2), (3, 4);\
38+
CREATE TABLE u(c, d); INSERT INTO u VALUES(1, 9);";
39+
let cases = [
40+
// Bad column in an OVER clause → rejected (empty table, so caught eagerly).
41+
(empty, "SELECT sum(a) OVER (PARTITION BY zzz) FROM t"),
42+
(empty, "SELECT sum(a) OVER (ORDER BY zzz) FROM t"),
43+
(empty, "SELECT sum(a) OVER (PARTITION BY a ORDER BY zzz) FROM t"),
44+
(empty, "SELECT sum(a) OVER w FROM t WINDOW w AS (PARTITION BY zzz)"),
45+
(empty, "SELECT 1 + sum(a) OVER (PARTITION BY zzz) FROM t"),
46+
// An output alias is NOT a valid window partition/order term.
47+
(full, "SELECT a AS x, sum(b) OVER (PARTITION BY x) FROM t"),
48+
// Valid — must not be a false positive.
49+
(full, "SELECT sum(a) OVER (PARTITION BY b ORDER BY a) FROM t"),
50+
(full, "SELECT sum(a) OVER (PARTITION BY a + 0) FROM t"),
51+
(full, "SELECT sum(b) OVER (PARTITION BY rowid) FROM t"),
52+
(full, "SELECT sum(b) OVER w FROM t WINDOW w AS (ORDER BY a)"),
53+
(
54+
full,
55+
"SELECT t.a, sum(d) OVER (PARTITION BY t.b) FROM t JOIN u ON t.a = u.c",
56+
),
57+
(full, "SELECT row_number() OVER (ORDER BY a), a FROM t ORDER BY a"),
58+
];
59+
for (base, q) in cases {
60+
assert_eq!(
61+
rejects("sqlite3", base, q),
62+
rejects(g, base, q),
63+
"reject-parity for `{q}`"
64+
);
65+
}
66+
}

0 commit comments

Comments
 (0)