Skip to content

Commit cbff5e1

Browse files
MagicalTux and claude committed
fix(planner): apply the COLLATE-mismatch seek check to range bounds too
A range bound whose explicit COLLATE differs from the column's collation (`b > 'x' COLLATE NOCASE` over a BINARY index) now scans like SQLite instead of seeking the index, and a BETWEEN keeps only the bound whose collation matches (`b BETWEEN 'a' COLLATE NOCASE AND 'z'` → seek `b <= 'z'` only). Mirrors the equality fix, in the shared collect_range_constraints, so the executor seek and the EQP label stay in lockstep. (`IN (…) COLLATE` is left as-is — SQLite still seeks there.)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 1c4d065 commit cbff5e1

3 files changed

Lines changed: 35 additions & 6 deletions

File tree

ROADMAP.md

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -482,8 +482,10 @@ byte-exact vs the pinned `sqlite3` 3.50.4 oracle. Capability summary:
482482
graphite SCANs as SQLite does; this also fixed a row-**ordering** bug, since the
483483
mis-applied seek had wrongly credited its `(b, rowid)` walk for a sole `ORDER BY` on
484484
the rowid (a NOCASE match spans several binary keys, so the rows are not in rowid
485-
order). The check lives in the shared `collect_eq_constraints`, so the seek, the
486-
EQP label, and the ORDER-BY credit stay in lockstep.
485+
order). The check lives in the shared `collect_eq_constraints` /
486+
`collect_range_constraints` (a range bound and a `BETWEEN` bound get it per-bound,
487+
so `b BETWEEN 'a' COLLATE NOCASE AND 'z'` keeps only the matching `b<='z'` bound, as
488+
SQLite does), so the seek, the EQP label, and the ORDER-BY credit stay in lockstep.
487489
The `NOT INDEXED` planner hint is now honored in the *plan* (the executor already
488490
honored it, so rows were always correct): SQLite forbids every *secondary* index on
489491
that table, so a `WHERE` seek, covering scan, ORDER-BY index walk, and MULTI-INDEX OR

src/exec/mod.rs

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30363,12 +30363,23 @@ fn collect_range_constraints(
3036330363
BinaryOp::Lt | BinaryOp::LtEq | BinaryOp::Gt | BinaryOp::GtEq
3036430364
) =>
3036530365
{
30366+
// A bound whose value carries an explicit `COLLATE` differing from the
30367+
// column's collation cannot seek that column's index (its keys order for a
30368+
// different collation), so SQLite ignores it for the seek — same rule as
30369+
// the equality collector.
30370+
let coll_ok = |ci: usize, val: &Expr| {
30371+
explicit_collation(val).is_none_or(|c| c == columns[ci].collation)
30372+
};
3036630373
if let (Some(ci), Some(v)) = (col_index(left, columns), const_value(right, params)) {
30367-
apply_bound(out.entry(ci).or_default(), *op, v);
30374+
if coll_ok(ci, right) {
30375+
apply_bound(out.entry(ci).or_default(), *op, v);
30376+
}
3036830377
} else if let (Some(ci), Some(v)) =
3036930378
(col_index(right, columns), const_value(left, params))
3037030379
{
30371-
apply_bound(out.entry(ci).or_default(), flip_cmp(*op), v);
30380+
if coll_ok(ci, left) {
30381+
apply_bound(out.entry(ci).or_default(), flip_cmp(*op), v);
30382+
}
3037230383
}
3037330384
}
3037430385
Expr::Between {
@@ -30378,11 +30389,13 @@ fn collect_range_constraints(
3037830389
negated: false,
3037930390
} => {
3038030391
if let Some(ci) = col_index(expr, columns) {
30392+
let coll_ok =
30393+
|val: &Expr| explicit_collation(val).is_none_or(|c| c == columns[ci].collation);
3038130394
let b = out.entry(ci).or_default();
30382-
if let Some(v) = const_value(low, params) {
30395+
if let (Some(v), true) = (const_value(low, params), coll_ok(low)) {
3038330396
apply_bound(b, BinaryOp::GtEq, v);
3038430397
}
30385-
if let Some(v) = const_value(high, params) {
30398+
if let (Some(v), true) = (const_value(high, params), coll_ok(high)) {
3038630399
apply_bound(b, BinaryOp::LtEq, v);
3038730400
}
3038830401
}

tests/seek_collation_mismatch.rs

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,10 +58,16 @@ fn collation_mismatch_scans_matching_seeks() {
5858
}
5959
let g = env!("CARGO_BIN_EXE_graphitesql");
6060
// BINARY column / index: an explicit NOCASE differs → SCAN; BINARY (or absent) → SEARCH.
61+
// Equality and range alike; a BETWEEN keeps only the bound whose collation matches.
6162
for q in [
6263
"SELECT * FROM t WHERE b='x' COLLATE NOCASE",
6364
"SELECT * FROM t WHERE b='x' COLLATE BINARY",
6465
"SELECT * FROM t WHERE b='x'",
66+
"SELECT * FROM t WHERE b>'x' COLLATE NOCASE",
67+
"SELECT * FROM t WHERE b>'x' COLLATE BINARY",
68+
"SELECT * FROM t WHERE b>'x'",
69+
"SELECT * FROM t WHERE b BETWEEN 'a' COLLATE NOCASE AND 'z'",
70+
"SELECT * FROM t WHERE b BETWEEN 'a' AND 'z'",
6571
] {
6672
assert_eq!(
6773
plan("sqlite3", BINARY_COL, q),
@@ -102,6 +108,14 @@ fn collation_mismatch_orders_rows_correctly() {
102108
"SELECT a,b FROM t WHERE b='x' COLLATE BINARY ORDER BY a",
103109
),
104110
(BINARY_COL, "SELECT a,b FROM t WHERE b='x' ORDER BY a"),
111+
(
112+
BINARY_COL,
113+
"SELECT a FROM t WHERE b>'x' COLLATE NOCASE ORDER BY a",
114+
),
115+
(
116+
BINARY_COL,
117+
"SELECT a FROM t WHERE b BETWEEN 'a' COLLATE NOCASE AND 'z' ORDER BY a",
118+
),
105119
(
106120
NOCASE_COL,
107121
"SELECT a,b FROM t WHERE b='x' COLLATE BINARY ORDER BY a",

0 commit comments

Comments
 (0)