Skip to content

Commit 9dde5ea

Browse files
authored
fix: don't panic if between bounds are reversed (#3706)
If a user queried with `BETWEEN` and the `start` came after the `end` then it was possible for a btree index to generate a panic. We should not panic and should instead return 0 results.
1 parent 0b6a2ed commit 9dde5ea

2 files changed

Lines changed: 37 additions & 1 deletion

File tree

python/python/tests/test_scalar_index.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def test_indexed_scalar_scan(indexed_dataset: lance.LanceDataset, data_table: pa
111111

112112

113113
def test_indexed_between(tmp_path):
114-
dataset = lance.write_dataset(pa.table({"val": range(100)}), tmp_path)
114+
dataset = lance.write_dataset(pa.table({"val": range(0, 10000)}), tmp_path)
115115
dataset.create_scalar_index("val", index_type="BTREE")
116116

117117
scanner = dataset.scanner(filter="val BETWEEN 10 AND 20", prefilter=True)
@@ -128,6 +128,23 @@ def test_indexed_between(tmp_path):
128128
actual_data = scanner.to_table()
129129
assert actual_data.num_rows == 11
130130

131+
# The following cases are slightly ill-formed since end is before start
132+
# but we should handle them gracefully and simply return an empty result
133+
# (previously we panicked here)
134+
scanner = dataset.scanner(filter="val >= 5000 AND val <= 0", prefilter=True)
135+
136+
assert "MaterializeIndex" in scanner.explain_plan()
137+
138+
actual_data = scanner.to_table()
139+
assert actual_data.num_rows == 0
140+
141+
scanner = dataset.scanner(filter="val BETWEEN 5000 AND 0", prefilter=True)
142+
143+
assert "MaterializeIndex" in scanner.explain_plan()
144+
145+
actual_data = scanner.to_table()
146+
assert actual_data.num_rows == 0
147+
131148

132149
def test_temporal_index(tmp_path):
133150
# Timestamps

rust/lance-index/src/scalar/btree.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,25 @@ impl BTreeLookup {
636636
// matches an upper bound. This will all be moot if/when we merge pages.
637637
Bound::Excluded(upper) => Bound::Included(upper),
638638
};
639+
640+
match (lower_bound, upper_bound) {
641+
(Bound::Excluded(lower), Bound::Excluded(upper))
642+
| (Bound::Excluded(lower), Bound::Included(upper))
643+
| (Bound::Included(lower), Bound::Excluded(upper)) => {
644+
// It's not really clear what (Included(5), Excluded(5)) would mean so we
645+
// interpret it as an empty range which matches rust's BTreeMap behavior
646+
if lower >= upper {
647+
return vec![];
648+
}
649+
}
650+
(Bound::Included(lower), Bound::Included(upper)) => {
651+
if lower > upper {
652+
return vec![];
653+
}
654+
}
655+
_ => {}
656+
}
657+
639658
let candidates = self
640659
.tree
641660
.range((lower_bound, upper_bound))

0 commit comments

Comments
 (0)