Skip to content

Commit 96a951e

Browse files
committed
Reverse order scans
1 parent 2f117e4 commit 96a951e

15 files changed

Lines changed: 762 additions & 9 deletions

File tree

vortex-array/src/array/erased.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ use crate::arrays::DictArray;
3838
use crate::arrays::FilterArray;
3939
use crate::arrays::Null;
4040
use crate::arrays::Primitive;
41+
use crate::arrays::ReversedArray;
4142
use crate::arrays::SliceArray;
4243
use crate::arrays::VarBin;
4344
use crate::arrays::VarBinView;
@@ -212,6 +213,19 @@ impl ArrayRef {
212213
.optimize()
213214
}
214215

216+
/// Wraps the array in a [`ReversedArray`] so that it is logically reversed.
217+
///
218+
/// The optimizer is applied immediately, eliminating the wrapper for known encodings:
219+
///
220+
/// * `Reversed(Reversed(x)) → x` — double reversal cancels out.
221+
/// * `Reversed(Dict(codes, values)) → Dict(Reversed(codes), values)` — only the
222+
/// codes array is reversed; the values dictionary is reused unchanged.
223+
pub fn reverse(&self) -> VortexResult<ArrayRef> {
224+
ReversedArray::try_new(self.clone())?
225+
.into_array()
226+
.optimize()
227+
}
228+
215229
/// Fetch the scalar at the given index.
216230
#[deprecated(
217231
note = "Use `execute_scalar` instead, which allows passing an execution context for more \

vortex-array/src/arrays/chunked/compute/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ mod fill_null;
77
mod filter;
88
pub(crate) mod kernel;
99
mod mask;
10+
mod reverse;
1011
pub(crate) mod rules;
1112
mod slice;
1213
mod take;
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_error::VortexResult;
5+
6+
use crate::ArrayRef;
7+
use crate::IntoArray as _;
8+
use crate::array::ArrayView;
9+
use crate::arrays::Chunked;
10+
use crate::arrays::ChunkedArray;
11+
use crate::arrays::chunked::ChunkedArrayExt as _;
12+
use crate::arrays::reversed::ReverseReduce;
13+
14+
/// Reverses a `ChunkedArray` by reversing the chunk order and lazily reversing each chunk.
15+
///
16+
/// Transforms `Reversed(Chunked([c0, c1, …, cn]))` into
17+
/// `Chunked([reverse(cn), …, reverse(c1), reverse(c0)])`.
18+
///
19+
/// This avoids eagerly merging all chunks into a single canonical array before reversing.
20+
/// Each per-chunk `reverse()` call goes through the optimizer, so further reduce rules
21+
/// (e.g. `Dict` codes-only reversal) still fire on individual chunks.
22+
impl ReverseReduce for Chunked {
23+
fn reverse(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
24+
let dtype = array.as_ref().dtype().clone();
25+
let reversed_chunks = array
26+
.chunks()
27+
.into_iter()
28+
.rev()
29+
.map(|chunk| chunk.reverse())
30+
.collect::<VortexResult<Vec<ArrayRef>>>()?;
31+
// SAFETY: all chunks come from the original ChunkedArray and share its DType;
32+
// reversing order and wrapping in Reversed preserves the invariant.
33+
Ok(Some(
34+
unsafe { ChunkedArray::new_unchecked(reversed_chunks, dtype) }.into_array(),
35+
))
36+
}
37+
}

vortex-array/src/arrays/chunked/compute/rules.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use crate::arrays::ConstantArray;
1414
use crate::arrays::ScalarFn;
1515
use crate::arrays::ScalarFnArray;
1616
use crate::arrays::chunked::ChunkedArrayExt;
17+
use crate::arrays::reversed::ReverseReduceAdaptor;
1718
use crate::arrays::scalar_fn::AnyScalarFn;
1819
use crate::arrays::scalar_fn::ScalarFnArrayExt;
1920
use crate::optimizer::ArrayOptimizer;
@@ -27,6 +28,7 @@ pub(crate) const PARENT_RULES: ParentRuleSet<Chunked> = ParentRuleSet::new(&[
2728
ParentRuleSet::lift(&ChunkedUnaryScalarFnPushDownRule),
2829
ParentRuleSet::lift(&ChunkedConstantScalarFnPushDownRule),
2930
ParentRuleSet::lift(&FillNullReduceAdaptor(Chunked)),
31+
ParentRuleSet::lift(&ReverseReduceAdaptor(Chunked)),
3032
]);
3133

3234
/// Push down any unary scalar function through chunked arrays.

vortex-array/src/arrays/dict/compute/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub(crate) mod is_sorted;
99
mod like;
1010
mod mask;
1111
pub(crate) mod min_max;
12+
mod reverse;
1213
pub(crate) mod rules;
1314
mod slice;
1415

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_error::VortexResult;
5+
6+
use crate::ArrayRef;
7+
use crate::IntoArray as _;
8+
use crate::array::ArrayView;
9+
use crate::arrays::Dict;
10+
use crate::arrays::DictArray;
11+
use crate::arrays::dict::DictArraySlotsExt as _;
12+
use crate::arrays::reversed::ReverseReduce;
13+
14+
/// Reverses a `DictArray` by reversing only the codes array.
15+
///
16+
/// The values dictionary is reused unchanged. Since codes are typically small
17+
/// integers (`u8` or `u16`), the reversal is O(n_codes) rather than O(n_rows × value_size).
18+
///
19+
/// # Example
20+
///
21+
/// For `Dict(codes=[2,2,1,1,0,0], values=[A, B, C])` → decoded `[C,C,B,B,A,A]`:
22+
/// `Dict(codes=[0,0,1,1,2,2], values=[A, B, C])` → decoded `[A,A,B,B,C,C]` ✓
23+
impl ReverseReduce for Dict {
24+
fn reverse(array: ArrayView<'_, Self>) -> VortexResult<Option<ArrayRef>> {
25+
let reversed_codes = array.codes().reverse()?;
26+
// SAFETY: reversing codes doesn't change the dict invariants; the values
27+
// dictionary is untouched and all code indices remain valid.
28+
Ok(Some(
29+
unsafe { DictArray::new_unchecked(reversed_codes, array.values().clone()) }
30+
.into_array(),
31+
))
32+
}
33+
}

vortex-array/src/arrays/dict/compute/rules.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use crate::arrays::ScalarFn;
1717
use crate::arrays::ScalarFnArray;
1818
use crate::arrays::dict::DictArraySlotsExt;
1919
use crate::arrays::filter::FilterReduceAdaptor;
20+
use crate::arrays::reversed::ReverseReduceAdaptor;
2021
use crate::arrays::scalar_fn::AnyScalarFn;
2122
use crate::arrays::scalar_fn::ScalarFnArrayExt;
2223
use crate::arrays::slice::SliceReduceAdaptor;
@@ -38,6 +39,7 @@ pub(crate) const PARENT_RULES: ParentRuleSet<Dict> = ParentRuleSet::new(&[
3839
ParentRuleSet::lift(&LikeReduceAdaptor(Dict)),
3940
ParentRuleSet::lift(&DictionaryScalarFnValuesPushDownRule),
4041
ParentRuleSet::lift(&DictionaryScalarFnCodesPullUpRule),
42+
ParentRuleSet::lift(&ReverseReduceAdaptor(Dict)),
4143
ParentRuleSet::lift(&SliceReduceAdaptor(Dict)),
4244
]);
4345

vortex-array/src/arrays/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ pub mod primitive;
7474
pub use primitive::Primitive;
7575
pub use primitive::PrimitiveArray;
7676

77+
pub mod reversed;
78+
pub use reversed::Reversed;
79+
pub use reversed::ReversedArray;
80+
7781
pub mod scalar_fn;
7882
pub use scalar_fn::ScalarFn;
7983
pub use scalar_fn::ScalarFnArray;
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_error::VortexExpect as _;
5+
use vortex_error::VortexResult;
6+
7+
use crate::ArrayRef;
8+
use crate::array::{Array, ArrayParts, EmptyArrayData, TypedArrayRef};
9+
use crate::arrays::Reversed;
10+
11+
/// Slot index for the inner (to-be-reversed) child array.
12+
pub(super) const CHILD_SLOT: usize = 0;
13+
/// Number of slots; also used as the length of `SLOT_NAMES`.
pub(super) const NUM_SLOTS: usize = 1;
14+
/// Slot names in slot-index order; `"child"` sits at index `CHILD_SLOT`.
pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["child"];
15+
16+
/// Extension trait for accessing [`ReversedArray`](crate::arrays::ReversedArray) properties.
17+
pub trait ReversedArrayExt: TypedArrayRef<Reversed> {
18+
/// Returns the inner array whose elements will be yielded in reverse order.
19+
fn child(&self) -> &ArrayRef {
20+
self.as_ref().slots()[CHILD_SLOT]
21+
.as_ref()
22+
.vortex_expect("validated ReversedArray child slot")
23+
}
24+
}
25+
26+
impl<T: TypedArrayRef<Reversed>> ReversedArrayExt for T {}
27+
28+
impl Array<Reversed> {
29+
/// Wraps `child` in a [`ReversedArray`](crate::arrays::ReversedArray).
30+
pub fn try_new(child: ArrayRef) -> VortexResult<Self> {
31+
let dtype = child.dtype().clone();
32+
let len = child.len();
33+
Array::try_from_parts(
34+
ArrayParts::new(Reversed, dtype, len, EmptyArrayData).with_slots(vec![Some(child)]),
35+
)
36+
}
37+
38+
/// Wraps `child` in a [`ReversedArray`](crate::arrays::ReversedArray) without validation.
39+
///
40+
/// # Safety
41+
///
42+
/// Caller must ensure `child` is a valid array.
43+
pub unsafe fn new_unchecked(child: ArrayRef) -> Self {
44+
let dtype = child.dtype().clone();
45+
let len = child.len();
46+
unsafe {
47+
Array::from_parts_unchecked(
48+
ArrayParts::new(Reversed, dtype, len, EmptyArrayData).with_slots(vec![Some(child)]),
49+
)
50+
}
51+
}
52+
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_buffer::BitBuffer;
5+
use vortex_buffer::Buffer;
6+
use vortex_error::VortexResult;
7+
8+
use crate::arrays::BoolArray;
9+
use crate::arrays::PrimitiveArray;
10+
use crate::arrays::StructArray;
11+
use crate::arrays::bool::BoolArrayExt as _;
12+
use crate::arrays::primitive::PrimitiveArrayExt as _;
13+
use crate::arrays::struct_::StructArrayExt as _;
14+
use crate::canonical::Canonical;
15+
use crate::executor::ExecutionCtx;
16+
use crate::match_each_native_ptype;
17+
use crate::validity::Validity;
18+
use crate::{ArrayRef, IntoArray as _};
19+
20+
/// Reverses a canonical array, dispatching to type-specific fast paths where possible.
21+
///
22+
/// Fast paths:
23+
/// - `Bool`: reverses the bit buffer directly via `value_unchecked` — O(n), no extra allocation.
24+
/// - `Primitive`: reverses the element buffer directly — O(n), no extra allocation.
25+
/// - `Struct`: reverses each field lazily via [`ArrayRef::reverse`] — allows per-field
26+
/// optimisations (e.g. the `Dict` reduce rule fires on dict-encoded fields).
27+
///
28+
/// All other canonical variants fall back to a reversed-index `take`, which is equivalent
29+
/// to the generic path but is deferred to decode time.
30+
pub(super) fn reverse_canonical(
31+
child: &ArrayRef,
32+
ctx: &mut ExecutionCtx,
33+
) -> VortexResult<ArrayRef> {
34+
let n = child.len();
35+
if n <= 1 {
36+
return Ok(child.clone());
37+
}
38+
39+
let canonical = child.clone().execute::<Canonical>(ctx)?;
40+
Ok(match canonical {
41+
Canonical::Bool(a) => reverse_bool(&a)?.into_array(),
42+
Canonical::Primitive(a) => reverse_primitive(&a)?.into_array(),
43+
Canonical::Struct(a) => reverse_struct(&a)?.into_array(),
44+
// All other canonical types: reverse via take with reversed indices.
45+
_ => {
46+
let indices = PrimitiveArray::from_iter((0u64..n as u64).rev()).into_array();
47+
child.take(indices)?
48+
}
49+
})
50+
}
51+
52+
/// Reverses a `BoolArray` by reading each bit in reverse order.
53+
///
54+
/// Uses `value_unchecked` for O(n) direct bit access with no intermediate `Vec` allocation,
55+
/// and correctly handles the buffer's bit offset.
56+
fn reverse_bool(array: &BoolArray) -> VortexResult<BoolArray> {
57+
let validity = reverse_validity(array.validity()?)?;
58+
let bits = array.to_bit_buffer();
59+
let n = bits.len();
60+
let reversed = BitBuffer::collect_bool(n, |i| {
61+
// SAFETY: `n - 1 - i` is in `[0, n)` since `i` is in `[0, n)`.
62+
unsafe { bits.value_unchecked(n - 1 - i) }
63+
});
64+
Ok(BoolArray::new(reversed, validity))
65+
}
66+
67+
/// Reverses a `PrimitiveArray` by iterating the typed buffer backwards.
68+
///
69+
/// This is O(n × element_width) and sequential in both reads and writes, so it is
70+
/// highly cache-friendly and eligible for auto-vectorisation.
71+
fn reverse_primitive(array: &PrimitiveArray) -> VortexResult<PrimitiveArray> {
72+
let validity = reverse_validity(array.validity()?)?;
73+
match_each_native_ptype!(array.ptype(), |T| {
74+
let reversed: Vec<T> = array.as_slice::<T>().iter().rev().copied().collect();
75+
Ok(PrimitiveArray::new(Buffer::from(reversed), validity))
76+
})
77+
}
78+
79+
/// Reverses a `StructArray` by lazily reversing each child field.
80+
///
81+
/// Each field is reversed via [`ArrayRef::reverse`], which in turn runs the optimizer.
82+
/// For dict-encoded fields this fires the `ReverseReduce for Dict` rule, so only the
83+
/// (small) codes array is reversed; the values dictionary remains untouched.
84+
fn reverse_struct(array: &StructArray) -> VortexResult<StructArray> {
85+
let validity = reverse_validity(array.struct_validity())?;
86+
let names = array.names().clone();
87+
let n = array.len();
88+
let reversed_fields = array
89+
.iter_unmasked_fields()
90+
.map(|field| field.reverse())
91+
.collect::<VortexResult<Vec<ArrayRef>>>()?;
92+
StructArray::try_new(names, reversed_fields, n, validity)
93+
}
94+
95+
/// Reverses a [`Validity`] value.
96+
///
97+
/// `NonNullable`, `AllValid`, and `AllInvalid` are identity under reversal.
98+
/// `Array` variants are reversed lazily: `arr.reverse()` creates a
99+
/// `ReversedArray` wrapper that is further optimised at decode time.
100+
fn reverse_validity(validity: Validity) -> VortexResult<Validity> {
101+
match validity {
102+
Validity::NonNullable => Ok(Validity::NonNullable),
103+
Validity::AllValid => Ok(Validity::AllValid),
104+
Validity::AllInvalid => Ok(Validity::AllInvalid),
105+
Validity::Array(arr) => Ok(Validity::Array(arr.reverse()?)),
106+
}
107+
}

0 commit comments

Comments
 (0)