Skip to content

Commit 638f0c1

Browse files
authored
Refactor DictEncoder to not use ArrayAccessors (#7759)
ArrayAccessor is not the fastest api and we can do better by accessing the value iterators directly
1 parent 9382028 commit 638f0c1

18 files changed

Lines changed: 429 additions & 250 deletions

File tree

encodings/sequence/src/compress.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,6 @@ fn encode_primitive_array<P: NativePType + Into<PValue> + CheckedAdd + CheckedSu
150150
mod tests {
151151
use std::sync::LazyLock;
152152

153-
#[expect(unused_imports)]
154-
use itertools::Itertools;
155153
use vortex_array::VortexSessionExecute;
156154
use vortex_array::arrays::PrimitiveArray;
157155
use vortex_array::assert_arrays_eq;

vortex-array/benches/dict_compare.rs

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,9 @@ fn bench_compare_primitive(bencher: divan::Bencher, (len, uniqueness): (usize, u
5757
)
5858
.unwrap();
5959
let value = primitive_arr.as_slice::<i32>()[0];
60-
let session = vortex_array::array_session();
6160

6261
bencher
63-
.with_inputs(|| (&dict, session.create_execution_ctx()))
62+
.with_inputs(|| (&dict, SESSION.create_execution_ctx()))
6463
.bench_refs(|(dict, ctx)| {
6564
dict.clone()
6665
.into_array()
@@ -81,10 +80,9 @@ fn bench_compare_varbin(bencher: divan::Bencher, (len, uniqueness): (usize, usiz
8180
.unwrap();
8281
let bytes = varbin_arr.with_iterator(|i| i.next().unwrap().unwrap().to_vec());
8382
let value = from_utf8(bytes.as_slice()).unwrap();
84-
let session = vortex_array::array_session();
8583

8684
bencher
87-
.with_inputs(|| (&dict, session.create_execution_ctx()))
85+
.with_inputs(|| (&dict, SESSION.create_execution_ctx()))
8886
.bench_refs(|(dict, ctx)| {
8987
dict.clone()
9088
.into_array()
@@ -105,10 +103,9 @@ fn bench_compare_varbinview(bencher: divan::Bencher, (len, uniqueness): (usize,
105103
.unwrap();
106104
let bytes = varbinview_arr.with_iterator(|i| i.next().unwrap().unwrap().to_vec());
107105
let value = from_utf8(bytes.as_slice()).unwrap();
108-
let session = vortex_array::array_session();
109106

110107
bencher
111-
.with_inputs(|| (&dict, session.create_execution_ctx()))
108+
.with_inputs(|| (&dict, SESSION.create_execution_ctx()))
112109
.bench_refs(|(dict, ctx)| {
113110
dict.clone()
114111
.into_array()
@@ -144,10 +141,9 @@ fn bench_compare_sliced_dict_primitive(
144141
.unwrap();
145142
let dict = dict.into_array().slice(0..codes_len).unwrap();
146143
let value = primitive_arr.as_slice::<i32>()[0];
147-
let session = vortex_array::array_session();
148144

149145
bencher
150-
.with_inputs(|| (&dict, session.create_execution_ctx()))
146+
.with_inputs(|| (&dict, SESSION.create_execution_ctx()))
151147
.bench_refs(|(dict, ctx)| {
152148
dict.clone()
153149
.apply(&eq(root(), lit(value)))
@@ -171,10 +167,9 @@ fn bench_compare_sliced_dict_varbinview(
171167
let dict = dict.into_array().slice(0..codes_len).unwrap();
172168
let bytes = varbin_arr.with_iterator(|i| i.next().unwrap().unwrap().to_vec());
173169
let value = from_utf8(bytes.as_slice()).unwrap();
174-
let session = vortex_array::array_session();
175170

176171
bencher
177-
.with_inputs(|| (&dict, session.create_execution_ctx()))
172+
.with_inputs(|| (&dict, SESSION.create_execution_ctx()))
178173
.bench_refs(|(dict, ctx)| {
179174
dict.clone()
180175
.apply(&eq(root(), lit(value)))

vortex-array/benches/dict_compress.rs

Lines changed: 20 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -45,29 +45,30 @@ where
4545
T: NativePType,
4646
StandardUniform: Distribution<T>,
4747
{
48-
let primitive_arr = gen_primitive_for_dict::<T>(len, unique_values);
48+
let primitive_arr = gen_primitive_for_dict::<T>(len, unique_values).into_array();
4949

5050
bencher
5151
.with_inputs(|| (&primitive_arr, SESSION.create_execution_ctx()))
52-
.bench_refs(|(arr, ctx)| dict_encode(&arr.clone().into_array(), ctx));
52+
.bench_refs(|(arr, ctx)| dict_encode(arr, ctx));
5353
}
5454

5555
#[divan::bench(args = BENCH_ARGS)]
5656
fn encode_varbin(bencher: Bencher, (len, unique_values): (usize, usize)) {
57-
let varbin_arr = VarBinArray::from(gen_varbin_words(len, unique_values));
57+
let varbin_arr = VarBinArray::from(gen_varbin_words(len, unique_values)).into_array();
5858

5959
bencher
6060
.with_inputs(|| (&varbin_arr, SESSION.create_execution_ctx()))
61-
.bench_refs(|(arr, ctx)| dict_encode(&arr.clone().into_array(), ctx));
61+
.bench_refs(|(arr, ctx)| dict_encode(arr, ctx));
6262
}
6363

6464
#[divan::bench(args = BENCH_ARGS)]
6565
fn encode_varbinview(bencher: Bencher, (len, unique_values): (usize, usize)) {
66-
let varbinview_arr = VarBinViewArray::from_iter_str(gen_varbin_words(len, unique_values));
66+
let varbinview_arr =
67+
VarBinViewArray::from_iter_str(gen_varbin_words(len, unique_values)).into_array();
6768

6869
bencher
6970
.with_inputs(|| (&varbinview_arr, SESSION.create_execution_ctx()))
70-
.bench_refs(|(arr, ctx)| dict_encode(&arr.clone().into_array(), ctx));
71+
.bench_refs(|(arr, ctx)| dict_encode(arr, ctx));
7172
}
7273

7374
#[divan::bench(types = [u8, f32, i64], args = BENCH_ARGS)]
@@ -76,13 +77,10 @@ where
7677
T: NativePType,
7778
StandardUniform: Distribution<T>,
7879
{
79-
let primitive_arr = gen_primitive_for_dict::<T>(len, unique_values);
80-
let dict = dict_encode(
81-
&primitive_arr.into_array(),
82-
&mut SESSION.create_execution_ctx(),
83-
)
84-
.unwrap()
85-
.into_array();
80+
let primitive_arr = gen_primitive_for_dict::<T>(len, unique_values).into_array();
81+
let dict = dict_encode(&primitive_arr, &mut SESSION.create_execution_ctx())
82+
.unwrap()
83+
.into_array();
8684

8785
bencher
8886
.with_inputs(|| (&dict, SESSION.create_execution_ctx()))
@@ -91,13 +89,10 @@ where
9189

9290
#[divan::bench(args = BENCH_ARGS)]
9391
fn decode_varbin(bencher: Bencher, (len, unique_values): (usize, usize)) {
94-
let varbin_arr = VarBinArray::from(gen_varbin_words(len, unique_values));
95-
let dict = dict_encode(
96-
&varbin_arr.into_array(),
97-
&mut SESSION.create_execution_ctx(),
98-
)
99-
.unwrap()
100-
.into_array();
92+
let varbin_arr = VarBinArray::from(gen_varbin_words(len, unique_values)).into_array();
93+
let dict = dict_encode(&varbin_arr, &mut SESSION.create_execution_ctx())
94+
.unwrap()
95+
.into_array();
10196

10297
bencher
10398
.with_inputs(|| (&dict, SESSION.create_execution_ctx()))
@@ -106,13 +101,11 @@ fn decode_varbin(bencher: Bencher, (len, unique_values): (usize, usize)) {
106101

107102
#[divan::bench(args = BENCH_ARGS)]
108103
fn decode_varbinview(bencher: Bencher, (len, unique_values): (usize, usize)) {
109-
let varbinview_arr = VarBinViewArray::from_iter_str(gen_varbin_words(len, unique_values));
110-
let dict = dict_encode(
111-
&varbinview_arr.into_array(),
112-
&mut SESSION.create_execution_ctx(),
113-
)
114-
.unwrap()
115-
.into_array();
104+
let varbinview_arr =
105+
VarBinViewArray::from_iter_str(gen_varbin_words(len, unique_values)).into_array();
106+
let dict = dict_encode(&varbinview_arr, &mut SESSION.create_execution_ctx())
107+
.unwrap()
108+
.into_array();
116109

117110
bencher
118111
.with_inputs(|| (&dict, SESSION.create_execution_ctx()))

vortex-array/src/arrays/dict/array.rs

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
use std::fmt::Display;
55
use std::fmt::Formatter;
66

7+
use num_traits::AsPrimitive;
78
use smallvec::smallvec;
89
use vortex_buffer::BitBuffer;
910
use vortex_error::VortexExpect;
@@ -159,28 +160,19 @@ pub trait DictArrayExt: TypedArrayRef<Dict> + DictArraySlotsExt {
159160
match codes_validity.bit_buffer() {
160161
AllOr::All => {
161162
match_each_integer_ptype!(codes_primitive.ptype(), |P| {
162-
#[allow(
163-
clippy::cast_possible_truncation,
164-
clippy::cast_sign_loss,
165-
reason = "codes are non-negative indices; a negative signed code would wrap to a large usize and panic on the bounds-checked array index"
166-
)]
167-
for &idx in codes_primitive.as_slice::<P>() {
168-
values_vec[idx as usize] = referenced_value;
163+
for idx in codes_primitive.as_slice::<P>() {
164+
let idxu: usize = idx.as_();
165+
values_vec[idxu] = referenced_value;
169166
}
170167
});
171168
}
172169
AllOr::None => {}
173170
AllOr::Some(mask) => {
174171
match_each_integer_ptype!(codes_primitive.ptype(), |P| {
175172
let codes = codes_primitive.as_slice::<P>();
176-
177-
#[allow(
178-
clippy::cast_possible_truncation,
179-
clippy::cast_sign_loss,
180-
reason = "codes are non-negative indices; a negative signed code would wrap to a large usize and panic on the bounds-checked array index"
181-
)]
182173
mask.set_indices().for_each(|idx| {
183-
values_vec[codes[idx] as usize] = referenced_value;
174+
let idxu: usize = codes[idx].as_();
175+
values_vec[idxu] = referenced_value;
184176
});
185177
});
186178
}

vortex-array/src/arrays/dict/compute/cast.rs

Lines changed: 18 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,15 @@ mod tests {
4949
use std::sync::LazyLock;
5050

5151
use rstest::rstest;
52+
use vortex_buffer::BitBuffer;
5253
use vortex_buffer::buffer;
5354
use vortex_session::VortexSession;
5455

56+
use crate::ArrayRef;
5557
use crate::IntoArray;
56-
#[expect(deprecated)]
57-
use crate::ToCanonical as _;
5858
use crate::VortexSessionExecute;
5959
use crate::arrays::Dict;
60+
use crate::arrays::DictArray;
6061
use crate::arrays::PrimitiveArray;
6162
use crate::arrays::dict::DictArraySlotsExt;
6263
use crate::assert_arrays_eq;
@@ -66,13 +67,15 @@ mod tests {
6667
use crate::dtype::DType;
6768
use crate::dtype::Nullability;
6869
use crate::dtype::PType;
70+
use crate::validity::Validity;
6971

7072
static SESSION: LazyLock<VortexSession> = LazyLock::new(crate::array_session);
7173

7274
#[test]
7375
fn test_cast_dict_to_wider_type() {
76+
let ctx = &mut SESSION.create_execution_ctx();
7477
let values = buffer![1i32, 2, 3, 2, 1].into_array();
75-
let dict = dict_encode(&values, &mut SESSION.create_execution_ctx()).unwrap();
78+
let dict = dict_encode(&values, ctx).unwrap();
7679

7780
let casted = dict
7881
.into_array()
@@ -83,13 +86,8 @@ mod tests {
8386
&DType::Primitive(PType::I64, Nullability::NonNullable)
8487
);
8588

86-
#[expect(deprecated)]
87-
let decoded = casted.to_primitive();
88-
assert_arrays_eq!(
89-
decoded,
90-
PrimitiveArray::from_iter([1i64, 2, 3, 2, 1]),
91-
&mut SESSION.create_execution_ctx()
92-
);
89+
let decoded = casted.into_array().execute::<PrimitiveArray>(ctx).unwrap();
90+
assert_arrays_eq!(decoded, PrimitiveArray::from_iter([1i64, 2, 3, 2, 1]), ctx);
9391
}
9492

9593
#[test]
@@ -110,9 +108,10 @@ mod tests {
110108

111109
#[test]
112110
fn test_cast_dict_allvalid_to_nonnullable_and_back() {
111+
let ctx = &mut SESSION.create_execution_ctx();
113112
// Create an AllValid dict array (no nulls)
114113
let values = buffer![10i32, 20, 30, 40].into_array();
115-
let dict = dict_encode(&values, &mut SESSION.create_execution_ctx()).unwrap();
114+
let dict = dict_encode(&values, ctx).unwrap();
116115

117116
// Verify initial state - codes should be NonNullable, values should be NonNullable
118117
assert_eq!(dict.codes().dtype().nullability(), Nullability::NonNullable);
@@ -173,37 +172,30 @@ mod tests {
173172
);
174173

175174
// Verify values are unchanged
176-
#[expect(deprecated)]
177-
let original_values = dict.as_array().to_primitive();
178-
#[expect(deprecated)]
179-
let final_values = back_to_non_nullable.to_primitive();
180-
assert_arrays_eq!(
181-
original_values,
182-
final_values,
183-
&mut SESSION.create_execution_ctx()
184-
);
175+
let original_values = dict.into_array().execute::<PrimitiveArray>(ctx).unwrap();
176+
177+
let final_values = back_to_non_nullable.execute::<PrimitiveArray>(ctx).unwrap();
178+
assert_arrays_eq!(original_values, final_values, ctx);
185179
}
186180

187181
#[rstest]
188182
#[case(dict_encode(&buffer![1i32, 2, 3, 2, 1, 3].into_array(), &mut SESSION.create_execution_ctx()).unwrap().into_array())]
189183
#[case(dict_encode(&buffer![100u32, 200, 100, 300, 200].into_array(), &mut SESSION.create_execution_ctx()).unwrap().into_array())]
190184
#[case(dict_encode(&PrimitiveArray::from_option_iter([Some(1i32), None, Some(2), Some(1), None]).into_array(), &mut SESSION.create_execution_ctx()).unwrap().into_array())]
191185
#[case(dict_encode(&buffer![1.5f32, 2.5, 1.5, 3.5].into_array(), &mut SESSION.create_execution_ctx()).unwrap().into_array())]
192-
fn test_cast_dict_conformance(#[case] array: crate::ArrayRef) {
186+
fn test_cast_dict_conformance(#[case] array: ArrayRef) {
193187
test_cast_conformance(&array);
194188
}
195189

196190
#[test]
197191
fn test_cast_dict_with_unreferenced_null_values_to_nonnullable() {
198-
use crate::arrays::DictArray;
199-
use crate::validity::Validity;
200-
192+
let ctx = &mut SESSION.create_execution_ctx();
201193
// Create a dict with nullable values that have unreferenced null entries.
202194
// Values: [1.0, null, 3.0] (index 1 is null but no code points to it)
203195
// Codes: [0, 2, 0] (only reference indices 0 and 2, never 1)
204196
let values = PrimitiveArray::new(
205197
buffer![1.0f64, 0.0f64, 3.0f64],
206-
Validity::from(vortex_buffer::BitBuffer::from(vec![true, false, true])),
198+
Validity::from(BitBuffer::from(vec![true, false, true])),
207199
)
208200
.into_array();
209201
let codes = buffer![0u32, 2, 0].into_array();
@@ -228,12 +220,6 @@ mod tests {
228220
casted.dtype(),
229221
&DType::Primitive(PType::F64, Nullability::NonNullable)
230222
);
231-
#[expect(deprecated)]
232-
let casted_prim = casted.to_primitive();
233-
assert_arrays_eq!(
234-
casted_prim,
235-
PrimitiveArray::from_iter([1.0f64, 3.0, 1.0]),
236-
&mut SESSION.create_execution_ctx()
237-
);
223+
assert_arrays_eq!(casted, PrimitiveArray::from_iter([1.0f64, 3.0, 1.0]), ctx);
238224
}
239225
}

vortex-array/src/arrays/dict/compute/mod.rs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,11 @@ impl FilterReduce for Dict {
5656
mod test {
5757
use std::sync::LazyLock;
5858

59-
#[expect(unused_imports)]
60-
use itertools::Itertools;
6159
use vortex_buffer::buffer;
6260
use vortex_session::VortexSession;
6361

6462
use crate::ArrayRef;
6563
use crate::IntoArray;
66-
#[expect(deprecated)]
67-
use crate::ToCanonical as _;
6864
use crate::VortexSessionExecute;
6965
use crate::accessor::ArrayAccessor;
7066
use crate::arrays::ConstantArray;
@@ -101,8 +97,10 @@ mod test {
10197
&mut SESSION.create_execution_ctx(),
10298
)
10399
.unwrap();
104-
#[expect(deprecated)]
105-
let actual = dict.as_array().to_primitive();
100+
let actual = dict
101+
.into_array()
102+
.execute::<PrimitiveArray>(&mut SESSION.create_execution_ctx())
103+
.unwrap();
106104

107105
let expected = PrimitiveArray::from_option_iter(values);
108106

@@ -119,9 +117,12 @@ mod test {
119117
&expected.clone().into_array(),
120118
&mut SESSION.create_execution_ctx(),
121119
)
122-
.unwrap();
123-
#[expect(deprecated)]
124-
let actual = dict.as_array().to_primitive();
120+
.unwrap()
121+
.into_array();
122+
123+
let actual = dict
124+
.execute::<PrimitiveArray>(&mut SESSION.create_execution_ctx())
125+
.unwrap();
125126

126127
assert_arrays_eq!(actual, expected, &mut ctx);
127128
}
@@ -136,9 +137,12 @@ mod test {
136137
&expected.clone().into_array(),
137138
&mut SESSION.create_execution_ctx(),
138139
)
139-
.unwrap();
140-
#[expect(deprecated)]
141-
let actual = dict.as_array().to_primitive();
140+
.unwrap()
141+
.into_array();
142+
143+
let actual = dict
144+
.execute::<PrimitiveArray>(&mut SESSION.create_execution_ctx())
145+
.unwrap();
142146

143147
assert_arrays_eq!(actual, expected, &mut ctx);
144148
}
@@ -155,8 +159,10 @@ mod test {
155159
&mut SESSION.create_execution_ctx(),
156160
)
157161
.unwrap();
158-
#[expect(deprecated)]
159-
let flattened_dict = dict.as_array().to_varbinview();
162+
let flattened_dict = dict
163+
.into_array()
164+
.execute::<VarBinViewArray>(&mut SESSION.create_execution_ctx())
165+
.unwrap();
160166
assert_eq!(
161167
flattened_dict.with_iterator(|iter| iter
162168
.map(|slice| slice.map(|s| s.to_vec()))

0 commit comments

Comments
 (0)