Skip to content

Commit fc0bb65

Browse files
gatesn and robert3005
authored and committed
Remove more scalar_at from sparse arrays
Signed-off-by: Nicholas Gates <nick@nickgates.com>
1 parent cb9b138 commit fc0bb65

4 files changed

Lines changed: 145 additions & 20 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

encodings/sparse/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,11 @@ vortex-mask = { workspace = true }
2727
vortex-session = { workspace = true }
2828

2929
[dev-dependencies]
30+
divan = { workspace = true }
3031
itertools = { workspace = true }
3132
rstest = { workspace = true }
3233
vortex-array = { workspace = true, features = ["_test-harness"] }
34+
35+
[[bench]]
36+
name = "sparse_canonical"
37+
harness = false
encodings/sparse/benches/sparse_canonical.rs

Lines changed: 117 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,117 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#![expect(clippy::cast_possible_truncation)]
5+
6+
use std::sync::Arc;
7+
8+
use divan::Bencher;
9+
use vortex_array::ArrayRef;
10+
use vortex_array::IntoArray;
11+
use vortex_array::arrays::FixedSizeListArray;
12+
use vortex_array::arrays::ListViewArray;
13+
use vortex_array::arrays::PrimitiveArray;
14+
use vortex_array::dtype::Nullability::NonNullable;
15+
use vortex_array::dtype::PType::I32;
16+
use vortex_array::scalar::Scalar;
17+
use vortex_array::validity::Validity;
18+
use vortex_buffer::Buffer;
19+
use vortex_error::VortexExpect;
20+
use vortex_sparse::Sparse;
21+
22+
// Benchmark entry point: the `[[bench]]` target is declared with `harness = false`,
// so this `main` hands control to divan's own runner.
fn main() {
23+
divan::main();
24+
}
25+
26+
/// Parameter tuples passed to `canonicalize_sparse_list` via `#[divan::bench(args = ...)]`.
///
/// Each tuple is forwarded unchanged to `make_sparse_list`.
const LIST_ARGS: &[(usize, usize, usize)] = &[
27+
// len, patch_stride, list_size
28+
(10_000, 7, 8),
29+
(50_000, 7, 8),
30+
(50_000, 11, 16),
31+
];
32+
33+
/// Parameter tuples passed to `canonicalize_sparse_fixed_size_list` via
/// `#[divan::bench(args = ...)]`.
///
/// Same values as `LIST_ARGS`, except that `list_size` is typed as `u32` to match the
/// `list_size` parameter of `make_sparse_fixed_size_list`.
const FIXED_SIZE_LIST_ARGS: &[(usize, usize, u32)] = &[
34+
// len, patch_stride, list_size
35+
(10_000, 7, 8),
36+
(50_000, 7, 8),
37+
(50_000, 11, 16),
38+
];
39+
40+
/// Builds a sparse array of `len` list-typed rows to use as benchmark input.
///
/// * `len` - logical length passed to `Sparse::try_new`.
/// * `patch_stride` - a patch is placed at every `patch_stride`-th position in `0..len`.
/// * `list_size` - number of `i32` elements in every patch list and in the fill list.
///
/// The result of `Sparse::try_new` is unwrapped with `vortex_expect`, so an invalid
/// combination of inputs aborts the benchmark instead of returning an error.
fn make_sparse_list(len: usize, patch_stride: usize, list_size: usize) -> ArrayRef {
41+
// Patched positions: 0, patch_stride, 2 * patch_stride, ... below `len`.
let patch_indices: Buffer<u32> = (0..len).step_by(patch_stride).map(|i| i as u32).collect();
42+
let n_patches = patch_indices.len();
43+
44+
// Flat element storage: `list_size` consecutive i32 values per patch.
let patch_elements = PrimitiveArray::from_iter(0..(n_patches * list_size) as i32).into_array();
45+
// Patch `i` starts at element `i * list_size` and every patch has `list_size` elements.
let patch_offsets: Buffer<u32> = (0..n_patches).map(|i| (i * list_size) as u32).collect();
46+
let patch_sizes: Buffer<u32> = std::iter::repeat_n(list_size as u32, n_patches).collect();
47+
let patch_values = ListViewArray::new(
48+
patch_elements,
49+
patch_offsets.into_array(),
50+
patch_sizes.into_array(),
51+
Validity::NonNullable,
52+
)
53+
.into_array();
54+
55+
// Fill value for unpatched positions: the non-nullable list [0, 1, ..., list_size - 1].
let fill_value = Scalar::list(
56+
Arc::new(I32.into()),
57+
(0..list_size as i32).map(Scalar::from).collect(),
58+
NonNullable,
59+
);
60+
61+
Sparse::try_new(patch_indices.into_array(), patch_values, len, fill_value)
62+
.vortex_expect("sparse list input should be valid")
63+
.into_array()
64+
}
65+
66+
/// Builds a sparse array of `len` rows whose patch values are a `FixedSizeListArray`,
/// to use as benchmark input.
///
/// * `len` - logical length passed to `Sparse::try_new`.
/// * `patch_stride` - a patch is placed at every `patch_stride`-th position in `0..len`.
/// * `list_size` - number of `i32` elements in every patch list and in the fill list.
///
/// The result of `Sparse::try_new` is unwrapped with `vortex_expect`, so an invalid
/// combination of inputs aborts the benchmark instead of returning an error.
fn make_sparse_fixed_size_list(len: usize, patch_stride: usize, list_size: u32) -> ArrayRef {
67+
// Patched positions: 0, patch_stride, 2 * patch_stride, ... below `len`.
let patch_indices: Buffer<u32> = (0..len).step_by(patch_stride).map(|i| i as u32).collect();
68+
let n_patches = patch_indices.len();
69+
70+
// Flat element storage: `list_size` consecutive i32 values per patch.
let patch_elements =
71+
PrimitiveArray::from_iter(0..(n_patches * list_size as usize) as i32).into_array();
72+
let patch_values =
73+
FixedSizeListArray::new(patch_elements, list_size, Validity::NonNullable, n_patches)
74+
.into_array();
75+
76+
// Fill value for unpatched positions: the non-nullable list [0, 1, ..., list_size - 1].
// NOTE(review): this is built with `Scalar::list` although the patch values are a
// `FixedSizeListArray` — confirm the fill dtype is compatible with the patch dtype.
let fill_value = Scalar::list(
77+
Arc::new(I32.into()),
78+
(0..list_size as i32).map(Scalar::from).collect(),
79+
NonNullable,
80+
);
81+
82+
Sparse::try_new(patch_indices.into_array(), patch_values, len, fill_value)
83+
.vortex_expect("sparse fixed-size-list input should be valid")
84+
.into_array()
85+
}
86+
87+
/// Benchmarks `to_canonical()` on a sparse list array, once per tuple in `LIST_ARGS`.
///
/// The sparse array is built once, outside the benchmarked closure; each input handed to
/// the closure is a clone of it.
#[divan::bench(args = LIST_ARGS)]
fn canonicalize_sparse_list(
89+
bencher: Bencher,
90+
(len, patch_stride, list_size): (usize, usize, usize),
91+
) {
92+
let sparse = make_sparse_list(len, patch_stride, list_size);
93+
94+
bencher
95+
.with_inputs(|| sparse.clone())
96+
.bench_values(|array| {
97+
// `black_box` keeps the canonical result from being optimized away.
divan::black_box(array.to_canonical().vortex_expect("sparse list canonicalization"))
98+
});
99+
}
100+
101+
/// Benchmarks `to_canonical()` on a sparse fixed-size-list array, once per tuple in
/// `FIXED_SIZE_LIST_ARGS`.
///
/// The sparse array is built once, outside the benchmarked closure; each input handed to
/// the closure is a clone of it.
#[divan::bench(args = FIXED_SIZE_LIST_ARGS)]
fn canonicalize_sparse_fixed_size_list(
103+
bencher: Bencher,
104+
(len, patch_stride, list_size): (usize, usize, u32),
105+
) {
106+
let sparse = make_sparse_fixed_size_list(len, patch_stride, list_size);
107+
108+
bencher
109+
.with_inputs(|| sparse.clone())
110+
.bench_values(|array| {
111+
// `black_box` keeps the canonical result from being optimized away.
divan::black_box(
112+
array
113+
.to_canonical()
114+
.vortex_expect("sparse fixed-size-list canonicalization"),
115+
)
116+
});
117+
}

encodings/sparse/src/canonical.rs

Lines changed: 22 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,10 @@ fn execute_sparse_lists_inner<I: IntegerPType, O: IntegerPType>(
187187
total_canonical_values,
188188
len,
189189
);
190+
let patch_values_validity = patch_values
191+
.listview_validity()
192+
.execute_mask(len, ctx)
193+
.vortex_expect("sparse list validity mask failed to execute");
190194

191195
let mut patch_idx = 0;
192196

@@ -200,15 +204,17 @@ fn execute_sparse_lists_inner<I: IntegerPType, O: IntegerPType>(
200204
== position;
201205

202206
if position_is_patched {
203-
// Set with the patch value.
204-
builder
205-
.append_value(
206-
patch_values
207-
.execute_scalar(patch_idx, ctx)
208-
.vortex_expect("scalar_at")
209-
.as_list(),
210-
)
211-
.vortex_expect("Failed to append sparse value");
207+
if patch_values_validity.value(patch_idx) {
208+
// Bulk-append the list value to avoid per-element scalar_at.
209+
let patch_list = patch_values
210+
.list_elements_at(patch_idx)
211+
.vortex_expect("list_elements_at");
212+
builder
213+
.append_array_as_list(&patch_list)
214+
.vortex_expect("Failed to append sparse value");
215+
} else {
216+
builder.append_null();
217+
}
212218
patch_idx += 1;
213219
} else {
214220
// Set with the fill value.
@@ -275,6 +281,11 @@ fn execute_sparse_fixed_size_list_inner<I: IntegerPType>(
275281
let total_elements = array_len * list_size as usize;
276282
let mut builder = builder_with_capacity(element_dtype, total_elements);
277283
let fill_elements = fill_value.elements();
284+
let values_validity = values
285+
.validity()
286+
.vortex_expect("sparse fixed-size-list validity should be derivable")
287+
.execute_mask(values.len(), ctx)
288+
.vortex_expect("sparse fixed-size-list validity mask failed to execute");
278289

279290
let mut next_index = 0;
280291
let indices = indices
@@ -291,20 +302,11 @@ fn execute_sparse_fixed_size_list_inner<I: IntegerPType>(
291302
);
292303

293304
// Append the patch value, handling null patches by appending defaults.
294-
if values
295-
.validity()
296-
.vortex_expect("sparse fixed-size-list validity should be derivable")
297-
.is_valid(patch_idx)
298-
.vortex_expect("is_valid")
299-
{
305+
if values_validity.value(patch_idx) {
300306
let patch_list = values
301307
.fixed_size_list_elements_at(patch_idx)
302308
.vortex_expect("fixed_size_list_elements_at");
303-
for i in 0..list_size as usize {
304-
builder
305-
.append_scalar(&patch_list.execute_scalar(i, ctx).vortex_expect("scalar_at"))
306-
.vortex_expect("element dtype must match");
307-
}
309+
builder.extend_from_array(&patch_list);
308310
} else {
309311
builder.append_defaults(list_size as usize);
310312
}

0 commit comments

Comments
 (0)