Skip to content

Commit 00d6bc4

Browse files
committed
why not
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent abd15b8 commit 00d6bc4

1 file changed

Lines changed: 132 additions & 20 deletions

File tree

  • encodings/fastlanes/src/bitpacking/compute

encodings/fastlanes/src/bitpacking/compute/compare.rs

Lines changed: 132 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -167,30 +167,44 @@ where
167167
T: NativePType + FastLanesComparable<Bitpacked = U>,
168168
U: UnsignedPType + BitPacking + BitPackingCompare,
169169
{
170-
let mut bits = collect_chunk_masks::<U>(array, |bit_width, packed_chunk, lower_matches| {
171-
let mut upper_matches = [0u64; 16];
172-
173-
unsafe {
174-
U::unchecked_unpack_cmp(
175-
bit_width,
176-
packed_chunk,
177-
lower_matches,
178-
|value, lower_bound| lower_matches_bound(lower_bound, value, options.lower_strict),
170+
let mut bits = match (options.lower_strict, options.upper_strict) {
171+
(StrictComparison::Strict, StrictComparison::Strict) => {
172+
collect_between_masks::<U, T, _, _>(
173+
array,
179174
lower,
180-
);
181-
U::unchecked_unpack_cmp(
182-
bit_width,
183-
packed_chunk,
184-
&mut upper_matches,
185-
|value, upper_bound| upper_matches_bound(value, upper_bound, options.upper_strict),
186175
upper,
187-
);
176+
NativePType::is_lt,
177+
NativePType::is_lt,
178+
)
188179
}
189-
190-
for (lower_match, upper_match) in lower_matches.iter_mut().zip(upper_matches) {
191-
*lower_match &= upper_match;
180+
(StrictComparison::Strict, StrictComparison::NonStrict) => {
181+
collect_between_masks::<U, T, _, _>(
182+
array,
183+
lower,
184+
upper,
185+
NativePType::is_lt,
186+
NativePType::is_le,
187+
)
192188
}
193-
});
189+
(StrictComparison::NonStrict, StrictComparison::Strict) => {
190+
collect_between_masks::<U, T, _, _>(
191+
array,
192+
lower,
193+
upper,
194+
NativePType::is_le,
195+
NativePType::is_lt,
196+
)
197+
}
198+
(StrictComparison::NonStrict, StrictComparison::NonStrict) => {
199+
collect_between_masks::<U, T, _, _>(
200+
array,
201+
lower,
202+
upper,
203+
NativePType::is_le,
204+
NativePType::is_le,
205+
)
206+
}
207+
};
194208

195209
if let Some(patches) = array.patches() {
196210
apply_patch_predicate::<T>(&mut bits, &patches, ctx, |patched| {
@@ -206,6 +220,31 @@ where
206220
.into_array())
207221
}
208222

223+
fn collect_between_masks<U, T, LF, UF>(
224+
array: &BitPackedData,
225+
lower: T,
226+
upper: T,
227+
lower_matches: LF,
228+
upper_matches: UF,
229+
) -> BitBufferMut
230+
where
231+
T: NativePType + FastLanesComparable<Bitpacked = U>,
232+
U: UnsignedPType + BitPacking,
233+
LF: Fn(T, T) -> bool + Copy,
234+
UF: Fn(T, T) -> bool + Copy,
235+
{
236+
collect_unpacked_chunk_masks::<U>(array, |unpacked, chunk_matches| {
237+
fill_between_chunk::<U, T, LF, UF>(
238+
unpacked,
239+
chunk_matches,
240+
lower,
241+
upper,
242+
lower_matches,
243+
upper_matches,
244+
);
245+
})
246+
}
247+
209248
fn collect_chunk_masks<U>(
210249
array: &BitPackedData,
211250
mut fill_chunk: impl FnMut(usize, &[U], &mut [u64; 16]),
@@ -245,6 +284,79 @@ where
245284
)
246285
}
247286

287+
fn collect_unpacked_chunk_masks<U>(
288+
array: &BitPackedData,
289+
mut fill_chunk: impl FnMut(&[U; 1024], &mut [u64; 16]),
290+
) -> BitBufferMut
291+
where
292+
U: UnsignedPType + BitPacking,
293+
{
294+
if array.is_empty() {
295+
return BitBufferMut::empty();
296+
}
297+
298+
let bit_width = array.bit_width() as usize;
299+
let packed = array.packed_slice::<U>();
300+
let elems_per_chunk = 128 * bit_width / size_of::<U>();
301+
let num_chunks = (array.offset() as usize + array.len()).div_ceil(1024);
302+
let mut output = BufferMut::<u64>::with_capacity(num_chunks * 16);
303+
304+
for chunk_idx in 0..num_chunks {
305+
let packed_chunk = &packed[chunk_idx * elems_per_chunk..][..elems_per_chunk];
306+
let mut unpacked = [U::default(); 1024];
307+
let mut chunk_matches = [0u64; 16];
308+
309+
unsafe {
310+
U::unchecked_unpack(bit_width, packed_chunk, &mut unpacked);
311+
}
312+
313+
fill_chunk(&unpacked, &mut chunk_matches);
314+
output.extend_from_slice(&chunk_matches);
315+
}
316+
317+
let total_len = num_chunks * 1024;
318+
let mut output = BitBufferMut::from_buffer(output.into_byte_buffer(), 0, total_len);
319+
320+
if array.offset() == 0 {
321+
output.truncate(array.len());
322+
return output;
323+
}
324+
325+
BitBufferMut::copy_from(
326+
&output
327+
.freeze()
328+
.slice(array.offset() as usize..array.offset() as usize + array.len()),
329+
)
330+
}
331+
332+
#[inline]
333+
fn fill_between_chunk<U, T, LF, UF>(
334+
unpacked: &[U; 1024],
335+
chunk_matches: &mut [u64; 16],
336+
lower: T,
337+
upper: T,
338+
lower_matches: LF,
339+
upper_matches: UF,
340+
) where
341+
T: NativePType + FastLanesComparable<Bitpacked = U>,
342+
U: UnsignedPType,
343+
LF: Fn(T, T) -> bool,
344+
UF: Fn(T, T) -> bool,
345+
{
346+
for (word_idx, word) in chunk_matches.iter_mut().enumerate() {
347+
let start = word_idx * 64;
348+
let mut mask = 0u64;
349+
350+
for bit_idx in 0..64 {
351+
let value = T::as_unpacked(unpacked[start + bit_idx]);
352+
mask |=
353+
u64::from(lower_matches(lower, value) && upper_matches(value, upper)) << bit_idx;
354+
}
355+
356+
*word = mask;
357+
}
358+
}
359+
248360
fn apply_patch_predicate<T>(
249361
bits: &mut BitBufferMut,
250362
patches: &Patches,

0 commit comments

Comments
 (0)