-
Notifications
You must be signed in to change notification settings - Fork 172
perf[buffer]: iteration for fallible operations with validity #8120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7b5828f
85ef2f8
5cf469a
502a286
2f6df63
769a258
3a30290
d2bca93
6fd7fc1
72bca8b
fe34ccb
4299cf0
8e5945f
e9aac1d
d8d5463
2556d53
aa8a6d1
ca2ad88
608111c
d0a7806
fc9b5e8
2c314ab
73dbb94
49ec12a
337147c
74216c1
7ed76bc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,9 @@ use num_traits::AsPrimitive; | |
| use num_traits::NumCast; | ||
| use vortex_buffer::Buffer; | ||
| use vortex_buffer::BufferMut; | ||
| use vortex_compute::lane_kernels::IndexedSinkExt; | ||
| use vortex_compute::lane_kernels::IndexedSourceExt; | ||
| use vortex_compute::lane_kernels::ReinterpretSink; | ||
| use vortex_error::VortexResult; | ||
| use vortex_error::vortex_bail; | ||
| use vortex_error::vortex_err; | ||
|
|
@@ -102,9 +105,7 @@ impl CastKernel for Primitive { | |
| } | ||
| } | ||
|
|
||
| /// Cast values from `F` to `T`. For infallible casts this is a pure pass; for fallible casts | ||
| /// each valid value goes through a checked `NumCast::from` and the kernel bails if any of them | ||
| /// overflow `T`. Invalid positions use the wrapping `as` cast since their values are masked out. | ||
| /// Cast Primitive values from `F` to `T`. | ||
| fn cast_values<F, T>( | ||
| array: ArrayView<'_, Primitive>, | ||
| new_validity: Validity, | ||
|
|
@@ -114,53 +115,99 @@ where | |
| F: NativePType + AsPrimitive<T>, | ||
| T: NativePType, | ||
| { | ||
| let values = array.as_slice::<F>(); | ||
|
|
||
| // Fast path: statically infallible, or cached min/max prove every valid value fits in `T`. | ||
| // The cached check never triggers a stats computation — if the bounds aren't already known | ||
| // we fall through to the per-lane loop below. | ||
| if values_always_fit(F::PTYPE, T::PTYPE) || values_fit_in(array, T::PTYPE, ctx, false) { | ||
| return Ok(PrimitiveArray::new(cast::<F, T>(values), new_validity).into_array()); | ||
| } | ||
|
|
||
| // TODO(joe): if the values source and target have the same bit-width we can | ||
| // mutate in place. | ||
|
|
||
| // Fallible: invalid lanes are pre-multiplied to zero so the checked cast always succeeds for | ||
| // them; valid lanes go through `NumCast::from` and the whole cast bails on the first overflow. | ||
| let mask = array.validity()?.execute_mask(array.len(), ctx)?; | ||
| let overflow = || { | ||
| vortex_err!( | ||
| Compute: "Cannot cast {} to {} — value exceeds target range", | ||
| F::PTYPE, T::PTYPE, | ||
| ) | ||
| }; | ||
| let buffer: Buffer<T> = match &mask { | ||
| Mask::AllTrue(_) => BufferMut::try_from_trusted_len_iter( | ||
|
|
||
| // Returns `true` if every value of `from` is representable in `to` without loss. | ||
| fn casts_losslessly_to(from: PType, to: PType) -> bool { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This doesn't need to be a function.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I prefer keeping it as a function, only because the body doesn't easily read like that on its own? |
||
| from.least_supertype(to) == Some(to) | ||
| } | ||
|
|
||
| // Skip the fallible kernel when type widening or (cached) min/max prove every value fits. | ||
| let target_dtype = DType::Primitive(T::PTYPE, Nullability::NonNullable); | ||
| let infallible = casts_losslessly_to(F::PTYPE, T::PTYPE) | ||
| || cached_values_fit_in(array, &target_dtype).unwrap_or(false); | ||
|
|
||
| let len = array.len(); | ||
|
|
||
| // If F and T have the same byte width, try to take unique ownership of the buffer. | ||
| let same_bit_width = F::PTYPE.byte_width() == T::PTYPE.byte_width(); | ||
| let owned: Option<BufferMut<F>> = same_bit_width | ||
| .then(|| array.into_owned().try_into_buffer_mut::<F>().ok()) | ||
| .flatten(); | ||
| let values: &[F] = array.as_slice::<F>(); | ||
|
|
||
| if infallible { | ||
| return match owned { | ||
| Some(mut buf) => { | ||
| ReinterpretSink::<F, T>::new(buf.as_mut_slice()).map_into_in_place(|v: F| v.as_()); | ||
| // SAFETY: same size + alignment for NativePType | ||
| let result: BufferMut<T> = unsafe { buf.transmute::<T>() }; | ||
| Ok(PrimitiveArray::new(result.freeze(), new_validity).into_array()) | ||
| } | ||
| None => { | ||
| let mut buffer = BufferMut::<T>::with_capacity(len); | ||
| values.map_into(&mut buffer.spare_capacity_mut()[..len], |v| v.as_()); | ||
| // SAFETY: map_into initializes every lane. | ||
| unsafe { buffer.set_len(len) }; | ||
| Ok(PrimitiveArray::new(buffer.freeze(), new_validity).into_array()) | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| let mask = array.validity()?.execute_mask(len, ctx)?; | ||
|
|
||
| let buffer: Buffer<T> = match (&mask, owned) { | ||
| (Mask::AllTrue(_), Some(mut buf)) => { | ||
| ReinterpretSink::<F, T>::new(buf.as_mut_slice()) | ||
| .try_map_in_place(|v: F| <T as NumCast>::from(v)) | ||
| .map_err(|_| overflow())?; | ||
| // SAFETY: same size + alignment for NativePType | ||
| let result: BufferMut<T> = unsafe { buf.transmute::<T>() }; | ||
| result.freeze() | ||
| } | ||
| (Mask::AllTrue(_), None) => { | ||
| let mut buffer = BufferMut::<T>::with_capacity(len); | ||
| values | ||
| .try_map_into(&mut buffer.spare_capacity_mut()[..len], |v| { | ||
| <T as NumCast>::from(v) | ||
| }) | ||
| .map_err(|_| overflow())?; | ||
| // SAFETY: initialized every lane. | ||
| unsafe { buffer.set_len(len) }; | ||
| buffer.freeze() | ||
| } | ||
| (Mask::AllFalse(_), _) => BufferMut::<T>::zeroed(len).freeze(), | ||
| (Mask::Values(m), Some(mut buf)) => { | ||
| ReinterpretSink::<F, T>::new(buf.as_mut_slice()) | ||
| .try_map_masked_in_place(m.bit_buffer(), |v: F| <T as NumCast>::from(v)) | ||
| .map_err(|_| overflow())?; | ||
| // SAFETY: same size + alignment for NativePType | ||
| let result: BufferMut<T> = unsafe { buf.transmute::<T>() }; | ||
| result.freeze() | ||
| } | ||
| (Mask::Values(m), None) => { | ||
| let mut buffer = BufferMut::<T>::with_capacity(len); | ||
| values | ||
| .iter() | ||
| .map(|&v| <T as NumCast>::from(v).ok_or_else(overflow)), | ||
| )? | ||
| .freeze(), | ||
| Mask::AllFalse(_) => BufferMut::<T>::zeroed(values.len()).freeze(), | ||
| Mask::Values(m) => BufferMut::try_from_trusted_len_iter( | ||
| values.iter().zip(m.bit_buffer().iter()).map(|(&v, valid)| { | ||
| let factor = if valid { F::one() } else { F::zero() }; | ||
| <T as NumCast>::from(v * factor).ok_or_else(overflow) | ||
| }), | ||
| )? | ||
| .freeze(), | ||
| .try_map_masked_into( | ||
| m.bit_buffer(), | ||
| &mut buffer.spare_capacity_mut()[..len], | ||
| |v| <T as NumCast>::from(v), | ||
| ) | ||
| .map_err(|_| overflow())?; | ||
| // SAFETY: initialized every lane. | ||
| unsafe { buffer.set_len(len) }; | ||
| buffer.freeze() | ||
| } | ||
| }; | ||
|
|
||
| Ok(PrimitiveArray::new(buffer, new_validity).into_array()) | ||
| } | ||
|
|
||
| /// Out-of-range values at invalid positions are truncated/wrapped by `as`, which is fine because | ||
| /// they are masked out by validity. | ||
| fn cast<F: NativePType + AsPrimitive<T>, T: NativePType>(array: &[F]) -> Buffer<T> { | ||
| BufferMut::from_trusted_len_iter(array.iter().map(|&src| src.as_())).freeze() | ||
| } | ||
|
|
||
| fn reinterpret( | ||
| array: ArrayView<'_, Primitive>, | ||
| new_ptype: PType, | ||
|
|
@@ -178,23 +225,6 @@ fn reinterpret( | |
| .into_array() | ||
| } | ||
|
|
||
| /// Returns `true` if every value of `src` is guaranteed representable in `target` without | ||
| /// overflow. Precision may be lost (e.g. large integers cast to `f32`), but the cast can never | ||
| /// produce an out-of-range result. | ||
| fn values_always_fit(src: PType, target: PType) -> bool { | ||
| if src == target { | ||
| return true; | ||
| } | ||
| if src.is_int() && target.is_int() { | ||
| return target.byte_width() > src.byte_width() | ||
| && (src.is_unsigned_int() || target.is_signed_int()); | ||
| } | ||
| if src.is_float() && target.is_float() { | ||
| return target.byte_width() > src.byte_width(); | ||
| } | ||
| src.is_int() && matches!(target, PType::F32 | PType::F64) | ||
| } | ||
|
|
||
| /// Returns `true` if all valid values in `array` are representable as `target_ptype`. | ||
| /// | ||
| /// Cached min/max statistics are consulted first. If either bound is missing, the function either | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| [package] | ||
| name = "vortex-compute" | ||
| authors = { workspace = true } | ||
| categories = { workspace = true } | ||
| description = "Lane-level compute kernels for Vortex buffers" | ||
| edition = { workspace = true } | ||
| homepage = { workspace = true } | ||
| include = { workspace = true } | ||
| keywords = { workspace = true } | ||
| license = { workspace = true } | ||
| readme = { workspace = true } | ||
| repository = { workspace = true } | ||
| rust-version = { workspace = true } | ||
| version = { workspace = true } | ||
|
|
||
| [package.metadata.docs.rs] | ||
| all-features = true | ||
|
|
||
| [dependencies] | ||
| vortex-buffer = { workspace = true } | ||
|
|
||
| [dev-dependencies] | ||
| arrow-arith = { workspace = true } | ||
| arrow-array = { workspace = true } | ||
| arrow-buffer = { workspace = true } | ||
| arrow-cast = { workspace = true } | ||
| arrow-schema = { workspace = true } | ||
| divan = { workspace = true } | ||
| num-traits = { workspace = true } | ||
| rand = { workspace = true } | ||
|
|
||
| [lints] | ||
| workspace = true | ||
|
|
||
| [[bench]] | ||
| name = "lane_kernels" | ||
| harness = false |
Uh oh!
There was an error while loading. Please reload this page.