Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ members = [
"vortex-array-macros",
"vortex-error",
"vortex-buffer",
"vortex-compute",
"vortex-mask",
"vortex-utils",
"vortex-session",
Expand Down Expand Up @@ -285,6 +286,7 @@ vortex-btrblocks = { version = "0.1.0", path = "./vortex-btrblocks", default-fea
vortex-buffer = { version = "0.1.0", path = "./vortex-buffer", default-features = false }
vortex-bytebool = { version = "0.1.0", path = "./encodings/bytebool", default-features = false }
vortex-compressor = { version = "0.1.0", path = "./vortex-compressor", default-features = false }
vortex-compute = { version = "0.1.0", path = "./vortex-compute", default-features = false }
vortex-datafusion = { version = "0.1.0", path = "./vortex-datafusion", default-features = false }
vortex-datetime-parts = { version = "0.1.0", path = "./encodings/datetime-parts", default-features = false }
vortex-decimal-byte-parts = { version = "0.1.0", path = "encodings/decimal-byte-parts", default-features = false }
Expand Down
1 change: 1 addition & 0 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ tracing = { workspace = true }
uuid = { workspace = true }
vortex-array-macros = { workspace = true }
vortex-buffer = { workspace = true, features = ["arrow"] }
vortex-compute = { workspace = true }
vortex-error = { workspace = true, features = ["flatbuffers"] }
vortex-flatbuffers = { workspace = true, features = ["array", "dtype"] }
vortex-mask = { workspace = true }
Expand Down
58 changes: 47 additions & 11 deletions vortex-array/benches/cast_primitive.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![expect(clippy::unwrap_used)]

use std::sync::LazyLock;

use divan::Bencher;
Expand All @@ -21,21 +23,19 @@ fn main() {
divan::main();
}

const N: usize = 100_000;
// Sizes used for the fallible-path benches below. Kept small enough to fit in L2 so
// the kernel cost shows up clearly rather than being hidden by DRAM bandwidth.
const SIZES: &[usize] = &[65_536];

static SESSION: LazyLock<VortexSession> =
LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

#[divan::bench]
fn cast_u16_to_u32(bencher: Bencher) {
#[divan::bench(args = SIZES)]
fn cast_u16_to_u32(bencher: Bencher, n: usize) {
let mut rng = StdRng::seed_from_u64(42);
#[expect(clippy::cast_possible_truncation)]
let arr = PrimitiveArray::from_option_iter((0..N).map(|i| {
if rng.random_bool(0.5) {
None
} else {
Some(i as u16)
}
let arr = PrimitiveArray::from_option_iter((0..n).map(|i| {
#[expect(clippy::cast_possible_truncation)]
rng.random_bool(0.5).then_some(i as u16)
}))
.into_array();
// Pre-compute min/max so values_fit_in is a cache hit during the benchmark.
Expand All @@ -45,7 +45,43 @@ fn cast_u16_to_u32(bencher: Bencher) {
bencher
.with_inputs(|| (arr.clone(), SESSION.create_execution_ctx()))
.bench_refs(|(a, ctx)| {
#[expect(clippy::unwrap_used)]
a.cast(DType::Primitive(PType::U32, Nullability::Nullable))
.unwrap()
.execute::<Canonical>(ctx)
});
}

/// Narrowing fallible cast that goes through `try_map_with_mask`. Inputs are bounded
/// so every value fits, isolating the kernel's per-lane checked-cast overhead.
#[divan::bench(args = SIZES)]
fn cast_u32_to_u8(bencher: Bencher, n: usize) {
let mut rng = StdRng::seed_from_u64(42);
Comment thread
joseph-isaacs marked this conversation as resolved.
let arr = PrimitiveArray::from_option_iter((0..n).map(|_| {
rng.random_bool(0.7)
.then(|| rng.random_range(0..u8::MAX) as u32)
}))
.into_array();
bencher
.with_inputs(|| (arr.clone(), SESSION.create_execution_ctx()))
.bench_refs(|(a, ctx)| {
a.cast(DType::Primitive(PType::U8, Nullability::Nullable))
.unwrap()
.execute::<Canonical>(ctx)
});
}

/// Sign-change cast i32 → u32. Values are non-negative so the kernel succeeds
/// but still pays the per-lane `try_from` check.
#[divan::bench(args = SIZES)]
fn cast_i32_to_u32(bencher: Bencher, n: usize) {
let mut rng = StdRng::seed_from_u64(42);
let arr = PrimitiveArray::from_option_iter(
(0..n).map(|_| rng.random_bool(0.7).then(|| rng.random_range(0..i32::MAX))),
)
.into_array();
bencher
.with_inputs(|| (arr.clone(), SESSION.create_execution_ctx()))
.bench_refs(|(a, ctx)| {
a.cast(DType::Primitive(PType::U32, Nullability::Nullable))
.unwrap()
.execute::<Canonical>(ctx)
Expand Down
3 changes: 3 additions & 0 deletions vortex-array/src/arrays/primitive/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,9 @@ impl PrimitiveData {
}

/// Try to extract a mutable buffer from the PrimitiveData with zero copy.
///
/// # Panic
/// If the buffer is not of type T this will panic
pub fn try_into_buffer_mut<T: NativePType>(self) -> Result<BufferMut<T>, Buffer<T>> {
if T::PTYPE != self.ptype() {
vortex_panic!(
Expand Down
140 changes: 85 additions & 55 deletions vortex-array/src/arrays/primitive/compute/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ use num_traits::AsPrimitive;
use num_traits::NumCast;
use vortex_buffer::Buffer;
use vortex_buffer::BufferMut;
use vortex_compute::lane_kernels::IndexedSinkExt;
use vortex_compute::lane_kernels::IndexedSourceExt;
use vortex_compute::lane_kernels::ReinterpretSink;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_err;
Expand Down Expand Up @@ -102,9 +105,7 @@ impl CastKernel for Primitive {
}
}

/// Cast values from `F` to `T`. For infallible casts this is a pure pass; for fallible casts
/// each valid value goes through a checked `NumCast::from` and the kernel bails if any of them
/// overflow `T`. Invalid positions use the wrapping `as` cast since their values are masked out.
/// Cast Primitive values from `F` to `T`.
fn cast_values<F, T>(
array: ArrayView<'_, Primitive>,
new_validity: Validity,
Expand All @@ -114,53 +115,99 @@ where
F: NativePType + AsPrimitive<T>,
T: NativePType,
{
let values = array.as_slice::<F>();

// Fast path: statically infallible, or cached min/max prove every valid value fits in `T`.
// The cached check never triggers a stats computation — if the bounds aren't already known
// we fall through to the per-lane loop below.
if values_always_fit(F::PTYPE, T::PTYPE) || values_fit_in(array, T::PTYPE, ctx, false) {
return Ok(PrimitiveArray::new(cast::<F, T>(values), new_validity).into_array());
}

// TODO(joe): if the values source and target have the same bit-width we can
// mutate in place.

// Fallible: invalid lanes are pre-multiplied to zero so the checked cast always succeeds for
// them; valid lanes go through `NumCast::from` and the whole cast bails on the first overflow.
let mask = array.validity()?.execute_mask(array.len(), ctx)?;
let overflow = || {
vortex_err!(
Compute: "Cannot cast {} to {} — value exceeds target range",
F::PTYPE, T::PTYPE,
)
};
let buffer: Buffer<T> = match &mask {
Mask::AllTrue(_) => BufferMut::try_from_trusted_len_iter(

// Returns `true` if every value of `from` is representable in `to` without loss.
fn casts_losslessly_to(from: PType, to: PType) -> bool {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doesn't need to be a function

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer this only that the body does easily read like that?

from.least_supertype(to) == Some(to)
}

// Skip the fallible kernel when type widening or (cached) min/max prove every value fits.
let target_dtype = DType::Primitive(T::PTYPE, Nullability::NonNullable);
let infallible = casts_losslessly_to(F::PTYPE, T::PTYPE)
|| cached_values_fit_in(array, &target_dtype).unwrap_or(false);

let len = array.len();

// If F and T have the same byte width, try to take unique ownership of the buffer.
let same_bit_width = F::PTYPE.byte_width() == T::PTYPE.byte_width();
let owned: Option<BufferMut<F>> = same_bit_width
.then(|| array.into_owned().try_into_buffer_mut::<F>().ok())
.flatten();
let values: &[F] = array.as_slice::<F>();

if infallible {
return match owned {
Some(mut buf) => {
ReinterpretSink::<F, T>::new(buf.as_mut_slice()).map_into_in_place(|v: F| v.as_());
// SAFETY: same size + alignment for NativePType
let result: BufferMut<T> = unsafe { buf.transmute::<T>() };
Ok(PrimitiveArray::new(result.freeze(), new_validity).into_array())
}
None => {
let mut buffer = BufferMut::<T>::with_capacity(len);
values.map_into(&mut buffer.spare_capacity_mut()[..len], |v| v.as_());
// SAFETY: map_into initializes every lane.
unsafe { buffer.set_len(len) };
Ok(PrimitiveArray::new(buffer.freeze(), new_validity).into_array())
}
};
}

let mask = array.validity()?.execute_mask(len, ctx)?;

let buffer: Buffer<T> = match (&mask, owned) {
(Mask::AllTrue(_), Some(mut buf)) => {
ReinterpretSink::<F, T>::new(buf.as_mut_slice())
.try_map_in_place(|v: F| <T as NumCast>::from(v))
.map_err(|_| overflow())?;
// SAFETY: same size + alignment for NativePType
let result: BufferMut<T> = unsafe { buf.transmute::<T>() };
result.freeze()
}
(Mask::AllTrue(_), None) => {
let mut buffer = BufferMut::<T>::with_capacity(len);
values
.try_map_into(&mut buffer.spare_capacity_mut()[..len], |v| {
<T as NumCast>::from(v)
})
.map_err(|_| overflow())?;
// SAFETY: initialized every lane.
unsafe { buffer.set_len(len) };
buffer.freeze()
}
(Mask::AllFalse(_), _) => BufferMut::<T>::zeroed(len).freeze(),
(Mask::Values(m), Some(mut buf)) => {
ReinterpretSink::<F, T>::new(buf.as_mut_slice())
.try_map_masked_in_place(m.bit_buffer(), |v: F| <T as NumCast>::from(v))
.map_err(|_| overflow())?;
// SAFETY: same size + alignment for NativePType
let result: BufferMut<T> = unsafe { buf.transmute::<T>() };
result.freeze()
}
(Mask::Values(m), None) => {
let mut buffer = BufferMut::<T>::with_capacity(len);
values
.iter()
.map(|&v| <T as NumCast>::from(v).ok_or_else(overflow)),
)?
.freeze(),
Mask::AllFalse(_) => BufferMut::<T>::zeroed(values.len()).freeze(),
Mask::Values(m) => BufferMut::try_from_trusted_len_iter(
values.iter().zip(m.bit_buffer().iter()).map(|(&v, valid)| {
let factor = if valid { F::one() } else { F::zero() };
<T as NumCast>::from(v * factor).ok_or_else(overflow)
}),
)?
.freeze(),
.try_map_masked_into(
m.bit_buffer(),
&mut buffer.spare_capacity_mut()[..len],
|v| <T as NumCast>::from(v),
)
.map_err(|_| overflow())?;
// SAFETY: initialized every lane.
unsafe { buffer.set_len(len) };
buffer.freeze()
}
};

Ok(PrimitiveArray::new(buffer, new_validity).into_array())
}

/// Out-of-range values at invalid positions are truncated/wrapped by `as`, which is fine because
/// they are masked out by validity.
fn cast<F: NativePType + AsPrimitive<T>, T: NativePType>(array: &[F]) -> Buffer<T> {
BufferMut::from_trusted_len_iter(array.iter().map(|&src| src.as_())).freeze()
}

fn reinterpret(
array: ArrayView<'_, Primitive>,
new_ptype: PType,
Expand All @@ -178,23 +225,6 @@ fn reinterpret(
.into_array()
}

/// Returns `true` if every value of `src` is guaranteed representable in `target` without
/// overflow. Precision may be lost (e.g. large integers cast to `f32`), but the cast can never
/// produce an out-of-range result.
fn values_always_fit(src: PType, target: PType) -> bool {
if src == target {
return true;
}
if src.is_int() && target.is_int() {
return target.byte_width() > src.byte_width()
&& (src.is_unsigned_int() || target.is_signed_int());
}
if src.is_float() && target.is_float() {
return target.byte_width() > src.byte_width();
}
src.is_int() && matches!(target, PType::F32 | PType::F64)
}

/// Returns `true` if all valid values in `array` are representable as `target_ptype`.
///
/// Cached min/max statistics are consulted first. If either bound is missing, the function either
Expand Down
1 change: 1 addition & 0 deletions vortex-buffer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ workspace = true
[dev-dependencies]
divan = { workspace = true }
num-traits = { workspace = true }
rand = { workspace = true }
rstest = { workspace = true }

[[bench]]
Expand Down
37 changes: 37 additions & 0 deletions vortex-compute/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
[package]
name = "vortex-compute"
authors = { workspace = true }
categories = { workspace = true }
description = "Lane-level compute kernels for Vortex buffers"
edition = { workspace = true }
homepage = { workspace = true }
include = { workspace = true }
keywords = { workspace = true }
license = { workspace = true }
readme = { workspace = true }
repository = { workspace = true }
rust-version = { workspace = true }
version = { workspace = true }

[package.metadata.docs.rs]
all-features = true

[dependencies]
vortex-buffer = { workspace = true }

[dev-dependencies]
arrow-arith = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-cast = { workspace = true }
arrow-schema = { workspace = true }
divan = { workspace = true }
num-traits = { workspace = true }
rand = { workspace = true }

[lints]
workspace = true

[[bench]]
name = "lane_kernels"
harness = false
Loading
Loading