diff --git a/vortex-cuda/kernels/src/dynamic_dispatch.cu b/vortex-cuda/kernels/src/dynamic_dispatch.cu index ef9c824e187..b2a3b5aed44 100644 --- a/vortex-cuda/kernels/src/dynamic_dispatch.cu +++ b/vortex-cuda/kernels/src/dynamic_dispatch.cu @@ -114,6 +114,17 @@ __device__ inline void dynamic_source_op(const T *__restrict input, return; } + case SourceOp::SEQUENCE: { + // Generate a linear sequence: value[i] = base + i * multiplier. + // Used for SequenceArray (e.g. monotonic run-end endpoints). + const T base = static_cast(source_op.params.sequence.base); + const T mul = static_cast(source_op.params.sequence.multiplier); + for (uint32_t i = threadIdx.x; i < chunk_len; i += blockDim.x) { + smem_output[i] = base + static_cast(chunk_start + i) * mul; + } + break; + } + default: __builtin_unreachable(); } diff --git a/vortex-cuda/kernels/src/dynamic_dispatch.h b/vortex-cuda/kernels/src/dynamic_dispatch.h index 9f7dc122f1b..bbbbe853f49 100644 --- a/vortex-cuda/kernels/src/dynamic_dispatch.h +++ b/vortex-cuda/kernels/src/dynamic_dispatch.h @@ -60,10 +60,16 @@ union SourceParams { uint64_t num_runs; uint64_t offset; } runend; + + /// Generate a linear sequence: `value[i] = base + i * multiplier`. + struct SequenceParams { + int64_t base; + int64_t multiplier; + } sequence; }; struct SourceOp { - enum SourceOpCode { BITUNPACK, LOAD, RUNEND } op_code; + enum SourceOpCode { BITUNPACK, LOAD, RUNEND, SEQUENCE } op_code; union SourceParams params; }; diff --git a/vortex-cuda/src/dynamic_dispatch/mod.rs b/vortex-cuda/src/dynamic_dispatch/mod.rs index dd4e78c47af..dc5ad14c043 100644 --- a/vortex-cuda/src/dynamic_dispatch/mod.rs +++ b/vortex-cuda/src/dynamic_dispatch/mod.rs @@ -75,6 +75,17 @@ impl SourceOp { }, } } + + /// Generate a linear sequence: `value[i] = base + i * multiplier`. + /// Used for SequenceArray (e.g. monotonic run-end endpoints). + pub fn sequence(base: i64, multiplier: i64) -> Self { + Self { + op_code: SourceOp_SourceOpCode_SEQUENCE, + params: SourceParams { + sequence: SourceParams_SequenceParams { base, multiplier }, + }, + } + } } impl ScalarOp { @@ -1004,4 +1015,60 @@ mod tests { Ok(()) } + + #[rstest] + #[case(0u32, 1u32, 100)] + #[case(5u32, 3u32, 2048)] + #[case(0u32, 1u32, 4096)] + #[case(100u32, 7u32, 5000)] + #[crate::test] + fn test_sequence_unsigned( + #[case] base: u32, + #[case] multiplier: u32, + #[case] len: usize, + ) -> VortexResult<()> { + use vortex::dtype::Nullability; + use vortex::encodings::sequence::SequenceArray; + + let expected: Vec = (0..len).map(|i| base + (i as u32) * multiplier).collect(); + + let seq = SequenceArray::try_new_typed(base, multiplier, Nullability::NonNullable, len)?; + + let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; + let (plan, _bufs) = build_plan(&seq.into_array(), &cuda_ctx)?; + + let actual = run_dynamic_dispatch_plan(&cuda_ctx, expected.len(), &plan)?; + assert_eq!(actual, expected); + + Ok(()) + } + + #[rstest] + #[case(0i32, 1i32, 100)] + #[case(-10i32, 3i32, 2048)] + #[case(100i32, -1i32, 100)] + #[case(-500i32, -7i32, 50)] + #[case(0i32, 1i32, 5000)] + #[crate::test] + fn test_sequence_signed( + #[case] base: i32, + #[case] multiplier: i32, + #[case] len: usize, + ) -> VortexResult<()> { + use vortex::dtype::Nullability; + use vortex::encodings::sequence::SequenceArray; + + let expected: Vec = (0..len).map(|i| base + (i as i32) * multiplier).collect(); + + let seq = SequenceArray::try_new_typed(base, multiplier, Nullability::NonNullable, len)?; + + let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; + let (plan, _bufs) = build_plan(&seq.into_array(), &cuda_ctx)?; + + let actual_u32 = run_dynamic_dispatch_plan(&cuda_ctx, expected.len(), &plan)?; + let actual: Vec = actual_u32.into_iter().map(|v| v as i32).collect(); + assert_eq!(actual, expected); + + Ok(()) + } } diff --git a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs index c381f0c3430..db5b040871c 100644 --- a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs +++ b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs @@ -26,6 +26,8 @@ use vortex::encodings::fastlanes::FoR; use vortex::encodings::fastlanes::FoRArray; use vortex::encodings::runend::RunEnd; use vortex::encodings::runend::RunEndArrayParts; +use vortex::encodings::sequence::Sequence; +use vortex::encodings::sequence::SequenceArrayParts; use vortex::encodings::zigzag::ZigZag; use vortex::error::VortexResult; use vortex::error::vortex_bail; @@ -82,6 +84,7 @@ struct Pipeline { /// - `ALPArray` → recurse + `ALP` scalar op (f32 only, no patches) /// - `DictArray` → input stage for values + recurse codes + `DICT` scalar op /// - `RunEndArray` → input stages for ends/values + `RUNEND` source +/// - `SequenceArray` → `SEQUENCE` source (integer ptypes only) /// - `SliceArray` → resolve via child's slice reduce/kernel /// /// # Limitations @@ -158,6 +161,8 @@ impl PlanBuilderState<'_> { self.walk_primitive(array) } else if id == Slice::ID { self.walk_slice(array) + } else if id == Sequence::ID { + self.walk_sequence(array) } else { vortex_bail!( "Encoding {:?} not supported by dynamic dispatch plan builder", @@ -305,6 +310,25 @@ impl PlanBuilderState<'_> { Ok(pipeline) } + /// SequenceArray → SEQUENCE source op + /// + /// Generates `value[i] = base + i * multiplier` on the GPU. + fn walk_sequence(&mut self, array: ArrayRef) -> VortexResult { + let seq = array + .try_into::() + .map_err(|_| vortex_err!("Expected SequenceArray"))?; + let SequenceArrayParts { + base, multiplier, .. + } = seq.into_parts(); + + Ok(Pipeline { + source: SourceOp::sequence(base.cast()?, multiplier.cast()?), + scalar_ops: vec![], + // SEQUENCE does not have an input pointer. + input_ptr: 0, + }) + } + /// RunEndArray → add input stages for ends and values, RUNEND source op. fn walk_runend(&mut self, array: ArrayRef) -> VortexResult { let re = array