@@ -177,14 +177,16 @@ where
177177 // Get CUDA view of input
178178 let input_view = device_input. cuda_view :: < A :: Physical > ( ) ?;
179179
180+ let output_len = offset + len;
181+
180182 // Allocate output buffer
181- let output_slice = ctx. device_alloc :: < A > ( len . next_multiple_of ( 1024 ) ) ?;
183+ let output_slice = ctx. device_alloc :: < A > ( output_len . next_multiple_of ( 1024 ) ) ?;
182184 let output_buf = CudaDeviceBuffer :: new ( output_slice) ;
183185 let output_view = output_buf. as_view :: < A > ( ) ;
184186
185187 let output_width = size_of :: < A > ( ) * 8 ;
186188 let cuda_function = bitpacked_cuda_kernel ( bit_width, output_width, ctx) ?;
187- let config = bitpacked_cuda_launch_config ( output_width, len ) ?;
189+ let config = bitpacked_cuda_launch_config ( output_width, output_len ) ?;
188190
189191 // We hold this here to keep the device buffers alive.
190192 let device_patches = if let Some ( patches) = patches {
@@ -199,7 +201,7 @@ where
199201
200202 let patches_arg = build_gpu_patches ( device_patches. as_ref ( ) ) ?;
201203
202- ctx. launch_kernel_config ( & cuda_function, config, len , |args| {
204+ ctx. launch_kernel_config ( & cuda_function, config, output_len , |args| {
203205 args. arg ( & input_view)
204206 . arg ( & output_view)
205207 . arg ( & reference)
@@ -552,8 +554,14 @@ mod tests {
552554 Ok ( ( ) )
553555 }
554556
557+ #[ rstest]
558+ #[ case( 67 , 3969 ) ]
559+ #[ case( 1 , 1025 ) ]
555560 #[ crate :: test]
556- fn test_cuda_bitunpack_sliced ( ) -> VortexResult < ( ) > {
561+ fn test_cuda_bitunpack_sliced (
562+ #[ case] slice_start : usize ,
563+ #[ case] slice_end : usize ,
564+ ) -> VortexResult < ( ) > {
557565 let mut ctx = vortex_array:: array_session ( ) . create_execution_ctx ( ) ;
558566 let bit_width = 32 ;
559567 let mut cuda_ctx = CudaSession :: create_execution_ctx ( & crate :: cuda_session ( ) )
@@ -570,7 +578,7 @@ mod tests {
570578
571579 let bitpacked_array = BitPacked :: encode ( & primitive_array. into_array ( ) , bit_width, & mut ctx)
572580 . vortex_expect ( "operation should succeed in test" ) ;
573- let sliced_array = bitpacked_array. into_array ( ) . slice ( 67 .. 3969 ) ?;
581+ let sliced_array = bitpacked_array. into_array ( ) . slice ( slice_start..slice_end ) ?;
574582 let gpu_result = block_on ( async {
575583 BitPackedExecutor
576584 . execute ( sliced_array. clone ( ) , & mut cuda_ctx)
0 commit comments