@@ -156,6 +156,7 @@ where
156156 offset : p. offset as u32 ,
157157 num_patches : p. num_patches as u32 ,
158158 n_chunks : p. n_chunks as u32 ,
159+ offset_within_chunk : p. offset_within_chunk as u32 ,
159160 }
160161 } else {
161162 // NULL chunk_offsets signals no patches to the kernel
@@ -167,6 +168,7 @@ where
167168 offset : 0 ,
168169 num_patches : 0 ,
169170 n_chunks : 0 ,
171+ offset_within_chunk : 0 ,
170172 }
171173 } ;
172174
@@ -561,4 +563,151 @@ mod tests {
561563
562564 Ok ( ( ) )
563565 }
566+
567+ /// Test slicing a bitpacked array with patches where the slice boundary
568+ /// falls in the middle of a chunk's patch range, creating a non-zero
569+ /// offset_within_chunk.
570+ #[ crate :: test]
571+ fn test_cuda_bitunpack_sliced_patches_offset_within_chunk ( ) -> VortexResult < ( ) > {
572+ let mut cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) )
573+ . vortex_expect ( "failed to create execution context" ) ;
574+
575+ // Create an array with values that will generate patches.
576+ // We use values 0-511 (fits in 9 bits) but include some larger values
577+ // that will become patches.
578+ let mut values: Vec < u16 > = Vec :: with_capacity ( 3072 ) ;
579+ for i in 0u16 ..3072 {
580+ if i == 100 || i == 200 || i == 300 || i == 1100 || i == 1200 || i == 2100 {
581+ // These will be patches (values > 511)
582+ values. push ( 600 ) ;
583+ } else {
584+ values. push ( i % 512 ) ;
585+ }
586+ }
587+
588+ let primitive_array =
589+ PrimitiveArray :: new ( Buffer :: from_iter ( values. iter ( ) . copied ( ) ) , NonNullable ) ;
590+
591+ // Encode with bit width 9 (max value 511)
592+ let bitpacked_array = BitPacked :: encode ( & primitive_array. into_array ( ) , 9 ) ?;
593+ assert ! (
594+ bitpacked_array. patches( ) . is_some( ) ,
595+ "Expected patches to be present"
596+ ) ;
597+
598+ // Slice to create non-zero offset_within_chunk.
599+ // The first chunk (0-1023) has patches at indices 100, 200, 300.
600+ // Slicing from 150 should skip the patch at 100, creating offset_within_chunk=1.
601+ let sliced_array = bitpacked_array. into_array ( ) . slice ( 150 ..2500 ) ?;
602+ assert ! ( sliced_array. is:: <BitPacked >( ) ) ;
603+
604+ let cpu_result = sliced_array. to_canonical ( ) ?;
605+ let gpu_result = block_on ( async {
606+ BitPackedExecutor
607+ . execute ( sliced_array, & mut cuda_ctx)
608+ . await
609+ . vortex_expect ( "GPU decompression failed" )
610+ . into_host ( )
611+ . await
612+ . map ( |a| a. into_array ( ) )
613+ } ) ?;
614+
615+ assert_arrays_eq ! ( cpu_result. into_array( ) , gpu_result) ;
616+
617+ Ok ( ( ) )
618+ }
619+
620+ /// Test slicing a bitpacked array multiple times, accumulating offset_within_chunk.
621+ #[ crate :: test]
622+ fn test_cuda_bitunpack_double_sliced_patches ( ) -> VortexResult < ( ) > {
623+ let mut cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) )
624+ . vortex_expect ( "failed to create execution context" ) ;
625+
626+ // Create an array with values that will generate patches.
627+ let mut values: Vec < u16 > = Vec :: with_capacity ( 3072 ) ;
628+ for i in 0u16 ..3072 {
629+ if i == 50 || i == 100 || i == 200 || i == 300 || i == 400 || i == 1100 || i == 2100 {
630+ values. push ( 600 ) ;
631+ } else {
632+ values. push ( i % 512 ) ;
633+ }
634+ }
635+
636+ let primitive_array =
637+ PrimitiveArray :: new ( Buffer :: from_iter ( values. iter ( ) . copied ( ) ) , NonNullable ) ;
638+
639+ let bitpacked_array = BitPacked :: encode ( & primitive_array. into_array ( ) , 9 ) ?;
640+ assert ! (
641+ bitpacked_array. patches( ) . is_some( ) ,
642+ "Expected patches to be present"
643+ ) ;
644+
645+ // First slice: skip patches at 50
646+ let first_slice = bitpacked_array. into_array ( ) . slice ( 75 ..2500 ) ?;
647+ // Second slice: skip more patches
648+ let second_slice = first_slice. slice ( 50 ..2000 ) ?;
649+ assert ! ( second_slice. is:: <BitPacked >( ) ) ;
650+
651+ let cpu_result = second_slice. to_canonical ( ) ?;
652+ let gpu_result = block_on ( async {
653+ BitPackedExecutor
654+ . execute ( second_slice, & mut cuda_ctx)
655+ . await
656+ . vortex_expect ( "GPU decompression failed" )
657+ . into_host ( )
658+ . await
659+ . map ( |a| a. into_array ( ) )
660+ } ) ?;
661+
662+ assert_arrays_eq ! ( cpu_result. into_array( ) , gpu_result) ;
663+
664+ Ok ( ( ) )
665+ }
666+
667+ /// Test slicing to skip an entire chunk's worth of patches.
668+ #[ crate :: test]
669+ fn test_cuda_bitunpack_sliced_skip_first_chunk_patches ( ) -> VortexResult < ( ) > {
670+ let mut cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) )
671+ . vortex_expect ( "failed to create execution context" ) ;
672+
673+ // Create patches in first chunk only, then slice past them all.
674+ let mut values: Vec < u16 > = Vec :: with_capacity ( 3072 ) ;
675+ for i in 0u16 ..3072 {
676+ if i == 100 || i == 200 || i == 300 {
677+ values. push ( 600 ) ;
678+ } else if i == 1500 || i == 2500 {
679+ values. push ( 700 ) ;
680+ } else {
681+ values. push ( i % 512 ) ;
682+ }
683+ }
684+
685+ let primitive_array =
686+ PrimitiveArray :: new ( Buffer :: from_iter ( values. iter ( ) . copied ( ) ) , NonNullable ) ;
687+
688+ let bitpacked_array = BitPacked :: encode ( & primitive_array. into_array ( ) , 9 ) ?;
689+ assert ! (
690+ bitpacked_array. patches( ) . is_some( ) ,
691+ "Expected patches to be present"
692+ ) ;
693+
694+ // Slice to skip past all first chunk patches
695+ let sliced_array = bitpacked_array. into_array ( ) . slice ( 1024 ..3072 ) ?;
696+ assert ! ( sliced_array. is:: <BitPacked >( ) ) ;
697+
698+ let cpu_result = sliced_array. to_canonical ( ) ?;
699+ let gpu_result = block_on ( async {
700+ BitPackedExecutor
701+ . execute ( sliced_array, & mut cuda_ctx)
702+ . await
703+ . vortex_expect ( "GPU decompression failed" )
704+ . into_host ( )
705+ . await
706+ . map ( |a| a. into_array ( ) )
707+ } ) ?;
708+
709+ assert_arrays_eq ! ( cpu_result. into_array( ) , gpu_result) ;
710+
711+ Ok ( ( ) )
712+ }
564713}
0 commit comments