@@ -24,14 +24,18 @@ use cudarc::driver::DevicePtr;
2424use cudarc:: driver:: LaunchConfig ;
2525use cudarc:: driver:: PushKernelArg ;
2626use vortex:: array:: Canonical ;
27+ use vortex:: array:: IntoArray ;
28+ use vortex:: array:: arrays:: ConstantArray ;
2729use vortex:: array:: arrays:: PrimitiveArray ;
2830use vortex:: array:: buffer:: BufferHandle ;
2931use vortex:: array:: buffer:: DeviceBufferExt ;
3032use vortex:: array:: match_each_unsigned_integer_ptype;
33+ use vortex:: array:: scalar:: Scalar ;
3134use vortex:: array:: validity:: Validity ;
3235use vortex:: buffer:: Alignment ;
3336use vortex:: buffer:: ByteBuffer ;
3437use vortex:: buffer:: ByteBufferMut ;
38+ use vortex:: dtype:: DType ;
3539use vortex:: dtype:: Nullability ;
3640use vortex:: dtype:: PType ;
3741use vortex:: error:: VortexResult ;
@@ -408,6 +412,15 @@ impl ScalarOp {
408412impl MaterializedPlan {
409413 pub fn execute ( self , len : usize , ctx : & mut CudaExecutionCtx ) -> VortexResult < Canonical > {
410414 let output_ptype = self . dispatch_plan . output_ptype ( ) ;
415+
416+ // All values are null — no need to touch the GPU.
417+ if matches ! ( self . validity, Validity :: AllInvalid ) {
418+ let dtype = DType :: Primitive ( output_ptype, Nullability :: Nullable ) ;
419+ return ConstantArray :: new ( Scalar :: null ( dtype) , len)
420+ . into_array ( )
421+ . to_canonical ( ) ;
422+ }
423+
411424 // The CUDA kernels are instantiated for unsigned integer types only;
412425 // map signed/float ptypes to their same-width unsigned counterpart.
413426 let unsigned_ptype = match output_ptype {
@@ -431,9 +444,11 @@ impl MaterializedPlan {
431444 where
432445 T : cudarc:: driver:: DeviceRepr + vortex:: dtype:: NativePType ,
433446 {
447+ let nullability = self . validity . nullability ( ) ;
448+
434449 if len == 0 {
435450 return Ok ( Canonical :: Primitive ( PrimitiveArray :: empty :: < T > (
436- Nullability :: NonNullable ,
451+ nullability ,
437452 ) ) ) ;
438453 }
439454
@@ -467,7 +482,7 @@ impl MaterializedPlan {
467482 Ok ( Canonical :: Primitive ( PrimitiveArray :: from_buffer_handle (
468483 BufferHandle :: new_device ( output_buf. slice_typed :: < T > ( 0 ..len) ) ,
469484 output_ptype,
470- Validity :: NonNullable ,
485+ self . validity ,
471486 ) ) )
472487 }
473488}
@@ -485,8 +500,10 @@ mod tests {
485500 use vortex:: array:: arrays:: DictArray ;
486501 use vortex:: array:: arrays:: PrimitiveArray ;
487502 use vortex:: array:: scalar:: Scalar ;
503+ use vortex:: array:: validity:: Validity ;
488504 use vortex:: array:: validity:: Validity :: NonNullable ;
489505 use vortex:: buffer:: Buffer ;
506+ use vortex:: dtype:: Nullability ;
490507 use vortex:: dtype:: PType ;
491508 use vortex:: encodings:: alp:: ALP ;
492509 use vortex:: encodings:: alp:: ALPArrayExt ;
@@ -509,6 +526,7 @@ mod tests {
509526 use crate :: CudaBufferExt ;
510527 use crate :: CudaDeviceBuffer ;
511528 use crate :: CudaExecutionCtx ;
529+ use crate :: executor:: CudaArrayExt ;
512530 use crate :: hybrid_dispatch:: try_gpu_dispatch;
513531 use crate :: session:: CudaSession ;
514532
@@ -1817,4 +1835,204 @@ mod tests {
18171835
18181836 Ok ( ( ) )
18191837 }
1838+
1839+ // ═══════════════════════════════════════════════════════════════════
1840+ // Validity propagation tests
1841+ // ═══════════════════════════════════════════════════════════════════
1842+
1843+ /// Nullable Primitive array — LOAD source with validity propagated.
1844+ #[ crate :: test]
1845+ async fn test_nullable_primitive ( ) -> VortexResult < ( ) > {
1846+ let mut cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) ) ?;
1847+
1848+ let array = PrimitiveArray :: from_option_iter (
1849+ ( 0 ..2048u32 ) . map ( |i| if i % 3 == 0 { None } else { Some ( i) } ) ,
1850+ ) ;
1851+ let cpu = array. to_canonical ( ) ?. into_array ( ) ;
1852+
1853+ let gpu = try_gpu_dispatch ( & array. into_array ( ) , & mut cuda_ctx)
1854+ . await ?
1855+ . into_host ( )
1856+ . await ?
1857+ . into_array ( ) ;
1858+
1859+ vortex:: array:: assert_arrays_eq!( cpu, gpu) ;
1860+ Ok ( ( ) )
1861+ }
1862+
1863+ /// Nullable FoR(BitPacked) — validity from the root FoR propagated
1864+ /// through the fused plan.
1865+ #[ crate :: test]
1866+ async fn test_nullable_for_bitpacked ( ) -> VortexResult < ( ) > {
1867+ let mut cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) ) ?;
1868+
1869+ let len = 2048 ;
1870+ let reference = 1000u32 ;
1871+ let residuals: Vec < u32 > = ( 0 ..len) . map ( |i| ( i % 64 ) as u32 ) . collect ( ) ;
1872+
1873+ // Build a nullable source — encode as FoR(BitPacked), then replace
1874+ // validity on the root FoR array.
1875+ let prim = PrimitiveArray :: from_option_iter ( residuals. iter ( ) . enumerate ( ) . map ( |( i, & v) | {
1876+ if i % 5 == 0 {
1877+ None
1878+ } else {
1879+ Some ( v + reference)
1880+ }
1881+ } ) ) ;
1882+
1883+ // Build expected from the nullable primitive.
1884+ let cpu = prim. to_canonical ( ) ?. into_array ( ) ;
1885+
1886+ // Encode the non-null residuals into FoR(BitPacked). The validity
1887+ // lives on the outermost array; inner children are non-nullable.
1888+ let inner_prim = PrimitiveArray :: new ( Buffer :: from ( residuals) , NonNullable ) ;
1889+ let bp = BitPacked :: encode ( & inner_prim. into_array ( ) , 6 ) ?;
1890+ let for_arr = FoR :: try_new ( bp. into_array ( ) , reference. into ( ) ) ?;
1891+
1892+ // Re-wrap with the nullable validity from the original primitive.
1893+ let validity = prim. validity ( ) ?;
1894+ let nullable_for = PrimitiveArray :: from_buffer_handle (
1895+ for_arr. into_array ( ) . to_primitive ( ) . into_data_parts ( ) . buffer ,
1896+ PType :: U32 ,
1897+ validity,
1898+ ) ;
1899+
1900+ let gpu = try_gpu_dispatch ( & nullable_for. into_array ( ) , & mut cuda_ctx)
1901+ . await ?
1902+ . into_host ( )
1903+ . await ?
1904+ . into_array ( ) ;
1905+
1906+ vortex:: array:: assert_arrays_eq!( cpu, gpu) ;
1907+ Ok ( ( ) )
1908+ }
1909+
1910+ /// AllInvalid array — kernel should be skipped entirely.
1911+ #[ crate :: test]
1912+ async fn test_all_invalid_skips_kernel ( ) -> VortexResult < ( ) > {
1913+ let mut cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) ) ?;
1914+
1915+ let array = PrimitiveArray :: new ( Buffer :: from ( vec ! [ 0u32 ; 2048 ] ) , Validity :: AllInvalid ) ;
1916+
1917+ let result = try_gpu_dispatch ( & array. into_array ( ) , & mut cuda_ctx)
1918+ . await ?
1919+ . into_host ( )
1920+ . await ?;
1921+
1922+ let prim = result. into_primitive ( ) ;
1923+ assert_eq ! ( prim. len( ) , 2048 ) ;
1924+ assert ! ( matches!( prim. validity( ) ?, Validity :: AllInvalid ) ) ;
1925+ Ok ( ( ) )
1926+ }
1927+
1928+ /// AllValid nullable array — should fuse and produce AllValid output.
1929+ #[ crate :: test]
1930+ async fn test_all_valid_nullable ( ) -> VortexResult < ( ) > {
1931+ let mut cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) ) ?;
1932+
1933+ let values: Vec < u32 > = ( 0 ..2048 ) . collect ( ) ;
1934+ let array = PrimitiveArray :: new ( Buffer :: from ( values. clone ( ) ) , Validity :: AllValid ) ;
1935+
1936+ let cpu = array. to_canonical ( ) ?. into_array ( ) ;
1937+ let gpu = try_gpu_dispatch ( & array. into_array ( ) , & mut cuda_ctx)
1938+ . await ?
1939+ . into_host ( )
1940+ . await ?
1941+ . into_array ( ) ;
1942+
1943+ vortex:: array:: assert_arrays_eq!( cpu, gpu) ;
1944+ Ok ( ( ) )
1945+ }
1946+
1947+ /// Dict with nullable codes must fall back to Unfused (not fused).
1948+ #[ crate :: test]
1949+ fn test_dict_nullable_codes_rejected ( ) -> VortexResult < ( ) > {
1950+ use vortex:: buffer:: buffer;
1951+
1952+ let codes = PrimitiveArray :: from_option_iter ( [ Some ( 0u32 ) , None , Some ( 1 ) , None , Some ( 2 ) ] ) ;
1953+ let values = PrimitiveArray :: new ( buffer ! [ 10u32 , 20 , 30 ] , NonNullable ) ;
1954+ let dict = DictArray :: try_new ( codes. into_array ( ) , values. into_array ( ) ) ?;
1955+
1956+ let plan = DispatchPlan :: new ( & dict. into_array ( ) ) ?;
1957+ assert ! (
1958+ matches!( plan, DispatchPlan :: Unfused ) ,
1959+ "Dict with nullable codes should fall back to Unfused"
1960+ ) ;
1961+ Ok ( ( ) )
1962+ }
1963+
1964+ /// Dict with non-nullable codes but nullable values should still fuse.
1965+ #[ crate :: test]
1966+ async fn test_dict_nullable_values_fuses ( ) -> VortexResult < ( ) > {
1967+ use vortex:: buffer:: buffer;
1968+
1969+ let mut cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) ) ?;
1970+
1971+ let codes = PrimitiveArray :: new ( buffer ! [ 0u32 , 1 , 2 , 2 , 1 , 0 ] , NonNullable ) ;
1972+ let values = PrimitiveArray :: from_option_iter ( [ Some ( 10u32 ) , None , Some ( 30 ) ] ) ;
1973+ let dict = DictArray :: try_new ( codes. into_array ( ) , values. into_array ( ) ) ?;
1974+
1975+ let cpu = dict. to_canonical ( ) ?. into_array ( ) ;
1976+ let gpu = dict
1977+ . into_array ( )
1978+ . execute_cuda ( & mut cuda_ctx)
1979+ . await ?
1980+ . into_host ( )
1981+ . await ?
1982+ . into_array ( ) ;
1983+
1984+ vortex:: array:: assert_arrays_eq!( cpu, gpu) ;
1985+ Ok ( ( ) )
1986+ }
1987+
1988+ /// Nullable FoR(BitPacked) through CUB filter — the original bug scenario.
1989+ /// Validity must survive through fused dispatch and into the filter.
1990+ #[ crate :: test]
1991+ async fn test_nullable_fused_then_filter ( ) -> VortexResult < ( ) > {
1992+ use vortex:: array:: arrays:: FilterArray ;
1993+ use vortex:: mask:: Mask ;
1994+
1995+ let mut cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) ) ?;
1996+
1997+ let len = 2048usize ;
1998+ let values: Vec < Option < u32 > > = ( 0 ..len)
1999+ . map ( |i| {
2000+ if i % 7 == 0 {
2001+ None
2002+ } else {
2003+ Some ( ( i % 64 ) as u32 )
2004+ }
2005+ } )
2006+ . collect ( ) ;
2007+ let prim = PrimitiveArray :: from_option_iter ( values. iter ( ) . copied ( ) ) ;
2008+
2009+ // Keep every other element.
2010+ let mask = Mask :: from_iter ( ( 0 ..len) . map ( |i| i % 2 == 0 ) ) ;
2011+ let filter_array = FilterArray :: try_new ( prim. into_array ( ) , mask) ?;
2012+
2013+ let cpu = filter_array. to_canonical ( ) ?. into_array ( ) ;
2014+ let gpu = filter_array
2015+ . into_array ( )
2016+ . execute_cuda ( & mut cuda_ctx)
2017+ . await ?
2018+ . into_host ( )
2019+ . await ?
2020+ . into_array ( ) ;
2021+
2022+ vortex:: array:: assert_arrays_eq!( cpu, gpu) ;
2023+ Ok ( ( ) )
2024+ }
2025+
2026+ /// Empty nullable array should preserve nullability.
2027+ #[ crate :: test]
2028+ async fn test_empty_nullable_array ( ) -> VortexResult < ( ) > {
2029+ let mut cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) ) ?;
2030+
2031+ let array = PrimitiveArray :: new ( Buffer :: < u32 > :: empty ( ) , Validity :: AllValid ) ;
2032+ let result = try_gpu_dispatch ( & array. into_array ( ) , & mut cuda_ctx) . await ?;
2033+ let prim = result. into_primitive ( ) ;
2034+ assert_eq ! ( prim. len( ) , 0 ) ;
2035+ assert_eq ! ( prim. validity( ) ?. nullability( ) , Nullability :: Nullable ) ;
2036+ Ok ( ( ) )
2037+ }
18202038}
0 commit comments