@@ -1164,78 +1164,89 @@ end
11641164 end
11651165end
11661166
1167- # Whole-tile `reinterpret` (cuda_tile.pack/unpack, 13.3+) must match Julia's own
1168- # `reinterpret` element-for-element — not merely round-trip. An asymmetric check
1169- # (kernel result vs host `reinterpret`) is what actually pins down the byte order
1170- # and the column-major dimension scaling; a symmetric X→Y→X round-trip would pass
1171- # under any self-consistent convention.
11721167@testset " reinterpret matches Base.reinterpret" begin
1173- # 2-D widen→narrow: UInt16 (2,4) → UInt8 (4,4). Exercises both the column-major
1174- # leading-dim scaling and the within-element (little-endian) byte order.
1175- function u16_to_u8 (a :: ct.TileArray{UInt16,2} , b :: ct.TileArray{UInt8,2} )
1176- pid = ct. bid ( 1 )
1177- ct . store (b, pid, reinterpret (UInt8, ct . load (a, pid, ( 2 , 4 ))))
1178- return
1179- end
1180- let M = reshape (UInt16[ 0x0102 , 0x0304 , 0x0506 , 0x0708 ,
1181- 0x090a , 0x0b0c , 0x0d0e , 0x0f10 ], 2 , 4 )
1182- a = CuArray (M )
1183- b = CUDA . zeros (UInt8, 4 , 4 )
1184- @cuda backend = cuTile blocks = 1 u16_to_u8 (a, b )
1185- @test Array (b) == Array ( reinterpret (UInt8, M))
1168+ @testset " 2D narrowing " begin
1169+ function u16_to_u8 (a :: ct.TileArray{UInt16,2} , b :: ct.TileArray{UInt8,2} )
1170+ pid = ct. bid ( 1 )
1171+ ct . store (b, pid, reinterpret (UInt8, ct. load (a, pid, ( 2 , 4 ))) )
1172+ return
1173+ end
1174+ let M = reshape (UInt16[ 0x0102 , 0x0304 , 0x0506 , 0x0708 ,
1175+ 0x090a , 0x0b0c , 0x0d0e , 0x0f10 ], 2 , 4 )
1176+ a = CuArray (M )
1177+ b = CUDA . zeros (UInt8, 4 , 4 )
1178+ @cuda backend = cuTile blocks = 1 u16_to_u8 (a, b )
1179+ @test Array (b) == Array ( reinterpret (UInt8, M) )
1180+ end
11861181 end
11871182
1188- # 1-D narrow→widen the other direction: UInt8 (8,) → UInt16 (4,).
1189- function u8_to_u16 (a:: ct.TileArray{UInt8,1} , b:: ct.TileArray{UInt16,1} )
1190- pid = ct. bid (1 )
1191- ct. store (b, pid, reinterpret (UInt16, ct. load (a, pid, (8 ,))))
1192- return
1193- end
1194- let v = UInt8[0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 , 0x08 ]
1195- a = CuArray (v)
1196- b = CUDA. zeros (UInt16, 4 )
1197- @cuda backend= cuTile blocks= 1 u8_to_u16 (a, b)
1198- @test Array (b) == reinterpret (UInt16, v)
1183+ @testset " 1D widening" begin
1184+ function u8_to_u16 (a:: ct.TileArray{UInt8,1} , b:: ct.TileArray{UInt16,1} )
1185+ pid = ct. bid (1 )
1186+ ct. store (b, pid, reinterpret (UInt16, ct. load (a, pid, (8 ,))))
1187+ return
1188+ end
1189+ let v = UInt8[0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 , 0x08 ]
1190+ a = CuArray (v)
1191+ b = CUDA. zeros (UInt16, 4 )
1192+ @cuda backend= cuTile blocks= 1 u8_to_u16 (a, b)
1193+ @test Array (b) == reinterpret (UInt16, v)
1194+ end
11991195 end
12001196
1201- # `reshape`-form: widening drops the leading dim. UInt8 (2,4) → UInt16 (4,).
1202- function u8_reshape_u16 (a:: ct.TileArray{UInt8,2} , b:: ct.TileArray{UInt16,1} )
1203- pid = ct. bid (1 )
1204- ct. store (b, pid, reinterpret (reshape, UInt16, ct. load (a, pid, (2 , 4 ))))
1205- return
1206- end
1207- let M = reshape (UInt8[0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 , 0x08 ], 2 , 4 )
1208- a = CuArray (M)
1209- b = CUDA. zeros (UInt16, 4 )
1210- @cuda backend= cuTile blocks= 1 u8_reshape_u16 (a, b)
1211- @test Array (b) == reinterpret (reshape, UInt16, M)
1197+ @testset " widening: reshape argument drops dim" begin
1198+ function u8_reshape_u16 (a:: ct.TileArray{UInt8,2} , b:: ct.TileArray{UInt16,1} )
1199+ pid = ct. bid (1 )
1200+ ct. store (b, pid, reinterpret (reshape, UInt16, ct. load (a, pid, (2 , 4 ))))
1201+ return
1202+ end
1203+ let M = reshape (UInt8[0x01 , 0x02 , 0x03 , 0x04 , 0x05 , 0x06 , 0x07 , 0x08 ], 2 , 4 )
1204+ a = CuArray (M)
1205+ b = CUDA. zeros (UInt16, 4 )
1206+ @cuda backend= cuTile blocks= 1 u8_reshape_u16 (a, b)
1207+ @test Array (b) == reinterpret (reshape, UInt16, M)
1208+ end
12121209 end
12131210
1214- # Equal-width route lowers to `bitcast` (shape preserved), not pack/unpack.
1215- # UInt32 → Float32 is a real bitcast (distinct Tile IR dtypes i32 vs f32).
1216- function u32_to_f32 (a :: ct.TileArray{UInt32,1} , b :: ct.TileArray{Float32,1} )
1217- pid = ct. bid ( 1 )
1218- ct . store (b, pid, reinterpret (Float32, ct . load (a, pid, ( 16 ,))))
1219- return
1220- end
1221- let v = rand (UInt32, 16 )
1222- a = CuArray (v )
1223- b = CUDA . zeros (Float32, 16 )
1224- @cuda backend = cuTile blocks = 1 u32_to_f32 (a, b )
1225- @test reinterpret (UInt32, Array (b)) == v # bit-exact (avoids NaN ≠ NaN)
1211+ @testset " narrowing: reshape argument inserts dim " begin
1212+ function u16_reshape_u8 (a :: ct.TileArray{UInt16,1} , b :: ct.TileArray{UInt8,2} )
1213+ pid = ct. bid ( 1 )
1214+ ct . store (b, pid, reinterpret (reshape, UInt8, ct. load (a, pid, ( 4 ,))) )
1215+ return
1216+ end
1217+ let M = UInt16[ 0x0201 , 0x0403 , 0x0605 , 0x0807 ]
1218+ a = CuArray (M )
1219+ b = CUDA . zeros (UInt8, 2 , 4 )
1220+ @cuda backend = cuTile blocks = 1 u16_reshape_u8 (a, b )
1221+ @test Array (b) == reinterpret (reshape, UInt8, M )
1222+ end
12261223 end
12271224
1228- # Signless integer no-op (Int32 ↔ UInt32 are both i32): emits no op, but the
1229- # result must still equal Julia's reinterpret, with the 2-D shape preserved.
1230- function i32_to_u32_2d (a:: ct.TileArray{Int32,2} , b:: ct.TileArray{UInt32,2} )
1231- pid = ct. bid (1 )
1232- ct. store (b, pid, reinterpret (UInt32, ct. load (a, pid, (4 , 4 ))))
1233- return
1225+ @testset " Equal-width round-trip preserves values and shape" begin
1226+ function u32_to_f32 (a:: ct.TileArray{UInt32,1} , b:: ct.TileArray{Float32,1} )
1227+ pid = ct. bid (1 )
1228+ ct. store (b, pid, reinterpret (Float32, ct. load (a, pid, (16 ,))))
1229+ return
1230+ end
1231+ let v = rand (UInt32, 16 )
1232+ a = CuArray (v)
1233+ b = CUDA. zeros (Float32, 16 )
1234+ @cuda backend= cuTile blocks= 1 u32_to_f32 (a, b)
1235+ @test reinterpret (UInt32, Array (b)) == v # bit-exact (avoids NaN ≠ NaN)
1236+ end
12341237 end
1235- let M = reshape (Int32 .(- 8 : 7 ), 4 , 4 )
1236- a = CuArray (M)
1237- b = CUDA. zeros (UInt32, 4 , 4 )
1238- @cuda backend= cuTile blocks= 1 i32_to_u32_2d (a, b)
1239- @test Array (b) == reinterpret (UInt32, M)
1238+
1239+ @testset " Int32 to UInt32" begin
1240+ function i32_to_u32_2d (a:: ct.TileArray{Int32,2} , b:: ct.TileArray{UInt32,2} )
1241+ pid = ct. bid (1 )
1242+ ct. store (b, pid, reinterpret (UInt32, ct. load (a, pid, (4 , 4 ))))
1243+ return
1244+ end
1245+ let M = reshape (Int32 .(- 8 : 7 ), 4 , 4 )
1246+ a = CuArray (M)
1247+ b = CUDA. zeros (UInt32, 4 , 4 )
1248+ @cuda backend= cuTile blocks= 1 i32_to_u32_2d (a, b)
1249+ @test Array (b) == reinterpret (UInt32, M)
1250+ end
12401251 end
12411252end
0 commit comments