@@ -1233,78 +1233,89 @@ end
12331233 end
12341234end
12351235
# Whole-tile `reinterpret` must match Julia's own `reinterpret` element-for-element,
# not merely round-trip. Each case compares the kernel output against the host-side
# `reinterpret` of the same input: that asymmetric check is what pins down the byte
# order and the column-major dimension scaling, whereas a symmetric X→Y→X round-trip
# would pass under any self-consistent convention.
#
# Naming convention used below: "narrowing" means the element type gets smaller
# (UInt16 → UInt8), "widening" means it gets larger (UInt8 → UInt16).
@testset "reinterpret matches Base.reinterpret" begin
    # UInt16 (2,4) → UInt8 (4,4): exercises the leading-dimension scaling and the
    # within-element byte order.
    @testset "2D narrowing" begin
        function u16_to_u8(a::ct.TileArray{UInt16,2}, b::ct.TileArray{UInt8,2})
            pid = ct.bid(1)
            ct.store(b, pid, reinterpret(UInt8, ct.load(a, pid, (2, 4))))
            return
        end
        let M = reshape(UInt16[0x0102, 0x0304, 0x0506, 0x0708,
                               0x090a, 0x0b0c, 0x0d0e, 0x0f10], 2, 4)
            a = CuArray(M)
            b = CUDA.zeros(UInt8, 4, 4)
            @cuda backend=cuTile blocks=1 u16_to_u8(a, b)
            @test Array(b) == Array(reinterpret(UInt8, M))
        end
    end

    # The other direction in 1-D: UInt8 (8,) → UInt16 (4,).
    @testset "1D widening" begin
        function u8_to_u16(a::ct.TileArray{UInt8,1}, b::ct.TileArray{UInt16,1})
            pid = ct.bid(1)
            ct.store(b, pid, reinterpret(UInt16, ct.load(a, pid, (8,))))
            return
        end
        let v = UInt8[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]
            a = CuArray(v)
            b = CUDA.zeros(UInt16, 4)
            @cuda backend=cuTile blocks=1 u8_to_u16(a, b)
            @test Array(b) == reinterpret(UInt16, v)
        end
    end

    # `reinterpret(reshape, ...)` form, widening: the leading dimension is consumed.
    # UInt8 (2,4) → UInt16 (4,).
    @testset "widening: reshape argument drops dim" begin
        function u8_reshape_u16(a::ct.TileArray{UInt8,2}, b::ct.TileArray{UInt16,1})
            pid = ct.bid(1)
            ct.store(b, pid, reinterpret(reshape, UInt16, ct.load(a, pid, (2, 4))))
            return
        end
        let M = reshape(UInt8[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08], 2, 4)
            a = CuArray(M)
            b = CUDA.zeros(UInt16, 4)
            @cuda backend=cuTile blocks=1 u8_reshape_u16(a, b)
            @test Array(b) == reinterpret(reshape, UInt16, M)
        end
    end

    # `reinterpret(reshape, ...)` form, narrowing: a new leading dimension appears.
    # UInt16 (4,) → UInt8 (2,4).
    @testset "narrowing: reshape argument inserts dim" begin
        function u16_reshape_u8(a::ct.TileArray{UInt16,1}, b::ct.TileArray{UInt8,2})
            pid = ct.bid(1)
            ct.store(b, pid, reinterpret(reshape, UInt8, ct.load(a, pid, (4,))))
            return
        end
        let M = UInt16[0x0201, 0x0403, 0x0605, 0x0807]
            a = CuArray(M)
            b = CUDA.zeros(UInt8, 2, 4)
            @cuda backend=cuTile blocks=1 u16_reshape_u8(a, b)
            @test Array(b) == reinterpret(reshape, UInt8, M)
        end
    end

    # Equal element widths keep the shape: UInt32 (16,) → Float32 (16,). The result is
    # reinterpreted back to UInt32 on the host so the comparison is bit-exact.
    @testset "Equal-width round-trip preserves values and shape" begin
        function u32_to_f32(a::ct.TileArray{UInt32,1}, b::ct.TileArray{Float32,1})
            pid = ct.bid(1)
            ct.store(b, pid, reinterpret(Float32, ct.load(a, pid, (16,))))
            return
        end
        let v = rand(UInt32, 16)
            a = CuArray(v)
            b = CUDA.zeros(Float32, 16)
            @cuda backend=cuTile blocks=1 u32_to_f32(a, b)
            @test reinterpret(UInt32, Array(b)) == v  # bit-exact (avoids NaN ≠ NaN)
        end
    end

    # Same-width signed → unsigned on a 2-D tile: the result must still equal Julia's
    # `reinterpret`, with the (4,4) shape preserved. Input covers negative values.
    @testset "Int32 to UInt32" begin
        function i32_to_u32_2d(a::ct.TileArray{Int32,2}, b::ct.TileArray{UInt32,2})
            pid = ct.bid(1)
            ct.store(b, pid, reinterpret(UInt32, ct.load(a, pid, (4, 4))))
            return
        end
        let M = reshape(Int32.(-8:7), 4, 4)
            a = CuArray(M)
            b = CUDA.zeros(UInt32, 4, 4)
            @cuda backend=cuTile blocks=1 i32_to_u32_2d(a, b)
            @test Array(b) == reinterpret(UInt32, M)
        end
    end
end
0 commit comments