@@ -72,16 +72,16 @@ let kernel = (a, b) -> begin
7272end
7373
7474# Whole-tile `reinterpret` between UInt8 and Float4_E2M1FN packs/unpacks two FP4
75- # per byte: a `Tile{UInt8,(8,)} ` unpacks to a `Tile{Float4_E2M1FN,(16,) }`,
75+ # per byte: a `Tile{UInt8,Tuple{8}}` unpacks to a `Tile{Float4_E2M1FN,Tuple{16}}`,
7676# lowering to `cuda_tile.unpack` (13.3+).
7777@test @filecheck begin
7878 @check_label " entry"
7979 code_tiled (Tuple{ct. TileArray{UInt8,1 ,spec1d}, ct. TileArray{Float32,1 ,spec1d}};
8080 bytecode_version= v " 13.3" ) do a, b
8181 pid = ct. bid (1 )
82- bytes = ct. load (a, pid, (8 ,)) # Tile{UInt8,(8,) }
82+ bytes = ct. load (a, pid, (8 ,)) # Tile{UInt8,Tuple{8} }
8383 @check " unpack"
84- fp4 = reinterpret (Float4_E2M1FN, bytes) # Tile{Float4_E2M1FN,(16,) }
84+ fp4 = reinterpret (Float4_E2M1FN, bytes) # Tile{Float4_E2M1FN,Tuple{16} }
8585 ct. store (b, pid, convert (ct. Tile{Float32}, fp4))
8686 return
8787 end
9494 bytecode_version= v " 13.3" ) do a, b
9595 pid = ct. bid (1 )
9696 vals = ct. load (a, pid, (16 ,))
97- fp4 = convert (ct. Tile{Float4_E2M1FN}, vals) # Tile{Float4_E2M1FN,(16,) }
97+ fp4 = convert (ct. Tile{Float4_E2M1FN}, vals) # Tile{Float4_E2M1FN,Tuple{16} }
9898 @check " pack"
99- ct. store (b, pid, reinterpret (UInt8, fp4)) # Tile{UInt8,(8,) }
99+ ct. store (b, pid, reinterpret (UInt8, fp4)) # Tile{UInt8,Tuple{8} }
100100 return
101101 end
102102end
0 commit comments