Use `Tuple{...}` for the `Shape` type parameter of `Tile` in the comments and docs introduced in #238

AntonOresten · AntonOresten · commit 2b4d57d01f23 · 2026-06-01T20:53:11.000+02:00
diff --git a/src/language/operations.jl b/src/language/operations.jl
@@ -975,16 +975,16 @@ Reinterpret the *whole tile* `x` as a tile of element type `T`, like
 element widths. Lowers to `cuda_tile.bitcast` for equal widths and to
 `cuda_tile.pack`/`unpack` (via `reshape` to rank-1) when widths differ.
 
-This is how sub-byte formats move through global memory: a `Tile{UInt8,(N,)}`
-reinterprets to a `Tile{Float4_E2M1FN,(2N,)}` and back, so FP4 data can be stored
+This is how sub-byte formats move through global memory: a `Tile{UInt8,Tuple{N}}`
+reinterprets to a `Tile{Float4_E2M1FN,Tuple{2N}}` and back, so FP4 data can be stored
 in a `UInt8` array. The total bit-width is preserved, so it must divide evenly.
 
 Note `reinterpret.(T, x)` (with a dot) is the unrelated *element-wise* broadcast,
 which keeps the shape and requires `T` to be the same width as `eltype(x)`.
 
 ```julia
-bytes = ct.load(a, pid, (8,))                 # Tile{UInt8,(8,)}
-fp4   = reinterpret(Float4_E2M1FN, bytes)     # Tile{Float4_E2M1FN,(16,)}
+bytes = ct.load(a, pid, (8,))                 # Tile{UInt8,Tuple{8}}
+fp4   = reinterpret(Float4_E2M1FN, bytes)     # Tile{Float4_E2M1FN,Tuple{16}}
 vals  = convert(ct.Tile{Float32}, fp4)        # widen for compute
 ```
 """
diff --git a/test/extensions/Microfloats.jl b/test/extensions/Microfloats.jl
@@ -72,16 +72,16 @@ let kernel = (a, b) -> begin
 end
 
 # Whole-tile `reinterpret` between UInt8 and Float4_E2M1FN packs/unpacks two FP4
-# per byte: a `Tile{UInt8,(8,)}` unpacks to a `Tile{Float4_E2M1FN,(16,)}`,
+# per byte: a `Tile{UInt8,Tuple{8}}` unpacks to a `Tile{Float4_E2M1FN,Tuple{16}}`,
 # lowering to `cuda_tile.unpack` (13.3+).
 @test @filecheck begin
     @check_label "entry"
     code_tiled(Tuple{ct.TileArray{UInt8,1,spec1d}, ct.TileArray{Float32,1,spec1d}};
                bytecode_version=v"13.3") do a, b
         pid = ct.bid(1)
-        bytes = ct.load(a, pid, (8,))            # Tile{UInt8,(8,)}
+        bytes = ct.load(a, pid, (8,))            # Tile{UInt8,Tuple{8}}
         @check "unpack"
-        fp4 = reinterpret(Float4_E2M1FN, bytes)  # Tile{Float4_E2M1FN,(16,)}
+        fp4 = reinterpret(Float4_E2M1FN, bytes)  # Tile{Float4_E2M1FN,Tuple{16}}
         ct.store(b, pid, convert(ct.Tile{Float32}, fp4))
         return
     end
@@ -94,9 +94,9 @@ end
                bytecode_version=v"13.3") do a, b
         pid = ct.bid(1)
         vals = ct.load(a, pid, (16,))
-        fp4 = convert(ct.Tile{Float4_E2M1FN}, vals)  # Tile{Float4_E2M1FN,(16,)}
+        fp4 = convert(ct.Tile{Float4_E2M1FN}, vals)  # Tile{Float4_E2M1FN,Tuple{16}}
         @check "pack"
-        ct.store(b, pid, reinterpret(UInt8, fp4))    # Tile{UInt8,(8,)}
+        ct.store(b, pid, reinterpret(UInt8, fp4))    # Tile{UInt8,Tuple{8}}
         return
     end
 end