Skip to content

Commit ffd18ff

Browse files
maleadt and claude committed
Make atomic intrinsics consistently tile-based
Move scalar-to-tile conversion from the intrinsic layer to the language layer: atomic_tfunc and the emit functions now always return Tile{T, S} (even for 0D), and the scalar atomic methods unwrap the result via Intrinsics.to_scalar().

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 81efb47 commit ffd18ff

2 files changed

Lines changed: 8 additions & 17 deletions

File tree

src/compiler/intrinsics/atomics.jl

Lines changed: 3 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ end
3434
atomic_tfunc(ptrs) -> Type
3535
3636
Shared tfunc for atomic operations (add, xchg, cas).
37-
Returns raw T for 0D pointer tiles, Tile{T, S} for N-D.
37+
Always returns Tile{T, S}, even for 0D (S = Tuple{}).
3838
"""
3939
function atomic_tfunc(𝕃, @nospecialize(ptrs), @nospecialize args...)
4040
ptrs_type = CC.widenconst(ptrs)
@@ -43,7 +43,6 @@ function atomic_tfunc(𝕃, @nospecialize(ptrs), @nospecialize args...)
4343
ptr_type <: Ptr || return nothing
4444
T = eltype(ptr_type)
4545
S = ptrs_type.parameters[2]
46-
S === Tuple{} && return T
4746
return Tile{T, S}
4847
end
4948

@@ -105,12 +104,7 @@ function emit_intrinsic!(ctx::CGCtx, ::typeof(Intrinsics.atomic_cas), args)
105104
end
106105
ctx.token = new_token
107106

108-
# Return type depends on shape: raw T for 0D, Tile{T, S} for N-D
109-
if isempty(shape)
110-
CGVal(old_val, result_tile_type, elem_type, Int[])
111-
else
112-
CGVal(old_val, result_tile_type, Tile{elem_type, Tuple{shape...}}, collect(shape))
113-
end
107+
CGVal(old_val, result_tile_type, Tile{elem_type, Tuple{shape...}}, collect(shape))
114108
end
115109

116110
# cuda_tile.atomic_rmw_tko (shared helper for atomic RMW operations)
@@ -171,12 +165,7 @@ function emit_atomic_rmw!(ctx::CGCtx, args::AbstractVector, mode::AtomicRMWMode)
171165
end
172166
ctx.token = new_token
173167

174-
# Return type depends on shape: raw T for 0D, Tile{T, S} for N-D
175-
if isempty(shape)
176-
CGVal(old_val, result_tile_type, elem_type, Int[])
177-
else
178-
CGVal(old_val, result_tile_type, Tile{elem_type, Tuple{shape...}}, collect(shape))
179-
end
168+
CGVal(old_val, result_tile_type, Tile{elem_type, Tuple{shape...}}, collect(shape))
180169
end
181170

182171
# cuda_tile.atomic_rmw_tko with XCHG

src/language/atomics.jl

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,8 @@ for op in (:add, :xchg)
116116
memory_order::Int=MemoryOrder.AcqRel,
117117
memory_scope::Int=MemScope.Device) where {T}
118118
ptr_tile, mask, _ = _atomic_ptr_and_mask(array, index)
119-
Intrinsics.$intrinsic(ptr_tile, Tile(val), mask, memory_order, memory_scope)
119+
Intrinsics.to_scalar(
120+
Intrinsics.$intrinsic(ptr_tile, Tile(val), mask, memory_order, memory_scope))
120121
end
121122

122123
# N-D tile indices, scalar val
@@ -177,8 +178,9 @@ end
177178
memory_order::Int=MemoryOrder.AcqRel,
178179
memory_scope::Int=MemScope.Device) where {T}
179180
ptr_tile, mask, _ = _atomic_ptr_and_mask(array, index)
180-
Intrinsics.atomic_cas(ptr_tile, Tile(expected), Tile(desired), mask,
181-
memory_order, memory_scope)
181+
Intrinsics.to_scalar(
182+
Intrinsics.atomic_cas(ptr_tile, Tile(expected), Tile(desired), mask,
183+
memory_order, memory_scope))
182184
end
183185

184186
# N-D tile indices, scalar expected/desired

0 commit comments

Comments
 (0)