@@ -115,74 +115,32 @@ function add_kernarg_address_spaces!(
115115 end
116116 needs_rewrite || return f
117117
118- # generate the new function type with constant address space on byref params
119- new_types = LLVMType[]
120- for (i, param) in enumerate (parameters (ft))
121- if byref_mask[i] && param isa LLVM. PointerType && addrspace (param) == 0
122- if supports_typed_pointers (context ())
123- push! (new_types, LLVM. PointerType (eltype (param), #= constant=# 4 ))
124- else
125- push! (new_types, LLVM. PointerType (#= constant=# 4 ))
126- end
127- else
128- push! (new_types, param)
129- end
130- end
131- new_ft = LLVM. FunctionType (return_type (ft), new_types)
132- new_f = LLVM. Function (mod, " " , new_ft)
133- linkage! (new_f, linkage (f))
134- for (arg, new_arg) in zip (parameters (f), parameters (new_f))
135- LLVM. name! (new_arg, LLVM. name (arg))
136- end
137-
138- # insert addrspacecasts from kernarg (4) back to flat (0) so that the cloned IR
139- # (which expects flat pointers) continues to work. The AMDGPU backend's
140- # AMDGPULowerKernelArguments traces these casts and produces s_load.
141- new_args = LLVM. Value[]
142- @dispose builder= IRBuilder () begin
143- entry_bb = BasicBlock (new_f, " conversion" )
144- position! (builder, entry_bb)
145-
146- for (i, param) in enumerate (parameters (ft))
147- if byref_mask[i] && param isa LLVM. PointerType && addrspace (param) == 0
148- cast = addrspacecast! (builder, parameters (new_f)[i], param)
149- push! (new_args, cast)
150- else
151- push! (new_args, parameters (new_f)[i])
152- end
153- end
154-
155- # clone the original function body
156- value_map = Dict {LLVM.Value, LLVM.Value} (
157- param => new_args[i] for (i, param) in enumerate (parameters (f))
158- )
159- value_map[f] = new_f
160- clone_into! (
161- new_f, f; value_map,
162- changes = LLVM. API. LLVMCloneFunctionChangeTypeGlobalChanges
163- )
164-
165- # fall through from conversion block to cloned entry
166- br! (builder, blocks (new_f)[2 ])
167- end
168-
169- # copy parameter attributes AFTER clone_into!, because CloneFunctionInto
170- # overwrites all attributes via setAttributes. For byref params, the VMap
171- # maps old args to addrspacecast instructions (not Arguments), so LLVM's
172- # attribute remapping silently drops them. We must re-add them here.
173- for i in 1 : length (parameters (ft))
118+ # generate the new function type with constant address space on byref flat-pointer params
119+ param_types = parameters (ft)
120+ flat_byref (i) = byref_mask[i] && param_types[i] isa LLVM. PointerType && addrspace (param_types[i]) == 0
121+ new_types = Union{Nothing,LLVMType}[
122+ flat_byref (i) ? (supports_typed_pointers (context ()) ?
123+ LLVM. PointerType (eltype (param_types[i]), #= constant=# 4 ) :
124+ LLVM. PointerType (#= constant=# 4 )) :
125+ nothing
126+ for i in 1 : length (param_types)]
127+
128+ # insert addrspacecasts from kernarg (4) back to flat (0) so that the cloned IR (which expects
129+ # flat pointers) continues to work; the AMDGPU backend's AMDGPULowerKernelArguments traces these
130+ # casts and produces s_load.
131+ new_f = clone_with_converted_args! (mod, f, new_types,
132+ (builder, param, i) -> addrspacecast! (builder, param, param_types[i]))
133+
134+ # copy parameter attributes AFTER clone_into!, because CloneFunctionInto overwrites all
135+ # attributes via setAttributes. For byref params, the VMap maps old args to addrspacecast
136+ # instructions (not Arguments), so LLVM's attribute remapping silently drops them.
137+ for i in 1 : length (param_types)
174138 for attr in collect (parameter_attributes (f, i))
175139 push! (parameter_attributes (new_f, i), attr)
176140 end
177141 end
178142
179- # replace the old function
180- fn = LLVM. name (f)
181- prune_constexpr_uses! (f)
182- @assert isempty (uses (f))
183- replace_metadata_uses! (f, new_f)
184- erase! (f)
185- LLVM. name! (new_f, fn)
143+ replace_function! (f, new_f)
186144
187145 # clean up the extra conversion block
188146 @dispose pb= NewPMPassBuilder () begin
@@ -192,7 +150,7 @@ function add_kernarg_address_spaces!(
192150 run! (pb, mod)
193151 end
194152
195- return functions (mod)[fn]
153+ return new_f
196154end
197155
198156
0 commit comments