Skip to content

Commit e75450c

Browse files
authored
Revert "PTX: only lower byval arguments the back-end would copy." (#838)
This reverts commit 189c66e.
1 parent 8adcb92 commit e75450c

5 files changed

Lines changed: 11 additions & 41 deletions

File tree

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "GPUCompiler"
22
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
3-
version = "1.19.0"
3+
version = "1.19.1"
44
authors = ["Tim Besard <tim.besard@gmail.com>"]
55

66
[workspace]

src/interface.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ kernel_state_type(@nospecialize(job::CompilerJob)) = Nothing
296296
# Does the target need to pass kernel arguments by value?
297297
pass_by_value(@nospecialize(job::CompilerJob)) = true
298298

299-
# Should the target use byref instead of byval for kernel arguments?
299+
# Should the target use byref instead of byval+lower_byval for kernel arguments?
300300
# When true, aggregate arguments are passed as pointers with the byref attribute,
301301
# allowing the backend to load fields directly from the argument memory (e.g. kernarg
302302
# segment on AMDGPU) instead of materializing the entire struct via first-class aggregates.

src/irgen.jl

Lines changed: 6 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -660,53 +660,24 @@ end
660660

661661
# byval lowering
662662
#
663-
# the NVPTX back-end accesses byval kernel arguments directly in parameter space when all
664-
# uses are simple loads (possibly via GEPs), but otherwise falls back to copying the
665-
# argument to local memory during machine-code generation, too late for the optimizer to
666-
# clean up. eagerly materialize such arguments ourselves instead, leaving the directly-
667-
# accessible ones to the back-end. (we historically lowered all byval arguments, working
668-
# around bad codegen in old back-ends; see #92 and https://reviews.llvm.org/D79744.)
669-
670-
# can the back-end service all uses of this argument straight from parameter space?
671-
function loads_parameter_directly(param::LLVM.Argument)
672-
worklist = LLVM.Value[param]
673-
while !isempty(worklist)
674-
value = popfirst!(worklist)
675-
for use in uses(value)
676-
inst = user(use)
677-
if inst isa LLVM.LoadInst
678-
# serviced by ld.param
679-
elseif inst isa LLVM.GetElementPtrInst ||
680-
inst isa LLVM.BitCastInst ||
681-
inst isa LLVM.AddrSpaceCastInst
682-
push!(worklist, inst)
683-
else
684-
# phis, selects, stores, calls (e.g. memcpy), etc. make the back-end
685-
# fall back to a local copy
686-
return false
687-
end
688-
end
689-
end
690-
return true
691-
end
692-
663+
# some back-ends don't support byval, or support it badly, so lower it eagerly ourselves
664+
# https://reviews.llvm.org/D79744
693665
function lower_byval(@nospecialize(job::CompilerJob), mod::LLVM.Module, f::LLVM.Function)
694666
ft = function_type(f)
695667
@tracepoint "lower byval" begin
696668

697-
# find the byval parameters that need lowering
669+
# find the byval parameters
698670
byval = BitVector(undef, length(parameters(ft)))
699671
types = Vector{LLVMType}(undef, length(parameters(ft)))
700-
for (i, param) in enumerate(parameters(f))
672+
for i in 1:length(byval)
701673
byval[i] = false
702674
for attr in collect(parameter_attributes(f, i))
703675
if kind(attr) == kind(TypeAttribute("byval", LLVM.VoidType()))
704-
byval[i] = !loads_parameter_directly(param)
676+
byval[i] = true
705677
types[i] = value(attr)
706678
end
707679
end
708680
end
709-
any(byval) || return f
710681

711682
# fixup metadata
712683
#
@@ -807,7 +778,6 @@ function lower_byval(@nospecialize(job::CompilerJob), mod::LLVM.Module, f::LLVM.
807778
end
808779

809780

810-
811781
# kernel state arguments
812782
#
813783
# to facilitate passing stateful information to kernels without having to recompile, e.g.,
@@ -1213,7 +1183,7 @@ end
12131183
#
12141184
# the kernel state argument is always passed by value to avoid codegen issues with byval.
12151185
# some back-ends however do not support passing kernel arguments by value, so this pass
1216-
# serves to convert that argument.
1186+
# serves to convert that argument (and is conceptually the inverse of `lower_byval`).
12171187
function kernel_state_to_reference!(@nospecialize(job::CompilerJob), mod::LLVM.Module,
12181188
f::LLVM.Function)
12191189
ft = function_type(f)

src/ptx.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ function finish_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
190190
end
191191

192192
if job.config.kernel
193-
# materialize byval arguments the back-end would otherwise copy to local memory
193+
# work around bad byval codegen (JuliaGPU/GPUCompiler.jl#92)
194194
entry = lower_byval(job, mod, entry)
195195

196196
# emit kernel property annotations into the module. these have to be in

test/ptx.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ end
2828
end
2929

3030
@test @filecheck begin
31-
# aggregates are passed `byval`; the back-end loads them from parameter space
3231
@check_label "define ptx_kernel void @_Z6kernel9Aggregate"
33-
@check_same "byval({{({ i64 }|\\[1 x i64\\])}})"
32+
@check_not cond=typed_ptrs "*"
33+
@check_not cond=opaque_ptrs "ptr"
3434
PTX.code_llvm(mod.kernel, Tuple{mod.Aggregate}; kernel=true)
3535
end
3636
end

0 commit comments

Comments
 (0)