Skip to content

Commit 5e4c84f

Browse files
authored
Metal: Drive the downgrader here, lower intrinsics during mcgen. (#829)
1 parent 5d88d3c commit 5e4c84f

6 files changed

Lines changed: 84 additions & 7 deletions

File tree

Project.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "GPUCompiler"
22
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
3-
version = "1.16.1"
3+
version = "1.17.0"
44
authors = ["Tim Besard <tim.besard@gmail.com>"]
55

66
[workspace]
@@ -20,10 +20,14 @@ TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
2020
Tracy = "e689c965-62c8-4b79-b2c5-8359227902fd"
2121
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
2222

23+
[weakdeps]
24+
LLVMDowngrader_jll = "f52de702-fb25-5922-94ba-81dd59b07444"
25+
2326
[compat]
2427
ExprTools = "0.1"
2528
InteractiveUtils = "1"
2629
LLVM = "9.8.1"
30+
LLVMDowngrader_jll = "0.7"
2731
Libdl = "1"
2832
Logging = "1"
2933
PrecompileTools = "1"

src/metal.jl

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# implementation of the GPUCompiler interfaces for generating Metal code
22

3+
const LLVMDowngrader_jll =
4+
LazyModule("LLVMDowngrader_jll",
5+
UUID("f52de702-fb25-5922-94ba-81dd59b07444"))
6+
37

48
## target info
59

@@ -64,7 +68,8 @@ function Base.hash(target::MetalCompilerTarget, h::UInt)
6468
h = hash(target.fastmath, h)
6569
end
6670

67-
source_code(target::MetalCompilerTarget) = "text"
71+
# the canonical text representation is AIR assembly, i.e., LLVM 5 era textual IR
72+
source_code(target::MetalCompilerTarget) = "llvm"
6873

6974
# Metal is not supported by our LLVM builds, so we can't get a target machine
7075
llvm_machine(::MetalCompilerTarget) = nothing
@@ -295,6 +300,17 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
295300
add_module_metadata!(job, mod)
296301
end
297302

303+
return functions(mod)[entry_fn]
304+
end
305+
306+
# lowering of LLVM IR to AIR-compatible IR
307+
#
308+
# Metal does not have an LLVM back-end, so the lowering of target-independent LLVM IR into
309+
# target-specific constructs -- something that normally happens during instruction
310+
# selection -- is implemented here as IR-to-IR rewrites, run at the start of `mcgen`.
311+
# this keeps the `:llvm` output (e.g. `code_llvm`) close to what Julia generated, using
312+
# generic LLVM intrinsics, while the `:asm`/`:obj` outputs contain AIR intrinsics.
313+
function lower_air!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::LLVM.Module)
298314
# strip device-side `trap`s and rewrite `unreachable` into clean returns (#433, #370). this
299315
# runs post-`optimize!`, after the trap has finished serving as the optimizer guard; the pass
300316
# force-inlines throwing functions into the kernel first so the rewrite is sound, then scrubs
@@ -316,13 +332,15 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
316332
changed |= lower_llvm_intrinsics!(job, f)
317333
end
318334
if changed
319-
# lowering may have introduced additional functions marked `alwaysinline`
335+
# lowering may have introduced additional functions marked `alwaysinline`,
336+
# and left dead declarations of the replaced LLVM intrinsics behind
320337
@dispose pb=NewPMPassBuilder() begin
321338
add!(pb, AlwaysInlinerPass())
322339
add!(pb, NewPMFunctionPassManager()) do fpm
323340
add!(fpm, SimplifyCFGPass())
324341
add!(fpm, instcombine_pass(job))
325342
end
343+
add!(pb, StripDeadPrototypesPass())
326344
run!(pb, mod)
327345
end
328346
end
@@ -336,13 +354,28 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
336354
end
337355
end
338356

339-
return functions(mod)[entry_fn]
357+
return
340358
end
341359

342360
@unlocked function mcgen(job::CompilerJob{MetalCompilerTarget}, mod::LLVM.Module,
343361
format=LLVM.API.LLVMObjectFile)
344-
# our LLVM version does not support emitting Metal libraries
345-
return nothing
362+
# lower LLVM constructs that the AIR back-end does not support; this takes the place
363+
# of instruction selection, as our LLVM does not have a Metal target machine.
364+
lower_air!(job, mod)
365+
366+
if !isavailable(LLVMDowngrader_jll)
367+
error("Metal machine-code generation requires the LLVMDowngrader_jll package, which should be installed and loaded first.")
368+
end
369+
370+
# assemble to AIR, i.e., LLVM 5 era bitcode, as consumed by the metallib loader
371+
air = run_tool(`$(LLVMDowngrader_jll.llvm_as()) --bitcode-version=5.0 -o -`, string(mod))
372+
373+
if format == LLVM.API.LLVMAssemblyFile
374+
# disassemble the bitcode again to AIR assembly, i.e., LLVM 5 era textual IR
375+
String(run_tool(`$(LLVMDowngrader_jll.llvm_dis_5()) -o -`, air))
376+
else
377+
air
378+
end
346379
end
347380

348381

src/utils.jl

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,37 @@ function Base.getproperty(lazy_mod::LazyModule, sym::Symbol)
3737
end
3838

3939

40+
## external tools
41+
42+
# run an external tool (e.g. from a JLL package), feeding `input` to its standard input
# and returning its standard output as a byte vector. throws an `ErrorException` if the
# tool exits unsuccessfully, including the tool's standard error output in the message.
# for tools that instead communicate through files, e.g., because the inputs should be
# preserved for error reporting, use `run` directly.
function run_tool(cmd::Cmd, input)
    stdin_pipe = Pipe()
    stdout_pipe = Pipe()
    stderr_pipe = Pipe()

    proc = run(pipeline(cmd; stdin=stdin_pipe, stdout=stdout_pipe, stderr=stderr_pipe);
               wait=false)
    # drop our copies of the write ends, so that the readers below observe EOF once
    # the tool exits
    close(stdout_pipe.in)
    close(stderr_pipe.in)

    # feed the input and drain both outputs concurrently, so that neither we nor the
    # tool end up blocked on a pipe the other side is not servicing
    writer = @async begin
        try
            write(stdin_pipe, input)
        finally
            # always signal EOF, even when writing failed: a tool that is still waiting
            # for input would otherwise never exit, and `wait(proc)` would hang forever
            close(stdin_pipe)
        end
    end
    reader = @async read(stdout_pipe)
    logger = @async read(stderr_pipe, String)

    wait(proc)
    if !success(proc)
        error("Failed to run $(basename(cmd.exec[1])):\n" * fetch(logger))
    end
    return fetch(reader)
end
69+
70+
4071
## safe logging
4172

4273
using Logging

test/Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ FileCheck = "4e644321-382b-4b05-b0b6-5d23c3d944fb"
44
GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
55
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
66
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
7+
LLVMDowngrader_jll = "f52de702-fb25-5922-94ba-81dd59b07444"
78
LLVM_jll = "86de99a1-58d6-5da7-8064-bd56ce2e322c"
89
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
910
ParallelTestRunner = "d3525ed8-44d0-4b2c-a655-542cee43accc"

test/metal.jl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,18 @@ end
102102
(NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}}), x, y)
103103
end
104104

105+
# intrinsics survive the `:llvm` stage as-is; they are only lowered to AIR during mcgen
105106
@test @filecheck begin
106107
@check_label "define <2 x i64> @{{(julia|j)_foo_[0-9]+}}"
107-
@check "air.max.s.v2i64"
108+
@check "llvm.smax.v2i64"
108109
Metal.code_llvm(mod.foo, (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}}))
109110
end
111+
112+
@test @filecheck begin
113+
@check_label "define <2 x i64> @{{(julia|j)_foo_[0-9]+}}"
114+
@check "air.max.s.v2i64"
115+
Metal.code_native(mod.foo, (NTuple{2, VecElement{Int64}}, NTuple{2, VecElement{Int64}}))
116+
end
110117
end
111118

112119
@testset "unsupported type detection" begin

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import GPUCompiler, LLVM
44
const init_code = quote
55
using GPUCompiler, LLVM
66
using SPIRV_LLVM_Backend_jll, SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll
7+
using LLVMDowngrader_jll
78

89
# include all helpers
910
include(joinpath(@__DIR__, "helpers", "runtime.jl"))

0 commit comments

Comments
 (0)