# implementation of the GPUCompiler interfaces for generating Metal code
22
# handle to the LLVMDowngrader_jll package, identified by its name and UUID. `mcgen` checks
# `isavailable` on it before invoking the `llvm_as` and `llvm_dis_5` tools it provides.
# NOTE(review): presumably `LazyModule` defers loading the package until first use -- confirm
3+ const LLVMDowngrader_jll =
4+ LazyModule (" LLVMDowngrader_jll" ,
5+ UUID (" f52de702-fb25-5922-94ba-81dd59b07444" ))
6+
37
## target info
59
@@ -64,7 +68,8 @@ function Base.hash(target::MetalCompilerTarget, h::UInt)
6468 h = hash (target. fastmath, h)
6569end
6670
67- source_code (target:: MetalCompilerTarget ) = " text"
71+ # the canonical text representation is AIR assembly, i.e., LLVM 5 era textual IR
# the same string is returned for every target; `target` only serves dispatch here.
# NOTE(review): presumably consumers treat the string as a language tag -- confirm
72+ source_code (target:: MetalCompilerTarget ) = " llvm"
6873
# our LLVM builds lack Metal support, so there is no target machine to hand out
function llvm_machine(::MetalCompilerTarget)
    return nothing
end
@@ -295,6 +300,17 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
295300 add_module_metadata! (job, mod)
296301 end
297302
303+ return functions (mod)[entry_fn]
304+ end
305+
306+ # lowering of LLVM IR to AIR-compatible IR
307+ #
308+ # Metal does not have an LLVM back-end, so the lowering of target-independent LLVM IR into
309+ # target-specific constructs -- something that normally happens during instruction
310+ # selection -- is implemented here as IR-to-IR rewrites, run at the start of `mcgen`.
311+ # this keeps the `:llvm` output (e.g. `code_llvm`) close to what Julia generated, using
312+ # generic LLVM intrinsics, while the `:asm`/`:obj` outputs contain AIR intrinsics.
313+ function lower_air! (@nospecialize (job:: CompilerJob{MetalCompilerTarget} ), mod:: LLVM.Module )
298314 # strip device-side `trap`s and rewrite `unreachable` into clean returns (#433, #370). this
299315 # runs post-`optimize!`, after the trap has finished serving as the optimizer guard; the pass
300316 # force-inlines throwing functions into the kernel first so the rewrite is sound, then scrubs
@@ -316,13 +332,15 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
316332 changed |= lower_llvm_intrinsics! (job, f)
317333 end
318334 if changed
319- # lowering may have introduced additional functions marked `alwaysinline`
335+ # lowering may have introduced additional functions marked `alwaysinline`,
336+ # and left dead declarations of the replaced LLVM intrinsics behind
320337 @dispose pb= NewPMPassBuilder () begin
321338 add! (pb, AlwaysInlinerPass ())
322339 add! (pb, NewPMFunctionPassManager ()) do fpm
323340 add! (fpm, SimplifyCFGPass ())
324341 add! (fpm, instcombine_pass (job))
325342 end
343+ add! (pb, StripDeadPrototypesPass ())
326344 run! (pb, mod)
327345 end
328346 end
@@ -336,13 +354,28 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
336354 end
337355 end
338356
339- return functions (mod)[entry_fn]
357+ return
340358end
341359
# Generate machine code for a Metal compiler job.
#
# Our LLVM does not have a Metal target machine, so code generation consists of lowering
# `mod` to AIR-compatible IR using `lower_air!`, followed by assembling the textual IR with
# the `llvm_as` tool from LLVMDowngrader_jll, requesting bitcode version 5.0.
#
# Arguments:
# - `job`: the compiler job that is being processed
# - `mod`: the LLVM module to emit; handed to `lower_air!`, which rewrites it
# - `format`: with `LLVM.API.LLVMAssemblyFile`, the assembled bitcode is disassembled again
#   using `llvm_dis_5` and returned as a `String`; with any other value (an object file by
#   default) the output of the assembler is returned as produced by `run_tool`
#
# Throws an error when LLVMDowngrader_jll is not available.
@unlocked function mcgen(job::CompilerJob{MetalCompilerTarget}, mod::LLVM.Module,
                         format=LLVM.API.LLVMObjectFile)
    # verify that the toolchain is usable before touching the module, so that a missing
    # LLVMDowngrader_jll is reported right away instead of after `mod` has been lowered
    if !isavailable(LLVMDowngrader_jll)
        error("Metal machine-code generation requires the LLVMDowngrader_jll package, which should be installed and loaded first.")
    end

    # lower LLVM constructs that the AIR back-end does not support; this takes the place
    # of instruction selection, as our LLVM does not have a Metal target machine.
    lower_air!(job, mod)

    # assemble to AIR, i.e., LLVM 5 era bitcode, as consumed by the metallib loader
    air = run_tool(`$(LLVMDowngrader_jll.llvm_as()) --bitcode-version=5.0 -o -`, string(mod))

    if format == LLVM.API.LLVMAssemblyFile
        # disassemble the bitcode again to AIR assembly, i.e., LLVM 5 era textual IR
        return String(run_tool(`$(LLVMDowngrader_jll.llvm_dis_5()) -o -`, air))
    end
    return air
end
347380
348381
0 commit comments