|
1 | 1 | # implementation of the GPUCompiler interfaces for generating PTX code |
2 | 2 |
|
# Handle to the NVPTX_LLVM_Backend_jll package, identified by its name and UUID.
# `mcgen` queries it for availability and for the path of the external `llc` binary.
# NOTE(review): presumably the package is only loaded on first use -- confirm against
# the definition of `LazyModule`.
const NVPTX_LLVM_Backend_jll = LazyModule(
    "NVPTX_LLVM_Backend_jll",
    UUID("ef6e0fe3-e6ef-59c0-bde6-4989574699e0"),
)
| 6 | + |
| 7 | + |
3 | 8 | ## target |
4 | 9 |
|
5 | 10 | export PTXCompilerTarget |
@@ -341,6 +346,44 @@ function finish_ir!(@nospecialize(job::CompilerJob{PTXCompilerTarget}), |
341 | 346 | return entry |
342 | 347 | end |
343 | 348 |
|
# Generate machine code for a PTX compiler job by invoking the external `llc` provided
# by NVPTX_LLVM_Backend_jll.
#
# Arguments:
# - `job`: the PTX compiler job; `job.config.target` supplies the triple, the CPU name
#   and the PTX ISA version passed to `llc`.
# - `mod`: the LLVM module to compile; it is written to a temporary `.bc` file.
# - `format`: `LLVM.API.LLVMAssemblyFile` (default) or `LLVM.API.LLVMObjectFile`.
#
# Returns the contents of the file produced by `llc` as a `String`.
#
# Throws an `ErrorException` when the back-end is not available, when `format` is not
# supported, or when running `llc` fails. Only in that last case is the input file left
# on disk, so that it can be attached to a bug report; in every other case (including
# I/O errors, which propagate unchanged) all temporary files are removed.
@unlocked function mcgen(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
                         mod::LLVM.Module, format=LLVM.API.LLVMAssemblyFile)
    if !isavailable(NVPTX_LLVM_Backend_jll) || !NVPTX_LLVM_Backend_jll.is_available()
        error("NVPTX LLVM back-end not loaded; cannot compile to PTX.")
    end

    target = job.config.target
    filetype = if format == LLVM.API.LLVMAssemblyFile
        "asm"
    elseif format == LLVM.API.LLVMObjectFile
        "obj"
    else
        error("Unsupported PTX output format $format")
    end

    # `cleanup=false`: these files are managed explicitly in the `finally` block below,
    # which also allows the input to outlive this call when `llc` fails.
    input = tempname(cleanup=false) * ".bc"
    output = tempname(cleanup=false) * (filetype == "asm" ? ".ptx" : ".cubin")
    keep_input = false
    try
        write(input, mod)

        cmd = `$(NVPTX_LLVM_Backend_jll.llc()) $input
                    -mtriple=$(llvm_triple(target))
                    -mcpu=$(cpu_name(target))
                    -mattr=+ptx$(target.ptx.major)$(target.ptx.minor)
                    -filetype=$filetype
                    -o $output`
        try
            run(cmd)
        catch
            # the error message asks the user to attach the input, so preserve it
            keep_input = true
            error("Failed to compile to PTX with external llc.\n" *
                  "If you think this is a bug, please file an issue and attach $(input).")
        end

        # both output formats are returned as a `String` holding the raw file contents
        return read(output, String)
    finally
        # `force=true`: the files may not exist if an earlier step failed
        keep_input || rm(input; force=true)
        rm(output; force=true)
    end
end
| 386 | + |
344 | 387 | function llvm_debug_info(@nospecialize(job::CompilerJob{PTXCompilerTarget})) |
345 | 388 | # allow overriding the debug info from CUDA.jl |
346 | 389 | if job.config.target.debuginfo |
|
0 commit comments