@@ -6,73 +6,50 @@ We currently work on launching the following Rust kernel on the GPU.
66To follow along, copy it to a `src/lib.rs` file.
77
88``` rust
9- #![feature(abi_gpu_kernel)]
10- #![feature(rustc_attrs)]
11- #![feature(core_intrinsics)]
9+ #![allow(internal_features)]
10+ #![feature(gpu_offload)]
11+ #![cfg_attr(target_os = " linux" , feature(core_intrinsics))]
12+ #![cfg_attr(target_arch = " amdgpu" , feature(stdarch_amdgpu, abi_gpu_kernel))]
13+ #![cfg_attr(target_arch = " nvptx64" , feature(stdarch_nvptx, abi_gpu_kernel))]
1214#![no_std]
1315
1416#[cfg(target_os = " linux" )]
1517extern crate libc;
16- #[cfg(target_os = " linux" )]
17- use libc :: c_char;
1818
19- #[cfg(target_os = " linux" )]
20- use core :: mem;
19+ use core :: offload :: offload_kernel;
2120
2221#[panic_handler]
2322fn panic (_ : & core :: panic :: PanicInfo ) -> ! {
2423 loop {}
2524}
2625
27- #[cfg(target_os = " linux" )]
28- #[unsafe (no_mangle)]
29- #[inline(never)]
30- fn main () {
31- let array_c : * mut [f64 ; 256 ] =
32- unsafe { libc :: calloc (256 , (mem :: size_of :: <f64 >()) as libc :: size_t ) as * mut [f64 ; 256 ] };
33- let output = c " The first element is zero %f\ n" ;
34- let output2 = c " The first element is NOT zero %f\ n" ;
35- let output3 = c " The second element is %f\ n" ;
36- unsafe {
37- let val : * const c_char = if (* array_c )[0 ] < 0.1 {
38- output . as_ptr ()
39- } else {
40- output2 . as_ptr ()
41- };
42- libc :: printf (val , (* array_c )[0 ]);
43- }
26+ #[cfg(target_arch = " amdgpu" )]
27+ use core :: arch :: amdgpu :: {workgroup_id_x as block_idx_x, workitem_id_x as thread_idx_x};
28+ #[cfg(target_arch = " nvptx64" )]
29+ use core :: arch :: nvptx :: {
30+ _block_dim_x as block_dim_x, _block_idx_x as block_idx_x, _thread_idx_x as thread_idx_x,
31+ };
4432
33+ #[offload_kernel]
34+ fn kernel (x : * mut [f64 ; 256 ]) {
4535 unsafe {
46- kernel (array_c );
47- }
48- core :: hint :: black_box (& array_c );
49- unsafe {
50- let val : * const c_char = if (* array_c )[0 ] < 0.1 {
51- output . as_ptr ()
52- } else {
53- output2 . as_ptr ()
54- };
55- libc :: printf (val , (* array_c )[0 ]);
56- libc :: printf (output3 . as_ptr (), (* array_c )[1 ]);
36+ let n = (* x ). len ();
37+ let i = (thread_idx_x () + block_idx_x () * block_dim_x ()) as usize ;
38+ if i < n {
39+ (* x )[i ] = i as f64 ;
40+ }
5741 }
5842}
5943
60- #[inline(never)]
61- unsafe fn kernel (x : * mut [f64 ; 256 ]) {
62- core :: intrinsics :: offload (kernel_1 , [256 , 1 , 1 ], [32 , 1 , 1 ], (x ,))
63- }
64-
6544#[cfg(target_os = " linux" )]
66- unsafe extern " C" {
67- pub fn kernel_1 (array_b : * mut [f64 ; 256 ]);
68- }
69-
70- #[cfg(not(target_os = " linux" ))]
7145#[unsafe (no_mangle)]
72- #[inline(never)]
73- #[rustc_offload_kernel]
74- pub extern " gpu-kernel" fn kernel_1 (x : * mut [f64 ; 256 ]) {
75- unsafe { (* x )[0 ] = 21.0 };
46+ fn main () {
47+ let mut x = [0.0f64 ; 256 ];
48+ core :: intrinsics :: offload :: <_ , _ , ()>(kernel , [256 , 1 , 1 ], [1 , 1 , 1 ], (& mut x as * mut [f64 ; 256 ],));
49+ for i in 0 .. x . len () {
50+ assert_eq! (x [i ], i as f64 );
51+ }
52+ unsafe { libc :: printf (c " all checks passed" . as_ptr ()); }
7653}
7754```
7855
@@ -84,7 +61,7 @@ So either substitute clang/lld invocations below with absolute path, or set your
8461First we generate the device (GPU) code.
8562
8663<div class =" warning " >
87-
64+
8865Replace the `target-cpu` (gfx90a) with the right code for your GPU. These are often referred to as "LLVM target names"[^list].
8966
9067</div >
@@ -102,7 +79,7 @@ This call also does a lot of work and generates multiple intermediate files for
10279While we have integrated most offload steps into rustc, one binary invocation still remains for now:
10380
10481```
105- "clang-linker-wrapper" "--should-extract=gfx90a" "--device-compiler=amdgcn-amd-amdhsa=-g" "--device-compiler=amdgcn-amd-amdhsa=-save-temps=cwd" "--device-linker=amdgcn-amd-amdhsa=-lompdevice" "--host-triple=x86_64-unknown-linux-gnu" "--save-temps" "--linker-path=/ABSOlUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/lld/bin/ld.lld" "--hash-style=gnu" "--eh-frame-hdr" "-m" "elf_x86_64" "-pie" "-dynamic-linker" "/lib64/ld-linux-x86-64.so.2" "-o" "bare" "/lib/../lib64/Scrt1.o" "/lib/../lib64/crti.o" "/ABSOLUTE_PATH_TO/crtbeginS.o" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/bin/../lib/x86_64-unknown-linux-gnu" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib/clang/21/lib/x86_64-unknown-linux-gnu" "-L/lib/../lib64" "-L/usr/lib64" "-L/lib" "-L/usr/lib" "target/<GPU_DIR>/release/host.o" "-lstdc++" "-lm" "-lomp" "-lomptarget" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib" "-lgcc_s" "-lgcc" "-lpthread" "-lc" "-lgcc_s" "-lgcc" "/ABSOLUTE_PATH_TO/crtendS.o" "/lib/../lib64/crtn.o"
82+ "clang-linker-wrapper" "--should-extract=gfx90a" "--device-compiler=amdgcn-amd-amdhsa=-g" "--device-compiler=amdgcn-amd-amdhsa=-save-temps=cwd" "--device-linker=amdgcn-amd-amdhsa=-lompdevice" "--host-triple=x86_64-unknown-linux-gnu" "--save-temps" "--linker-path=/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/lld/bin/ld.lld" "--hash-style=gnu" "--eh-frame-hdr" "-m" "elf_x86_64" "-pie" "-dynamic-linker" "/lib64/ld-linux-x86-64.so.2" "-o" "main" "/lib/../lib64/Scrt1.o" "/lib/../lib64/crti.o" "/ABSOLUTE_PATH_TO/crtbeginS.o" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/bin/../lib/x86_64-unknown-linux-gnu" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib/clang/21/lib/x86_64-unknown-linux-gnu" "-L/lib/../lib64" "-L/usr/lib64" "-L/lib" "-L/usr/lib" "target/<GPU_DIR>/release/host.o" "-lstdc++" "-lm" "-lomp" "-lomptarget" "-L/ABSOLUTE_PATH_TO/rust/build/x86_64-unknown-linux-gnu/llvm/lib" "-lgcc_s" "-lgcc" "-lpthread" "-lc" "-lgcc_s" "-lgcc" "/ABSOLUTE_PATH_TO/crtendS.o" "/lib/../lib64/crtn.o"
10683```
10784
10885You can try to find the paths to those files on your system.
@@ -118,9 +95,7 @@ In the final step, you can now run your binary
11895
11996```
12097./main
121- The first element is zero 0.000000
122- The first element is NOT zero 21.000000
123- The second element is 0.000000
98+ all checks passed
12499```
125100
126101To receive more information about the memory transfer, you can enable info printing with
0 commit comments