1- //@ compile-flags: -Zoffload=Enable -Zunstable-options -C opt-level=3 -Clto=fat
1+ //@ compile-flags: -Zoffload=Test -Zunstable-options -C opt-level=3 -Clto=fat
22//@ no-prefer-dynamic
3- //@ needs-enzyme
43
54// This test is verifying that we generate __tgt_target_data_*_mapper before and after a call to the
65// kernel_1. Better documentation of what each global or variable means is available in the gpu
7- // offload code, or the LLVM offload documentation. This code does not launch any GPU kernels yet,
8- // and will be rewritten once a proper offload frontend has landed.
9- //
10- // We currently only handle memory transfer for specific calls to functions named `kernel_{num}`,
11- // when inside of a function called main. This, too, is a temporary workaround for not having a
12- // frontend.
6+ // offload code, or the LLVM offload documentation.
137
148#![ feature( rustc_attrs) ]
159#![ feature( core_intrinsics) ]
@@ -22,6 +16,20 @@ fn main() {
2216 core:: hint:: black_box ( & x) ;
2317}
2418
19+ #[ unsafe( no_mangle) ]
20+ #[ inline( never) ]
21+ pub fn kernel_1 ( x : & mut [ f32 ; 256 ] ) {
22+ core:: intrinsics:: offload ( _kernel_1, ( x, ) )
23+ }
24+
25+ #[ unsafe( no_mangle) ]
26+ #[ inline( never) ]
27+ pub fn _kernel_1 ( x : & mut [ f32 ; 256 ] ) {
28+ for i in 0 ..256 {
29+ x[ i] = 21.0 ;
30+ }
31+ }
32+
2533// CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr }
2634// CHECK: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
2735// CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
@@ -36,8 +44,9 @@ fn main() {
3644// CHECK: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1\00", section ".llvm.rodata.offloading", align 1
3745// CHECK: @.offloading.entry._kernel_1 = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1.region_id, ptr @.offloading.entry_name._kernel_1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
3846
39- // CHECK: Function Attrs: nounwind
4047// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
48+ // CHECK: declare void @__tgt_register_lib(ptr) local_unnamed_addr
49+ // CHECK: declare void @__tgt_unregister_lib(ptr) local_unnamed_addr
4150
4251// CHECK: define{{( dso_local)?}} void @main()
4352// CHECK-NEXT: start:
@@ -94,17 +103,3 @@ fn main() {
94103// CHECK-NEXT: call void @__tgt_unregister_lib(ptr nonnull %EmptyDesc)
95104// CHECK-NEXT: ret void
96105// CHECK-NEXT: }
97-
98- #[ unsafe( no_mangle) ]
99- #[ inline( never) ]
100- pub fn kernel_1 ( x : & mut [ f32 ; 256 ] ) {
101- core:: intrinsics:: offload ( _kernel_1, ( x, ) )
102- }
103-
104- #[ unsafe( no_mangle) ]
105- #[ inline( never) ]
106- pub fn _kernel_1 ( x : & mut [ f32 ; 256 ] ) {
107- for i in 0 ..256 {
108- x[ i] = 21.0 ;
109- }
110- }
0 commit comments