Skip to content

Commit 2341687

Browse files
committed
Update test and verify that tgt_(un)register_lib have the right type
1 parent 8da80d3 commit 2341687

5 files changed

Lines changed: 39 additions & 28 deletions

File tree

compiler/rustc_codegen_llvm/src/base.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,13 @@ pub(crate) fn compile_codegen_unit(
9393
// They are necessary for correct offload execution. We do this here to simplify the
9494
// `offload` intrinsic, avoiding the need for tracking whether it's the first
9595
// intrinsic call or not.
96-
let has_host_offload =
97-
cx.sess().opts.unstable_opts.offload.iter().any(|o| matches!(o, Offload::Host(_)));
96+
let has_host_offload = cx
97+
.sess()
98+
.opts
99+
.unstable_opts
100+
.offload
101+
.iter()
102+
.any(|o| matches!(o, Offload::Host(_) | Offload::Test));
98103
if has_host_offload && !cx.sess().target.is_like_gpu {
99104
cx.offload_globals.replace(Some(OffloadGlobals::declare(&cx)));
100105
}

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,9 @@ impl<'ll> OffloadGlobals<'ll> {
4747
let bin_desc = cx.type_named_struct("struct.__tgt_bin_desc");
4848
cx.set_struct_body(bin_desc, &tgt_bin_desc_ty, false);
4949

50-
let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", mapper_fn_ty);
51-
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", mapper_fn_ty);
50+
let reg_lib_decl = cx.type_func(&[cx.type_ptr()], cx.type_void());
51+
let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", reg_lib_decl);
52+
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", reg_lib_decl);
5253
let init_ty = cx.type_func(&[], cx.type_void());
5354
let init_rtls = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty);
5455

compiler/rustc_session/src/config.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,8 @@ pub enum Offload {
196196
Device,
197197
/// Second step in the offload pipeline, generates the host code to call kernels.
198198
Host(String),
199+
/// Test is similar to Host, but allows testing without a device artifact.
200+
Test,
199201
}
200202

201203
/// The different settings that the `-Z autodiff` flag can have.

compiler/rustc_session/src/options.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -795,7 +795,8 @@ mod desc {
795795
pub(crate) const parse_list_with_polarity: &str =
796796
"a comma-separated list of strings, with elements beginning with + or -";
797797
pub(crate) const parse_autodiff: &str = "a comma separated list of settings: `Enable`, `PrintSteps`, `PrintTA`, `PrintTAFn`, `PrintAA`, `PrintPerf`, `PrintModBefore`, `PrintModAfter`, `PrintModFinal`, `PrintPasses`, `NoPostopt`, `LooseTypes`, `Inline`, `NoTT`";
798-
pub(crate) const parse_offload: &str = "a comma separated list of settings: `Enable`";
798+
pub(crate) const parse_offload: &str =
799+
"a comma separated list of settings: `Host=<Absolute-Path>`, `Device`, `Test`";
799800
pub(crate) const parse_comma_list: &str = "a comma-separated list of strings";
800801
pub(crate) const parse_opt_comma_list: &str = parse_comma_list;
801802
pub(crate) const parse_number: &str = "a number";
@@ -1472,6 +1473,13 @@ pub mod parse {
14721473
}
14731474
Offload::Device
14741475
}
1476+
"Test" => {
1477+
if let Some(_) = arg {
1478+
// Test does not accept a value
1479+
return false;
1480+
}
1481+
Offload::Test
1482+
}
14751483
_ => {
14761484
// FIXME(ZuseZ4): print an error saying which value is not recognized
14771485
return false;

tests/codegen-llvm/gpu_offload/gpu_host.rs

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,9 @@
1-
//@ compile-flags: -Zoffload=Enable -Zunstable-options -C opt-level=3 -Clto=fat
1+
//@ compile-flags: -Zoffload=Test -Zunstable-options -C opt-level=3 -Clto=fat
22
//@ no-prefer-dynamic
3-
//@ needs-enzyme
43

54
// This test verifies that we generate __tgt_target_data_*_mapper before and after a call to
65
// kernel_1. Better documentation of what each global or variable means is available in the gpu
7-
// offlaod code, or the LLVM offload documentation. This code does not launch any GPU kernels yet,
8-
// and will be rewritten once a proper offload frontend has landed.
9-
//
10-
// We currently only handle memory transfer for specific calls to functions named `kernel_{num}`,
11-
// when inside of a function called main. This, too, is a temporary workaround for not having a
12-
// frontend.
6+
// offload code, or the LLVM offload documentation.
137

148
#![feature(rustc_attrs)]
159
#![feature(core_intrinsics)]
@@ -22,6 +16,20 @@ fn main() {
2216
core::hint::black_box(&x);
2317
}
2418

19+
#[unsafe(no_mangle)]
20+
#[inline(never)]
21+
pub fn kernel_1(x: &mut [f32; 256]) {
22+
core::intrinsics::offload(_kernel_1, (x,))
23+
}
24+
25+
#[unsafe(no_mangle)]
26+
#[inline(never)]
27+
pub fn _kernel_1(x: &mut [f32; 256]) {
28+
for i in 0..256 {
29+
x[i] = 21.0;
30+
}
31+
}
32+
2533
// CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr }
2634
// CHECK: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
2735
// CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
@@ -36,8 +44,9 @@ fn main() {
3644
// CHECK: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1\00", section ".llvm.rodata.offloading", align 1
3745
// CHECK: @.offloading.entry._kernel_1 = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1.region_id, ptr @.offloading.entry_name._kernel_1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
3846

39-
// CHECK: Function Attrs: nounwind
4047
// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
48+
// CHECK: declare void @__tgt_register_lib(ptr) local_unnamed_addr
49+
// CHECK: declare void @__tgt_unregister_lib(ptr) local_unnamed_addr
4150

4251
// CHECK: define{{( dso_local)?}} void @main()
4352
// CHECK-NEXT: start:
@@ -94,17 +103,3 @@ fn main() {
94103
// CHECK-NEXT: call void @__tgt_unregister_lib(ptr nonnull %EmptyDesc)
95104
// CHECK-NEXT: ret void
96105
// CHECK-NEXT: }
97-
98-
#[unsafe(no_mangle)]
99-
#[inline(never)]
100-
pub fn kernel_1(x: &mut [f32; 256]) {
101-
core::intrinsics::offload(_kernel_1, (x,))
102-
}
103-
104-
#[unsafe(no_mangle)]
105-
#[inline(never)]
106-
pub fn _kernel_1(x: &mut [f32; 256]) {
107-
for i in 0..256 {
108-
x[i] = 21.0;
109-
}
110-
}

0 commit comments

Comments
 (0)