Skip to content

Commit a234ae6

Browse files
committed
Auto merge of #151778 - Zalathar:rollup-A0breU3, r=Zalathar
Rollup of 9 pull requests Successful merges: - #151239 (Support trait objects in type info reflection) - #150893 (offload: move (un)register lib into global_ctors) - #151013 (Add some clarifications and fixes for fmt syntax) - #151666 (compiler: Rename several types/traits for per-query vtables) - #151738 (Add `extern crate core` to diagnostic tests) - #151747 (Update `askama` version to `0.15.2`) - #151759 (Update `browser-ui-test` version to `0.23.3`) - #151763 (Add FileCheck annotations to simplify_match.rs) - #151766 (Fix `x fix`, again)
2 parents 466ea4e + 4d2c8e3 commit a234ae6

59 files changed

Lines changed: 991 additions & 544 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -184,9 +184,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
184184

185185
[[package]]
186186
name = "askama"
187-
version = "0.15.1"
187+
version = "0.15.2"
188188
source = "registry+https://github.com/rust-lang/crates.io-index"
189-
checksum = "bb7125972258312e79827b60c9eb93938334100245081cf701a2dee981b17427"
189+
checksum = "03341eae1125472b0672fbf35cc9aa7b74cd8e0c3d02f02c28a04678f12aaa7a"
190190
dependencies = [
191191
"askama_macros",
192192
"itoa",
@@ -197,9 +197,9 @@ dependencies = [
197197

198198
[[package]]
199199
name = "askama_derive"
200-
version = "0.15.1"
200+
version = "0.15.2"
201201
source = "registry+https://github.com/rust-lang/crates.io-index"
202-
checksum = "8ba5e7259a1580c61571e3116ebaaa01e3c001b2132b17c4cc5c70780ca3e994"
202+
checksum = "461bd78f3da90b5e44eee4272cfb1c4832aa3dcdb6c370aedd3eb253d2b9e3ca"
203203
dependencies = [
204204
"askama_parser",
205205
"basic-toml",
@@ -214,18 +214,18 @@ dependencies = [
214214

215215
[[package]]
216216
name = "askama_macros"
217-
version = "0.15.1"
217+
version = "0.15.2"
218218
source = "registry+https://github.com/rust-lang/crates.io-index"
219-
checksum = "236ce20b77cb13506eaf5024899f4af6e12e8825f390bd943c4c37fd8f322e46"
219+
checksum = "ba49fb22ee3074574b8510abd9495d4f0bb9b8f87e8e45ee31e2cee508f7a8e5"
220220
dependencies = [
221221
"askama_derive",
222222
]
223223

224224
[[package]]
225225
name = "askama_parser"
226-
version = "0.15.1"
226+
version = "0.15.2"
227227
source = "registry+https://github.com/rust-lang/crates.io-index"
228-
checksum = "f3c63392767bb2df6aa65a6e1e3b80fd89bb7af6d58359b924c0695620f1512e"
228+
checksum = "7e33eb7484958aaa1f27e9adb556f5d557331cd891bdbb33781bc1f9550b6f6e"
229229
dependencies = [
230230
"rustc-hash 2.1.1",
231231
"serde",

compiler/rustc_codegen_llvm/src/builder.rs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -188,19 +188,6 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
188188
load
189189
}
190190
}
191-
192-
fn memset(&mut self, ptr: &'ll Value, fill_byte: &'ll Value, size: &'ll Value, align: Align) {
193-
unsafe {
194-
llvm::LLVMRustBuildMemSet(
195-
self.llbuilder,
196-
ptr,
197-
align.bytes() as c_uint,
198-
fill_byte,
199-
size,
200-
false,
201-
);
202-
}
203-
}
204191
}
205192

206193
/// Empty string, to be used where LLVM expects an instruction name, indicating

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 69 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ pub(crate) struct OffloadGlobals<'ll> {
1919
pub launcher_fn: &'ll llvm::Value,
2020
pub launcher_ty: &'ll llvm::Type,
2121

22-
pub bin_desc: &'ll llvm::Type,
23-
2422
pub kernel_args_ty: &'ll llvm::Type,
2523

2624
pub offload_entry_ty: &'ll llvm::Type,
@@ -31,8 +29,8 @@ pub(crate) struct OffloadGlobals<'ll> {
3129

3230
pub ident_t_global: &'ll llvm::Value,
3331

34-
pub register_lib: &'ll llvm::Value,
35-
pub unregister_lib: &'ll llvm::Value,
32+
// FIXME(offload): Drop this once we have fully automated our offload compilation pipeline, since
33+
// LLVM will initialize them for us if it sees gpu kernels being registered.
3634
pub init_rtls: &'ll llvm::Value,
3735
}
3836

@@ -44,15 +42,6 @@ impl<'ll> OffloadGlobals<'ll> {
4442
let (begin_mapper, _, end_mapper, mapper_fn_ty) = gen_tgt_data_mappers(cx);
4543
let ident_t_global = generate_at_one(cx);
4644

47-
let tptr = cx.type_ptr();
48-
let ti32 = cx.type_i32();
49-
let tgt_bin_desc_ty = vec![ti32, tptr, tptr, tptr];
50-
let bin_desc = cx.type_named_struct("struct.__tgt_bin_desc");
51-
cx.set_struct_body(bin_desc, &tgt_bin_desc_ty, false);
52-
53-
let reg_lib_decl = cx.type_func(&[cx.type_ptr()], cx.type_void());
54-
let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", reg_lib_decl);
55-
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", reg_lib_decl);
5645
let init_ty = cx.type_func(&[], cx.type_void());
5746
let init_rtls = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty);
5847

@@ -63,20 +52,84 @@ impl<'ll> OffloadGlobals<'ll> {
6352
OffloadGlobals {
6453
launcher_fn,
6554
launcher_ty,
66-
bin_desc,
6755
kernel_args_ty,
6856
offload_entry_ty,
6957
begin_mapper,
7058
end_mapper,
7159
mapper_fn_ty,
7260
ident_t_global,
73-
register_lib,
74-
unregister_lib,
7561
init_rtls,
7662
}
7763
}
7864
}
7965

66+
// We need to register offload before using it. We also should unregister it once we are done, for
67+
// good measures. Previously we have done so before and after each individual offload intrinsic
68+
// call, but that comes at a performance cost. The repeated (un)register calls might also confuse
69+
// the LLVM OpenMPOpt pass, which tries to move operations to a better location. The easiest solution,
70+
// which we copy from clang, is to just have those two calls once, in the global ctor/dtor section
71+
// of the final binary.
72+
pub(crate) fn register_offload<'ll>(cx: &CodegenCx<'ll, '_>) {
73+
// First we check quickly whether we already have done our setup, in which case we return early.
74+
// Shouldn't be needed for correctness.
75+
let register_lib_name = "__tgt_register_lib";
76+
if cx.get_function(register_lib_name).is_some() {
77+
return;
78+
}
79+
80+
let reg_lib_decl = cx.type_func(&[cx.type_ptr()], cx.type_void());
81+
let register_lib = declare_offload_fn(&cx, register_lib_name, reg_lib_decl);
82+
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", reg_lib_decl);
83+
84+
let ptr_null = cx.const_null(cx.type_ptr());
85+
let const_struct = cx.const_struct(&[cx.get_const_i32(0), ptr_null, ptr_null, ptr_null], false);
86+
let omp_descriptor =
87+
add_global(cx, ".omp_offloading.descriptor", const_struct, InternalLinkage);
88+
// @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_llvm_offload_entries, ptr @__stop_llvm_offload_entries }
89+
// @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 0, ptr null, ptr null, ptr null }
90+
91+
let atexit = cx.type_func(&[cx.type_ptr()], cx.type_i32());
92+
let atexit_fn = declare_offload_fn(cx, "atexit", atexit);
93+
94+
let desc_ty = cx.type_func(&[], cx.type_void());
95+
let reg_name = ".omp_offloading.descriptor_reg";
96+
let unreg_name = ".omp_offloading.descriptor_unreg";
97+
let desc_reg_fn = declare_offload_fn(cx, reg_name, desc_ty);
98+
let desc_unreg_fn = declare_offload_fn(cx, unreg_name, desc_ty);
99+
llvm::set_linkage(desc_reg_fn, InternalLinkage);
100+
llvm::set_linkage(desc_unreg_fn, InternalLinkage);
101+
llvm::set_section(desc_reg_fn, c".text.startup");
102+
llvm::set_section(desc_unreg_fn, c".text.startup");
103+
104+
// define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
105+
// entry:
106+
// call void @__tgt_register_lib(ptr @.omp_offloading.descriptor)
107+
// %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
108+
// ret void
109+
// }
110+
let bb = Builder::append_block(cx, desc_reg_fn, "entry");
111+
let mut a = Builder::build(cx, bb);
112+
a.call(reg_lib_decl, None, None, register_lib, &[omp_descriptor], None, None);
113+
a.call(atexit, None, None, atexit_fn, &[desc_unreg_fn], None, None);
114+
a.ret_void();
115+
116+
// define internal void @.omp_offloading.descriptor_unreg() section ".text.startup" {
117+
// entry:
118+
// call void @__tgt_unregister_lib(ptr @.omp_offloading.descriptor)
119+
// ret void
120+
// }
121+
let bb = Builder::append_block(cx, desc_unreg_fn, "entry");
122+
let mut a = Builder::build(cx, bb);
123+
a.call(reg_lib_decl, None, None, unregister_lib, &[omp_descriptor], None, None);
124+
a.ret_void();
125+
126+
// @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.omp_offloading.descriptor_reg, ptr null }]
127+
let args = vec![cx.get_const_i32(101), desc_reg_fn, ptr_null];
128+
let const_struct = cx.const_struct(&args, false);
129+
let arr = cx.const_array(cx.val_ty(const_struct), &[const_struct]);
130+
add_global(cx, "llvm.global_ctors", arr, AppendingLinkage);
131+
}
132+
80133
pub(crate) struct OffloadKernelDims<'ll> {
81134
num_workgroups: &'ll Value,
82135
threads_per_block: &'ll Value,
@@ -487,9 +540,6 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
487540
let tgt_decl = offload_globals.launcher_fn;
488541
let tgt_target_kernel_ty = offload_globals.launcher_ty;
489542

490-
// %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
491-
let tgt_bin_desc = offload_globals.bin_desc;
492-
493543
let tgt_kernel_decl = offload_globals.kernel_args_ty;
494544
let begin_mapper_decl = offload_globals.begin_mapper;
495545
let end_mapper_decl = offload_globals.end_mapper;
@@ -513,12 +563,9 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
513563
}
514564

515565
// Step 0)
516-
// %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
517-
// %6 = alloca %struct.__tgt_bin_desc, align 8
518566
unsafe {
519567
llvm::LLVMRustPositionBuilderPastAllocas(&builder.llbuilder, builder.llfn());
520568
}
521-
let tgt_bin_desc_alloca = builder.direct_alloca(tgt_bin_desc, Align::EIGHT, "EmptyDesc");
522569

523570
let ty = cx.type_array(cx.type_ptr(), num_args);
524571
// Baseptr are just the input pointer to the kernel, stored in a local alloca
@@ -536,7 +583,6 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
536583
unsafe {
537584
llvm::LLVMPositionBuilderAtEnd(&builder.llbuilder, bb);
538585
}
539-
builder.memset(tgt_bin_desc_alloca, cx.get_const_i8(0), cx.get_const_i64(32), Align::EIGHT);
540586

541587
// Now we allocate once per function param, a copy to be passed to one of our maps.
542588
let mut vals = vec![];
@@ -574,15 +620,9 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
574620
geps.push(gep);
575621
}
576622

577-
let mapper_fn_ty = cx.type_func(&[cx.type_ptr()], cx.type_void());
578-
let register_lib_decl = offload_globals.register_lib;
579-
let unregister_lib_decl = offload_globals.unregister_lib;
580623
let init_ty = cx.type_func(&[], cx.type_void());
581624
let init_rtls_decl = offload_globals.init_rtls;
582625

583-
// FIXME(offload): Later we want to add them to the wrapper code, rather than our main function.
584-
// call void @__tgt_register_lib(ptr noundef %6)
585-
builder.call(mapper_fn_ty, None, None, register_lib_decl, &[tgt_bin_desc_alloca], None, None);
586626
// call void @__tgt_init_all_rtls()
587627
builder.call(init_ty, None, None, init_rtls_decl, &[], None, None);
588628

@@ -679,6 +719,4 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
679719
num_args,
680720
s_ident_t,
681721
);
682-
683-
builder.call(mapper_fn_ty, None, None, unregister_lib_decl, &[tgt_bin_desc_alloca], None, None);
684722
}

compiler/rustc_codegen_llvm/src/common.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
124124
pub(crate) fn const_null(&self, t: &'ll Type) -> &'ll Value {
125125
unsafe { llvm::LLVMConstNull(t) }
126126
}
127+
128+
pub(crate) fn const_struct(&self, elts: &[&'ll Value], packed: bool) -> &'ll Value {
129+
struct_in_context(self.llcx(), elts, packed)
130+
}
127131
}
128132

129133
impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> {

compiler/rustc_codegen_llvm/src/intrinsic.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ use tracing::debug;
3030
use crate::abi::FnAbiLlvmExt;
3131
use crate::builder::Builder;
3232
use crate::builder::autodiff::{adjust_activity_to_abi, generate_enzyme_call};
33-
use crate::builder::gpu_offload::{OffloadKernelDims, gen_call_handling, gen_define_handling};
33+
use crate::builder::gpu_offload::{
34+
OffloadKernelDims, gen_call_handling, gen_define_handling, register_offload,
35+
};
3436
use crate::context::CodegenCx;
3537
use crate::declare::declare_raw_fn;
3638
use crate::errors::{
@@ -1402,6 +1404,7 @@ fn codegen_offload<'ll, 'tcx>(
14021404
return;
14031405
}
14041406
};
1407+
register_offload(cx);
14051408
let offload_data = gen_define_handling(&cx, &metadata, target_symbol, offload_globals);
14061409
gen_call_handling(bx, &offload_data, &args, &types, &metadata, offload_globals, &offload_dims);
14071410
}

0 commit comments

Comments
 (0)