Skip to content

Commit b5c38a6

Browse files
committed
cg_LLVM: Stop needing an alloca for volatile loads
And while I'm here, improve the tests to check that the unaligned ones are actually unaligned, since `unaligned_volatile_load::<u8>` doesn't actually test anything.
1 parent d595fce commit b5c38a6

9 files changed

Lines changed: 196 additions & 63 deletions

File tree

compiler/rustc_codegen_gcc/src/builder.rs

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,34 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
446446
);
447447
result.to_rvalue()
448448
}
449+
450+
/// For use in `load` and `volatile_load` to support loads with custom alignments.
451+
fn make_aligned_type(&self, pointee_ty: Type<'gcc>, align: Align) -> Type<'gcc> {
452+
// NOTE(FractalFir): In some cases, we *should* skip the call to get_aligned.
453+
// For example, calling `get_aligned` on a i8 is pointless(since it can only be 1 aligned)
454+
// Calling get_aligned on a `u128`/`i128` causes the attribute to become "stacked"
455+
//
456+
// From GCCs perspective:
457+
// __int128_t __attribute__((aligned(16))) __attribute__((aligned(16)))
458+
// and:
459+
// __int128_t __attribute__((aligned(16)))
460+
// are 2 distinct, incompatible types.
461+
//
462+
// So, we skip the call to `get_aligned` in such a case. *Ideally*, we could do this for all the types,
463+
// but the GCC APIs to facilitate this just aren't quite there yet.
464+
465+
// This checks that we only skip `get_aligned` on 128 bit ints if they have the correct alignment.
466+
// Otherwise, this may be an under-aligned load, so we will still call get_aligned.
467+
let mut can_skip_align = (pointee_ty == self.cx.u128_type
468+
|| pointee_ty == self.cx.i128_type)
469+
&& align == self.int128_align;
470+
// We can skip the call to `get_aligned` for byte-sized types with alignment of 1.
471+
can_skip_align = can_skip_align
472+
|| (pointee_ty == self.cx.u8_type || pointee_ty == self.cx.i8_type)
473+
&& align.bytes() == 1;
474+
// Skip the call to `get_aligned` when possible.
475+
if can_skip_align { pointee_ty } else { pointee_ty.get_aligned(align.bytes()) }
476+
}
449477
}
450478

451479
impl<'tcx> HasTyCtxt<'tcx> for Builder<'_, '_, 'tcx> {
@@ -953,31 +981,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
953981
fn load(&mut self, pointee_ty: Type<'gcc>, ptr: RValue<'gcc>, align: Align) -> RValue<'gcc> {
954982
let block = self.llbb();
955983
let function = block.get_function();
956-
// NOTE(FractalFir): In some cases, we *should* skip the call to get_aligned.
957-
// For example, calling `get_aligned` on a i8 is pointless(since it can only be 1 aligned)
958-
// Calling get_aligned on a `u128`/`i128` causes the attribute to become "stacked"
959-
//
960-
// From GCCs perspective:
961-
// __int128_t __attribute__((aligned(16))) __attribute__((aligned(16)))
962-
// and:
963-
// __int128_t __attribute__((aligned(16)))
964-
// are 2 distinct, incompatible types.
965-
//
966-
// So, we skip the call to `get_aligned` in such a case. *Ideally*, we could do this for all the types,
967-
// but the GCC APIs to facilitate this just aren't quite there yet.
968984

969-
// This checks that we only skip `get_aligned` on 128 bit ints if they have the correct alignment.
970-
// Otherwise, this may be an under-aligned load, so we will still call get_aligned.
971-
let mut can_skip_align = (pointee_ty == self.cx.u128_type
972-
|| pointee_ty == self.cx.i128_type)
973-
&& align == self.int128_align;
974-
// We can skip the call to `get_aligned` for byte-sized types with alignment of 1.
975-
can_skip_align = can_skip_align
976-
|| (pointee_ty == self.cx.u8_type || pointee_ty == self.cx.i8_type)
977-
&& align.bytes() == 1;
978-
// Skip the call to `get_aligned` when possible.
979-
let aligned_type =
980-
if can_skip_align { pointee_ty } else { pointee_ty.get_aligned(align.bytes()) };
985+
let aligned_type = self.make_aligned_type(pointee_ty, align);
981986

982987
let ptr = self.context.new_cast(self.location, ptr, aligned_type.make_pointer());
983988
// NOTE: instead of returning the dereference here, we have to assign it to a variable in
@@ -993,7 +998,14 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
993998
loaded_value.to_rvalue()
994999
}
9951000

996-
fn volatile_load(&mut self, ty: Type<'gcc>, ptr: RValue<'gcc>) -> RValue<'gcc> {
1001+
fn volatile_load(
1002+
&mut self,
1003+
pointee_ty: Type<'gcc>,
1004+
ptr: RValue<'gcc>,
1005+
align: Align,
1006+
) -> RValue<'gcc> {
1007+
let ty = self.make_aligned_type(pointee_ty, align);
1008+
9971009
let ptr = self.context.new_cast(self.location, ptr, ty.make_volatile().make_pointer());
9981010
// (FractalFir): We insert a local here, to ensure this volatile load can't move across
9991011
// blocks.

compiler/rustc_codegen_gcc/src/intrinsic/mod.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ mod simd;
55
use std::iter;
66

77
use gccjit::{ComparisonOp, Function, FunctionType, RValue, ToRValue, Type, UnaryOp};
8-
use rustc_abi::{BackendRepr, HasDataLayout, WrappingRange};
8+
use rustc_abi::{Align, BackendRepr, HasDataLayout, WrappingRange};
99
use rustc_codegen_ssa::base::wants_msvc_seh;
1010
use rustc_codegen_ssa::common::IntPredicate;
1111
use rustc_codegen_ssa::errors::InvalidMonomorphization;
@@ -368,8 +368,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
368368

369369
sym::volatile_load | sym::unaligned_volatile_load => {
370370
let ptr = args[0].immediate();
371-
let load = self.volatile_load(result.layout.gcc_type(self), ptr);
372-
// FIXME(antoyo): set alignment.
371+
let abi_align = result_layout.align.abi;
372+
let ptr_align = if name == sym::volatile_load { abi_align } else { Align::ONE };
373+
let load = self.volatile_load(result.layout.gcc_type(self), ptr, ptr_align);
373374
if let BackendRepr::Scalar(scalar) = result.layout.backend_repr {
374375
self.to_immediate_scalar(load, scalar)
375376
} else {

compiler/rustc_codegen_llvm/src/builder.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -636,9 +636,9 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
636636
}
637637
}
638638

639-
fn volatile_load(&mut self, ty: &'ll Type, ptr: &'ll Value) -> &'ll Value {
639+
fn volatile_load(&mut self, ty: &'ll Type, ptr: &'ll Value, align: Align) -> &'ll Value {
640640
unsafe {
641-
let load = llvm::LLVMBuildLoad2(self.llbuilder, ty, ptr, UNNAMED);
641+
let load = self.load(ty, ptr, align);
642642
llvm::LLVMSetVolatile(load, llvm::TRUE);
643643
load
644644
}

compiler/rustc_codegen_llvm/src/context.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -973,10 +973,7 @@ impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
973973
}
974974

975975
fn intrinsic_call_expects_place_always(&self, name: Symbol) -> bool {
976-
matches!(
977-
name,
978-
sym::autodiff | sym::volatile_load | sym::unaligned_volatile_load | sym::black_box
979-
)
976+
matches!(name, sym::autodiff | sym::black_box)
980977
}
981978
}
982979

compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// .debug_gdb_scripts binary section.
22

3+
use rustc_abi::Align;
34
use rustc_codegen_ssa::base::collect_debugger_visualizers_transitive;
45
use rustc_codegen_ssa::traits::*;
56
use rustc_hir::attrs::DebuggerVisualizerType;
@@ -18,10 +19,7 @@ pub(crate) fn insert_reference_to_gdb_debug_scripts_section_global(bx: &mut Buil
1819
let gdb_debug_scripts_section = get_or_insert_gdb_debug_scripts_section_global(bx);
1920
// Load just the first byte as that's all that's necessary to force
2021
// LLVM to keep around the reference to the global.
21-
let volatile_load_instruction = bx.volatile_load(bx.type_i8(), gdb_debug_scripts_section);
22-
unsafe {
23-
llvm::LLVMSetAlignment(volatile_load_instruction, 1);
24-
}
22+
bx.volatile_load(bx.type_i8(), gdb_debug_scripts_section, Align::ONE);
2523
}
2624
}
2725

compiler/rustc_codegen_llvm/src/intrinsic.rs

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -358,25 +358,39 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
358358
}
359359

360360
sym::volatile_load | sym::unaligned_volatile_load => {
361-
let result = PlaceRef {
362-
val: result_place.unwrap(),
363-
layout: result_layout,
364-
};
365-
361+
// Note that we cannot just load the `llvm_type` because we should never load non-scalars.
362+
// Trying to do so blows up horribly in some cases -- for example loading a
363+
// `MaybeUninint<&dyn Trait>` would load as `{ [i64x2] }` which gives assertions later
364+
// (if we're lucky) from things not being pointers that ought to be.
366365
let ptr = args[0].immediate();
367-
let load = self.volatile_load(result_layout.llvm_type(self), ptr);
368-
let align = if name == sym::unaligned_volatile_load {
369-
1
366+
let abi_align = result_layout.align.abi;
367+
let ptr_align = if name == sym::volatile_load { abi_align } else { Align::ONE };
368+
if result_layout.is_zst() {
369+
return IntrinsicResult::Operand(OperandValue::ZeroSized);
370+
} else if let BackendRepr::Scalar(scalar) = result_layout.backend_repr {
371+
let load = self.volatile_load(self.type_from_scalar(scalar), ptr, ptr_align);
372+
self.to_immediate_scalar(load, scalar)
370373
} else {
371-
result_layout.align.bytes() as u32
372-
};
373-
unsafe {
374-
llvm::LLVMSetAlignment(load, align);
375-
}
376-
if !result_layout.is_zst() {
377-
self.store_to_place(load, result.val);
374+
// One day Rust will probably want to define how we split up a volatile load
375+
// of something that's *not* just an ordinary scalar, but for now we can just
376+
// use an LLVM integer type of the correct width and let it split it however.
377+
let llty = self.type_ix(result_layout.size.bits());
378+
let temp = if let Some(result_place) = result_place {
379+
PlaceRef {
380+
val: result_place,
381+
layout: result_layout,
382+
}
383+
} else {
384+
PlaceRef::alloca(self, result_layout)
385+
};
386+
let llval = self.volatile_load(llty, ptr, ptr_align);
387+
self.store(llval, temp.val.llval, abi_align);
388+
return if result_place.is_none() {
389+
IntrinsicResult::Operand(self.load_operand(temp).val)
390+
} else {
391+
IntrinsicResult::WroteIntoPlace
392+
};
378393
}
379-
return IntrinsicResult::WroteIntoPlace;
380394
}
381395
sym::volatile_store => {
382396
let dst = args[0].deref(self.cx());

compiler/rustc_codegen_ssa/src/traits/builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ pub trait BuilderMethods<'a, 'tcx>:
238238
fn alloca_with_ty(&mut self, layout: TyAndLayout<'tcx>) -> Self::Value;
239239

240240
fn load(&mut self, ty: Self::Type, ptr: Self::Value, align: Align) -> Self::Value;
241-
fn volatile_load(&mut self, ty: Self::Type, ptr: Self::Value) -> Self::Value;
241+
fn volatile_load(&mut self, ty: Self::Type, ptr: Self::Value, align: Align) -> Self::Value;
242242
fn atomic_load(
243243
&mut self,
244244
ty: Self::Type,

tests/codegen-llvm/i128-x86-align.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
// while rustc wants it to have 16 byte alignment. This test checks that we handle this
66
// correctly.
77

8-
// CHECK: %ScalarPair = type { i32, [3 x i32], i128 }
9-
108
#![feature(core_intrinsics)]
119

1210
#[repr(C)]
@@ -62,8 +60,8 @@ pub fn load_volatile(x: &ScalarPair) -> ScalarPair {
6260
// CHECK-SAME: dereferenceable(32) %_0,
6361
// CHECK-SAME: align 16
6462
// CHECK-SAME: dereferenceable(32) %x
65-
// CHECK: [[LOAD:%.*]] = load volatile %ScalarPair, ptr %x, align 16
66-
// CHECK-NEXT: store %ScalarPair [[LOAD]], ptr %_0, align 16
63+
// CHECK: [[LOAD:%.*]] = load volatile i256, ptr %x, align 16
64+
// CHECK-NEXT: store i256 [[LOAD]], ptr %_0, align 16
6765
// CHECK-NEXT: ret void
6866
unsafe { std::intrinsics::volatile_load(x) }
6967
}

tests/codegen-llvm/intrinsics/volatile.rs

Lines changed: 117 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55

66
use std::intrinsics;
77

8+
#[repr(align(32))]
9+
pub struct CustomZst;
10+
11+
type UninitFatPointer = std::mem::MaybeUninit<&'static dyn std::fmt::Debug>;
12+
813
// CHECK-LABEL: @volatile_copy_memory
914
#[no_mangle]
1015
pub unsafe fn volatile_copy_memory(a: *mut u8, b: *const u8) {
@@ -28,8 +33,62 @@ pub unsafe fn volatile_set_memory(a: *mut u8, b: u8) {
2833

2934
// CHECK-LABEL: @volatile_load
3035
#[no_mangle]
31-
pub unsafe fn volatile_load(a: *const u8) -> u8 {
32-
// CHECK: load volatile
36+
pub unsafe fn volatile_load(a: *const u16) -> u16 {
37+
// CHECK: [[TEMP:%.+]] = load volatile i16, ptr %a
38+
// CHECK-SAME: align 2{{,|$}}
39+
// CHECK-NEXT: ret i16 [[TEMP]]
40+
intrinsics::volatile_load(a)
41+
}
42+
43+
// CHECK-LABEL: @volatile_load_bool
44+
#[no_mangle]
45+
pub unsafe fn volatile_load_bool(a: *const bool) -> bool {
46+
// CHECK: [[TEMP:%.+]] = load volatile i8, ptr %a
47+
// CHECK-SAME: align 1{{,|$}}
48+
// CHECK: [[TRUNC:%.+]] = trunc nuw i8 [[TEMP]] to i1
49+
// CHECK: ret i1 [[TRUNC]]
50+
intrinsics::volatile_load(a)
51+
}
52+
53+
// CHECK-LABEL: @volatile_load_zst
54+
#[no_mangle]
55+
pub unsafe fn volatile_load_zst(a: *const CustomZst) -> CustomZst {
56+
// CHECK: start:
57+
// CHECK-NEXT: ret void
58+
intrinsics::volatile_load(a)
59+
}
60+
61+
// CHECK-LABEL: @volatile_load_array
62+
// CHECK-SAME: ptr{{.+}}sret([16 x i8]){{.+}}%_0
63+
#[no_mangle]
64+
pub unsafe fn volatile_load_array(a: *const [u16; 8]) -> [u16; 8] {
65+
// CHECK-NOT: alloca
66+
// CHECK: [[TEMP:%.+]] = load volatile i128, ptr %a,
67+
// CHECK-SAME: align 2{{,|$}}
68+
// CHECK: store i128 [[TEMP]], ptr %_0,
69+
// CHECK-SAME: align 2{{,|$}}
70+
// CHECK-NEXT: ret void
71+
intrinsics::volatile_load(a)
72+
}
73+
74+
// CHECK-LABEL: @volatile_load_fat
75+
#[no_mangle]
76+
pub unsafe fn volatile_load_fat(a: *const UninitFatPointer) -> UninitFatPointer {
77+
// CHECK: [[ALLOCA:%.+]] = alloca
78+
// CHECK-SAME: [[SIZE:4|8|16]] x i8
79+
// CHECK-SAME: align [[ALIGN:2|4|8]]
80+
81+
// CHECK: [[TEMP:%.+]] = load volatile [[INT:i32|i64|i128]], ptr %a,
82+
// CHECK-SAME: align [[ALIGN]]{{,|$}}
83+
// CHECK: store [[INT]] [[TEMP]], ptr [[ALLOCA]],
84+
// CHECK-SAME: align [[ALIGN]]{{,|$}}
85+
86+
// CHECK: [[T0:%.+]] = load ptr, ptr [[ALLOCA]], align [[ALIGN]]
87+
// CHECK: [[T1:%.+]] = getelementptr inbounds i8, ptr [[ALLOCA]]
88+
// CHECK: [[T2:%.+]] = load ptr, ptr [[T1]], align [[ALIGN]]
89+
// CHECK: [[P1:%.+]] = insertvalue { ptr, ptr } poison, ptr [[T0]], 0
90+
// CHECK: [[P2:%.+]] = insertvalue { ptr, ptr } [[P1]], ptr [[T2]], 1
91+
// CHECK: ret { ptr, ptr } [[P2]]
3392
intrinsics::volatile_load(a)
3493
}
3594

@@ -42,8 +101,62 @@ pub unsafe fn volatile_store(a: *mut u8, b: u8) {
42101

43102
// CHECK-LABEL: @unaligned_volatile_load
44103
#[no_mangle]
45-
pub unsafe fn unaligned_volatile_load(a: *const u8) -> u8 {
46-
// CHECK: load volatile
104+
pub unsafe fn unaligned_volatile_load(a: *const u16) -> u16 {
105+
// CHECK: [[TEMP:%.+]] = load volatile i16, ptr %a
106+
// CHECK-SAME: align 1{{,|$}}
107+
// CHECK-NEXT: ret i16 [[TEMP]]
108+
intrinsics::unaligned_volatile_load(a)
109+
}
110+
111+
// CHECK-LABEL: @unaligned_volatile_load_bool
112+
#[no_mangle]
113+
pub unsafe fn unaligned_volatile_load_bool(a: *const bool) -> bool {
114+
// CHECK: [[TEMP:%.+]] = load volatile i8, ptr %a
115+
// CHECK-SAME: align 1{{,|$}}
116+
// CHECK: [[TRUNC:%.+]] = trunc nuw i8 [[TEMP]] to i1
117+
// CHECK: ret i1 [[TRUNC]]
118+
intrinsics::unaligned_volatile_load(a)
119+
}
120+
121+
// CHECK-LABEL: @unaligned_volatile_load_zst
122+
#[no_mangle]
123+
pub unsafe fn unaligned_volatile_load_zst(a: *const CustomZst) -> CustomZst {
124+
// CHECK: start:
125+
// CHECK-NEXT: ret void
126+
intrinsics::unaligned_volatile_load(a)
127+
}
128+
129+
// CHECK-LABEL: @unaligned_volatile_load_array
130+
// CHECK-SAME: ptr{{.+}}sret([16 x i8]){{.+}}%_0,
131+
#[no_mangle]
132+
pub unsafe fn unaligned_volatile_load_array(a: *const [u16; 8]) -> [u16; 8] {
133+
// CHECK-NOT: alloca
134+
// CHECK: [[TEMP:%.+]] = load volatile i128, ptr %a,
135+
// CHECK-SAME: align 1{{,|$}}
136+
// CHECK: store i128 [[TEMP]], ptr %_0,
137+
// CHECK-SAME: align 2{{,|$}}
138+
// CHECK-NEXT: ret void
139+
intrinsics::unaligned_volatile_load(a)
140+
}
141+
142+
// CHECK-LABEL: @unaligned_volatile_load_fat
143+
#[no_mangle]
144+
pub unsafe fn unaligned_volatile_load_fat(a: *const UninitFatPointer) -> UninitFatPointer {
145+
// CHECK: [[ALLOCA:%.+]] = alloca
146+
// CHECK-SAME: [[SIZE]] x i8
147+
// CHECK-SAME: align [[ALIGN]]
148+
149+
// CHECK: [[TEMP:%.+]] = load volatile [[INT]], ptr %a,
150+
// CHECK-SAME: align 1{{,|$}}
151+
// CHECK: store [[INT]] [[TEMP]], ptr [[ALLOCA]],
152+
// CHECK-SAME: align [[ALIGN]]{{,|$}}
153+
154+
// CHECK: [[T0:%.+]] = load ptr, ptr [[ALLOCA]], align [[ALIGN]]
155+
// CHECK: [[T1:%.+]] = getelementptr inbounds i8, ptr [[ALLOCA]]
156+
// CHECK: [[T2:%.+]] = load ptr, ptr [[T1]], align [[ALIGN]]
157+
// CHECK: [[P1:%.+]] = insertvalue { ptr, ptr } poison, ptr [[T0]], 0
158+
// CHECK: [[P2:%.+]] = insertvalue { ptr, ptr } [[P1]], ptr [[T2]], 1
159+
// CHECK: ret { ptr, ptr } [[P2]]
47160
intrinsics::unaligned_volatile_load(a)
48161
}
49162

0 commit comments

Comments
 (0)