Skip to content

Commit f676c20

Browse files
committed
Auto merge of #155343 - dianqk:indirect-by-ref, r=nikic
codegen: Copy to an alloca when the argument is neither by-val nor by-move for indirect pointer. Fixes #155241. When a value is passed via an indirect pointer, the value needs to be copied to a new alloca. For x86_64-unknown-linux-gnu, `Thing` is the case: ```rust #[derive(Clone, Copy)] struct Thing(usize, usize, usize); pub fn foo() { let thing = Thing(0, 0, 0); bar(thing); assert_eq!(thing.0, 0); } #[inline(never)] #[unsafe(no_mangle)] pub fn bar(mut thing: Thing) { thing.0 = 1; } ``` Before passing the thing to the bar function, the thing needs to be copied to an alloca that is passed to bar. ```llvm %0 = alloca [24 x i8], align 8 call void @llvm.memcpy.p0.p0.i64(ptr align 8 %0, ptr align 8 %thing, i64 24, i1 false) call void @bar(ptr %0) ``` This patch applies the rule to the untupled arguments as well. ```rust #![feature(fn_traits)] #[derive(Clone, Copy)] struct Thing(usize, usize, usize); #[inline(never)] #[unsafe(no_mangle)] pub fn foo() { let thing = (Thing(0, 0, 0),); (|mut thing: Thing| { thing.0 = 1; }).call(thing); assert_eq!(thing.0.0, 0); } ``` For this case, this patch changes from ```llvm ; call example::foo::{closure#0} call void @_RNCNvCs15qdZVLwHPA_7example3foo0B3_(ptr ..., ptr %thing) ``` to ```llvm %0 = alloca [24 x i8], align 8 call void @llvm.memcpy.p0.p0.i64(ptr align 8 %0, ptr align 8 %thing, i64 24, i1 false) ; call example::foo::{closure#0} call void @_RNCNvCs15qdZVLwHPA_7example3foo0B3_(ptr ..., ptr %0) ``` However, the same rule cannot be applied to tail calls, as that would be unsound, because the caller's stack frame is overwritten by the callee's stack frame. Fortunately, #151143 has already handled the special case. We must not copy again. No copy is needed for by-move arguments, because the argument is passed to the callee "in-place". No copy is needed for by-val arguments either, because the attribute implies that a hidden copy of the pointee is made between the caller and the callee. 
NOTE: The patch has a trick for tail calls, which we pass by-move. We could choose to copy to an alloca even for by-move arguments, but tail calls require the argument to be strictly by-move.
2 parents cf1817b + 10d8329 commit f676c20

5 files changed

Lines changed: 229 additions & 66 deletions

File tree

compiler/rustc_codegen_ssa/src/mir/block.rs

Lines changed: 65 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,8 +1147,9 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
11471147

11481148
// Special logic for tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments.
11491149
//
1150-
// Normally an indirect argument with `on_stack: false` would be passed as a pointer into
1151-
// the caller's stack frame. For tail calls, that would be unsound, because the caller's
1150+
// Normally an indirect argument that is allocated in the caller's stack frame
1151+
// would be passed as a pointer into the callee's stack frame.
1152+
// For tail calls, that would be unsound, because the caller's
11521153
// stack frame is overwritten by the callee's stack frame.
11531154
//
11541155
// Therefore we store the argument for the callee in the corresponding caller's slot.
@@ -1240,59 +1241,57 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
12401241
}
12411242
}
12421243

1243-
match kind {
1244-
CallKind::Normal => {
1245-
// The callee needs to own the argument memory if we pass it
1246-
// by-ref, so make a local copy of non-immediate constants.
1247-
if let &mir::Operand::Copy(_) | &mir::Operand::Constant(_) = &arg.node
1248-
&& let Ref(PlaceValue { llextra: None, .. }) = op.val
1249-
{
1250-
let tmp = PlaceRef::alloca(bx, op.layout);
1251-
bx.lifetime_start(tmp.val.llval, tmp.layout.size);
1252-
op.store_with_annotation(bx, tmp);
1253-
op.val = Ref(tmp.val);
1254-
lifetime_ends_after_call.push((tmp.val.llval, tmp.layout.size));
1255-
}
1256-
}
1257-
CallKind::Tail => {
1258-
if let PassMode::Indirect { on_stack: false, .. } = fn_abi.args[i].mode {
1259-
let Some(tmp) = tail_call_temporaries[i].take() else {
1260-
span_bug!(
1261-
fn_span,
1262-
"missing temporary for indirect tail call argument #{i}"
1263-
)
1264-
};
1244+
let by_move = if let PassMode::Indirect { on_stack: false, .. } = fn_abi.args[i].mode
1245+
&& kind == CallKind::Tail
1246+
{
1247+
// Special logic for tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments.
1248+
//
1249+
// Normally an indirect argument that is allocated in the caller's stack frame
1250+
// would be passed as a pointer into the callee's stack frame.
1251+
// For tail calls, that would be unsound, because the caller's
1252+
// stack frame is overwritten by the callee's stack frame.
1253+
//
1254+
// To handle the case, we introduce `tail_call_temporaries` to copy arguments into
1255+
// temporaries, then copy back to the caller's argument slots.
1256+
// Finally, we pass the caller's argument slots as arguments.
1257+
//
1258+
// To do that, the argument must be MUST-by-move value.
1259+
let Some(tmp) = tail_call_temporaries[i].take() else {
1260+
span_bug!(fn_span, "missing temporary for indirect tail call argument #{i}")
1261+
};
12651262

1266-
let local = self.mir.args_iter().nth(i).unwrap();
1263+
let local = self.mir.args_iter().nth(i).unwrap();
12671264

1268-
match &self.locals[local] {
1269-
LocalRef::Place(arg) => {
1270-
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
1271-
op.val = Ref(arg.val);
1272-
}
1273-
LocalRef::Operand(arg) => {
1274-
let Ref(place_value) = arg.val else {
1275-
bug!("only `Ref` should use `PassMode::Indirect`");
1276-
};
1277-
bx.typed_place_copy(place_value, tmp.val, fn_abi.args[i].layout);
1278-
op.val = arg.val;
1279-
}
1280-
LocalRef::UnsizedPlace(_) => {
1281-
span_bug!(fn_span, "unsized types are not supported")
1282-
}
1283-
LocalRef::PendingOperand => {
1284-
span_bug!(fn_span, "argument local should not be pending")
1285-
}
1265+
match &self.locals[local] {
1266+
LocalRef::Place(arg) => {
1267+
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
1268+
op.val = Ref(arg.val);
1269+
}
1270+
LocalRef::Operand(arg) => {
1271+
let Ref(place_value) = arg.val else {
1272+
bug!("only `Ref` should use `PassMode::Indirect`");
12861273
};
1287-
1288-
bx.lifetime_end(tmp.val.llval, tmp.layout.size);
1274+
bx.typed_place_copy(place_value, tmp.val, fn_abi.args[i].layout);
1275+
op.val = arg.val;
12891276
}
1290-
}
1291-
}
1277+
LocalRef::UnsizedPlace(_) => {
1278+
span_bug!(fn_span, "unsized types are not supported")
1279+
}
1280+
LocalRef::PendingOperand => {
1281+
span_bug!(fn_span, "argument local should not be pending")
1282+
}
1283+
};
1284+
1285+
bx.lifetime_end(tmp.val.llval, tmp.layout.size);
1286+
true
1287+
} else {
1288+
matches!(arg.node, mir::Operand::Move(_))
1289+
};
12921290

12931291
self.codegen_argument(
12941292
bx,
12951293
op,
1294+
by_move,
12961295
&mut llargs,
12971296
&fn_abi.args[i],
12981297
&mut lifetime_ends_after_call,
@@ -1331,6 +1330,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
13311330
self.codegen_argument(
13321331
bx,
13331332
location,
1333+
/* by_move */ false,
13341334
&mut llargs,
13351335
last_arg,
13361336
&mut lifetime_ends_after_call,
@@ -1649,6 +1649,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
16491649
&mut self,
16501650
bx: &mut Bx,
16511651
op: OperandRef<'tcx, Bx::Value>,
1652+
by_move: bool,
16521653
llargs: &mut Vec<Bx::Value>,
16531654
arg: &ArgAbi<'tcx, Ty<'tcx>>,
16541655
lifetime_ends_after_call: &mut Vec<(Bx::Value, Size)>,
@@ -1703,18 +1704,19 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
17031704
_ => (op.immediate_or_packed_pair(bx), arg.layout.align.abi, false),
17041705
},
17051706
Ref(op_place_val) => match arg.mode {
1706-
PassMode::Indirect { attrs, .. } => {
1707+
PassMode::Indirect { attrs, on_stack, .. } => {
1708+
// For `foo(packed.large_field)`, and types with <4 byte alignment on x86,
1709+
// alignment requirements may be higher than the type's alignment, so copy
1710+
// to a higher-aligned alloca.
17071711
let required_align = match attrs.pointee_align {
17081712
Some(pointee_align) => cmp::max(pointee_align, arg.layout.align.abi),
17091713
None => arg.layout.align.abi,
17101714
};
1711-
if op_place_val.align < required_align {
1712-
// For `foo(packed.large_field)`, and types with <4 byte alignment on x86,
1713-
// alignment requirements may be higher than the type's alignment, so copy
1714-
// to a higher-aligned alloca.
1715+
// Copy to an alloca when the argument is neither by-val nor by-move.
1716+
if op_place_val.align < required_align || (!on_stack && !by_move) {
17151717
let scratch = PlaceValue::alloca(bx, arg.layout.size, required_align);
17161718
bx.lifetime_start(scratch.llval, arg.layout.size);
1717-
bx.typed_place_copy(scratch, op_place_val, op.layout);
1719+
op.store_with_annotation(bx, scratch.with_type(arg.layout));
17181720
lifetime_ends_after_call.push((scratch.llval, arg.layout.size));
17191721
(scratch.llval, scratch.align, true)
17201722
} else {
@@ -1800,6 +1802,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
18001802
lifetime_ends_after_call: &mut Vec<(Bx::Value, Size)>,
18011803
) -> usize {
18021804
let tuple = self.codegen_operand(bx, operand);
1805+
let by_move = matches!(operand, mir::Operand::Move(_));
18031806

18041807
// Handle both by-ref and immediate tuples.
18051808
if let Ref(place_val) = tuple.val {
@@ -1810,13 +1813,20 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
18101813
for i in 0..tuple.layout.fields.count() {
18111814
let field_ptr = tuple_ptr.project_field(bx, i);
18121815
let field = bx.load_operand(field_ptr);
1813-
self.codegen_argument(bx, field, llargs, &args[i], lifetime_ends_after_call);
1816+
self.codegen_argument(
1817+
bx,
1818+
field,
1819+
by_move,
1820+
llargs,
1821+
&args[i],
1822+
lifetime_ends_after_call,
1823+
);
18141824
}
18151825
} else {
18161826
// If the tuple is immediate, the elements are as well.
18171827
for i in 0..tuple.layout.fields.count() {
18181828
let op = tuple.extract_field(self, bx, i);
1819-
self.codegen_argument(bx, op, llargs, &args[i], lifetime_ends_after_call);
1829+
self.codegen_argument(bx, op, by_move, llargs, &args[i], lifetime_ends_after_call);
18201830
}
18211831
}
18221832
tuple.layout.fields.count()

tests/codegen-llvm/call-tmps-lifetime.rs

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,12 @@ extern crate minicore;
1111
use minicore::*;
1212

1313
// Const operand. Regression test for #98156.
14+
// Temporary allocas are not required when passing as byval arguments.
1415
//
1516
// CHECK-LABEL: define void @const_indirect(
1617
// CHECK-NEXT: start:
17-
// CHECK-NEXT: [[B:%.*]] = alloca
18-
// CHECK-NEXT: [[A:%.*]] = alloca
19-
// CHECK-NEXT: call void @llvm.lifetime.start.p0({{(i64 4096, )?}}ptr [[A]])
20-
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 4096, i1 false)
21-
// CHECK-NEXT: call void %h(ptr {{.*}} [[A]])
22-
// CHECK-NEXT: call void @llvm.lifetime.end.p0({{(i64 4096, )?}}ptr [[A]])
23-
// CHECK-NEXT: call void @llvm.lifetime.start.p0({{(i64 4096, )?}}ptr [[B]])
24-
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4096, i1 false)
25-
// CHECK-NEXT: call void %h(ptr {{.*}} [[B]])
26-
// CHECK-NEXT: call void @llvm.lifetime.end.p0({{(i64 4096, )?}}ptr [[B]])
18+
// CHECK-NEXT: call void %h(ptr {{.*}}byval([4096 x i8]){{.*}} [[C:@anon.*]])
19+
// CHECK-NEXT: call void %h(ptr {{.*}}byval([4096 x i8]){{.*}} [[C]])
2720
#[no_mangle]
2821
pub fn const_indirect(h: extern "C" fn([u32; 1024])) {
2922
const C: [u32; 1024] = [0; 1024];

tests/codegen-llvm/const-vector.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ pub fn do_call() {
7474
// CHECK: call void @test_simd(<4 x i32> <i32 2, i32 4, i32 6, i32 8>
7575
test_simd(const { Simd::<i32, 4>([2, 4, 6, 8]) });
7676

77-
// CHECK: call void @test_simd_unaligned(%"minisimd::PackedSimd<i32, 3>" %1
77+
// CHECK: [[UNALIGNED_ARG:%.*]] = load %"minisimd::PackedSimd<i32, 3>", ptr @anon{{.*}}
78+
// CHECK-NEXT: call void @test_simd_unaligned(%"minisimd::PackedSimd<i32, 3>" [[UNALIGNED_ARG]]
7879
test_simd_unaligned(const { Simd::<i32, 3>([2, 4, 6]) });
7980
}
8081
}
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
//! Regression test for issue <https://github.com/rust-lang/rust/issues/155241>.
2+
//! Arguments passed indirectly via a hidden pointer must be copied to an alloca,
3+
//! except for by-val or by-move.
4+
//@ add-minicore
5+
//@ revisions: x64-linux i686-linux i686-windows
6+
//@ compile-flags: -Cno-prepopulate-passes -Copt-level=3
7+
//@[x64-linux] compile-flags: --target x86_64-unknown-linux-gnu
8+
//@[x64-linux] needs-llvm-components: x86
9+
//@[i686-linux] compile-flags: --target i686-unknown-linux-gnu
10+
//@[i686-linux] needs-llvm-components: x86
11+
//@[i686-windows] compile-flags: --target i686-pc-windows-msvc
12+
//@[i686-windows] needs-llvm-components: x86
13+
14+
#![crate_type = "lib"]
15+
#![feature(stmt_expr_attributes, no_core)]
16+
#![expect(unused)]
17+
#![no_std]
18+
#![no_core]
19+
20+
extern crate minicore;
21+
use minicore::*;
22+
23+
struct Thing(u64, u64, u64);
24+
25+
impl Copy for Thing {}
26+
27+
// The argument of the second call is a by-move argument.
28+
29+
// CHECK-LABEL: @normal
30+
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[normal_V1:%.*]], ptr{{.*}} %value,
31+
// CHECK: call void @opaque(ptr{{.*}} [[normal_V1]])
32+
// CHECK: call void @opaque(ptr{{.*}} %value)
33+
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[normal_V3:%.*]], ptr{{.*}} @anon{{.*}},
34+
// CHECK: call void @opaque(ptr{{.*}} [[normal_V3]])
35+
#[unsafe(no_mangle)]
36+
pub fn normal() {
37+
#[inline(never)]
38+
#[unsafe(no_mangle)]
39+
fn opaque(mut thing: Thing) {
40+
thing.0 = 1;
41+
}
42+
let value = Thing(0, 0, 0);
43+
opaque(value);
44+
opaque(value);
45+
const VALUE: Thing = Thing(0, 0, 0);
46+
opaque(VALUE);
47+
}
48+
49+
// The argument of the second call is a by-move argument.
50+
51+
// CHECK-LABEL: @untupled
52+
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[untupled_V1:%.*]], ptr{{.*}} %value
53+
// CHECK: call indirect_bycopy_bymove_byval::untupled::{closure#0}
54+
// CHECK-NEXT: call void @{{.*}}(ptr {{.*}}, ptr{{.*}} [[untupled_V1]])
55+
// CHECK: call indirect_bycopy_bymove_byval::untupled::{closure#1}
56+
// CHECK-NEXT: call void @{{.*}}(ptr {{.*}}, ptr{{.*}} %value)
57+
// CHECK: call void @llvm.memcpy{{.*}}(ptr{{.*}} [[untupled_V3:%.*]], ptr{{.*}} @anon{{.*}}
58+
// CHECK: call indirect_bycopy_bymove_byval::untupled::{closure#2}
59+
// CHECK-NEXT: call void @{{.*}}(ptr {{.*}}, ptr{{.*}} [[untupled_V3]])
60+
#[unsafe(no_mangle)]
61+
pub fn untupled() {
62+
let value = (Thing(0, 0, 0),);
63+
(#[inline(never)]
64+
|mut thing: Thing| {
65+
thing.0 = 1;
66+
})
67+
.call(value);
68+
(#[inline(never)]
69+
|mut thing: Thing| {
70+
thing.0 = 2;
71+
})
72+
.call(value);
73+
const VALUE: (Thing,) = (Thing(0, 0, 0),);
74+
(#[inline(never)]
75+
|mut thing: Thing| {
76+
thing.0 = 3;
77+
})
78+
.call(VALUE);
79+
}
80+
81+
// All memcpy calls are redundant for byval.
82+
83+
// CHECK-LABEL: @byval
84+
// CHECK: call void @opaque_byval(ptr{{.*}} byval([24 x i8]){{.*}} %value)
85+
// CHECK: call void @opaque_byval(ptr{{.*}} byval([24 x i8]){{.*}} %value)
86+
// CHECK: call void @opaque_byval(ptr{{.*}} byval([24 x i8]){{.*}} @anon{{.*}})
87+
#[unsafe(no_mangle)]
88+
pub fn byval() {
89+
#[repr(C)]
90+
struct Thing(u64, u64, u64);
91+
impl Copy for Thing {}
92+
#[inline(never)]
93+
#[unsafe(no_mangle)]
94+
extern "C" fn opaque_byval(mut thing: Thing) {
95+
thing.0 = 1;
96+
}
97+
let value = Thing(0, 0, 0);
98+
opaque_byval(value);
99+
opaque_byval(value);
100+
const VALUE: Thing = Thing(0, 0, 0);
101+
opaque_byval(VALUE);
102+
}

tests/ui/moves/bycopy_untupled.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
//! Regression test for issue <https://github.com/rust-lang/rust/issues/155241>.
2+
//@ run-pass
3+
//@ revisions: noopt opt
4+
//@[noopt] compile-flags: -C opt-level=0
5+
//@[opt] compile-flags: -C opt-level=3
6+
7+
#![feature(fn_traits, stmt_expr_attributes)]
8+
#![expect(unused)]
9+
10+
#[derive(Copy, Clone)]
11+
struct Thing {
12+
x: usize,
13+
y: usize,
14+
z: usize,
15+
}
16+
17+
#[inline(never)]
18+
fn opt_0() {
19+
let value = (Thing { x: 0, y: 0, z: 0 },);
20+
(|mut thing: Thing| {
21+
thing.z = 1;
22+
})
23+
.call(value);
24+
assert_eq!(value.0.z, 0);
25+
}
26+
27+
#[inline(never)]
28+
fn opt_3() {
29+
fn with(f: impl FnOnce(Vec<usize>)) {
30+
f(Vec::new())
31+
}
32+
with(|mut v| v.resize(2, 1));
33+
with(|v| {
34+
if v.len() != 0 {
35+
unreachable!();
36+
}
37+
});
38+
}
39+
40+
#[inline(never)]
41+
fn const_() {
42+
const VALUE: (Thing,) = (Thing { x: 0, y: 0, z: 0 },);
43+
44+
(#[inline(never)]
45+
|mut thing: Thing| {
46+
thing.z = 1;
47+
std::hint::black_box(&mut thing.z);
48+
assert_eq!(thing.z, 1);
49+
})
50+
.call(VALUE);
51+
}
52+
53+
fn main() {
54+
opt_0();
55+
opt_3();
56+
const_();
57+
}

0 commit comments

Comments
 (0)