-
-
Notifications
You must be signed in to change notification settings - Fork 15.1k
Support u128/i128 c-variadic arguments
#155429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| use rustc_abi::{Align, BackendRepr, CVariadicStatus, Endian, HasDataLayout, Primitive, Size}; | ||
| use rustc_codegen_ssa::MemFlags; | ||
| use rustc_abi::{ | ||
| Align, BackendRepr, CVariadicStatus, Endian, Float, HasDataLayout, Integer, Primitive, Size, | ||
| }; | ||
| use rustc_codegen_ssa::common::IntPredicate; | ||
| use rustc_codegen_ssa::mir::operand::OperandRef; | ||
| use rustc_codegen_ssa::traits::{ | ||
|
|
@@ -77,6 +78,35 @@ fn emit_direct_ptr_va_arg<'ll, 'tcx>( | |
| } | ||
| } | ||
|
|
||
| /// Some backends apply special alignment rules to c-variadic arguments. | ||
| fn get_param_type_alignment<'ll, 'tcx>( | ||
| bx: &mut Builder<'_, 'll, 'tcx>, | ||
| layout: TyAndLayout<'tcx>, | ||
| ) -> Align { | ||
| let BackendRepr::Scalar(scalar) = layout.backend_repr else { | ||
| bug!("unexpected backend repr {:?}", layout.backend_repr); | ||
| }; | ||
|
|
||
| match bx.cx.tcx.sess.target.arch { | ||
| Arch::PowerPC64 => match scalar.primitive() { | ||
| Primitive::Int(integer, _) => match integer { | ||
| Integer::I8 | Integer::I16 => unreachable!(), | ||
| Integer::I32 | Integer::I64 => { /* fall through */ } | ||
| Integer::I128 => return Align::EIGHT, | ||
| }, | ||
| Primitive::Float(float) => match float { | ||
| Float::F16 | Float::F32 => unreachable!(), | ||
| Float::F64 => { /* fall through */ } | ||
| Float::F128 => return Align::from_bytes(16).unwrap(), | ||
| }, | ||
| Primitive::Pointer(_) => { /* fall through */ } | ||
| }, | ||
| _ => { /* fall through */ } | ||
| } | ||
|
|
||
| layout.align.abi | ||
| } | ||
|
|
||
| enum PassMode { | ||
| Direct, | ||
| Indirect, | ||
|
|
@@ -136,23 +166,23 @@ fn emit_ptr_va_arg<'ll, 'tcx>( | |
| ( | ||
| bx.cx.layout_of(Ty::new_imm_ptr(bx.cx.tcx, target_ty)).llvm_type(bx.cx), | ||
| bx.cx.data_layout().pointer_size(), | ||
| bx.cx.data_layout().pointer_align(), | ||
| bx.cx.data_layout().pointer_align().abi, | ||
| ) | ||
| } else { | ||
| (layout.llvm_type(bx.cx), layout.size, layout.align) | ||
| (layout.llvm_type(bx.cx), layout.size, get_param_type_alignment(bx, layout)) | ||
| }; | ||
| let (addr, addr_align) = emit_direct_ptr_va_arg( | ||
| bx, | ||
| list, | ||
| size, | ||
| align.abi, | ||
| align, | ||
| slot_size, | ||
| allow_higher_align, | ||
| force_right_adjust, | ||
| ); | ||
| if indirect { | ||
| let tmp_ret = bx.load(llty, addr, addr_align); | ||
| bx.load(layout.llvm_type(bx.cx), tmp_ret, align.abi) | ||
| bx.load(layout.llvm_type(bx.cx), tmp_ret, align) | ||
| } else { | ||
| bx.load(llty, addr, addr_align) | ||
| } | ||
|
|
@@ -585,8 +615,10 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>( | |
| // registers. In the case: l->gp_offset > 48 - num_gp * 8 or | ||
| // l->fp_offset > 176 - num_fp * 16 go to step 7. | ||
|
|
||
| // We support x86_64-unknown-linux-gnux32 which uses 4-byte pointers. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. deep sigh that is... correct... I suppose... |
||
| let unsigned_int_offset = 4; | ||
| let ptr_offset = 8; | ||
| let ptr_offset = bx.tcx().data_layout.pointer_size().bytes(); | ||
|
|
||
| let gp_offset_ptr = va_list_addr; | ||
| let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset)); | ||
|
|
||
|
|
@@ -660,7 +692,7 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>( | |
| let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16)); | ||
|
|
||
| let align = layout.layout.align().abi; | ||
| let tmp = bx.alloca(layout.layout.size(), align); | ||
| let tmp = bx.alloca(layout.size, layout.align.abi); | ||
|
|
||
| let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo); | ||
| let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi); | ||
|
|
@@ -683,7 +715,7 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>( | |
| Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr), | ||
| }; | ||
|
|
||
| let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi); | ||
| let tmp = bx.alloca(layout.size, layout.align.abi); | ||
|
|
||
| let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo); | ||
| let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi); | ||
|
|
@@ -751,16 +783,12 @@ fn copy_to_temporary_if_more_aligned<'ll, 'tcx>( | |
| src_align: Align, | ||
| ) -> &'ll Value { | ||
| if layout.layout.align.abi > src_align { | ||
| let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi); | ||
| bx.memcpy( | ||
| tmp, | ||
| layout.layout.align.abi, | ||
| reg_addr, | ||
| src_align, | ||
| bx.const_u32(layout.layout.size().bytes() as u32), | ||
| MemFlags::empty(), | ||
| None, | ||
| ); | ||
| assert!(layout.ty.is_integral()); | ||
|
|
||
| // A memcpy below optimizes poorly for 128-bit integers. | ||
| let tmp = bx.alloca(layout.size, layout.align.abi); | ||
| let val = bx.load(layout.llvm_type(bx), reg_addr, src_align); | ||
| bx.store(val, tmp, layout.align.abi); | ||
| tmp | ||
| } else { | ||
| reg_addr | ||
|
|
@@ -782,9 +810,14 @@ fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>( | |
| // byte boundary if alignment needed by type exceeds 8 byte boundary. | ||
| // It isn't stated explicitly in the standard, but in practice we use | ||
| // alignment greater than 16 where necessary. | ||
| if layout.layout.align.bytes() > 8 { | ||
| unreachable!("all instances of VaArgSafe have an alignment <= 8"); | ||
| } | ||
| // The AMD64 psABI leaves unspecified what to do for alignments above 16, but | ||
| // this behavior for 32+ alignment matches clang. | ||
| // It currently (2026 July) can only occur for 16-byte-aligned types. | ||
| let overflow_arg_area_v = if layout.layout.align.bytes() > 8 { | ||
| round_pointer_up_to_alignment(bx, overflow_arg_area_v, layout.layout.align.abi) | ||
|
folkertdev marked this conversation as resolved.
|
||
| } else { | ||
| overflow_arg_area_v | ||
| }; | ||
|
Comment on lines
+816
to
+820
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. deep sigh I'm not sure, then... large arguments like these are the point where clang's implementations often fall down and are incorrect relative to gcc re: varargs.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we only need 16, not greater than 16. For Anyhow, is there anything specific you want to see here?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, that's true. I think we're good then, yeah. |
||
|
|
||
| // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. | ||
| let mem_addr = overflow_arg_area_v; | ||
|
|
@@ -1071,7 +1104,8 @@ pub(super) fn emit_va_arg<'ll, 'tcx>( | |
| Arch::AArch64 => emit_aapcs_va_arg(bx, addr, target_ty), | ||
| Arch::Arm => { | ||
| // Types wider than 16 bytes are not currently supported. Clang has special logic for | ||
| // such types, but `VaArgSafe` is not implemented for any type that is this large. | ||
| // such types, but `VaArgSafe` is not implemented for any type that is this large on | ||
| // arm (i.e. 32-bit) targets. | ||
| assert!(bx.cx.size_of(target_ty).bytes() <= 16); | ||
|
|
||
| emit_ptr_va_arg( | ||
|
|
@@ -1093,6 +1127,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>( | |
| PassMode::Direct, | ||
| SlotSize::Bytes8, | ||
| AllowHigherAlign::Yes, | ||
| // ForceRightAdjust only takes effect on big-endian architectures. | ||
| ForceRightAdjust::Yes, | ||
| ), | ||
| Arch::RiscV32 if target.llvm_abiname == LlvmAbi::Ilp32e => { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -299,6 +299,9 @@ const impl<'f> Drop for VaList<'f> { | |
| /// and [`c_float`] is promoted to [`c_double`]. Implementing this trait for types that are | ||
| /// subject to this promotion rule is invalid. | ||
| /// | ||
| /// This trait is only implemented for 128-bit integers when the platform defines the `__int128` | ||
| /// type. | ||
|
folkertdev marked this conversation as resolved.
|
||
| /// | ||
| /// [`c_int`]: core::ffi::c_int | ||
| /// [`c_long`]: core::ffi::c_long | ||
| /// [`c_longlong`]: core::ffi::c_longlong | ||
|
|
@@ -310,8 +313,8 @@ const impl<'f> Drop for VaList<'f> { | |
| /// [`c_float`]: core::ffi::c_float | ||
| /// [`c_double`]: core::ffi::c_double | ||
| // We may unseal this trait in the future, but currently our `va_arg` implementations don't support | ||
| // types with an alignment larger than 8, or with a non-scalar layout. Inline assembly can be used | ||
| // to accept unsupported types in the meantime. | ||
| // types with a non-scalar layout. Inline assembly can be used to accept unsupported types in the | ||
| // meantime. | ||
| #[lang = "va_arg_safe"] | ||
| pub impl(self) unsafe trait VaArgSafe: Copy {} | ||
|
|
||
|
|
@@ -352,6 +355,61 @@ unsafe impl VaArgSafe for u32 {} | |
| unsafe impl VaArgSafe for u64 {} | ||
| unsafe impl VaArgSafe for usize {} | ||
|
|
||
| // Implement `VaArgSafe` for 128-bit integers on targets where clang provides `__int128`. | ||
| // | ||
| // GCC does not implement `__int128` for any 16-bit/32-bit target: | ||
| // | ||
| // https://gcc.gnu.org/onlinedocs/gcc-15.2.0/gcc/_005f_005fint128.html | ||
| // | ||
| // > There is no support in GCC for expressing an integer constant of type __int128 for targets | ||
| // > with long long integer less than 128 bits wide. | ||
| // | ||
| // Per https://learn.microsoft.com/en-us/cpp/cpp/data-type-ranges?view=msvc-170, MSVC does not | ||
| // define `__int128`. | ||
| // | ||
| // Clang is slightly more permissive: it defines `__int128` on wasm32 (a 32-bit target) and also | ||
| // does provide `__int128` on 64-bit `*-pc-windows-msvc`, and we follow suit. | ||
|
Comment on lines
+358
to
+371
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. calling this out: the implementation now follows clang: if it defines
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cc @Amanieu @joshtriplett if you have objections to that. |
||
| cfg_select! { | ||
| any( | ||
| target_arch = "aarch64", | ||
| target_arch = "amdgpu", | ||
| target_arch = "arm64ec", | ||
| target_arch = "bpf", | ||
| target_arch = "loongarch64", | ||
| target_arch = "mips64", | ||
| target_arch = "mips64r6", | ||
| target_arch = "nvptx64", | ||
| target_arch = "powerpc64", | ||
| target_arch = "riscv64", | ||
| target_arch = "s390x", | ||
| target_arch = "sparc64", | ||
| target_arch = "wasm32", | ||
| target_arch = "wasm64", | ||
| target_arch = "x86_64", | ||
| ) => { | ||
| #[cfg(not(any(target_arch = "wasm32", target_abi = "x32", target_pointer_width = "64")))] | ||
| compile_error!("unexpected target architecture for 128-bit c-variadic"); | ||
|
|
||
| #[unstable_feature_bound(c_variadic_int128)] | ||
| #[unstable(feature = "c_variadic_int128", issue = "155752")] | ||
| unsafe impl VaArgSafe for i128 {} | ||
| #[unstable_feature_bound(c_variadic_int128)] | ||
| #[unstable(feature = "c_variadic_int128", issue = "155752")] | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. seems fine since it has its own feature gate. |
||
| unsafe impl VaArgSafe for u128 {} | ||
| } | ||
| _ => { | ||
| #[repr(transparent)] | ||
| #[derive(Clone, Copy)] | ||
| struct S(i32); | ||
|
|
||
| // When there are no actual implementations on i128, declare the c_variadic_int128 feature | ||
| // on a private type so that the feature is defined on all targets. | ||
| #[unstable(feature = "c_variadic_int128", issue = "155752")] | ||
| unsafe impl VaArgSafe for S {} | ||
|
|
||
| } | ||
| } | ||
|
|
||
| unsafe impl VaArgSafe for f64 {} | ||
|
|
||
| unsafe impl<T> VaArgSafe for *mut T {} | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It has custom behavior, the
va_argimplementation forpowerpc64is herehttps://github.com/llvm/llvm-project/blob/c7eea85b8046660f0a1fd4a1c5c41b44475f8caf/clang/lib/CodeGen/Targets/PPC.cpp#L998
and it uses the implementation here
https://github.com/llvm/llvm-project/blob/c7eea85b8046660f0a1fd4a1c5c41b44475f8caf/clang/lib/CodeGen/Targets/PPC.cpp#L765
The logic falls through to the last line for
i128, returning8. Forf128it curiously does use an alignment of 16, so I've just added that already because it would be easy to miss down the line.View changes since the review
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's Weird. Kinda almost want to ask if that's intentional. I suppose this is currently correct, at least...? ...but does gcc agree...?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GCC does agree, see https://godbolt.org/z/eW3daf7rG. 24 is added to the pointer, with an alignment of 16 you'd expect to see 32 to be added. 24 = 8 (for the u32 plus padding) + 16 (for the i128).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah thanks for digging that up! Such a weird assembly language, to notate immediate adds with their own instruction and use juxtaposition for addition...