Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use std::ffi::c_uint;
use std::{assert_matches, iter, ptr};

use rustc_abi::{
AddressSpace, Align, BackendRepr, CVariadicStatus, Float, HasDataLayout, Integer,
NumScalableVectors, Primitive, Size, WrappingRange,
AddressSpace, Align, BackendRepr, CVariadicStatus, Float, HasDataLayout, NumScalableVectors,
Primitive, Size, WrappingRange,
};
use rustc_codegen_ssa::RetagInfo;
use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
Expand Down Expand Up @@ -315,11 +315,6 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
Primitive::Pointer(_) => {
// Pointers are always OK.
}
Primitive::Int(Integer::I128, _) => {
// FIXME: maybe we should support these? At least on 32-bit powerpc
// the logic in LLVM does not handle i128 correctly though.
bug!("the va_arg intrinsic does not support `i128`/`u128`")
}
Primitive::Int(..) => {
let int_width = self.cx().size_of(result_layout.ty).bits();
let target_c_int_width = self.cx().sess().target.options.c_int_width;
Expand Down
81 changes: 58 additions & 23 deletions compiler/rustc_codegen_llvm/src/va_arg.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use rustc_abi::{Align, BackendRepr, CVariadicStatus, Endian, HasDataLayout, Primitive, Size};
use rustc_codegen_ssa::MemFlags;
use rustc_abi::{
Align, BackendRepr, CVariadicStatus, Endian, Float, HasDataLayout, Integer, Primitive, Size,
};
use rustc_codegen_ssa::common::IntPredicate;
use rustc_codegen_ssa::mir::operand::OperandRef;
use rustc_codegen_ssa::traits::{
Expand Down Expand Up @@ -77,6 +78,35 @@ fn emit_direct_ptr_va_arg<'ll, 'tcx>(
}
}

/// Some backends apply special alignment rules to c-variadic arguments.
fn get_param_type_alignment<'ll, 'tcx>(
bx: &mut Builder<'_, 'll, 'tcx>,
layout: TyAndLayout<'tcx>,
) -> Align {
let BackendRepr::Scalar(scalar) = layout.backend_repr else {
bug!("unexpected backend repr {:?}", layout.backend_repr);
};

match bx.cx.tcx.sess.target.arch {
Arch::PowerPC64 => match scalar.primitive() {
Primitive::Int(integer, _) => match integer {
Integer::I8 | Integer::I16 => unreachable!(),
Integer::I32 | Integer::I64 => { /* fall through */ }
Integer::I128 => return Align::EIGHT,

@folkertdev folkertdev Apr 21, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It has custom behavior, the va_arg implementation for powerpc64 is here

https://github.com/llvm/llvm-project/blob/c7eea85b8046660f0a1fd4a1c5c41b44475f8caf/clang/lib/CodeGen/Targets/PPC.cpp#L998

and it uses the implementation here

https://github.com/llvm/llvm-project/blob/c7eea85b8046660f0a1fd4a1c5c41b44475f8caf/clang/lib/CodeGen/Targets/PPC.cpp#L765

The logic falls through to the last line for i128, returning 8. For f128 it curiously does use an alignment of 16, so I've just added that already because it would be easy to miss down the line.

View changes since the review

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's Weird. Kinda almost want to ask if that's intentional. I suppose this is currently correct, at least...? ...but does gcc agree...?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GCC does agree, see https://godbolt.org/z/eW3daf7rG. 24 is added to the pointer, with an alignment of 16 you'd expect to see 32 to be added. 24 = 8 (for the u32 plus padding) + 16 (for the i128).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah thanks for digging that up! Such a weird assembly language, to notate immediate adds with their own instruction and use juxtaposition for addition...

},
Primitive::Float(float) => match float {
Float::F16 | Float::F32 => unreachable!(),
Float::F64 => { /* fall through */ }
Float::F128 => return Align::from_bytes(16).unwrap(),
},
Primitive::Pointer(_) => { /* fall through */ }
},
_ => { /* fall through */ }
}

layout.align.abi
}

enum PassMode {
Direct,
Indirect,
Expand Down Expand Up @@ -136,23 +166,23 @@ fn emit_ptr_va_arg<'ll, 'tcx>(
(
bx.cx.layout_of(Ty::new_imm_ptr(bx.cx.tcx, target_ty)).llvm_type(bx.cx),
bx.cx.data_layout().pointer_size(),
bx.cx.data_layout().pointer_align(),
bx.cx.data_layout().pointer_align().abi,
)
} else {
(layout.llvm_type(bx.cx), layout.size, layout.align)
(layout.llvm_type(bx.cx), layout.size, get_param_type_alignment(bx, layout))
};
let (addr, addr_align) = emit_direct_ptr_va_arg(
bx,
list,
size,
align.abi,
align,
slot_size,
allow_higher_align,
force_right_adjust,
);
if indirect {
let tmp_ret = bx.load(llty, addr, addr_align);
bx.load(layout.llvm_type(bx.cx), tmp_ret, align.abi)
bx.load(layout.llvm_type(bx.cx), tmp_ret, align)
} else {
bx.load(llty, addr, addr_align)
}
Expand Down Expand Up @@ -585,8 +615,10 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
// registers. In the case: l->gp_offset > 48 - num_gp * 8 or
// l->fp_offset > 176 - num_fp * 16 go to step 7.

// We support x86_64-unknown-linux-gnux32 which uses 4-byte pointers.

@workingjubilee workingjubilee Jun 30, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

deep sigh that is... correct... I suppose...

View changes since the review

let unsigned_int_offset = 4;
let ptr_offset = 8;
let ptr_offset = bx.tcx().data_layout.pointer_size().bytes();

let gp_offset_ptr = va_list_addr;
let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));

Expand Down Expand Up @@ -660,7 +692,7 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));

let align = layout.layout.align().abi;
let tmp = bx.alloca(layout.layout.size(), align);
let tmp = bx.alloca(layout.size, layout.align.abi);

let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
Expand All @@ -683,7 +715,7 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
};

let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
let tmp = bx.alloca(layout.size, layout.align.abi);

let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
Expand Down Expand Up @@ -751,16 +783,12 @@ fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
src_align: Align,
) -> &'ll Value {
if layout.layout.align.abi > src_align {
let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
bx.memcpy(
tmp,
layout.layout.align.abi,
reg_addr,
src_align,
bx.const_u32(layout.layout.size().bytes() as u32),
MemFlags::empty(),
None,
);
assert!(layout.ty.is_integral());

// A memcpy below optimizes poorly for 128-bit integers.
let tmp = bx.alloca(layout.size, layout.align.abi);
let val = bx.load(layout.llvm_type(bx), reg_addr, src_align);
bx.store(val, tmp, layout.align.abi);
tmp
} else {
reg_addr
Expand All @@ -782,9 +810,14 @@ fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
// byte boundary if alignment needed by type exceeds 8 byte boundary.
// It isn't stated explicitly in the standard, but in practice we use
// alignment greater than 16 where necessary.
if layout.layout.align.bytes() > 8 {
unreachable!("all instances of VaArgSafe have an alignment <= 8");
}
// The AMD64 psABI leaves unspecified what to do for alignments above 16, but
// this behavior for 32+ alignment matches clang.
// It currently (2026 July) can only occur for 16-byte-aligned types.
let overflow_arg_area_v = if layout.layout.align.bytes() > 8 {
round_pointer_up_to_alignment(bx, overflow_arg_area_v, layout.layout.align.abi)
Comment thread
folkertdev marked this conversation as resolved.
} else {
overflow_arg_area_v
};
Comment on lines +816 to +820

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
  // byte boundary if alignment needed by type exceeds 8 byte boundary.
  // It isn't stated explicitly in the standard, but in practice we use
  // alignment greater than 16 where necessary.

deep sigh I'm not sure, then... large arguments like these are the point where clang's implementations often fall down and are incorrect relative to gcc re: varargs.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we only need 16, not greater than 16. For i128 I'd guess we're still allright, some people must use this e.g. for printf and so on.

Anyhow, is there anything specific you want to see here?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, that's true.

I think we're good then, yeah.


// AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
let mem_addr = overflow_arg_area_v;
Expand Down Expand Up @@ -1071,7 +1104,8 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
Arch::AArch64 => emit_aapcs_va_arg(bx, addr, target_ty),
Arch::Arm => {
// Types wider than 16 bytes are not currently supported. Clang has special logic for
// such types, but `VaArgSafe` is not implemented for any type that is this large.
// such types, but `VaArgSafe` is not implemented for any type that is this large on
// arm (i.e. 32-bit) targets.
assert!(bx.cx.size_of(target_ty).bytes() <= 16);

emit_ptr_va_arg(
Expand All @@ -1093,6 +1127,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
PassMode::Direct,
SlotSize::Bytes8,
AllowHigherAlign::Yes,
// ForceRightAdjust only takes effect on big-endian architectures.
ForceRightAdjust::Yes,
),
Arch::RiscV32 if target.llvm_abiname == LlvmAbi::Ilp32e => {
Expand Down
62 changes: 60 additions & 2 deletions library/core/src/ffi/va_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,9 @@ const impl<'f> Drop for VaList<'f> {
/// and [`c_float`] is promoted to [`c_double`]. Implementing this trait for types that are
/// subject to this promotion rule is invalid.
///
/// This trait is only implemented for 128-bit integers when the platform defines the `__int128`
/// type.
Comment thread
folkertdev marked this conversation as resolved.
///
/// [`c_int`]: core::ffi::c_int
/// [`c_long`]: core::ffi::c_long
/// [`c_longlong`]: core::ffi::c_longlong
Expand All @@ -310,8 +313,8 @@ const impl<'f> Drop for VaList<'f> {
/// [`c_float`]: core::ffi::c_float
/// [`c_double`]: core::ffi::c_double
// We may unseal this trait in the future, but currently our `va_arg` implementations don't support
// types with an alignment larger than 8, or with a non-scalar layout. Inline assembly can be used
// to accept unsupported types in the meantime.
// types with a non-scalar layout. Inline assembly can be used to accept unsupported types in the
// meantime.
#[lang = "va_arg_safe"]
pub impl(self) unsafe trait VaArgSafe: Copy {}

Expand Down Expand Up @@ -352,6 +355,61 @@ unsafe impl VaArgSafe for u32 {}
unsafe impl VaArgSafe for u64 {}
unsafe impl VaArgSafe for usize {}

// Implement `VaArgSafe` for 128-bit integers on targets where clang provides `__int128`.
//
// GCC does not implement `__int128` for any 16-bit/32-bit target:
//
// https://gcc.gnu.org/onlinedocs/gcc-15.2.0/gcc/_005f_005fint128.html
//
// > There is no support in GCC for expressing an integer constant of type __int128 for targets
// > with long long integer less than 128 bits wide.
//
// Per https://learn.microsoft.com/en-us/cpp/cpp/data-type-ranges?view=msvc-170, MSVC does not
// define `__int128`.
//
// Clang is slightly more permissive: it defines `__int128` on wasm32 (a 32-bit target) and also
// does provide `__int128` on 64-bit `*-pc-windows-msvc`, and we follow suit.
Comment on lines +358 to +371

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

calling this out: the implementation now follows clang: if it defines __int128, we impl the trait.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @Amanieu @joshtriplett if you have objections to that.

cfg_select! {
any(
target_arch = "aarch64",
target_arch = "amdgpu",
target_arch = "arm64ec",
target_arch = "bpf",
target_arch = "loongarch64",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "nvptx64",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
target_arch = "sparc64",
target_arch = "wasm32",
target_arch = "wasm64",
target_arch = "x86_64",
) => {
#[cfg(not(any(target_arch = "wasm32", target_abi = "x32", target_pointer_width = "64")))]
compile_error!("unexpected target architecture for 128-bit c-variadic");

#[unstable_feature_bound(c_variadic_int128)]
#[unstable(feature = "c_variadic_int128", issue = "155752")]
unsafe impl VaArgSafe for i128 {}
#[unstable_feature_bound(c_variadic_int128)]
#[unstable(feature = "c_variadic_int128", issue = "155752")]

@workingjubilee workingjubilee Jun 30, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems fine since it has its own feature gate.

View changes since the review

unsafe impl VaArgSafe for u128 {}
}
_ => {
#[repr(transparent)]
#[derive(Clone, Copy)]
struct S(i32);

// When there are no actual implementations on i128, declare the c_variadic_int128 feature
// on a private type so that the feature is defined on all targets.
#[unstable(feature = "c_variadic_int128", issue = "155752")]
unsafe impl VaArgSafe for S {}

}
}

unsafe impl VaArgSafe for f64 {}

unsafe impl<T> VaArgSafe for *mut T {}
Expand Down
Loading
Loading