rust-lang · folkertdev · Apr 17, 2026 · Apr 25, 2026 · folkertdev · Apr 21, 2026
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -3,8 +3,8 @@ use std::ffi::c_uint;
 use std::{assert_matches, iter, ptr};
 
 use rustc_abi::{
-    AddressSpace, Align, BackendRepr, CVariadicStatus, Float, HasDataLayout, Integer,
-    NumScalableVectors, Primitive, Size, WrappingRange,
+    AddressSpace, Align, BackendRepr, CVariadicStatus, Float, HasDataLayout, NumScalableVectors,
+    Primitive, Size, WrappingRange,
 };
 use rustc_codegen_ssa::RetagInfo;
 use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
@@ -315,11 +315,6 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     Primitive::Pointer(_) => {
                         // Pointers are always OK.
                     }
-                    Primitive::Int(Integer::I128, _) => {
-                        // FIXME: maybe we should support these? At least on 32-bit powerpc
-                        // the logic in LLVM does not handle i128 correctly though.
-                        bug!("the va_arg intrinsic does not support `i128`/`u128`")
-                    }
                     Primitive::Int(..) => {
                         let int_width = self.cx().size_of(result_layout.ty).bits();
                         let target_c_int_width = self.cx().sess().target.options.c_int_width;

diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs
@@ -1,5 +1,6 @@
-use rustc_abi::{Align, BackendRepr, CVariadicStatus, Endian, HasDataLayout, Primitive, Size};
-use rustc_codegen_ssa::MemFlags;
+use rustc_abi::{
+    Align, BackendRepr, CVariadicStatus, Endian, Float, HasDataLayout, Integer, Primitive, Size,
+};
 use rustc_codegen_ssa::common::IntPredicate;
 use rustc_codegen_ssa::mir::operand::OperandRef;
 use rustc_codegen_ssa::traits::{
@@ -77,6 +78,35 @@ fn emit_direct_ptr_va_arg<'ll, 'tcx>(
     }
 }
 
+/// Some backends apply special alignment rules to c-variadic arguments.
+fn get_param_type_alignment<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    layout: TyAndLayout<'tcx>,
+) -> Align {
+    let BackendRepr::Scalar(scalar) = layout.backend_repr else {
+        bug!("unexpected backend repr {:?}", layout.backend_repr);
+    };
+
+    match bx.cx.tcx.sess.target.arch {
+        Arch::PowerPC64 => match scalar.primitive() {
+            Primitive::Int(integer, _) => match integer {
+                Integer::I8 | Integer::I16 => unreachable!(),
+                Integer::I32 | Integer::I64 => { /* fall through */ }
+                Integer::I128 => return Align::EIGHT,
+            },
+            Primitive::Float(float) => match float {
+                Float::F16 | Float::F32 => unreachable!(),
+                Float::F64 => { /* fall through */ }
+                Float::F128 => return Align::from_bytes(16).unwrap(),
+            },
+            Primitive::Pointer(_) => { /* fall through */ }
+        },
+        _ => { /* fall through */ }
+    }
+
+    layout.align.abi
+}
+
 enum PassMode {
     Direct,
     Indirect,
@@ -136,23 +166,23 @@ fn emit_ptr_va_arg<'ll, 'tcx>(
         (
             bx.cx.layout_of(Ty::new_imm_ptr(bx.cx.tcx, target_ty)).llvm_type(bx.cx),
             bx.cx.data_layout().pointer_size(),
-            bx.cx.data_layout().pointer_align(),
+            bx.cx.data_layout().pointer_align().abi,
         )
     } else {
-        (layout.llvm_type(bx.cx), layout.size, layout.align)
+        (layout.llvm_type(bx.cx), layout.size, get_param_type_alignment(bx, layout))
     };
     let (addr, addr_align) = emit_direct_ptr_va_arg(
         bx,
         list,
         size,
-        align.abi,
+        align,
         slot_size,
         allow_higher_align,
         force_right_adjust,
     );
     if indirect {
         let tmp_ret = bx.load(llty, addr, addr_align);
-        bx.load(layout.llvm_type(bx.cx), tmp_ret, align.abi)
+        bx.load(layout.llvm_type(bx.cx), tmp_ret, align)
     } else {
         bx.load(llty, addr, addr_align)
     }
@@ -585,8 +615,10 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
     // l->fp_offset > 176 - num_fp * 16 go to step 7.
 
+    // We support x86_64-unknown-linux-gnux32 which uses 4-byte pointers.
     let unsigned_int_offset = 4;
-    let ptr_offset = 8;
+    let ptr_offset = bx.tcx().data_layout.pointer_size().bytes();
+
     let gp_offset_ptr = va_list_addr;
     let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
 
@@ -660,7 +692,7 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
                     let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));
 
                     let align = layout.layout.align().abi;
-                    let tmp = bx.alloca(layout.layout.size(), align);
+                    let tmp = bx.alloca(layout.size, layout.align.abi);
 
                     let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                     let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
@@ -683,7 +715,7 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
                         Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
                     };
 
-                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+                    let tmp = bx.alloca(layout.size, layout.align.abi);
 
                     let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                     let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
@@ -751,16 +783,12 @@ fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
     src_align: Align,
 ) -> &'ll Value {
     if layout.layout.align.abi > src_align {
-        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
-        bx.memcpy(
-            tmp,
-            layout.layout.align.abi,
-            reg_addr,
-            src_align,
-            bx.const_u32(layout.layout.size().bytes() as u32),
-            MemFlags::empty(),
-            None,
-        );
+        assert!(layout.ty.is_integral());
+
+        // A memcpy below optimizes poorly for 128-bit integers.
+        let tmp = bx.alloca(layout.size, layout.align.abi);
+        let val = bx.load(layout.llvm_type(bx), reg_addr, src_align);
+        bx.store(val, tmp, layout.align.abi);
         tmp
     } else {
         reg_addr
@@ -782,9 +810,14 @@ fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
     // byte boundary if alignment needed by type exceeds 8 byte boundary.
     // It isn't stated explicitly in the standard, but in practice we use
     // alignment greater than 16 where necessary.
-    if layout.layout.align.bytes() > 8 {
-        unreachable!("all instances of VaArgSafe have an alignment <= 8");
-    }
+    // The AMD64 psABI leaves unspecified what to do for alignments above 16, but
+    // this behavior for 32+ alignment matches clang.
+    // It currently (2026 July) can only occur for 16-byte-aligned types.
+    let overflow_arg_area_v = if layout.layout.align.bytes() > 8 {
+        round_pointer_up_to_alignment(bx, overflow_arg_area_v, layout.layout.align.abi)
+    } else {
+        overflow_arg_area_v
+    };
 
     // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
     let mem_addr = overflow_arg_area_v;
@@ -1071,7 +1104,8 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
         Arch::AArch64 => emit_aapcs_va_arg(bx, addr, target_ty),
         Arch::Arm => {
             // Types wider than 16 bytes are not currently supported. Clang has special logic for
-            // such types, but `VaArgSafe` is not implemented for any type that is this large.
+            // such types, but `VaArgSafe` is not implemented for any type that is this large on
+            // arm (i.e. 32-bit) targets.
             assert!(bx.cx.size_of(target_ty).bytes() <= 16);
 
             emit_ptr_va_arg(
@@ -1093,6 +1127,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
             PassMode::Direct,
             SlotSize::Bytes8,
             AllowHigherAlign::Yes,
+            // ForceRightAdjust only takes effect on big-endian architectures.
             ForceRightAdjust::Yes,
         ),
         Arch::RiscV32 if target.llvm_abiname == LlvmAbi::Ilp32e => {

diff --git a/library/core/src/ffi/va_list.rs b/library/core/src/ffi/va_list.rs
@@ -299,6 +299,9 @@ const impl<'f> Drop for VaList<'f> {
 /// and [`c_float`] is promoted to [`c_double`]. Implementing this trait for types that are
 /// subject to this promotion rule is invalid.
 ///
+/// This trait is only implemented for 128-bit integers when the platform defines the `__int128`
+/// type.
+///
 /// [`c_int`]: core::ffi::c_int
 /// [`c_long`]: core::ffi::c_long
 /// [`c_longlong`]: core::ffi::c_longlong
@@ -310,8 +313,8 @@ const impl<'f> Drop for VaList<'f> {
 /// [`c_float`]: core::ffi::c_float
 /// [`c_double`]: core::ffi::c_double
 // We may unseal this trait in the future, but currently our `va_arg` implementations don't support
-// types with an alignment larger than 8, or with a non-scalar layout. Inline assembly can be used
-// to accept unsupported types in the meantime.
+// types with a non-scalar layout. Inline assembly can be used to accept unsupported types in the
+// meantime.
 #[lang = "va_arg_safe"]
 pub impl(self) unsafe trait VaArgSafe: Copy {}
 
@@ -352,6 +355,61 @@ unsafe impl VaArgSafe for u32 {}
 unsafe impl VaArgSafe for u64 {}
 unsafe impl VaArgSafe for usize {}
 
+// Implement `VaArgSafe` for 128-bit integers on targets where clang provides `__int128`.
+//
+// GCC does not implement `__int128` for any 16-bit/32-bit target:
+//
+// https://gcc.gnu.org/onlinedocs/gcc-15.2.0/gcc/_005f_005fint128.html
+//
+// > There is no support in GCC for expressing an integer constant of type __int128 for targets
+// > with long long integer less than 128 bits wide.
+//
+// Per https://learn.microsoft.com/en-us/cpp/cpp/data-type-ranges?view=msvc-170, MSVC does not
+// define `__int128`.
+//
+// Clang is slightly more permissive: it defines `__int128` on wasm32 (a 32-bit target) and also
+// does provide `__int128` on 64-bit `*-pc-windows-msvc`, and we follow suit.
+cfg_select! {
+    any(
+        target_arch = "aarch64",
+        target_arch = "amdgpu",
+        target_arch = "arm64ec",
+        target_arch = "bpf",
+        target_arch = "loongarch64",
+        target_arch = "mips64",
+        target_arch = "mips64r6",
+        target_arch = "nvptx64",
+        target_arch = "powerpc64",
+        target_arch = "riscv64",
+        target_arch = "s390x",
+        target_arch = "sparc64",
+        target_arch = "wasm32",
+        target_arch = "wasm64",
+        target_arch = "x86_64",
+    ) => {
+        #[cfg(not(any(target_arch = "wasm32", target_abi = "x32", target_pointer_width = "64")))]
+        compile_error!("unexpected target architecture for 128-bit c-variadic");
+
+        #[unstable_feature_bound(c_variadic_int128)]
+        #[unstable(feature = "c_variadic_int128", issue = "155752")]
+        unsafe impl VaArgSafe for i128 {}
+        #[unstable_feature_bound(c_variadic_int128)]
+        #[unstable(feature = "c_variadic_int128", issue = "155752")]
+        unsafe impl VaArgSafe for u128 {}
+    }
+    _ => {
+        #[repr(transparent)]
+        #[derive(Clone, Copy)]
+        struct S(i32);
+
+        // When there are no actual implementations on i128, declare the c_variadic_int128 feature
+        // on a private type so that the feature is defined on all targets.
+        #[unstable(feature = "c_variadic_int128", issue = "155752")]
+        unsafe impl VaArgSafe for S {}
+
+    }
+}
+
 unsafe impl VaArgSafe for f64 {}
 
 unsafe impl<T> VaArgSafe for *mut T {}