Skip to content

Commit cd848fb

Browse files
committed
Implement aarch64 PMULL LLVM intrinsics
1 parent ac09e49 commit cd848fb

2 files changed

Lines changed: 99 additions & 0 deletions

File tree

example/neon.rs

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -354,6 +354,27 @@ unsafe fn test_vsha256su1q_u32() {
354354
assert_eq!(r, e);
355355
}
356356

357+
/// Exercises the 64-bit polynomial (carry-less) multiply intrinsic `vmull_p64`.
///
/// With carry-less arithmetic `0b11 * 0b110` is `0b110 ^ 0b1100 = 0b1010`, so the
/// expected product is 10 rather than the ordinary integer product 18.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "aes")]
fn test_vmull_p64() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.pmull64
    let lhs: u64 = 3;
    let rhs: u64 = 6;
    let expected: u128 = 10;
    assert_eq!(vmull_p64(lhs, rhs), expected);
}
367+
368+
/// Exercises the eight-lane polynomial (carry-less) multiply intrinsic `vmull_p8`.
///
/// Each expected 16-bit lane is the carry-less product of the matching 8-bit lanes,
/// e.g. lane 3 is `3 * 11 = 0b1011 ^ 0b10110 = 0x1d`.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vmull_p8() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.pmull.v8i16
    let lhs = u8x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
    let rhs = u8x8::from([8, 9, 10, 11, 12, 13, 14, 15]);
    let expected = u16x8::from([0x0000, 0x0009, 0x0014, 0x001d, 0x0030, 0x0039, 0x0024, 0x002d]);
    // SAFETY: presumably `u8x8`/`u16x8` have the same size and layout as the polynomial
    // vector types `vmull_p8` takes and returns -- TODO confirm against their definitions.
    let product: u16x8 = unsafe {
        let lhs_poly = transmute(lhs);
        let rhs_poly = transmute(rhs);
        transmute(vmull_p8(lhs_poly, rhs_poly))
    };
    assert_eq!(product, expected);
}
377+
357378
#[cfg(target_arch = "aarch64")]
358379
fn main() {
359380
unsafe {
@@ -398,6 +419,9 @@ fn main() {
398419
test_vsha256h2q_u32();
399420
test_vsha256su0q_u32();
400421
test_vsha256su1q_u32();
422+
423+
test_vmull_p64();
424+
test_vmull_p8();
401425
}
402426
}
403427

src/intrinsics/llvm_aarch64.rs

Lines changed: 75 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -712,6 +712,81 @@ pub(super) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
712712
);
713713
}
714714

715+
"llvm.aarch64.neon.pmull64" => {
716+
intrinsic_args!(fx, args => (a, b); intrinsic);
717+
718+
let a = a.load_scalar(fx);
719+
let b = b.load_scalar(fx);
720+
721+
codegen_inline_asm_inner(
722+
fx,
723+
&[InlineAsmTemplatePiece::String(
724+
"fmov d0, x0
725+
fmov d1, x1
726+
pmull v0.1q, v0.1d, v1.1d"
727+
.into(),
728+
)],
729+
&[
730+
CInlineAsmOperand::Out {
731+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
732+
AArch64InlineAsmReg::v0,
733+
)),
734+
late: true,
735+
place: Some(ret),
736+
},
737+
CInlineAsmOperand::In {
738+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
739+
AArch64InlineAsmReg::x0,
740+
)),
741+
value: a,
742+
},
743+
CInlineAsmOperand::In {
744+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
745+
AArch64InlineAsmReg::x1,
746+
)),
747+
value: b,
748+
},
749+
CInlineAsmOperand::Out {
750+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
751+
AArch64InlineAsmReg::v1,
752+
)),
753+
late: true,
754+
place: None,
755+
},
756+
],
757+
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
758+
);
759+
}
760+
761+
"llvm.aarch64.neon.pmull.v8i16" => {
762+
intrinsic_args!(fx, args => (a, b); intrinsic);
763+
764+
let a = a.load_scalar(fx);
765+
let b = b.load_scalar(fx);
766+
767+
codegen_inline_asm_inner(
768+
fx,
769+
&[InlineAsmTemplatePiece::String("pmull v0.8h, v0.8b, v1.8b".into())],
770+
&[
771+
CInlineAsmOperand::InOut {
772+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
773+
AArch64InlineAsmReg::v0,
774+
)),
775+
_late: true,
776+
in_value: a,
777+
out_place: Some(ret),
778+
},
779+
CInlineAsmOperand::In {
780+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
781+
AArch64InlineAsmReg::v1,
782+
)),
783+
value: b,
784+
},
785+
],
786+
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
787+
);
788+
}
789+
715790
_ => {
716791
fx.tcx.dcx().warn(format!(
717792
"unsupported AArch64 llvm intrinsic {}; replacing with trap",

0 commit comments

Comments
 (0)