Skip to content

Commit 585ad90

Browse files
committed
cg_llvm: sve_tuple_{create,get,set} intrinsics
Clang changed from representing tuples of scalable vectors as structs rather than as wide vectors (that is, scalable vector types where the `N` part of the `<vscale x N x ty>` type was multiplied by the number of vectors). rustc mirrored this in the initial implementation of scalable vectors. Earlier versions of our patches used the wide vector representation and our intrinsic patches used the legacy `llvm.aarch64.sve.tuple.{create,get,set}{2,3,4}` intrinsics for creating these tuples/getting/setting the vectors, which were only supported due to LLVM's `AutoUpgrade` pass converting these intrinsics into `llvm.vector.insert`. `AutoUpgrade` only supports these legacy intrinsics with the wide vector representation. With the current struct representation, Clang has special handling in codegen for generating `insertvalue`/`extractvalue` instructions for these operations, which must be replicated by rustc's codegen for our intrinsics to use. This patch implements new intrinsics in `core::intrinsics::scalable` (mirroring the structure of `core::intrinsics::simd`) which rustc lowers to the appropriate `insertvalue`/`extractvalue` instructions.
1 parent 2ee725b commit 585ad90

7 files changed

Lines changed: 283 additions & 2 deletions

File tree

compiler/rustc_codegen_gcc/src/builder.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ use rustc_data_structures::fx::FxHashSet;
2424
use rustc_middle::bug;
2525
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
2626
use rustc_middle::ty::layout::{
27-
FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTyCtxt, HasTypingEnv, LayoutError, LayoutOfHelpers, TyAndLayout,
27+
FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTyCtxt, HasTypingEnv, LayoutError,
28+
LayoutOfHelpers, TyAndLayout,
2829
};
2930
use rustc_middle::ty::{self, AtomicOrdering, Instance, Ty, TyCtxt};
3031
use rustc_span::Span;

compiler/rustc_codegen_llvm/src/intrinsic.rs

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ use std::ffi::c_uint;
33
use std::ptr;
44

55
use rustc_abi::{
6-
Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size, WrappingRange,
6+
Align, BackendRepr, ExternAbi, Float, HasDataLayout, NumScalableVectors, Primitive, Size,
7+
WrappingRange,
78
};
89
use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
910
use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
@@ -581,6 +582,98 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
581582
self.pointercast(val, self.type_ptr())
582583
}
583584

585+
sym::sve_tuple_create2 => {
586+
assert_matches!(
587+
self.layout_of(fn_args.type_at(0)).backend_repr,
588+
BackendRepr::ScalableVector { number_of_vectors: NumScalableVectors::One, .. }
589+
);
590+
let tuple_ty = self.layout_of(fn_args.type_at(1));
591+
assert_matches!(
592+
tuple_ty.backend_repr,
593+
BackendRepr::ScalableVector { number_of_vectors: NumScalableVectors::Two, .. }
594+
);
595+
let ret = self.const_poison(self.backend_type(tuple_ty));
596+
let ret = self.insert_value(ret, args[0].immediate(), 0);
597+
self.insert_value(ret, args[1].immediate(), 1)
598+
}
599+
600+
sym::sve_tuple_create3 => {
601+
assert_matches!(
602+
self.layout_of(fn_args.type_at(0)).backend_repr,
603+
BackendRepr::ScalableVector { number_of_vectors: NumScalableVectors::One, .. }
604+
);
605+
let tuple_ty = self.layout_of(fn_args.type_at(1));
606+
assert_matches!(
607+
tuple_ty.backend_repr,
608+
BackendRepr::ScalableVector {
609+
number_of_vectors: NumScalableVectors::Three,
610+
..
611+
}
612+
);
613+
let ret = self.const_poison(self.backend_type(tuple_ty));
614+
let ret = self.insert_value(ret, args[0].immediate(), 0);
615+
let ret = self.insert_value(ret, args[1].immediate(), 1);
616+
self.insert_value(ret, args[2].immediate(), 2)
617+
}
618+
619+
sym::sve_tuple_create4 => {
620+
assert_matches!(
621+
self.layout_of(fn_args.type_at(0)).backend_repr,
622+
BackendRepr::ScalableVector { number_of_vectors: NumScalableVectors::One, .. }
623+
);
624+
let tuple_ty = self.layout_of(fn_args.type_at(1));
625+
assert_matches!(
626+
tuple_ty.backend_repr,
627+
BackendRepr::ScalableVector { number_of_vectors: NumScalableVectors::Four, .. }
628+
);
629+
let ret = self.const_poison(self.backend_type(tuple_ty));
630+
let ret = self.insert_value(ret, args[0].immediate(), 0);
631+
let ret = self.insert_value(ret, args[1].immediate(), 1);
632+
let ret = self.insert_value(ret, args[2].immediate(), 2);
633+
self.insert_value(ret, args[3].immediate(), 3)
634+
}
635+
636+
sym::sve_tuple_get => {
637+
assert_matches!(
638+
self.layout_of(fn_args.type_at(0)).backend_repr,
639+
BackendRepr::ScalableVector {
640+
number_of_vectors: NumScalableVectors::Two
641+
| NumScalableVectors::Three
642+
| NumScalableVectors::Four,
643+
..
644+
}
645+
);
646+
assert_matches!(
647+
self.layout_of(fn_args.type_at(1)).backend_repr,
648+
BackendRepr::ScalableVector { number_of_vectors: NumScalableVectors::One, .. }
649+
);
650+
self.extract_value(
651+
args[0].immediate(),
652+
fn_args.const_at(2).to_leaf().to_i32() as u64,
653+
)
654+
}
655+
656+
sym::sve_tuple_set => {
657+
assert_matches!(
658+
self.layout_of(fn_args.type_at(0)).backend_repr,
659+
BackendRepr::ScalableVector {
660+
number_of_vectors: NumScalableVectors::Two
661+
| NumScalableVectors::Three
662+
| NumScalableVectors::Four,
663+
..
664+
}
665+
);
666+
assert_matches!(
667+
self.layout_of(fn_args.type_at(1)).backend_repr,
668+
BackendRepr::ScalableVector { number_of_vectors: NumScalableVectors::One, .. }
669+
);
670+
self.insert_value(
671+
args[0].immediate(),
672+
args[1].immediate(),
673+
fn_args.const_at(2).to_leaf().to_i32() as u64,
674+
)
675+
}
676+
584677
_ if name.as_str().starts_with("simd_") => {
585678
// Unpack non-power-of-2 #[repr(packed, simd)] arguments.
586679
// This gives them the expected layout of a regular #[repr(simd)] vector.

compiler/rustc_hir_analysis/src/check/intrinsic.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,12 @@ pub(crate) fn check_intrinsic_type(
785785
sym::simd_shuffle => (3, 0, vec![param(0), param(0), param(1)], param(2)),
786786
sym::simd_shuffle_const_generic => (2, 1, vec![param(0), param(0)], param(1)),
787787

788+
sym::sve_tuple_create2 => (2, 0, vec![param(0), param(0)], param(1)),
789+
sym::sve_tuple_create3 => (2, 0, vec![param(0), param(0), param(0)], param(1)),
790+
sym::sve_tuple_create4 => (2, 0, vec![param(0), param(0), param(0), param(0)], param(1)),
791+
sym::sve_tuple_get => (2, 1, vec![param(0)], param(1)),
792+
sym::sve_tuple_set => (2, 1, vec![param(0), param(1)], param(0)),
793+
788794
sym::atomic_cxchg | sym::atomic_cxchgweak => (
789795
1,
790796
2,

compiler/rustc_span/src/symbol.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1984,6 +1984,11 @@ symbols! {
19841984
suggestion,
19851985
super_let,
19861986
supertrait_item_shadowing,
1987+
sve_tuple_create2,
1988+
sve_tuple_create3,
1989+
sve_tuple_create4,
1990+
sve_tuple_get,
1991+
sve_tuple_set,
19871992
sym,
19881993
sync,
19891994
synthetic,
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
//!
33
//! In this module, a "vector" is any `repr(simd)` type.
44
5+
pub mod scalable;
6+
57
use crate::marker::ConstParamTy;
68

79
/// Inserts an element into a vector, returning the updated vector.
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
//! Scalable vector compiler intrinsics.
2+
//!
3+
//! In this module, a "vector" is any `#[rustc_scalable_vector]`-annotated type.
4+
5+
/// Create a tuple of two vectors.
6+
///
7+
/// `SVecTup` must be a scalable vector tuple (`#[rustc_scalable_vector]`) and `SVec` must be a
8+
/// scalable vector (`#[rustc_scalable_vector(N)]`). `SVecTup` must be a tuple of vectors of
9+
/// type `SVec`.
10+
///
11+
/// Corresponds to Clang's `__builtin_sve_svcreate2*` builtins.
12+
#[rustc_nounwind]
13+
#[rustc_intrinsic]
14+
pub unsafe fn sve_tuple_create2<SVec, SVecTup>(x0: SVec, x1: SVec) -> SVecTup;
15+
16+
/// Create a tuple of three vectors.
17+
///
18+
/// `SVecTup` must be a scalable vector tuple (`#[rustc_scalable_vector]`) and `SVec` must be a
19+
/// scalable vector (`#[rustc_scalable_vector(N)]`). `SVecTup` must be a tuple of vectors of
20+
/// type `SVec`.
21+
///
22+
/// Corresponds to Clang's `__builtin_sve_svcreate3*` builtins.
23+
#[cfg(target_arch = "aarch64")]
24+
#[rustc_intrinsic]
25+
#[rustc_nounwind]
26+
#[target_feature(enable = "sve")]
27+
pub unsafe fn sve_tuple_create3<SVec, SVecTup>(x0: SVec, x1: SVec, x2: SVec) -> SVecTup;
28+
29+
/// Create a tuple of four vectors.
30+
///
31+
/// `SVecTup` must be a scalable vector tuple (`#[rustc_scalable_vector]`) and `SVec` must be a
32+
/// scalable vector (`#[rustc_scalable_vector(N)]`). `SVecTup` must be a tuple of vectors of
33+
/// type `SVec`.
34+
///
35+
/// Corresponds to Clang's `__builtin_sve_svcreate4*` builtins.
36+
#[cfg(target_arch = "aarch64")]
37+
#[rustc_intrinsic]
38+
#[rustc_nounwind]
39+
#[target_feature(enable = "sve")]
40+
pub unsafe fn sve_tuple_create4<SVec, SVecTup>(x0: SVec, x1: SVec, x2: SVec, x3: SVec) -> SVecTup;
41+
42+
/// Get one vector from a tuple of vectors.
43+
///
44+
/// `SVecTup` must be a scalable vector tuple (`#[rustc_scalable_vector]`) and `SVec` must be a
45+
/// scalable vector (`#[rustc_scalable_vector(N)]`). `SVecTup` must be a tuple of vectors of
46+
/// type `SVec`.
47+
///
48+
/// Corresponds to Clang's `__builtin_sve_svget*` builtins.
49+
///
50+
/// # Safety
51+
///
52+
/// `IDX` must be in-bounds of the tuple.
53+
#[cfg(target_arch = "aarch64")]
54+
#[rustc_intrinsic]
55+
#[rustc_nounwind]
56+
#[target_feature(enable = "sve")]
57+
pub unsafe fn sve_tuple_get<SVecTup, SVec, const IDX: i32>(tuple: SVecTup) -> SVec;
58+
59+
/// Change one vector in a tuple of vectors.
60+
///
61+
/// `SVecTup` must be a scalable vector tuple (`#[rustc_scalable_vector]`) and `SVec` must be a
62+
/// scalable vector (`#[rustc_scalable_vector(N)]`). `SVecTup` must be a tuple of vectors of
63+
/// type `SVec`.
64+
///
65+
/// Corresponds to Clang's `__builtin_sve_svset*` builtins.
66+
///
67+
/// # Safety
68+
///
69+
/// `IDX` must be in-bounds of the tuple.
70+
#[cfg(target_arch = "aarch64")]
71+
#[rustc_intrinsic]
72+
#[rustc_nounwind]
73+
#[target_feature(enable = "sve")]
74+
pub unsafe fn sve_tuple_set<SVecTup, SVec, const IDX: i32>(tuple: SVecTup, x: SVec) -> SVecTup;
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
//@ build-pass
2+
//@ only-aarch64
3+
#![crate_type = "lib"]
4+
#![allow(incomplete_features, internal_features)]
5+
#![feature(abi_unadjusted, core_intrinsics, link_llvm_intrinsics, rustc_attrs)]
6+
7+
// Tests that tuples of scalable vectors are passed as immediates and that the intrinsics for
8+
// creating/getting/setting tuples of scalable vectors generate the correct assembly
9+
10+
#[derive(Copy, Clone)]
11+
#[rustc_scalable_vector(4)]
12+
#[allow(non_camel_case_types)]
13+
pub struct svfloat32_t(f32);
14+
15+
#[derive(Copy, Clone)]
16+
#[rustc_scalable_vector]
17+
#[allow(non_camel_case_types)]
18+
pub struct svfloat32x2_t(svfloat32_t, svfloat32_t);
19+
20+
#[derive(Copy, Clone)]
21+
#[rustc_scalable_vector]
22+
#[allow(non_camel_case_types)]
23+
pub struct svfloat32x3_t(svfloat32_t, svfloat32_t, svfloat32_t);
24+
25+
#[derive(Copy, Clone)]
26+
#[rustc_scalable_vector]
27+
#[allow(non_camel_case_types)]
28+
pub struct svfloat32x4_t(svfloat32_t, svfloat32_t, svfloat32_t, svfloat32_t);
29+
30+
#[inline(never)]
31+
#[target_feature(enable = "sve")]
32+
pub fn svdup_n_f32(op: f32) -> svfloat32_t {
33+
extern "C" {
34+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv4f32")]
35+
fn _svdup_n_f32(op: f32) -> svfloat32_t;
36+
}
37+
unsafe { _svdup_n_f32(op) }
38+
}
39+
40+
// CHECK: define { <vscale x 4 x float>, <vscale x 4 x float> } @svcreate2_f32(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1)
41+
#[no_mangle]
42+
#[target_feature(enable = "sve")]
43+
pub fn svcreate2_f32(x0: svfloat32_t, x1: svfloat32_t) -> svfloat32x2_t {
44+
// CHECK: %1 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float> } poison, <vscale x 4 x float> %x0, 0
45+
// CHECK-NEXT: %2 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float> } %1, <vscale x 4 x float> %x1, 1
46+
unsafe { std::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) }
47+
}
48+
49+
// CHECK: define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @svcreate3_f32(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2)
50+
#[no_mangle]
51+
#[target_feature(enable = "sve")]
52+
pub fn svcreate3_f32(x0: svfloat32_t, x1: svfloat32_t, x2: svfloat32_t) -> svfloat32x3_t {
53+
// CHECK-LABEL: @_RNvCsk3YxfLN8zWY_6tuples13svcreate3_f32
54+
// CHECK: %1 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } poison, <vscale x 4 x float> %x0, 0
55+
// CHECK-NEXT: %2 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, <vscale x 4 x float> %x1, 1
56+
// CHECK-NEXT: %3 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %2, <vscale x 4 x float> %x2, 2
57+
unsafe { std::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) }
58+
}
59+
60+
// CHECK: define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @svcreate4_f32(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3)
61+
#[no_mangle]
62+
#[target_feature(enable = "sve")]
63+
pub fn svcreate4_f32(
64+
x0: svfloat32_t,
65+
x1: svfloat32_t,
66+
x2: svfloat32_t,
67+
x3: svfloat32_t,
68+
) -> svfloat32x4_t {
69+
// CHECK-LABEL: @_RNvCsk3YxfLN8zWY_6tuples13svcreate4_f32
70+
// CHECK: %1 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } poison, <vscale x 4 x float> %x0, 0
71+
// CHECK-NEXT: %2 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, <vscale x 4 x float> %x1, 1
72+
// CHECK-NEXT: %3 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %2, <vscale x 4 x float> %x2, 2
73+
// CHECK-NEXT: %4 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %3, <vscale x 4 x float> %x3, 3
74+
unsafe { std::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) }
75+
}
76+
77+
// CHECK: define <vscale x 4 x float> @svget2_f32({ <vscale x 4 x float>, <vscale x 4 x float> } %tup)
78+
#[no_mangle]
79+
#[target_feature(enable = "sve")]
80+
pub fn svget2_f32<const IDX: i32>(tup: svfloat32x2_t) -> svfloat32_t {
81+
// CHECK: %1 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %tup, 0
82+
unsafe { std::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IDX }>(tup) }
83+
}
84+
85+
// CHECK: define { <vscale x 4 x float>, <vscale x 4 x float> } @svset2_f32({ <vscale x 4 x float>, <vscale x 4 x float> } %tup, <vscale x 4 x float> %x)
86+
#[no_mangle]
87+
#[target_feature(enable = "sve")]
88+
pub fn svset2_f32<const IDX: i32>(tup: svfloat32x2_t, x: svfloat32_t) -> svfloat32x2_t {
89+
// CHECK: %1 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float> } %tup, <vscale x 4 x float> %x, 0
90+
unsafe { std::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IDX }>(tup, x) }
91+
}
92+
93+
// This function exists only so there are calls to the generic functions
94+
#[target_feature(enable = "sve")]
95+
pub fn test() {
96+
let x = svdup_n_f32(2f32);
97+
let tup = svcreate2_f32(x, x);
98+
let x = svget2_f32::<0>(tup);
99+
let tup = svset2_f32::<0>(tup, x);
100+
}

0 commit comments

Comments
 (0)