Skip to content

Commit 98e033d

Browse files
authored
Merge pull request #2058 from RalfJung/s390x-minmax
s390x: use llvm intrinsics instead of simd_fmin/fmax
2 parents abbf776 + 454668b commit 98e033d

1 file changed

Lines changed: 31 additions & 4 deletions

File tree

crates/core_arch/src/s390x/vector.rs

Lines changed: 31 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -335,6 +335,20 @@ unsafe extern "unadjusted" {
335335
#[link_name = "llvm.s390.vcfn"] fn vcfn(a: vector_signed_short, immarg: i32) -> vector_signed_short;
336336
#[link_name = "llvm.s390.vcnf"] fn vcnf(a: vector_signed_short, immarg: i32) -> vector_signed_short;
337337
#[link_name = "llvm.s390.vcrnfs"] fn vcrnfs(a: vector_float, b: vector_float, immarg: i32) -> vector_signed_short;
338+
339+
// These are the intrinsics we'd like to use (with mode 0). However, they require
340+
// "vector-enhancements-1" and don't have a fallback, whereas `vec_min`/`vec_max` should be
341+
// available with just "vector". Therefore, we cannot use them.
342+
// #[link_name = "llvm.s390.vfmaxsb"] fn vfmaxsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
343+
// #[link_name = "llvm.s390.vfmaxdb"] fn vfmaxdb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
344+
// #[link_name = "llvm.s390.vfminsb"] fn vfminsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
345+
// #[link_name = "llvm.s390.vfmindb"] fn vfmindb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
346+
// Instead, we use "portable" LLVM intrinsics -- even though those have the wrong semantics
347+
// (https://github.com/rust-lang/stdarch/issues/2060), they usually do the right thing.
348+
#[link_name = "llvm.minnum.v4f32"] fn minnum_v4f32(a: vector_float, b: vector_float) -> vector_float;
349+
#[link_name = "llvm.minnum.v2f64"] fn minnum_v2f64(a: vector_double, b: vector_double) -> vector_double;
350+
#[link_name = "llvm.maxnum.v4f32"] fn maxnum_v4f32(a: vector_float, b: vector_float) -> vector_float;
351+
#[link_name = "llvm.maxnum.v2f64"] fn maxnum_v2f64(a: vector_double, b: vector_double) -> vector_double;
338352
}
339353

340354
#[repr(simd)]
@@ -780,8 +794,8 @@ mod sealed {
780794
impl_max!(vec_vmxslg, vector_unsigned_long_long, vmxlg);
781795
}
782796

783-
test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [simd_fmax, "vector-enhancements-1" vfmaxsb ] }
784-
test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [simd_fmax, "vector-enhancements-1" vfmaxdb] }
797+
test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [maxnum_v4f32, "vector-enhancements-1" vfmaxsb] }
798+
test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [maxnum_v2f64, "vector-enhancements-1" vfmaxdb] }
785799

786800
impl_vec_trait!([VectorMax vec_max] vec_vfmaxsb (vector_float, vector_float) -> vector_float);
787801
impl_vec_trait!([VectorMax vec_max] vec_vfmaxdb (vector_double, vector_double) -> vector_double);
@@ -827,8 +841,8 @@ mod sealed {
827841
impl_min!(vec_vmnslg, vector_unsigned_long_long, vmnlg);
828842
}
829843

830-
test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [simd_fmin, "vector-enhancements-1" vfminsb] }
831-
test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [simd_fmin, "vector-enhancements-1" vfmindb] }
844+
test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [minnum_v4f32, "vector-enhancements-1" vfminsb] }
845+
test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [minnum_v2f64, "vector-enhancements-1" vfmindb] }
832846

833847
impl_vec_trait!([VectorMin vec_min] vec_vfminsb (vector_float, vector_float) -> vector_float);
834848
impl_vec_trait!([VectorMin vec_min] vec_vfmindb (vector_double, vector_double) -> vector_double);
@@ -7477,6 +7491,19 @@ mod tests {
74777491
[0, !0, !0, !0]
74787492
}
74797493

7494+
// f32 is the tricky case for max/min: it needs a fallback on z13, which lacks "vector-enhancements-1"
7495+
test_vec_2! { test_vec_max, vec_max, f32x4, f32x4 -> f32x4,
7496+
[1.0, f32::NAN, f32::INFINITY, 2.0],
7497+
[-10.0, -10.0, 5.0, f32::NAN],
7498+
[1.0, -10.0, f32::INFINITY, 2.0]
7499+
}
7500+
7501+
test_vec_2! { test_vec_min, vec_min, f32x4, f32x4 -> f32x4,
7502+
[1.0, f32::NAN, f32::INFINITY, 2.0],
7503+
[-10.0, -10.0, 5.0, f32::NAN],
7504+
[-10.0, -10.0, 5.0, 2.0]
7505+
}
7506+
74807507
#[simd_test(enable = "vector")]
74817508
fn test_vec_meadd() {
74827509
let a = vector_unsigned_short([1, 0, 2, 0, 3, 0, 4, 0]);

0 commit comments

Comments
 (0)