Skip to content

Commit 1e7c3b9

Browse files
authored
Fix Clang SVE build errors (#1303)
1 parent 174247d commit 1e7c3b9

File tree

1 file changed

+141
-49
lines changed

1 file changed

+141
-49
lines changed

include/xsimd/arch/xsimd_sve.hpp

Lines changed: 141 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -78,22 +78,114 @@ namespace xsimd
7878
// enable for all SVE supported types
7979
template <class T>
8080
using sve_enable_all_t = std::enable_if_t<std::is_arithmetic<T>::value, int>;
81+
82+
// Trait describing the SVE types that correspond to a scalar,
83+
// parameterised by (byte size, signedness, floating-point-ness).
84+
//
85+
// `scalar` is the matching fixed-width scalar (int8_t, ..., float,
86+
// double). SVE load/store intrinsics are overloaded on these
87+
// pointer types, so remapping integers through `scalar` avoids
88+
// platform quirks such as darwin arm64's `long` vs `long long`
89+
// distinction and rejects `char` as an element type.
90+
//
91+
// `sizeless` is the matching sizeless SVE type. xsimd stores SVE
92+
// vectors as fixed-size attributed types (arm_sve_vector_bits),
93+
// which clang treats as implicitly convertible to every sizeless
94+
// SVE type — including multi-vector tuples — making the overloaded
95+
// svreinterpret_*/svsel/etc. intrinsics ambiguous. Static-casting
96+
// to `sizeless` first collapses the overload set to the single
97+
// 1-vector candidate.
98+
template <size_t N, bool Signed, bool FP>
99+
struct sve_type;
100+
template <>
101+
struct sve_type<1, true, false>
102+
{
103+
using scalar = int8_t;
104+
using sizeless = svint8_t;
105+
};
106+
template <>
107+
struct sve_type<1, false, false>
108+
{
109+
using scalar = uint8_t;
110+
using sizeless = svuint8_t;
111+
};
112+
template <>
113+
struct sve_type<2, true, false>
114+
{
115+
using scalar = int16_t;
116+
using sizeless = svint16_t;
117+
};
118+
template <>
119+
struct sve_type<2, false, false>
120+
{
121+
using scalar = uint16_t;
122+
using sizeless = svuint16_t;
123+
};
124+
template <>
125+
struct sve_type<4, true, false>
126+
{
127+
using scalar = int32_t;
128+
using sizeless = svint32_t;
129+
};
130+
template <>
131+
struct sve_type<4, false, false>
132+
{
133+
using scalar = uint32_t;
134+
using sizeless = svuint32_t;
135+
};
136+
template <>
137+
struct sve_type<8, true, false>
138+
{
139+
using scalar = int64_t;
140+
using sizeless = svint64_t;
141+
};
142+
template <>
143+
struct sve_type<8, false, false>
144+
{
145+
using scalar = uint64_t;
146+
using sizeless = svuint64_t;
147+
};
148+
template <>
149+
struct sve_type<4, true, true>
150+
{
151+
using scalar = float;
152+
using sizeless = svfloat32_t;
153+
};
154+
template <>
155+
struct sve_type<8, true, true>
156+
{
157+
using scalar = double;
158+
using sizeless = svfloat64_t;
159+
};
160+
161+
template <class T>
162+
using sve_type_for = sve_type<sizeof(T), std::is_signed<T>::value, std::is_floating_point<T>::value>;
163+
164+
template <class T>
165+
using sve_sizeless_t = typename sve_type_for<T>::sizeless;
166+
167+
// Remap integer Ts to their matching fixed-width counterpart (via
168+
// sve_type::scalar) so svld1/svst1 see the pointer type their
169+
// overload set expects; pass non-integer Ts through unchanged.
170+
template <class T, bool IsInt = std::is_integral<std::decay_t<T>>::value>
171+
struct sve_fix_integer_impl
172+
{
173+
using type = T;
174+
};
175+
template <class T>
176+
struct sve_fix_integer_impl<T, true>
177+
{
178+
using type = typename sve_type_for<std::decay_t<T>>::scalar;
179+
};
180+
181+
template <class T>
182+
using sve_fix_char_t = typename sve_fix_integer_impl<T>::type;
81183
} // namespace detail
82184

83185
/*********
84186
* Load *
85187
*********/
86188

87-
namespace detail
88-
{
89-
// "char" is not allowed in SVE load/store operations
90-
using sve_fix_char_t_impl = std::conditional_t<std::is_signed<char>::value, int8_t, uint8_t>;
91-
92-
template <class T>
93-
using sve_fix_char_t = std::conditional_t<std::is_same<char, std::decay_t<T>>::value,
94-
sve_fix_char_t_impl, T>;
95-
}
96-
97189
template <class A, class T, detail::sve_enable_all_t<T> = 0>
98190
XSIMD_INLINE batch<T, A> load_aligned(T const* src, convert<T>, requires_arch<sve>) noexcept
99191
{
@@ -108,7 +200,7 @@ namespace xsimd
108200

109201
// load_masked
110202
template <class A, class T, bool... Values, class Mode, detail::sve_enable_all_t<T> = 0>
111-
XSIMD_INLINE batch<T, A> load_masked(T const* mem, batch_bool_constant<float, A, Values...> mask, Mode, requires_arch<sve>) noexcept
203+
XSIMD_INLINE batch<T, A> load_masked(T const* mem, batch_bool_constant<float, A, Values...>, Mode, requires_arch<sve>) noexcept
112204
{
113205
return svld1(detail::sve_pmask<Values...>(), reinterpret_cast<detail::sve_fix_char_t<T> const*>(mem));
114206
}
@@ -323,25 +415,25 @@ namespace xsimd
323415
template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
324416
XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<sve>) noexcept
325417
{
326-
return svreinterpret_u8(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s8(arg)));
418+
return svreinterpret_u8(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s8(static_cast<detail::sve_sizeless_t<T>>(arg))));
327419
}
328420

329421
template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
330422
XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<sve>) noexcept
331423
{
332-
return svreinterpret_u16(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s16(arg)));
424+
return svreinterpret_u16(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s16(static_cast<detail::sve_sizeless_t<T>>(arg))));
333425
}
334426

335427
template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
336428
XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<sve>) noexcept
337429
{
338-
return svreinterpret_u32(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s32(arg)));
430+
return svreinterpret_u32(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s32(static_cast<detail::sve_sizeless_t<T>>(arg))));
339431
}
340432

341433
template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
342434
XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<sve>) noexcept
343435
{
344-
return svreinterpret_u64(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s64(arg)));
436+
return svreinterpret_u64(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s64(static_cast<detail::sve_sizeless_t<T>>(arg))));
345437
}
346438

347439
template <class A, class T, detail::sve_enable_signed_int_or_floating_point_t<T> = 0>
@@ -405,17 +497,17 @@ namespace xsimd
405497
template <class A>
406498
XSIMD_INLINE batch<float, A> bitwise_and(batch<float, A> const& lhs, batch<float, A> const& rhs, requires_arch<sve>) noexcept
407499
{
408-
const auto lhs_bits = svreinterpret_u32(lhs);
409-
const auto rhs_bits = svreinterpret_u32(rhs);
500+
const auto lhs_bits = svreinterpret_u32(static_cast<detail::sve_sizeless_t<float>>(lhs));
501+
const auto rhs_bits = svreinterpret_u32(static_cast<detail::sve_sizeless_t<float>>(rhs));
410502
const auto result_bits = svand_x(detail::sve_ptrue<float>(), lhs_bits, rhs_bits);
411503
return svreinterpret_f32(result_bits);
412504
}
413505

414506
template <class A>
415507
XSIMD_INLINE batch<double, A> bitwise_and(batch<double, A> const& lhs, batch<double, A> const& rhs, requires_arch<sve>) noexcept
416508
{
417-
const auto lhs_bits = svreinterpret_u64(lhs);
418-
const auto rhs_bits = svreinterpret_u64(rhs);
509+
const auto lhs_bits = svreinterpret_u64(static_cast<detail::sve_sizeless_t<double>>(lhs));
510+
const auto rhs_bits = svreinterpret_u64(static_cast<detail::sve_sizeless_t<double>>(rhs));
419511
const auto result_bits = svand_x(detail::sve_ptrue<double>(), lhs_bits, rhs_bits);
420512
return svreinterpret_f64(result_bits);
421513
}
@@ -436,17 +528,17 @@ namespace xsimd
436528
template <class A>
437529
XSIMD_INLINE batch<float, A> bitwise_andnot(batch<float, A> const& lhs, batch<float, A> const& rhs, requires_arch<sve>) noexcept
438530
{
439-
const auto lhs_bits = svreinterpret_u32(lhs);
440-
const auto rhs_bits = svreinterpret_u32(rhs);
531+
const auto lhs_bits = svreinterpret_u32(static_cast<detail::sve_sizeless_t<float>>(lhs));
532+
const auto rhs_bits = svreinterpret_u32(static_cast<detail::sve_sizeless_t<float>>(rhs));
441533
const auto result_bits = svbic_x(detail::sve_ptrue<float>(), lhs_bits, rhs_bits);
442534
return svreinterpret_f32(result_bits);
443535
}
444536

445537
template <class A>
446538
XSIMD_INLINE batch<double, A> bitwise_andnot(batch<double, A> const& lhs, batch<double, A> const& rhs, requires_arch<sve>) noexcept
447539
{
448-
const auto lhs_bits = svreinterpret_u64(lhs);
449-
const auto rhs_bits = svreinterpret_u64(rhs);
540+
const auto lhs_bits = svreinterpret_u64(static_cast<detail::sve_sizeless_t<double>>(lhs));
541+
const auto rhs_bits = svreinterpret_u64(static_cast<detail::sve_sizeless_t<double>>(rhs));
450542
const auto result_bits = svbic_x(detail::sve_ptrue<double>(), lhs_bits, rhs_bits);
451543
return svreinterpret_f64(result_bits);
452544
}
@@ -467,17 +559,17 @@ namespace xsimd
467559
template <class A>
468560
XSIMD_INLINE batch<float, A> bitwise_or(batch<float, A> const& lhs, batch<float, A> const& rhs, requires_arch<sve>) noexcept
469561
{
470-
const auto lhs_bits = svreinterpret_u32(lhs);
471-
const auto rhs_bits = svreinterpret_u32(rhs);
562+
const auto lhs_bits = svreinterpret_u32(static_cast<detail::sve_sizeless_t<float>>(lhs));
563+
const auto rhs_bits = svreinterpret_u32(static_cast<detail::sve_sizeless_t<float>>(rhs));
472564
const auto result_bits = svorr_x(detail::sve_ptrue<float>(), lhs_bits, rhs_bits);
473565
return svreinterpret_f32(result_bits);
474566
}
475567

476568
template <class A>
477569
XSIMD_INLINE batch<double, A> bitwise_or(batch<double, A> const& lhs, batch<double, A> const& rhs, requires_arch<sve>) noexcept
478570
{
479-
const auto lhs_bits = svreinterpret_u64(lhs);
480-
const auto rhs_bits = svreinterpret_u64(rhs);
571+
const auto lhs_bits = svreinterpret_u64(static_cast<detail::sve_sizeless_t<double>>(lhs));
572+
const auto rhs_bits = svreinterpret_u64(static_cast<detail::sve_sizeless_t<double>>(rhs));
481573
const auto result_bits = svorr_x(detail::sve_ptrue<double>(), lhs_bits, rhs_bits);
482574
return svreinterpret_f64(result_bits);
483575
}
@@ -498,17 +590,17 @@ namespace xsimd
498590
template <class A>
499591
XSIMD_INLINE batch<float, A> bitwise_xor(batch<float, A> const& lhs, batch<float, A> const& rhs, requires_arch<sve>) noexcept
500592
{
501-
const auto lhs_bits = svreinterpret_u32(lhs);
502-
const auto rhs_bits = svreinterpret_u32(rhs);
593+
const auto lhs_bits = svreinterpret_u32(static_cast<detail::sve_sizeless_t<float>>(lhs));
594+
const auto rhs_bits = svreinterpret_u32(static_cast<detail::sve_sizeless_t<float>>(rhs));
503595
const auto result_bits = sveor_x(detail::sve_ptrue<float>(), lhs_bits, rhs_bits);
504596
return svreinterpret_f32(result_bits);
505597
}
506598

507599
template <class A>
508600
XSIMD_INLINE batch<double, A> bitwise_xor(batch<double, A> const& lhs, batch<double, A> const& rhs, requires_arch<sve>) noexcept
509601
{
510-
const auto lhs_bits = svreinterpret_u64(lhs);
511-
const auto rhs_bits = svreinterpret_u64(rhs);
602+
const auto lhs_bits = svreinterpret_u64(static_cast<detail::sve_sizeless_t<double>>(lhs));
603+
const auto rhs_bits = svreinterpret_u64(static_cast<detail::sve_sizeless_t<double>>(rhs));
512604
const auto result_bits = sveor_x(detail::sve_ptrue<double>(), lhs_bits, rhs_bits);
513605
return svreinterpret_f64(result_bits);
514606
}
@@ -529,15 +621,15 @@ namespace xsimd
529621
template <class A>
530622
XSIMD_INLINE batch<float, A> bitwise_not(batch<float, A> const& arg, requires_arch<sve>) noexcept
531623
{
532-
const auto arg_bits = svreinterpret_u32(arg);
624+
const auto arg_bits = svreinterpret_u32(static_cast<detail::sve_sizeless_t<float>>(arg));
533625
const auto result_bits = svnot_x(detail::sve_ptrue<float>(), arg_bits);
534626
return svreinterpret_f32(result_bits);
535627
}
536628

537629
template <class A>
538630
XSIMD_INLINE batch<double, A> bitwise_not(batch<double, A> const& arg, requires_arch<sve>) noexcept
539631
{
540-
const auto arg_bits = svreinterpret_u64(arg);
632+
const auto arg_bits = svreinterpret_u64(static_cast<detail::sve_sizeless_t<double>>(arg));
541633
const auto result_bits = svnot_x(detail::sve_ptrue<double>(), arg_bits);
542634
return svreinterpret_f64(result_bits);
543635
}
@@ -557,25 +649,25 @@ namespace xsimd
557649
template <class A, class T, class U>
558650
XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl(batch<T, A> const& arg, index<1>) noexcept
559651
{
560-
return svreinterpret_u8(arg);
652+
return svreinterpret_u8(static_cast<sve_sizeless_t<T>>(arg));
561653
}
562654

563655
template <class A, class T, class U>
564656
XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl(batch<T, A> const& arg, index<2>) noexcept
565657
{
566-
return svreinterpret_u16(arg);
658+
return svreinterpret_u16(static_cast<sve_sizeless_t<T>>(arg));
567659
}
568660

569661
template <class A, class T, class U>
570662
XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl(batch<T, A> const& arg, index<4>) noexcept
571663
{
572-
return svreinterpret_u32(arg);
664+
return svreinterpret_u32(static_cast<sve_sizeless_t<T>>(arg));
573665
}
574666

575667
template <class A, class T, class U>
576668
XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl(batch<T, A> const& arg, index<8>) noexcept
577669
{
578-
return svreinterpret_u64(arg);
670+
return svreinterpret_u64(static_cast<sve_sizeless_t<T>>(arg));
579671
}
580672

581673
template <class A, class T, class U = as_unsigned_integer_t<T>>
@@ -825,7 +917,7 @@ namespace xsimd
825917
template <class A, class T, detail::sve_enable_all_t<T> = 0>
826918
XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& a, batch<T, A> const& b, requires_arch<sve>) noexcept
827919
{
828-
return svsel(cond, a, b);
920+
return svsel(cond, static_cast<detail::sve_sizeless_t<T>>(a), static_cast<detail::sve_sizeless_t<T>>(b));
829921
}
830922

831923
template <class A, class T, bool... b>
@@ -964,7 +1056,7 @@ namespace xsimd
9641056
// create a predicate with only the I-th lane activated
9651057
const auto iota = detail::sve_iota<T>();
9661058
const auto index_predicate = svcmpeq(detail::sve_ptrue<T>(), iota, static_cast<as_unsigned_integer_t<T>>(I));
967-
return svsel(index_predicate, broadcast<A, T>(val, sve {}), arg);
1059+
return svsel(index_predicate, static_cast<detail::sve_sizeless_t<T>>(broadcast<A, T>(val, sve {})), static_cast<detail::sve_sizeless_t<T>>(arg));
9681060
}
9691061

9701062
// first
@@ -992,61 +1084,61 @@ namespace xsimd
9921084
template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_unsigned_t<R, 1> = 0>
9931085
XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
9941086
{
995-
return svreinterpret_u8(arg);
1087+
return svreinterpret_u8(static_cast<detail::sve_sizeless_t<T>>(arg));
9961088
}
9971089

9981090
template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_signed_t<R, 1> = 0>
9991091
XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
10001092
{
1001-
return svreinterpret_s8(arg);
1093+
return svreinterpret_s8(static_cast<detail::sve_sizeless_t<T>>(arg));
10021094
}
10031095

10041096
template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_unsigned_t<R, 2> = 0>
10051097
XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
10061098
{
1007-
return svreinterpret_u16(arg);
1099+
return svreinterpret_u16(static_cast<detail::sve_sizeless_t<T>>(arg));
10081100
}
10091101

10101102
template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_signed_t<R, 2> = 0>
10111103
XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
10121104
{
1013-
return svreinterpret_s16(arg);
1105+
return svreinterpret_s16(static_cast<detail::sve_sizeless_t<T>>(arg));
10141106
}
10151107

10161108
template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_unsigned_t<R, 4> = 0>
10171109
XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
10181110
{
1019-
return svreinterpret_u32(arg);
1111+
return svreinterpret_u32(static_cast<detail::sve_sizeless_t<T>>(arg));
10201112
}
10211113

10221114
template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_signed_t<R, 4> = 0>
10231115
XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
10241116
{
1025-
return svreinterpret_s32(arg);
1117+
return svreinterpret_s32(static_cast<detail::sve_sizeless_t<T>>(arg));
10261118
}
10271119

10281120
template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_unsigned_t<R, 8> = 0>
10291121
XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
10301122
{
1031-
return svreinterpret_u64(arg);
1123+
return svreinterpret_u64(static_cast<detail::sve_sizeless_t<T>>(arg));
10321124
}
10331125

10341126
template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_signed_t<R, 8> = 0>
10351127
XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
10361128
{
1037-
return svreinterpret_s64(arg);
1129+
return svreinterpret_s64(static_cast<detail::sve_sizeless_t<T>>(arg));
10381130
}
10391131

10401132
template <class A, class T, detail::sve_enable_all_t<T> = 0>
10411133
XSIMD_INLINE batch<float, A> bitwise_cast(batch<T, A> const& arg, batch<float, A> const&, requires_arch<sve>) noexcept
10421134
{
1043-
return svreinterpret_f32(arg);
1135+
return svreinterpret_f32(static_cast<detail::sve_sizeless_t<T>>(arg));
10441136
}
10451137

10461138
template <class A, class T, detail::sve_enable_all_t<T> = 0>
10471139
XSIMD_INLINE batch<double, A> bitwise_cast(batch<T, A> const& arg, batch<double, A> const&, requires_arch<sve>) noexcept
10481140
{
1049-
return svreinterpret_f64(arg);
1141+
return svreinterpret_f64(static_cast<detail::sve_sizeless_t<T>>(arg));
10501142
}
10511143

10521144
// batch_bool_cast

0 commit comments

Comments
 (0)