@@ -78,22 +78,114 @@ namespace xsimd
7878 // enable for all SVE supported types
7979 template <class T >
8080 using sve_enable_all_t = std::enable_if_t <std::is_arithmetic<T>::value, int >;
81+
82+ // Trait describing the SVE types that correspond to a scalar,
83+ // parameterised by (byte size, signedness, floating-point-ness).
84+ //
85+ // `scalar` is the matching fixed-width scalar (int8_t, ..., float,
86+ // double). SVE load/store intrinsics are overloaded on these
87+ // pointer types, so remapping integers through `scalar` avoids
88+ // platform quirks such as darwin arm64's `long` vs `long long`
89+ // distinction and rejects `char` as an element type.
90+ //
91+ // `sizeless` is the matching sizeless SVE type. xsimd stores SVE
92+ // vectors as fixed-size attributed types (arm_sve_vector_bits),
93+ // which clang treats as implicitly convertible to every sizeless
94+ // SVE type — including multi-vector tuples — making the overloaded
95+ // svreinterpret_*/svsel/etc. intrinsics ambiguous. Static-casting
96+ // to `sizeless` first collapses the overload set to the single
97+ // 1-vector candidate.
98+ template <size_t N, bool Signed, bool FP>
99+ struct sve_type ;
100+ template <>
101+ struct sve_type <1 , true , false >
102+ {
103+ using scalar = int8_t ;
104+ using sizeless = svint8_t ;
105+ };
106+ template <>
107+ struct sve_type <1 , false , false >
108+ {
109+ using scalar = uint8_t ;
110+ using sizeless = svuint8_t ;
111+ };
112+ template <>
113+ struct sve_type <2 , true , false >
114+ {
115+ using scalar = int16_t ;
116+ using sizeless = svint16_t ;
117+ };
118+ template <>
119+ struct sve_type <2 , false , false >
120+ {
121+ using scalar = uint16_t ;
122+ using sizeless = svuint16_t ;
123+ };
124+ template <>
125+ struct sve_type <4 , true , false >
126+ {
127+ using scalar = int32_t ;
128+ using sizeless = svint32_t ;
129+ };
130+ template <>
131+ struct sve_type <4 , false , false >
132+ {
133+ using scalar = uint32_t ;
134+ using sizeless = svuint32_t ;
135+ };
136+ template <>
137+ struct sve_type <8 , true , false >
138+ {
139+ using scalar = int64_t ;
140+ using sizeless = svint64_t ;
141+ };
142+ template <>
143+ struct sve_type <8 , false , false >
144+ {
145+ using scalar = uint64_t ;
146+ using sizeless = svuint64_t ;
147+ };
148+ template <>
149+ struct sve_type <4 , true , true >
150+ {
151+ using scalar = float ;
152+ using sizeless = svfloat32_t ;
153+ };
154+ template <>
155+ struct sve_type <8 , true , true >
156+ {
157+ using scalar = double ;
158+ using sizeless = svfloat64_t ;
159+ };
160+
161+ template <class T >
162+ using sve_type_for = sve_type<sizeof (T), std::is_signed<T>::value, std::is_floating_point<T>::value>;
163+
164+ template <class T >
165+ using sve_sizeless_t = typename sve_type_for<T>::sizeless;
166+
167+ // Remap integer Ts to their matching fixed-width counterpart (via
168+ // sve_type::scalar) so svld1/svst1 see the pointer type their
169+ // overload set expects; pass non-integer Ts through unchanged.
170+ template <class T , bool IsInt = std::is_integral<std::decay_t <T>>::value>
171+ struct sve_fix_integer_impl
172+ {
173+ using type = T;
174+ };
175+ template <class T >
176+ struct sve_fix_integer_impl <T, true >
177+ {
178+ using type = typename sve_type_for<std::decay_t <T>>::scalar;
179+ };
180+
181+ template <class T >
182+ using sve_fix_char_t = typename sve_fix_integer_impl<T>::type;
81183 } // namespace detail
82184
83185 /* ********
84186 * Load *
85187 *********/
86188
87- namespace detail
88- {
89- // "char" is not allowed in SVE load/store operations
90- using sve_fix_char_t_impl = std::conditional_t <std::is_signed<char >::value, int8_t , uint8_t >;
91-
92- template <class T >
93- using sve_fix_char_t = std::conditional_t <std::is_same<char , std::decay_t <T>>::value,
94- sve_fix_char_t_impl, T>;
95- }
96-
97189 template <class A , class T , detail::sve_enable_all_t <T> = 0 >
98190 XSIMD_INLINE batch<T, A> load_aligned (T const * src, convert<T>, requires_arch<sve>) noexcept
99191 {
@@ -108,7 +200,7 @@ namespace xsimd
108200
109201 // load_masked
110202 template <class A , class T , bool ... Values, class Mode , detail::sve_enable_all_t <T> = 0 >
111- XSIMD_INLINE batch<T, A> load_masked (T const * mem, batch_bool_constant<float , A, Values...> mask , Mode, requires_arch<sve>) noexcept
203+ XSIMD_INLINE batch<T, A> load_masked (T const * mem, batch_bool_constant<float , A, Values...>, Mode, requires_arch<sve>) noexcept
112204 {
113205 return svld1 (detail::sve_pmask<Values...>(), reinterpret_cast <detail::sve_fix_char_t <T> const *>(mem));
114206 }
@@ -323,25 +415,25 @@ namespace xsimd
323415 template <class A , class T , detail::enable_sized_unsigned_t <T, 1 > = 0 >
324416 XSIMD_INLINE batch<T, A> neg (batch<T, A> const & arg, requires_arch<sve>) noexcept
325417 {
326- return svreinterpret_u8 (svneg_x (detail::sve_ptrue<T>(), svreinterpret_s8 (arg)));
418+ return svreinterpret_u8 (svneg_x (detail::sve_ptrue<T>(), svreinterpret_s8 (static_cast <detail:: sve_sizeless_t <T>>( arg) )));
327419 }
328420
329421 template <class A , class T , detail::enable_sized_unsigned_t <T, 2 > = 0 >
330422 XSIMD_INLINE batch<T, A> neg (batch<T, A> const & arg, requires_arch<sve>) noexcept
331423 {
332- return svreinterpret_u16 (svneg_x (detail::sve_ptrue<T>(), svreinterpret_s16 (arg)));
424+ return svreinterpret_u16 (svneg_x (detail::sve_ptrue<T>(), svreinterpret_s16 (static_cast <detail:: sve_sizeless_t <T>>( arg) )));
333425 }
334426
335427 template <class A , class T , detail::enable_sized_unsigned_t <T, 4 > = 0 >
336428 XSIMD_INLINE batch<T, A> neg (batch<T, A> const & arg, requires_arch<sve>) noexcept
337429 {
338- return svreinterpret_u32 (svneg_x (detail::sve_ptrue<T>(), svreinterpret_s32 (arg)));
430+ return svreinterpret_u32 (svneg_x (detail::sve_ptrue<T>(), svreinterpret_s32 (static_cast <detail:: sve_sizeless_t <T>>( arg) )));
339431 }
340432
341433 template <class A , class T , detail::enable_sized_unsigned_t <T, 8 > = 0 >
342434 XSIMD_INLINE batch<T, A> neg (batch<T, A> const & arg, requires_arch<sve>) noexcept
343435 {
344- return svreinterpret_u64 (svneg_x (detail::sve_ptrue<T>(), svreinterpret_s64 (arg)));
436+ return svreinterpret_u64 (svneg_x (detail::sve_ptrue<T>(), svreinterpret_s64 (static_cast <detail:: sve_sizeless_t <T>>( arg) )));
345437 }
346438
347439 template <class A , class T , detail::sve_enable_signed_int_or_floating_point_t <T> = 0 >
@@ -405,17 +497,17 @@ namespace xsimd
405497 template <class A >
406498 XSIMD_INLINE batch<float , A> bitwise_and (batch<float , A> const & lhs, batch<float , A> const & rhs, requires_arch<sve>) noexcept
407499 {
408- const auto lhs_bits = svreinterpret_u32 (lhs);
409- const auto rhs_bits = svreinterpret_u32 (rhs);
500+ const auto lhs_bits = svreinterpret_u32 (static_cast <detail:: sve_sizeless_t < float >>( lhs) );
501+ const auto rhs_bits = svreinterpret_u32 (static_cast <detail:: sve_sizeless_t < float >>( rhs) );
410502 const auto result_bits = svand_x (detail::sve_ptrue<float >(), lhs_bits, rhs_bits);
411503 return svreinterpret_f32 (result_bits);
412504 }
413505
414506 template <class A >
415507 XSIMD_INLINE batch<double , A> bitwise_and (batch<double , A> const & lhs, batch<double , A> const & rhs, requires_arch<sve>) noexcept
416508 {
417- const auto lhs_bits = svreinterpret_u64 (lhs);
418- const auto rhs_bits = svreinterpret_u64 (rhs);
509+ const auto lhs_bits = svreinterpret_u64 (static_cast <detail:: sve_sizeless_t < double >>( lhs) );
510+ const auto rhs_bits = svreinterpret_u64 (static_cast <detail:: sve_sizeless_t < double >>( rhs) );
419511 const auto result_bits = svand_x (detail::sve_ptrue<double >(), lhs_bits, rhs_bits);
420512 return svreinterpret_f64 (result_bits);
421513 }
@@ -436,17 +528,17 @@ namespace xsimd
436528 template <class A >
437529 XSIMD_INLINE batch<float , A> bitwise_andnot (batch<float , A> const & lhs, batch<float , A> const & rhs, requires_arch<sve>) noexcept
438530 {
439- const auto lhs_bits = svreinterpret_u32 (lhs);
440- const auto rhs_bits = svreinterpret_u32 (rhs);
531+ const auto lhs_bits = svreinterpret_u32 (static_cast <detail:: sve_sizeless_t < float >>( lhs) );
532+ const auto rhs_bits = svreinterpret_u32 (static_cast <detail:: sve_sizeless_t < float >>( rhs) );
441533 const auto result_bits = svbic_x (detail::sve_ptrue<float >(), lhs_bits, rhs_bits);
442534 return svreinterpret_f32 (result_bits);
443535 }
444536
445537 template <class A >
446538 XSIMD_INLINE batch<double , A> bitwise_andnot (batch<double , A> const & lhs, batch<double , A> const & rhs, requires_arch<sve>) noexcept
447539 {
448- const auto lhs_bits = svreinterpret_u64 (lhs);
449- const auto rhs_bits = svreinterpret_u64 (rhs);
540+ const auto lhs_bits = svreinterpret_u64 (static_cast <detail:: sve_sizeless_t < double >>( lhs) );
541+ const auto rhs_bits = svreinterpret_u64 (static_cast <detail:: sve_sizeless_t < double >>( rhs) );
450542 const auto result_bits = svbic_x (detail::sve_ptrue<double >(), lhs_bits, rhs_bits);
451543 return svreinterpret_f64 (result_bits);
452544 }
@@ -467,17 +559,17 @@ namespace xsimd
467559 template <class A >
468560 XSIMD_INLINE batch<float , A> bitwise_or (batch<float , A> const & lhs, batch<float , A> const & rhs, requires_arch<sve>) noexcept
469561 {
470- const auto lhs_bits = svreinterpret_u32 (lhs);
471- const auto rhs_bits = svreinterpret_u32 (rhs);
562+ const auto lhs_bits = svreinterpret_u32 (static_cast <detail:: sve_sizeless_t < float >>( lhs) );
563+ const auto rhs_bits = svreinterpret_u32 (static_cast <detail:: sve_sizeless_t < float >>( rhs) );
472564 const auto result_bits = svorr_x (detail::sve_ptrue<float >(), lhs_bits, rhs_bits);
473565 return svreinterpret_f32 (result_bits);
474566 }
475567
476568 template <class A >
477569 XSIMD_INLINE batch<double , A> bitwise_or (batch<double , A> const & lhs, batch<double , A> const & rhs, requires_arch<sve>) noexcept
478570 {
479- const auto lhs_bits = svreinterpret_u64 (lhs);
480- const auto rhs_bits = svreinterpret_u64 (rhs);
571+ const auto lhs_bits = svreinterpret_u64 (static_cast <detail:: sve_sizeless_t < double >>( lhs) );
572+ const auto rhs_bits = svreinterpret_u64 (static_cast <detail:: sve_sizeless_t < double >>( rhs) );
481573 const auto result_bits = svorr_x (detail::sve_ptrue<double >(), lhs_bits, rhs_bits);
482574 return svreinterpret_f64 (result_bits);
483575 }
@@ -498,17 +590,17 @@ namespace xsimd
498590 template <class A >
499591 XSIMD_INLINE batch<float , A> bitwise_xor (batch<float , A> const & lhs, batch<float , A> const & rhs, requires_arch<sve>) noexcept
500592 {
501- const auto lhs_bits = svreinterpret_u32 (lhs);
502- const auto rhs_bits = svreinterpret_u32 (rhs);
593+ const auto lhs_bits = svreinterpret_u32 (static_cast <detail:: sve_sizeless_t < float >>( lhs) );
594+ const auto rhs_bits = svreinterpret_u32 (static_cast <detail:: sve_sizeless_t < float >>( rhs) );
503595 const auto result_bits = sveor_x (detail::sve_ptrue<float >(), lhs_bits, rhs_bits);
504596 return svreinterpret_f32 (result_bits);
505597 }
506598
507599 template <class A >
508600 XSIMD_INLINE batch<double , A> bitwise_xor (batch<double , A> const & lhs, batch<double , A> const & rhs, requires_arch<sve>) noexcept
509601 {
510- const auto lhs_bits = svreinterpret_u64 (lhs);
511- const auto rhs_bits = svreinterpret_u64 (rhs);
602+ const auto lhs_bits = svreinterpret_u64 (static_cast <detail:: sve_sizeless_t < double >>( lhs) );
603+ const auto rhs_bits = svreinterpret_u64 (static_cast <detail:: sve_sizeless_t < double >>( rhs) );
512604 const auto result_bits = sveor_x (detail::sve_ptrue<double >(), lhs_bits, rhs_bits);
513605 return svreinterpret_f64 (result_bits);
514606 }
@@ -529,15 +621,15 @@ namespace xsimd
529621 template <class A >
530622 XSIMD_INLINE batch<float , A> bitwise_not (batch<float , A> const & arg, requires_arch<sve>) noexcept
531623 {
532- const auto arg_bits = svreinterpret_u32 (arg);
624+ const auto arg_bits = svreinterpret_u32 (static_cast <detail:: sve_sizeless_t < float >>( arg) );
533625 const auto result_bits = svnot_x (detail::sve_ptrue<float >(), arg_bits);
534626 return svreinterpret_f32 (result_bits);
535627 }
536628
537629 template <class A >
538630 XSIMD_INLINE batch<double , A> bitwise_not (batch<double , A> const & arg, requires_arch<sve>) noexcept
539631 {
540- const auto arg_bits = svreinterpret_u64 (arg);
632+ const auto arg_bits = svreinterpret_u64 (static_cast <detail:: sve_sizeless_t < double >>( arg) );
541633 const auto result_bits = svnot_x (detail::sve_ptrue<double >(), arg_bits);
542634 return svreinterpret_f64 (result_bits);
543635 }
@@ -557,25 +649,25 @@ namespace xsimd
557649 template <class A , class T , class U >
558650 XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl (batch<T, A> const & arg, index<1 >) noexcept
559651 {
560- return svreinterpret_u8 (arg);
652+ return svreinterpret_u8 (static_cast < sve_sizeless_t <T>>( arg) );
561653 }
562654
563655 template <class A , class T , class U >
564656 XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl (batch<T, A> const & arg, index<2 >) noexcept
565657 {
566- return svreinterpret_u16 (arg);
658+ return svreinterpret_u16 (static_cast < sve_sizeless_t <T>>( arg) );
567659 }
568660
569661 template <class A , class T , class U >
570662 XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl (batch<T, A> const & arg, index<4 >) noexcept
571663 {
572- return svreinterpret_u32 (arg);
664+ return svreinterpret_u32 (static_cast < sve_sizeless_t <T>>( arg) );
573665 }
574666
575667 template <class A , class T , class U >
576668 XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl (batch<T, A> const & arg, index<8 >) noexcept
577669 {
578- return svreinterpret_u64 (arg);
670+ return svreinterpret_u64 (static_cast < sve_sizeless_t <T>>( arg) );
579671 }
580672
581673 template <class A , class T , class U = as_unsigned_integer_t <T>>
@@ -825,7 +917,7 @@ namespace xsimd
825917 template <class A , class T , detail::sve_enable_all_t <T> = 0 >
826918 XSIMD_INLINE batch<T, A> select (batch_bool<T, A> const & cond, batch<T, A> const & a, batch<T, A> const & b, requires_arch<sve>) noexcept
827919 {
828- return svsel (cond, a, b );
920+ return svsel (cond, static_cast <detail:: sve_sizeless_t <T>>(a), static_cast <detail:: sve_sizeless_t <T>>(b) );
829921 }
830922
831923 template <class A , class T , bool ... b>
@@ -964,7 +1056,7 @@ namespace xsimd
9641056 // create a predicate with only the I-th lane activated
9651057 const auto iota = detail::sve_iota<T>();
9661058 const auto index_predicate = svcmpeq (detail::sve_ptrue<T>(), iota, static_cast <as_unsigned_integer_t <T>>(I));
967- return svsel (index_predicate, broadcast<A, T>(val, sve {}), arg);
1059+ return svsel (index_predicate, static_cast <detail:: sve_sizeless_t <T>>( broadcast<A, T>(val, sve {})), static_cast <detail:: sve_sizeless_t <T>>( arg) );
9681060 }
9691061
9701062 // first
@@ -992,61 +1084,61 @@ namespace xsimd
9921084 template <class A , class T , class R , detail::sve_enable_all_t <T> = 0 , detail::enable_sized_unsigned_t <R, 1 > = 0 >
9931085 XSIMD_INLINE batch<R, A> bitwise_cast (batch<T, A> const & arg, batch<R, A> const &, requires_arch<sve>) noexcept
9941086 {
995- return svreinterpret_u8 (arg);
1087+ return svreinterpret_u8 (static_cast <detail:: sve_sizeless_t <T>>( arg) );
9961088 }
9971089
9981090 template <class A , class T , class R , detail::sve_enable_all_t <T> = 0 , detail::enable_sized_signed_t <R, 1 > = 0 >
9991091 XSIMD_INLINE batch<R, A> bitwise_cast (batch<T, A> const & arg, batch<R, A> const &, requires_arch<sve>) noexcept
10001092 {
1001- return svreinterpret_s8 (arg);
1093+ return svreinterpret_s8 (static_cast <detail:: sve_sizeless_t <T>>( arg) );
10021094 }
10031095
10041096 template <class A , class T , class R , detail::sve_enable_all_t <T> = 0 , detail::enable_sized_unsigned_t <R, 2 > = 0 >
10051097 XSIMD_INLINE batch<R, A> bitwise_cast (batch<T, A> const & arg, batch<R, A> const &, requires_arch<sve>) noexcept
10061098 {
1007- return svreinterpret_u16 (arg);
1099+ return svreinterpret_u16 (static_cast <detail:: sve_sizeless_t <T>>( arg) );
10081100 }
10091101
10101102 template <class A , class T , class R , detail::sve_enable_all_t <T> = 0 , detail::enable_sized_signed_t <R, 2 > = 0 >
10111103 XSIMD_INLINE batch<R, A> bitwise_cast (batch<T, A> const & arg, batch<R, A> const &, requires_arch<sve>) noexcept
10121104 {
1013- return svreinterpret_s16 (arg);
1105+ return svreinterpret_s16 (static_cast <detail:: sve_sizeless_t <T>>( arg) );
10141106 }
10151107
10161108 template <class A , class T , class R , detail::sve_enable_all_t <T> = 0 , detail::enable_sized_unsigned_t <R, 4 > = 0 >
10171109 XSIMD_INLINE batch<R, A> bitwise_cast (batch<T, A> const & arg, batch<R, A> const &, requires_arch<sve>) noexcept
10181110 {
1019- return svreinterpret_u32 (arg);
1111+ return svreinterpret_u32 (static_cast <detail:: sve_sizeless_t <T>>( arg) );
10201112 }
10211113
10221114 template <class A , class T , class R , detail::sve_enable_all_t <T> = 0 , detail::enable_sized_signed_t <R, 4 > = 0 >
10231115 XSIMD_INLINE batch<R, A> bitwise_cast (batch<T, A> const & arg, batch<R, A> const &, requires_arch<sve>) noexcept
10241116 {
1025- return svreinterpret_s32 (arg);
1117+ return svreinterpret_s32 (static_cast <detail:: sve_sizeless_t <T>>( arg) );
10261118 }
10271119
10281120 template <class A , class T , class R , detail::sve_enable_all_t <T> = 0 , detail::enable_sized_unsigned_t <R, 8 > = 0 >
10291121 XSIMD_INLINE batch<R, A> bitwise_cast (batch<T, A> const & arg, batch<R, A> const &, requires_arch<sve>) noexcept
10301122 {
1031- return svreinterpret_u64 (arg);
1123+ return svreinterpret_u64 (static_cast <detail:: sve_sizeless_t <T>>( arg) );
10321124 }
10331125
10341126 template <class A , class T , class R , detail::sve_enable_all_t <T> = 0 , detail::enable_sized_signed_t <R, 8 > = 0 >
10351127 XSIMD_INLINE batch<R, A> bitwise_cast (batch<T, A> const & arg, batch<R, A> const &, requires_arch<sve>) noexcept
10361128 {
1037- return svreinterpret_s64 (arg);
1129+ return svreinterpret_s64 (static_cast <detail:: sve_sizeless_t <T>>( arg) );
10381130 }
10391131
10401132 template <class A , class T , detail::sve_enable_all_t <T> = 0 >
10411133 XSIMD_INLINE batch<float , A> bitwise_cast (batch<T, A> const & arg, batch<float , A> const &, requires_arch<sve>) noexcept
10421134 {
1043- return svreinterpret_f32 (arg);
1135+ return svreinterpret_f32 (static_cast <detail:: sve_sizeless_t <T>>( arg) );
10441136 }
10451137
10461138 template <class A , class T , detail::sve_enable_all_t <T> = 0 >
10471139 XSIMD_INLINE batch<double , A> bitwise_cast (batch<T, A> const & arg, batch<double , A> const &, requires_arch<sve>) noexcept
10481140 {
1049- return svreinterpret_f64 (arg);
1141+ return svreinterpret_f64 (static_cast <detail:: sve_sizeless_t <T>>( arg) );
10501142 }
10511143
10521144 // batch_bool_cast
0 commit comments