Skip to content

Commit f47622a

Browse files
cyb70289 authored and serge-sans-paille committed
SVE: Implement fused operations for all types
1 parent 03715d9 commit f47622a

File tree

1 file changed

+28
-13
lines changed

1 file changed

+28
-13
lines changed

include/xsimd/arch/xsimd_sve.hpp

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,34 @@ namespace xsimd
345345
return svabs_x(detail::sve_ptrue<T>(), arg);
346346
}
347347

348+
// fma: x * y + z
// Multiply-add overload for the SVE architecture. It is constrained by
// detail::sve_enable_all_t<T> rather than by a floating-point-only trait.
// The unnamed requires_arch<sve> parameter is never read in the body; it only
// takes part in overload selection.
349+
template <class A, class T, detail::sve_enable_all_t<T> = 0>
350+
inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<sve>) noexcept
351+
{
352+
// Forwards x, y, z unchanged to svmad_x under the predicate returned by
// detail::sve_ptrue<T>() (presumably "all lanes active" -- confirm in detail).
return svmad_x(detail::sve_ptrue<T>(), x, y, z);
353+
}
354+
355+
// fnma: z - x * y
// Multiply-subtract overload for the SVE architecture, constrained by
// detail::sve_enable_all_t<T>. The unnamed requires_arch<sve> parameter is
// never read in the body; it only takes part in overload selection.
356+
template <class A, class T, detail::sve_enable_all_t<T> = 0>
357+
inline batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<sve>) noexcept
358+
{
359+
// Same argument order as fma, but routed to svmsb_x so the product is
// subtracted from z instead of added to it.
return svmsb_x(detail::sve_ptrue<T>(), x, y, z);
360+
}
361+
362+
// fms: x * y - z
// Built on top of fnma instead of calling an intrinsic directly, and
// constrained by detail::sve_enable_all_t<T>. The unnamed requires_arch<sve>
// parameter is never read in the body; it only takes part in overload
// selection.
363+
template <class A, class T, detail::sve_enable_all_t<T> = 0>
364+
inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<sve>) noexcept
365+
{
366+
// fnma yields z - x * y; negating that result gives x * y - z.
// sve {} is the tag argument that selects the SVE overload of fnma.
return -fnma(x, y, z, sve {});
367+
}
368+
369+
// fnms: -(x * y) - z
// Built on top of fma instead of calling an intrinsic directly, and
// constrained by detail::sve_enable_all_t<T>. The unnamed requires_arch<sve>
// parameter is never read in the body; it only takes part in overload
// selection.
370+
template <class A, class T, detail::sve_enable_all_t<T> = 0>
371+
inline batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<sve>) noexcept
372+
{
373+
// fma yields x * y + z; negating that result gives -(x * y) - z.
// sve {} is the tag argument that selects the SVE overload of fma.
return -fma(x, y, z, sve {});
374+
}
375+
348376
/**********************
349377
* Logical operations *
350378
**********************/
@@ -798,19 +826,6 @@ namespace xsimd
798826
return svsqrt_x(detail::sve_ptrue<T>(), arg);
799827
}
800828

801-
// fused operations
// Deleted side of the diff: the earlier fma overload (x * y + z), which was
// restricted to floating-point T by detail::sve_enable_floating_point_t<T>.
802-
template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
803-
inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<sve>) noexcept
804-
{
805-
return svmad_x(detail::sve_ptrue<T>(), x, y, z);
806-
}
807-
808-
// Deleted side of the diff: the earlier fms overload (x * y - z), which was
// restricted to floating-point T by detail::sve_enable_floating_point_t<T>.
template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
809-
inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<sve>) noexcept
810-
{
811-
// Reuses the multiply-add intrinsic with a negated addend: x * y + (-z).
return svmad_x(detail::sve_ptrue<T>(), x, y, -z);
812-
}
813-
814829
// reciprocal
815830
template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
816831
inline batch<T, A> reciprocal(const batch<T, A>& arg, requires_arch<sve>) noexcept

0 commit comments

Comments
 (0)