/***************************************************************************
 * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
 * Martin Renou                                                             *
 * Copyright (c) QuantStack                                                 *
 * Copyright (c) Serge Guelton                                              *
 *                                                                          *
 * Distributed under the terms of the BSD 3-Clause License.                 *
 *                                                                          *
 * The full license is in the file LICENSE, distributed with this software. *
 ****************************************************************************/
1111
1212#ifndef XSIMD_BENCHMARK_HPP
1313#define XSIMD_BENCHMARK_HPP
1414
15+ #include " xsimd/xsimd.hpp"
1516#include < chrono>
17+ #include < iostream>
1618#include < string>
1719#include < vector>
18- #include < iostream>
19- #include " xsimd/xsimd.hpp"
2020
2121namespace xsimd
2222{
2323 template <class T >
2424 std::string batch_name ();
2525
26- template <> inline std::string batch_name<batch<float , 4 >>() { return " sse/neon float" ; }
27- template <> inline std::string batch_name<batch<double , 2 >>() { return " sse/neon double" ; }
28- template <> inline std::string batch_name<batch<float , 8 >>() { return " avx float" ; }
29- template <> inline std::string batch_name<batch<double , 4 >>() { return " avx double" ; }
30- template <> inline std::string batch_name<batch<float , 7 >>() { return " fallback float" ; }
31- template <> inline std::string batch_name<batch<double , 3 >>() { return " fallback double" ; }
26+ template <>
27+ inline std::string batch_name<batch<float , 4 >>() { return " sse/neon float" ; }
28+ template <>
29+ inline std::string batch_name<batch<double , 2 >>() { return " sse/neon double" ; }
30+ template <>
31+ inline std::string batch_name<batch<float , 8 >>() { return " avx float" ; }
32+ template <>
33+ inline std::string batch_name<batch<double , 4 >>() { return " avx double" ; }
34+ template <>
35+ inline std::string batch_name<batch<float , 7 >>() { return " fallback float" ; }
36+ template <>
37+ inline std::string batch_name<batch<double , 3 >>() { return " fallback double" ; }
3238
// Elapsed-time type: a double-precision tick count with a period of one
// millisecond (std::milli).
using duration_type = std::chrono::duration<double, std::milli>;
3440
@@ -111,7 +117,7 @@ namespace xsimd
111117 auto start = std::chrono::steady_clock::now ();
112118 for (size_t i = 0 ; i < s; ++i)
113119 {
114- res[i] = f (lhs[i], rhs[i]);
120+ res[i] = f (lhs[i], rhs[i]);
115121 }
116122 auto end = std::chrono::steady_clock::now ();
117123 auto tmp = end - start;
@@ -130,7 +136,7 @@ namespace xsimd
130136 auto start = std::chrono::steady_clock::now ();
131137 for (size_t i = 0 ; i < s; ++i)
132138 {
133- res[i] = f (op0[i], op1[i], op2[i]);
139+ res[i] = f (op0[i], op1[i], op2[i]);
134140 }
135141 auto end = std::chrono::steady_clock::now ();
136142 auto tmp = end - start;
@@ -175,7 +181,7 @@ namespace xsimd
175181 size_t k = j + B::size;
176182 size_t l = k + B::size;
177183 B blhs (&lhs[i], aligned_mode ()), blhs2 (&lhs[j], aligned_mode ()),
178- blhs3 (&lhs[k], aligned_mode ()), blhs4 (&lhs[l], aligned_mode ());
184+ blhs3 (&lhs[k], aligned_mode ()), blhs4 (&lhs[l], aligned_mode ());
179185 B bres = f (blhs);
180186 B bres2 = f (blhs2);
181187 B bres3 = f (blhs3);
@@ -228,9 +234,9 @@ namespace xsimd
228234 size_t k = j + B::size;
229235 size_t l = k + B::size;
230236 B blhs (&lhs[i], aligned_mode ()), brhs (&rhs[i], aligned_mode ()),
231- blhs2 (&lhs[j], aligned_mode ()), brhs2 (&rhs[j], aligned_mode ());
237+ blhs2 (&lhs[j], aligned_mode ()), brhs2 (&rhs[j], aligned_mode ());
232238 B blhs3 (&lhs[k], aligned_mode ()), brhs3 (&rhs[k], aligned_mode ()),
233- blhs4 (&lhs[l], aligned_mode ()), brhs4 (&rhs[l], aligned_mode ());
239+ blhs4 (&lhs[l], aligned_mode ()), brhs4 (&rhs[l], aligned_mode ());
234240 B bres = f (blhs, brhs);
235241 B bres2 = f (blhs2, brhs2);
236242 B bres3 = f (blhs3, brhs3);
@@ -247,7 +253,6 @@ namespace xsimd
247253 return t_res;
248254 }
249255
250-
251256 template <class B , class F , class V >
252257 duration_type benchmark_simd (F f, V& op0, V& op1, V& op2, V& res, std::size_t number)
253258 {
@@ -259,8 +264,8 @@ namespace xsimd
259264 for (std::size_t i = 0 ; i <= (s - B::size); i += B::size)
260265 {
261266 B bop0 (&op0[i], aligned_mode ()),
262- bop1 (&op1[i], aligned_mode ()),
263- bop2 (&op2[i], aligned_mode ());
267+ bop1 (&op1[i], aligned_mode ()),
268+ bop2 (&op2[i], aligned_mode ());
264269 B bres = f (bop0, bop1, bop2);
265270 bres.store_aligned (&res[i]);
266271 }
@@ -558,85 +563,105 @@ namespace xsimd
558563 out << " ============================" << std::endl;
559564 }
560565
561-
562- # define DEFINE_OP_FUNCTOR_2OP ( OP, NAME) \
563- struct NAME ##_fn { \
564- template <class T >\
565- inline T operator ()(const T& lhs, const T& rhs) const { return lhs OP rhs; }\
566- inline std::string name () const { return #NAME; }\
566+ # define DEFINE_OP_FUNCTOR_2OP ( OP, NAME ) \
567+ struct NAME ##_fn \
568+ { \
569+ template <class T > \
570+ inline T operator ()(const T& lhs, const T& rhs) const { return lhs OP rhs; } \
571+ inline std::string name () const { return #NAME; } \
567572 }
568573
// DEFINE_FUNCTOR_1OP(FN)
// Expands to `struct FN_fn`, a functor around the one-argument function FN:
//   - operator()(x) brings xsimd::FN into scope with a using-declaration and
//     then calls FN(x) unqualified, so overloads found through
//     argument-dependent lookup take part in overload resolution as well.
//   - name() returns FN spelled as a string.
// The expansion carries no trailing semicolon; the invocation supplies it.
#define DEFINE_FUNCTOR_1OP(FN) \
    struct FN##_fn \
    { \
        template <class T> \
        inline T operator()(const T& x) const \
        { \
            using xsimd::FN; \
            return FN(x); \
        } \
        inline std::string name() const \
        { \
            return #FN; \
        } \
    }
575585
// DEFINE_FUNCTOR_1OP_TEMPLATE(FN, N, ...)
// Expands to `struct FN_N_fn`, a functor around the function template FN:
//   - operator()(x) brings xsimd::FN into scope and calls
//     FN<T, __VA_ARGS__>(x), i.e. the variadic macro arguments are forwarded
//     as explicit template arguments after T.
//   - name() returns the adjacent string literals #FN " " #N, which the
//     compiler concatenates into a single string.
// N only takes part in the struct name and the label.
// The expansion carries no trailing semicolon; the invocation supplies it.
#define DEFINE_FUNCTOR_1OP_TEMPLATE(FN, N, ...) \
    struct FN##_##N##_fn \
    { \
        template <class T> \
        inline T operator()(const T& x) const \
        { \
            using xsimd::FN; \
            return FN<T, __VA_ARGS__>(x); \
        } \
        inline std::string name() const \
        { \
            return #FN " " #N; \
        } \
    }
582597
// DEFINE_FUNCTOR_2OP(FN)
// Expands to `struct FN_fn`, a functor around the two-argument function FN:
//   - operator()(lhs, rhs) brings xsimd::FN into scope with a
//     using-declaration and then calls FN(lhs, rhs) unqualified.
//   - name() returns FN spelled as a string.
// The expansion carries no trailing semicolon; the invocation supplies it.
#define DEFINE_FUNCTOR_2OP(FN) \
    struct FN##_fn \
    { \
        template <class T> \
        inline T operator()(const T& lhs, const T& rhs) const \
        { \
            using xsimd::FN; \
            return FN(lhs, rhs); \
        } \
        inline std::string name() const \
        { \
            return #FN; \
        } \
    }
589609
// DEFINE_FUNCTOR_3OP(FN)
// Expands to `struct FN_fn`, a functor around the three-argument function FN:
//   - operator()(op0, op1, op2) brings xsimd::FN into scope with a
//     using-declaration and then calls FN(op0, op1, op2) unqualified.
//   - name() returns FN spelled as a string.
// The expansion carries no trailing semicolon; the invocation supplies it.
#define DEFINE_FUNCTOR_3OP(FN) \
    struct FN##_fn \
    { \
        template <class T> \
        inline T operator()(const T& op0, const T& op1, const T& op2) const \
        { \
            using xsimd::FN; \
            return FN(op0, op1, op2); \
        } \
        inline std::string name() const \
        { \
            return #FN; \
        } \
    }
596621
597- DEFINE_OP_FUNCTOR_2OP (+, add);
598- DEFINE_OP_FUNCTOR_2OP (-, sub);
599- DEFINE_OP_FUNCTOR_2OP (*, mul);
600- DEFINE_OP_FUNCTOR_2OP (/, div);
601-
602- DEFINE_FUNCTOR_1OP (exp);
603- DEFINE_FUNCTOR_1OP (exp2);
604- DEFINE_FUNCTOR_1OP (expm1);
605- DEFINE_FUNCTOR_1OP (log);
606- DEFINE_FUNCTOR_1OP (log10);
607- DEFINE_FUNCTOR_1OP (log2);
608- DEFINE_FUNCTOR_1OP (log1p);
609-
610- DEFINE_FUNCTOR_1OP (sin);
611- DEFINE_FUNCTOR_1OP (cos);
612- DEFINE_FUNCTOR_1OP (tan);
613- DEFINE_FUNCTOR_1OP (asin);
614- DEFINE_FUNCTOR_1OP (acos);
615- DEFINE_FUNCTOR_1OP (atan);
616-
617- DEFINE_FUNCTOR_1OP (sinh);
618- DEFINE_FUNCTOR_1OP (cosh);
619- DEFINE_FUNCTOR_1OP (tanh);
620- DEFINE_FUNCTOR_1OP (asinh);
621- DEFINE_FUNCTOR_1OP (acosh);
622- DEFINE_FUNCTOR_1OP (atanh);
623-
624- DEFINE_FUNCTOR_2OP (pow);
625- DEFINE_FUNCTOR_1OP (sqrt);
626- DEFINE_FUNCTOR_1OP (cbrt);
627- DEFINE_FUNCTOR_2OP (hypot);
628-
629- DEFINE_FUNCTOR_1OP (ceil);
630- DEFINE_FUNCTOR_1OP (floor);
631- DEFINE_FUNCTOR_1OP (trunc);
632- DEFINE_FUNCTOR_1OP (round);
633- DEFINE_FUNCTOR_1OP (nearbyint);
634- DEFINE_FUNCTOR_1OP (rint);
635-
636- DEFINE_FUNCTOR_2OP (fmod);
637- DEFINE_FUNCTOR_2OP (remainder);
638- DEFINE_FUNCTOR_2OP (fdim);
639- DEFINE_FUNCTOR_3OP (clip);
622+ DEFINE_OP_FUNCTOR_2OP (+, add);
623+ DEFINE_OP_FUNCTOR_2OP (-, sub);
624+ DEFINE_OP_FUNCTOR_2OP (*, mul);
625+ DEFINE_OP_FUNCTOR_2OP (/, div);
626+
627+ DEFINE_FUNCTOR_1OP (exp);
628+ DEFINE_FUNCTOR_1OP (exp2);
629+ DEFINE_FUNCTOR_1OP (expm1);
630+ DEFINE_FUNCTOR_1OP (log);
631+ DEFINE_FUNCTOR_1OP (log10);
632+ DEFINE_FUNCTOR_1OP (log2);
633+ DEFINE_FUNCTOR_1OP (log1p);
634+
635+ DEFINE_FUNCTOR_1OP (sin);
636+ DEFINE_FUNCTOR_1OP (cos);
637+ DEFINE_FUNCTOR_1OP (tan);
638+ DEFINE_FUNCTOR_1OP (asin);
639+ DEFINE_FUNCTOR_1OP (acos);
640+ DEFINE_FUNCTOR_1OP (atan);
641+
642+ DEFINE_FUNCTOR_1OP (sinh);
643+ DEFINE_FUNCTOR_1OP (cosh);
644+ DEFINE_FUNCTOR_1OP (tanh);
645+ DEFINE_FUNCTOR_1OP (asinh);
646+ DEFINE_FUNCTOR_1OP (acosh);
647+ DEFINE_FUNCTOR_1OP (atanh);
648+
649+ DEFINE_FUNCTOR_2OP (pow);
650+ DEFINE_FUNCTOR_1OP (sqrt);
651+ DEFINE_FUNCTOR_1OP (cbrt);
652+ DEFINE_FUNCTOR_2OP (hypot);
653+
654+ DEFINE_FUNCTOR_1OP (ceil);
655+ DEFINE_FUNCTOR_1OP (floor);
656+ DEFINE_FUNCTOR_1OP (trunc);
657+ DEFINE_FUNCTOR_1OP (round);
658+ DEFINE_FUNCTOR_1OP (nearbyint);
659+ DEFINE_FUNCTOR_1OP (rint);
660+
661+ DEFINE_FUNCTOR_2OP (fmod);
662+ DEFINE_FUNCTOR_2OP (remainder);
663+ DEFINE_FUNCTOR_2OP (fdim);
664+ DEFINE_FUNCTOR_3OP (clip);
640665#if 0
641666DEFINE_FUNCTOR_1OP(isfinite);
642667DEFINE_FUNCTOR_1OP(isinf);
@@ -646,16 +671,16 @@ DEFINE_FUNCTOR_1OP(is_even);
646671#endif
647672
// Functors for horner and estrin, compiled only when XSIMD_POLY_BENCHMARKS
// is defined. Each line defines FN_N_fn, which calls FN<T, 1, 2, ..., N>(x)
// and reports the label "FN N".
#ifdef XSIMD_POLY_BENCHMARKS
DEFINE_FUNCTOR_1OP_TEMPLATE(horner, 5, 1, 2, 3, 4, 5);
DEFINE_FUNCTOR_1OP_TEMPLATE(estrin, 5, 1, 2, 3, 4, 5);
DEFINE_FUNCTOR_1OP_TEMPLATE(horner, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
DEFINE_FUNCTOR_1OP_TEMPLATE(estrin, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
DEFINE_FUNCTOR_1OP_TEMPLATE(horner, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
DEFINE_FUNCTOR_1OP_TEMPLATE(estrin, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
DEFINE_FUNCTOR_1OP_TEMPLATE(horner, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14);
DEFINE_FUNCTOR_1OP_TEMPLATE(estrin, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14);
DEFINE_FUNCTOR_1OP_TEMPLATE(horner, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
DEFINE_FUNCTOR_1OP_TEMPLATE(estrin, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
#endif
660685
661686}
0 commit comments