@@ -619,8 +619,9 @@ class vbool4 {
619619template <int i0, int i1, int i2, int i3>
620620OIIO_FORCEINLINE vbool4 shuffle (const vbool4& a);
621621
622- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
623- template <int i> OIIO_FORCEINLINE vbool4 shuffle (const vbool4& a);
622+ /// broadcast_element<i>(a) returns a simd variable in which all lanes have
623+ /// value a[i]. The lane index i is a template argument (fixed at compile time).
624+ template <int i> OIIO_FORCEINLINE vbool4 broadcast_element (const vbool4& a);
624625
625626// / Helper: as rapid as possible extraction of one component, when the
626627// / index is fixed.
@@ -770,8 +771,9 @@ class vbool8 {
770771template <int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
771772OIIO_FORCEINLINE vbool8 shuffle (const vbool8& a);
772773
773- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
774- template <int i> OIIO_FORCEINLINE vbool8 shuffle (const vbool8& a);
774+ /// broadcast_element<i>(a) returns a simd variable in which all lanes have
775+ /// value a[i]. The lane index i is a template argument (fixed at compile time).
776+ template <int i> OIIO_FORCEINLINE vbool8 broadcast_element (const vbool8& a);
775777
776778// / Helper: as rapid as possible extraction of one component, when the
777779// / index is fixed.
@@ -1163,8 +1165,9 @@ vint4 srl (const vint4& val, const unsigned int bits);
11631165template <int i0, int i1, int i2, int i3>
11641166OIIO_FORCEINLINE vint4 shuffle (const vint4& a);
11651167
1166- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
1167- template <int i> OIIO_FORCEINLINE vint4 shuffle (const vint4& a);
1168+ /// broadcast_element<i>(a) returns a simd variable in which all lanes have
1169+ /// value a[i]. The lane index i is a template argument (fixed at compile time).
1170+ template <int i> OIIO_FORCEINLINE vint4 broadcast_element (const vint4& a);
11681171
11691172// / Helper: as rapid as possible extraction of one component, when the
11701173// / index is fixed.
@@ -1463,8 +1466,9 @@ vint8 srl (const vint8& val, const unsigned int bits);
14631466template <int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
14641467OIIO_FORCEINLINE vint8 shuffle (const vint8& a);
14651468
1466- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
1467- template <int i> OIIO_FORCEINLINE vint8 shuffle (const vint8& a);
1469+ /// broadcast_element<i>(a) returns a simd variable in which all lanes have
1470+ /// value a[i]. The lane index i is a template argument (fixed at compile time).
1471+ template <int i> OIIO_FORCEINLINE vint8 broadcast_element (const vint8& a);
14681472
14691473// / Helper: as rapid as possible extraction of one component, when the
14701474// / index is fixed.
@@ -1773,8 +1777,9 @@ template<int i> vint16 shuffle4 (const vint16& a);
17731777template <int i0, int i1, int i2, int i3>
17741778vint16 shuffle (const vint16& a);
17751779
1776- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
1777- template <int i> vint16 shuffle (const vint16& a);
1780+ /// broadcast_element<i>(a) returns a simd variable in which all lanes have
1781+ /// value a[i]. The lane index i is a template argument (fixed at compile time).
1782+ template <int i> vint16 broadcast_element (const vint16& a);
17781783
17791784// / Helper: as rapid as possible extraction of one component, when the
17801785// / index is fixed.
@@ -2098,8 +2103,9 @@ class vfloat4 {
20982103template <int i0, int i1, int i2, int i3>
20992104OIIO_FORCEINLINE vfloat4 shuffle (const vfloat4& a);
21002105
2101- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
2102- template <int i> OIIO_FORCEINLINE vfloat4 shuffle (const vfloat4& a);
2106+ /// broadcast_element<i>(a) returns a simd variable in which all lanes have
2107+ /// value a[i]. The lane index i is a template argument (fixed at compile time).
2108+ template <int i> OIIO_FORCEINLINE vfloat4 broadcast_element (const vfloat4& a);
21032109
21042110// / Return { a[i0], a[i1], b[i2], b[i3] }, where i0..i3 are the extracted
21052111// / 2-bit indices packed into the template parameter i (going from the low
@@ -2721,8 +2727,8 @@ class vfloat8 {
27212727template <int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
27222728OIIO_FORCEINLINE vfloat8 shuffle (const vfloat8& a);
27232729
2724- // / shuffle <i>(a) is the same as shuffle<i,i,i,i,...>(a)
2725- template <int i> OIIO_FORCEINLINE vfloat8 shuffle (const vfloat8& a);
2730+ /// broadcast_element<i>(a) returns a simd variable in which all lanes have value a[i]; i is a template argument.
2731+ template <int i> OIIO_FORCEINLINE vfloat8 broadcast_element (const vfloat8& a);
27262732
27272733// / Helper: as rapid as possible extraction of one component, when the
27282734// / index is fixed.
@@ -3051,8 +3057,9 @@ template<int i> OIIO_FORCEINLINE vfloat16 shuffle4 (const vfloat16& a);
30513057template <int i0, int i1, int i2, int i3>
30523058OIIO_FORCEINLINE vfloat16 shuffle (const vfloat16& a);
30533059
3054- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
3055- template <int i> vfloat16 shuffle (const vfloat16& a);
3060+ /// broadcast_element<i>(a) returns a simd variable in which all lanes have
3061+ /// value a[i]. The lane index i is a template argument (fixed at compile time).
3062+ template <int i> vfloat16 broadcast_element (const vfloat16& a);
30563063
30573064// / Helper: as rapid as possible extraction of one component, when the
30583065// / index is fixed.
@@ -3473,11 +3480,17 @@ OIIO_FORCEINLINE vbool4 shuffle (const vbool4& a) {
34733480#endif
34743481}
34753482
3476- // / shuffle<i>(a) is the same as shuffle<i,i,i,i>(a)
3477- template <int i> OIIO_FORCEINLINE vbool4 shuffle (const vbool4& a) {
3483+ /// broadcast_element<i>(a) returns a simd variable in which all lanes have
3484+ /// value a[i]. Implemented by repeating i in all four slots of shuffle<>.
3485+ template <int i> OIIO_FORCEINLINE vbool4 broadcast_element (const vbool4& a) {
34783486 return shuffle<i,i,i,i>(a);
34793487}
34803488
3489+ // DEPRECATED(3.1): old name; use broadcast_element instead
// Thin forwarder kept for callers of the single-index shuffle<i>(a). The
// deprecation is comment-only here: no deprecation attribute is attached,
// so callers get no compile-time warning.
3490+ template <int i> OIIO_FORCEINLINE vbool4 shuffle (const vbool4& a) {
3491+ return broadcast_element<i>(a);
3492+ }
3493+
34813494
34823495// / Helper: as rapid as possible extraction of one component, when the
34833496// / index is fixed.
@@ -3801,10 +3814,15 @@ OIIO_FORCEINLINE vbool8 shuffle (const vbool8& a) {
38013814#endif
38023815}
38033816
3804- template <int i> OIIO_FORCEINLINE vbool8 shuffle (const vbool8& a) {
/// broadcast_element<i>(a) returns a vbool8 in which all lanes have value
/// a[i]. Implemented by repeating i in all eight slots of shuffle<>.
3817+ template <int i> OIIO_FORCEINLINE vbool8 broadcast_element (const vbool8& a) {
38053818 return shuffle<i,i,i,i,i,i,i,i>(a);
38063819}
38073820
3821+ // DEPRECATED(3.1): old name; use broadcast_element instead
// Thin forwarder kept for callers of the single-index shuffle<i>(a). The
// deprecation is comment-only here: no deprecation attribute is attached,
// so callers get no compile-time warning.
3822+ template <int i> OIIO_FORCEINLINE vbool8 shuffle (const vbool8& a) {
3823+ return broadcast_element<i>(a);
3824+ }
3825+
38083826
38093827template <int i>
38103828OIIO_FORCEINLINE bool extract (const vbool8& a) {
@@ -4744,7 +4762,14 @@ OIIO_FORCEINLINE vint4 shuffle (const vint4& a) {
47444762#endif
47454763}
47464764
4747- template <int i> OIIO_FORCEINLINE vint4 shuffle (const vint4& a) { return shuffle<i,i,i,i>(a); }
/// broadcast_element<i>(a) returns a vint4 in which all lanes have value
/// a[i]. Implemented by repeating i in all four slots of shuffle<>.
4765+ template <int i> OIIO_FORCEINLINE vint4 broadcast_element (const vint4& a) {
4766+ return shuffle<i,i,i,i>(a);
4767+ }
4768+
4769+ // DEPRECATED(3.1): old name; use broadcast_element instead
// Thin forwarder kept for callers of the single-index shuffle<i>(a). The
// deprecation is comment-only here: no deprecation attribute is attached,
// so callers get no compile-time warning.
4770+ template <int i> OIIO_FORCEINLINE vint4 shuffle (const vint4& a) {
4771+ return broadcast_element<i>(a);
4772+ }
47484773
47494774
47504775template <int i>
@@ -5584,10 +5609,15 @@ OIIO_FORCEINLINE vint8 shuffle (const vint8& a) {
55845609#endif
55855610}
55865611
5587- template <int i> OIIO_FORCEINLINE vint8 shuffle (const vint8& a) {
/// broadcast_element<i>(a) returns a vint8 in which all lanes have value
/// a[i]. Implemented by repeating i in all eight slots of shuffle<>.
5612+ template <int i> OIIO_FORCEINLINE vint8 broadcast_element (const vint8& a) {
55885613 return shuffle<i,i,i,i,i,i,i,i>(a);
55895614}
55905615
5616+ // DEPRECATED(3.1): old name; use broadcast_element instead
// Thin forwarder kept for callers of the single-index shuffle<i>(a). The
// deprecation is comment-only here: no deprecation attribute is attached,
// so callers get no compile-time warning.
5617+ template <int i> OIIO_FORCEINLINE vint8 shuffle (const vint8& a) {
5618+ return broadcast_element<i>(a);
5619+ }
5620+
55915621
55925622template <int i>
55935623OIIO_FORCEINLINE int extract (const vint8& v) {
@@ -6395,8 +6425,15 @@ vint16 shuffle (const vint16& a) {
63956425#endif
63966426}
63976427
6398- template <int i> vint16 shuffle (const vint16& a) {
6399- return shuffle<i,i,i,i> (a);
/// broadcast_element<i>(a) for vint16: reads lane i with operator[] and
/// returns it as a vint16.
// NOTE(review): the scalar-to-vint16 conversion on return presumably goes
// through a constructor that replicates the value into every lane -- confirm.
6428+ template <int i> vint16 broadcast_element (const vint16& a) {
6429+ return a[i];
6430+ }
6431+
6432+ // DEPRECATED(3.1): old name and nonstandard use
// Marked with OIIO_DEPRECATED and forwards to broadcast_element<i>(a).
// NOTE(review): the previous body was shuffle<i,i,i,i>(a); whether that
// produced the same lanes as broadcast_element<i>(a) is not visible from
// here -- confirm existing callers are unaffected by the switch.
6433+ template <int i>
6434+ OIIO_DEPRECATED (" Use broadcast_element (3.1)" )
6435+ vint16 shuffle (const vint16& a) {
6436+ return broadcast_element<i> (a);
64006437}
64016438
64026439
@@ -7253,19 +7290,26 @@ OIIO_FORCEINLINE vfloat4 shuffle (const vfloat4& a) {
72537290#endif
72547291}
72557292
7256- template <int i> OIIO_FORCEINLINE vfloat4 shuffle (const vfloat4& a) { return shuffle<i,i,i,i>(a); }
/// broadcast_element<i>(a) returns a vfloat4 in which all lanes have value
/// a[i]. Generic version: repeats i in all four slots of shuffle<>.
7293+ template <int i> OIIO_FORCEINLINE vfloat4 broadcast_element (const vfloat4& a) {
7294+ return shuffle<i,i,i,i>(a);
7295+ }
7296+
7297+ // DEPRECATED(3.1): old name; use broadcast_element instead
// Thin forwarder kept for callers of the single-index shuffle<i>(a). The
// deprecation is comment-only here: no deprecation attribute is attached,
// so callers get no compile-time warning.
7298+ template <int i> OIIO_FORCEINLINE vfloat4 shuffle (const vfloat4& a) {
7299+ return broadcast_element<i>(a);
7300+ }
72577301
// NEON-only explicit specializations of broadcast_element<i> for vfloat4,
// one per lane index 0..3. Each takes one half of the register
// (vget_low_f32 for i = 0, 1; vget_high_f32 for i = 2, 3) and then
// duplicates lane 0 or 1 of that half into the result with vdupq_lane_f32.
72587302#if OIIO_SIMD_NEON
7259- template <> OIIO_FORCEINLINE vfloat4 shuffle <0 > (const vfloat4& a) {
7303+ template <> OIIO_FORCEINLINE vfloat4 broadcast_element <0 > (const vfloat4& a) {
72607304 float32x2_t t = vget_low_f32 (a.simd ()); return vdupq_lane_f32 (t,0 );
72617305}
7262- template <> OIIO_FORCEINLINE vfloat4 shuffle <1 > (const vfloat4& a) {
7306+ template <> OIIO_FORCEINLINE vfloat4 broadcast_element <1 > (const vfloat4& a) {
72637307 float32x2_t t = vget_low_f32 (a.simd ()); return vdupq_lane_f32 (t,1 );
72647308}
7265- template <> OIIO_FORCEINLINE vfloat4 shuffle <2 > (const vfloat4& a) {
7309+ template <> OIIO_FORCEINLINE vfloat4 broadcast_element <2 > (const vfloat4& a) {
72667310 float32x2_t t = vget_high_f32 (a.simd ()); return vdupq_lane_f32 (t,0 );
72677311}
7268- template <> OIIO_FORCEINLINE vfloat4 shuffle <3 > (const vfloat4& a) {
7312+ template <> OIIO_FORCEINLINE vfloat4 broadcast_element <3 > (const vfloat4& a) {
72697313 float32x2_t t = vget_high_f32 (a.simd ()); return vdupq_lane_f32 (t,1 );
72707314}
72717315#endif
@@ -8265,9 +8309,9 @@ OIIO_FORCEINLINE matrix44 matrix44::transposed () const {
82658309
82668310OIIO_FORCEINLINE vfloat3 matrix44::transformp (const vfloat3 &V) const {
82678311#if OIIO_SIMD_SSE
8268- vfloat4 R = shuffle <0 >(V) * m_row[0 ] + shuffle <1 >(V) * m_row[1 ] +
8269- shuffle <2 >(V) * m_row[2 ] + m_row[3 ];
8270- R = R / shuffle <3 >(R);
8312+ vfloat4 R = broadcast_element <0 >(V) * m_row[0 ] + broadcast_element <1 >(V) * m_row[1 ] +
8313+ broadcast_element <2 >(V) * m_row[2 ] + m_row[3 ];
8314+ R = R / broadcast_element <3 >(R);
82718315 return vfloat3 (R.xyz0 ());
82728316#else
82738317 value_t a, b, c, w;
@@ -8281,8 +8325,8 @@ OIIO_FORCEINLINE vfloat3 matrix44::transformp (const vfloat3 &V) const {
82818325
82828326OIIO_FORCEINLINE vfloat3 matrix44::transformv (const vfloat3 &V) const {
82838327#if OIIO_SIMD_SSE
8284- vfloat4 R = shuffle <0 >(V) * m_row[0 ] + shuffle <1 >(V) * m_row[1 ] +
8285- shuffle <2 >(V) * m_row[2 ];
8328+ vfloat4 R = broadcast_element <0 >(V) * m_row[0 ] + broadcast_element <1 >(V) * m_row[1 ] +
8329+ broadcast_element <2 >(V) * m_row[2 ];
82868330 return vfloat3 (R.xyz0 ());
82878331#else
82888332 value_t a, b, c;
@@ -8296,8 +8340,8 @@ OIIO_FORCEINLINE vfloat3 matrix44::transformv (const vfloat3 &V) const {
82968340OIIO_FORCEINLINE vfloat3 matrix44::transformvT (const vfloat3 &V) const {
82978341#if OIIO_SIMD_SSE
82988342 matrix44 T = transposed ();
8299- vfloat4 R = shuffle <0 >(V) * T[0 ] + shuffle <1 >(V) * T[1 ] +
8300- shuffle <2 >(V) * T[2 ];
8343+ vfloat4 R = broadcast_element <0 >(V) * T[0 ] + broadcast_element <1 >(V) * T[1 ] +
8344+ broadcast_element <2 >(V) * T[2 ];
83018345 return vfloat3 (R.xyz0 ());
83028346#else
83038347 value_t a, b, c;
@@ -8311,8 +8355,8 @@ OIIO_FORCEINLINE vfloat3 matrix44::transformvT (const vfloat3 &V) const {
83118355OIIO_FORCEINLINE vfloat4 operator * (const vfloat4 &V, const matrix44& M)
83128356{
83138357#if OIIO_SIMD_SSE
8314- return shuffle <0 >(V) * M[0 ] + shuffle <1 >(V) * M[1 ] +
8315- shuffle <2 >(V) * M[2 ] + shuffle <3 >(V) * M[3 ];
8358+ return broadcast_element <0 >(V) * M[0 ] + broadcast_element <1 >(V) * M[1 ] +
8359+ broadcast_element <2 >(V) * M[2 ] + broadcast_element <3 >(V) * M[3 ];
83168360#else
83178361 float a, b, c, w;
83188362 a = V[0 ] * M[0 ][0 ] + V[1 ] * M[1 ][0 ] + V[2 ] * M[2 ][0 ] + V[3 ] * M[3 ][0 ];
@@ -9034,14 +9078,19 @@ OIIO_FORCEINLINE vfloat8 shuffle (const vfloat8& a) {
90349078#endif
90359079}
90369080
9037- template <int i> OIIO_FORCEINLINE vfloat8 shuffle (const vfloat8& a) {
/// broadcast_element<i>(a) for vfloat8.
/// When OIIO_SIMD_AVX >= 2 it permutes a with _mm256_permutevar8x32_ps using
/// an index vector built from i; otherwise it reads lane i with operator[]
/// and returns that value as a vfloat8.
// NOTE(review): assumes vint8(i) holds i in every lane and that the
// float-to-vfloat8 conversion replicates the scalar -- confirm.
9081+ template <int i> OIIO_FORCEINLINE vfloat8 broadcast_element (const vfloat8& a) {
90389082#if OIIO_SIMD_AVX >= 2
90399083 return _mm256_permutevar8x32_ps (a, vint8 (i));
90409084#else
9041- return shuffle<i,i,i,i,i,i,i,i>(a) ;
9085+ return a[i] ;
90429086#endif
90439087}
90449088
9089+ // DEPRECATED(3.1): old name; use broadcast_element instead
// Thin forwarder kept for callers of the single-index shuffle<i>(a). The
// deprecation is comment-only here: no deprecation attribute is attached,
// so callers get no compile-time warning.
9090+ template <int i> OIIO_FORCEINLINE vfloat8 shuffle (const vfloat8& a) {
9091+ return broadcast_element<i>(a);
9092+ }
9093+
90459094
90469095template <int i>
90479096OIIO_FORCEINLINE float extract (const vfloat8& v) {
@@ -9104,9 +9153,9 @@ OIIO_FORCEINLINE vfloat8 vreduce_add (const vfloat8& v) {
91049153 vfloat8 ab_cd_0_0_ef_gh_0_0 = _mm256_hadd_ps (v.simd (), _mm256_setzero_ps ());
91059154 vfloat8 abcd_0_0_0_efgh_0_0_0 = _mm256_hadd_ps (ab_cd_0_0_ef_gh_0_0, _mm256_setzero_ps ());
91069155 // get efgh in the 0-idx slot
9107- vfloat8 efgh = shuffle <4 >(abcd_0_0_0_efgh_0_0_0);
9156+ vfloat8 efgh = broadcast_element <4 >(abcd_0_0_0_efgh_0_0_0);
91089157 vfloat8 final_sum = abcd_0_0_0_efgh_0_0_0 + efgh;
9109- return shuffle <0 >(final_sum);
9158+ return broadcast_element <0 >(final_sum);
91109159#else
91119160 vfloat4 hadd4 = vreduce_add (v.lo ()) + vreduce_add (v.hi ());
91129161 return vfloat8 (hadd4, hadd4);
@@ -9913,7 +9962,14 @@ vfloat16 shuffle (const vfloat16& a) {
99139962#endif
99149963}
99159964
9916- template <int i> vfloat16 shuffle (const vfloat16& a) {
/// broadcast_element<i>(a) for vfloat16: reads lane i with operator[] and
/// returns it as a vfloat16.
// NOTE(review): the scalar-to-vfloat16 conversion on return presumably goes
// through a constructor that replicates the value into every lane -- confirm.
9965+ template <int i> vfloat16 broadcast_element (const vfloat16& a) {
9966+ return a[i];
9967+ }
9968+
9969+ // DEPRECATED(3.1): old name and nonstandard use
// NOTE(review): although the deprecation message points callers at
// broadcast_element, this wrapper still forwards to the 4-index
// shuffle<i,i,i,i>(a) rather than broadcast_element<i>(a), unlike the
// vint16 deprecated wrapper. Confirm whether keeping the old behavior
// here is intentional.
9970+ template <int i>
9971+ OIIO_DEPRECATED (" Use broadcast_element (3.1)" )
9972+ vfloat16 shuffle (const vfloat16& a) {
99179973 return shuffle<i,i,i,i> (a);
99189974}
99199975
0 commit comments