@@ -1239,18 +1239,23 @@ inline XMVECTOR XM_CALLCONV XMVectorSwizzle
12391239 unsigned int elem[4] = { E0, E1, E2, E3 };
12401240 __m128i vControl = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&elem[0]));
12411241 return _mm_permutevar_ps(V, vControl);
1242+ #else
1243+ #if defined(__GNUC__) && !defined(__clang__)
1244+ // work around a GCC optimization behavior that breaks this function
1245+ XMVECTORU32 T;
1246+ T.v = V;
1247+ auto aPtr = reinterpret_cast<const uint32_t*>(&T);
12421248#else
12431249 auto aPtr = reinterpret_cast<const uint32_t*>(&V);
1250+ #endif
12441251
1245- XMVECTOR Result;
1246- auto pWork = reinterpret_cast<uint32_t*>(&Result);
1247-
1248- pWork[0] = aPtr[E0];
1249- pWork[1] = aPtr[E1];
1250- pWork[2] = aPtr[E2];
1251- pWork[3] = aPtr[E3];
1252+ XMVECTORU32 vResult;
1253+ vResult.u[0] = aPtr[E0];
1254+ vResult.u[1] = aPtr[E1];
1255+ vResult.u[2] = aPtr[E2];
1256+ vResult.u[3] = aPtr[E3];
12521257
1253- return Result ;
1258+ return vResult.v ;
12541259#endif
12551260}
12561261
@@ -1313,29 +1318,38 @@ inline XMVECTOR XM_CALLCONV XMVectorPermute
13131318#else
13141319
13151320 const uint32_t* aPtr[2];
1321+
1322+ #if defined(__GNUC__) && !defined(__clang__)
1323+ // work around a GCC optimization behavior that breaks this function
1324+ XMVECTORU32 T1;
1325+ T1.v = V1;
1326+ XMVECTORU32 T2;
1327+ T2.v = V2;
1328+ aPtr[0] = reinterpret_cast<const uint32_t*>(&T1);
1329+ aPtr[1] = reinterpret_cast<const uint32_t*>(&T2);
1330+ #else
13161331 aPtr[0] = reinterpret_cast<const uint32_t*>(&V1);
13171332 aPtr[1] = reinterpret_cast<const uint32_t*>(&V2);
1333+ #endif
13181334
1319- XMVECTOR Result;
1320- auto pWork = reinterpret_cast<uint32_t*>(&Result);
1321-
1335+ XMVECTORU32 vResult;
13221336 const uint32_t i0 = PermuteX & 3;
13231337 const uint32_t vi0 = PermuteX >> 2;
1324- pWork [0] = aPtr[vi0][i0];
1338+ vResult.u [0] = aPtr[vi0][i0];
13251339
13261340 const uint32_t i1 = PermuteY & 3;
13271341 const uint32_t vi1 = PermuteY >> 2;
1328- pWork [1] = aPtr[vi1][i1];
1342+ vResult.u [1] = aPtr[vi1][i1];
13291343
13301344 const uint32_t i2 = PermuteZ & 3;
13311345 const uint32_t vi2 = PermuteZ >> 2;
1332- pWork [2] = aPtr[vi2][i2];
1346+ vResult.u [2] = aPtr[vi2][i2];
13331347
13341348 const uint32_t i3 = PermuteW & 3;
13351349 const uint32_t vi3 = PermuteW >> 2;
1336- pWork [3] = aPtr[vi3][i3];
1350+ vResult.u [3] = aPtr[vi3][i3];
13371351
1338- return Result ;
1352+ return vResult.v ;
13391353#endif
13401354}
13411355
0 commit comments