Skip to content

Commit 6504050

Browse files
authored
Fix problems with XMVectorSwizzle/XMVectorPermute for GCC (#306)
1 parent 9c1dc2a commit 6504050

1 file changed

Lines changed: 30 additions & 16 deletions

File tree

Inc/DirectXMathVector.inl

Lines changed: 30 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1239,18 +1239,23 @@ inline XMVECTOR XM_CALLCONV XMVectorSwizzle
12391239
unsigned int elem[4] = { E0, E1, E2, E3 };
12401240
__m128i vControl = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&elem[0]));
12411241
return _mm_permutevar_ps(V, vControl);
1242+
#else
1243+
#if defined(__GNUC__) && !defined(__clang__)
1244+
// workaround some GCC optimization behavior that breaks this function
1245+
XMVECTORU32 T;
1246+
T.v = V;
1247+
auto aPtr = reinterpret_cast<const uint32_t*>(&T);
12421248
#else
12431249
auto aPtr = reinterpret_cast<const uint32_t*>(&V);
1250+
#endif
12441251

1245-
XMVECTOR Result;
1246-
auto pWork = reinterpret_cast<uint32_t*>(&Result);
1247-
1248-
pWork[0] = aPtr[E0];
1249-
pWork[1] = aPtr[E1];
1250-
pWork[2] = aPtr[E2];
1251-
pWork[3] = aPtr[E3];
1252+
XMVECTORU32 vResult;
1253+
vResult.u[0] = aPtr[E0];
1254+
vResult.u[1] = aPtr[E1];
1255+
vResult.u[2] = aPtr[E2];
1256+
vResult.u[3] = aPtr[E3];
12521257

1253-
return Result;
1258+
return vResult.v;
12541259
#endif
12551260
}
12561261

@@ -1313,29 +1318,38 @@ inline XMVECTOR XM_CALLCONV XMVectorPermute
13131318
#else
13141319

13151320
const uint32_t* aPtr[2];
1321+
1322+
#if defined(__GNUC__) && !defined(__clang__)
1323+
// workaround some GCC optimization behavior that breaks this function
1324+
XMVECTORU32 T1;
1325+
T1.v = V1;
1326+
XMVECTORU32 T2;
1327+
T2.v = V2;
1328+
aPtr[0] = reinterpret_cast<const uint32_t*>(&T1);
1329+
aPtr[1] = reinterpret_cast<const uint32_t*>(&T2);
1330+
#else
13161331
aPtr[0] = reinterpret_cast<const uint32_t*>(&V1);
13171332
aPtr[1] = reinterpret_cast<const uint32_t*>(&V2);
1333+
#endif
13181334

1319-
XMVECTOR Result;
1320-
auto pWork = reinterpret_cast<uint32_t*>(&Result);
1321-
1335+
XMVECTORU32 vResult;
13221336
const uint32_t i0 = PermuteX & 3;
13231337
const uint32_t vi0 = PermuteX >> 2;
1324-
pWork[0] = aPtr[vi0][i0];
1338+
vResult.u[0] = aPtr[vi0][i0];
13251339

13261340
const uint32_t i1 = PermuteY & 3;
13271341
const uint32_t vi1 = PermuteY >> 2;
1328-
pWork[1] = aPtr[vi1][i1];
1342+
vResult.u[1] = aPtr[vi1][i1];
13291343

13301344
const uint32_t i2 = PermuteZ & 3;
13311345
const uint32_t vi2 = PermuteZ >> 2;
1332-
pWork[2] = aPtr[vi2][i2];
1346+
vResult.u[2] = aPtr[vi2][i2];
13331347

13341348
const uint32_t i3 = PermuteW & 3;
13351349
const uint32_t vi3 = PermuteW >> 2;
1336-
pWork[3] = aPtr[vi3][i3];
1350+
vResult.u[3] = aPtr[vi3][i3];
13371351

1338-
return Result;
1352+
return vResult.v;
13391353
#endif
13401354
}
13411355

0 commit comments

Comments
 (0)