Skip to content

Commit 0fcaeff

Browse files
committed
Code review feedback
1 parent 47c89f2 commit 0fcaeff

1 file changed

Lines changed: 18 additions & 40 deletions

File tree

Inc/DirectXMathConvert.inl

Lines changed: 18 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -869,24 +869,6 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3(const XMFLOAT4X3* pSource) noexcept
869869
M.r[2] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T2), g_XMMask3));
870870
M.r[3] = vsetq_lane_f32(1.f, T3, 3);
871871
return M;
872-
#elif defined(_XM_SSE4_INTRINSICS_)
873-
XMVECTOR vTemp1 = _mm_loadu_ps(&pSource->m[0][0]);
874-
XMVECTOR vTemp2 = _mm_loadu_ps(&pSource->m[1][1]);
875-
XMVECTOR vTemp4 = _mm_loadu_ps(&pSource->m[2][2]);
876-
XMVECTOR vTemp3 = _mm_shuffle_ps(vTemp2, vTemp4, _MM_SHUFFLE(0, 0, 3, 2));
877-
vTemp2 = _mm_shuffle_ps(vTemp2, vTemp1, _MM_SHUFFLE(3, 3, 1, 0));
878-
vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 1, 0, 2));
879-
XMVECTOR zero = _mm_setzero_ps();
880-
vTemp1 = _mm_blend_ps(zero, vTemp1, 0x7);
881-
vTemp2 = _mm_blend_ps(zero, vTemp2, 0x7);
882-
vTemp3 = _mm_blend_ps(zero, vTemp3, 0x7);
883-
__m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4), 32 / 8);
884-
vTemp4i = _mm_or_si128(vTemp4i, g_XMIdentityR3);
885-
XMMATRIX M(vTemp1,
886-
vTemp2,
887-
vTemp3,
888-
_mm_castsi128_ps(vTemp4i));
889-
return M;
890872
#elif defined(_XM_SSE_INTRINSICS_)
891873
// Use unaligned load instructions to
892874
// load the 12 floats
@@ -903,11 +885,18 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3(const XMFLOAT4X3* pSource) noexcept
903885
// vTemp2 = x2,y2,z2,z2
904886
vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 1, 0, 2));
905887
// vTemp1 = x1,y1,z1,0
906-
vTemp1 = _mm_and_ps(vTemp1, g_XMMask3);
907888
// vTemp2 = x2,y2,z2,0
908-
vTemp2 = _mm_and_ps(vTemp2, g_XMMask3);
909889
// vTemp3 = x3,y3,z3,0
890+
#ifdef _XM_SSE4_INTRINSICS_
891+
XMVECTOR zero = _mm_setzero_ps();
892+
vTemp1 = _mm_blend_ps(zero, vTemp1, 0x7);
893+
vTemp2 = _mm_blend_ps(zero, vTemp2, 0x7);
894+
vTemp3 = _mm_blend_ps(zero, vTemp3, 0x7);
895+
#else
896+
vTemp1 = _mm_and_ps(vTemp1, g_XMMask3);
897+
vTemp2 = _mm_and_ps(vTemp2, g_XMMask3);
910898
vTemp3 = _mm_and_ps(vTemp3, g_XMMask3);
899+
#endif
911900
// vTemp4i = x4,y4,z4,0
912901
__m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4), 32 / 8);
913902
// vTemp4i = x4,y4,z4,1.0f
@@ -971,24 +960,6 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A(const XMFLOAT4X3A* pSource) noexcept
971960
M.r[2] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T2), g_XMMask3));
972961
M.r[3] = vsetq_lane_f32(1.f, T3, 3);
973962
return M;
974-
#elif defined(_XM_SSE4_INTRINSICS_)
975-
XMVECTOR vTemp1 = _mm_load_ps(&pSource->m[0][0]);
976-
XMVECTOR vTemp2 = _mm_load_ps(&pSource->m[1][1]);
977-
XMVECTOR vTemp4 = _mm_load_ps(&pSource->m[2][2]);
978-
XMVECTOR vTemp3 = _mm_shuffle_ps(vTemp2, vTemp4, _MM_SHUFFLE(0, 0, 3, 2));
979-
vTemp2 = _mm_shuffle_ps(vTemp2, vTemp1, _MM_SHUFFLE(3, 3, 1, 0));
980-
vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 1, 0, 2));
981-
XMVECTOR zero = _mm_setzero_ps();
982-
vTemp1 = _mm_blend_ps(zero, vTemp1, 0x7);
983-
vTemp2 = _mm_blend_ps(zero, vTemp2, 0x7);
984-
vTemp3 = _mm_blend_ps(zero, vTemp3, 0x7);
985-
__m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4), 32 / 8);
986-
vTemp4i = _mm_or_si128(vTemp4i, g_XMIdentityR3);
987-
XMMATRIX M(vTemp1,
988-
vTemp2,
989-
vTemp3,
990-
_mm_castsi128_ps(vTemp4i));
991-
return M;
992963
#elif defined(_XM_SSE_INTRINSICS_)
993964
// Use aligned load instructions to
994965
// load the 12 floats
@@ -1005,11 +976,18 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A(const XMFLOAT4X3A* pSource) noexcept
1005976
// vTemp2 = x2,y2,z2,z2
1006977
vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 1, 0, 2));
1007978
// vTemp1 = x1,y1,z1,0
1008-
vTemp1 = _mm_and_ps(vTemp1, g_XMMask3);
1009979
// vTemp2 = x2,y2,z2,0
1010-
vTemp2 = _mm_and_ps(vTemp2, g_XMMask3);
1011980
// vTemp3 = x3,y3,z3,0
981+
#ifdef _XM_SSE4_INTRINSICS_
982+
XMVECTOR zero = _mm_setzero_ps();
983+
vTemp1 = _mm_blend_ps(zero, vTemp1, 0x7);
984+
vTemp2 = _mm_blend_ps(zero, vTemp2, 0x7);
985+
vTemp3 = _mm_blend_ps(zero, vTemp3, 0x7);
986+
#else
987+
vTemp1 = _mm_and_ps(vTemp1, g_XMMask3);
988+
vTemp2 = _mm_and_ps(vTemp2, g_XMMask3);
1012989
vTemp3 = _mm_and_ps(vTemp3, g_XMMask3);
990+
#endif
1013991
// vTemp4i = x4,y4,z4,0
1014992
__m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4), 32 / 8);
1015993
// vTemp4i = x4,y4,z4,1.0f

0 commit comments

Comments (0)