Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions Inc/DirectXMathConvert.inl
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,10 @@ inline XMVECTOR XM_CALLCONV XMLoadFloat3A(const XMFLOAT3A* pSource) noexcept
float32x4_t V = vld1q_f32(reinterpret_cast<const float*>(pSource));
#endif
return vsetq_lane_f32(0, V, 3);
#elif defined(_XM_SSE4_INTRINSICS_)
// Reads an extra float which is zero'd
__m128 V = _mm_load_ps(&pSource->x);
return _mm_blend_ps(_mm_setzero_ps(), V, 0x7);
#elif defined(_XM_SSE_INTRINSICS_)
// Reads an extra float which is zero'd
__m128 V = _mm_load_ps(&pSource->x);
Expand Down Expand Up @@ -881,11 +885,18 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3(const XMFLOAT4X3* pSource) noexcept
// vTemp2 = x2,y2,z2,z2
vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 1, 0, 2));
// vTemp1 = x1,y1,z1,0
vTemp1 = _mm_and_ps(vTemp1, g_XMMask3);
// vTemp2 = x2,y2,z2,0
vTemp2 = _mm_and_ps(vTemp2, g_XMMask3);
// vTemp3 = x3,y3,z3,0
#ifdef _XM_SSE4_INTRINSICS_
XMVECTOR zero = _mm_setzero_ps();
vTemp1 = _mm_blend_ps(zero, vTemp1, 0x7);
vTemp2 = _mm_blend_ps(zero, vTemp2, 0x7);
vTemp3 = _mm_blend_ps(zero, vTemp3, 0x7);
#else
vTemp1 = _mm_and_ps(vTemp1, g_XMMask3);
vTemp2 = _mm_and_ps(vTemp2, g_XMMask3);
vTemp3 = _mm_and_ps(vTemp3, g_XMMask3);
#endif
// vTemp4i = x4,y4,z4,0
__m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4), 32 / 8);
// vTemp4i = x4,y4,z4,1.0f
Expand Down Expand Up @@ -965,11 +976,18 @@ inline XMMATRIX XM_CALLCONV XMLoadFloat4x3A(const XMFLOAT4X3A* pSource) noexcept
// vTemp2 = x2,y2,z2,z2
vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 1, 0, 2));
// vTemp1 = x1,y1,z1,0
vTemp1 = _mm_and_ps(vTemp1, g_XMMask3);
// vTemp2 = x2,y2,z2,0
vTemp2 = _mm_and_ps(vTemp2, g_XMMask3);
// vTemp3 = x3,y3,z3,0
#ifdef _XM_SSE4_INTRINSICS_
XMVECTOR zero = _mm_setzero_ps();
vTemp1 = _mm_blend_ps(zero, vTemp1, 0x7);
vTemp2 = _mm_blend_ps(zero, vTemp2, 0x7);
vTemp3 = _mm_blend_ps(zero, vTemp3, 0x7);
#else
vTemp1 = _mm_and_ps(vTemp1, g_XMMask3);
vTemp2 = _mm_and_ps(vTemp2, g_XMMask3);
vTemp3 = _mm_and_ps(vTemp3, g_XMMask3);
#endif
// vTemp4i = x4,y4,z4,0
__m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4), 32 / 8);
// vTemp4i = x4,y4,z4,1.0f
Expand Down
Loading