Known Issues in XNAMath v2.03
Tags: xnamath, directxmath
Originally posted to Chuck Walbourn's Blog on MSDN.
Some users of XNAMath have reported a range problem with the SSE implementations of the XMVectorFloor and XMVectorCeiling functions (i.e. whenever the values are greater than INT_MAX). These fixes will be included in a future release of XNAMath, but since XNAMath is an all-header implementation, affected developers can also apply the fix directly in the xnamathvector.inl file.
Here are the corrected versions:
// Returns a vector whose components are the floor (largest whole number not
// greater than the input) of the corresponding components of V.
//
// In the SSE path, components whose magnitude is 8388608 (2^23) or more, as
// well as NAN and INF, are returned unchanged: such floats carry no fractional
// part, and the float<->int conversion cannot represent all of them.
XMFINLINE XMVECTOR XMVectorFloor
(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    // Scalar path: apply floorf() to each of the four components.
    XMVECTOR vResult = {
        floorf(V.vector4_f32[0]),
        floorf(V.vector4_f32[1]),
        floorf(V.vector4_f32[2]),
        floorf(V.vector4_f32[3])
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0], g_XMAbsMask);
    // Per-lane mask: all ones where |V| is below 8388608, zero otherwise
    // (all floats with NO fractionals, NAN and INF)
    vTest = _mm_cmplt_epi32(vTest, g_XMNoFraction);
    // Truncate toward zero and convert back to float. Truncation plus a
    // compare-and-adjust is used instead of biasing by one-half and rounding
    // to nearest: for whole odd inputs (e.g. 3.0f) the biased value lands
    // exactly on x.5 and round-to-even then picks the wrong integer.
    __m128i vInt = _mm_cvttps_epi32(V);
    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
    // Truncation rounds negative fractional inputs up; detect those lanes
    XMVECTOR vLarger = _mm_cmpgt_ps(vResult, V);
    // Turn the compare mask into an adjustment: 0 -> 0.0f, 0xFFFFFFFF -> -1.0f
    vLarger = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vLarger)[0]);
    vResult = _mm_add_ps(vResult, vLarger);
    // All numbers less than 8388608 will use the converted value
    vResult = _mm_and_ps(vResult, reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest, reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult, reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
    // No implementation for this path is given in this listing.
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Returns a vector whose components are the ceiling (smallest whole number not
// less than the input) of the corresponding components of V.
//
// In the SSE path, components whose magnitude is 8388608 (2^23) or more, as
// well as NAN and INF, are returned unchanged: such floats carry no fractional
// part, and the float<->int conversion cannot represent all of them.
XMFINLINE XMVECTOR XMVectorCeiling(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    // Scalar path: apply ceilf() to each of the four components.
    XMVECTOR vResult = {
        ceilf(V.vector4_f32[0]),
        ceilf(V.vector4_f32[1]),
        ceilf(V.vector4_f32[2]),
        ceilf(V.vector4_f32[3])
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0], g_XMAbsMask);
    // Per-lane mask: all ones where |V| is below 8388608, zero otherwise
    // (all floats with NO fractionals, NAN and INF)
    vTest = _mm_cmplt_epi32(vTest, g_XMNoFraction);
    // Truncate toward zero and convert back to float. Truncation plus a
    // compare-and-adjust is used instead of biasing by one-half and rounding
    // to nearest: for whole odd inputs (e.g. 3.0f) the biased value lands
    // exactly on x.5 and round-to-even then picks the wrong integer.
    __m128i vInt = _mm_cvttps_epi32(V);
    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
    // Truncation rounds positive fractional inputs down; detect those lanes
    XMVECTOR vSmaller = _mm_cmplt_ps(vResult, V);
    // Turn the compare mask into an adjustment: 0 -> 0.0f, 0xFFFFFFFF -> -1.0f
    vSmaller = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vSmaller)[0]);
    // Subtracting -1.0f bumps the affected lanes up by one
    vResult = _mm_sub_ps(vResult, vSmaller);
    // All numbers less than 8388608 will use the converted value
    vResult = _mm_and_ps(vResult, reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest, reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult, reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
    // No implementation for this path is given in this listing.
#endif // _XM_VMX128_INTRINSICS_
}
Note that this is the same technique used by XMVectorRound to get around the same range issue in the SSE2 float<->integer conversion mechanism.
Updated: This issue has been fixed for XNAMath v2.04 and DirectXMath.