Known Issues in XNAMath v2.03

xnamath, directxmath

Originally posted to Chuck Walbourn's Blog on MSDN, Jan 20, 2011

Some users of XNAMath have reported a range problem with the XMVectorFloor and XMVectorCeiling functions with the SSE implementation (i.e. whenever the values are greater than INT_MAX). These fixes will be included in a future release of XNAMath, but since XNAMath is an all-header implementation it can also be fixed directly by affected developers in the xnamathvector.inl file.

Here are the corrected versions:

XMFINLINE XMVECTOR XMVectorFloor
(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = { floorf(V.vector4_f32[0]), floorf(V.vector4_f32[1]), floorf(V.vector4_f32[2]), floorf(V.vector4_f32[3]) };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0], g_XMAbsMask);
    // Test for greater than 8388608 (All floats with NO fractionals, NAN and INF
    vTest = _mm_cmplt_epi32(vTest, g_XMNoFraction);
    // Convert to int and back to float for rounding
    XMVECTOR vResult = _mm_sub_ps(V, g_XMOneHalfMinusEpsilon);
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Convert back to floats
    vResult = _mm_cvtepi32_ps(vInt);
    // All numbers less than 8388608 will use the round to int
    vResult = _mm_and_ps(vResult, reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest, reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult, reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorCeiling(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = { ceilf(V.vector4_f32[0]), ceilf(V.vector4_f32[1]), ceilf(V.vector4_f32[2]), ceilf(V.vector4_f32[3]) };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0], g_XMAbsMask);
    // Test for greater than 8388608 (All floats with NO fractionals, NAN and INF
    vTest = _mm_cmplt_epi32(vTest, g_XMNoFraction);
    // Convert to int and back to float for rounding
    XMVECTOR vResult = _mm_add_ps(V, g_XMOneHalfMinusEpsilon);
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Convert back to floats
    vResult = _mm_cvtepi32_ps(vInt);
    // All numbers less than 8388608 will use the round to int
    vResult = _mm_and_ps(vResult, reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest, reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult, reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

Note that this is the same technique used by XMVectorRound to get around the same range issue in the SSE2 float<->integer conversion mechanism.

Updated: This issue has been fixed for XNAMath v2.04 and DirectXMath.

Games for Windows and the DirectX SDK blog