Games for Windows and the DirectX SDK blog

Technical tips, tricks, and news about game development for Microsoft platforms including desktop, Xbox One, and UWP


Project maintained by walbourn Hosted on GitHub Pages — Theme by mattgraham
Home | Posts by Tag | Posts by Month

Known Issues in XNAMath v2.03

xnamath, directxmath

Originally posted to Chuck Walbourn's Blog on MSDN.

Some users of XNAMath have reported a range problem with the SSE implementation of the XMVectorFloor and XMVectorCeiling functions, which occurs whenever the values are greater than INT_MAX. A fix for both functions will be included in a future release of XNAMath, but since XNAMath is an all-header implementation, affected developers can also apply it directly in the xnamathvector.inl file.

Here are the corrected versions:

// Computes the per-component floor of V.
//
// V       - the four-component input vector.
// Returns - a vector whose components are the floor of V's components.
//           In the SSE path, lanes whose magnitude is not below
//           g_XMNoFraction (8388608 per the original notes), as well as
//           NAN and INF lanes, are returned unchanged.
XMFINLINE XMVECTOR XMVectorFloor(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    // Scalar path: apply floorf to each component in turn.
    XMVECTOR vFloored = {
        floorf(V.vector4_f32[0]),
        floorf(V.vector4_f32[1]),
        floorf(V.vector4_f32[2]),
        floorf(V.vector4_f32[3])
    };
    return vFloored;
#elif defined(_XM_SSE_INTRINSICS_)
    // The float -> int -> float round trip is only valid for values that fit
    // in a 32-bit integer, so its result is selected per lane with a mask and
    // every other lane passes the input through untouched.

    // Raw bits of the input; used for both the range test and the pass-through.
    const __m128i vInputBits = reinterpret_cast<const __m128i *>(&V)[0];
    // Strip the sign bit so the integer compare below works on magnitudes.
    const __m128i vAbsBits = _mm_and_si128(vInputBits, g_XMAbsMask);
    // All-ones in lanes whose magnitude is below g_XMNoFraction; all-zeros in
    // lanes that are too large, NAN or INF.
    const __m128i vSmallMask = _mm_cmplt_epi32(vAbsBits, g_XMNoFraction);

    // Bias downward by g_XMOneHalfMinusEpsilon (presumably just under 0.5,
    // going by its name) and let the float -> int conversion do the rounding.
    const XMVECTOR vBiased = _mm_sub_ps(V, g_XMOneHalfMinusEpsilon);
    const __m128i vAsInt = _mm_cvtps_epi32(vBiased);
    const XMVECTOR vRounded = _mm_cvtepi32_ps(vAsInt);

    // Keep the rounded value only in the in-range lanes...
    const XMVECTOR vFromRounded = _mm_and_ps(vRounded, reinterpret_cast<const XMVECTOR *>(&vSmallMask)[0]);
    // ...and the original bits in all remaining lanes, then merge the two.
    const __m128i vFromInput = _mm_andnot_si128(vSmallMask, vInputBits);
    return _mm_or_ps(vFromRounded, reinterpret_cast<const XMVECTOR *>(&vFromInput)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Computes the per-component ceiling of V.
//
// V       - the four-component input vector.
// Returns - a vector whose components are the ceiling of V's components.
//           In the SSE path, lanes whose magnitude is not below
//           g_XMNoFraction (8388608 per the original notes), as well as
//           NAN and INF lanes, are returned unchanged.
XMFINLINE XMVECTOR XMVectorCeiling(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    // Scalar path: apply ceilf to each component in turn.
    XMVECTOR vCeiled = {
        ceilf(V.vector4_f32[0]),
        ceilf(V.vector4_f32[1]),
        ceilf(V.vector4_f32[2]),
        ceilf(V.vector4_f32[3])
    };
    return vCeiled;
#elif defined(_XM_SSE_INTRINSICS_)
    // The float -> int -> float round trip is only valid for values that fit
    // in a 32-bit integer, so its result is selected per lane with a mask and
    // every other lane passes the input through untouched.

    // Raw bits of the input; used for both the range test and the pass-through.
    const __m128i vInputBits = reinterpret_cast<const __m128i *>(&V)[0];
    // Strip the sign bit so the integer compare below works on magnitudes.
    const __m128i vAbsBits = _mm_and_si128(vInputBits, g_XMAbsMask);
    // All-ones in lanes whose magnitude is below g_XMNoFraction; all-zeros in
    // lanes that are too large, NAN or INF.
    const __m128i vSmallMask = _mm_cmplt_epi32(vAbsBits, g_XMNoFraction);

    // Bias upward by g_XMOneHalfMinusEpsilon (presumably just under 0.5,
    // going by its name) and let the float -> int conversion do the rounding.
    const XMVECTOR vBiased = _mm_add_ps(V, g_XMOneHalfMinusEpsilon);
    const __m128i vAsInt = _mm_cvtps_epi32(vBiased);
    const XMVECTOR vRounded = _mm_cvtepi32_ps(vAsInt);

    // Keep the rounded value only in the in-range lanes...
    const XMVECTOR vFromRounded = _mm_and_ps(vRounded, reinterpret_cast<const XMVECTOR *>(&vSmallMask)[0]);
    // ...and the original bits in all remaining lanes, then merge the two.
    const __m128i vFromInput = _mm_andnot_si128(vSmallMask, vInputBits);
    return _mm_or_ps(vFromRounded, reinterpret_cast<const XMVECTOR *>(&vFromInput)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

Note that this is the same technique used by XMVectorRound to get around the same range issue in the SSE2 float<->integer conversion mechanism.

Updated: This issue has been fixed for XNAMath v2.04 and DirectXMath.