Ignore:
Timestamp:
Aug 29, 2011, 7:03:07 PM (8 years ago)
Author:
sam
Message:

core: tune the half-precision code so that the best variants are
used on the PS3 platform.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/half.cpp

    r877 r879  
    7575    /* If zero, or denormal, or exponent underflows too much for a denormal,
    7676     * return signed zero. */
     77#if !defined __CELLOS_LV2__
    7778    if (e < 103)
    7879        return bits;
     80#else
     81    /* Denormals are not handled on the PS3. */
     82    if (e < 113)
     83        return bits;
     84#endif
    7985
    8086    /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
     
    8894    }
    8995
     96#if !defined __CELLOS_LV2__
    9097    /* If exponent underflows but not too much, return a denormal */
    9198    if (e < 113)
     
    97104        return bits;
    98105    }
     106#endif
    99107
    100108    bits |= ((e - 112) << 10) | (m >> 1);
     
    173181    if (e == 0)
    174182    {
     183#if !defined __CELLOS_LV2__
    175184        uint32_t v = m | (m >> 1);
    176185        v |= v >> 2;
     
    183192         * added to our underestimated exponent. */
    184193        return s | (((125 - e) << 23) + (m << e));
     194#else
     195        /* Denormals are not handled on the PS3. */
     196        return s;
     197#endif
    185198    }
    186199
     
    203216{
    204217    union { float f; uint32_t x; } u = { f };
     218#if !defined __CELLOS_LV2__
    205219    return makebits(float_to_half_nobranch(u.x));
     220#else
     221    /* This code is slightly faster on the PS3, mostly because we
     222     * don't need to care about denormals. */
     223    return makebits(float_to_half_branch(u.x));
     224#endif
    206225}
    207226
    208227/* Constructor from float with better precision. */
    209 half half::makeslow(float f)
     228half half::makeaccurate(float f)
    210229{
    211230    union { float f; uint32_t x; } u = { f };
     
    230249        union { float f; uint32_t x; } u;
    231250        u.f = *src++;
     251#if !defined __CELLOS_LV2__
    232252        *dst++ = makebits(float_to_half_nobranch(u.x));
     253#else
     254        /* This code is slightly faster on the PS3, mostly because we
     255         * don't need to care about denormals. */
     256        *dst++ = makebits(float_to_half_branch(u.x));
     257#endif
    233258    }
    234259
     
    241266    {
    242267        union { float f; uint32_t x; } u;
     268#if !defined __CELLOS_LV2__
     269        /* This code is really too slow on the PS3, even with the denormal
     270         * handling stripped off. */
    243271        u.x = half_to_float_nobranch((*src++).bits);
     272#else
     273        u.x = half_to_float_branch((*src++).bits);
     274#endif
    244275        *dst++ = u.f;
    245276    }
Note: See TracChangeset for help on using the changeset viewer.