Changeset 869


Ignore:
Timestamp:
Aug 28, 2011, 11:07:06 PM (12 years ago)
Author:
sam
Message:

core: add a half-to-float conversion routine and utility floating-point
functions such as isnan(), isinf(), etc.

Location:
trunk/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/half.cpp

    r867 r869  
    1717using namespace std;
    1818
     19namespace lol
     20{
     21
     22/* Lookup table-based algorithm from “Fast Half Float Conversions”
     23 * by Jeroen van der Zijp, November 2008. No rounding is performed,
     24 * and some NaN values may be incorrectly converted to Inf. */
     25half half::makefast(float f)
     26{
    1927#define S4(x)    S1(4*(x)),  S1(4*(x)+1),  S1(4*(x)+2),  S1(4*(x)+3)
    2028#define S16(x)   S4(4*(x)),  S4(4*(x)+1),  S4(4*(x)+2),  S4(4*(x)+3)
     
    2230#define S256(x) S64(4*(x)), S64(4*(x)+1), S64(4*(x)+2), S64(4*(x)+3)
    2331
    24 namespace lol
    25 {
    26 
    27 half half::makefast(float f)
    28 {
    29     /* Lookup table-based algorithm from “Fast Half Float Conversions”
    30      * by Jeroen van der Zijp, November 2008. No rounding is performed. */
    3132    static uint16_t const basetable[512] =
    3233    {
     
    5960}
    6061
     62/* This method is faster than the OpenEXR implementation (very often
     63 * used, e.g. in Ogre), with the additional benefit of rounding, inspired
     64 * by James Tursa’s half-precision code. */
    6165half half::makeslow(float f)
    6266{
     
    6872
    6973    /* If zero, or denormal, or exponent underflows too much for a denormal,
    70      * return signed zero */
     74     * return signed zero. */
    7175    if (e < 103)
    7276        return makebits(bits);
    7377
    74     /* If NaN, Inf or exponent overflow, return NaN or Inf */
     78    /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
    7579    if (e > 142)
    7680    {
    7781        bits |= 0x7c00u;
     82        /* If exponent was 0xff and one mantissa bit was set, it means NaN,
     83         * not Inf, so make sure we set one mantissa bit too. */
    7884        bits |= e == 255 && (u.x & 0x007fffffu);
    7985        return makebits(bits);
     
    8490    {
    8591        m |= 0x0800u;
     92        /* Extra rounding may overflow and set mantissa to 0 and exponent
     93         * to 1, which is OK. */
    8694        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
    8795        return makebits(bits);
     
    8997
    9098    bits |= ((e - 112) << 10) | (m >> 1);
    91     bits += m & 1; /* Overflows here are expected and handled */
     99    /* Extra rounding. An overflow will set mantissa to 0 and increment
     100     * the exponent, which is OK. */
     101    bits += m & 1;
    92102    return makebits(bits);
     103}
     104
     105half::operator float() const
     106{
     107    union { float f; uint32_t x; } u;
     108
     109    uint32_t s = (m_bits & 0x8000u) << 16;
     110
     111    if ((m_bits & 0x7fffu) == 0)
     112    {
     113        u.x = (uint32_t)m_bits << 16;
     114        return u.f;
     115    }
     116
     117    uint32_t e = m_bits & 0x7c00u;
     118    uint32_t m = m_bits & 0x03ffu;
     119
     120    if (e == 0)
     121    {
     122        static int const shifttable[32] =
     123        {
     124            10, 1, 9, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 7, 0,
     125            2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0, 5, 6, 0,
     126        };
     127
     128        uint32_t v = m | (m >> 1);
     129        v |= v >> 2;
     130        v |= v >> 4;
     131        v |= v >> 8;
     132
     133        e = shifttable[(v * 0x07C4ACDDU) >> 27];
     134        m <<= e;
     135
     136        /* We don't have to remove the 10th mantissa bit because it gets
     137         * added to our underestimated exponent. */
     138        u.x = s | (((112 - e) << 23) + (m << 13));
     139        return u.f;
     140    }
     141
     142    if (e == 0x7c00u)
     143    {
     144        /* The amd64 pipeline likes the if() better than a ternary operator
     145         * or any other trick I could find. --sam */
     146        if (m == 0)
     147            u.x = s | 0x7f800000u;
     148        else
     149            u.x = s | 0x7fc00000u;
     150
     151        return u.f;
     152    }
     153
     154    u.x = s | (((e >> 10) + 112) << 23) | (m << 13);
     155
     156    return u.f;
    93157}
    94158
  • trunk/src/half.h

    r867 r869  
    3535    }
    3636
     37    inline int isnan() const
     38    {
     39        return ((m_bits & 0x7c00u) == 0x7c00u) && (m_bits & 0x03ffu);
     40    }
     41
     42    inline int isfinite() const
     43    {
     44        return (m_bits & 0x7c00u) != 0x7c00u;
     45    }
     46
     47    inline int isinf() const
     48    {
     49        return (uint16_t)(m_bits << 1) == (0x7c00u << 1);
     50    }
     51
     52    inline int isnormal() const
     53    {
     54        return (isfinite() && (m_bits & 0x7c00u)) || ((m_bits & 0x7fffu) == 0);
     55    }
     56
     57    inline uint16_t bits()
     58    {
     59        return m_bits;
     60    }
     61
     62    /* Cast to other types */
     63    operator float() const;
     64    inline operator int() const { return (int)(float)*this; }
     65
     66    /* Factories */
    3767    static half makeslow(float f);
    3868    static half makefast(float f);
    39 
    4069    static inline half makebits(uint16_t x)
    4170    {
     
    4372        ret.m_bits = x;
    4473        return ret;
    45     }
    46 
    47     inline operator float() const
    48     {
    49         int s = m_bits & 0x8000u;
    50         int e = m_bits & 0x7c00u;
    51         int m = m_bits & 0x03ffu;
    52 
    53         union { float f; uint32_t x; } u;
    54         u.x = 0;
    55         u.x |= s << 16;
    56         u.x |= (-15 + (e >> 10) + 127) << 23;
    57         u.x |= m << 13;
    58 
    59         return u.f;
    60     }
    61 
    62     inline uint16_t bits()
    63     {
    64         return m_bits;
    6574    }
    6675};
Note: See TracChangeset for help on using the changeset viewer.