# Changeset 872

Ignore:
Timestamp:
Aug 29, 2011, 2:07:54 AM (9 years ago)
Message:

core: minor refactoring in the float / half conversions to accommodate
future array versions.

Location:
trunk
Files:
4 edited

Unmodified
Removed
• ## trunk/src/half.cpp

 r869 { /* These macros implement a finite iterator useful to build lookup * tables. For instance, S64(0) will call S1(x) for all values of x * between 0 and 63. * Due to the exponential behaviour of the calls, the stress on the * compiler may be important. */ #define S4(x)    S1((x)),   S1((x)+1),     S1((x)+2),     S1((x)+3) #define S16(x)   S4((x)),   S4((x)+4),     S4((x)+8),     S4((x)+12) #define S64(x)   S16((x)),  S16((x)+16),   S16((x)+32),   S16((x)+48) #define S256(x)  S64((x)),  S64((x)+64),   S64((x)+128),  S64((x)+192) #define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768) /* Lookup table-based algorithm from “Fast Half Float Conversions” * by Jeroen van der Zijp, November 2008. No rounding is performed, * and some NaN values may be incorrectly converted to Inf. */ half half::makefast(float f) { #define S4(x)    S1(4*(x)),  S1(4*(x)+1),  S1(4*(x)+2),  S1(4*(x)+3) #define S16(x)   S4(4*(x)),  S4(4*(x)+1),  S4(4*(x)+2),  S4(4*(x)+3) #define S64(x)  S16(4*(x)), S16(4*(x)+1), S16(4*(x)+2), S16(4*(x)+3) #define S256(x) S64(4*(x)), S64(4*(x)+1), S64(4*(x)+2), S64(4*(x)+3) static inline uint16_t float_to_half_nobranch(uint32_t x) { static uint16_t const basetable[512] = { }; union { float f; uint32_t x; } u = { f }; uint16_t bits = basetable[(u.x >> 23) & 0x1ff]; bits |= (u.x & 0x007fffff) >> shifttable[(u.x >> 23) & 0x1ff]; return makebits(bits); uint16_t bits = basetable[(x >> 23) & 0x1ff]; bits |= (x & 0x007fffff) >> shifttable[(x >> 23) & 0x1ff]; return bits; } * used, eg. in Ogre), with the additional benefit of rounding, inspired * by James Tursa’s half-precision code. 
*/ half half::makeslow(float f) { union { float f; uint32_t x; } u = { f }; uint16_t bits = (u.x >> 16) & 0x8000; /* Get the sign */ uint16_t m = (u.x >> 12) & 0x07ff; /* Keep one extra bit for rounding */ unsigned int e = (u.x >> 23) & 0xff; /* Using int is faster here */ static inline uint16_t float_to_half_branch(uint32_t x) { uint16_t bits = (x >> 16) & 0x8000; /* Get the sign */ uint16_t m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */ unsigned int e = (x >> 23) & 0xff; /* Using int is faster here */ /* If zero, or denormal, or exponent underflows too much for a denormal, * return signed zero. */ if (e < 103) return makebits(bits); return bits; /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ /* If exponent was 0xff and one mantissa bit was set, it means NaN, * not Inf, so make sure we set one mantissa bit too. */ bits |= e == 255 && (u.x & 0x007fffffu); return makebits(bits); bits |= e == 255 && (x & 0x007fffffu); return bits; } * to 1, which is OK. */ bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); return makebits(bits); return bits; } * the exponent, which is OK. */ bits += m & 1; return makebits(bits); } half::operator float() const { union { float f; uint32_t x; } u; uint32_t s = (m_bits & 0x8000u) << 16; if ((m_bits & 0x7fffu) == 0) { u.x = (uint32_t)m_bits << 16; return u.f; } uint32_t e = m_bits & 0x7c00u; uint32_t m = m_bits & 0x03ffu; return bits; } static int const shifttable[32] = { 23, 14, 22, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 20, 0, 15, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 19, 0, }; static uint32_t const shiftmagic = 0x07c4acddu; /* Lookup table-based algorithm from “Fast Half Float Conversions” * by Jeroen van der Zijp, November 2008. Tables are generated using * the C++ preprocessor, thanks to a branchless implementation also * used in half_to_float_branch(). This code is actually almost always * slower than the branching one. 
*/ static inline uint32_t half_to_float_nobranch(uint16_t x) { #define M3(i) ((i) | ((i) >> 1)) #define M7(i) (M3(i) | (M3(i) >> 2)) #define MF(i) (M7(i) | (M7(i) >> 4)) #define MFF(i) (MF(i) | (MF(i) >> 8)) #define E(i) shifttable[(unsigned int)(MFF(i) * shiftmagic) >> 27] static uint32_t const mantissatable[2048] = { #define S1(i) (((i) == 0) ? 0 : ((125 - E(i)) << 23) + ((i) << E(i))) S1024(0), #undef S1 #define S1(i) (0x38000000u + ((i) << 13)) S1024(0), #undef S1 }; static uint32_t const exponenttable[64] = { #define S1(i) (((i) == 0) ? 0 : \ ((i) < 31) ? ((i) << 23) : \ ((i) == 31) ? 0x47800000u : \ ((i) == 32) ? 0x80000000u : \ ((i) < 63) ? (0x80000000u + (((i) - 32) << 23)) : 0xc7800000) S64(0), #undef S1 }; static int const offsettable[64] = { #define S1(i) (((i) == 0 || (i) == 32) ? 0 : 1024) S64(0), #undef S1 }; return mantissatable[offsettable[x >> 10] + (x & 0x3ff)] + exponenttable[x >> 10]; } /* This algorithm is similar to the OpenEXR implementation, except it * uses branchless code in the denormal path. */ static inline uint32_t half_to_float_branch(uint16_t x) { uint32_t s = (x & 0x8000u) << 16; if ((x & 0x7fffu) == 0) return (uint32_t)x << 16; uint32_t e = x & 0x7c00u; uint32_t m = x & 0x03ffu; if (e == 0) { static int const shifttable[32] = { 10, 1, 9, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 7, 0, 2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0, 5, 6, 0, }; uint32_t v = m | (m >> 1); v |= v >> 2; v |= v >> 8; e = shifttable[(v * 0x07C4ACDDU) >> 27]; m <<= e; e = shifttable[(v * shiftmagic) >> 27]; /* We don't have to remove the 10th mantissa bit because it gets * added to our underestimated exponent. */ u.x = s | (((112 - e) << 23) + (m << 13)); return u.f; return s | (((125 - e) << 23) + (m << e)); } * or any other trick I could find. 
--sam */ if (m == 0) u.x = s | 0x7f800000u; else u.x = s | 0x7fc00000u; return u.f; } u.x = s | (((e >> 10) + 112) << 23) | (m << 13); return s | 0x7f800000u; return s | 0x7fc00000u; } return s | (((e >> 10) + 112) << 23) | (m << 13); } half half::makefast(float f) { union { float f; uint32_t x; } u = { f }; return makebits(float_to_half_nobranch(u.x)); } half half::makeslow(float f) { union { float f; uint32_t x; } u = { f }; return makebits(float_to_half_branch(u.x)); } half::operator float() const { union { float f; uint32_t x; } u; u.x = half_to_float_branch(bits); return u.f; }
• ## trunk/src/half.h

 r871 class half { private: uint16_t m_bits; public: inline half() { } inline int isnan() const { return ((m_bits & 0x7c00u) == 0x7c00u) && (m_bits & 0x03ffu); return ((bits & 0x7c00u) == 0x7c00u) && (bits & 0x03ffu); } inline int isfinite() const { return (m_bits & 0x7c00u) != 0x7c00u; return (bits & 0x7c00u) != 0x7c00u; } inline int isinf() const { return (uint16_t)(m_bits << 1) == (0x7c00u << 1); return (uint16_t)(bits << 1) == (0x7c00u << 1); } inline int isnormal() const { return (isfinite() && (m_bits & 0x7c00u)) || ((m_bits & 0x7fffu) == 0); } inline uint16_t bits() { return m_bits; return (isfinite() && (bits & 0x7c00u)) || ((bits & 0x7fffu) == 0); } /* Operations */ inline half operator -() { return makebits(m_bits ^ 0x8000u); } inline half operator -() { return makebits(bits ^ 0x8000u); } inline half &operator +=(float f) { return (*this = (half)(*this + f)); } inline half &operator -=(float f) { return (*this = (half)(*this - f)); } { half ret; ret.m_bits = x; ret.bits = x; return ret; } /* Internal representation */ uint16_t bits; };
• ## trunk/test/half.cpp

 r871 half a = half::makebits(i); uint16_t b = i; CPPUNIT_ASSERT_EQUAL(a.bits(), b); CPPUNIT_ASSERT_EQUAL(a.bits, b); } } half a = half::makeslow(pairs[i].f); uint16_t b = pairs[i].x; CPPUNIT_ASSERT_EQUAL(a.bits(), b); CPPUNIT_ASSERT_EQUAL(a.bits, b); } } half a = half::makefast(pairs[i].f); uint16_t b = pairs[i].x; CPPUNIT_ASSERT_EQUAL(a.bits(), b); CPPUNIT_ASSERT_EQUAL(a.bits, b); } } { CPPUNIT_ASSERT(!isnan(f)); CPPUNIT_ASSERT_EQUAL(g.bits(), h.bits()); CPPUNIT_ASSERT_EQUAL(g.bits, h.bits); } } half a = one + 0.0f; CPPUNIT_ASSERT_EQUAL(one.bits(), a.bits()); CPPUNIT_ASSERT_EQUAL(one.bits, a.bits); a += 0.0f; CPPUNIT_ASSERT_EQUAL(one.bits(), a.bits()); CPPUNIT_ASSERT_EQUAL(one.bits, a.bits); a -= 0.0f; CPPUNIT_ASSERT_EQUAL(one.bits(), a.bits()); CPPUNIT_ASSERT_EQUAL(one.bits, a.bits); a *= 1.0f; CPPUNIT_ASSERT_EQUAL(one.bits(), a.bits()); CPPUNIT_ASSERT_EQUAL(one.bits, a.bits); a /= 1.0f; CPPUNIT_ASSERT_EQUAL(one.bits(), a.bits()); CPPUNIT_ASSERT_EQUAL(one.bits, a.bits); half b = one + 0.0f; CPPUNIT_ASSERT_EQUAL(one.bits(), b.bits()); CPPUNIT_ASSERT_EQUAL(one.bits, b.bits); b += 1.0f; CPPUNIT_ASSERT_EQUAL(two.bits(), b.bits()); CPPUNIT_ASSERT_EQUAL(two.bits, b.bits); b *= 2.0f; CPPUNIT_ASSERT_EQUAL(four.bits(), b.bits()); CPPUNIT_ASSERT_EQUAL(four.bits, b.bits); b -= 2.0f; CPPUNIT_ASSERT_EQUAL(two.bits(), b.bits()); CPPUNIT_ASSERT_EQUAL(two.bits, b.bits); b /= 2.0f; CPPUNIT_ASSERT_EQUAL(one.bits(), b.bits()); CPPUNIT_ASSERT_EQUAL(one.bits, b.bits); half c = 1.0f - zero; CPPUNIT_ASSERT_EQUAL(one.bits(), c.bits()); CPPUNIT_ASSERT_EQUAL(one.bits, c.bits); half d = 2.0f - one; CPPUNIT_ASSERT_EQUAL(one.bits(), d.bits()); CPPUNIT_ASSERT_EQUAL(one.bits, d.bits); half e = 2.0f + (-one); CPPUNIT_ASSERT_EQUAL(one.bits(), e.bits()); CPPUNIT_ASSERT_EQUAL(one.bits, e.bits); half f = (2.0f * two) / (1.0f + one); CPPUNIT_ASSERT_EQUAL(two.bits(), f.bits()); CPPUNIT_ASSERT_EQUAL(two.bits, f.bits); }
• ## trunk/test/lol-bench.cpp

 r870 half h = half::makeslow(u.f); total ^= h.bits(); total ^= h.bits; } Log::Info("time for makeslow: %f (hash %04x)\n", timer.GetMs(), total); half h = half::makefast(u.f); total ^= h.bits(); total ^= h.bits; } Log::Info("time for makefast: %f (hash %04x)\n", timer.GetMs(), total);
Note: See TracChangeset for help on using the changeset viewer.