Changeset 869
Legend:
 Unmodified
 Added
 Removed

trunk/src/half.cpp
r867 r869 17 17 using namespace std; 18 18 19 namespace lol 20 { 21 22 /* Lookup table-based algorithm from “Fast Half Float Conversions” 23 * by Jeroen van der Zijp, November 2008. No rounding is performed, 24 * and some NaN values may be incorrectly converted to Inf. */ 25 half half::makefast(float f) 26 { 19 27 #define S4(x) S1(4*(x)), S1(4*(x)+1), S1(4*(x)+2), S1(4*(x)+3) 20 28 #define S16(x) S4(4*(x)), S4(4*(x)+1), S4(4*(x)+2), S4(4*(x)+3) … … 22 30 #define S256(x) S64(4*(x)), S64(4*(x)+1), S64(4*(x)+2), S64(4*(x)+3) 23 31 24 namespace lol25 {26 27 half half::makefast(float f)28 {29 /* Lookup table-based algorithm from “Fast Half Float Conversions”30 * by Jeroen van der Zijp, November 2008. No rounding is performed. */31 32 static uint16_t const basetable[512] = 32 33 { … … 59 60 } 60 61 62 /* This method is faster than the OpenEXR implementation (very often 63 * used, eg. in Ogre), with the additional benefit of rounding, inspired 64 * by James Tursa’s halfprecision code. */ 61 65 half half::makeslow(float f) 62 66 { … … 68 72 69 73 /* If zero, or denormal, or exponent underflows too much for a denormal, 70 * return signed zero */74 * return signed zero. */ 71 75 if (e < 103) 72 76 return makebits(bits); 73 77 74 /* If NaN, Inf or exponent overflow, return NaN or Inf*/78 /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ 75 79 if (e > 142) 76 80 { 77 81 bits |= 0x7c00u; 82 /* If exponent was 0xff and one mantissa bit was set, it means NaN, 83 * not Inf, so make sure we set one mantissa bit too. */ 78 84 bits |= e == 255 && (u.x & 0x007fffffu); 79 85 return makebits(bits); … … 84 90 { 85 91 m |= 0x0800u; 92 /* Extra rounding may overflow and set mantissa to 0 and exponent 93 * to 1, which is OK. */ 86 94 bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); 87 95 return makebits(bits); … … 89 97 90 98 bits |= ((e - 112) << 10) | (m >> 1); 91 bits += m & 1; /* Overflows here are expected and handled */ 99 /* Extra rounding.
An overflow will set mantissa to 0 and increment 100 * the exponent, which is OK. */ 101 bits += m & 1; 92 102 return makebits(bits); 103 } 104 105 half::operator float() const 106 { 107 union { float f; uint32_t x; } u; 108 109 uint32_t s = (m_bits & 0x8000u) << 16; 110 111 if ((m_bits & 0x7fffu) == 0) 112 { 113 u.x = (uint32_t)m_bits << 16; 114 return u.f; 115 } 116 117 uint32_t e = m_bits & 0x7c00u; 118 uint32_t m = m_bits & 0x03ffu; 119 120 if (e == 0) 121 { 122 static int const shifttable[32] = 123 { 124 10, 1, 9, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 7, 0, 125 2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0, 5, 6, 0, 126 }; 127 128 uint32_t v = m | (m >> 1); 129 v |= v >> 2; 130 v |= v >> 4; 131 v |= v >> 8; 132 133 e = shifttable[(v * 0x07C4ACDDU) >> 27]; 134 m <<= e; 135 136 /* We don't have to remove the 10th mantissa bit because it gets 137 * added to our underestimated exponent. */ 138 u.x = s | (((112 - e) << 23) + (m << 13)); 139 return u.f; 140 } 141 142 if (e == 0x7c00u) 143 { 144 /* The amd64 pipeline likes the if() better than a ternary operator 145 * or any other trick I could find. sam */ 146 if (m == 0) 147 u.x = s | 0x7f800000u; 148 else 149 u.x = s | 0x7fc00000u; 150 151 return u.f; 152 } 153 154 u.x = s | (((e >> 10) + 112) << 23) | (m << 13); 155 156 return u.f; 93 157 } 94 158 
trunk/src/half.h
r867 r869 35 35 } 36 36 37 inline int isnan() const 38 { 39 return ((m_bits & 0x7c00u) == 0x7c00u) && (m_bits & 0x03ffu); 40 } 41 42 inline int isfinite() const 43 { 44 return (m_bits & 0x7c00u) != 0x7c00u; 45 } 46 47 inline int isinf() const 48 { 49 return (uint16_t)(m_bits << 1) == (0x7c00u << 1); 50 } 51 52 inline int isnormal() const 53 { 54 return (isfinite() && (m_bits & 0x7c00u)) || ((m_bits & 0x7fffu) == 0); 55 } 56 57 inline uint16_t bits() 58 { 59 return m_bits; 60 } 61 62 /* Cast to other types */ 63 operator float() const; 64 inline operator int() const { return (int)(float)*this; } 65 66 /* Factories */ 37 67 static half makeslow(float f); 38 68 static half makefast(float f); 39 40 69 static inline half makebits(uint16_t x) 41 70 { … … 43 72 ret.m_bits = x; 44 73 return ret; 45 }46 47 inline operator float() const48 {49 int s = m_bits & 0x8000u;50 int e = m_bits & 0x7c00u;51 int m = m_bits & 0x03ffu;52 53 union { float f; uint32_t x; } u;54 u.x = 0;55 u.x |= s << 16;56 u.x |= (-15 + (e >> 10) + 127) << 23;57 u.x |= m << 13;58 59 return u.f;60 }61 62 inline uint16_t bits()63 {64 return m_bits;65 74 } 66 75 };
Note: See TracChangeset for help on using the changeset viewer.