Changeset 869 for trunk/src/half.cpp
 Timestamp:
 Aug 28, 2011, 11:07:06 PM (9 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/src/half.cpp
  r867 r869
   17  17  using namespace std;
   18  18
+      19  namespace lol
+      20  {
+      21
+      22  /* Lookup table-based algorithm from “Fast Half Float Conversions”
+      23   * by Jeroen van der Zijp, November 2008. No rounding is performed,
+      24   * and some NaN values may be incorrectly converted to Inf. */
+      25  half half::makefast(float f)
+      26  {
   19  27  #define S4(x) S1(4*(x)), S1(4*(x)+1), S1(4*(x)+2), S1(4*(x)+3)
   20  28  #define S16(x) S4(4*(x)), S4(4*(x)+1), S4(4*(x)+2), S4(4*(x)+3)
    …   …
   22  30  #define S256(x) S64(4*(x)), S64(4*(x)+1), S64(4*(x)+2), S64(4*(x)+3)
   23  31
-  24      namespace lol
-  25      {
-  26
-  27      half half::makefast(float f)
-  28      {
-  29          /* Lookup table-based algorithm from “Fast Half Float Conversions”
-  30           * by Jeroen van der Zijp, November 2008. No rounding is performed. */
   31  32      static uint16_t const basetable[512] =
   32  33      {
    …   …
   59  60  }
   60  61
+      62  /* This method is faster than the OpenEXR implementation (very often
+      63   * used, eg. in Ogre), with the additional benefit of rounding, inspired
+      64   * by James Tursa’s halfprecision code. */
   61  65  half half::makeslow(float f)
   62  66  {
    …   …
   68  72
   69  73      /* If zero, or denormal, or exponent underflows too much for a denormal,
-  70           * return signed zero */
+      74       * return signed zero. */
   71  75      if (e < 103)
   72  76          return makebits(bits);
   73  77
-  74          /* If NaN, Inf or exponent overflow, return NaN or Inf*/
+      78      /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
   75  79      if (e > 142)
   76  80      {
   77  81          bits |= 0x7c00u;
+      82          /* If exponent was 0xff and one mantissa bit was set, it means NaN,
+      83           * not Inf, so make sure we set one mantissa bit too. */
   78  84          bits |= e == 255 && (u.x & 0x007fffffu);
   79  85          return makebits(bits);
    …   …
   84  90      {
   85  91          m |= 0x0800u;
+      92          /* Extra rounding may overflow and set mantissa to 0 and exponent
+      93           * to 1, which is OK. */
   86  94          bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
   87  95          return makebits(bits);
    …   …
   89  97
   90  98      bits |= ((e - 112) << 10) | (m >> 1);
-  91          bits += m & 1; /* Overflows here are expected and handled */
+      99      /* Extra rounding. An overflow will set mantissa to 0 and increment
+     100       * the exponent, which is OK. */
+     101      bits += m & 1;
   92 102      return makebits(bits);
+     103  }
+     104
+     105  half::operator float() const
+     106  {
+     107      union { float f; uint32_t x; } u;
+     108
+     109      uint32_t s = (m_bits & 0x8000u) << 16;
+     110
+     111      if ((m_bits & 0x7fffu) == 0)
+     112      {
+     113          u.x = (uint32_t)m_bits << 16;
+     114          return u.f;
+     115      }
+     116
+     117      uint32_t e = m_bits & 0x7c00u;
+     118      uint32_t m = m_bits & 0x03ffu;
+     119
+     120      if (e == 0)
+     121      {
+     122          static int const shifttable[32] =
+     123          {
+     124              10, 1, 9, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 7, 0,
+     125              2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0, 5, 6, 0,
+     126          };
+     127
+     128          uint32_t v = m | (m >> 1);
+     129          v |= v >> 2;
+     130          v |= v >> 4;
+     131          v |= v >> 8;
+     132
+     133          e = shifttable[(v * 0x07C4ACDDU) >> 27];
+     134          m <<= e;
+     135
+     136          /* We don't have to remove the 10th mantissa bit because it gets
+     137           * added to our underestimated exponent. */
+     138          u.x = s | (((112 - e) << 23) + (m << 13));
+     139          return u.f;
+     140      }
+     141
+     142      if (e == 0x7c00u)
+     143      {
+     144          /* The amd64 pipeline likes the if() better than a ternary operator
+     145           * or any other trick I could find. sam */
+     146          if (m == 0)
+     147              u.x = s | 0x7f800000u;
+     148          else
+     149              u.x = s | 0x7fc00000u;
+     150
+     151          return u.f;
+     152      }
+     153
+     154      u.x = s | (((e >> 10) + 112) << 23) | (m << 13);
+     155
+     156      return u.f;
   93 157  }
   94 158
Note: See TracChangeset
for help on using the changeset viewer.