Changeset 1186
- Timestamp:
- Apr 3, 2012, 8:28:41 PM (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/math/half.cpp
--- trunk/src/math/half.cpp (r1181)
+++ trunk/src/math/half.cpp (r1186)
@@ -159,11 +159,12 @@
 #endif
 
-/* We use this De Bruijn sequence to compute a branchless integer log2 */
-static int const shifttable[32] =
-{
-    23, 14, 22, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 20, 0,
-    15, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 19, 0,
+/* We use this magic table, inspired by De Bruijn sequences, to compute a
+ * branchless integer log2. The actual value fetched is 24-log2(x+1) for x
+ * in 1, 3, 7, f, 1f, 3f, 7f, ff, 1fe, 1ff, 3fc, 3fd, 3fe, 3ff. */
+static int const shifttable[16] =
+{
+    23, 22, 21, 15, -1, 20, 18, 14, 14, 16, 19, -1, 17, -1, -1, -1,
 };
-static uint32_t const shiftmagic = 0x07c4acddu;
+static uint32_t const shiftmagic = 0x05a1a1a2u;
 
 /* Lookup table-based algorithm from “Fast Half Float Conversions”
@@ -177,6 +178,5 @@
 #define M7(i) (M3(i) | (M3(i) >> 2))
 #define MF(i) (M7(i) | (M7(i) >> 4))
-#define MFF(i) (MF(i) | (MF(i) >> 8))
-#define E(i) shifttable[(unsigned int)(MFF(i) * shiftmagic) >> 27]
+#define E(i) shifttable[(uint32_t)((uint64_t)MF(i) * shiftmagic) >> 28]
 
 static uint32_t const mantissatable[2048] =
@@ -193,8 +193,8 @@
 {
 #define S1(i) (((i) == 0) ? 0 : \
-               ((i) < 31) ? ((i) << 23) : \
+               ((i) < 31) ? ((uint32_t)(i) << 23) : \
                ((i) == 31) ? 0x47800000u : \
                ((i) == 32) ? 0x80000000u : \
-               ((i) < 63) ? (0x80000000u + (((i) - 32) << 23)) : 0xc7800000)
+               ((i) < 63) ? (0x80000000u | (((i) - 32) << 23)) : 0xc7800000)
     S64(0),
 #undef S1
@@ -228,10 +228,12 @@
     if (e == 0)
     {
+        /* m has 10 significant bits but replicating the leading bit to
+         * 8 positions instead of 16 works just as well because of our
+         * handcrafted shiftmagic table. */
         uint32_t v = m | (m >> 1);
         v |= v >> 2;
         v |= v >> 4;
-        v |= v >> 8;
-
-        e = shifttable[(v * shiftmagic) >> 27];
+
+        e = shifttable[(v * shiftmagic) >> 28];
 
         /* We don't have to remove the 10th mantissa bit because it gets
Note: See TracChangeset for help on using the changeset viewer.