Changeset 1188
 Timestamp:
 Apr 3, 2012, 11:32:18 PM (9 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/src/math/half.cpp
r1186 r1188 112 112 } 113 113 114 #if 0115 static inline void float_to_half_vector(half *dst, float const *src)116 {117 vector unsigned int const v7 = vec_splat_u32(7);118 vector unsigned short const v6 = vec_splat_u16(6);119 #if _XBOX120 vector signed short const v9 = vec_splat_u16(9);121 vector unsigned short const v10 = vec_splat_u16(10);122 #else123 vector signed short const v0x0040 = {124 0x0040, 0x0040, 0x0040, 0x0040, 0x0040, 0x0040, 0x0040, 0x0040};125 vector unsigned short const v0x0400 = {126 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400};127 #endif128 vector unsigned char const shuffle_high = {129 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};130 vector unsigned char const shuffle_low = {131 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31};132 vector unsigned char const v0xbf70 = {133 0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70,134 0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70};135 136 vector unsigned short v_mant, v_ret;137 vector signed short v_exp;138 vector unsigned int in0 = (vector unsigned int)vec_ld(0, src);139 vector unsigned int in1 = (vector unsigned int)vec_ld(16, src);140 141 in0 = vec_sra(in0, v7);142 in1 = vec_sra(in1, v7);143 v_exp = (vector signed short)vec_perm(in0, in1, shuffle_high);144 v_mant = (vector unsigned short)vec_perm(in0, in1, shuffle_low);145 v_exp = (vector signed short)vec_subs((vector unsigned char)v_exp, v0xbf70);146 #if _XBOX147 v_ret = (vector unsigned short)vec_or(v_exp, vec_sr(v_exp, v9));148 #else149 v_ret = (vector unsigned short)vec_madds(v_exp, v0x0040, v_exp);150 #endif151 v_mant = vec_sr(v_mant, v6);152 #if _XBOX153 v_ret = vec_or(v_mant, vec_sl(v_ret, v10));154 #else155 v_ret = vec_mladd(v_ret, v0x0400, v_mant);156 #endif157 vec_st(v_ret, 0, (uint16_t *)dst);158 }159 #endif160 161 114 /* We use this magic table, inspired by De Bruijn sequences, to compute a 162 115 * branchless integer log2. 
The actual value fetched is 24-log2(x+1) for x 163 * in 1, 3, 7, f, 1f, 3f, 7f, ff, 1fe, 1ff, 3fc, 3fd, 3fe, 3ff. */ 116 * in 1, 3, 7, f, 1f, 3f, 7f, ff, 1fe, 1ff, 3fc, 3fd, 3fe, 3ff. See 117 * http://lol.zoy.org/blog/2012/4/3/beyonddebruijn for an explanation 118 * of how the value 0x5a1a1a2u was obtained. */ 164 119 static int const shifttable[16] = 165 120 { 166 121 23, 22, 21, 15, 1, 20, 18, 14, 14, 16, 19, 1, 17, 1, 1, 1, 167 122 }; 168 static uint32_t const shiftmagic = 0x05a1a1a2u;123 static uint32_t const shiftmagic = 0x5a1a1a2u; 169 124 170 125 /* Lookup table-based algorithm from “Fast Half Float Conversions” … … 286 241 u.f = *src++; 287 242 *dst++ = makebits(float_to_half_nobranch(u.x)); 288 #if 0289 /* AltiVec code. Will work one day. */290 float_to_half_vector(dst, src);291 src += 8;292 dst += 8;293 i += 7;294 #endif295 243 } 296 244
Note: See TracChangeset
for help on using the changeset viewer.