source: trunk/src/math/half.cpp @ 1181

Last change on this file since 1181 was 1181, checked in by sam, 9 years ago

math: add a few comments.

• Property svn:keywords set to `Id`
File size: 9.6 KB
Line
1//
2// Lol Engine
3//
4// Copyright: (c) 2010-2012 Sam Hocevar <sam@hocevar.net>
5//   This program is free software; you can redistribute it and/or
6//   modify it under the terms of the Do What The Fuck You Want To
7//   Public License, Version 2, as published by Sam Hocevar. See
8//   http://sam.zoy.org/projects/COPYING.WTFPL for more details.
9//
10
11#if defined HAVE_CONFIG_H
12#   include "config.h"
13#endif
14
15#if defined __CELLOS_LV2__
16#   if defined __SNC__
17#       include <ppu_altivec_internals.h>
18#   else
19#       include <altivec.h>
20#   endif
21#endif
22
23#include "core.h"
24
25using namespace std;
26
27namespace lol
28{
29
/* These macros implement a finite iterator useful to build lookup
 * tables. For instance, S64(0) will call S1(x) for all values of x
 * between 0 and 63.
 * Due to the exponential behaviour of the calls, the stress on the
 * compiler may be important.
 * Usage: define S1(i) to the per-element expression, expand S256/S1024
 * inside an aggregate initialiser, then #undef S1 so the next table can
 * redefine it (see the conversion functions below). */
#define S4(x)    S1((x)),   S1((x)+1),     S1((x)+2),     S1((x)+3)
#define S16(x)   S4((x)),   S4((x)+4),     S4((x)+8),     S4((x)+12)
#define S64(x)   S16((x)),  S16((x)+16),   S16((x)+32),   S16((x)+48)
#define S256(x)  S64((x)),  S64((x)+64),   S64((x)+128),  S64((x)+192)
#define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768)
40
/* Lookup table-based algorithm from “Fast Half Float Conversions”
 * by Jeroen van der Zijp, November 2008. No rounding is performed,
 * and some NaN values may be incorrectly converted to Inf (because
 * the lowest order bits in the mantissa are ignored). */
static inline uint16_t float_to_half_nobranch(uint32_t x)
{
    /* Maps the float’s 9 sign+exponent bits to the half’s sign and
     * exponent fields. Built with the S256 iterator macro; S1 is
     * redefined for each half of the table. */
    static uint16_t const basetable[512] =
    {
        /* i < 103:        exponent underflows even half denormals → 0.
         * 103 <= i < 113: denormal half; base carries the implicit
         *                 leading bit at the right position.
         * 113 <= i < 143: normal half; exponent rebias by 112.
         * i >= 143:       overflow (or Inf/NaN exponent) → Inf bits. */
#define S1(i) (((i) < 103) ? 0x0000 : \
               ((i) < 113) ? 0x0400 >> (0x1f & (113 - (i))) : \
               ((i) < 143) ? ((i) - 112) << 10 : 0x7c00)
        S256(0),
#undef S1
        /* Upper half (sign bit set): same entries with bit 15 set.
         * NOTE(review): this reads basetable[] during its own
         * initialisation and relies on aggregate elements being
         * initialised in order — confirm this is intentional. */
#define S1(i) (0x8000 | basetable[i])
        S256(0),
#undef S1
    };

    /* Right-shift applied to the float mantissa for each sign+exponent
     * value: 24 discards all 23 bits (zero/overflow), 13 keeps the top
     * 10 bits (normal halves, and exponent 255 so NaN payload bits can
     * survive), 126 - i aligns denormals. */
    static uint8_t const shifttable[512] =
    {
#define S1(i) (((i) < 103) ? 24 : \
               ((i) < 113) ? 126 - (i) : \
               ((i) < 143 || (i) == 255) ? 13 : 24)
        S256(0), S256(0),
#undef S1
    };

    /* Index both tables with the 9 sign+exponent bits, then merge the
     * shifted mantissa into the base pattern. */
    uint16_t bits = basetable[(x >> 23) & 0x1ff];
    bits |= (x & 0x007fffff) >> shifttable[(x >> 23) & 0x1ff];
    return bits;
}
72
/* Branch-based float-to-half conversion with proper round-to-nearest,
 * faster than the widely used OpenEXR implementation (e.g. in Ogre).
 * Based on James Tursa’s half-precision code. Takes the raw IEEE 754
 * single-precision bit pattern and returns the half bit pattern. */
static inline uint16_t float_to_half_branch(uint32_t x)
{
    /* Sign moves straight to bit 15 of the half. */
    uint16_t const sign = (uint16_t)((x >> 16) & 0x8000);
    /* Top 11 mantissa bits; the extra (11th) low bit drives rounding. */
    uint16_t mant = (uint16_t)((x >> 12) & 0x07ff);
    /* Plain unsigned int for the exponent is faster than uint16_t. */
    unsigned int const exp = (x >> 23) & 0xff;

    /* Zero, float denormal, or exponent too small even for a denormal
     * half: the result is a signed zero. */
    if (exp < 103)
        return sign;

    /* Inf, NaN, or exponent overflow: produce Inf, and set one mantissa
     * bit when the source exponent was 0xff with a non-zero mantissa so
     * NaN stays NaN instead of collapsing to Inf. */
    if (exp > 142)
    {
        uint16_t ret = sign | 0x7c00u;
        if (exp == 255 && (x & 0x007fffffu))
            ret |= 1;
        return ret;
    }

    /* Mild underflow: build a denormal half. Shift in the implicit
     * leading one, then round to nearest; the rounding add may carry
     * into the exponent field (making it 1), which is correct. */
    if (exp < 113)
    {
        mant |= 0x0800u;
        uint16_t ret = sign | (uint16_t)(mant >> (114 - exp));
        ret += (mant >> (113 - exp)) & 1;
        return ret;
    }

    /* Normalised case: rebias the exponent and keep 10 mantissa bits.
     * The rounding add may overflow the mantissa and bump the exponent,
     * which is again the correct result. */
    uint16_t ret = sign | (uint16_t)(((exp - 112) << 10) | (mant >> 1));
    return ret + (mant & 1);
}
113
114#if 0
/* Converts eight consecutive floats to eight halves in one pass using
 * AltiVec intrinsics (two 16-byte loads, one 16-byte store). This code
 * sits inside an #if 0 block; NOTE(review): presumably unfinished — the
 * scalar loop in convert() still does the work. Confirm correctness
 * before enabling. */
static inline void float_to_half_vector(half *dst, float const *src)
{
    /* Splatted shift counts used below. */
    vector unsigned int const v7 = vec_splat_u32(7);
    vector unsigned short const v6 = vec_splat_u16(6);
#if _XBOX
    /* Xbox 360 variant: shift-based scaling.
     * NOTE(review): vec_splat_u16 assigned to a signed short vector —
     * verify the intended types here. */
    vector signed short const v9 = vec_splat_u16(9);
    vector unsigned short const v10 = vec_splat_u16(10);
#else
    /* Generic AltiVec variant: multiply-add based scaling. */
    vector signed short const v0x0040 = {
        0x0040, 0x0040, 0x0040, 0x0040, 0x0040, 0x0040, 0x0040, 0x0040};
    vector unsigned short const v0x0400 = {
        0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400};
#endif
    /* Permute masks gathering the high (sign/exponent) and low
     * (mantissa) 16-bit halves of each 32-bit lane from both inputs. */
    vector unsigned char const shuffle_high = {
        0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
    vector unsigned char const shuffle_low = {
        2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31};
    /* Per-byte constant for the saturating exponent rebias.
     * NOTE(review): 0xbf70 should encode the 127→15 bias change after
     * the >>7 alignment — verify against the scalar algorithm. */
    vector unsigned char const v0xbf70 = {
        0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70,
        0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70};

    vector unsigned short v_mant, v_ret;
    vector signed short v_exp;
    /* Load eight floats (two aligned 16-byte vectors). */
    vector unsigned int in0 = (vector unsigned int)vec_ld(0, src);
    vector unsigned int in1 = (vector unsigned int)vec_ld(16, src);

    /* Arithmetic shift right by 7 aligns sign/exponent and mantissa so
     * the 16-bit permutes below can separate them. */
    in0 = vec_sra(in0, v7);
    in1 = vec_sra(in1, v7);
    v_exp = (vector signed short)vec_perm(in0, in1, shuffle_high);
    v_mant = (vector unsigned short)vec_perm(in0, in1, shuffle_low);
    /* Saturating subtract rebiases the exponent; saturation clamps
     * underflow instead of wrapping. */
    v_exp = (vector signed short)vec_subs((vector unsigned char)v_exp, v0xbf70);
#if _XBOX
    v_ret = (vector unsigned short)vec_or(v_exp, vec_sr(v_exp, v9));
#else
    v_ret = (vector unsigned short)vec_madds(v_exp, v0x0040, v_exp);
#endif
    /* Drop the mantissa bits that do not fit in a half. */
    v_mant = vec_sr(v_mant, v6);
#if _XBOX
    v_ret = vec_or(v_mant, vec_sl(v_ret, v10));
#else
    v_ret = vec_mladd(v_ret, v0x0400, v_mant);
#endif
    /* Store eight halves (one 16-byte vector). */
    vec_st(v_ret, 0, (uint16_t *)dst);
}
159#endif
160
/* We use this De Bruijn sequence to compute a branchless integer log2:
 * smear the highest set bit of the value into all lower bit positions,
 * multiply by the magic constant, and use the top 5 bits of the product
 * to index this table. The stored values are the shifts that bring a
 * half mantissa's leading bit up to float mantissa bit 23 (i.e. 23 minus
 * the bit position); unused indices hold 0. Shared by
 * half_to_float_branch() and, at compile time, by the E() macro in
 * half_to_float_nobranch(). */
static int const shifttable[32] =
{
    23, 14, 22, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 20, 0,
    15, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 19, 0,
};
static uint32_t const shiftmagic = 0x07c4acddu;
168
/* Lookup table-based algorithm from “Fast Half Float Conversions”
 * by Jeroen van der Zijp, November 2008. Tables are generated using
 * the C++ preprocessor, thanks to a branchless implementation also
 * used in half_to_float_branch(). This code is very fast when performing
 * conversions on arrays of values. */
static inline uint32_t half_to_float_nobranch(uint16_t x)
{
/* M3/M7/MF/MFF smear the highest set bit of (i) into every lower bit;
 * E(i) then extracts the bit position via the De Bruijn multiply+table
 * trick — a compile-time integer log2 used to renormalise denormal
 * mantissas below. */
#define M3(i) ((i) | ((i) >> 1))
#define M7(i) (M3(i) | (M3(i) >> 2))
#define MF(i) (M7(i) | (M7(i) >> 4))
#define MFF(i) (MF(i) | (MF(i) >> 8))
#define E(i) shifttable[(unsigned int)(MFF(i) * shiftmagic) >> 27]

    /* Mantissa (and partial exponent) contribution, indexed by the
     * half’s 10 mantissa bits plus the 0/1024 offset from offsettable.
     * First 1024 entries renormalise denormal halves; the next 1024
     * handle normal halves with the bias baked into 0x38000000. */
    static uint32_t const mantissatable[2048] =
    {
#define S1(i) (((i) == 0) ? 0 : ((125 - E(i)) << 23) + ((i) << E(i)))
        S1024(0),
#undef S1
#define S1(i) (0x38000000u + ((i) << 13))
        S1024(0),
#undef S1
    };

    /* Exponent/sign contribution, indexed by the half’s 6 sign+exponent
     * bits: zero, normals, Inf/NaN (0x47800000), then the same three
     * cases with the float sign bit set. */
    static uint32_t const exponenttable[64] =
    {
#define S1(i) (((i) == 0) ? 0 : \
               ((i) < 31) ? ((i) << 23) : \
               ((i) == 31) ? 0x47800000u : \
               ((i) == 32) ? 0x80000000u : \
               ((i) < 63) ? (0x80000000u + (((i) - 32) << 23)) : 0xc7800000)
        S64(0),
#undef S1
    };

    /* Table-half selector: exponent field 0 of either sign uses the
     * denormal entries at offset 0, everything else the normal entries
     * at offset 1024. */
    static int const offsettable[64] =
    {
#define S1(i) (((i) == 0 || (i) == 32) ? 0 : 1024)
        S64(0),
#undef S1
    };

    /* Addition (not OR) is required here so the biased bases in the
     * two tables combine correctly. */
    return mantissatable[offsettable[x >> 10] + (x & 0x3ff)]
            + exponenttable[x >> 10];
}
213
/* Branch-based half-to-float conversion, similar to the OpenEXR
 * implementation except that the denormal path is branchless. Slower
 * than the table version, but far friendlier to the cache for
 * occasional conversions. Takes the raw half bit pattern and returns
 * the IEEE 754 single-precision bit pattern. */
static inline uint32_t half_to_float_branch(uint16_t x)
{
    uint32_t const sign = (uint32_t)(x & 0x8000u) << 16;

    /* Both zeroes are produced by simply widening the bit pattern. */
    if (!(x & 0x7fffu))
        return (uint32_t)x << 16;

    uint32_t const exp = x & 0x7c00u;
    uint32_t const mant = x & 0x03ffu;

    if (exp == 0)
    {
        /* Denormal half: locate the leading mantissa bit with a
         * branchless De Bruijn-based integer log2, then renormalise.
         * The table stores 23 minus the bit position. */
        static int const log2table[32] =
        {
            23, 14, 22, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 20, 0,
            15, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 19, 0,
        };
        uint32_t v = mant | (mant >> 1);
        v |= v >> 2;
        v |= v >> 4;
        v |= v >> 8;
        uint32_t const shift = log2table[(v * 0x07c4acddu) >> 27];

        /* The leading mantissa bit need not be masked off: it lands on
         * float bit 23 and corrects the underestimated exponent. */
        return sign | (((125 - shift) << 23) + (mant << shift));
    }

    if (exp == 0x7c00u)
    {
        /* Inf stays Inf; any NaN payload becomes a quiet NaN. The
         * amd64 pipeline likes the if() better than a ternary. */
        if (mant == 0)
            return sign | 0x7f800000u;
        return sign | 0x7fc00000u;
    }

    /* Normalised case: rebias the exponent (15 → 127) and widen the
     * mantissa from 10 to 23 bits. */
    return sign | (((exp >> 10) + 112) << 23) | (mant << 13);
}
253
254/* Constructor from float. Uses the non-branching version because benchmarks
255 * indicate it is about 80% faster on amd64, and 20% faster on the PS3. The
256 * penalty of loading the lookup tables does not seem important. */
257half half::makefast(float f)
258{
259    union { float f; uint32_t x; } u = { f };
260    return makebits(float_to_half_nobranch(u.x));
261}
262
263/* Constructor from float with better precision. */
264half half::makeaccurate(float f)
265{
266    union { float f; uint32_t x; } u = { f };
267    return makebits(float_to_half_branch(u.x));
268}
269
270/* Cast to float. Uses the branching version because loading the tables
271 * for only one value is going to be cache-expensive. */
272float half::tofloat(half h)
273{
274    union { float f; uint32_t x; } u;
275    u.x = half_to_float_branch(h.bits);
276    return u.f;
277}
278
279size_t half::convert(half *dst, float const *src, size_t nelem)
280{
281    for (size_t i = 0; i < nelem; i++)
282    {
283        union { float f; uint32_t x; } u;
284        u.f = *src++;
285        *dst++ = makebits(float_to_half_nobranch(u.x));
286#if 0
287        /* AltiVec code. Will work one day. */
288        float_to_half_vector(dst, src);
289        src += 8;
290        dst += 8;
291        i += 7;
292#endif
293    }
294
295    return nelem;
296}
297
298size_t half::convert(float *dst, half const *src, size_t nelem)
299{
300    for (size_t i = 0; i < nelem; i++)
301    {
302        union { float f; uint32_t x; } u;
303#if !defined __CELLOS_LV2__
304        /* This code is really too slow on the PS3, even with the denormal
305         * handling stripped off. */
306        u.x = half_to_float_nobranch((*src++).bits);
307#else
308        u.x = half_to_float_branch((*src++).bits);
309#endif
310        *dst++ = u.f;
311    }
312
313    return nelem;
314}
315
316} /* namespace lol */
317
Note: See TracBrowser for help on using the repository browser.