# source:trunk/src/half.cpp@877Tweet

Last change on this file since 877 was 877, checked in by sam, 8 years ago

test: improve the benchmark code to measure a lot more half precision
number conversions.

• Property svn:keywords set to `Id`
File size: 7.5 KB
Line
1//
2// Lol Engine
3//
4// Copyright: (c) 2010-2011 Sam Hocevar <sam@hocevar.net>
5//   This program is free software; you can redistribute it and/or
6//   modify it under the terms of the Do What The Fuck You Want To
8//   http://sam.zoy.org/projects/COPYING.WTFPL for more details.
9//
10
11#if defined HAVE_CONFIG_H
12#   include "config.h"
13#endif
14
15#include "core.h"
16
17using namespace std;
18
19namespace lol
20{
21
22/* These macros implement a finite iterator useful to build lookup
23 * tables. For instance, S64(0) will call S1(x) for all values of x
24 * between 0 and 63.
25 * Due to the exponential behaviour of the calls, the stress on the
26 * compiler may be important. */
27#define S4(x)    S1((x)),   S1((x)+1),     S1((x)+2),     S1((x)+3)
28#define S16(x)   S4((x)),   S4((x)+4),     S4((x)+8),     S4((x)+12)
29#define S64(x)   S16((x)),  S16((x)+16),   S16((x)+32),   S16((x)+48)
30#define S256(x)  S64((x)),  S64((x)+64),   S64((x)+128),  S64((x)+192)
31#define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768)
32
33/* Lookup table-based algorithm from “Fast Half Float Conversions”
34 * by Jeroen van der Zijp, November 2008. No rounding is performed,
35 * and some NaN values may be incorrectly converted to Inf. */
36static inline uint16_t float_to_half_nobranch(uint32_t x)
37{
38    static uint16_t const basetable[512] =
39    {
40#define S1(i) (((i) < 103) ? 0x0000: \
41               ((i) < 113) ? 0x0400 >> (113 - (i)) : \
42               ((i) < 143) ? ((i) - 112) << 10 : 0x7c00)
43        S256(0),
44#undef S1
45#define S1(i) (0x8000 | (((i) < 103) ? 0x0000 : \
46                         ((i) < 113) ? 0x0400 >> (113 - (i)): \
47                         ((i) < 143) ? ((i) - 112) << 10 : 0x7c00))
48        S256(0),
49#undef S1
50    };
51
52    static uint8_t const shifttable[512] =
53    {
54#define S1(i) (((i) < 103) ? 24 : \
55               ((i) < 113) ? 126 - (i) : \
56               ((i) < 143 || (i) == 255) ? 13 : 24)
57        S256(0), S256(0),
58#undef S1
59    };
60
61    uint16_t bits = basetable[(x >> 23) & 0x1ff];
62    bits |= (x & 0x007fffff) >> shifttable[(x >> 23) & 0x1ff];
63    return bits;
64}
65
66/* This method is faster than the OpenEXR implementation (very often
67 * used, eg. in Ogre), with the additional benefit of rounding, inspired
68 * by James Tursa’s half-precision code. */
69static inline uint16_t float_to_half_branch(uint32_t x)
70{
71    uint16_t bits = (x >> 16) & 0x8000; /* Get the sign */
72    uint16_t m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */
73    unsigned int e = (x >> 23) & 0xff; /* Using int is faster here */
74
75    /* If zero, or denormal, or exponent underflows too much for a denormal,
76     * return signed zero. */
77    if (e < 103)
78        return bits;
79
80    /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
81    if (e > 142)
82    {
83        bits |= 0x7c00u;
84        /* If exponent was 0xff and one mantissa bit was set, it means NaN,
85         * not Inf, so make sure we set one mantissa bit too. */
86        bits |= e == 255 && (x & 0x007fffffu);
87        return bits;
88    }
89
90    /* If exponent underflows but not too much, return a denormal */
91    if (e < 113)
92    {
93        m |= 0x0800u;
94        /* Extra rounding may overflow and set mantissa to 0 and exponent
95         * to 1, which is OK. */
96        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
97        return bits;
98    }
99
100    bits |= ((e - 112) << 10) | (m >> 1);
101    /* Extra rounding. An overflow will set mantissa to 0 and increment
102     * the exponent, which is OK. */
103    bits += m & 1;
104    return bits;
105}
106
107static int const shifttable[32] =
108{
109    23, 14, 22, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 20, 0,
110    15, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 19, 0,
111};
112static uint32_t const shiftmagic = 0x07c4acddu;
113
114/* Lookup table-based algorithm from “Fast Half Float Conversions”
115 * by Jeroen van der Zijp, November 2008. Tables are generated using
116 * the C++ preprocessor, thanks to a branchless implementation also
117 * used in half_to_float_branch(). This code is very fast when performing
118 * conversions on arrays of values. */
119static inline uint32_t half_to_float_nobranch(uint16_t x)
120{
121#define M3(i) ((i) | ((i) >> 1))
122#define M7(i) (M3(i) | (M3(i) >> 2))
123#define MF(i) (M7(i) | (M7(i) >> 4))
124#define MFF(i) (MF(i) | (MF(i) >> 8))
125#define E(i) shifttable[(unsigned int)(MFF(i) * shiftmagic) >> 27]
126
127    static uint32_t const mantissatable[2048] =
128    {
129#define S1(i) (((i) == 0) ? 0 : ((125 - E(i)) << 23) + ((i) << E(i)))
130        S1024(0),
131#undef S1
132#define S1(i) (0x38000000u + ((i) << 13))
133        S1024(0),
134#undef S1
135    };
136
137    static uint32_t const exponenttable[64] =
138    {
139#define S1(i) (((i) == 0) ? 0 : \
140               ((i) < 31) ? ((i) << 23) : \
141               ((i) == 31) ? 0x47800000u : \
142               ((i) == 32) ? 0x80000000u : \
143               ((i) < 63) ? (0x80000000u + (((i) - 32) << 23)) : 0xc7800000)
144        S64(0),
145#undef S1
146    };
147
148    static int const offsettable[64] =
149    {
150#define S1(i) (((i) == 0 || (i) == 32) ? 0 : 1024)
151        S64(0),
152#undef S1
153    };
154
155    return mantissatable[offsettable[x >> 10] + (x & 0x3ff)]
156            + exponenttable[x >> 10];
157}
158
159/* This algorithm is similar to the OpenEXR implementation, except it
160 * uses branchless code in the denormal path. This is slower than the
161 * table version, but will be more friendly to the cache for occasional
162 * uses. */
163static inline uint32_t half_to_float_branch(uint16_t x)
164{
165    uint32_t s = (x & 0x8000u) << 16;
166
167    if ((x & 0x7fffu) == 0)
168        return (uint32_t)x << 16;
169
170    uint32_t e = x & 0x7c00u;
171    uint32_t m = x & 0x03ffu;
172
173    if (e == 0)
174    {
175        uint32_t v = m | (m >> 1);
176        v |= v >> 2;
177        v |= v >> 4;
178        v |= v >> 8;
179
180        e = shifttable[(v * shiftmagic) >> 27];
181
182        /* We don't have to remove the 10th mantissa bit because it gets
183         * added to our underestimated exponent. */
184        return s | (((125 - e) << 23) + (m << e));
185    }
186
187    if (e == 0x7c00u)
188    {
189        /* The amd64 pipeline likes the if() better than a ternary operator
190         * or any other trick I could find. --sam */
191        if (m == 0)
192            return s | 0x7f800000u;
193        return s | 0x7fc00000u;
194    }
195
196    return s | (((e >> 10) + 112) << 23) | (m << 13);
197}
198
199/* Constructor from float. Uses the non-branching version because benchmarks
200 * indicate it is always twice as fast. The penalty of loading the lookup
201 * tables does not seem important. */
202half half::makefast(float f)
203{
204    union { float f; uint32_t x; } u = { f };
205    return makebits(float_to_half_nobranch(u.x));
206}
207
208/* Constructor from float with better precision. */
209half half::makeslow(float f)
210{
211    union { float f; uint32_t x; } u = { f };
212    return makebits(float_to_half_branch(u.x));
213}
214
215/* Cast to float. Uses the branching version because loading the tables
216 * for only one value is going to be cache-expensive. */
217half::operator float() const
218{
219    /* FIXME: there is a hidden "this" in this method. Export more
220     * code so that it can all work in registers instead. */
221    union { float f; uint32_t x; } u;
222    u.x = half_to_float_branch(bits);
223    return u.f;
224}
225
226size_t half::convert(half *dst, float const *src, size_t nelem)
227{
228    for (size_t i = 0; i < nelem; i++)
229    {
230        union { float f; uint32_t x; } u;
231        u.f = *src++;
232        *dst++ = makebits(float_to_half_nobranch(u.x));
233    }
234
235    return nelem;
236}
237
238size_t half::convert(float *dst, half const *src, size_t nelem)
239{
240    for (size_t i = 0; i < nelem; i++)
241    {
242        union { float f; uint32_t x; } u;
243        u.x = half_to_float_nobranch((*src++).bits);
244        *dst++ = u.f;
245    }
246
247    return nelem;
248}
249
250} /* namespace lol */
251
Note: See TracBrowser for help on using the repository browser.