1 | // |
---|
2 | // Lol Engine |
---|
3 | // |
---|
4 | // Copyright: (c) 2010-2011 Sam Hocevar <sam@hocevar.net> |
---|
5 | // This program is free software; you can redistribute it and/or |
---|
6 | // modify it under the terms of the Do What The Fuck You Want To |
---|
7 | // Public License, Version 2, as published by Sam Hocevar. See |
---|
8 | // http://sam.zoy.org/projects/COPYING.WTFPL for more details. |
---|
9 | // |
---|
10 | |
---|
11 | #if defined HAVE_CONFIG_H |
---|
12 | # include "config.h" |
---|
13 | #endif |
---|
14 | |
---|
15 | #include "core.h" |
---|
16 | |
---|
17 | using namespace std; |
---|
18 | |
---|
/* Table-generation helpers: SN(x) expands to the N comma-separated items
 * S1(N*x), S1(N*x + 1), ..., S1(N*x + N - 1). The user is expected to
 * #define S1(i) before each use (and #undef it afterwards) so that S256(0)
 * yields a 256-entry initialiser list for indices 0..255. */
#define S4(x)   S1(4 * (x) + 0),  S1(4 * (x) + 1),  S1(4 * (x) + 2),  S1(4 * (x) + 3)
#define S16(x)  S4(4 * (x) + 0),  S4(4 * (x) + 1),  S4(4 * (x) + 2),  S4(4 * (x) + 3)
#define S64(x)  S16(4 * (x) + 0), S16(4 * (x) + 1), S16(4 * (x) + 2), S16(4 * (x) + 3)
#define S256(x) S64(4 * (x) + 0), S64(4 * (x) + 1), S64(4 * (x) + 2), S64(4 * (x) + 3)
---|
23 | |
---|
24 | namespace lol |
---|
25 | { |
---|
26 | |
---|
27 | half half::makefast(float f) |
---|
28 | { |
---|
29 | /* Lookup table-based algorithm from “Fast Half Float Conversions” |
---|
30 | * by Jeroen van der Zijp, November 2008. No rounding is performed. */ |
---|
31 | static uint16_t const basetable[512] = |
---|
32 | { |
---|
33 | #define S1(i) (((i) < 103) ? 0x0000: \ |
---|
34 | ((i) < 113) ? 0x0400 >> (113 - (i)) : \ |
---|
35 | ((i) < 143) ? ((i) - 112) << 10 : 0x7c00) |
---|
36 | S256(0), |
---|
37 | #undef S1 |
---|
38 | #define S1(i) (0x8000 | (((i) < 103) ? 0x0000 : \ |
---|
39 | ((i) < 113) ? 0x0400 >> (113 - (i)): \ |
---|
40 | ((i) < 143) ? ((i) - 112) << 10 : 0x7c00)) |
---|
41 | S256(0), |
---|
42 | #undef S1 |
---|
43 | }; |
---|
44 | |
---|
45 | static uint8_t const shifttable[512] = |
---|
46 | { |
---|
47 | #define S1(i) (((i) < 103) ? 24 : \ |
---|
48 | ((i) < 113) ? 126 - (i) : \ |
---|
49 | ((i) < 143 || (i) == 255) ? 13 : 24) |
---|
50 | S256(0), S256(0), |
---|
51 | #undef S1 |
---|
52 | }; |
---|
53 | |
---|
54 | union { float f; uint32_t x; } u = { f }; |
---|
55 | |
---|
56 | uint16_t bits = basetable[(u.x >> 23) & 0x1ff]; |
---|
57 | bits |= (u.x & 0x007fffff) >> shifttable[(u.x >> 23) & 0x1ff]; |
---|
58 | return makebits(bits); |
---|
59 | } |
---|
60 | |
---|
61 | half half::makeslow(float f) |
---|
62 | { |
---|
63 | union { float f; uint32_t x; } u = { f }; |
---|
64 | |
---|
65 | uint16_t bits = (u.x >> 16) & 0x8000; /* Get the sign */ |
---|
66 | uint16_t m = (u.x >> 12) & 0x07ff; /* Keep one extra bit for rounding */ |
---|
67 | unsigned int e = (u.x >> 23) & 0xff; /* Using int is faster here */ |
---|
68 | |
---|
69 | /* If zero, or denormal, or exponent underflows too much for a denormal, |
---|
70 | * return signed zero */ |
---|
71 | if (e < 103) |
---|
72 | return makebits(bits); |
---|
73 | |
---|
74 | /* If NaN, Inf or exponent overflow, return NaN or Inf */ |
---|
75 | if (e > 142) |
---|
76 | { |
---|
77 | bits |= 0x7c00u; |
---|
78 | bits |= e == 255 && (u.x & 0x007fffffu); |
---|
79 | return makebits(bits); |
---|
80 | } |
---|
81 | |
---|
82 | /* If exponent underflows but not too much, return a denormal */ |
---|
83 | if (e < 113) |
---|
84 | { |
---|
85 | m |= 0x0800u; |
---|
86 | bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); |
---|
87 | return makebits(bits); |
---|
88 | } |
---|
89 | |
---|
90 | bits |= ((e - 112) << 10) | (m >> 1); |
---|
91 | bits += m & 1; /* Overflows here are expected and handled */ |
---|
92 | return makebits(bits); |
---|
93 | } |
---|
94 | |
---|
95 | } /* namespace lol */ |
---|
96 | |
---|