source: trunk/src/real.cpp @ 982

Last change on this file since 982 was 982, checked in by sam, 9 years ago

core: add rounding to real->double conversion, fix a bug in the addition
code shortcut, fix bugs in the addition and subtraction, another one in
the multiplication code, and add new unit tests for most of these.

File size: 9.9 KB
Line 
1//
2// Lol Engine
3//
4// Copyright: (c) 2010-2011 Sam Hocevar <sam@hocevar.net>
5//   This program is free software; you can redistribute it and/or
6//   modify it under the terms of the Do What The Fuck You Want To
7//   Public License, Version 2, as published by Sam Hocevar. See
8//   http://sam.zoy.org/projects/COPYING.WTFPL for more details.
9//
10
11#if defined HAVE_CONFIG_H
12#   include "config.h"
13#endif
14
15#include <cstring>
16#include <cstdio>
17
18#include "core.h"
19
20using namespace std;
21
22namespace lol
23{
24
25real::real(float f)
26{
27    new(this) real((double)f);
28}
29
30real::real(double d)
31{
32    union { double d; uint64_t x; } u = { d };
33
34    uint32_t sign = (u.x >> 63) << 31;
35    uint32_t exponent = (u.x << 1) >> 53;
36
37    switch (exponent)
38    {
39    case 0x00:
40        m_signexp = sign;
41        break;
42    case 0x7ff:
43        m_signexp = sign | 0x7fffffffu;
44        break;
45    default:
46        m_signexp = sign | (exponent + (1 << 30) - (1 << 10));
47        break;
48    }
49
50    m_mantissa[0] = u.x >> 36;
51    m_mantissa[1] = u.x >> 20;
52    m_mantissa[2] = u.x >> 4;
53    m_mantissa[3] = u.x << 12;
54    memset(m_mantissa + 4, 0, sizeof(m_mantissa) - 4 * sizeof(m_mantissa[0]));
55}
56
57real::operator float() const
58{
59    return (float)(double)(*this);
60}
61
62real::operator double() const
63{
64    union { double d; uint64_t x; } u;
65
66    /* Get sign */
67    u.x = m_signexp >> 31;
68    u.x <<= 11;
69
70    /* Compute new exponent */
71    uint32_t exponent = (m_signexp << 1) >> 1;
72    int e = (int)exponent - (1 << 30) + (1 << 10);
73
74    if (e < 0)
75        u.x <<= 52;
76    else if (e >= 0x7ff)
77    {
78        u.x |= 0x7ff;
79        u.x <<= 52;
80    }
81    else
82    {
83        u.x |= e;
84
85        /* Store mantissa if necessary */
86        u.x <<= 16;
87        u.x |= m_mantissa[0];
88        u.x <<= 16;
89        u.x |= m_mantissa[1];
90        u.x <<= 16;
91        u.x |= m_mantissa[2];
92        u.x <<= 4;
93        u.x |= m_mantissa[3] >> 12;
94        /* Rounding */
95        u.x += (m_mantissa[3] >> 11) & 1;
96    }
97
98    return u.d;
99}
100
101real real::operator -() const
102{
103    real ret = *this;
104    ret.m_signexp ^= 0x80000000u;
105    return ret;
106}
107
108real real::operator +(real const &x) const
109{
110    if (x.m_signexp << 1 == 0)
111        return *this;
112
113    /* Ensure both arguments are positive. Otherwise, switch signs,
114     * or replace + with -. */
115    if (m_signexp >> 31)
116        return -(-*this + -x);
117
118    if (x.m_signexp >> 31)
119        return *this - (-x);
120
121    /* Ensure *this is the larger exponent (no need to be strictly larger,
122     * as in subtraction). Otherwise, switch. */
123    if ((m_signexp << 1) < (x.m_signexp << 1))
124        return x + *this;
125
126    real ret;
127
128    int e1 = m_signexp - (1 << 30) + 1;
129    int e2 = x.m_signexp - (1 << 30) + 1;
130
131    int bigoff = (e1 - e2) / (sizeof(uint16_t) * 8);
132    int off = e1 - e2 - bigoff * (sizeof(uint16_t) * 8);
133
134    ret.m_signexp = m_signexp;
135
136    uint32_t carry = 0;
137    for (int i = BIGITS; i--; )
138    {
139        carry += m_mantissa[i];
140        if (i - bigoff >= 0)
141            carry += x.m_mantissa[i - bigoff] >> off;
142
143        if (i - bigoff > 0)
144            carry += (x.m_mantissa[i - bigoff - 1] << (16 - off)) & 0xffffu;
145        else if (i - bigoff == 0)
146            carry += 0x0001u << (16 - off);
147
148        ret.m_mantissa[i] = carry;
149        carry >>= 16;
150    }
151
152    /* Renormalise in case we overflowed the mantissa */
153    if (carry)
154    {
155        carry--;
156        for (int i = 0; i < BIGITS; i++)
157        {
158            uint16_t tmp = ret.m_mantissa[i];
159            ret.m_mantissa[i] = (carry << 15) | (tmp >> 1);
160            carry = tmp & 0x0001u;
161        }
162        ret.m_signexp++;
163    }
164
165    return ret;
166}
167
168real real::operator -(real const &x) const
169{
170    if (x.m_signexp << 1 == 0)
171        return *this;
172
173    /* Ensure both arguments are positive. Otherwise, switch signs,
174     * or replace - with +. */
175    if (m_signexp >> 31)
176        return -(-*this + x);
177
178    if (x.m_signexp >> 31)
179        return (*this) + (-x);
180
181    /* Ensure *this is larger than x */
182    if (*this < x)
183        return -(x - *this);
184
185    real ret;
186
187    int e1 = m_signexp - (1 << 30) + 1;
188    int e2 = x.m_signexp - (1 << 30) + 1;
189
190    int bigoff = (e1 - e2) / (sizeof(uint16_t) * 8);
191    int off = e1 - e2 - bigoff * (sizeof(uint16_t) * 8);
192
193    ret.m_signexp = m_signexp;
194
195    int32_t carry = 0;
196    for (int i = 0; i < bigoff; i++)
197    {
198        carry -= x.m_mantissa[BIGITS - i];
199        carry = (carry & 0xffff0000u) | (carry >> 16);
200    }
201    carry -= x.m_mantissa[BIGITS - 1 - bigoff] & ((1 << off) - 1);
202    carry /= (1 << off);
203
204    for (int i = BIGITS; i--; )
205    {
206        carry += m_mantissa[i];
207        if (i - bigoff >= 0)
208            carry -= x.m_mantissa[i - bigoff] >> off;
209
210        if (i - bigoff > 0)
211            carry -= (x.m_mantissa[i - bigoff - 1] << (16 - off)) & 0xffffu;
212        else if (i - bigoff == 0)
213            carry -= 0x0001u << (16 - off);
214
215        ret.m_mantissa[i] = carry;
216        carry = (carry & 0xffff0000u) | (carry >> 16);
217    }
218
219    carry += 1;
220
221    /* Renormalise if we underflowed the mantissa */
222    if (carry == 0)
223    {
224        /* How much do we need to shift the mantissa? FIXME: this could
225         * be computed above */
226        off = 0;
227        for (int i = 0; i < BIGITS; i++)
228        {
229            if (!ret.m_mantissa[i])
230            {
231                off += sizeof(uint16_t) * 8;
232                continue;
233            }
234
235            for (uint16_t tmp = ret.m_mantissa[i]; tmp < 0x8000u; tmp <<= 1)
236                off++;
237            break;
238        }
239        if (off == BIGITS * sizeof(uint16_t) * 8)
240            ret.m_signexp &= 0x80000000u;
241        else
242        {
243            off++; /* Shift one more to get rid of the leading one */
244            ret.m_signexp -= off;
245
246            bigoff = off / (sizeof(uint16_t) * 8);
247            off -= bigoff * sizeof(uint16_t) * 8;
248
249            for (int i = 0; i < BIGITS; i++)
250            {
251                uint16_t tmp = 0;
252                if (i + bigoff < BIGITS)
253                    tmp |= ret.m_mantissa[i + bigoff] << off;
254                if (i + bigoff + 1 < BIGITS)
255                    tmp |= ret.m_mantissa[i + bigoff + 1] >> (16 - off);
256                ret.m_mantissa[i] = tmp;
257            }
258        }
259    }
260
261    return ret;
262}
263
264real real::operator *(real const &x) const
265{
266    real ret;
267
268    ret.m_signexp = (m_signexp ^ x.m_signexp) & 0x80000000u;
269    int e = (m_signexp & 0x7fffffffu) - (1 << 30) + 1
270          + (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
271
272    /* Accumulate low order product; no need to store it, we just
273     * want the carry value */
274    uint64_t carry = 0;
275    for (int i = 0; i < BIGITS; i++)
276    {
277        for (int j = 0; j < i + 1; j++)
278            carry += (uint32_t)m_mantissa[BIGITS - 1 - j]
279                   * (uint32_t)x.m_mantissa[BIGITS - 1 + j - i];
280        carry >>= 16;
281    }
282
283    for (int i = 0; i < BIGITS; i++)
284    {
285        for (int j = i + 1; j < BIGITS; j++)
286            carry += (uint32_t)m_mantissa[BIGITS - 1 - j]
287                   * (uint32_t)x.m_mantissa[j - 1 - i];
288
289        carry += m_mantissa[BIGITS - 1 - i];
290        carry += x.m_mantissa[BIGITS - 1 - i];
291        ret.m_mantissa[BIGITS - 1 - i] = carry & 0xffffu;
292        carry >>= 16;
293    }
294
295    /* Renormalise in case we overflowed the mantissa */
296    if (carry)
297    {
298        carry--;
299        for (int i = 0; i < BIGITS; i++)
300        {
301            uint16_t tmp = ret.m_mantissa[i];
302            ret.m_mantissa[i] = (carry << 15) | (tmp >> 1);
303            carry = tmp & 0x0001u;
304        }
305        e++;
306    }
307
308    ret.m_signexp |= e + (1 << 30) - 1;
309
310    return ret;
311}
312
313real real::operator /(real const &x) const
314{
315    return *this * fres(x);
316}
317
318bool real::operator <(real const &x) const
319{
320    /* Ensure both numbers are positive */
321    if (m_signexp >> 31)
322        return (x.m_signexp >> 31) ? -*this > -x : true;
323
324    if (x.m_signexp >> 31)
325        return false;
326
327    /* Compare all relevant bits */
328    if (m_signexp != x.m_signexp)
329        return m_signexp < x.m_signexp;
330
331    for (int i = 0; i < BIGITS; i++)
332        if (m_mantissa[i] != x.m_mantissa[i])
333            return m_mantissa[i] < x.m_mantissa[i];
334
335    return false;
336}
337
338bool real::operator <=(real const &x) const
339{
340    return !(*this > x);
341}
342
343bool real::operator >(real const &x) const
344{
345    /* Ensure both numbers are positive */
346    if (m_signexp >> 31)
347        return (x.m_signexp >> 31) ? -*this < -x : false;
348
349    if (x.m_signexp >> 31)
350        return true;
351
352    /* Compare all relevant bits */
353    if (m_signexp != x.m_signexp)
354        return m_signexp > x.m_signexp;
355
356    for (int i = 0; i < BIGITS; i++)
357        if (m_mantissa[i] != x.m_mantissa[i])
358            return m_mantissa[i] > x.m_mantissa[i];
359
360    return false;
361}
362
363bool real::operator >=(real const &x) const
364{
365    return !(*this < x);
366}
367
368real fres(real const &x)
369{
370    if (!(x.m_signexp << 1))
371    {
372        real ret = x;
373        ret.m_signexp = x.m_signexp | 0x7fffffffu;
374        ret.m_mantissa[0] = 0;
375        return ret;
376    }
377
378    /* Use the system's float inversion to approximate 1/x */
379    union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
380    v.x |= (uint32_t)x.m_mantissa[0] << 7;
381    v.x |= (uint32_t)x.m_mantissa[1] >> 9;
382    v.f = 1.0 / v.f;
383
384    real ret;
385    ret.m_mantissa[0] = (v.x >> 7) & 0xffffu;
386    ret.m_mantissa[1] = (v.x << 9) & 0xffffu;
387    /* Better convergence with the mantissa zeroed. */
388    memset(ret.m_mantissa + 2, 0, (real::BIGITS - 2) * sizeof(uint16_t));
389
390    uint32_t sign = x.m_signexp & 0x80000000u;
391    ret.m_signexp = sign;
392
393    int exponent = (x.m_signexp & 0x7fffffffu) + 1;
394    exponent = -exponent + (v.x >> 23) - (u.x >> 23);
395    ret.m_signexp |= (exponent - 1) & 0x7fffffffu;
396
397    /* Five steps of Newton-Raphson seems enough for 32-bigit reals. */
398    real two(2.0f);
399    ret = ret * (two - ret * x);
400    ret = ret * (two - ret * x);
401    ret = ret * (two - ret * x);
402    ret = ret * (two - ret * x);
403    ret = ret * (two - ret * x);
404
405    return ret;
406}
407
408void real::print() const
409{
410    printf("%x  %08x  ", m_signexp >> 31, (m_signexp << 1) >> 1);
411    for (int i = 0; i < BIGITS; i++)
412        printf("%04x ", m_mantissa[i]);
413    printf("\n");
414}
415
416} /* namespace lol */
417
Note: See TracBrowser for help on using the repository browser.