source: trunk/src/real.cpp @ 993

Last change on this file since 993 was 993, checked in by sam, 11 years ago

core: improve exp() on reals: faster (constant time) and a lot more
accurate.

File size: 16.6 KB
Line 
1//
2// Lol Engine
3//
4// Copyright: (c) 2010-2011 Sam Hocevar <sam@hocevar.net>
5//   This program is free software; you can redistribute it and/or
6//   modify it under the terms of the Do What The Fuck You Want To
7//   Public License, Version 2, as published by Sam Hocevar. See
8//   http://sam.zoy.org/projects/COPYING.WTFPL for more details.
9//
10
11#if defined HAVE_CONFIG_H
12#   include "config.h"
13#endif
14
15#include <cstring>
16#include <cstdio>
17
18#include "core.h"
19
20using namespace std;
21
22namespace lol
23{
24
25real::real(float f) { *this = (double)f; }
26real::real(int i) { *this = (double)i; }
27real::real(unsigned int i) { *this = (double)i; }
28
29real::real(double d)
30{
31    union { double d; uint64_t x; } u = { d };
32
33    uint32_t sign = (u.x >> 63) << 31;
34    uint32_t exponent = (u.x << 1) >> 53;
35
36    switch (exponent)
37    {
38    case 0x00:
39        m_signexp = sign;
40        break;
41    case 0x7ff:
42        m_signexp = sign | 0x7fffffffu;
43        break;
44    default:
45        m_signexp = sign | (exponent + (1 << 30) - (1 << 10));
46        break;
47    }
48
49    m_mantissa[0] = u.x >> 36;
50    m_mantissa[1] = u.x >> 20;
51    m_mantissa[2] = u.x >> 4;
52    m_mantissa[3] = u.x << 12;
53    memset(m_mantissa + 4, 0, sizeof(m_mantissa) - 4 * sizeof(m_mantissa[0]));
54}
55
56real::operator float() const { return (float)(double)(*this); }
57real::operator int() const { return (int)(double)(*this); }
58real::operator unsigned int() const { return (unsigned int)(double)(*this); }
59
60real::operator double() const
61{
62    union { double d; uint64_t x; } u;
63
64    /* Get sign */
65    u.x = m_signexp >> 31;
66    u.x <<= 11;
67
68    /* Compute new exponent */
69    uint32_t exponent = (m_signexp << 1) >> 1;
70    int e = (int)exponent - (1 << 30) + (1 << 10);
71
72    if (e < 0)
73        u.x <<= 52;
74    else if (e >= 0x7ff)
75    {
76        u.x |= 0x7ff;
77        u.x <<= 52;
78    }
79    else
80    {
81        u.x |= e;
82
83        /* Store mantissa if necessary */
84        u.x <<= 16;
85        u.x |= m_mantissa[0];
86        u.x <<= 16;
87        u.x |= m_mantissa[1];
88        u.x <<= 16;
89        u.x |= m_mantissa[2];
90        u.x <<= 4;
91        u.x |= m_mantissa[3] >> 12;
92        /* Rounding */
93        u.x += (m_mantissa[3] >> 11) & 1;
94    }
95
96    return u.d;
97}
98
99real real::operator -() const
100{
101    real ret = *this;
102    ret.m_signexp ^= 0x80000000u;
103    return ret;
104}
105
106real real::operator +(real const &x) const
107{
108    if (x.m_signexp << 1 == 0)
109        return *this;
110
111    /* Ensure both arguments are positive. Otherwise, switch signs,
112     * or replace + with -. */
113    if (m_signexp >> 31)
114        return -(-*this + -x);
115
116    if (x.m_signexp >> 31)
117        return *this - (-x);
118
119    /* Ensure *this has the larger exponent (no need for the mantissa to
120     * be larger, as in subtraction). Otherwise, switch. */
121    if ((m_signexp << 1) < (x.m_signexp << 1))
122        return x + *this;
123
124    real ret;
125
126    int e1 = m_signexp - (1 << 30) + 1;
127    int e2 = x.m_signexp - (1 << 30) + 1;
128
129    int bigoff = (e1 - e2) / (sizeof(uint16_t) * 8);
130    int off = e1 - e2 - bigoff * (sizeof(uint16_t) * 8);
131
132    if (bigoff > BIGITS)
133        return *this;
134
135    ret.m_signexp = m_signexp;
136
137    uint32_t carry = 0;
138    for (int i = BIGITS; i--; )
139    {
140        carry += m_mantissa[i];
141        if (i - bigoff >= 0)
142            carry += x.m_mantissa[i - bigoff] >> off;
143
144        if (i - bigoff > 0)
145            carry += (x.m_mantissa[i - bigoff - 1] << (16 - off)) & 0xffffu;
146        else if (i - bigoff == 0)
147            carry += 0x0001u << (16 - off);
148
149        ret.m_mantissa[i] = carry;
150        carry >>= 16;
151    }
152
153    /* Renormalise in case we overflowed the mantissa */
154    if (carry)
155    {
156        carry--;
157        for (int i = 0; i < BIGITS; i++)
158        {
159            uint16_t tmp = ret.m_mantissa[i];
160            ret.m_mantissa[i] = (carry << 15) | (tmp >> 1);
161            carry = tmp & 0x0001u;
162        }
163        ret.m_signexp++;
164    }
165
166    return ret;
167}
168
169real real::operator -(real const &x) const
170{
171    if (x.m_signexp << 1 == 0)
172        return *this;
173
174    /* Ensure both arguments are positive. Otherwise, switch signs,
175     * or replace - with +. */
176    if (m_signexp >> 31)
177        return -(-*this + x);
178
179    if (x.m_signexp >> 31)
180        return (*this) + (-x);
181
182    /* Ensure *this is larger than x */
183    if (*this < x)
184        return -(x - *this);
185
186    real ret;
187
188    int e1 = m_signexp - (1 << 30) + 1;
189    int e2 = x.m_signexp - (1 << 30) + 1;
190
191    int bigoff = (e1 - e2) / (sizeof(uint16_t) * 8);
192    int off = e1 - e2 - bigoff * (sizeof(uint16_t) * 8);
193
194    if (bigoff > BIGITS)
195        return *this;
196
197    ret.m_signexp = m_signexp;
198
199    int32_t carry = 0;
200    for (int i = 0; i < bigoff; i++)
201    {
202        carry -= x.m_mantissa[BIGITS - i];
203        carry = (carry & 0xffff0000u) | (carry >> 16);
204    }
205    carry -= x.m_mantissa[BIGITS - 1 - bigoff] & ((1 << off) - 1);
206    carry /= (1 << off);
207
208    for (int i = BIGITS; i--; )
209    {
210        carry += m_mantissa[i];
211        if (i - bigoff >= 0)
212            carry -= x.m_mantissa[i - bigoff] >> off;
213
214        if (i - bigoff > 0)
215            carry -= (x.m_mantissa[i - bigoff - 1] << (16 - off)) & 0xffffu;
216        else if (i - bigoff == 0)
217            carry -= 0x0001u << (16 - off);
218
219        ret.m_mantissa[i] = carry;
220        carry = (carry & 0xffff0000u) | (carry >> 16);
221    }
222
223    carry += 1;
224
225    /* Renormalise if we underflowed the mantissa */
226    if (carry == 0)
227    {
228        /* How much do we need to shift the mantissa? FIXME: this could
229         * be computed above */
230        off = 0;
231        for (int i = 0; i < BIGITS; i++)
232        {
233            if (!ret.m_mantissa[i])
234            {
235                off += sizeof(uint16_t) * 8;
236                continue;
237            }
238
239            for (uint16_t tmp = ret.m_mantissa[i]; tmp < 0x8000u; tmp <<= 1)
240                off++;
241            break;
242        }
243        if (off == BIGITS * sizeof(uint16_t) * 8)
244            ret.m_signexp &= 0x80000000u;
245        else
246        {
247            off++; /* Shift one more to get rid of the leading one */
248            ret.m_signexp -= off;
249
250            bigoff = off / (sizeof(uint16_t) * 8);
251            off -= bigoff * sizeof(uint16_t) * 8;
252
253            for (int i = 0; i < BIGITS; i++)
254            {
255                uint16_t tmp = 0;
256                if (i + bigoff < BIGITS)
257                    tmp |= ret.m_mantissa[i + bigoff] << off;
258                if (i + bigoff + 1 < BIGITS)
259                    tmp |= ret.m_mantissa[i + bigoff + 1] >> (16 - off);
260                ret.m_mantissa[i] = tmp;
261            }
262        }
263    }
264
265    return ret;
266}
267
268real real::operator *(real const &x) const
269{
270    real ret;
271
272    if (m_signexp << 1 == 0 || x.m_signexp << 1 == 0)
273    {
274        ret = (m_signexp << 1 == 0) ? *this : x;
275        ret.m_signexp ^= x.m_signexp & 0x80000000u;
276        return ret;
277    }
278
279    ret.m_signexp = (m_signexp ^ x.m_signexp) & 0x80000000u;
280    int e = (m_signexp & 0x7fffffffu) - (1 << 30) + 1
281          + (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
282
283    /* Accumulate low order product; no need to store it, we just
284     * want the carry value */
285    uint64_t carry = 0;
286    for (int i = 0; i < BIGITS; i++)
287    {
288        for (int j = 0; j < i + 1; j++)
289            carry += (uint32_t)m_mantissa[BIGITS - 1 - j]
290                   * (uint32_t)x.m_mantissa[BIGITS - 1 + j - i];
291        carry >>= 16;
292    }
293
294    for (int i = 0; i < BIGITS; i++)
295    {
296        for (int j = i + 1; j < BIGITS; j++)
297            carry += (uint32_t)m_mantissa[BIGITS - 1 - j]
298                   * (uint32_t)x.m_mantissa[j - 1 - i];
299
300        carry += m_mantissa[BIGITS - 1 - i];
301        carry += x.m_mantissa[BIGITS - 1 - i];
302        ret.m_mantissa[BIGITS - 1 - i] = carry & 0xffffu;
303        carry >>= 16;
304    }
305
306    /* Renormalise in case we overflowed the mantissa */
307    if (carry)
308    {
309        carry--;
310        for (int i = 0; i < BIGITS; i++)
311        {
312            uint16_t tmp = ret.m_mantissa[i];
313            ret.m_mantissa[i] = (carry << 15) | (tmp >> 1);
314            carry = tmp & 0x0001u;
315        }
316        e++;
317    }
318
319    ret.m_signexp |= e + (1 << 30) - 1;
320
321    return ret;
322}
323
324real real::operator /(real const &x) const
325{
326    return *this * fres(x);
327}
328
329real &real::operator +=(real const &x)
330{
331    real tmp = *this;
332    return *this = tmp + x;
333}
334
335real &real::operator -=(real const &x)
336{
337    real tmp = *this;
338    return *this = tmp - x;
339}
340
341real &real::operator *=(real const &x)
342{
343    real tmp = *this;
344    return *this = tmp * x;
345}
346
347real &real::operator /=(real const &x)
348{
349    real tmp = *this;
350    return *this = tmp / x;
351}
352
353bool real::operator ==(real const &x) const
354{
355    if ((m_signexp << 1) == 0 && (x.m_signexp << 1) == 0)
356        return true;
357
358    if (m_signexp != x.m_signexp)
359        return false;
360
361    return memcmp(m_mantissa, x.m_mantissa, sizeof(m_mantissa)) == 0;
362}
363
364bool real::operator !=(real const &x) const
365{
366    return !(*this == x);
367}
368
369bool real::operator <(real const &x) const
370{
371    /* Ensure both numbers are positive */
372    if (m_signexp >> 31)
373        return (x.m_signexp >> 31) ? -*this > -x : true;
374
375    if (x.m_signexp >> 31)
376        return false;
377
378    /* Compare all relevant bits */
379    if (m_signexp != x.m_signexp)
380        return m_signexp < x.m_signexp;
381
382    for (int i = 0; i < BIGITS; i++)
383        if (m_mantissa[i] != x.m_mantissa[i])
384            return m_mantissa[i] < x.m_mantissa[i];
385
386    return false;
387}
388
389bool real::operator <=(real const &x) const
390{
391    return !(*this > x);
392}
393
394bool real::operator >(real const &x) const
395{
396    /* Ensure both numbers are positive */
397    if (m_signexp >> 31)
398        return (x.m_signexp >> 31) ? -*this < -x : false;
399
400    if (x.m_signexp >> 31)
401        return true;
402
403    /* Compare all relevant bits */
404    if (m_signexp != x.m_signexp)
405        return m_signexp > x.m_signexp;
406
407    for (int i = 0; i < BIGITS; i++)
408        if (m_mantissa[i] != x.m_mantissa[i])
409            return m_mantissa[i] > x.m_mantissa[i];
410
411    return false;
412}
413
414bool real::operator >=(real const &x) const
415{
416    return !(*this < x);
417}
418
419real fres(real const &x)
420{
421    if (!(x.m_signexp << 1))
422    {
423        real ret = x;
424        ret.m_signexp = x.m_signexp | 0x7fffffffu;
425        ret.m_mantissa[0] = 0;
426        return ret;
427    }
428
429    /* Use the system's float inversion to approximate 1/x */
430    union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
431    v.x |= (uint32_t)x.m_mantissa[0] << 7;
432    v.x |= (uint32_t)x.m_mantissa[1] >> 9;
433    v.f = 1.0 / v.f;
434
435    real ret;
436    ret.m_mantissa[0] = (v.x >> 7) & 0xffffu;
437    ret.m_mantissa[1] = (v.x << 9) & 0xffffu;
438
439    uint32_t sign = x.m_signexp & 0x80000000u;
440    ret.m_signexp = sign;
441
442    int exponent = (x.m_signexp & 0x7fffffffu) + 1;
443    exponent = -exponent + (v.x >> 23) - (u.x >> 23);
444    ret.m_signexp |= (exponent - 1) & 0x7fffffffu;
445
446    /* Five steps of Newton-Raphson seems enough for 32-bigit reals. */
447    real two = 2;
448    ret = ret * (two - ret * x);
449    ret = ret * (two - ret * x);
450    ret = ret * (two - ret * x);
451    ret = ret * (two - ret * x);
452    ret = ret * (two - ret * x);
453
454    return ret;
455}
456
457real sqrt(real const &x)
458{
459    /* if zero, return x */
460    if (!(x.m_signexp << 1))
461        return x;
462
463    /* if negative, return NaN */
464    if (x.m_signexp >> 31)
465    {
466        real ret;
467        ret.m_signexp = 0x7fffffffu;
468        ret.m_mantissa[0] = 0xffffu;
469        return ret;
470    }
471
472    /* Use the system's float inversion to approximate 1/sqrt(x). First
473     * we construct a float in the [1..4[ range that has roughly the same
474     * mantissa as our real. Its exponent is 0 or 1, depending on the
475     * partity of x. The final exponent is 0, -1 or -2. We use the final
476     * exponent and final mantissa to pre-fill the result. */
477    union { float f; uint32_t x; } u = { 1.0f }, v = { 2.0f };
478    v.x -= ((x.m_signexp & 1) << 23);
479    v.x |= (uint32_t)x.m_mantissa[0] << 7;
480    v.x |= (uint32_t)x.m_mantissa[1] >> 9;
481    v.f = 1.0 / sqrtf(v.f);
482
483    real ret;
484    ret.m_mantissa[0] = (v.x >> 7) & 0xffffu;
485    ret.m_mantissa[1] = (v.x << 9) & 0xffffu;
486
487    uint32_t sign = x.m_signexp & 0x80000000u;
488    ret.m_signexp = sign;
489
490    int exponent = (x.m_signexp & 0x7fffffffu) - ((1 << 30) - 1);
491    exponent = - (exponent / 2) + (v.x >> 23) - (u.x >> 23);
492    ret.m_signexp |= (exponent + ((1 << 30) - 1)) & 0x7fffffffu;
493
494    /* Five steps of Newton-Raphson seems enough for 32-bigit reals. */
495    real three = 3;
496    ret = ret * (three - ret * ret * x);
497    ret.m_signexp--;
498    ret = ret * (three - ret * ret * x);
499    ret.m_signexp--;
500    ret = ret * (three - ret * ret * x);
501    ret.m_signexp--;
502    ret = ret * (three - ret * ret * x);
503    ret.m_signexp--;
504    ret = ret * (three - ret * ret * x);
505    ret.m_signexp--;
506
507    return ret * x;
508}
509
510real fabs(real const &x)
511{
512    real ret = x;
513    ret.m_signexp &= 0x7fffffffu;
514    return ret;
515}
516
517static real fastlog(real const &x)
518{
519    /* This fast log method is tuned to work on the [1..2] range and
520     * no effort whatsoever was made to improve convergence outside this
521     * domain of validity. It can converge pretty fast, provided we use
522     * the following variable substitutions:
523     *    y = sqrt(x)
524     *    z = (y - 1) / (y + 1)
525     *
526     * And the following identities:
527     *    ln(x) = 2 ln(y)
528     *          = 2 ln((1 + z) / (1 - z))
529     *          = 4 z (1 + z^2 / 3 + z^4 / 5 + z^6 / 7...)
530     *
531     * Any additional sqrt() call would halve the convergence time, but
532     * would also impact the final precision. For now we stick with one
533     * sqrt() call. */
534    real y = sqrt(x);
535    real z = (y - (real)1) / (y + (real)1), z2 = z * z, zn = z2;
536    real sum = 1.0;
537
538    for (int i = 3; i < 200; i += 2)
539    {
540        sum += zn / (real)i;
541        zn *= z2;
542    }
543
544    /* FIXME: sum.m_signexp += 2; (but needs private data access) */
545    sum *= (real)4;
546    return z * sum;
547}
548
549static real LOG_2 = fastlog((real)2);
550
551real log(real const &x)
552{
553    /* Strategy for log(x): if x = 2^E*M then log(x) = E log(2) + log(M),
554     * with the property that M is in [1..2[, so fastlog() applies here. */
555    real tmp = x;
556    if (x.m_signexp >> 31 || x.m_signexp == 0)
557    {
558        tmp.m_signexp = 0xffffffffu;
559        tmp.m_mantissa[0] = 0xffffu;
560        return tmp;
561    }
562    tmp.m_signexp = (1 << 30) - 1;
563    return (real)(x.m_signexp - (1 << 30) + 1) * LOG_2 + fastlog(tmp);
564}
565
566real exp(real const &x)
567{
568    /* Strategy for exp(x): the Taylor series does not converge very fast
569     * with large positive or negative values.
570     *
571     * However, we know that the result is going to be in the form M*2^E,
572     * where M is the mantissa and E the exponent. We first try to predict
573     * a value for E, which is approximately log2(exp(x)) = x / log(2).
574     *
575     * Let E0 be an integer close to x / log(2). We need to find a value x0
576     * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
577     *
578     * Thus the final algorithm:
579     *  int E0 = x / log(2)
580     *  real x0 = x - E0 log(2)
581     *  real x1 = exp(x0)
582     *  return x1 * 2^E0
583     */
584    int e0 = x / LOG_2;
585    real x0 = x - (real)e0 * LOG_2;
586    real x1 = 1.0, fact = 1.0, xn = x0;
587
588    for (int i = 1; i < 100; i++)
589    {
590        fact *= (real)i;
591        x1 += xn / fact;
592        xn *= x0;
593    }
594
595    x1.m_signexp += e0;
596    return x1;
597}
598
599real sin(real const &x)
600{
601    real ret = 0.0, fact = 1.0, xn = x, x2 = x * x;
602
603    for (int i = 1; ; i += 2)
604    {
605        real newret = ret + xn / fact;
606        if (ret == newret)
607            break;
608        ret = newret;
609        xn *= x2;
610        fact *= (real)(-(i + 1) * (i + 2));
611    }
612
613    return ret;
614}
615
616real cos(real const &x)
617{
618    real ret = 0.0, fact = 1.0, xn = 1.0, x2 = x * x;
619
620    for (int i = 1; ; i += 2)
621    {
622        real newret = ret + xn / fact;
623        if (ret == newret)
624            break;
625        ret = newret;
626        xn *= x2;
627        fact *= (real)(-i * (i + 1));
628    }
629
630    return ret;
631}
632
633void real::print(int ndigits) const
634{
635    real const r1 = 1, r10 = 10;
636    real x = *this;
637
638    if (x.m_signexp >> 31)
639    {
640        printf("-");
641        x = -x;
642    }
643
644    /* Normalise x so that mantissa is in [1..9.999] */
645    int exponent = 0;
646    if (x.m_signexp)
647    {
648        for (real div = r1, newdiv; true; div = newdiv)
649        {
650            newdiv = div * r10;
651            if (x < newdiv)
652            {
653                x /= div;
654                break;
655            }
656            exponent++;
657        }
658        for (real mul = 1, newx; true; mul *= r10)
659        {
660            newx = x * mul;
661            if (newx >= r1)
662            {
663                x = newx;
664                break;
665            }
666            exponent--;
667        }
668    }
669
670    /* Print digits */
671    for (int i = 0; i < ndigits; i++)
672    {
673        int digit = (int)x;
674        printf("%i", digit);
675        if (i == 0)
676            printf(".");
677        x -= real(digit);
678        x *= r10;
679    }
680
681    /* Print exponent information */
682    if (exponent < 0)
683        printf("e-%i", -exponent);
684    else if (exponent > 0)
685        printf("e+%i", exponent);
686
687    printf("\n");
688}
689
690} /* namespace lol */
691
Note: See TracBrowser for help on using the repository browser.