source: trunk/src/real.cpp @ 1130

Last change on this file since 1130 was 1130, checked in by sam, 11 years ago

math: implement ulp() for reals, which returns the smallest real y > 0 such
that x + y != x, and nextafter() which behaves like the C function.

File size: 33.8 KB
Line 
1//
2// Lol Engine
3//
4// Copyright: (c) 2010-2012 Sam Hocevar <sam@hocevar.net>
5//   This program is free software; you can redistribute it and/or
6//   modify it under the terms of the Do What The Fuck You Want To
7//   Public License, Version 2, as published by Sam Hocevar. See
8//   http://sam.zoy.org/projects/COPYING.WTFPL for more details.
9//
10
11#if defined HAVE_CONFIG_H
12#   include "config.h"
13#endif
14
15#if defined WIN32 && !defined _XBOX
16#   define _USE_MATH_DEFINES /* for M_PI */
17#   define WIN32_LEAN_AND_MEAN
18#   include <windows.h>
19#   undef near /* Fuck Microsoft */
20#   undef far /* Fuck Microsoft again */
21#endif
22
23#include <new>
24#include <cstring>
25#include <cstdio>
26#include <cstdlib>
27#include <cmath>
28
29#include "core.h"
30
31using namespace std;
32
33namespace lol
34{
35
36real::real()
37{
38    m_mantissa = new uint32_t[BIGITS];
39    m_signexp = 0;
40}
41
42real::real(real const &x)
43{
44    m_mantissa = new uint32_t[BIGITS];
45    memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
46    m_signexp = x.m_signexp;
47}
48
49real const &real::operator =(real const &x)
50{
51    if (&x != this)
52    {
53        memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
54        m_signexp = x.m_signexp;
55    }
56
57    return *this;
58}
59
60real::~real()
61{
62    delete[] m_mantissa;
63}
64
65real::real(float f) { new(this) real((double)f); }
66real::real(int i) { new(this) real((double)i); }
67real::real(unsigned int i) { new(this) real((double)i); }
68
69real::real(double d)
70{
71    new(this) real();
72
73    union { double d; uint64_t x; } u = { d };
74
75    uint32_t sign = (u.x >> 63) << 31;
76    uint32_t exponent = (u.x << 1) >> 53;
77
78    switch (exponent)
79    {
80    case 0x00:
81        m_signexp = sign;
82        break;
83    case 0x7ff:
84        m_signexp = sign | 0x7fffffffu;
85        break;
86    default:
87        m_signexp = sign | (exponent + (1 << 30) - (1 << 10));
88        break;
89    }
90
91    m_mantissa[0] = (uint32_t)(u.x >> 20);
92    m_mantissa[1] = (uint32_t)(u.x << 12);
93    memset(m_mantissa + 2, 0, (BIGITS - 2) * sizeof(m_mantissa[0]));
94}
95
96real::operator float() const { return (float)(double)(*this); }
97real::operator int() const { return (int)(double)(*this); }
98real::operator unsigned int() const { return (unsigned int)(double)(*this); }
99
100real::operator double() const
101{
102    union { double d; uint64_t x; } u;
103
104    /* Get sign */
105    u.x = m_signexp >> 31;
106    u.x <<= 11;
107
108    /* Compute new exponent */
109    uint32_t exponent = (m_signexp << 1) >> 1;
110    int e = (int)exponent - (1 << 30) + (1 << 10);
111
112    if (e < 0)
113        u.x <<= 52;
114    else if (e >= 0x7ff)
115    {
116        u.x |= 0x7ff;
117        u.x <<= 52;
118    }
119    else
120    {
121        u.x |= e;
122
123        /* Store mantissa if necessary */
124        u.x <<= 32;
125        u.x |= m_mantissa[0];
126        u.x <<= 20;
127        u.x |= m_mantissa[1] >> 12;
128        /* Rounding */
129        u.x += (m_mantissa[1] >> 11) & 1;
130    }
131
132    return u.d;
133}
134
135/*
136 * Create a real number from an ASCII representation
137 */
138real::real(char const *str)
139{
140    real ret = 0;
141    int exponent = 0;
142    bool comma = false, nonzero = false, negative = false, finished = false;
143
144    for (char const *p = str; *p && !finished; p++)
145    {
146        switch (*p)
147        {
148        case '-':
149        case '+':
150            if (p != str)
151                break;
152            negative = (*p == '-');
153            break;
154        case '.':
155            if (comma)
156                finished = true;
157            comma = true;
158            break;
159        case '0': case '1': case '2': case '3': case '4':
160        case '5': case '6': case '7': case '8': case '9':
161            if (nonzero)
162            {
163                real x = ret + ret;
164                x = x + x + ret;
165                ret = x + x;
166            }
167            if (*p != '0')
168            {
169                ret += (int)(*p - '0');
170                nonzero = true;
171            }
172            if (comma)
173                exponent--;
174            break;
175        case 'e':
176        case 'E':
177            exponent += atoi(p + 1);
178            finished = true;
179            break;
180        default:
181            finished = true;
182            break;
183        }
184    }
185
186    if (exponent)
187        ret *= pow(R_10, (real)exponent);
188
189    if (negative)
190        ret = -ret;
191
192    new(this) real(ret);
193}
194
195real real::operator +() const
196{
197    return *this;
198}
199
200real real::operator -() const
201{
202    real ret = *this;
203    ret.m_signexp ^= 0x80000000u;
204    return ret;
205}
206
207real real::operator +(real const &x) const
208{
209    if (x.m_signexp << 1 == 0)
210        return *this;
211
212    /* Ensure both arguments are positive. Otherwise, switch signs,
213     * or replace + with -. */
214    if (m_signexp >> 31)
215        return -(-*this + -x);
216
217    if (x.m_signexp >> 31)
218        return *this - (-x);
219
220    /* Ensure *this has the larger exponent (no need for the mantissa to
221     * be larger, as in subtraction). Otherwise, switch. */
222    if ((m_signexp << 1) < (x.m_signexp << 1))
223        return x + *this;
224
225    real ret;
226
227    int e1 = m_signexp - (1 << 30) + 1;
228    int e2 = x.m_signexp - (1 << 30) + 1;
229
230    int bigoff = (e1 - e2) / BIGIT_BITS;
231    int off = e1 - e2 - bigoff * BIGIT_BITS;
232
233    if (bigoff > BIGITS)
234        return *this;
235
236    ret.m_signexp = m_signexp;
237
238    uint64_t carry = 0;
239    for (int i = BIGITS; i--; )
240    {
241        carry += m_mantissa[i];
242        if (i - bigoff >= 0)
243            carry += x.m_mantissa[i - bigoff] >> off;
244
245        if (off && i - bigoff > 0)
246            carry += (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
247        else if (i - bigoff == 0)
248            carry += (uint64_t)1 << (BIGIT_BITS - off);
249
250        ret.m_mantissa[i] = (uint32_t)carry;
251        carry >>= BIGIT_BITS;
252    }
253
254    /* Renormalise in case we overflowed the mantissa */
255    if (carry)
256    {
257        carry--;
258        for (int i = 0; i < BIGITS; i++)
259        {
260            uint32_t tmp = ret.m_mantissa[i];
261            ret.m_mantissa[i] = ((uint32_t)carry << (BIGIT_BITS - 1))
262                              | (tmp >> 1);
263            carry = tmp & 1u;
264        }
265        ret.m_signexp++;
266    }
267
268    return ret;
269}
270
271real real::operator -(real const &x) const
272{
273    if (x.m_signexp << 1 == 0)
274        return *this;
275
276    /* Ensure both arguments are positive. Otherwise, switch signs,
277     * or replace - with +. */
278    if (m_signexp >> 31)
279        return -(-*this + x);
280
281    if (x.m_signexp >> 31)
282        return (*this) + (-x);
283
284    /* Ensure *this is larger than x */
285    if (*this < x)
286        return -(x - *this);
287
288    real ret;
289
290    int e1 = m_signexp - (1 << 30) + 1;
291    int e2 = x.m_signexp - (1 << 30) + 1;
292
293    int bigoff = (e1 - e2) / BIGIT_BITS;
294    int off = e1 - e2 - bigoff * BIGIT_BITS;
295
296    if (bigoff > BIGITS)
297        return *this;
298
299    ret.m_signexp = m_signexp;
300
301    /* int64_t instead of uint64_t to preserve sign through shifts */
302    int64_t carry = 0;
303    for (int i = 0; i < bigoff; i++)
304    {
305        carry -= x.m_mantissa[BIGITS - 1 - i];
306        /* Emulates a signed shift */
307        carry >>= BIGIT_BITS;
308        carry |= carry << BIGIT_BITS;
309    }
310    if (bigoff < BIGITS)
311        carry -= x.m_mantissa[BIGITS - 1 - bigoff] & (((int64_t)1 << off) - 1);
312    carry /= (int64_t)1 << off;
313
314    for (int i = BIGITS; i--; )
315    {
316        carry += m_mantissa[i];
317        if (i - bigoff >= 0)
318            carry -= x.m_mantissa[i - bigoff] >> off;
319
320        if (off && i - bigoff > 0)
321            carry -= (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
322        else if (i - bigoff == 0)
323            carry -= (uint64_t)1 << (BIGIT_BITS - off);
324
325        ret.m_mantissa[i] = (uint32_t)carry;
326        carry >>= BIGIT_BITS;
327        carry |= carry << BIGIT_BITS;
328    }
329
330    carry += 1;
331
332    /* Renormalise if we underflowed the mantissa */
333    if (carry == 0)
334    {
335        /* How much do we need to shift the mantissa? FIXME: this could
336         * be computed above */
337        off = 0;
338        for (int i = 0; i < BIGITS; i++)
339        {
340            if (!ret.m_mantissa[i])
341            {
342                off += BIGIT_BITS;
343                continue;
344            }
345
346            for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1)
347                off++;
348            break;
349        }
350        if (off == BIGITS * BIGIT_BITS)
351            ret.m_signexp &= 0x80000000u;
352        else
353        {
354            off++; /* Shift one more to get rid of the leading one */
355            ret.m_signexp -= off;
356
357            bigoff = off / BIGIT_BITS;
358            off -= bigoff * BIGIT_BITS;
359
360            for (int i = 0; i < BIGITS; i++)
361            {
362                uint32_t tmp = 0;
363                if (i + bigoff < BIGITS)
364                    tmp |= ret.m_mantissa[i + bigoff] << off;
365                if (off && i + bigoff + 1 < BIGITS)
366                    tmp |= ret.m_mantissa[i + bigoff + 1] >> (BIGIT_BITS - off);
367                ret.m_mantissa[i] = tmp;
368            }
369        }
370    }
371
372    return ret;
373}
374
375real real::operator *(real const &x) const
376{
377    real ret;
378
379    if (m_signexp << 1 == 0 || x.m_signexp << 1 == 0)
380    {
381        ret = (m_signexp << 1 == 0) ? *this : x;
382        ret.m_signexp ^= x.m_signexp & 0x80000000u;
383        return ret;
384    }
385
386    ret.m_signexp = (m_signexp ^ x.m_signexp) & 0x80000000u;
387    int e = (m_signexp & 0x7fffffffu) - (1 << 30) + 1
388          + (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
389
390    /* Accumulate low order product; no need to store it, we just
391     * want the carry value */
392    uint64_t carry = 0, hicarry = 0, prev;
393    for (int i = 0; i < BIGITS; i++)
394    {
395        for (int j = 0; j < i + 1; j++)
396        {
397            prev = carry;
398            carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
399                   * (uint64_t)x.m_mantissa[BIGITS - 1 + j - i];
400            if (carry < prev)
401                hicarry++;
402        }
403        carry >>= BIGIT_BITS;
404        carry |= hicarry << BIGIT_BITS;
405        hicarry >>= BIGIT_BITS;
406    }
407
408    for (int i = 0; i < BIGITS; i++)
409    {
410        for (int j = i + 1; j < BIGITS; j++)
411        {
412            prev = carry;
413            carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
414                   * (uint64_t)x.m_mantissa[j - 1 - i];
415            if (carry < prev)
416                hicarry++;
417        }
418        prev = carry;
419        carry += m_mantissa[BIGITS - 1 - i];
420        carry += x.m_mantissa[BIGITS - 1 - i];
421        if (carry < prev)
422            hicarry++;
423        ret.m_mantissa[BIGITS - 1 - i] = carry & 0xffffffffu;
424        carry >>= BIGIT_BITS;
425        carry |= hicarry << BIGIT_BITS;
426        hicarry >>= BIGIT_BITS;
427    }
428
429    /* Renormalise in case we overflowed the mantissa */
430    if (carry)
431    {
432        carry--;
433        for (int i = 0; i < BIGITS; i++)
434        {
435            uint32_t tmp = (uint32_t)ret.m_mantissa[i];
436            ret.m_mantissa[i] = ((uint32_t)carry << (BIGIT_BITS - 1))
437                              | (tmp >> 1);
438            carry = tmp & 1u;
439        }
440        e++;
441    }
442
443    ret.m_signexp |= e + (1 << 30) - 1;
444
445    return ret;
446}
447
448real real::operator /(real const &x) const
449{
450    return *this * re(x);
451}
452
453real const &real::operator +=(real const &x)
454{
455    real tmp = *this;
456    return *this = tmp + x;
457}
458
459real const &real::operator -=(real const &x)
460{
461    real tmp = *this;
462    return *this = tmp - x;
463}
464
465real const &real::operator *=(real const &x)
466{
467    real tmp = *this;
468    return *this = tmp * x;
469}
470
471real const &real::operator /=(real const &x)
472{
473    real tmp = *this;
474    return *this = tmp / x;
475}
476
477bool real::operator ==(real const &x) const
478{
479    if ((m_signexp << 1) == 0 && (x.m_signexp << 1) == 0)
480        return true;
481
482    if (m_signexp != x.m_signexp)
483        return false;
484
485    return memcmp(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t)) == 0;
486}
487
488bool real::operator !=(real const &x) const
489{
490    return !(*this == x);
491}
492
493bool real::operator <(real const &x) const
494{
495    /* Ensure both numbers are positive */
496    if (m_signexp >> 31)
497        return (x.m_signexp >> 31) ? -*this > -x : true;
498
499    if (x.m_signexp >> 31)
500        return false;
501
502    /* Compare all relevant bits */
503    if (m_signexp != x.m_signexp)
504        return m_signexp < x.m_signexp;
505
506    for (int i = 0; i < BIGITS; i++)
507        if (m_mantissa[i] != x.m_mantissa[i])
508            return m_mantissa[i] < x.m_mantissa[i];
509
510    return false;
511}
512
513bool real::operator <=(real const &x) const
514{
515    return !(*this > x);
516}
517
518bool real::operator >(real const &x) const
519{
520    /* Ensure both numbers are positive */
521    if (m_signexp >> 31)
522        return (x.m_signexp >> 31) ? -*this < -x : false;
523
524    if (x.m_signexp >> 31)
525        return true;
526
527    /* Compare all relevant bits */
528    if (m_signexp != x.m_signexp)
529        return m_signexp > x.m_signexp;
530
531    for (int i = 0; i < BIGITS; i++)
532        if (m_mantissa[i] != x.m_mantissa[i])
533            return m_mantissa[i] > x.m_mantissa[i];
534
535    return false;
536}
537
538bool real::operator >=(real const &x) const
539{
540    return !(*this < x);
541}
542
543bool real::operator !() const
544{
545    return !(bool)*this;
546}
547
548real::operator bool() const
549{
550    /* A real is "true" if it is non-zero (exponent is non-zero) AND
551     * not NaN (exponent is not full bits OR higher order mantissa is zero) */
552    uint32_t exponent = m_signexp << 1;
553    return exponent && (~exponent || m_mantissa[0] == 0);
554}
555
556real re(real const &x)
557{
558    if (!(x.m_signexp << 1))
559    {
560        real ret = x;
561        ret.m_signexp = x.m_signexp | 0x7fffffffu;
562        ret.m_mantissa[0] = 0;
563        return ret;
564    }
565
566    /* Use the system's float inversion to approximate 1/x */
567    union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
568    v.x |= x.m_mantissa[0] >> 9;
569    v.f = 1.0f / v.f;
570
571    real ret;
572    ret.m_mantissa[0] = v.x << 9;
573
574    uint32_t sign = x.m_signexp & 0x80000000u;
575    ret.m_signexp = sign;
576
577    int exponent = (x.m_signexp & 0x7fffffffu) + 1;
578    exponent = -exponent + (v.x >> 23) - (u.x >> 23);
579    ret.m_signexp |= (exponent - 1) & 0x7fffffffu;
580
581    /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
582     * convergence, but this hasn't been checked seriously. */
583    for (int i = 1; i <= real::BIGITS; i *= 2)
584        ret = ret * (real::R_2 - ret * x);
585
586    return ret;
587}
588
589real sqrt(real const &x)
590{
591    /* if zero, return x */
592    if (!(x.m_signexp << 1))
593        return x;
594
595    /* if negative, return NaN */
596    if (x.m_signexp >> 31)
597    {
598        real ret;
599        ret.m_signexp = 0x7fffffffu;
600        ret.m_mantissa[0] = 0xffffu;
601        return ret;
602    }
603
604    /* Use the system's float inversion to approximate 1/sqrt(x). First
605     * we construct a float in the [1..4[ range that has roughly the same
606     * mantissa as our real. Its exponent is 0 or 1, depending on the
607     * partity of x. The final exponent is 0, -1 or -2. We use the final
608     * exponent and final mantissa to pre-fill the result. */
609    union { float f; uint32_t x; } u = { 1.0f }, v = { 2.0f };
610    v.x -= ((x.m_signexp & 1) << 23);
611    v.x |= x.m_mantissa[0] >> 9;
612    v.f = 1.0f / sqrtf(v.f);
613
614    real ret;
615    ret.m_mantissa[0] = v.x << 9;
616
617    uint32_t sign = x.m_signexp & 0x80000000u;
618    ret.m_signexp = sign;
619
620    uint32_t exponent = (x.m_signexp & 0x7fffffffu);
621    exponent = ((1 << 30) + (1 << 29) - 1) - (exponent + 1) / 2;
622    exponent = exponent + (v.x >> 23) - (u.x >> 23);
623    ret.m_signexp |= exponent & 0x7fffffffu;
624
625    /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
626     * convergence, but this hasn't been checked seriously. */
627    for (int i = 1; i <= real::BIGITS; i *= 2)
628    {
629        ret = ret * (real::R_3 - ret * ret * x);
630        ret.m_signexp--;
631    }
632
633    return ret * x;
634}
635
636real cbrt(real const &x)
637{
638    /* if zero, return x */
639    if (!(x.m_signexp << 1))
640        return x;
641
642    /* Use the system's float inversion to approximate cbrt(x). First
643     * we construct a float in the [1..8[ range that has roughly the same
644     * mantissa as our real. Its exponent is 0, 1 or 2, depending on the
645     * value of x. The final exponent is 0 or 1 (special case). We use
646     * the final exponent and final mantissa to pre-fill the result. */
647    union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
648    v.x += ((x.m_signexp % 3) << 23);
649    v.x |= x.m_mantissa[0] >> 9;
650    v.f = powf(v.f, 0.33333333333333333f);
651
652    real ret;
653    ret.m_mantissa[0] = v.x << 9;
654
655    uint32_t sign = x.m_signexp & 0x80000000u;
656    ret.m_signexp = sign;
657
658    int exponent = (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
659    exponent = exponent / 3 + (v.x >> 23) - (u.x >> 23);
660    ret.m_signexp |= (exponent + (1 << 30) - 1) & 0x7fffffffu;
661
662    /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
663     * convergence, but this hasn't been checked seriously. */
664    for (int i = 1; i <= real::BIGITS; i *= 2)
665    {
666        static real third = re(real::R_3);
667        ret = third * (x / (ret * ret) + (ret / 2));
668    }
669
670    return ret;
671}
672
673real pow(real const &x, real const &y)
674{
675    if (!y)
676        return real::R_1;
677    if (!x)
678        return real::R_0;
679    if (x > real::R_0)
680        return exp(y * log(x));
681    else /* x < 0 */
682    {
683        /* Odd integer exponent */
684        if (y == (round(y / 2) * 2))
685            return exp(y * log(-x));
686
687        /* Even integer exponent */
688        if (y == round(y))
689            return -exp(y * log(-x));
690
691        /* FIXME: negative nth root */
692        return real::R_0;
693    }
694}
695
696static real fast_fact(int x)
697{
698    real ret = real::R_1;
699    int i = 1, multiplier = 1, exponent = 0;
700
701    for (;;)
702    {
703        if (i++ >= x)
704            /* Multiplication is a no-op if multiplier == 1 */
705            return ldexp(ret * multiplier, exponent);
706
707        int tmp = i;
708        while ((tmp & 1) == 0)
709        {
710            tmp >>= 1;
711            exponent++;
712        }
713        if (multiplier * tmp / tmp != multiplier)
714        {
715            ret *= multiplier;
716            multiplier = 1;
717        }
718        multiplier *= tmp;
719    }
720}
721
722real gamma(real const &x)
723{
724    /* We use Spouge's formula. FIXME: precision is far from acceptable,
725     * especially with large values. We need to compute this with higher
726     * precision values in order to attain the desired accuracy. It might
727     * also be useful to sort the ck values by decreasing absolute value
728     * and do the addition in this order. */
729    int a = ceilf(logf(2) / logf(2 * M_PI) * real::BIGITS * real::BIGIT_BITS);
730
731    real ret = sqrt(real::R_PI * 2);
732    real fact_k_1 = real::R_1;
733
734    for (int k = 1; k < a; k++)
735    {
736        real a_k = (real)(a - k);
737        real ck = pow(a_k, (real)((float)k - 0.5)) * exp(a_k)
738                / (fact_k_1 * (x + (real)(k - 1)));
739        ret += ck;
740        fact_k_1 *= (real)-k;
741    }
742
743    ret *= pow(x + (real)(a - 1), x - (real::R_1 / 2));
744    ret *= exp(-x - (real)(a - 1));
745
746    return ret;
747}
748
749real fabs(real const &x)
750{
751    real ret = x;
752    ret.m_signexp &= 0x7fffffffu;
753    return ret;
754}
755
756static real fast_log(real const &x)
757{
758    /* This fast log method is tuned to work on the [1..2] range and
759     * no effort whatsoever was made to improve convergence outside this
760     * domain of validity. It can converge pretty fast, provided we use
761     * the following variable substitutions:
762     *    y = sqrt(x)
763     *    z = (y - 1) / (y + 1)
764     *
765     * And the following identities:
766     *    ln(x) = 2 ln(y)
767     *          = 2 ln((1 + z) / (1 - z))
768     *          = 4 z (1 + z^2 / 3 + z^4 / 5 + z^6 / 7...)
769     *
770     * Any additional sqrt() call would halve the convergence time, but
771     * would also impact the final precision. For now we stick with one
772     * sqrt() call. */
773    real y = sqrt(x);
774    real z = (y - real::R_1) / (y + real::R_1), z2 = z * z, zn = z2;
775    real sum = real::R_1;
776
777    for (int i = 3; ; i += 2)
778    {
779        real newsum = sum + zn / (real)i;
780        if (newsum == sum)
781            break;
782        sum = newsum;
783        zn *= z2;
784    }
785
786    return z * sum * 4;
787}
788
789real log(real const &x)
790{
791    /* Strategy for log(x): if x = 2^E*M then log(x) = E log(2) + log(M),
792     * with the property that M is in [1..2[, so fast_log() applies here. */
793    real tmp = x;
794    if (x.m_signexp >> 31 || x.m_signexp == 0)
795    {
796        tmp.m_signexp = 0xffffffffu;
797        tmp.m_mantissa[0] = 0xffffffffu;
798        return tmp;
799    }
800    tmp.m_signexp = (1 << 30) - 1;
801    return (real)(int)(x.m_signexp - (1 << 30) + 1) * real::R_LN2
802           + fast_log(tmp);
803}
804
805real log2(real const &x)
806{
807    /* Strategy for log2(x): see log(x). */
808    real tmp = x;
809    if (x.m_signexp >> 31 || x.m_signexp == 0)
810    {
811        tmp.m_signexp = 0xffffffffu;
812        tmp.m_mantissa[0] = 0xffffffffu;
813        return tmp;
814    }
815    tmp.m_signexp = (1 << 30) - 1;
816    return (real)(int)(x.m_signexp - (1 << 30) + 1)
817           + fast_log(tmp) * real::R_LOG2E;
818}
819
820real log10(real const &x)
821{
822    return log(x) * real::R_LOG10E;
823}
824
825static real fast_exp_sub(real const &x, real const &y)
826{
827    /* This fast exp method is tuned to work on the [-1..1] range and
828     * no effort whatsoever was made to improve convergence outside this
829     * domain of validity. The argument y is used for cases where we
830     * don't want the leading 1 in the Taylor series. */
831    real ret = real::R_1 - y, xn = x;
832    int i = 1;
833
834    for (;;)
835    {
836        real newret = ret + xn;
837        if (newret == ret)
838            break;
839        ret = newret * ++i;
840        xn *= x;
841    }
842
843    return ret / fast_fact(i);
844}
845
846real exp(real const &x)
847{
848    /* Strategy for exp(x): the Taylor series does not converge very fast
849     * with large positive or negative values.
850     *
851     * However, we know that the result is going to be in the form M*2^E,
852     * where M is the mantissa and E the exponent. We first try to predict
853     * a value for E, which is approximately log2(exp(x)) = x / log(2).
854     *
855     * Let E0 be an integer close to x / log(2). We need to find a value x0
856     * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
857     *
858     * Thus the final algorithm:
859     *  int E0 = x / log(2)
860     *  real x0 = x - E0 log(2)
861     *  real x1 = exp(x0)
862     *  return x1 * 2^E0
863     */
864    int e0 = x / real::R_LN2;
865    real x0 = x - (real)e0 * real::R_LN2;
866    real x1 = fast_exp_sub(x0, real::R_0);
867    x1.m_signexp += e0;
868    return x1;
869}
870
871real exp2(real const &x)
872{
873    /* Strategy for exp2(x): see strategy in exp(). */
874    int e0 = x;
875    real x0 = x - (real)e0;
876    real x1 = fast_exp_sub(x0 * real::R_LN2, real::R_0);
877    x1.m_signexp += e0;
878    return x1;
879}
880
881real sinh(real const &x)
882{
883    /* We cannot always use (exp(x)-exp(-x))/2 because we'll lose
884     * accuracy near zero. We only use this identity for |x|>0.5. If
885     * |x|<=0.5, we compute exp(x)-1 and exp(-x)-1 instead. */
886    bool near_zero = (fabs(x) < real::R_1 / 2);
887    real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
888    real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
889    return (x1 - x2) / 2;
890}
891
892real tanh(real const &x)
893{
894    /* See sinh() for the strategy here */
895    bool near_zero = (fabs(x) < real::R_1 / 2);
896    real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
897    real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
898    real x3 = near_zero ? x1 + x2 + real::R_2 : x1 + x2;
899    return (x1 - x2) / x3;
900}
901
902real cosh(real const &x)
903{
904    /* No need to worry about accuracy here; maybe the last bit is slightly
905     * off, but that's about it. */
906    return (exp(x) + exp(-x)) / 2;
907}
908
909real frexp(real const &x, int *exp)
910{
911    if (!x)
912    {
913        *exp = 0;
914        return x;
915    }
916
917    real ret = x;
918    int exponent = (ret.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
919    *exp = exponent + 1;
920    ret.m_signexp -= exponent + 1;
921    return ret;
922}
923
924real ldexp(real const &x, int exp)
925{
926    real ret = x;
927    if (ret)
928        ret.m_signexp += exp;
929    return ret;
930}
931
932real modf(real const &x, real *iptr)
933{
934    real absx = fabs(x);
935    real tmp = floor(absx);
936
937    *iptr = copysign(tmp, x);
938    return copysign(absx - tmp, x);
939}
940
941real ulp(real const &x)
942{
943    real ret = real::R_1;
944    if (x)
945        ret.m_signexp = x.m_signexp + 1 - real::BIGITS * real::BIGIT_BITS;
946    else
947        ret.m_signexp = 0;
948    return ret;
949}
950
951real nextafter(real const &x, real const &y)
952{
953    if (x == y)
954        return x;
955    else if (x < y)
956        return x + ulp(x);
957    else
958        return x - ulp(x);
959}
960
961real copysign(real const &x, real const &y)
962{
963    real ret = x;
964    ret.m_signexp &= 0x7fffffffu;
965    ret.m_signexp |= y.m_signexp & 0x80000000u;
966    return ret;
967}
968
969real floor(real const &x)
970{
971    /* Strategy for floor(x):
972     *  - if negative, return -ceil(-x)
973     *  - if zero or negative zero, return x
974     *  - if less than one, return zero
975     *  - otherwise, if e is the exponent, clear all bits except the
976     *    first e. */
977    if (x < -real::R_0)
978        return -ceil(-x);
979    if (!x)
980        return x;
981    if (x < real::R_1)
982        return real::R_0;
983
984    real ret = x;
985    int exponent = x.m_signexp - (1 << 30) + 1;
986
987    for (int i = 0; i < real::BIGITS; i++)
988    {
989        if (exponent <= 0)
990            ret.m_mantissa[i] = 0;
991        else if (exponent < real::BIGIT_BITS)
992            ret.m_mantissa[i] &= ~((1 << (real::BIGIT_BITS - exponent)) - 1);
993
994        exponent -= real::BIGIT_BITS;
995    }
996
997    return ret;
998}
999
1000real ceil(real const &x)
1001{
1002    /* Strategy for ceil(x):
1003     *  - if negative, return -floor(-x)
1004     *  - if x == floor(x), return x
1005     *  - otherwise, return floor(x) + 1 */
1006    if (x < -real::R_0)
1007        return -floor(-x);
1008    real ret = floor(x);
1009    if (x == ret)
1010        return ret;
1011    else
1012        return ret + real::R_1;
1013}
1014
1015real round(real const &x)
1016{
1017    if (x < real::R_0)
1018        return -round(-x);
1019
1020    return floor(x + (real::R_1 / 2));
1021}
1022
1023real fmod(real const &x, real const &y)
1024{
1025    if (!y)
1026        return real::R_0; /* FIXME: return NaN */
1027
1028    if (!x)
1029        return x;
1030
1031    real tmp = round(x / y);
1032    return x - tmp * y;
1033}
1034
1035real sin(real const &x)
1036{
1037    int switch_sign = x.m_signexp & 0x80000000u;
1038
1039    real absx = fmod(fabs(x), real::R_PI * 2);
1040    if (absx > real::R_PI)
1041    {
1042        absx -= real::R_PI;
1043        switch_sign = !switch_sign;
1044    }
1045
1046    if (absx > real::R_PI_2)
1047        absx = real::R_PI - absx;
1048
1049    real ret = real::R_0, fact = real::R_1, xn = absx, mx2 = -absx * absx;
1050    int i = 1;
1051    for (;;)
1052    {
1053        real newret = ret + xn;
1054        if (newret == ret)
1055            break;
1056        ret = newret * ((i + 1) * (i + 2));
1057        xn *= mx2;
1058        i += 2;
1059    }
1060    ret /= fast_fact(i);
1061
1062    /* Propagate sign */
1063    if (switch_sign)
1064        ret.m_signexp ^= 0x80000000u;
1065    return ret;
1066}
1067
1068real cos(real const &x)
1069{
1070    return sin(real::R_PI_2 - x);
1071}
1072
1073real tan(real const &x)
1074{
1075    /* Constrain input to [-π,π] */
1076    real y = fmod(x, real::R_PI);
1077
1078    /* Constrain input to [-π/2,π/2] */
1079    if (y < -real::R_PI_2)
1080        y += real::R_PI;
1081    else if (y > real::R_PI_2)
1082        y -= real::R_PI;
1083
1084    /* In [-π/4,π/4] return sin/cos */
1085    if (fabs(y) <= real::R_PI_4)
1086        return sin(y) / cos(y);
1087
1088    /* Otherwise, return cos/sin */
1089    if (y > real::R_0)
1090        y = real::R_PI_2 - y;
1091    else
1092        y = -real::R_PI_2 - y;
1093
1094    return cos(y) / sin(y);
1095}
1096
1097static inline real asinacos(real const &x, int is_asin, int is_negative)
1098{
1099    /* Strategy for asin(): in [-0.5..0.5], use a Taylor series around
1100     * zero. In [0.5..1], use asin(x) = π/2 - 2*asin(sqrt((1-x)/2)), and
1101     * in [-1..-0.5] just revert the sign.
1102     * Strategy for acos(): use acos(x) = π/2 - asin(x) and try not to
1103     * lose the precision around x=1. */
1104    real absx = fabs(x);
1105    int around_zero = (absx < (real::R_1 / 2));
1106
1107    if (!around_zero)
1108        absx = sqrt((real::R_1 - absx) / 2);
1109
1110    real ret = absx, xn = absx, x2 = absx * absx, fact1 = 2, fact2 = 1;
1111    for (int i = 1; ; i++)
1112    {
1113        xn *= x2;
1114        real mul = (real)(2 * i + 1);
1115        real newret = ret + ldexp(fact1 * xn / (mul * fact2), -2 * i);
1116        if (newret == ret)
1117            break;
1118        ret = newret;
1119        fact1 *= (real)((2 * i + 1) * (2 * i + 2));
1120        fact2 *= (real)((i + 1) * (i + 1));
1121    }
1122
1123    if (is_negative)
1124        ret = -ret;
1125
1126    if (around_zero)
1127        ret = is_asin ? ret : real::R_PI_2 - ret;
1128    else
1129    {
1130        real adjust = is_negative ? real::R_PI : real::R_0;
1131        if (is_asin)
1132            ret = real::R_PI_2 - adjust - ret * 2;
1133        else
1134            ret = adjust + ret * 2;
1135    }
1136
1137    return ret;
1138}
1139
1140real asin(real const &x)
1141{
1142    return asinacos(x, 1, x.m_signexp >> 31);
1143}
1144
1145real acos(real const &x)
1146{
1147    return asinacos(x, 0, x.m_signexp >> 31);
1148}
1149
1150real atan(real const &x)
1151{
1152    /* Computing atan(x): we choose a different Taylor series depending on
1153     * the value of x to help with convergence.
1154     *
1155     * If |x| < 0.5 we evaluate atan(y) near 0:
1156     *  atan(y) = y - y^3/3 + y^5/5 - y^7/7 + y^9/9 ...
1157     *
1158     * If 0.5 <= |x| < 1.5 we evaluate atan(1+y) near 0:
1159     *  atan(1+y) = π/4 + y/(1*2^1) - y^2/(2*2^1) + y^3/(3*2^2)
1160     *                  - y^5/(5*2^3) + y^6/(6*2^3) - y^7/(7*2^4)
1161     *                  + y^9/(9*2^5) - y^10/(10*2^5) + y^11/(11*2^6) ...
1162     *
1163     * If 1.5 <= |x| < 2 we evaluate atan(sqrt(3)+2y) near 0:
1164     *  atan(sqrt(3)+2y) = π/3 + 1/2 y - sqrt(3)/2 y^2/2
1165     *                         + y^3/3 - sqrt(3)/2 y^4/4 + 1/2 y^5/5
1166     *                         - 1/2 y^7/7 + sqrt(3)/2 y^8/8
1167     *                         - y^9/9 + sqrt(3)/2 y^10/10 - 1/2 y^11/11
1168     *                         + 1/2 y^13/13 - sqrt(3)/2 y^14/14
1169     *                         + y^15/15 - sqrt(3)/2 y^16/16 + 1/2 y^17/17 ...
1170     *
1171     * If |x| >= 2 we evaluate atan(y) near +∞:
1172     *  atan(y) = π/2 - y^-1 + y^-3/3 - y^-5/5 + y^-7/7 - y^-9/9 ...
1173     */
1174    real absx = fabs(x);
1175
1176    if (absx < (real::R_1 / 2))
1177    {
1178        real ret = x, xn = x, mx2 = -x * x;
1179        for (int i = 3; ; i += 2)
1180        {
1181            xn *= mx2;
1182            real newret = ret + xn / (real)i;
1183            if (newret == ret)
1184                break;
1185            ret = newret;
1186        }
1187        return ret;
1188    }
1189
1190    real ret = 0;
1191
1192    if (absx < (real::R_3 / 2))
1193    {
1194        real y = real::R_1 - absx;
1195        real yn = y, my2 = -y * y;
1196        for (int i = 0; ; i += 2)
1197        {
1198            real newret = ret + ldexp(yn / (real)(2 * i + 1), -i - 1);
1199            yn *= y;
1200            newret += ldexp(yn / (real)(2 * i + 2), -i - 1);
1201            yn *= y;
1202            newret += ldexp(yn / (real)(2 * i + 3), -i - 2);
1203            if (newret == ret)
1204                break;
1205            ret = newret;
1206            yn *= my2;
1207        }
1208        ret = real::R_PI_4 - ret;
1209    }
1210    else if (absx < real::R_2)
1211    {
1212        real y = (absx - real::R_SQRT3) / 2;
1213        real yn = y, my2 = -y * y;
1214        for (int i = 1; ; i += 6)
1215        {
1216            real newret = ret + ((yn / (real)i) / 2);
1217            yn *= y;
1218            newret -= (real::R_SQRT3 / 2) * yn / (real)(i + 1);
1219            yn *= y;
1220            newret += yn / (real)(i + 2);
1221            yn *= y;
1222            newret -= (real::R_SQRT3 / 2) * yn / (real)(i + 3);
1223            yn *= y;
1224            newret += (yn / (real)(i + 4)) / 2;
1225            if (newret == ret)
1226                break;
1227            ret = newret;
1228            yn *= my2;
1229        }
1230        ret = real::R_PI_3 + ret;
1231    }
1232    else
1233    {
1234        real y = re(absx);
1235        real yn = y, my2 = -y * y;
1236        ret = y;
1237        for (int i = 3; ; i += 2)
1238        {
1239            yn *= my2;
1240            real newret = ret + yn / (real)i;
1241            if (newret == ret)
1242                break;
1243            ret = newret;
1244        }
1245        ret = real::R_PI_2 - ret;
1246    }
1247
1248    /* Propagate sign */
1249    ret.m_signexp |= (x.m_signexp & 0x80000000u);
1250    return ret;
1251}
1252
1253real atan2(real const &y, real const &x)
1254{
1255    if (!y)
1256    {
1257        if ((x.m_signexp >> 31) == 0)
1258            return y;
1259        if (y.m_signexp >> 31)
1260            return -real::R_PI;
1261        return real::R_PI;
1262    }
1263
1264    if (!x)
1265    {
1266        if (y.m_signexp >> 31)
1267            return -real::R_PI;
1268        return real::R_PI;
1269    }
1270
1271    /* FIXME: handle the Inf and NaN cases */
1272    real z = y / x;
1273    real ret = atan(z);
1274    if (x < real::R_0)
1275        ret += (y > real::R_0) ? real::R_PI : -real::R_PI;
1276    return ret;
1277}
1278
1279void real::hexprint() const
1280{
1281    printf("%08x", m_signexp);
1282    for (int i = 0; i < BIGITS; i++)
1283        printf(" %08x", m_mantissa[i]);
1284    printf("\n");
1285}
1286
1287void real::print(int ndigits) const
1288{
1289    real x = *this;
1290
1291    if (x.m_signexp >> 31)
1292    {
1293        printf("-");
1294        x = -x;
1295    }
1296
1297    if (!x)
1298    {
1299        printf("0.0\n");
1300        return;
1301    }
1302
1303    /* Normalise x so that mantissa is in [1..9.999] */
1304    /* FIXME: better use int64_t when the cast is implemented */
1305    int exponent = ceil(log10(x));
1306    x /= pow(R_10, (real)exponent);
1307
1308    if (x < R_1)
1309    {
1310        x *= R_10;
1311        exponent--;
1312    }
1313
1314    /* Print digits */
1315    for (int i = 0; i < ndigits; i++)
1316    {
1317        int digit = (int)floor(x);
1318        printf("%i", digit);
1319        if (i == 0)
1320            printf(".");
1321        x -= real(digit);
1322        x *= R_10;
1323    }
1324
1325    /* Print exponent information */
1326    if (exponent < 0)
1327        printf("e-%i", -exponent);
1328    else if (exponent > 0)
1329        printf("e+%i", exponent);
1330
1331    printf("\n");
1332}
1333
1334static real fast_pi()
1335{
1336    /* Approximate Pi using Machin's formula: 16*atan(1/5)-4*atan(1/239) */
1337    real ret = 0, x0 = 5, x1 = 239;
1338    real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16, r4 = 4;
1339
1340    for (int i = 1; ; i += 2)
1341    {
1342        real newret = ret + r16 / (x0 * (real)i) - r4 / (x1 * (real)i);
1343        if (newret == ret)
1344            break;
1345        ret = newret;
1346        x0 *= m0;
1347        x1 *= m1;
1348    }
1349
1350    return ret;
1351}
1352
1353real const real::R_0        = (real)0.0;
1354real const real::R_1        = (real)1.0;
1355real const real::R_2        = (real)2.0;
1356real const real::R_3        = (real)3.0;
1357real const real::R_10       = (real)10.0;
1358
1359/*
1360 * Initialisation order is important here:
1361 *  - fast_log() requires R_1
1362 *  - log() requires R_LN2
1363 *  - re() require R_2
1364 *  - exp() requires R_0, R_1, R_LN2
1365 *  - sqrt() requires R_3
1366 */
1367real const real::R_LN2      = fast_log(R_2);
1368real const real::R_LN10     = log(R_10);
1369real const real::R_LOG2E    = re(R_LN2);
1370real const real::R_LOG10E   = re(R_LN10);
1371real const real::R_E        = exp(R_1);
1372real const real::R_PI       = fast_pi();
1373real const real::R_PI_2     = R_PI / 2;
1374real const real::R_PI_3     = R_PI / R_3;
1375real const real::R_PI_4     = R_PI / 4;
1376real const real::R_1_PI     = re(R_PI);
1377real const real::R_2_PI     = R_1_PI * 2;
1378real const real::R_2_SQRTPI = re(sqrt(R_PI)) * 2;
1379real const real::R_SQRT2    = sqrt(R_2);
1380real const real::R_SQRT3    = sqrt(R_3);
1381real const real::R_SQRT1_2  = R_SQRT2 / 2;
1382
1383} /* namespace lol */
1384
Note: See TracBrowser for help on using the repository browser.