source: trunk/src/real.cpp @ 1116

Last change on this file since 1116 was 1116, checked in by sam, 9 years ago

core: fix the sign of a negative real number raised to an even power, and
add the corresponding unit test.

File size: 33.1 KB
Line 
1//
2// Lol Engine
3//
4// Copyright: (c) 2010-2011 Sam Hocevar <sam@hocevar.net>
5//   This program is free software; you can redistribute it and/or
6//   modify it under the terms of the Do What The Fuck You Want To
7//   Public License, Version 2, as published by Sam Hocevar. See
8//   http://sam.zoy.org/projects/COPYING.WTFPL for more details.
9//
10
11#if defined HAVE_CONFIG_H
12#   include "config.h"
13#endif
14
15#include <cstring>
16#include <cstdio>
17
18#include "core.h"
19
20using namespace std;
21
22namespace lol
23{
24
25real::real()
26{
27    m_mantissa = new uint32_t[BIGITS];
28    m_signexp = 0;
29}
30
31real::real(real const &x)
32{
33    m_mantissa = new uint32_t[BIGITS];
34    memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
35    m_signexp = x.m_signexp;
36}
37
38real const &real::operator =(real const &x)
39{
40    if (&x != this)
41    {
42        memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
43        m_signexp = x.m_signexp;
44    }
45
46    return *this;
47}
48
49real::~real()
50{
51    delete[] m_mantissa;
52}
53
54real::real(float f) { new(this) real((double)f); }
55real::real(int i) { new(this) real((double)i); }
56real::real(unsigned int i) { new(this) real((double)i); }
57
58real::real(double d)
59{
60    new(this) real();
61
62    union { double d; uint64_t x; } u = { d };
63
64    uint32_t sign = (u.x >> 63) << 31;
65    uint32_t exponent = (u.x << 1) >> 53;
66
67    switch (exponent)
68    {
69    case 0x00:
70        m_signexp = sign;
71        break;
72    case 0x7ff:
73        m_signexp = sign | 0x7fffffffu;
74        break;
75    default:
76        m_signexp = sign | (exponent + (1 << 30) - (1 << 10));
77        break;
78    }
79
80    m_mantissa[0] = u.x >> 20;
81    m_mantissa[1] = u.x << 12;
82    memset(m_mantissa + 2, 0, (BIGITS - 2) * sizeof(m_mantissa[0]));
83}
84
85real::operator float() const { return (float)(double)(*this); }
86real::operator int() const { return (int)(double)(*this); }
87real::operator unsigned int() const { return (unsigned int)(double)(*this); }
88
89real::operator double() const
90{
91    union { double d; uint64_t x; } u;
92
93    /* Get sign */
94    u.x = m_signexp >> 31;
95    u.x <<= 11;
96
97    /* Compute new exponent */
98    uint32_t exponent = (m_signexp << 1) >> 1;
99    int e = (int)exponent - (1 << 30) + (1 << 10);
100
101    if (e < 0)
102        u.x <<= 52;
103    else if (e >= 0x7ff)
104    {
105        u.x |= 0x7ff;
106        u.x <<= 52;
107    }
108    else
109    {
110        u.x |= e;
111
112        /* Store mantissa if necessary */
113        u.x <<= 32;
114        u.x |= m_mantissa[0];
115        u.x <<= 20;
116        u.x |= m_mantissa[1] >> 12;
117        /* Rounding */
118        u.x += (m_mantissa[1] >> 11) & 1;
119    }
120
121    return u.d;
122}
123
124/*
125 * Create a real number from an ASCII representation
126 */
127real::real(char const *str)
128{
129    real ret = 0;
130    int exponent = 0;
131    bool comma = false, nonzero = false, negative = false, finished = false;
132
133    for (char const *p = str; *p && !finished; p++)
134    {
135        switch (*p)
136        {
137        case '-':
138        case '+':
139            if (p != str)
140                break;
141            negative = (*p == '-');
142            break;
143        case '.':
144            if (comma)
145                finished = true;
146            comma = true;
147            break;
148        case '0': case '1': case '2': case '3': case '4':
149        case '5': case '6': case '7': case '8': case '9':
150            if (nonzero)
151            {
152                real x = ret + ret;
153                x = x + x + ret;
154                ret = x + x;
155            }
156            if (*p != '0')
157            {
158                ret += (int)(*p - '0');
159                nonzero = true;
160            }
161            if (comma)
162                exponent--;
163            break;
164        case 'e':
165        case 'E':
166            exponent += atoi(p + 1);
167            finished = true;
168            break;
169        default:
170            finished = true;
171            break;
172        }
173    }
174
175    if (exponent)
176        ret *= pow(R_10, (real)exponent);
177
178    if (negative)
179        ret = -ret;
180
181    new(this) real(ret);
182}
183
184real real::operator +() const
185{
186    return *this;
187}
188
189real real::operator -() const
190{
191    real ret = *this;
192    ret.m_signexp ^= 0x80000000u;
193    return ret;
194}
195
196real real::operator +(real const &x) const
197{
198    if (x.m_signexp << 1 == 0)
199        return *this;
200
201    /* Ensure both arguments are positive. Otherwise, switch signs,
202     * or replace + with -. */
203    if (m_signexp >> 31)
204        return -(-*this + -x);
205
206    if (x.m_signexp >> 31)
207        return *this - (-x);
208
209    /* Ensure *this has the larger exponent (no need for the mantissa to
210     * be larger, as in subtraction). Otherwise, switch. */
211    if ((m_signexp << 1) < (x.m_signexp << 1))
212        return x + *this;
213
214    real ret;
215
216    int e1 = m_signexp - (1 << 30) + 1;
217    int e2 = x.m_signexp - (1 << 30) + 1;
218
219    int bigoff = (e1 - e2) / BIGIT_BITS;
220    int off = e1 - e2 - bigoff * BIGIT_BITS;
221
222    if (bigoff > BIGITS)
223        return *this;
224
225    ret.m_signexp = m_signexp;
226
227    uint64_t carry = 0;
228    for (int i = BIGITS; i--; )
229    {
230        carry += m_mantissa[i];
231        if (i - bigoff >= 0)
232            carry += x.m_mantissa[i - bigoff] >> off;
233
234        if (off && i - bigoff > 0)
235            carry += (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
236        else if (i - bigoff == 0)
237            carry += (uint64_t)1 << (BIGIT_BITS - off);
238
239        ret.m_mantissa[i] = carry;
240        carry >>= BIGIT_BITS;
241    }
242
243    /* Renormalise in case we overflowed the mantissa */
244    if (carry)
245    {
246        carry--;
247        for (int i = 0; i < BIGITS; i++)
248        {
249            uint32_t tmp = ret.m_mantissa[i];
250            ret.m_mantissa[i] = (carry << (BIGIT_BITS - 1)) | (tmp >> 1);
251            carry = tmp & 1u;
252        }
253        ret.m_signexp++;
254    }
255
256    return ret;
257}
258
259real real::operator -(real const &x) const
260{
261    if (x.m_signexp << 1 == 0)
262        return *this;
263
264    /* Ensure both arguments are positive. Otherwise, switch signs,
265     * or replace - with +. */
266    if (m_signexp >> 31)
267        return -(-*this + x);
268
269    if (x.m_signexp >> 31)
270        return (*this) + (-x);
271
272    /* Ensure *this is larger than x */
273    if (*this < x)
274        return -(x - *this);
275
276    real ret;
277
278    int e1 = m_signexp - (1 << 30) + 1;
279    int e2 = x.m_signexp - (1 << 30) + 1;
280
281    int bigoff = (e1 - e2) / BIGIT_BITS;
282    int off = e1 - e2 - bigoff * BIGIT_BITS;
283
284    if (bigoff > BIGITS)
285        return *this;
286
287    ret.m_signexp = m_signexp;
288
289    int64_t carry = 0;
290    for (int i = 0; i < bigoff; i++)
291    {
292        carry -= x.m_mantissa[BIGITS - 1 - i];
293        /* Emulates a signed shift */
294        carry >>= BIGIT_BITS;
295        carry |= carry << BIGIT_BITS;
296    }
297    if (bigoff < BIGITS)
298        carry -= x.m_mantissa[BIGITS - 1 - bigoff] & (((int64_t)1 << off) - 1);
299    carry /= (int64_t)1 << off;
300
301    for (int i = BIGITS; i--; )
302    {
303        carry += m_mantissa[i];
304        if (i - bigoff >= 0)
305            carry -= x.m_mantissa[i - bigoff] >> off;
306
307        if (off && i - bigoff > 0)
308            carry -= (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
309        else if (i - bigoff == 0)
310            carry -= (uint64_t)1 << (BIGIT_BITS - off);
311
312        ret.m_mantissa[i] = carry;
313        carry >>= BIGIT_BITS;
314        carry |= carry << BIGIT_BITS;
315    }
316
317    carry += 1;
318
319    /* Renormalise if we underflowed the mantissa */
320    if (carry == 0)
321    {
322        /* How much do we need to shift the mantissa? FIXME: this could
323         * be computed above */
324        off = 0;
325        for (int i = 0; i < BIGITS; i++)
326        {
327            if (!ret.m_mantissa[i])
328            {
329                off += BIGIT_BITS;
330                continue;
331            }
332
333            for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1)
334                off++;
335            break;
336        }
337        if (off == BIGITS * BIGIT_BITS)
338            ret.m_signexp &= 0x80000000u;
339        else
340        {
341            off++; /* Shift one more to get rid of the leading one */
342            ret.m_signexp -= off;
343
344            bigoff = off / BIGIT_BITS;
345            off -= bigoff * BIGIT_BITS;
346
347            for (int i = 0; i < BIGITS; i++)
348            {
349                uint32_t tmp = 0;
350                if (i + bigoff < BIGITS)
351                    tmp |= ret.m_mantissa[i + bigoff] << off;
352                if (off && i + bigoff + 1 < BIGITS)
353                    tmp |= ret.m_mantissa[i + bigoff + 1] >> (BIGIT_BITS - off);
354                ret.m_mantissa[i] = tmp;
355            }
356        }
357    }
358
359    return ret;
360}
361
362real real::operator *(real const &x) const
363{
364    real ret;
365
366    if (m_signexp << 1 == 0 || x.m_signexp << 1 == 0)
367    {
368        ret = (m_signexp << 1 == 0) ? *this : x;
369        ret.m_signexp ^= x.m_signexp & 0x80000000u;
370        return ret;
371    }
372
373    ret.m_signexp = (m_signexp ^ x.m_signexp) & 0x80000000u;
374    int e = (m_signexp & 0x7fffffffu) - (1 << 30) + 1
375          + (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
376
377    /* Accumulate low order product; no need to store it, we just
378     * want the carry value */
379    uint64_t carry = 0, hicarry = 0, prev;
380    for (int i = 0; i < BIGITS; i++)
381    {
382        for (int j = 0; j < i + 1; j++)
383        {
384            prev = carry;
385            carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
386                   * (uint64_t)x.m_mantissa[BIGITS - 1 + j - i];
387            if (carry < prev)
388                hicarry++;
389        }
390        carry >>= BIGIT_BITS;
391        carry |= hicarry << BIGIT_BITS;
392        hicarry >>= BIGIT_BITS;
393    }
394
395    for (int i = 0; i < BIGITS; i++)
396    {
397        for (int j = i + 1; j < BIGITS; j++)
398        {
399            prev = carry;
400            carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
401                   * (uint64_t)x.m_mantissa[j - 1 - i];
402            if (carry < prev)
403                hicarry++;
404        }
405        prev = carry;
406        carry += m_mantissa[BIGITS - 1 - i];
407        carry += x.m_mantissa[BIGITS - 1 - i];
408        if (carry < prev)
409            hicarry++;
410        ret.m_mantissa[BIGITS - 1 - i] = carry & 0xffffffffu;
411        carry >>= BIGIT_BITS;
412        carry |= hicarry << BIGIT_BITS;
413        hicarry >>= BIGIT_BITS;
414    }
415
416    /* Renormalise in case we overflowed the mantissa */
417    if (carry)
418    {
419        carry--;
420        for (int i = 0; i < BIGITS; i++)
421        {
422            uint32_t tmp = ret.m_mantissa[i];
423            ret.m_mantissa[i] = (carry << (BIGIT_BITS - 1)) | (tmp >> 1);
424            carry = tmp & 1u;
425        }
426        e++;
427    }
428
429    ret.m_signexp |= e + (1 << 30) - 1;
430
431    return ret;
432}
433
434real real::operator /(real const &x) const
435{
436    return *this * re(x);
437}
438
439real const &real::operator +=(real const &x)
440{
441    real tmp = *this;
442    return *this = tmp + x;
443}
444
445real const &real::operator -=(real const &x)
446{
447    real tmp = *this;
448    return *this = tmp - x;
449}
450
451real const &real::operator *=(real const &x)
452{
453    real tmp = *this;
454    return *this = tmp * x;
455}
456
457real const &real::operator /=(real const &x)
458{
459    real tmp = *this;
460    return *this = tmp / x;
461}
462
463real real::operator <<(int x) const
464{
465    real tmp = *this;
466    return tmp <<= x;
467}
468
469real real::operator >>(int x) const
470{
471    real tmp = *this;
472    return tmp >>= x;
473}
474
475real const &real::operator <<=(int x)
476{
477    if (m_signexp << 1)
478        m_signexp += x;
479    return *this;
480}
481
482real const &real::operator >>=(int x)
483{
484    if (m_signexp << 1)
485        m_signexp -= x;
486    return *this;
487}
488
489bool real::operator ==(real const &x) const
490{
491    if ((m_signexp << 1) == 0 && (x.m_signexp << 1) == 0)
492        return true;
493
494    if (m_signexp != x.m_signexp)
495        return false;
496
497    return memcmp(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t)) == 0;
498}
499
500bool real::operator !=(real const &x) const
501{
502    return !(*this == x);
503}
504
505bool real::operator <(real const &x) const
506{
507    /* Ensure both numbers are positive */
508    if (m_signexp >> 31)
509        return (x.m_signexp >> 31) ? -*this > -x : true;
510
511    if (x.m_signexp >> 31)
512        return false;
513
514    /* Compare all relevant bits */
515    if (m_signexp != x.m_signexp)
516        return m_signexp < x.m_signexp;
517
518    for (int i = 0; i < BIGITS; i++)
519        if (m_mantissa[i] != x.m_mantissa[i])
520            return m_mantissa[i] < x.m_mantissa[i];
521
522    return false;
523}
524
525bool real::operator <=(real const &x) const
526{
527    return !(*this > x);
528}
529
530bool real::operator >(real const &x) const
531{
532    /* Ensure both numbers are positive */
533    if (m_signexp >> 31)
534        return (x.m_signexp >> 31) ? -*this < -x : false;
535
536    if (x.m_signexp >> 31)
537        return true;
538
539    /* Compare all relevant bits */
540    if (m_signexp != x.m_signexp)
541        return m_signexp > x.m_signexp;
542
543    for (int i = 0; i < BIGITS; i++)
544        if (m_mantissa[i] != x.m_mantissa[i])
545            return m_mantissa[i] > x.m_mantissa[i];
546
547    return false;
548}
549
550bool real::operator >=(real const &x) const
551{
552    return !(*this < x);
553}
554
555bool real::operator !() const
556{
557    return !(bool)*this;
558}
559
560real::operator bool() const
561{
562    /* A real is "true" if it is non-zero (exponent is non-zero) AND
563     * not NaN (exponent is not full bits OR higher order mantissa is zero) */
564    uint32_t exponent = m_signexp << 1;
565    return exponent && (~exponent || m_mantissa[0] == 0);
566}
567
568real re(real const &x)
569{
570    if (!(x.m_signexp << 1))
571    {
572        real ret = x;
573        ret.m_signexp = x.m_signexp | 0x7fffffffu;
574        ret.m_mantissa[0] = 0;
575        return ret;
576    }
577
578    /* Use the system's float inversion to approximate 1/x */
579    union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
580    v.x |= x.m_mantissa[0] >> 9;
581    v.f = 1.0 / v.f;
582
583    real ret;
584    ret.m_mantissa[0] = v.x << 9;
585
586    uint32_t sign = x.m_signexp & 0x80000000u;
587    ret.m_signexp = sign;
588
589    int exponent = (x.m_signexp & 0x7fffffffu) + 1;
590    exponent = -exponent + (v.x >> 23) - (u.x >> 23);
591    ret.m_signexp |= (exponent - 1) & 0x7fffffffu;
592
593    /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
594     * convergence, but this hasn't been checked seriously. */
595    for (int i = 1; i <= real::BIGITS; i *= 2)
596        ret = ret * (real::R_2 - ret * x);
597
598    return ret;
599}
600
601real sqrt(real const &x)
602{
603    /* if zero, return x */
604    if (!(x.m_signexp << 1))
605        return x;
606
607    /* if negative, return NaN */
608    if (x.m_signexp >> 31)
609    {
610        real ret;
611        ret.m_signexp = 0x7fffffffu;
612        ret.m_mantissa[0] = 0xffffu;
613        return ret;
614    }
615
616    /* Use the system's float inversion to approximate 1/sqrt(x). First
617     * we construct a float in the [1..4[ range that has roughly the same
618     * mantissa as our real. Its exponent is 0 or 1, depending on the
619     * partity of x. The final exponent is 0, -1 or -2. We use the final
620     * exponent and final mantissa to pre-fill the result. */
621    union { float f; uint32_t x; } u = { 1.0f }, v = { 2.0f };
622    v.x -= ((x.m_signexp & 1) << 23);
623    v.x |= x.m_mantissa[0] >> 9;
624    v.f = 1.0 / sqrtf(v.f);
625
626    real ret;
627    ret.m_mantissa[0] = v.x << 9;
628
629    uint32_t sign = x.m_signexp & 0x80000000u;
630    ret.m_signexp = sign;
631
632    uint32_t exponent = (x.m_signexp & 0x7fffffffu);
633    exponent = ((1 << 30) + (1 << 29) - 1) - (exponent + 1) / 2;
634    exponent = exponent + (v.x >> 23) - (u.x >> 23);
635    ret.m_signexp |= exponent & 0x7fffffffu;
636
637    /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
638     * convergence, but this hasn't been checked seriously. */
639    for (int i = 1; i <= real::BIGITS; i *= 2)
640    {
641        ret = ret * (real::R_3 - ret * ret * x);
642        ret.m_signexp--;
643    }
644
645    return ret * x;
646}
647
648real cbrt(real const &x)
649{
650    /* if zero, return x */
651    if (!(x.m_signexp << 1))
652        return x;
653
654    /* Use the system's float inversion to approximate cbrt(x). First
655     * we construct a float in the [1..8[ range that has roughly the same
656     * mantissa as our real. Its exponent is 0, 1 or 2, depending on the
657     * value of x. The final exponent is 0 or 1 (special case). We use
658     * the final exponent and final mantissa to pre-fill the result. */
659    union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
660    v.x += ((x.m_signexp % 3) << 23);
661    v.x |= x.m_mantissa[0] >> 9;
662    v.f = powf(v.f, 0.33333333333333333f);
663
664    real ret;
665    ret.m_mantissa[0] = v.x << 9;
666
667    uint32_t sign = x.m_signexp & 0x80000000u;
668    ret.m_signexp = sign;
669
670    int exponent = (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
671    exponent = exponent / 3 + (v.x >> 23) - (u.x >> 23);
672    ret.m_signexp |= (exponent + (1 << 30) - 1) & 0x7fffffffu;
673
674    /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
675     * convergence, but this hasn't been checked seriously. */
676    for (int i = 1; i <= real::BIGITS; i *= 2)
677    {
678        static real third = re(real::R_3);
679        ret = third * (x / (ret * ret) + (ret << 1));
680    }
681
682    return ret;
683}
684
685real pow(real const &x, real const &y)
686{
687    if (!y)
688        return real::R_1;
689    if (!x)
690        return real::R_0;
691    if (x > real::R_0)
692        return exp(y * log(x));
693    else /* x < 0 */
694    {
695        /* Odd integer exponent */
696        if (y == (round(y >> 1) << 1))
697            return exp(y * log(-x));
698
699        /* Even integer exponent */
700        if (y == round(y))
701            return -exp(y * log(-x));
702
703        /* FIXME: negative nth root */
704        return real::R_0;
705    }
706}
707
708real gamma(real const &x)
709{
710    /* We use Spouge's formula. FIXME: precision is far from acceptable,
711     * especially with large values. We need to compute this with higher
712     * precision values in order to attain the desired accuracy. It might
713     * also be useful to sort the ck values by decreasing absolute value
714     * and do the addition in this order. */
715    int a = ceilf(logf(2) / logf(2 * M_PI) * real::BIGITS * real::BIGIT_BITS);
716
717    real ret = sqrt(real::R_PI << 1);
718    real fact_k_1 = real::R_1;
719
720    for (int k = 1; k < a; k++)
721    {
722        real a_k = (real)(a - k);
723        real ck = pow(a_k, (real)((float)k - 0.5)) * exp(a_k)
724                / (fact_k_1 * (x + (real)(k - 1)));
725        ret += ck;
726        fact_k_1 *= (real)-k;
727    }
728
729    ret *= pow(x + (real)(a - 1), x - (real::R_1 >> 1));
730    ret *= exp(-x - (real)(a - 1));
731
732    return ret;
733}
734
735real fabs(real const &x)
736{
737    real ret = x;
738    ret.m_signexp &= 0x7fffffffu;
739    return ret;
740}
741
742static real fast_log(real const &x)
743{
744    /* This fast log method is tuned to work on the [1..2] range and
745     * no effort whatsoever was made to improve convergence outside this
746     * domain of validity. It can converge pretty fast, provided we use
747     * the following variable substitutions:
748     *    y = sqrt(x)
749     *    z = (y - 1) / (y + 1)
750     *
751     * And the following identities:
752     *    ln(x) = 2 ln(y)
753     *          = 2 ln((1 + z) / (1 - z))
754     *          = 4 z (1 + z^2 / 3 + z^4 / 5 + z^6 / 7...)
755     *
756     * Any additional sqrt() call would halve the convergence time, but
757     * would also impact the final precision. For now we stick with one
758     * sqrt() call. */
759    real y = sqrt(x);
760    real z = (y - real::R_1) / (y + real::R_1), z2 = z * z, zn = z2;
761    real sum = real::R_1;
762
763    for (int i = 3; ; i += 2)
764    {
765        real newsum = sum + zn / (real)i;
766        if (newsum == sum)
767            break;
768        sum = newsum;
769        zn *= z2;
770    }
771
772    return z * (sum << 2);
773}
774
775real log(real const &x)
776{
777    /* Strategy for log(x): if x = 2^E*M then log(x) = E log(2) + log(M),
778     * with the property that M is in [1..2[, so fast_log() applies here. */
779    real tmp = x;
780    if (x.m_signexp >> 31 || x.m_signexp == 0)
781    {
782        tmp.m_signexp = 0xffffffffu;
783        tmp.m_mantissa[0] = 0xffffffffu;
784        return tmp;
785    }
786    tmp.m_signexp = (1 << 30) - 1;
787    return (real)(int)(x.m_signexp - (1 << 30) + 1) * real::R_LN2
788           + fast_log(tmp);
789}
790
791real log2(real const &x)
792{
793    /* Strategy for log2(x): see log(x). */
794    real tmp = x;
795    if (x.m_signexp >> 31 || x.m_signexp == 0)
796    {
797        tmp.m_signexp = 0xffffffffu;
798        tmp.m_mantissa[0] = 0xffffffffu;
799        return tmp;
800    }
801    tmp.m_signexp = (1 << 30) - 1;
802    return (real)(int)(x.m_signexp - (1 << 30) + 1)
803           + fast_log(tmp) * real::R_LOG2E;
804}
805
806real log10(real const &x)
807{
808    return log(x) * real::R_LOG10E;
809}
810
811static real fast_exp_sub(real const &x, real const &y)
812{
813    /* This fast exp method is tuned to work on the [-1..1] range and
814     * no effort whatsoever was made to improve convergence outside this
815     * domain of validity. The argument y is used for cases where we
816     * don't want the leading 1 in the Taylor series. */
817    real ret = real::R_1 - y, fact = real::R_1, xn = x;
818
819    for (int i = 1; ; i++)
820    {
821        real newret = ret + xn;
822        if (newret == ret)
823            break;
824        ret = newret;
825        real mul = (i + 1);
826        fact *= mul;
827        ret *= mul;
828        xn *= x;
829    }
830    ret /= fact;
831
832    return ret;
833}
834
835real exp(real const &x)
836{
837    /* Strategy for exp(x): the Taylor series does not converge very fast
838     * with large positive or negative values.
839     *
840     * However, we know that the result is going to be in the form M*2^E,
841     * where M is the mantissa and E the exponent. We first try to predict
842     * a value for E, which is approximately log2(exp(x)) = x / log(2).
843     *
844     * Let E0 be an integer close to x / log(2). We need to find a value x0
845     * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
846     *
847     * Thus the final algorithm:
848     *  int E0 = x / log(2)
849     *  real x0 = x - E0 log(2)
850     *  real x1 = exp(x0)
851     *  return x1 * 2^E0
852     */
853    int e0 = x / real::R_LN2;
854    real x0 = x - (real)e0 * real::R_LN2;
855    real x1 = fast_exp_sub(x0, real::R_0);
856    x1.m_signexp += e0;
857    return x1;
858}
859
860real exp2(real const &x)
861{
862    /* Strategy for exp2(x): see strategy in exp(). */
863    int e0 = x;
864    real x0 = x - (real)e0;
865    real x1 = fast_exp_sub(x0 * real::R_LN2, real::R_0);
866    x1.m_signexp += e0;
867    return x1;
868}
869
870real sinh(real const &x)
871{
872    /* We cannot always use (exp(x)-exp(-x))/2 because we'll lose
873     * accuracy near zero. We only use this identity for |x|>0.5. If
874     * |x|<=0.5, we compute exp(x)-1 and exp(-x)-1 instead. */
875    bool near_zero = (fabs(x) < real::R_1 >> 1);
876    real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
877    real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
878    return (x1 - x2) >> 1;
879}
880
881real tanh(real const &x)
882{
883    /* See sinh() for the strategy here */
884    bool near_zero = (fabs(x) < real::R_1 >> 1);
885    real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
886    real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
887    real x3 = near_zero ? x1 + x2 + real::R_2 : x1 + x2;
888    return (x1 - x2) / x3;
889}
890
891real cosh(real const &x)
892{
893    /* No need to worry about accuracy here; maybe the last bit is slightly
894     * off, but that's about it. */
895    return (exp(x) + exp(-x)) >> 1;
896}
897
898real frexp(real const &x, int *exp)
899{
900    if (!x)
901    {
902        *exp = 0;
903        return x;
904    }
905
906    real ret = x;
907    int exponent = (ret.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
908    *exp = exponent + 1;
909    ret.m_signexp -= exponent + 1;
910    return ret;
911}
912
913real ldexp(real const &x, int exp)
914{
915    real ret = x;
916    if (ret)
917        ret.m_signexp += exp;
918    return ret;
919}
920
921real modf(real const &x, real *iptr)
922{
923    real absx = fabs(x);
924    real tmp = floor(absx);
925
926    *iptr = copysign(tmp, x);
927    return copysign(absx - tmp, x);
928}
929
930real copysign(real const &x, real const &y)
931{
932    real ret = x;
933    ret.m_signexp &= 0x7fffffffu;
934    ret.m_signexp |= y.m_signexp & 0x80000000u;
935    return ret;
936}
937
938real floor(real const &x)
939{
940    /* Strategy for floor(x):
941     *  - if negative, return -ceil(-x)
942     *  - if zero or negative zero, return x
943     *  - if less than one, return zero
944     *  - otherwise, if e is the exponent, clear all bits except the
945     *    first e. */
946    if (x < -real::R_0)
947        return -ceil(-x);
948    if (!x)
949        return x;
950    if (x < real::R_1)
951        return real::R_0;
952
953    real ret = x;
954    int exponent = x.m_signexp - (1 << 30) + 1;
955
956    for (int i = 0; i < real::BIGITS; i++)
957    {
958        if (exponent <= 0)
959            ret.m_mantissa[i] = 0;
960        else if (exponent < real::BIGIT_BITS)
961            ret.m_mantissa[i] &= ~((1 << (real::BIGIT_BITS - exponent)) - 1);
962
963        exponent -= real::BIGIT_BITS;
964    }
965
966    return ret;
967}
968
969real ceil(real const &x)
970{
971    /* Strategy for ceil(x):
972     *  - if negative, return -floor(-x)
973     *  - if x == floor(x), return x
974     *  - otherwise, return floor(x) + 1 */
975    if (x < -real::R_0)
976        return -floor(-x);
977    real ret = floor(x);
978    if (x == ret)
979        return ret;
980    else
981        return ret + real::R_1;
982}
983
984real round(real const &x)
985{
986    if (x < real::R_0)
987        return -round(-x);
988
989    return floor(x + (real::R_1 >> 1));
990}
991
992real fmod(real const &x, real const &y)
993{
994    if (!y)
995        return real::R_0; /* FIXME: return NaN */
996
997    if (!x)
998        return x;
999
1000    real tmp = round(x / y);
1001    return x - tmp * y;
1002}
1003
1004real sin(real const &x)
1005{
1006    bool switch_sign = x.m_signexp & 0x80000000u;
1007
1008    real absx = fmod(fabs(x), real::R_PI << 1);
1009    if (absx > real::R_PI)
1010    {
1011        absx -= real::R_PI;
1012        switch_sign = !switch_sign;
1013    }
1014
1015    if (absx > real::R_PI_2)
1016        absx = real::R_PI - absx;
1017
1018    real ret = real::R_0, fact = real::R_1, xn = absx, mx2 = -absx * absx;
1019    for (int i = 1; ; i += 2)
1020    {
1021        real newret = ret + xn;
1022        if (newret == ret)
1023            break;
1024        ret = newret;
1025        real mul = (i + 1) * (i + 2);
1026        fact *= mul;
1027        ret *= mul;
1028        xn *= mx2;
1029    }
1030    ret /= fact;
1031
1032    /* Propagate sign */
1033    if (switch_sign)
1034        ret.m_signexp ^= 0x80000000u;
1035    return ret;
1036}
1037
1038real cos(real const &x)
1039{
1040    return sin(real::R_PI_2 - x);
1041}
1042
1043real tan(real const &x)
1044{
1045    /* Constrain input to [-π,π] */
1046    real y = fmod(x, real::R_PI);
1047
1048    /* Constrain input to [-π/2,π/2] */
1049    if (y < -real::R_PI_2)
1050        y += real::R_PI;
1051    else if (y > real::R_PI_2)
1052        y -= real::R_PI;
1053
1054    /* In [-π/4,π/4] return sin/cos */
1055    if (fabs(y) <= real::R_PI_4)
1056        return sin(y) / cos(y);
1057
1058    /* Otherwise, return cos/sin */
1059    if (y > real::R_0)
1060        y = real::R_PI_2 - y;
1061    else
1062        y = -real::R_PI_2 - y;
1063
1064    return cos(y) / sin(y);
1065}
1066
1067static real asinacos(real const &x, bool is_asin, bool is_negative)
1068{
1069    /* Strategy for asin(): in [-0.5..0.5], use a Taylor series around
1070     * zero. In [0.5..1], use asin(x) = π/2 - 2*asin(sqrt((1-x)/2)), and
1071     * in [-1..-0.5] just revert the sign.
1072     * Strategy for acos(): use acos(x) = π/2 - asin(x) and try not to
1073     * lose the precision around x=1. */
1074    real absx = fabs(x);
1075    bool around_zero = (absx < (real::R_1 >> 1));
1076
1077    if (!around_zero)
1078        absx = sqrt((real::R_1 - absx) >> 1);
1079
1080    real ret = absx, xn = absx, x2 = absx * absx, fact1 = 2, fact2 = 1;
1081    for (int i = 1; ; i++)
1082    {
1083        xn *= x2;
1084        real mul = (real)(2 * i + 1);
1085        real newret = ret + ((fact1 * xn / (mul * fact2)) >> (i * 2));
1086        if (newret == ret)
1087            break;
1088        ret = newret;
1089        fact1 *= (real)((2 * i + 1) * (2 * i + 2));
1090        fact2 *= (real)((i + 1) * (i + 1));
1091    }
1092
1093    if (is_negative)
1094        ret = -ret;
1095
1096    if (around_zero)
1097        ret = is_asin ? ret : real::R_PI_2 - ret;
1098    else
1099    {
1100        real adjust = is_negative ? real::R_PI : real::R_0;
1101        if (is_asin)
1102            ret = real::R_PI_2 - adjust - (ret << 1);
1103        else
1104            ret = adjust + (ret << 1);
1105    }
1106
1107    return ret;
1108}
1109
1110real asin(real const &x)
1111{
1112    return asinacos(x, true, x.m_signexp >> 31);
1113}
1114
1115real acos(real const &x)
1116{
1117    return asinacos(x, false, x.m_signexp >> 31);
1118}
1119
1120real atan(real const &x)
1121{
1122    /* Computing atan(x): we choose a different Taylor series depending on
1123     * the value of x to help with convergence.
1124     *
1125     * If |x| < 0.5 we evaluate atan(y) near 0:
1126     *  atan(y) = y - y^3/3 + y^5/5 - y^7/7 + y^9/9 ...
1127     *
1128     * If 0.5 <= |x| < 1.5 we evaluate atan(1+y) near 0:
1129     *  atan(1+y) = π/4 + y/(1*2^1) - y^2/(2*2^1) + y^3/(3*2^2)
1130     *                  - y^5/(5*2^3) + y^6/(6*2^3) - y^7/(7*2^4)
1131     *                  + y^9/(9*2^5) - y^10/(10*2^5) + y^11/(11*2^6) ...
1132     *
1133     * If 1.5 <= |x| < 2 we evaluate atan(sqrt(3)+2y) near 0:
1134     *  atan(sqrt(3)+2y) = π/3 + 1/2 y - sqrt(3)/2 y^2/2
1135     *                         + y^3/3 - sqrt(3)/2 y^4/4 + 1/2 y^5/5
1136     *                         - 1/2 y^7/7 + sqrt(3)/2 y^8/8
1137     *                         - y^9/9 + sqrt(3)/2 y^10/10 - 1/2 y^11/11
1138     *                         + 1/2 y^13/13 - sqrt(3)/2 y^14/14
1139     *                         + y^15/15 - sqrt(3)/2 y^16/16 + 1/2 y^17/17 ...
1140     *
1141     * If |x| >= 2 we evaluate atan(y) near +∞:
1142     *  atan(y) = π/2 - y^-1 + y^-3/3 - y^-5/5 + y^-7/7 - y^-9/9 ...
1143     */
1144    real absx = fabs(x);
1145
1146    if (absx < (real::R_1 >> 1))
1147    {
1148        real ret = x, xn = x, mx2 = -x * x;
1149        for (int i = 3; ; i += 2)
1150        {
1151            xn *= mx2;
1152            real newret = ret + xn / (real)i;
1153            if (newret == ret)
1154                break;
1155            ret = newret;
1156        }
1157        return ret;
1158    }
1159
1160    real ret = 0;
1161
1162    if (absx < (real::R_3 >> 1))
1163    {
1164        real y = real::R_1 - absx;
1165        real yn = y, my2 = -y * y;
1166        for (int i = 0; ; i += 2)
1167        {
1168            real newret = ret + ((yn / (real)(2 * i + 1)) >> (i + 1));
1169            yn *= y;
1170            newret += (yn / (real)(2 * i + 2)) >> (i + 1);
1171            yn *= y;
1172            newret += (yn / (real)(2 * i + 3)) >> (i + 2);
1173            if (newret == ret)
1174                break;
1175            ret = newret;
1176            yn *= my2;
1177        }
1178        ret = real::R_PI_4 - ret;
1179    }
1180    else if (absx < real::R_2)
1181    {
1182        real y = (absx - real::R_SQRT3) >> 1;
1183        real yn = y, my2 = -y * y;
1184        for (int i = 1; ; i += 6)
1185        {
1186            real newret = ret + ((yn / (real)i) >> 1);
1187            yn *= y;
1188            newret -= (real::R_SQRT3 >> 1) * yn / (real)(i + 1);
1189            yn *= y;
1190            newret += yn / (real)(i + 2);
1191            yn *= y;
1192            newret -= (real::R_SQRT3 >> 1) * yn / (real)(i + 3);
1193            yn *= y;
1194            newret += (yn / (real)(i + 4)) >> 1;
1195            if (newret == ret)
1196                break;
1197            ret = newret;
1198            yn *= my2;
1199        }
1200        ret = real::R_PI_3 + ret;
1201    }
1202    else
1203    {
1204        real y = re(absx);
1205        real yn = y, my2 = -y * y;
1206        ret = y;
1207        for (int i = 3; ; i += 2)
1208        {
1209            yn *= my2;
1210            real newret = ret + yn / (real)i;
1211            if (newret == ret)
1212                break;
1213            ret = newret;
1214        }
1215        ret = real::R_PI_2 - ret;
1216    }
1217
1218    /* Propagate sign */
1219    ret.m_signexp |= (x.m_signexp & 0x80000000u);
1220    return ret;
1221}
1222
1223real atan2(real const &y, real const &x)
1224{
1225    if (!y)
1226    {
1227        if ((x.m_signexp >> 31) == 0)
1228            return y;
1229        if (y.m_signexp >> 31)
1230            return -real::R_PI;
1231        return real::R_PI;
1232    }
1233
1234    if (!x)
1235    {
1236        if (y.m_signexp >> 31)
1237            return -real::R_PI;
1238        return real::R_PI;
1239    }
1240
1241    /* FIXME: handle the Inf and NaN cases */
1242    real z = y / x;
1243    real ret = atan(z);
1244    if (x < real::R_0)
1245        ret += (y > real::R_0) ? real::R_PI : -real::R_PI;
1246    return ret;
1247}
1248
1249void real::hexprint() const
1250{
1251    printf("%08x", m_signexp);
1252    for (int i = 0; i < BIGITS; i++)
1253        printf(" %08x", m_mantissa[i]);
1254    printf("\n");
1255}
1256
1257void real::print(int ndigits) const
1258{
1259    real x = *this;
1260
1261    if (x.m_signexp >> 31)
1262    {
1263        printf("-");
1264        x = -x;
1265    }
1266
1267    /* Normalise x so that mantissa is in [1..9.999] */
1268    int exponent = 0;
1269    if (x.m_signexp)
1270    {
1271        for (real div = R_1, newdiv; true; div = newdiv)
1272        {
1273            newdiv = div * R_10;
1274            if (x < newdiv)
1275            {
1276                x /= div;
1277                break;
1278            }
1279            exponent++;
1280        }
1281        for (real mul = 1, newx; true; mul *= R_10)
1282        {
1283            newx = x * mul;
1284            if (newx >= R_1)
1285            {
1286                x = newx;
1287                break;
1288            }
1289            exponent--;
1290        }
1291    }
1292
1293    /* Print digits */
1294    for (int i = 0; i < ndigits; i++)
1295    {
1296        int digit = (int)x;
1297        printf("%i", digit);
1298        if (i == 0)
1299            printf(".");
1300        x -= real(digit);
1301        x *= R_10;
1302    }
1303
1304    /* Print exponent information */
1305    if (exponent < 0)
1306        printf("e-%i", -exponent);
1307    else if (exponent > 0)
1308        printf("e+%i", exponent);
1309
1310    printf("\n");
1311}
1312
1313static real fast_pi()
1314{
1315    /* Approximate Pi using Machin's formula: 16*atan(1/5)-4*atan(1/239) */
1316    real ret = 0.0, x0 = 5.0, x1 = 239.0;
1317    real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16.0, r4 = 4.0;
1318
1319    for (int i = 1; ; i += 2)
1320    {
1321        real newret = ret + r16 / (x0 * (real)i) - r4 / (x1 * (real)i);
1322        if (newret == ret)
1323            break;
1324        ret = newret;
1325        x0 *= m0;
1326        x1 *= m1;
1327    }
1328
1329    return ret;
1330}
1331
1332real const real::R_0        = (real)0.0;
1333real const real::R_1        = (real)1.0;
1334real const real::R_2        = (real)2.0;
1335real const real::R_3        = (real)3.0;
1336real const real::R_10       = (real)10.0;
1337
1338real const real::R_LN2      = fast_log(R_2);
1339real const real::R_LN10     = log(R_10);
1340real const real::R_LOG2E    = re(R_LN2);
1341real const real::R_LOG10E   = re(R_LN10);
1342real const real::R_E        = exp(R_1);
1343real const real::R_PI       = fast_pi();
1344real const real::R_PI_2     = R_PI >> 1;
1345real const real::R_PI_3     = R_PI / R_3;
1346real const real::R_PI_4     = R_PI >> 2;
1347real const real::R_1_PI     = re(R_PI);
1348real const real::R_2_PI     = R_1_PI << 1;
1349real const real::R_2_SQRTPI = re(sqrt(R_PI)) << 1;
1350real const real::R_SQRT2    = sqrt(R_2);
1351real const real::R_SQRT3    = sqrt(R_3);
1352real const real::R_SQRT1_2  = R_SQRT2 >> 1;
1353
1354} /* namespace lol */
1355
Note: See TracBrowser for help on using the repository browser.