source: trunk/src/real.cpp @ 1115

Last change on this file since 1115 was 1115, checked in by sam, 9 years ago

core: allow to build a real number using a string literal.

File size: 32.9 KB
Line 
1//
2// Lol Engine
3//
4// Copyright: (c) 2010-2011 Sam Hocevar <sam@hocevar.net>
5//   This program is free software; you can redistribute it and/or
6//   modify it under the terms of the Do What The Fuck You Want To
7//   Public License, Version 2, as published by Sam Hocevar. See
8//   http://sam.zoy.org/projects/COPYING.WTFPL for more details.
9//
10
11#if defined HAVE_CONFIG_H
12#   include "config.h"
13#endif
14
15#include <cstring>
16#include <cstdio>
17
18#include "core.h"
19
20using namespace std;
21
22namespace lol
23{
24
25real::real()
26{
27    m_mantissa = new uint32_t[BIGITS];
28    m_signexp = 0;
29}
30
31real::real(real const &x)
32{
33    m_mantissa = new uint32_t[BIGITS];
34    memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
35    m_signexp = x.m_signexp;
36}
37
38real const &real::operator =(real const &x)
39{
40    if (&x != this)
41    {
42        memcpy(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t));
43        m_signexp = x.m_signexp;
44    }
45
46    return *this;
47}
48
49real::~real()
50{
51    delete[] m_mantissa;
52}
53
54real::real(float f) { new(this) real((double)f); }
55real::real(int i) { new(this) real((double)i); }
56real::real(unsigned int i) { new(this) real((double)i); }
57
58real::real(double d)
59{
60    new(this) real();
61
62    union { double d; uint64_t x; } u = { d };
63
64    uint32_t sign = (u.x >> 63) << 31;
65    uint32_t exponent = (u.x << 1) >> 53;
66
67    switch (exponent)
68    {
69    case 0x00:
70        m_signexp = sign;
71        break;
72    case 0x7ff:
73        m_signexp = sign | 0x7fffffffu;
74        break;
75    default:
76        m_signexp = sign | (exponent + (1 << 30) - (1 << 10));
77        break;
78    }
79
80    m_mantissa[0] = u.x >> 20;
81    m_mantissa[1] = u.x << 12;
82    memset(m_mantissa + 2, 0, (BIGITS - 2) * sizeof(m_mantissa[0]));
83}
84
85real::operator float() const { return (float)(double)(*this); }
86real::operator int() const { return (int)(double)(*this); }
87real::operator unsigned int() const { return (unsigned int)(double)(*this); }
88
89real::operator double() const
90{
91    union { double d; uint64_t x; } u;
92
93    /* Get sign */
94    u.x = m_signexp >> 31;
95    u.x <<= 11;
96
97    /* Compute new exponent */
98    uint32_t exponent = (m_signexp << 1) >> 1;
99    int e = (int)exponent - (1 << 30) + (1 << 10);
100
101    if (e < 0)
102        u.x <<= 52;
103    else if (e >= 0x7ff)
104    {
105        u.x |= 0x7ff;
106        u.x <<= 52;
107    }
108    else
109    {
110        u.x |= e;
111
112        /* Store mantissa if necessary */
113        u.x <<= 32;
114        u.x |= m_mantissa[0];
115        u.x <<= 20;
116        u.x |= m_mantissa[1] >> 12;
117        /* Rounding */
118        u.x += (m_mantissa[1] >> 11) & 1;
119    }
120
121    return u.d;
122}
123
124/*
125 * Create a real number from an ASCII representation
126 */
127real::real(char const *str)
128{
129    real ret = 0;
130    int exponent = 0;
131    bool comma = false, nonzero = false, negative = false, finished = false;
132
133    for (char const *p = str; *p && !finished; p++)
134    {
135        switch (*p)
136        {
137        case '-':
138        case '+':
139            if (p != str)
140                break;
141            negative = (*p == '-');
142            break;
143        case '.':
144            if (comma)
145                finished = true;
146            comma = true;
147            break;
148        case '0': case '1': case '2': case '3': case '4':
149        case '5': case '6': case '7': case '8': case '9':
150            if (nonzero)
151            {
152                real x = ret + ret;
153                x = x + x + ret;
154                ret = x + x;
155            }
156            if (*p != '0')
157            {
158                ret += (int)(*p - '0');
159                nonzero = true;
160            }
161            if (comma)
162                exponent--;
163            break;
164        case 'e':
165        case 'E':
166            exponent += atoi(p + 1);
167            finished = true;
168            break;
169        default:
170            finished = true;
171            break;
172        }
173    }
174
175    if (exponent)
176        ret *= pow(R_10, (real)exponent);
177
178    if (negative)
179        ret = -ret;
180
181    new(this) real(ret);
182}
183
184real real::operator +() const
185{
186    return *this;
187}
188
189real real::operator -() const
190{
191    real ret = *this;
192    ret.m_signexp ^= 0x80000000u;
193    return ret;
194}
195
196real real::operator +(real const &x) const
197{
198    if (x.m_signexp << 1 == 0)
199        return *this;
200
201    /* Ensure both arguments are positive. Otherwise, switch signs,
202     * or replace + with -. */
203    if (m_signexp >> 31)
204        return -(-*this + -x);
205
206    if (x.m_signexp >> 31)
207        return *this - (-x);
208
209    /* Ensure *this has the larger exponent (no need for the mantissa to
210     * be larger, as in subtraction). Otherwise, switch. */
211    if ((m_signexp << 1) < (x.m_signexp << 1))
212        return x + *this;
213
214    real ret;
215
216    int e1 = m_signexp - (1 << 30) + 1;
217    int e2 = x.m_signexp - (1 << 30) + 1;
218
219    int bigoff = (e1 - e2) / BIGIT_BITS;
220    int off = e1 - e2 - bigoff * BIGIT_BITS;
221
222    if (bigoff > BIGITS)
223        return *this;
224
225    ret.m_signexp = m_signexp;
226
227    uint64_t carry = 0;
228    for (int i = BIGITS; i--; )
229    {
230        carry += m_mantissa[i];
231        if (i - bigoff >= 0)
232            carry += x.m_mantissa[i - bigoff] >> off;
233
234        if (off && i - bigoff > 0)
235            carry += (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
236        else if (i - bigoff == 0)
237            carry += (uint64_t)1 << (BIGIT_BITS - off);
238
239        ret.m_mantissa[i] = carry;
240        carry >>= BIGIT_BITS;
241    }
242
243    /* Renormalise in case we overflowed the mantissa */
244    if (carry)
245    {
246        carry--;
247        for (int i = 0; i < BIGITS; i++)
248        {
249            uint32_t tmp = ret.m_mantissa[i];
250            ret.m_mantissa[i] = (carry << (BIGIT_BITS - 1)) | (tmp >> 1);
251            carry = tmp & 1u;
252        }
253        ret.m_signexp++;
254    }
255
256    return ret;
257}
258
259real real::operator -(real const &x) const
260{
261    if (x.m_signexp << 1 == 0)
262        return *this;
263
264    /* Ensure both arguments are positive. Otherwise, switch signs,
265     * or replace - with +. */
266    if (m_signexp >> 31)
267        return -(-*this + x);
268
269    if (x.m_signexp >> 31)
270        return (*this) + (-x);
271
272    /* Ensure *this is larger than x */
273    if (*this < x)
274        return -(x - *this);
275
276    real ret;
277
278    int e1 = m_signexp - (1 << 30) + 1;
279    int e2 = x.m_signexp - (1 << 30) + 1;
280
281    int bigoff = (e1 - e2) / BIGIT_BITS;
282    int off = e1 - e2 - bigoff * BIGIT_BITS;
283
284    if (bigoff > BIGITS)
285        return *this;
286
287    ret.m_signexp = m_signexp;
288
289    int64_t carry = 0;
290    for (int i = 0; i < bigoff; i++)
291    {
292        carry -= x.m_mantissa[BIGITS - 1 - i];
293        /* Emulates a signed shift */
294        carry >>= BIGIT_BITS;
295        carry |= carry << BIGIT_BITS;
296    }
297    if (bigoff < BIGITS)
298        carry -= x.m_mantissa[BIGITS - 1 - bigoff] & (((int64_t)1 << off) - 1);
299    carry /= (int64_t)1 << off;
300
301    for (int i = BIGITS; i--; )
302    {
303        carry += m_mantissa[i];
304        if (i - bigoff >= 0)
305            carry -= x.m_mantissa[i - bigoff] >> off;
306
307        if (off && i - bigoff > 0)
308            carry -= (x.m_mantissa[i - bigoff - 1] << (BIGIT_BITS - off)) & 0xffffffffu;
309        else if (i - bigoff == 0)
310            carry -= (uint64_t)1 << (BIGIT_BITS - off);
311
312        ret.m_mantissa[i] = carry;
313        carry >>= BIGIT_BITS;
314        carry |= carry << BIGIT_BITS;
315    }
316
317    carry += 1;
318
319    /* Renormalise if we underflowed the mantissa */
320    if (carry == 0)
321    {
322        /* How much do we need to shift the mantissa? FIXME: this could
323         * be computed above */
324        off = 0;
325        for (int i = 0; i < BIGITS; i++)
326        {
327            if (!ret.m_mantissa[i])
328            {
329                off += BIGIT_BITS;
330                continue;
331            }
332
333            for (uint32_t tmp = ret.m_mantissa[i]; tmp < 0x80000000u; tmp <<= 1)
334                off++;
335            break;
336        }
337        if (off == BIGITS * BIGIT_BITS)
338            ret.m_signexp &= 0x80000000u;
339        else
340        {
341            off++; /* Shift one more to get rid of the leading one */
342            ret.m_signexp -= off;
343
344            bigoff = off / BIGIT_BITS;
345            off -= bigoff * BIGIT_BITS;
346
347            for (int i = 0; i < BIGITS; i++)
348            {
349                uint32_t tmp = 0;
350                if (i + bigoff < BIGITS)
351                    tmp |= ret.m_mantissa[i + bigoff] << off;
352                if (off && i + bigoff + 1 < BIGITS)
353                    tmp |= ret.m_mantissa[i + bigoff + 1] >> (BIGIT_BITS - off);
354                ret.m_mantissa[i] = tmp;
355            }
356        }
357    }
358
359    return ret;
360}
361
362real real::operator *(real const &x) const
363{
364    real ret;
365
366    if (m_signexp << 1 == 0 || x.m_signexp << 1 == 0)
367    {
368        ret = (m_signexp << 1 == 0) ? *this : x;
369        ret.m_signexp ^= x.m_signexp & 0x80000000u;
370        return ret;
371    }
372
373    ret.m_signexp = (m_signexp ^ x.m_signexp) & 0x80000000u;
374    int e = (m_signexp & 0x7fffffffu) - (1 << 30) + 1
375          + (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
376
377    /* Accumulate low order product; no need to store it, we just
378     * want the carry value */
379    uint64_t carry = 0, hicarry = 0, prev;
380    for (int i = 0; i < BIGITS; i++)
381    {
382        for (int j = 0; j < i + 1; j++)
383        {
384            prev = carry;
385            carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
386                   * (uint64_t)x.m_mantissa[BIGITS - 1 + j - i];
387            if (carry < prev)
388                hicarry++;
389        }
390        carry >>= BIGIT_BITS;
391        carry |= hicarry << BIGIT_BITS;
392        hicarry >>= BIGIT_BITS;
393    }
394
395    for (int i = 0; i < BIGITS; i++)
396    {
397        for (int j = i + 1; j < BIGITS; j++)
398        {
399            prev = carry;
400            carry += (uint64_t)m_mantissa[BIGITS - 1 - j]
401                   * (uint64_t)x.m_mantissa[j - 1 - i];
402            if (carry < prev)
403                hicarry++;
404        }
405        prev = carry;
406        carry += m_mantissa[BIGITS - 1 - i];
407        carry += x.m_mantissa[BIGITS - 1 - i];
408        if (carry < prev)
409            hicarry++;
410        ret.m_mantissa[BIGITS - 1 - i] = carry & 0xffffffffu;
411        carry >>= BIGIT_BITS;
412        carry |= hicarry << BIGIT_BITS;
413        hicarry >>= BIGIT_BITS;
414    }
415
416    /* Renormalise in case we overflowed the mantissa */
417    if (carry)
418    {
419        carry--;
420        for (int i = 0; i < BIGITS; i++)
421        {
422            uint32_t tmp = ret.m_mantissa[i];
423            ret.m_mantissa[i] = (carry << (BIGIT_BITS - 1)) | (tmp >> 1);
424            carry = tmp & 1u;
425        }
426        e++;
427    }
428
429    ret.m_signexp |= e + (1 << 30) - 1;
430
431    return ret;
432}
433
434real real::operator /(real const &x) const
435{
436    return *this * re(x);
437}
438
439real const &real::operator +=(real const &x)
440{
441    real tmp = *this;
442    return *this = tmp + x;
443}
444
445real const &real::operator -=(real const &x)
446{
447    real tmp = *this;
448    return *this = tmp - x;
449}
450
451real const &real::operator *=(real const &x)
452{
453    real tmp = *this;
454    return *this = tmp * x;
455}
456
457real const &real::operator /=(real const &x)
458{
459    real tmp = *this;
460    return *this = tmp / x;
461}
462
463real real::operator <<(int x) const
464{
465    real tmp = *this;
466    return tmp <<= x;
467}
468
469real real::operator >>(int x) const
470{
471    real tmp = *this;
472    return tmp >>= x;
473}
474
475real const &real::operator <<=(int x)
476{
477    if (m_signexp << 1)
478        m_signexp += x;
479    return *this;
480}
481
482real const &real::operator >>=(int x)
483{
484    if (m_signexp << 1)
485        m_signexp -= x;
486    return *this;
487}
488
489bool real::operator ==(real const &x) const
490{
491    if ((m_signexp << 1) == 0 && (x.m_signexp << 1) == 0)
492        return true;
493
494    if (m_signexp != x.m_signexp)
495        return false;
496
497    return memcmp(m_mantissa, x.m_mantissa, BIGITS * sizeof(uint32_t)) == 0;
498}
499
500bool real::operator !=(real const &x) const
501{
502    return !(*this == x);
503}
504
505bool real::operator <(real const &x) const
506{
507    /* Ensure both numbers are positive */
508    if (m_signexp >> 31)
509        return (x.m_signexp >> 31) ? -*this > -x : true;
510
511    if (x.m_signexp >> 31)
512        return false;
513
514    /* Compare all relevant bits */
515    if (m_signexp != x.m_signexp)
516        return m_signexp < x.m_signexp;
517
518    for (int i = 0; i < BIGITS; i++)
519        if (m_mantissa[i] != x.m_mantissa[i])
520            return m_mantissa[i] < x.m_mantissa[i];
521
522    return false;
523}
524
525bool real::operator <=(real const &x) const
526{
527    return !(*this > x);
528}
529
530bool real::operator >(real const &x) const
531{
532    /* Ensure both numbers are positive */
533    if (m_signexp >> 31)
534        return (x.m_signexp >> 31) ? -*this < -x : false;
535
536    if (x.m_signexp >> 31)
537        return true;
538
539    /* Compare all relevant bits */
540    if (m_signexp != x.m_signexp)
541        return m_signexp > x.m_signexp;
542
543    for (int i = 0; i < BIGITS; i++)
544        if (m_mantissa[i] != x.m_mantissa[i])
545            return m_mantissa[i] > x.m_mantissa[i];
546
547    return false;
548}
549
550bool real::operator >=(real const &x) const
551{
552    return !(*this < x);
553}
554
555bool real::operator !() const
556{
557    return !(bool)*this;
558}
559
560real::operator bool() const
561{
562    /* A real is "true" if it is non-zero (exponent is non-zero) AND
563     * not NaN (exponent is not full bits OR higher order mantissa is zero) */
564    uint32_t exponent = m_signexp << 1;
565    return exponent && (~exponent || m_mantissa[0] == 0);
566}
567
568real re(real const &x)
569{
570    if (!(x.m_signexp << 1))
571    {
572        real ret = x;
573        ret.m_signexp = x.m_signexp | 0x7fffffffu;
574        ret.m_mantissa[0] = 0;
575        return ret;
576    }
577
578    /* Use the system's float inversion to approximate 1/x */
579    union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
580    v.x |= x.m_mantissa[0] >> 9;
581    v.f = 1.0 / v.f;
582
583    real ret;
584    ret.m_mantissa[0] = v.x << 9;
585
586    uint32_t sign = x.m_signexp & 0x80000000u;
587    ret.m_signexp = sign;
588
589    int exponent = (x.m_signexp & 0x7fffffffu) + 1;
590    exponent = -exponent + (v.x >> 23) - (u.x >> 23);
591    ret.m_signexp |= (exponent - 1) & 0x7fffffffu;
592
593    /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
594     * convergence, but this hasn't been checked seriously. */
595    for (int i = 1; i <= real::BIGITS; i *= 2)
596        ret = ret * (real::R_2 - ret * x);
597
598    return ret;
599}
600
601real sqrt(real const &x)
602{
603    /* if zero, return x */
604    if (!(x.m_signexp << 1))
605        return x;
606
607    /* if negative, return NaN */
608    if (x.m_signexp >> 31)
609    {
610        real ret;
611        ret.m_signexp = 0x7fffffffu;
612        ret.m_mantissa[0] = 0xffffu;
613        return ret;
614    }
615
616    /* Use the system's float inversion to approximate 1/sqrt(x). First
617     * we construct a float in the [1..4[ range that has roughly the same
618     * mantissa as our real. Its exponent is 0 or 1, depending on the
619     * partity of x. The final exponent is 0, -1 or -2. We use the final
620     * exponent and final mantissa to pre-fill the result. */
621    union { float f; uint32_t x; } u = { 1.0f }, v = { 2.0f };
622    v.x -= ((x.m_signexp & 1) << 23);
623    v.x |= x.m_mantissa[0] >> 9;
624    v.f = 1.0 / sqrtf(v.f);
625
626    real ret;
627    ret.m_mantissa[0] = v.x << 9;
628
629    uint32_t sign = x.m_signexp & 0x80000000u;
630    ret.m_signexp = sign;
631
632    uint32_t exponent = (x.m_signexp & 0x7fffffffu);
633    exponent = ((1 << 30) + (1 << 29) - 1) - (exponent + 1) / 2;
634    exponent = exponent + (v.x >> 23) - (u.x >> 23);
635    ret.m_signexp |= exponent & 0x7fffffffu;
636
637    /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
638     * convergence, but this hasn't been checked seriously. */
639    for (int i = 1; i <= real::BIGITS; i *= 2)
640    {
641        ret = ret * (real::R_3 - ret * ret * x);
642        ret.m_signexp--;
643    }
644
645    return ret * x;
646}
647
648real cbrt(real const &x)
649{
650    /* if zero, return x */
651    if (!(x.m_signexp << 1))
652        return x;
653
654    /* Use the system's float inversion to approximate cbrt(x). First
655     * we construct a float in the [1..8[ range that has roughly the same
656     * mantissa as our real. Its exponent is 0, 1 or 2, depending on the
657     * value of x. The final exponent is 0 or 1 (special case). We use
658     * the final exponent and final mantissa to pre-fill the result. */
659    union { float f; uint32_t x; } u = { 1.0f }, v = { 1.0f };
660    v.x += ((x.m_signexp % 3) << 23);
661    v.x |= x.m_mantissa[0] >> 9;
662    v.f = powf(v.f, 0.33333333333333333f);
663
664    real ret;
665    ret.m_mantissa[0] = v.x << 9;
666
667    uint32_t sign = x.m_signexp & 0x80000000u;
668    ret.m_signexp = sign;
669
670    int exponent = (x.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
671    exponent = exponent / 3 + (v.x >> 23) - (u.x >> 23);
672    ret.m_signexp |= (exponent + (1 << 30) - 1) & 0x7fffffffu;
673
674    /* FIXME: 1+log2(BIGITS) steps of Newton-Raphson seems to be enough for
675     * convergence, but this hasn't been checked seriously. */
676    for (int i = 1; i <= real::BIGITS; i *= 2)
677    {
678        static real third = re(real::R_3);
679        ret = third * (x / (ret * ret) + (ret << 1));
680    }
681
682    return ret;
683}
684
685real pow(real const &x, real const &y)
686{
687    if (!y)
688        return real::R_1;
689    if (!x)
690        return real::R_0;
691    if (x > real::R_0)
692        return exp(y * log(x));
693    else /* x < 0 */
694    {
695        if (y == round(y))
696            return -exp(y * log(-x));
697        /* FIXME: negative nth root */
698        return real::R_0;
699    }
700}
701
702real gamma(real const &x)
703{
704    /* We use Spouge's formula. FIXME: precision is far from acceptable,
705     * especially with large values. We need to compute this with higher
706     * precision values in order to attain the desired accuracy. It might
707     * also be useful to sort the ck values by decreasing absolute value
708     * and do the addition in this order. */
709    int a = ceilf(logf(2) / logf(2 * M_PI) * real::BIGITS * real::BIGIT_BITS);
710
711    real ret = sqrt(real::R_PI << 1);
712    real fact_k_1 = real::R_1;
713
714    for (int k = 1; k < a; k++)
715    {
716        real a_k = (real)(a - k);
717        real ck = pow(a_k, (real)((float)k - 0.5)) * exp(a_k)
718                / (fact_k_1 * (x + (real)(k - 1)));
719        ret += ck;
720        fact_k_1 *= (real)-k;
721    }
722
723    ret *= pow(x + (real)(a - 1), x - (real::R_1 >> 1));
724    ret *= exp(-x - (real)(a - 1));
725
726    return ret;
727}
728
729real fabs(real const &x)
730{
731    real ret = x;
732    ret.m_signexp &= 0x7fffffffu;
733    return ret;
734}
735
736static real fast_log(real const &x)
737{
738    /* This fast log method is tuned to work on the [1..2] range and
739     * no effort whatsoever was made to improve convergence outside this
740     * domain of validity. It can converge pretty fast, provided we use
741     * the following variable substitutions:
742     *    y = sqrt(x)
743     *    z = (y - 1) / (y + 1)
744     *
745     * And the following identities:
746     *    ln(x) = 2 ln(y)
747     *          = 2 ln((1 + z) / (1 - z))
748     *          = 4 z (1 + z^2 / 3 + z^4 / 5 + z^6 / 7...)
749     *
750     * Any additional sqrt() call would halve the convergence time, but
751     * would also impact the final precision. For now we stick with one
752     * sqrt() call. */
753    real y = sqrt(x);
754    real z = (y - real::R_1) / (y + real::R_1), z2 = z * z, zn = z2;
755    real sum = real::R_1;
756
757    for (int i = 3; ; i += 2)
758    {
759        real newsum = sum + zn / (real)i;
760        if (newsum == sum)
761            break;
762        sum = newsum;
763        zn *= z2;
764    }
765
766    return z * (sum << 2);
767}
768
769real log(real const &x)
770{
771    /* Strategy for log(x): if x = 2^E*M then log(x) = E log(2) + log(M),
772     * with the property that M is in [1..2[, so fast_log() applies here. */
773    real tmp = x;
774    if (x.m_signexp >> 31 || x.m_signexp == 0)
775    {
776        tmp.m_signexp = 0xffffffffu;
777        tmp.m_mantissa[0] = 0xffffffffu;
778        return tmp;
779    }
780    tmp.m_signexp = (1 << 30) - 1;
781    return (real)(int)(x.m_signexp - (1 << 30) + 1) * real::R_LN2
782           + fast_log(tmp);
783}
784
785real log2(real const &x)
786{
787    /* Strategy for log2(x): see log(x). */
788    real tmp = x;
789    if (x.m_signexp >> 31 || x.m_signexp == 0)
790    {
791        tmp.m_signexp = 0xffffffffu;
792        tmp.m_mantissa[0] = 0xffffffffu;
793        return tmp;
794    }
795    tmp.m_signexp = (1 << 30) - 1;
796    return (real)(int)(x.m_signexp - (1 << 30) + 1)
797           + fast_log(tmp) * real::R_LOG2E;
798}
799
800real log10(real const &x)
801{
802    return log(x) * real::R_LOG10E;
803}
804
805static real fast_exp_sub(real const &x, real const &y)
806{
807    /* This fast exp method is tuned to work on the [-1..1] range and
808     * no effort whatsoever was made to improve convergence outside this
809     * domain of validity. The argument y is used for cases where we
810     * don't want the leading 1 in the Taylor series. */
811    real ret = real::R_1 - y, fact = real::R_1, xn = x;
812
813    for (int i = 1; ; i++)
814    {
815        real newret = ret + xn;
816        if (newret == ret)
817            break;
818        ret = newret;
819        real mul = (i + 1);
820        fact *= mul;
821        ret *= mul;
822        xn *= x;
823    }
824    ret /= fact;
825
826    return ret;
827}
828
829real exp(real const &x)
830{
831    /* Strategy for exp(x): the Taylor series does not converge very fast
832     * with large positive or negative values.
833     *
834     * However, we know that the result is going to be in the form M*2^E,
835     * where M is the mantissa and E the exponent. We first try to predict
836     * a value for E, which is approximately log2(exp(x)) = x / log(2).
837     *
838     * Let E0 be an integer close to x / log(2). We need to find a value x0
839     * such that exp(x) = 2^E0 * exp(x0). We get x0 = x - E0 log(2).
840     *
841     * Thus the final algorithm:
842     *  int E0 = x / log(2)
843     *  real x0 = x - E0 log(2)
844     *  real x1 = exp(x0)
845     *  return x1 * 2^E0
846     */
847    int e0 = x / real::R_LN2;
848    real x0 = x - (real)e0 * real::R_LN2;
849    real x1 = fast_exp_sub(x0, real::R_0);
850    x1.m_signexp += e0;
851    return x1;
852}
853
854real exp2(real const &x)
855{
856    /* Strategy for exp2(x): see strategy in exp(). */
857    int e0 = x;
858    real x0 = x - (real)e0;
859    real x1 = fast_exp_sub(x0 * real::R_LN2, real::R_0);
860    x1.m_signexp += e0;
861    return x1;
862}
863
864real sinh(real const &x)
865{
866    /* We cannot always use (exp(x)-exp(-x))/2 because we'll lose
867     * accuracy near zero. We only use this identity for |x|>0.5. If
868     * |x|<=0.5, we compute exp(x)-1 and exp(-x)-1 instead. */
869    bool near_zero = (fabs(x) < real::R_1 >> 1);
870    real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
871    real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
872    return (x1 - x2) >> 1;
873}
874
875real tanh(real const &x)
876{
877    /* See sinh() for the strategy here */
878    bool near_zero = (fabs(x) < real::R_1 >> 1);
879    real x1 = near_zero ? fast_exp_sub(x, real::R_1) : exp(x);
880    real x2 = near_zero ? fast_exp_sub(-x, real::R_1) : exp(-x);
881    real x3 = near_zero ? x1 + x2 + real::R_2 : x1 + x2;
882    return (x1 - x2) / x3;
883}
884
885real cosh(real const &x)
886{
887    /* No need to worry about accuracy here; maybe the last bit is slightly
888     * off, but that's about it. */
889    return (exp(x) + exp(-x)) >> 1;
890}
891
892real frexp(real const &x, int *exp)
893{
894    if (!x)
895    {
896        *exp = 0;
897        return x;
898    }
899
900    real ret = x;
901    int exponent = (ret.m_signexp & 0x7fffffffu) - (1 << 30) + 1;
902    *exp = exponent + 1;
903    ret.m_signexp -= exponent + 1;
904    return ret;
905}
906
907real ldexp(real const &x, int exp)
908{
909    real ret = x;
910    if (ret)
911        ret.m_signexp += exp;
912    return ret;
913}
914
915real modf(real const &x, real *iptr)
916{
917    real absx = fabs(x);
918    real tmp = floor(absx);
919
920    *iptr = copysign(tmp, x);
921    return copysign(absx - tmp, x);
922}
923
924real copysign(real const &x, real const &y)
925{
926    real ret = x;
927    ret.m_signexp &= 0x7fffffffu;
928    ret.m_signexp |= y.m_signexp & 0x80000000u;
929    return ret;
930}
931
932real floor(real const &x)
933{
934    /* Strategy for floor(x):
935     *  - if negative, return -ceil(-x)
936     *  - if zero or negative zero, return x
937     *  - if less than one, return zero
938     *  - otherwise, if e is the exponent, clear all bits except the
939     *    first e. */
940    if (x < -real::R_0)
941        return -ceil(-x);
942    if (!x)
943        return x;
944    if (x < real::R_1)
945        return real::R_0;
946
947    real ret = x;
948    int exponent = x.m_signexp - (1 << 30) + 1;
949
950    for (int i = 0; i < real::BIGITS; i++)
951    {
952        if (exponent <= 0)
953            ret.m_mantissa[i] = 0;
954        else if (exponent < real::BIGIT_BITS)
955            ret.m_mantissa[i] &= ~((1 << (real::BIGIT_BITS - exponent)) - 1);
956
957        exponent -= real::BIGIT_BITS;
958    }
959
960    return ret;
961}
962
963real ceil(real const &x)
964{
965    /* Strategy for ceil(x):
966     *  - if negative, return -floor(-x)
967     *  - if x == floor(x), return x
968     *  - otherwise, return floor(x) + 1 */
969    if (x < -real::R_0)
970        return -floor(-x);
971    real ret = floor(x);
972    if (x == ret)
973        return ret;
974    else
975        return ret + real::R_1;
976}
977
978real round(real const &x)
979{
980    if (x < real::R_0)
981        return -round(-x);
982
983    return floor(x + (real::R_1 >> 1));
984}
985
986real fmod(real const &x, real const &y)
987{
988    if (!y)
989        return real::R_0; /* FIXME: return NaN */
990
991    if (!x)
992        return x;
993
994    real tmp = round(x / y);
995    return x - tmp * y;
996}
997
998real sin(real const &x)
999{
1000    bool switch_sign = x.m_signexp & 0x80000000u;
1001
1002    real absx = fmod(fabs(x), real::R_PI << 1);
1003    if (absx > real::R_PI)
1004    {
1005        absx -= real::R_PI;
1006        switch_sign = !switch_sign;
1007    }
1008
1009    if (absx > real::R_PI_2)
1010        absx = real::R_PI - absx;
1011
1012    real ret = real::R_0, fact = real::R_1, xn = absx, mx2 = -absx * absx;
1013    for (int i = 1; ; i += 2)
1014    {
1015        real newret = ret + xn;
1016        if (newret == ret)
1017            break;
1018        ret = newret;
1019        real mul = (i + 1) * (i + 2);
1020        fact *= mul;
1021        ret *= mul;
1022        xn *= mx2;
1023    }
1024    ret /= fact;
1025
1026    /* Propagate sign */
1027    if (switch_sign)
1028        ret.m_signexp ^= 0x80000000u;
1029    return ret;
1030}
1031
1032real cos(real const &x)
1033{
1034    return sin(real::R_PI_2 - x);
1035}
1036
1037real tan(real const &x)
1038{
1039    /* Constrain input to [-π,π] */
1040    real y = fmod(x, real::R_PI);
1041
1042    /* Constrain input to [-π/2,π/2] */
1043    if (y < -real::R_PI_2)
1044        y += real::R_PI;
1045    else if (y > real::R_PI_2)
1046        y -= real::R_PI;
1047
1048    /* In [-π/4,π/4] return sin/cos */
1049    if (fabs(y) <= real::R_PI_4)
1050        return sin(y) / cos(y);
1051
1052    /* Otherwise, return cos/sin */
1053    if (y > real::R_0)
1054        y = real::R_PI_2 - y;
1055    else
1056        y = -real::R_PI_2 - y;
1057
1058    return cos(y) / sin(y);
1059}
1060
1061static real asinacos(real const &x, bool is_asin, bool is_negative)
1062{
1063    /* Strategy for asin(): in [-0.5..0.5], use a Taylor series around
1064     * zero. In [0.5..1], use asin(x) = π/2 - 2*asin(sqrt((1-x)/2)), and
1065     * in [-1..-0.5] just revert the sign.
1066     * Strategy for acos(): use acos(x) = π/2 - asin(x) and try not to
1067     * lose the precision around x=1. */
1068    real absx = fabs(x);
1069    bool around_zero = (absx < (real::R_1 >> 1));
1070
1071    if (!around_zero)
1072        absx = sqrt((real::R_1 - absx) >> 1);
1073
1074    real ret = absx, xn = absx, x2 = absx * absx, fact1 = 2, fact2 = 1;
1075    for (int i = 1; ; i++)
1076    {
1077        xn *= x2;
1078        real mul = (real)(2 * i + 1);
1079        real newret = ret + ((fact1 * xn / (mul * fact2)) >> (i * 2));
1080        if (newret == ret)
1081            break;
1082        ret = newret;
1083        fact1 *= (real)((2 * i + 1) * (2 * i + 2));
1084        fact2 *= (real)((i + 1) * (i + 1));
1085    }
1086
1087    if (is_negative)
1088        ret = -ret;
1089
1090    if (around_zero)
1091        ret = is_asin ? ret : real::R_PI_2 - ret;
1092    else
1093    {
1094        real adjust = is_negative ? real::R_PI : real::R_0;
1095        if (is_asin)
1096            ret = real::R_PI_2 - adjust - (ret << 1);
1097        else
1098            ret = adjust + (ret << 1);
1099    }
1100
1101    return ret;
1102}
1103
1104real asin(real const &x)
1105{
1106    return asinacos(x, true, x.m_signexp >> 31);
1107}
1108
1109real acos(real const &x)
1110{
1111    return asinacos(x, false, x.m_signexp >> 31);
1112}
1113
1114real atan(real const &x)
1115{
1116    /* Computing atan(x): we choose a different Taylor series depending on
1117     * the value of x to help with convergence.
1118     *
1119     * If |x| < 0.5 we evaluate atan(y) near 0:
1120     *  atan(y) = y - y^3/3 + y^5/5 - y^7/7 + y^9/9 ...
1121     *
1122     * If 0.5 <= |x| < 1.5 we evaluate atan(1+y) near 0:
1123     *  atan(1+y) = π/4 + y/(1*2^1) - y^2/(2*2^1) + y^3/(3*2^2)
1124     *                  - y^5/(5*2^3) + y^6/(6*2^3) - y^7/(7*2^4)
1125     *                  + y^9/(9*2^5) - y^10/(10*2^5) + y^11/(11*2^6) ...
1126     *
1127     * If 1.5 <= |x| < 2 we evaluate atan(sqrt(3)+2y) near 0:
1128     *  atan(sqrt(3)+2y) = π/3 + 1/2 y - sqrt(3)/2 y^2/2
1129     *                         + y^3/3 - sqrt(3)/2 y^4/4 + 1/2 y^5/5
1130     *                         - 1/2 y^7/7 + sqrt(3)/2 y^8/8
1131     *                         - y^9/9 + sqrt(3)/2 y^10/10 - 1/2 y^11/11
1132     *                         + 1/2 y^13/13 - sqrt(3)/2 y^14/14
1133     *                         + y^15/15 - sqrt(3)/2 y^16/16 + 1/2 y^17/17 ...
1134     *
1135     * If |x| >= 2 we evaluate atan(y) near +∞:
1136     *  atan(y) = π/2 - y^-1 + y^-3/3 - y^-5/5 + y^-7/7 - y^-9/9 ...
1137     */
1138    real absx = fabs(x);
1139
1140    if (absx < (real::R_1 >> 1))
1141    {
1142        real ret = x, xn = x, mx2 = -x * x;
1143        for (int i = 3; ; i += 2)
1144        {
1145            xn *= mx2;
1146            real newret = ret + xn / (real)i;
1147            if (newret == ret)
1148                break;
1149            ret = newret;
1150        }
1151        return ret;
1152    }
1153
1154    real ret = 0;
1155
1156    if (absx < (real::R_3 >> 1))
1157    {
1158        real y = real::R_1 - absx;
1159        real yn = y, my2 = -y * y;
1160        for (int i = 0; ; i += 2)
1161        {
1162            real newret = ret + ((yn / (real)(2 * i + 1)) >> (i + 1));
1163            yn *= y;
1164            newret += (yn / (real)(2 * i + 2)) >> (i + 1);
1165            yn *= y;
1166            newret += (yn / (real)(2 * i + 3)) >> (i + 2);
1167            if (newret == ret)
1168                break;
1169            ret = newret;
1170            yn *= my2;
1171        }
1172        ret = real::R_PI_4 - ret;
1173    }
1174    else if (absx < real::R_2)
1175    {
1176        real y = (absx - real::R_SQRT3) >> 1;
1177        real yn = y, my2 = -y * y;
1178        for (int i = 1; ; i += 6)
1179        {
1180            real newret = ret + ((yn / (real)i) >> 1);
1181            yn *= y;
1182            newret -= (real::R_SQRT3 >> 1) * yn / (real)(i + 1);
1183            yn *= y;
1184            newret += yn / (real)(i + 2);
1185            yn *= y;
1186            newret -= (real::R_SQRT3 >> 1) * yn / (real)(i + 3);
1187            yn *= y;
1188            newret += (yn / (real)(i + 4)) >> 1;
1189            if (newret == ret)
1190                break;
1191            ret = newret;
1192            yn *= my2;
1193        }
1194        ret = real::R_PI_3 + ret;
1195    }
1196    else
1197    {
1198        real y = re(absx);
1199        real yn = y, my2 = -y * y;
1200        ret = y;
1201        for (int i = 3; ; i += 2)
1202        {
1203            yn *= my2;
1204            real newret = ret + yn / (real)i;
1205            if (newret == ret)
1206                break;
1207            ret = newret;
1208        }
1209        ret = real::R_PI_2 - ret;
1210    }
1211
1212    /* Propagate sign */
1213    ret.m_signexp |= (x.m_signexp & 0x80000000u);
1214    return ret;
1215}
1216
1217real atan2(real const &y, real const &x)
1218{
1219    if (!y)
1220    {
1221        if ((x.m_signexp >> 31) == 0)
1222            return y;
1223        if (y.m_signexp >> 31)
1224            return -real::R_PI;
1225        return real::R_PI;
1226    }
1227
1228    if (!x)
1229    {
1230        if (y.m_signexp >> 31)
1231            return -real::R_PI;
1232        return real::R_PI;
1233    }
1234
1235    /* FIXME: handle the Inf and NaN cases */
1236    real z = y / x;
1237    real ret = atan(z);
1238    if (x < real::R_0)
1239        ret += (y > real::R_0) ? real::R_PI : -real::R_PI;
1240    return ret;
1241}
1242
1243void real::hexprint() const
1244{
1245    printf("%08x", m_signexp);
1246    for (int i = 0; i < BIGITS; i++)
1247        printf(" %08x", m_mantissa[i]);
1248    printf("\n");
1249}
1250
1251void real::print(int ndigits) const
1252{
1253    real x = *this;
1254
1255    if (x.m_signexp >> 31)
1256    {
1257        printf("-");
1258        x = -x;
1259    }
1260
1261    /* Normalise x so that mantissa is in [1..9.999] */
1262    int exponent = 0;
1263    if (x.m_signexp)
1264    {
1265        for (real div = R_1, newdiv; true; div = newdiv)
1266        {
1267            newdiv = div * R_10;
1268            if (x < newdiv)
1269            {
1270                x /= div;
1271                break;
1272            }
1273            exponent++;
1274        }
1275        for (real mul = 1, newx; true; mul *= R_10)
1276        {
1277            newx = x * mul;
1278            if (newx >= R_1)
1279            {
1280                x = newx;
1281                break;
1282            }
1283            exponent--;
1284        }
1285    }
1286
1287    /* Print digits */
1288    for (int i = 0; i < ndigits; i++)
1289    {
1290        int digit = (int)x;
1291        printf("%i", digit);
1292        if (i == 0)
1293            printf(".");
1294        x -= real(digit);
1295        x *= R_10;
1296    }
1297
1298    /* Print exponent information */
1299    if (exponent < 0)
1300        printf("e-%i", -exponent);
1301    else if (exponent > 0)
1302        printf("e+%i", exponent);
1303
1304    printf("\n");
1305}
1306
1307static real fast_pi()
1308{
1309    /* Approximate Pi using Machin's formula: 16*atan(1/5)-4*atan(1/239) */
1310    real ret = 0.0, x0 = 5.0, x1 = 239.0;
1311    real const m0 = -x0 * x0, m1 = -x1 * x1, r16 = 16.0, r4 = 4.0;
1312
1313    for (int i = 1; ; i += 2)
1314    {
1315        real newret = ret + r16 / (x0 * (real)i) - r4 / (x1 * (real)i);
1316        if (newret == ret)
1317            break;
1318        ret = newret;
1319        x0 *= m0;
1320        x1 *= m1;
1321    }
1322
1323    return ret;
1324}
1325
1326real const real::R_0        = (real)0.0;
1327real const real::R_1        = (real)1.0;
1328real const real::R_2        = (real)2.0;
1329real const real::R_3        = (real)3.0;
1330real const real::R_10       = (real)10.0;
1331
1332real const real::R_LN2      = fast_log(R_2);
1333real const real::R_LN10     = log(R_10);
1334real const real::R_LOG2E    = re(R_LN2);
1335real const real::R_LOG10E   = re(R_LN10);
1336real const real::R_E        = exp(R_1);
1337real const real::R_PI       = fast_pi();
1338real const real::R_PI_2     = R_PI >> 1;
1339real const real::R_PI_3     = R_PI / R_3;
1340real const real::R_PI_4     = R_PI >> 2;
1341real const real::R_1_PI     = re(R_PI);
1342real const real::R_2_PI     = R_1_PI << 1;
1343real const real::R_2_SQRTPI = re(sqrt(R_PI)) << 1;
1344real const real::R_SQRT2    = sqrt(R_2);
1345real const real::R_SQRT3    = sqrt(R_3);
1346real const real::R_SQRT1_2  = R_SQRT2 >> 1;
1347
1348} /* namespace lol */
1349
Note: See TracBrowser for help on using the repository browser.