Changeset 896


Ignore:
Timestamp:
Sep 3, 2011, 7:45:16 PM (8 years ago)
Author:
sam
Message:

core: add a code shortcut for sin() on platforms that have cheap branches.

Location:
trunk/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/core.h

    r885 r896  
    1616#if !defined __LOL_CORE_H__
    1717#define __LOL_CORE_H__
     18
     19// CPU features
     20#if !defined __CELLOS_LV2__
     21#   define LOL_FEATURE_CHEAP_BRANCHES
     22#endif
    1823
    1924// Base types
  • trunk/src/trig.cpp

    r891 r896  
    2424{
    2525
    26 static const double PI     = 3.14159265358979323846;
    27 static const double NEG_PI = -3.14159265358979323846;
    28 static const double PI_2   = PI / 2.0;
    29 static const double PI_4   = PI / 4.0;
    30 static const double INV_PI = 1.0 / PI;
    31 static const double ROOT3  = 1.732050807568877293527;
     26static const double PI     = 3.14159265358979323846264;
     27static const double NEG_PI = -3.14159265358979323846264;
     28static const double PI_2   = 1.57079632679489661923132;
     29static const double PI_4   = 0.785398163397448309615661;
     30static const double INV_PI = 0.318309886183790671537768;
     31static const double ROOT3  = 1.73205080756887729352745;
    3232
    3333static const double ZERO    = 0.0;
     
    3535static const double NEG_ONE = -1.0;
    3636static const double HALF    = 0.5;
     37static const double QUARTER = 0.25;
    3738static const double TWO     = 2.0;
    3839static const double VERY_SMALL_NUMBER = 0x1.0p-128;
     
    4142
    4243/** sin Taylor series coefficients. */
    43 static const double SC[]    =
     44static const double SC[] =
    4445{
    4546    -1.6449340668482264364724e-0, // pi^2/3!
     
    5354};
    5455
     56static const double CC[] =
     57{
     58    -4.9348022005446793094172e-0, // pi^2/2!
     59    +4.0587121264167682181850e-0, // pi^4/4!
     60    -1.3352627688545894958753e-0, // pi^6/6!
     61    +2.3533063035889320454188e-1, // pi^8/8!
     62    -2.5806891390014060012598e-2, // pi^10/10!
     63    +1.9295743094039230479033e-3, // pi^12/12!
     64    -1.0463810492484570711802e-4, // pi^14/14!
     65    +4.3030695870329470072978e-6, // pi^16/16!
     66};
     67
    5568/* Custom intrinsics */
    5669#define INLINEATTR __attribute__((always_inline))
     
    168181{
    169182    double absx = lol_fabs(x * INV_PI);
     183
     184    /* If branches are cheap, skip the cycle count when |x| < π/4,
     185     * and only do the Taylor series up to the required precision. */
     186#if defined LOL_FEATURE_CHEAP_BRANCHES
     187    if (absx < QUARTER)
     188    {
     189        /* Computing x^4 is one multiplication too many we do, but it helps
     190         * interleave the Taylor series operations a lot better. */
     191        double x2 = absx * absx;
     192        double x4 = x2 * x2;
     193        double sub1 = SC[3] * x4 + SC[1];
     194        double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
     195        double taylor = (sub1 * x2 + sub2) * x2 + ONE;
     196        return x * taylor;
     197    }
     198#endif
     199
     200    /* Wrap |x| to the range [-1, 1] and keep track of the number of
     201     * cycles required. If odd, we'll need to change the sign of the
     202     * result. */
     203#if defined __CELLOS_LV2__
    170204    double sign = lol_fsel(x, PI, NEG_PI);
    171 
    172     /* To compute sin(x) we build a Taylor series for |x|/pi wrapped to
    173      * the range [-1, 1]. We also switch the result sign if the number
    174      * of cycles is odd. */
    175 #if defined __CELLOS_LV2__
    176205    double num_cycles = lol_round(absx);
    177206    double is_even = lol_trunc(num_cycles * HALF) - (num_cycles * HALF);
    178207    sign = lol_fsel(is_even, sign, -sign);
    179208#else
     209    double sign = (x >= 0.0) ? PI : NEG_PI;
    180210    double num_cycles = absx + TWO_EXP_52;
    181211    __asm__("" : "+m" (num_cycles)); num_cycles -= TWO_EXP_52;
     
    188218    sign *= is_even;
    189219#endif
    190     double norm_x = absx - num_cycles;
    191 
    192     /* Computing x^4 is one multiplication too many we do, but it helps
    193      * interleave the Taylor series operations a lot better. */
    194     double x2 = norm_x * norm_x;
     220    absx -= num_cycles;
     221
     222#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES
     223    if (lol_fabs(absx) > QUARTER)
     224    {
     225        sign = (x * absx >= 0.0) ? is_even : -is_even;
     226
     227        double k = HALF - lol_fabs(absx);
     228        double x2 = k * k;
     229        double x4 = x2 * x2;
     230        double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
     231        double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
     232        double taylor = (sub1 * x2 + sub2) * x2 + ONE;
     233
     234        return taylor * sign;
     235    }
     236#endif
     237
     238    double x2 = absx * absx;
    195239    double x4 = x2 * x2;
     240#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES
     241    double sub1 = SC[3] * x4 + SC[1];
     242    double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
     243#else
    196244    double sub1 = ((SC[7] * x4 + SC[5]) * x4 + SC[3]) * x4 + SC[1];
    197245    double sub2 = ((SC[6] * x4 + SC[4]) * x4 + SC[2]) * x4 + SC[0];
     246#endif
    198247    double taylor = (sub1 * x2 + sub2) * x2 + ONE;
    199248
    200     double result = norm_x * taylor;
    201     return result * sign;
     249    return absx * taylor * sign;
    202250}
    203251
Note: See TracChangeset for help on using the changeset viewer.