Changeset 897


Ignore:
Timestamp:
Sep 4, 2011, 12:05:57 AM (8 years ago)
Author:
sam
Message:

core: implement accelerated cos().

Location:
trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/core.h

    r896 r897  
    1818
    1919// CPU features
     20#undef LOL_FEATURE_CHEAP_BRANCHES
     21#undef LOL_FEATURE_VERY_CHEAP_BRANCHES
     22
    2023#if !defined __CELLOS_LV2__
    2124#   define LOL_FEATURE_CHEAP_BRANCHES
  • trunk/src/trig.cpp

    r896 r897  
    207207    sign = lol_fsel(is_even, sign, -sign);
    208208#else
    209     double sign = (x >= 0.0) ? PI : NEG_PI;
    210209    double num_cycles = absx + TWO_EXP_52;
    211210    __asm__("" : "+m" (num_cycles)); num_cycles -= TWO_EXP_52;
     
    216215    __asm__("" : "+m" (is_even));
    217216    is_even -= TWO * num_cycles - ONE;
    218     sign *= is_even;
     217    double sign = is_even;
    219218#endif
    220219    absx -= num_cycles;
    221220
     221    /* If branches are very cheap, we have the option to do the Taylor
     222     * series at a much lower degree by splitting. */
    222223#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES
    223224    if (lol_fabs(absx) > QUARTER)
    224225    {
    225         sign = (x * absx >= 0.0) ? is_even : -is_even;
    226 
    227         double k = HALF - lol_fabs(absx);
    228         double x2 = k * k;
     226        sign = (x * absx >= 0.0) ? sign : -sign;
     227
     228        double x1 = HALF - lol_fabs(absx);
     229        double x2 = x1 * x1;
    229230        double x4 = x2 * x2;
    230231        double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
     
    235236    }
    236237#endif
     238
     239    sign *= (x >= 0.0) ? PI : NEG_PI;
    237240
    238241    double x2 = absx * absx;
     
    250253}
    251254
     255double lol_cos(double x)
     256{
     257    double absx = lol_fabs(x * INV_PI);
     258
     259#if defined LOL_FEATURE_CHEAP_BRANCHES
     260    if (absx < QUARTER)
     261    {
     262        double x2 = absx * absx;
     263        double x4 = x2 * x2;
     264        double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
     265        double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
     266        double taylor = (sub1 * x2 + sub2) * x2 + ONE;
     267        return taylor;
     268    }
     269#endif
     270
     271#if defined __CELLOS_LV2__
     272    double num_cycles = lol_round(absx);
     273    double is_even = lol_trunc(num_cycles * HALF) - (num_cycles * HALF);
     274    double sign = lol_fsel(is_even, ONE, NEG_ONE);
     275#else
     276    double num_cycles = absx + TWO_EXP_52;
     277    __asm__("" : "+m" (num_cycles)); num_cycles -= TWO_EXP_52;
     278
     279    double is_even = TWO * num_cycles - ONE;
     280    __asm__("" : "+m" (is_even)); is_even += TWO_EXP_54;
     281    __asm__("" : "+m" (is_even)); is_even -= TWO_EXP_54;
     282    __asm__("" : "+m" (is_even));
     283    is_even -= TWO * num_cycles - ONE;
     284    double sign = is_even;
     285#endif
     286    absx -= num_cycles;
     287
     288#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES
     289    if (lol_fabs(absx) > QUARTER)
     290    {
     291        double x1 = HALF - lol_fabs(absx);
     292        double x2 = x1 * x1;
     293        double x4 = x2 * x2;
     294        double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE;
     295        double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0];
     296        double taylor = sub2 * x2 + sub1;
     297
     298        return x1 * taylor * sign * PI;
     299    }
     300#endif
     301
     302    double x2 = absx * absx;
     303    double x4 = x2 * x2;
     304#if defined LOL_FEATURE_VERY_CHEAP_BRANCHES
     305    double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1];
     306    double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0];
     307#else
     308    double sub1 = ((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1];
     309    double sub2 = ((CC[6] * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0];
     310#endif
     311    double taylor = (sub1 * x2 + sub2) * x2 + ONE;
     312
     313    return taylor * sign;
     314}
     315
    252316} /* namespace lol */
    253317
  • trunk/test/lol-bench.cpp

    r890 r897  
    7575static void bench_trig(int mode)
    7676{
    77     float result[5] = { 0.0f };
     77    float result[7] = { 0.0f };
    7878    Timer timer;
    7979
     
    128128        result[3] += timer.GetMs();
    129129
     130        /* Fast cos */
     131        timer.GetMs();
     132        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
     133#if defined HAVE_FASTMATH_H
     134            pf2[i] = f_cosf(pf[i]);
     135#else
     136            pf2[i] = cosf(pf[i]);
     137#endif
     138        result[4] += timer.GetMs();
     139
     140        /* Lol cos */
     141        timer.GetMs();
     142        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
     143            pf2[i] = lol_cos(pf[i]);
     144        result[5] += timer.GetMs();
     145
    130146        /* Tan */
    131147        timer.GetMs();
    132148        for (size_t i = 0; i < TRIG_TABLE_SIZE; i++)
    133149            pf2[i] = __builtin_tanf(pf[i]);
    134         result[4] += timer.GetMs();
     150        result[6] += timer.GetMs();
    135151    }
    136152
     
    146162    Log::Info("float = lol_sinf(float)  %7.3f\n", result[2]);
    147163    Log::Info("float = cosf(float)      %7.3f\n", result[3]);
    148     Log::Info("float = tanf(float)      %7.3f\n", result[4]);
     164    Log::Info("float = fastcosf(float)  %7.3f\n", result[4]);
     165    Log::Info("float = lol_cosf(float)  %7.3f\n", result[5]);
     166    Log::Info("float = tanf(float)      %7.3f\n", result[6]);
    149167}
    150168
  • trunk/test/trig.cpp

    r894 r897  
    5555            CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11);
    5656        }
     57
     58        for (int i = -10000; i < 10000; i++)
     59        {
     60            double f = (double)i * (1.0 / 1000.0);
     61            double a = __builtin_cos(f);
     62            double b = lol_cos(f);
     63            CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11);
     64        }
     65
     66        for (int i = -10000; i < 10000; i++)
     67        {
     68            double f = (double)i * (1.0 / 100000.0);
     69            double a = __builtin_cos(f);
     70            double b = lol_cos(f);
     71            CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11);
     72        }
    5773    }
    5874};
Note: See TracChangeset for help on using the changeset viewer.