Changeset 896
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/core.h
r885 r896 16 16 #if !defined __LOL_CORE_H__ 17 17 #define __LOL_CORE_H__ 18 19 // CPU features 20 #if !defined __CELLOS_LV2__ 21 # define LOL_FEATURE_CHEAP_BRANCHES 22 #endif 18 23 19 24 // Base types -
trunk/src/trig.cpp
r891 r896 24 24 { 25 25 26 static const double PI = 3.14159265358979323846 ;27 static const double NEG_PI = -3.14159265358979323846 ;28 static const double PI_2 = PI / 2.0;29 static const double PI_4 = PI / 4.0;30 static const double INV_PI = 1.0 / PI;31 static const double ROOT3 = 1.732050807568877293527 ;26 static const double PI = 3.14159265358979323846264; 27 static const double NEG_PI = -3.14159265358979323846264; 28 static const double PI_2 = 1.57079632679489661923132; 29 static const double PI_4 = 0.785398163397448309615661; 30 static const double INV_PI = 0.318309886183790671537768; 31 static const double ROOT3 = 1.73205080756887729352745; 32 32 33 33 static const double ZERO = 0.0; … … 35 35 static const double NEG_ONE = -1.0; 36 36 static const double HALF = 0.5; 37 static const double QUARTER = 0.25; 37 38 static const double TWO = 2.0; 38 39 static const double VERY_SMALL_NUMBER = 0x1.0p-128; … … 41 42 42 43 /** sin Taylor series coefficients. */ 43 static const double SC[] 44 static const double SC[] = 44 45 { 45 46 -1.6449340668482264364724e-0, // pi^2/3! … … 53 54 }; 54 55 56 static const double CC[] = 57 { 58 -4.9348022005446793094172e-0, // pi^2/2! 59 +4.0587121264167682181850e-0, // pi^4/4! 60 -1.3352627688545894958753e-0, // pi^6/6! 61 +2.3533063035889320454188e-1, // pi^8/8! 62 -2.5806891390014060012598e-2, // pi^10/10! 63 +1.9295743094039230479033e-3, // pi^12/12! 64 -1.0463810492484570711802e-4, // pi^14/14! 65 +4.3030695870329470072978e-6, // pi^16/16! 66 }; 67 55 68 /* Custom intrinsics */ 56 69 #define INLINEATTR __attribute__((always_inline)) … … 168 181 { 169 182 double absx = lol_fabs(x * INV_PI); 183 184 /* If branches are cheap, skip the cycle count when |x| < π/4, 185 * and only do the Taylor series up to the required precision. */ 186 #if defined LOL_FEATURE_CHEAP_BRANCHES 187 if (absx < QUARTER) 188 { 189 /* Computing x^4 is one multiplication too many we do, but it helps 190 * interleave the Taylor series operations a lot better. 
*/ 191 double x2 = absx * absx; 192 double x4 = x2 * x2; 193 double sub1 = SC[3] * x4 + SC[1]; 194 double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; 195 double taylor = (sub1 * x2 + sub2) * x2 + ONE; 196 return x * taylor; 197 } 198 #endif 199 200 /* Wrap |x| to the range [-1, 1] and keep track of the number of 201 * cycles required. If odd, we'll need to change the sign of the 202 * result. */ 203 #if defined __CELLOS_LV2__ 170 204 double sign = lol_fsel(x, PI, NEG_PI); 171 172 /* To compute sin(x) we build a Taylor series for |x|/pi wrapped to173 * the range [-1, 1]. We also switch the result sign if the number174 * of cycles is odd. */175 #if defined __CELLOS_LV2__176 205 double num_cycles = lol_round(absx); 177 206 double is_even = lol_trunc(num_cycles * HALF) - (num_cycles * HALF); 178 207 sign = lol_fsel(is_even, sign, -sign); 179 208 #else 209 double sign = (x >= 0.0) ? PI : NEG_PI; 180 210 double num_cycles = absx + TWO_EXP_52; 181 211 __asm__("" : "+m" (num_cycles)); num_cycles -= TWO_EXP_52; … … 188 218 sign *= is_even; 189 219 #endif 190 double norm_x = absx - num_cycles; 191 192 /* Computing x^4 is one multiplication too many we do, but it helps 193 * interleave the Taylor series operations a lot better. */ 194 double x2 = norm_x * norm_x; 220 absx -= num_cycles; 221 222 #if defined LOL_FEATURE_VERY_CHEAP_BRANCHES 223 if (lol_fabs(absx) > QUARTER) 224 { 225 sign = (x * absx >= 0.0) ? 
is_even : -is_even; 226 227 double k = HALF - lol_fabs(absx); 228 double x2 = k * k; 229 double x4 = x2 * x2; 230 double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1]; 231 double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; 232 double taylor = (sub1 * x2 + sub2) * x2 + ONE; 233 234 return taylor * sign; 235 } 236 #endif 237 238 double x2 = absx * absx; 195 239 double x4 = x2 * x2; 240 #if defined LOL_FEATURE_VERY_CHEAP_BRANCHES 241 double sub1 = SC[3] * x4 + SC[1]; 242 double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; 243 #else 196 244 double sub1 = ((SC[7] * x4 + SC[5]) * x4 + SC[3]) * x4 + SC[1]; 197 245 double sub2 = ((SC[6] * x4 + SC[4]) * x4 + SC[2]) * x4 + SC[0]; 246 #endif 198 247 double taylor = (sub1 * x2 + sub2) * x2 + ONE; 199 248 200 double result = norm_x * taylor; 201 return result * sign; 249 return absx * taylor * sign; 202 250 } 203 251
Note: See TracChangeset for help on using the changeset viewer.