Changeset 897
- Timestamp:
- Sep 4, 2011, 12:05:57 AM (12 years ago)
- Location:
- trunk
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/core.h
r896 r897 18 18 19 19 // CPU features 20 #undef LOL_FEATURE_CHEAP_BRANCHES 21 #undef LOL_FEATURE_VERY_CHEAP_BRANCHES 22 20 23 #if !defined __CELLOS_LV2__ 21 24 # define LOL_FEATURE_CHEAP_BRANCHES -
trunk/src/trig.cpp
r896 r897 207 207 sign = lol_fsel(is_even, sign, -sign); 208 208 #else 209 double sign = (x >= 0.0) ? PI : NEG_PI;210 209 double num_cycles = absx + TWO_EXP_52; 211 210 __asm__("" : "+m" (num_cycles)); num_cycles -= TWO_EXP_52; … … 216 215 __asm__("" : "+m" (is_even)); 217 216 is_even -= TWO * num_cycles - ONE; 218 sign *= is_even;217 double sign = is_even; 219 218 #endif 220 219 absx -= num_cycles; 221 220 221 /* If branches are very cheap, we have the option to do the Taylor 222 * series at a much lower degree by splitting. */ 222 223 #if defined LOL_FEATURE_VERY_CHEAP_BRANCHES 223 224 if (lol_fabs(absx) > QUARTER) 224 225 { 225 sign = (x * absx >= 0.0) ? is_even : -is_even;226 227 double k= HALF - lol_fabs(absx);228 double x2 = k * k;226 sign = (x * absx >= 0.0) ? sign : -sign; 227 228 double x1 = HALF - lol_fabs(absx); 229 double x2 = x1 * x1; 229 230 double x4 = x2 * x2; 230 231 double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1]; … … 235 236 } 236 237 #endif 238 239 sign *= (x >= 0.0) ? 
PI : NEG_PI; 237 240 238 241 double x2 = absx * absx; … … 250 253 } 251 254 255 double lol_cos(double x) 256 { 257 double absx = lol_fabs(x * INV_PI); 258 259 #if defined LOL_FEATURE_CHEAP_BRANCHES 260 if (absx < QUARTER) 261 { 262 double x2 = absx * absx; 263 double x4 = x2 * x2; 264 double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1]; 265 double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; 266 double taylor = (sub1 * x2 + sub2) * x2 + ONE; 267 return taylor; 268 } 269 #endif 270 271 #if defined __CELLOS_LV2__ 272 double num_cycles = lol_round(absx); 273 double is_even = lol_trunc(num_cycles * HALF) - (num_cycles * HALF); 274 double sign = lol_fsel(is_even, ONE, NEG_ONE); 275 #else 276 double num_cycles = absx + TWO_EXP_52; 277 __asm__("" : "+m" (num_cycles)); num_cycles -= TWO_EXP_52; 278 279 double is_even = TWO * num_cycles - ONE; 280 __asm__("" : "+m" (is_even)); is_even += TWO_EXP_54; 281 __asm__("" : "+m" (is_even)); is_even -= TWO_EXP_54; 282 __asm__("" : "+m" (is_even)); 283 is_even -= TWO * num_cycles - ONE; 284 double sign = is_even; 285 #endif 286 absx -= num_cycles; 287 288 #if defined LOL_FEATURE_VERY_CHEAP_BRANCHES 289 if (lol_fabs(absx) > QUARTER) 290 { 291 double x1 = HALF - lol_fabs(absx); 292 double x2 = x1 * x1; 293 double x4 = x2 * x2; 294 double sub1 = (SC[3] * x4 + SC[1]) * x4 + ONE; 295 double sub2 = (SC[4] * x4 + SC[2]) * x4 + SC[0]; 296 double taylor = sub2 * x2 + sub1; 297 298 return x1 * taylor * sign * PI; 299 } 300 #endif 301 302 double x2 = absx * absx; 303 double x4 = x2 * x2; 304 #if defined LOL_FEATURE_VERY_CHEAP_BRANCHES 305 double sub1 = (CC[5] * x4 + CC[3]) * x4 + CC[1]; 306 double sub2 = (CC[4] * x4 + CC[2]) * x4 + CC[0]; 307 #else 308 double sub1 = ((CC[7] * x4 + CC[5]) * x4 + CC[3]) * x4 + CC[1]; 309 double sub2 = ((CC[6] * x4 + CC[4]) * x4 + CC[2]) * x4 + CC[0]; 310 #endif 311 double taylor = (sub1 * x2 + sub2) * x2 + ONE; 312 313 return taylor * sign; 314 } 315 252 316 } /* namespace lol */ 253 317 -
trunk/test/lol-bench.cpp
r890 r897 75 75 static void bench_trig(int mode) 76 76 { 77 float result[ 5] = { 0.0f };77 float result[7] = { 0.0f }; 78 78 Timer timer; 79 79 … … 128 128 result[3] += timer.GetMs(); 129 129 130 /* Fast cos */ 131 timer.GetMs(); 132 for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) 133 #if defined HAVE_FASTMATH_H 134 pf2[i] = f_cosf(pf[i]); 135 #else 136 pf2[i] = cosf(pf[i]); 137 #endif 138 result[4] += timer.GetMs(); 139 140 /* Lol cos */ 141 timer.GetMs(); 142 for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) 143 pf2[i] = lol_cos(pf[i]); 144 result[5] += timer.GetMs(); 145 130 146 /* Tan */ 131 147 timer.GetMs(); 132 148 for (size_t i = 0; i < TRIG_TABLE_SIZE; i++) 133 149 pf2[i] = __builtin_tanf(pf[i]); 134 result[ 4] += timer.GetMs();150 result[6] += timer.GetMs(); 135 151 } 136 152 … … 146 162 Log::Info("float = lol_sinf(float) %7.3f\n", result[2]); 147 163 Log::Info("float = cosf(float) %7.3f\n", result[3]); 148 Log::Info("float = tanf(float) %7.3f\n", result[4]); 164 Log::Info("float = fastcosf(float) %7.3f\n", result[4]); 165 Log::Info("float = lol_cosf(float) %7.3f\n", result[5]); 166 Log::Info("float = tanf(float) %7.3f\n", result[6]); 149 167 } 150 168 -
trunk/test/trig.cpp
r894 r897 55 55 CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11); 56 56 } 57 58 for (int i = -10000; i < 10000; i++) 59 { 60 double f = (double)i * (1.0 / 1000.0); 61 double a = __builtin_cos(f); 62 double b = lol_cos(f); 63 CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11); 64 } 65 66 for (int i = -10000; i < 10000; i++) 67 { 68 double f = (double)i * (1.0 / 100000.0); 69 double a = __builtin_cos(f); 70 double b = lol_cos(f); 71 CPPUNIT_ASSERT(fabs(a - b) <= fabs(f) * 1e-11); 72 } 57 73 } 58 74 };
Note: See TracChangeset for help on using the changeset viewer.