#include <cassert>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <ratio>
---|
6 | |
---|
7 | using namespace std; |
---|
8 | |
---|
// Forward declarations of the seven polynomial evaluators. Each one carries
// the GCC/Clang `noinline` attribute so that a call to it stays a real call
// instead of being merged into the caller's loop.
__attribute__((noinline)) double sin1(double x);
__attribute__((noinline)) double sin2(double x);
__attribute__((noinline)) double sin3(double x);
__attribute__((noinline)) double sin4(double x);
__attribute__((noinline)) double sin5(double x);
__attribute__((noinline)) double sin6(double x);
__attribute__((noinline)) double sin7(double x);
---|
16 | |
---|
// Coefficients of the odd polynomial a0*x + a1*x^3 + ... + a7*x^15 that every
// sinN() variant evaluates; a_k multiplies x^(2k+1).
// NOTE(review): the values sit close to the Taylor coefficients of sine
// (+-1/(2k+1)!) without matching them exactly - presumably a fitted
// approximation; the fitting interval is not stated in this file.
static constexpr double a0 = +1.0;
static constexpr double a1 = -1.666666666666580809419428987894207e-1;
static constexpr double a2 = +8.333333333262716094425037738346873e-3;
static constexpr double a3 = -1.984126982005911439283646346964929e-4;
static constexpr double a4 = +2.755731607338689220657382272783309e-6;
static constexpr double a5 = -2.505185130214293595900283001271652e-8;
static constexpr double a6 = +1.604729591825977403374012010065495e-10;
static constexpr double a7 = -7.364589573262279913270651228486670e-13;
---|
25 | |
---|
// Evaluates a0*x + a1*x^3 + ... + a7*x^15 term by term.
//
// Every odd power is written out as its own chain of multiplications by x,
// finished by the coefficient, and the eight products are added in ascending
// order of degree. Nothing is shared between terms.
//
// The whole polynomial is one expression; its left-to-right association
// fixes the floating-point rounding, so it is kept exactly as written.
double sin1(double x)
{
    return x * a0
        + x * x * x * a1
        + x * x * x * x * x * a2
        + x * x * x * x * x * x * x * a3
        + x * x * x * x * x * x * x * x * x * a4
        + x * x * x * x * x * x * x * x * x * x * x * a5
        + x * x * x * x * x * x * x * x * x * x * x * x * x * a6
        + x * x * x * x * x * x * x * x * x * x * x * x * x * x * x * a7;
}
---|
37 | |
---|
38 | double sin2(double x) |
---|
39 | { |
---|
40 | double ret = 0.0; |
---|
41 | double y = x; |
---|
42 | double x2 = x * x; |
---|
43 | ret += a0 * y; y *= x2; |
---|
44 | ret += a1 * y; y *= x2; |
---|
45 | ret += a2 * y; y *= x2; |
---|
46 | ret += a3 * y; y *= x2; |
---|
47 | ret += a4 * y; y *= x2; |
---|
48 | ret += a5 * y; y *= x2; |
---|
49 | ret += a6 * y; y *= x2; |
---|
50 | ret += a7 * y; |
---|
51 | return ret; |
---|
52 | } |
---|
53 | |
---|
54 | double sin3(double x) |
---|
55 | { |
---|
56 | double x2 = x * x; |
---|
57 | return x * (a0 + x2 * (a1 + x2 * (a2 + x2 * (a3 + x2 * (a4 + x2 * (a5 + x2 * (a6 + x2 * a7))))))); |
---|
58 | } |
---|
59 | |
---|
60 | double sin4(double x) |
---|
61 | { |
---|
62 | double x2 = x * x; |
---|
63 | double x4 = x2 * x2; |
---|
64 | double A = a0 + x4 * (a2 + x4 * (a4 + x4 * a6)); |
---|
65 | double B = a1 + x4 * (a3 + x4 * (a5 + x4 * a7)); |
---|
66 | return x * (A + x2 * B); |
---|
67 | } |
---|
68 | |
---|
69 | double sin5(double x) |
---|
70 | { |
---|
71 | double x2 = x * x; |
---|
72 | double x4 = x2 * x2; |
---|
73 | double x6 = x4 * x2; |
---|
74 | double A = a0 + x6 * (a3 + x6 * a6); |
---|
75 | double B = a1 + x6 * (a4 + x6 * a7); |
---|
76 | double C = a2 + x6 * a5; |
---|
77 | return x * (A + x2 * B + x4 * C); |
---|
78 | } |
---|
79 | |
---|
80 | double sin6(double x) |
---|
81 | { |
---|
82 | double x2 = x * x; |
---|
83 | double x4 = x2 * x2; |
---|
84 | double x8 = x4 * x4; |
---|
85 | double A = a0 + x2 * (a1 + x2 * (a2 + x2 * a3)); |
---|
86 | double B = a4 + x2 * (a5 + x2 * (a6 + x2 * a7)); |
---|
87 | return x * (A + x8 * B); |
---|
88 | } |
---|
89 | |
---|
// Evaluates the polynomial as the sum of three pieces:
//   C      = a0 * x
//   A      = x^3 * (a1 + a2*x^2 + a3*x^4)
//   x^9*B  with B = a4 + a5*x^2 + a6*x^4 + a7*x^6
// Unlike the other variants there is no final multiplication by x; the odd
// factor is folded into each piece through x, x3 and x9.
double sin7(double x)
{
    double x2 = x * x;
    double x3 = x2 * x;
    double x4 = x2 * x2;
    double x8 = x4 * x4;
    double x9 = x8 * x;
    // Empty inline-asm statement listing the precomputed powers as read-write
    // operands. "+x" is the x86 SSE-register constraint in GCC/Clang, so this
    // line is target-specific. The compiler has to treat each listed value as
    // possibly changed at this point.
    // NOTE(review): presumably this stops the optimizer from re-deriving the
    // powers along a different dependency chain - confirm against the
    // generated assembly.
    __asm__("" : "+x" (x2), "+x" (x3), "+x" (x4), "+x" (x8), "+x" (x9));
    double A = x3 * (a1 + x2 * (a2 + x2 * a3));
    double B = a4 + x2 * (a5 + x2 * (a6 + x2 * a7));
    double C = a0 * x;
    // Associates as (A + C) + x9 * B.
    return A + C + x9 * B;
}
---|
103 | |
---|
104 | int main() |
---|
105 | { |
---|
106 | typedef chrono::high_resolution_clock clock_t; |
---|
107 | clock_t::time_point t0, t1; |
---|
108 | size_t const iterations = 10000000; |
---|
109 | double const inv = 1.0 / iterations; |
---|
110 | |
---|
111 | double sum = 0.0; |
---|
112 | t0 = clock_t::now(); |
---|
113 | for (size_t run = 0; run < iterations; run++) |
---|
114 | sum += run * inv; |
---|
115 | t1 = clock_t::now(); |
---|
116 | double norm = chrono::nanoseconds(t1 - t0).count() * inv; |
---|
117 | |
---|
118 | double sum0 = 0.0; |
---|
119 | t0 = clock_t::now(); |
---|
120 | for (size_t run = 0; run < iterations; run++) |
---|
121 | sum0 += sin(run * inv); |
---|
122 | t1 = clock_t::now(); |
---|
123 | cout << "sin: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl; |
---|
124 | |
---|
125 | double sum1 = 0.0; |
---|
126 | t0 = clock_t::now(); |
---|
127 | for (size_t run = 0; run < iterations; run++) |
---|
128 | sum1 += sin1(run * inv); |
---|
129 | t1 = clock_t::now(); |
---|
130 | cout << "sin1: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl; |
---|
131 | |
---|
132 | double sum2 = 0.0; |
---|
133 | t0 = clock_t::now(); |
---|
134 | for (size_t run = 0; run < iterations; run++) |
---|
135 | sum2 += sin2(run * inv); |
---|
136 | t1 = clock_t::now(); |
---|
137 | cout << "sin2: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl; |
---|
138 | |
---|
139 | double sum3 = 0.0; |
---|
140 | t0 = clock_t::now(); |
---|
141 | for (size_t run = 0; run < iterations; run++) |
---|
142 | sum3 += sin3(run * inv); |
---|
143 | t1 = clock_t::now(); |
---|
144 | cout << "sin3: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl; |
---|
145 | |
---|
146 | double sum4 = 0.0; |
---|
147 | t0 = clock_t::now(); |
---|
148 | for (size_t run = 0; run < iterations; run++) |
---|
149 | sum4 += sin4(run * inv); |
---|
150 | t1 = clock_t::now(); |
---|
151 | cout << "sin4: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl; |
---|
152 | |
---|
153 | double sum5 = 0.0; |
---|
154 | t0 = clock_t::now(); |
---|
155 | for (size_t run = 0; run < iterations; run++) |
---|
156 | sum5 += sin5(run * inv); |
---|
157 | t1 = clock_t::now(); |
---|
158 | cout << "sin5: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl; |
---|
159 | |
---|
160 | double sum6 = 0.0; |
---|
161 | t0 = clock_t::now(); |
---|
162 | for (size_t run = 0; run < iterations; run++) |
---|
163 | sum6 += sin6(run * inv); |
---|
164 | t1 = clock_t::now(); |
---|
165 | cout << "sin6: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl; |
---|
166 | |
---|
167 | double sum7 = 0.0; |
---|
168 | t0 = clock_t::now(); |
---|
169 | for (size_t run = 0; run < iterations; run++) |
---|
170 | sum7 += sin7(run * inv); |
---|
171 | t1 = clock_t::now(); |
---|
172 | cout << "sin7: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl; |
---|
173 | |
---|
174 | cout << setprecision(20); |
---|
175 | cout << sum0 << endl; |
---|
176 | cout << sum1 << endl; |
---|
177 | cout << sum2 << endl; |
---|
178 | cout << sum3 << endl; |
---|
179 | cout << sum4 << endl; |
---|
180 | cout << sum5 << endl; |
---|
181 | cout << sum6 << endl; |
---|
182 | cout << sum7 << endl; |
---|
183 | |
---|
184 | return sum + sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 == 0.0; |
---|
185 | } |
---|
186 | |
---|