# Blog: Playing with the CPU pipeline: poly.cpp

File poly.cpp, 5.5 KB (added by sam, 7 years ago)
Line
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <iomanip>
#include <iostream>

using namespace std;

// Forward declarations of the seven polynomial sine kernels benchmarked in
// main().  Each one is marked noinline so the compiler keeps it as a separate
// function instead of merging its body into the timing loop that calls it;
// every kernel is therefore measured through a real call.
double sin1(double x) __attribute__((noinline));
double sin2(double x) __attribute__((noinline));
double sin3(double x) __attribute__((noinline));
double sin4(double x) __attribute__((noinline));
double sin5(double x) __attribute__((noinline));
double sin6(double x) __attribute__((noinline));
double sin7(double x) __attribute__((noinline));

// Coefficients of the odd polynomial evaluated by sin1() .. sin7():
//
//     a0*x + a1*x^3 + a2*x^5 + ... + a7*x^15
//
// i.e. aN multiplies x^(2N+1).  The leading values are close to the Taylor
// coefficients of sine (a1 ~ -1/6, a2 ~ 1/120) but differ in the trailing
// digits, so they are presumably fitted (e.g. minimax) coefficients rather
// than a truncated Taylor series -- TODO confirm against the original article.
static double const a0 = +1.0;
static double const a1 = -1.666666666666580809419428987894207e-1;
static double const a2 = +8.333333333262716094425037738346873e-3;
static double const a3 = -1.984126982005911439283646346964929e-4;
static double const a4 = +2.755731607338689220657382272783309e-6;
static double const a5 = -2.505185130214293595900283001271652e-8;
static double const a6 = +1.604729591825977403374012010065495e-10;
static double const a7 = -7.364589573262279913270651228486670e-13;

26double sin1(double x)
27{
28    return x * a0
29         + x * x * x * a1
30         + x * x * x * x * x * a2
31         + x * x * x * x * x * x * x * a3
32         + x * x * x * x * x * x * x * x * x * a4
33         + x * x * x * x * x * x * x * x * x * x * x * a5
34         + x * x * x * x * x * x * x * x * x * x * x * x * x * a6
35         + x * x * x * x * x * x * x * x * x * x * x * x * x * x * x * a7;
36}
37
38double sin2(double x)
39{
40    double ret = 0.0;
41    double y = x;
42    double x2 = x * x;
43    ret += a0 * y; y *= x2;
44    ret += a1 * y; y *= x2;
45    ret += a2 * y; y *= x2;
46    ret += a3 * y; y *= x2;
47    ret += a4 * y; y *= x2;
48    ret += a5 * y; y *= x2;
49    ret += a6 * y; y *= x2;
50    ret += a7 * y;
51    return ret;
52}
53
54double sin3(double x)
55{
56    double x2 = x * x;
57    return x * (a0 + x2 * (a1 + x2 * (a2 + x2 * (a3 + x2 * (a4 + x2 * (a5 + x2 * (a6 + x2 * a7)))))));
58}
59
60double sin4(double x)
61{
62    double x2 = x * x;
63    double x4 = x2 * x2;
64    double A = a0 + x4 * (a2 + x4 * (a4 + x4 * a6));
65    double B = a1 + x4 * (a3 + x4 * (a5 + x4 * a7));
66    return x * (A + x2 * B);
67}
68
69double sin5(double x)
70{
71    double x2 = x * x;
72    double x4 = x2 * x2;
73    double x6 = x4 * x2;
74    double A = a0 + x6 * (a3 + x6 * a6);
75    double B = a1 + x6 * (a4 + x6 * a7);
76    double C = a2 + x6 * a5;
77    return x * (A + x2 * B + x4 * C);
78}
79
80double sin6(double x)
81{
82    double x2 = x * x;
83    double x4 = x2 * x2;
84    double x8 = x4 * x4;
85    double A = a0 + x2 * (a1 + x2 * (a2 + x2 * a3));
86    double B = a4 + x2 * (a5 + x2 * (a6 + x2 * a7));
87    return x * (A + x8 * B);
88}
89
90double sin7(double x)
91{
92    double x2 = x * x;
93    double x3 = x2 * x;
94    double x4 = x2 * x2;
95    double x8 = x4 * x4;
96    double x9 = x8 * x;
97    __asm__("" : "+x" (x2), "+x" (x3), "+x" (x4), "+x" (x8), "+x" (x9));
98    double A = x3 * (a1 + x2 * (a2 + x2 * a3));
99    double B = a4 + x2 * (a5 + x2 * (a6 + x2 * a7));
100    double C = a0 * x;
101    return A + C + x9 * B;
102}
103
104int main()
105{
106    typedef chrono::high_resolution_clock clock_t;
107    clock_t::time_point t0, t1;
108    size_t const iterations = 10000000;
109    double const inv = 1.0 / iterations;
110
111    double sum = 0.0;
112    t0 = clock_t::now();
113    for (size_t run = 0; run < iterations; run++)
114        sum += run * inv;
115    t1 = clock_t::now();
116    double norm = chrono::nanoseconds(t1 - t0).count() * inv;
117
118    double sum0 = 0.0;
119    t0 = clock_t::now();
120    for (size_t run = 0; run < iterations; run++)
121        sum0 += sin(run * inv);
122    t1 = clock_t::now();
123    cout << "sin: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl;
124
125    double sum1 = 0.0;
126    t0 = clock_t::now();
127    for (size_t run = 0; run < iterations; run++)
128        sum1 += sin1(run * inv);
129    t1 = clock_t::now();
130    cout << "sin1: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl;
131
132    double sum2 = 0.0;
133    t0 = clock_t::now();
134    for (size_t run = 0; run < iterations; run++)
135        sum2 += sin2(run * inv);
136    t1 = clock_t::now();
137    cout << "sin2: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl;
138
139    double sum3 = 0.0;
140    t0 = clock_t::now();
141    for (size_t run = 0; run < iterations; run++)
142        sum3 += sin3(run * inv);
143    t1 = clock_t::now();
144    cout << "sin3: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl;
145
146    double sum4 = 0.0;
147    t0 = clock_t::now();
148    for (size_t run = 0; run < iterations; run++)
149        sum4 += sin4(run * inv);
150    t1 = clock_t::now();
151    cout << "sin4: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl;
152
153    double sum5 = 0.0;
154    t0 = clock_t::now();
155    for (size_t run = 0; run < iterations; run++)
156        sum5 += sin5(run * inv);
157    t1 = clock_t::now();
158    cout << "sin5: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl;
159
160    double sum6 = 0.0;
161    t0 = clock_t::now();
162    for (size_t run = 0; run < iterations; run++)
163        sum6 += sin6(run * inv);
164    t1 = clock_t::now();
165    cout << "sin6: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl;
166
167    double sum7 = 0.0;
168    t0 = clock_t::now();
169    for (size_t run = 0; run < iterations; run++)
170        sum7 += sin7(run * inv);
171    t1 = clock_t::now();
172    cout << "sin7: " << chrono::nanoseconds(t1 - t0).count() * inv - norm << " ns" << endl;
173
174    cout << setprecision(20);
175    cout << sum0 << endl;
176    cout << sum1 << endl;
177    cout << sum2 << endl;
178    cout << sum3 << endl;
179    cout << sum4 << endl;
180    cout << sum5 << endl;
181    cout << sum6 << endl;
182    cout << sum7 << endl;
183
184    return sum + sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7 == 0.0;
185}
186