|
6 | 6 | #include "arch.hpp"
|
7 | 7 | #include "../../global.hpp"
|
8 | 8 |
|
9 |
| -#include "sandy_bridge.hpp" |
10 |
| -#include "ivy_bridge.hpp" |
11 |
| -#include "haswell.hpp" |
12 |
| -#include "skylake_128.hpp" |
13 |
| -#include "skylake_256.hpp" |
14 |
| -#include "skylake_512.hpp" |
15 |
| -#include "broadwell.hpp" |
16 |
| -#include "cannon_lake_256.hpp" |
17 |
| -#include "cannon_lake_512.hpp" |
18 |
| -#include "ice_lake.hpp" |
19 |
| -#include "knl.hpp" |
20 |
| -#include "zen.hpp" |
21 |
| -#include "zen2.hpp" |
22 |
| - |
23 |
| -struct benchmark_cpu { |
24 |
| - int n_threads; |
25 |
| - double gflops; |
26 |
| - const char* name; |
27 |
| - bench_type benchmark_type; |
28 |
| - void (*compute_function_128)(__m128 *farr_ptr, __m128, int); |
29 |
| - void (*compute_function_256)(__m256 *farr_ptr, __m256, int); |
30 |
| - void (*compute_function_512)(__m512 *farr_ptr, __m512, int); |
31 |
| -}; |
32 |
| - |
33 |
| -enum { |
34 |
| - BENCH_128_8, |
35 |
| - BENCH_256_6_NOFMA, |
36 |
| - BENCH_256_5, |
37 |
| - BENCH_256_8, |
38 |
| - BENCH_256_10, |
39 |
| - BENCH_512_8, |
40 |
| - BENCH_512_12, |
41 |
| -}; |
42 |
| - |
43 |
| -static const char *bench_name[] = { |
44 |
| - /*[BENCH_TYPE_SANDY_BRIDGE] = */ "Sandy Bridge (AVX)", |
45 |
| - /*[BENCH_TYPE_IVY_BRIDGE] = */ "Ivy Bridge (AVX)", |
46 |
| - /*[BENCH_TYPE_HASWELL] = */ "Haswell (AVX2)", |
47 |
| - /*[BENCH_TYPE_BROADWELL] = */ "Broadwell (AVX2)", |
48 |
| - /*[BENCH_TYPE_SKYLAKE_256] = */ "Skylake (SSE)", |
49 |
| - /*[BENCH_TYPE_SKYLAKE_256] = */ "Skylake (AVX2)", |
50 |
| - /*[BENCH_TYPE_SKYLAKE_512] = */ "Skylake (AVX512)", |
51 |
| - /*[BENCH_TYPE_KABY_LAKE] = */ "Kaby Lake (AVX2)", |
52 |
| - /*[BENCH_TYPE_COFFE_LAKE] = */ "Coffe Lake (AVX2)", |
53 |
| - /*[BENCH_TYPE_COMET_LAKE] = */ "Comet Lake (AVX2)", |
54 |
| - /*[BENCH_TYPE_ICE_LAKE] = */ "Ice Lake (AVX2)", |
55 |
| - /*[BENCH_TYPE_TIGER_LAKE] = */ "Tiger Lake (AVX2)", |
56 |
| - /*[BENCH_TYPE_KNIGHTS_LANDING] = */ "Knights Landing (AVX512)", |
57 |
| - /*[BENCH_TYPE_ZEN] = */ "Zen (AVX2)", |
58 |
| - /*[BENCH_TYPE_ZEN_PLUS] = */ "Zen+ (AVX2)", |
59 |
| - /*[BENCH_TYPE_ZEN2] = */ "Zen 2 (AVX2)", |
60 |
| -}; |
61 |
| - |
62 |
| -static const char *bench_types_str[] = { |
63 |
| - /*[BENCH_TYPE_SANDY_BRIDGE] = */ "sandy_bridge", |
64 |
| - /*[BENCH_TYPE_IVY_BRIDGE] = */ "ivy_bridge", |
65 |
| - /*[BENCH_TYPE_HASWELL] = */ "haswell", |
66 |
| - /*[BENCH_TYPE_BROADWELL] = */ "broadwell", |
67 |
| - /*[BENCH_TYPE_SKYLAKE_256] = */ "skylake_128", |
68 |
| - /*[BENCH_TYPE_SKYLAKE_256] = */ "skylake_256", |
69 |
| - /*[BENCH_TYPE_SKYLAKE_512] = */ "skylake_512", |
70 |
| - /*[BENCH_TYPE_KABY_LAKE] = */ "kaby_lake", |
71 |
| - /*[BENCH_TYPE_COFFE_LAKE] = */ "coffe_lake", |
72 |
| - /*[BENCH_TYPE_COMET_LAKE] = */ "comet_lake", |
73 |
| - /*[BENCH_TYPE_ICE_LAKE] = */ "ice_lake", |
74 |
| - /*[BENCH_TYPE_TIGER_LAKE] = */ "tiger_lake", |
75 |
| - /*[BENCH_TYPE_KNIGHTS_LANDING] = */ "knights_landing", |
76 |
| - /*[BENCH_TYPE_ZEN] = */ "zen", |
77 |
| - /*[BENCH_TYPE_ZEN_PLUS] = */ "zen_plus", |
78 |
| - /*[BENCH_TYPE_ZEN2] = */ "zen2", |
79 |
| -}; |
80 |
| - |
81 | 9 | bench_type parse_benchmark_cpu(char* str) {
|
82 | 10 | int len = sizeof(bench_types_str) / sizeof(bench_types_str[0]);
|
83 | 11 | for(bench_type t = 0; t < len; t++) {
|
@@ -176,82 +104,12 @@ double compute_gflops(int n_threads, char bench) {
|
176 | 104 | * - Zen 2 -> zen2
|
177 | 105 | */
|
178 | 106 | bool select_benchmark(struct benchmark_cpu* bench) {
|
179 |
| - bench->compute_function_128 = NULL; |
180 |
| - bench->compute_function_256 = NULL; |
181 |
| - bench->compute_function_512 = NULL; |
182 |
| - |
183 |
| - switch(bench->benchmark_type) { |
184 |
| - case BENCH_TYPE_SANDY_BRIDGE: |
185 |
| - bench->compute_function_256 = compute_sandy_bridge; |
186 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_6_NOFMA); |
187 |
| - break; |
188 |
| - case BENCH_TYPE_IVY_BRIDGE: |
189 |
| - bench->compute_function_256 = compute_ivy_bridge; |
190 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_6_NOFMA); |
191 |
| - break; |
192 |
| - case BENCH_TYPE_HASWELL: |
193 |
| - bench->compute_function_256 = compute_haswell; |
194 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_10); |
195 |
| - break; |
196 |
| - case BENCH_TYPE_SKYLAKE_512: |
197 |
| - bench->compute_function_512 = compute_skylake_512; |
198 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_512_8); |
199 |
| - break; |
200 |
| - case BENCH_TYPE_SKYLAKE_256: |
201 |
| - bench->compute_function_256 = compute_skylake_256; |
202 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_8); |
203 |
| - break; |
204 |
| - case BENCH_TYPE_SKYLAKE_128: |
205 |
| - bench->compute_function_128 = compute_skylake_128; |
206 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_128_8); |
207 |
| - break; |
208 |
| - case BENCH_TYPE_BROADWELL: |
209 |
| - bench->compute_function_256 = compute_broadwell; |
210 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_8); |
211 |
| - break; |
212 |
| - case BENCH_TYPE_KABY_LAKE: |
213 |
| - bench->compute_function_256 = compute_skylake_256; |
214 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_8); |
215 |
| - break; |
216 |
| - case BENCH_TYPE_COFFE_LAKE: |
217 |
| - bench->compute_function_256 = compute_skylake_256; |
218 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_8); |
219 |
| - break; |
220 |
| - case BENCH_TYPE_COMET_LAKE: |
221 |
| - bench->compute_function_256 = compute_skylake_256; |
222 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_8); |
223 |
| - break; |
224 |
| - case BENCH_TYPE_ICE_LAKE: |
225 |
| - bench->compute_function_256 = compute_ice_lake; |
226 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_8); |
227 |
| - break; |
228 |
| - case BENCH_TYPE_TIGER_LAKE: |
229 |
| - bench->compute_function_256 = compute_ice_lake; |
230 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_8); |
231 |
| - break; |
232 |
| - case BENCH_TYPE_KNIGHTS_LANDING: |
233 |
| - bench->compute_function_512 = compute_knl; |
234 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_512_12); |
235 |
| - break; |
236 |
| - case BENCH_TYPE_ZEN: |
237 |
| - bench->compute_function_256 = compute_zen; |
238 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_5); |
239 |
| - break; |
240 |
| - case BENCH_TYPE_ZEN_PLUS: |
241 |
| - bench->compute_function_256 = compute_zen; |
242 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_5); |
243 |
| - break; |
244 |
| - case BENCH_TYPE_ZEN2: |
245 |
| - bench->compute_function_256 = compute_zen2; |
246 |
| - bench->gflops = compute_gflops(bench->n_threads, BENCH_256_10); |
247 |
| - break; |
248 |
| - default: |
249 |
| - printErr("No valid benchmark! (bench: %d)", bench->benchmark_type); |
250 |
| - return false; |
251 |
| - } |
252 |
| - |
253 |
| - bench->name = bench_name[bench->benchmark_type]; |
254 |
| - return true; |
| 107 | + if(bench->benchmark_type == BENCH_TYPE_SKYLAKE_128) |
| 108 | + return select_benchmark_sse(bench); |
| 109 | + else if(bench->benchmark_type == BENCH_TYPE_SKYLAKE_512 || bench->benchmark_type == BENCH_TYPE_KNIGHTS_LANDING) |
| 110 | + return select_benchmark_avx512(bench); |
| 111 | + else |
| 112 | + return select_benchmark_avx(bench); |
255 | 113 | }
|
256 | 114 |
|
257 | 115 | struct benchmark_cpu* init_benchmark_cpu(struct cpu* cpu, int n_threads, char *bench_type_str) {
|
@@ -351,39 +209,13 @@ struct benchmark_cpu* init_benchmark_cpu(struct cpu* cpu, int n_threads, char *b
|
351 | 209 | return NULL;
|
352 | 210 | }
|
353 | 211 |
|
354 |
| -bool compute_cpu (struct benchmark_cpu* bench, double* e_time) { |
355 |
| - struct timeval t1, t2; |
356 |
| - gettimeofday(&t1, NULL); |
357 |
| - |
358 |
| - if(bench->benchmark_type == BENCH_TYPE_SKYLAKE_512 || bench->benchmark_type == BENCH_TYPE_KNIGHTS_LANDING) { |
359 |
| - __m512 mult = {0}; |
360 |
| - __m512 *farr_ptr = NULL; |
361 |
| - |
362 |
| - #pragma omp parallel for |
363 |
| - for(int t=0; t < bench->n_threads; t++) |
364 |
| - bench->compute_function_512(farr_ptr, mult, t); |
365 |
| - } |
366 |
| - else if(bench->benchmark_type == BENCH_TYPE_SKYLAKE_128) { |
367 |
| - __m128 mult = {0}; |
368 |
| - __m128 *farr_ptr = NULL; |
369 |
| - |
370 |
| - #pragma omp parallel for |
371 |
| - for(int t=0; t < bench->n_threads; t++) |
372 |
| - bench->compute_function_128(farr_ptr, mult, t); |
373 |
| - } |
374 |
| - else { |
375 |
| - __m256 mult = {0}; |
376 |
| - __m256 *farr_ptr = NULL; |
377 |
| - |
378 |
| - #pragma omp parallel for |
379 |
| - for(int t=0; t < bench->n_threads; t++) |
380 |
| - bench->compute_function_256(farr_ptr, mult, t); |
381 |
| - } |
382 |
| - |
383 |
| - gettimeofday(&t2, NULL); |
384 |
| - *e_time = (double)((t2.tv_sec-t1.tv_sec)*1000000 + t2.tv_usec-t1.tv_usec)/1000000; |
385 |
| - |
386 |
| - return true; |
| 212 | +bool compute_cpu(struct benchmark_cpu* bench, double* e_time) { |
| 213 | + if(bench->benchmark_type == BENCH_TYPE_SKYLAKE_128) |
| 214 | + return compute_cpu_sse(bench, e_time); |
| 215 | + else if(bench->benchmark_type == BENCH_TYPE_SKYLAKE_512 || bench->benchmark_type == BENCH_TYPE_KNIGHTS_LANDING) |
| 216 | + return compute_cpu_avx512(bench, e_time); |
| 217 | + else |
| 218 | + return compute_cpu_avx(bench, e_time); |
387 | 219 | }
|
388 | 220 |
|
389 | 221 | double get_gflops_cpu(struct benchmark_cpu* bench) {
|
|
0 commit comments