10
10
11
11
#include < string.h>
12
12
13
- #include < CL /sycl.hpp>
13
+ #include < sycl /sycl.hpp>
14
14
#include < cstdlib>
15
15
#include < memory>
16
16
#include < thread>
24
24
#define B_VALUE 0 .256f
25
25
// Accuracy threshold: the value returned by RunAndCheck is compared against
// MAX_EPS to report results as CORRECT (eps < MAX_EPS) or INCORRECT, and Check
// asserts that its `value` argument is greater than MAX_EPS.
#define MAX_EPS 1 .0e-4f
26
26
27
// File-local flag gating the optional timing and accuracy output; false by
// default and set to true when -v / --verbose is given on the command line.
+ static bool verbose = false ;
28
+
27
29
static float Check (const std::vector<float >& a, float value) {
28
30
assert (value > MAX_EPS);
29
31
@@ -86,7 +88,9 @@ static float RunAndCheck(sycl::queue queue, const std::vector<float>& a,
86
88
throw ;
87
89
}
88
90
89
- std::cout << " \t Matrix multiplication time: " << time << " sec" << std::endl;
91
+ if (verbose) {
92
+ std::cout << " \t Matrix multiplication time: " << time << " sec" << std::endl;
93
+ }
90
94
91
95
return Check (c, expected_result);
92
96
}
@@ -97,8 +101,10 @@ static void Compute(sycl::queue queue, const std::vector<float>& a,
97
101
float expected_result) {
98
102
for (unsigned i = 0 ; i < repeat_count; ++i) {
99
103
float eps = RunAndCheck (queue, a, b, c, size, expected_result);
100
- std::cout << " Results are " << ((eps < MAX_EPS) ? " " : " IN" )
104
+ if (verbose) {
105
+ std::cout << " Results are " << ((eps < MAX_EPS) ? " " : " IN" )
101
106
<< " CORRECT with accuracy: " << eps << std::endl;
107
+ }
102
108
}
103
109
}
104
110
@@ -122,45 +128,61 @@ const unsigned max_thread_count = 64;
122
128
// Upper and lower bounds that the requested matrix size is clamped to when
// command-line arguments are parsed in main.
const unsigned max_size = 8192 ;
123
129
const unsigned min_size = 32 ;
124
130
131
// Defaults used to initialize size, thread_count and repeat_count in main and
// printed by Usage as the default value of each option.
+ const unsigned default_size = 1024 ;
132
+ const unsigned default_thread_count = 2 ;
133
+ const unsigned default_repetition_per_thread = 4 ;
134
+
125
135
// Prints the command-line help text to std::cout.
// `name` is the program name shown in the help text (main passes argv[0]).
// This span is a rendered diff: lines marked `-` are the removed help text and
// lines marked `+` are its replacement.
void Usage (const char * name) {
126
136
127
// Removed version: a single usage line describing positional arguments, with
// the defaults written as literals inside the message.
- std::cout << " Calculating floating point matrix multiply on gpu, submitting the work from many CPU threads\n " ;
128
- std::cout << name << " [ [number of threads, default=2, max=" << max_thread_count
129
- << " ], [matrix size, default=1024, max=" << max_size << " ], [repetition count, default=4]] \n " ;
137
// Added version: a banner followed by one entry per option; the default values
// are streamed from the default_* constants rather than repeated as literals.
+ std::cout << " Calculating floating point matrix multiply on gpu, submitting the work from many CPU threads\n "
138
+ << " Usage " << name << " [ options ]" << std::endl;
139
+ std::cout <<
140
+ " --threads [-t] integer " <<
141
+ " Threads number, default: " << default_thread_count << std::endl;
142
+ std::cout <<
143
+ " --size [-s] integer " <<
144
+ " Matrix size, default: " << default_size << std::endl;
145
+ std::cout <<
146
+ " --repeat [-r] integer " <<
147
+ " Repetition number per thread, default: " << default_repetition_per_thread << std::endl;
148
+ std::cout <<
149
+ " --verbose [-v] " <<
150
+ " Enable verbose mode to report the app progress, default: off" << std::endl;
130
151
}
131
152
132
153
int main (int argc, char * argv[]) {
133
154
134
155
int exit_code = EXIT_SUCCESS;
135
- unsigned thread_count = 2 ;
136
- unsigned repeat_count = 4 ;
137
- unsigned size = 1024 ;
138
-
139
- if (argc == 2 &&
140
- ( strcmp (argv[1 ], " -?" ) == 0 or strcmp (argv[1 ], " -h" ) == 0 or strcmp (argv[1 ], " --help" ) == 0 ) ){
141
- Usage (argv[0 ]);
142
- return EXIT_SUCCESS;
143
- }
156
+ unsigned thread_count = default_thread_count;
157
+ unsigned repeat_count = default_repetition_per_thread;
158
+ unsigned size = default_size;
144
159
145
160
try {
146
161
unsigned temp;
147
- if (argc > 1 ) {
148
- temp = std::stoul (argv[1 ]);
149
- thread_count = (temp < 1 ) ? 1 :
150
- (temp > max_thread_count) ? max_thread_count : temp;
151
- }
152
- if (argc > 2 ) {
153
- temp = std::stoul (argv[2 ]);
154
- size = (temp < min_size) ? min_size :
155
- (temp > max_size) ? max_size : temp;
156
- }
157
-
158
- if (argc > 3 ) {
159
- temp = std::stoul (argv[3 ]);
160
- repeat_count = (temp < 1 ) ? 1 : temp;
162
+ for (uint32_t i=1 ; i < argc; i++) {
163
+ if (strcmp (argv[i], " -s" ) == 0 || strcmp (argv[i], " --size" ) == 0 ){
164
+ i++;
165
+ temp = std::stoul (argv[i]);
166
+ size = (temp < min_size) ? min_size : (temp > max_size) ? max_size : temp;
167
+ } else if (strcmp (argv[i], " -t" ) == 0 || strcmp (argv[i], " --threads" ) == 0 ){
168
+ i++;
169
+ temp = std::stoul (argv[i]);
170
+ thread_count = (temp < 1 ) ? 1 : (temp > max_thread_count) ? max_thread_count : temp;
171
+ } else if (strcmp (argv[i], " -r" ) == 0 || strcmp (argv[i], " --repeat" ) == 0 ){
172
+ i++;
173
+ temp = std::stoul (argv[i]);
174
+ repeat_count = (temp < 1 ) ? 1 : temp;
175
+ } else if (strcmp (argv[i], " -v" ) == 0 || strcmp (argv[i], " --verbose" ) == 0 ){
176
+ // Keeping verbosity off minimizes the sample's own output,
177
+ // so profiling output is not intermixed with the sample output
178
+ // and can be analyzed by tests.
179
+ verbose = true ;
180
+ } else {
181
+ Usage (argv[0 ]);
182
+ return EXIT_SUCCESS;
183
+ }
161
184
}
162
185
}
163
-
164
186
catch (...) {
165
187
Usage (argv[0 ]);
166
188
return EXIT_FAILURE;
@@ -328,15 +350,17 @@ int main(int argc, char* argv[]) {
328
350
auto end = std::chrono::steady_clock::now ();
329
351
std::chrono::duration<float > time = end - start;
330
352
331
- std::cout << " \t -- Total execution time: " << time .count () << " sec" << std::endl;
353
+ if (verbose) {
354
+ std::cout << " \t -- Total execution time: " << time .count () << " sec" << std::endl;
355
+ }
332
356
};
333
357
334
358
std::cout << " DPC++ Matrix Multiplication (CPU threads: " << thread_count << " , matrix size: " << size << " x "
335
359
<< size << " , repeats: " << repeat_count << " times)" << std::endl;
336
360
std::cout << " Target device: "
337
361
<< queue.get_info <sycl::info::queue::device>()
338
362
.get_info <sycl::info::device::name>()
339
- << std::endl;
363
+ << std::endl << std::flush ;
340
364
341
365
std::vector<std::thread> the_threads;
342
366
for (unsigned i=0 ; i<thread_count; i++) {
0 commit comments