-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcommon_kernels.cl
72 lines (59 loc) · 1.58 KB
/
common_kernels.cl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/*common_kernels.cl*/
#include "real.cl"
/**
 * Zero-initialization.
 *
 * Each work-item stores 0 into the element of `result` selected by its
 * global id in dimension 0. There is no bounds check, so the launch size
 * must not exceed the number of elements in `result`.
 *
 * The buffer pointer carries no read_only/write_only qualifier: OpenCL C
 * defines those access qualifiers for image (and pipe) arguments only.
 *
 * @param result  Buffer whose elements are set to zero.
 */
__kernel void zero_memory(__global real *result)
{
    const size_t gid = get_global_id(0);

    result[gid] = 0;
}
/**
 * Parallel L2-norm, element-wise step.
 *
 * Each work-item writes the square of input[i] to output[i], where i is its
 * global id in dimension 0. The kernel itself neither sums the squares nor
 * takes a square root; presumably the caller finishes the norm (e.g. with
 * SumAll and a host-side sqrt) -- confirm against the host code.
 *
 * There is no bounds check, so the launch size must not exceed the number of
 * elements in either buffer.
 *
 * Buffer pointers carry no read_only/write_only qualifiers: OpenCL C defines
 * those for image (and pipe) arguments only. Read-only data is marked const.
 *
 * @param input   Values to square.
 * @param output  Receives input[i] * input[i] at index i.
 */
__kernel void L2Norm(__global const real *input,
                     __global real *output)
{
    /* size_t matches the return type of get_global_id, avoiding a narrowing
       conversion to int. */
    const size_t gid = get_global_id(0);
    const real value = input[gid];

    output[gid] = value * value;
}
/**
 * Parallel LInf-norm, per-chunk partial result.
 *
 * Work-item k (global id in dimension 0) scans the contiguous range
 * [k * chunks, min(inputSize, k * chunks + chunks)) of `input` and writes the
 * largest absolute value found to output[k]. If that range is empty (the
 * chunk starts at or beyond inputSize) the result written is 0.
 *
 * output[k] is written unconditionally, so `output` needs one element per
 * launched work-item. The chunk start is computed in int arithmetic, so
 * k * chunks is assumed to fit in an int.
 *
 * Buffer pointers carry no read_only/write_only qualifiers: OpenCL C defines
 * those for image (and pipe) arguments only. Read-only data is marked const.
 *
 * @param input      Values to reduce.
 * @param output     Receives one partial maximum per work-item.
 * @param inputSize  Number of valid elements in `input`.
 * @param chunks     Number of consecutive elements handled by one work-item.
 */
__kernel void LInfNorm(__global const real *input,
                       __global real *output,
                       int inputSize,
                       int chunks)
{
    const int outPos = get_global_id(0);
    const int first = outPos * chunks;
    /* Clip the last chunk so it never reads past the valid input. */
    const int last = min(inputSize, first + chunks);

    /* Integer 0 converts exactly to `real` and avoids a double literal. */
    real largest = 0;
    for (int i = first; i < last; ++i) {
        largest = max(largest, fabs(input[i]));
    }

    output[outPos] = largest;
}
/**
 * Parallel SumAll, per-chunk partial sum.
 *
 * Work-item k (global id in dimension 0) adds up the contiguous range
 * [k * chunks, min(inputSize, k * chunks + chunks)) of `input` and writes the
 * total to output[k]. If that range is empty (the chunk starts at or beyond
 * inputSize) the result written is 0.
 *
 * output[k] is written unconditionally, so `output` needs one element per
 * launched work-item. The chunk start is computed in int arithmetic, so
 * k * chunks is assumed to fit in an int.
 *
 * Buffer pointers carry no read_only/write_only qualifiers: OpenCL C defines
 * those for image (and pipe) arguments only. Read-only data is marked const.
 *
 * @param input      Values to sum.
 * @param output     Receives one partial sum per work-item.
 * @param inputSize  Number of valid elements in `input`.
 * @param chunks     Number of consecutive elements handled by one work-item.
 */
__kernel void SumAll(__global const real *input,
                     __global real *output,
                     int inputSize,
                     int chunks)
{
    const int outPos = get_global_id(0);
    const int first = outPos * chunks;
    /* Clip the last chunk so it never reads past the valid input. */
    const int last = min(inputSize, first + chunks);

    /* Integer 0 converts exactly to `real` and avoids a double literal. */
    real total = 0;
    for (int i = first; i < last; ++i) {
        total += input[i];
    }

    output[outPos] = total;
}
/**
 * Parallel vector difference.
 *
 * Each work-item writes a[i] - b[i] to output[i], where i is its global id
 * in dimension 0. There is no bounds check, so the launch size must not
 * exceed the number of elements in any of the three buffers.
 *
 * Buffer pointers carry no read_only/write_only qualifiers: OpenCL C defines
 * those for image (and pipe) arguments only. Read-only data is marked const.
 *
 * @param output  Receives the element-wise difference.
 * @param a       Minuend values.
 * @param b       Subtrahend values.
 */
__kernel void Diff(__global real *output,
                   __global const real *a,
                   __global const real *b)
{
    /* size_t matches the return type of get_global_id, avoiding a narrowing
       conversion to int. */
    const size_t gid = get_global_id(0);

    output[gid] = a[gid] - b[gid];
}
/**
 * Parallel multiplication by a constant.
 *
 * Each work-item writes input[i] * m to output[i], where i is its global id
 * in dimension 0. There is no bounds check, so the launch size must not
 * exceed the number of elements in either buffer.
 *
 * Buffer pointers carry no read_only/write_only qualifiers: OpenCL C defines
 * those for image (and pipe) arguments only. Read-only data is marked const.
 *
 * @param input   Values to scale.
 * @param output  Receives the scaled values.
 * @param m       Scale factor. It is declared float rather than `real`, so
 *                the host must pass a float-sized argument; the product
 *                follows the usual arithmetic conversions between `real`
 *                and float.
 */
__kernel void Mult(__global const real *input,
                   __global real *output,
                   float m)
{
    /* size_t matches the return type of get_global_id, avoiding a narrowing
       conversion to int. */
    const size_t gid = get_global_id(0);

    output[gid] = input[gid] * m;
}