-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgema.cve
48 lines (42 loc) · 1.18 KB
/
gema.cve
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#define UNROLL_FACTOR 6
#include <ve_kernel.h>
/*
 * Element-wise double-precision addition of one 256 x 256 tile: C = A + B.
 *
 * The tile processed by an invocation is selected by __pg_ix, which is
 * declared outside this file (presumably the index of the executing page
 * group -- confirm against ve_kernel.h).  Consecutive tiles are stored
 * TILE_DIM * TILE_DIM doubles apart in all three arrays.
 *
 * A, B: input arrays; only read.
 * C:    output array; the selected tile is overwritten with A + B.
 */
__ve_kernel__
void
gema_op_d(
const double * A,
const double * B,
double * C)
{
    /* Rows per tile, and doubles per row (one full-length vector per row). */
    enum { TILE_DIM = 256 };
    /* Byte stride between consecutive elements of a row (contiguous doubles). */
    enum { ELEM_STRIDE = 8 };

    const int ve_lvl = TILE_DIM;

    /*
     * Computed in 64 bits: in plain int the product overflows as soon as
     * the index reaches 32768.
     */
    const long long pg_off = (long long)__pg_ix * TILE_DIM * TILE_DIM;

    /* Resolve the tile base addresses once instead of once per row. */
    const double * my_A = A + pg_off;
    const double * my_B = B + pg_off;
    double * my_C = C + pg_off;

    __vr acc_A[UNROLL_FACTOR];
    __vr acc_B[UNROLL_FACTOR];

#pragma unroll
    for (int i = 0; i < TILE_DIM; i += UNROLL_FACTOR)
    {
        /*
         * First pass: load the rows of this group from A and B.  Loads are
         * kept separate from the add/store pass below, presumably so that
         * several loads can be in flight before the first result is needed.
         */
#pragma unroll (UNROLL_FACTOR)
        for (int ii = 0; ii < UNROLL_FACTOR; ++ii)
        {
            /* Guards the final, partial group when TILE_DIM is not a
             * multiple of UNROLL_FACTOR. */
            if (i + ii < TILE_DIM)
            {
                acc_A[ii] = _vel_vld_vssl(ELEM_STRIDE, my_A + (i + ii) * TILE_DIM, ve_lvl);
                acc_B[ii] = _vel_vld_vssl(ELEM_STRIDE, my_B + (i + ii) * TILE_DIM, ve_lvl);
            }
        }

        /* Second pass: add each loaded row pair and store the sum into C. */
#pragma unroll (UNROLL_FACTOR)
        for (int ii = 0; ii < UNROLL_FACTOR; ++ii)
        {
            if (i + ii < TILE_DIM)
            {
                _vel_vst_vssl(
                    _vel_vfaddd_vvvl(acc_A[ii], acc_B[ii], ve_lvl),
                    ELEM_STRIDE, my_C + (i + ii) * TILE_DIM, ve_lvl);
            }
        }
    }
}