@@ -35,6 +35,23 @@ __global__ void k(A<T, N> a, T* b, const int iter) {
35
35
// Explicit instantiation of kernel template `k` for an A<int, num_elements>
// aggregate argument and an int output pointer.
template __global__ void k (A<int , num_elements>, int *, const int );
36
36
37
37
38
/**
 * Device helper that sums `iter` consecutive elements of the aggregate `a`
 * into b[0], treating `a.a` as a ring of N elements.
 *
 * The walk starts at element (blockIdx.x % N) and wraps from N-1 back to 0.
 * b[0] is reset to zero before accumulation, and every step is applied
 * directly to b[0] (no local accumulator).
 *
 * The function is marked noinline so the compiler keeps it as a separate
 * function rather than folding it into the calling kernel.
 * NOTE(review): presumably this is what forces the const aggregate to be
 * passed through a real call -- confirm against the test's intent.
 *
 * @param a    aggregate passed by value (const); only its `a` member is read
 * @param b    output pointer; only b[0] is written
 * @param iter number of elements to accumulate; no accumulation if <= 0
 */
template <typename T, int N>
__attribute((noinline))
__device__ void k_const_aggregate_callee(const A<T, N> a, T* b, const int iter) {
  // Starting slot inside the ring, derived from the block index.
  int slot = blockIdx.x % N;

  b[0] = static_cast<T>(0);

  for (int step = 0; step < iter; ++step) {
    b[0] += a.a[slot];

    // Advance one slot, wrapping to the front once the last slot was used.
    if (slot == N - 1) {
      slot = 0;
    } else {
      slot = slot + 1;
    }
  }
}
48
/**
 * Kernel entry point that forwards its arguments unchanged to
 * k_const_aggregate_callee, which performs the accumulation into b[0].
 *
 * @param a    aggregate passed by value (const) and handed on to the callee
 * @param b    output pointer handed on to the callee
 * @param iter iteration count handed on to the callee
 */
template <typename T, int N>
__global__ void k_const_aggregate_caller(const A<T, N> a, T* b, const int iter) {
  // Template arguments are spelled out; they match what deduction would pick.
  k_const_aggregate_callee<T, N>(a, b, iter);
}

// Explicit instantiation for an A<int, num_elements> aggregate.
template __global__ void k_const_aggregate_caller(const A<int, num_elements>, int*, const int);
53
+
54
+
38
55
#endif
39
56
40
57
template <typename T, int N>
@@ -48,3 +65,15 @@ void host_k_const_aggregate(const A<T, N> a, T* b, const int blockidx, const int
48
65
}
49
66
// Explicit instantiation of the host function template
// `host_k_const_aggregate` for an A<int, num_elements> aggregate argument.
template void host_k_const_aggregate (const A<int , num_elements>, int *, const int , const int );
50
67
68
+
69
/**
 * Host-side computation of the ring sum: accumulates `iter` consecutive
 * elements of `a.a` into b[0], starting at the slot selected by `blockidx`
 * and wrapping from N-1 back to 0.
 * NOTE(review): presumably the CPU reference for kernel `k` -- confirm
 * against the test driver.
 *
 * The starting index is wrapped into [0, N), mirroring the
 * `blockIdx.x % N` start used by the device code in this file. Without the
 * wrap, a `blockidx` outside [0, N) would index past the array and never
 * re-enter the ring, since the wrap test only fires at exactly N-1.
 * Results for in-range `blockidx` values are unchanged.
 *
 * @param a        aggregate passed by value; only its `a` member is read
 * @param b        output pointer; only b[0] is written (reset to zero first)
 * @param blockidx block index whose result is being reproduced; any int is
 *                 accepted and reduced modulo N (N must be positive)
 * @param iter     number of elements to accumulate; no accumulation if <= 0
 */
template <typename T, int N>
void host_k(A<T, N> a, T* b, const int blockidx, const int iter) {
  // Normalise the start slot. `%` keeps the sign of the dividend in C++,
  // so shift a negative remainder back into range.
  int idx = blockidx % N;
  if (idx < 0) {
    idx += N;
  }

  b[0] = static_cast<T>(0);
  for (int i = 0; i < iter; ++i) {
    b[0] += a.a[idx];
    idx = (idx == N - 1) ? 0 : idx + 1;
  }
}

// Explicit instantiation for an A<int, num_elements> aggregate.
template void host_k(A<int, num_elements>, int*, const int, const int);
79
+
0 commit comments