1
1
2
- var NNZ_PER_THREAD = 8 ;
3
- var WARP_SIZE = 32 ;
4
- var BLOCK_SIZE = 256 ;
5
-
6
2
var default_CPU_schedules = {
7
3
spmv : [
8
4
{
@@ -11,7 +7,6 @@ var default_CPU_schedules = {
11
7
} ,
12
8
{
13
9
command : "reorder" ,
14
- numReordered : 3 ,
15
10
parameters : [ "i0" , "i1" , "j" ]
16
11
} ,
17
12
{
@@ -35,7 +30,6 @@ var default_CPU_schedules = {
35
30
} ,
36
31
{
37
32
command : "reorder" ,
38
- numReordered : 3 ,
39
33
parameters : [ "chunk" , "fpos2" , "k" ]
40
34
} ,
41
35
{
@@ -46,12 +40,11 @@ var default_CPU_schedules = {
46
40
mttkrp : [
47
41
{
48
42
command : "reorder" ,
49
- numReordered : 4 ,
50
43
parameters : [ "i" , "k" , "l" , "j" ]
51
44
} ,
52
45
{
53
46
command : "precompute" ,
54
- parameters : [ "j" , "j" , " B(i,k,l) * D(l,j)"]
47
+ parameters : [ "B(i,k,l) * D(l,j)" , "j" , "j" ]
55
48
} ,
56
49
{
57
50
command : "split" ,
@@ -64,6 +57,7 @@ var default_CPU_schedules = {
64
57
]
65
58
}
66
59
60
+
67
61
var default_GPU_schedules = {
68
62
spmv : [
69
63
{
@@ -76,28 +70,78 @@ var default_GPU_schedules = {
76
70
} ,
77
71
{
78
72
command : "split" ,
79
- parameters : [ "fpos" , "block" , "fpos1" , NNZ_PER_THREAD * BLOCK_SIZE ]
73
+ parameters : [ "fpos" , "block" , "fpos1" , 2048 ]
80
74
} ,
81
75
{
82
76
command : "split" ,
83
- parameters : [ "fpos1" , "warp" , "fpos2" , NNZ_PER_THREAD * WARP_SIZE ]
77
+ parameters : [ "fpos1" , "warp" , "fpos2" , 256 ]
84
78
} ,
85
79
{
86
80
command : "split" ,
87
- parameters : [ "fpos2" , "thread" , "thr_nz" , NNZ_PER_THREAD ]
81
+ parameters : [ "fpos2" , "thread" , "thr_nz" , 8 ]
88
82
} ,
89
83
{
90
84
command : "reorder" ,
91
- numReordered : 4 ,
92
85
parameters : [ "block" , "warp" , "thread" , "thr_nz" ]
93
86
} ,
94
87
{
95
88
command : "precompute" ,
96
- parameters : [ "thr_nz" , "thr_nz_pre" , "A(i, j) * x(j)" ]
89
+ parameters : [ "A(i, j) * x(j)" , "thr_nz" , "thr_nz_pre" ]
90
+ } ,
91
+ {
92
+ command : "unroll" ,
93
+ parameters : [ "thr_nz_pre" , 8 ]
94
+ } ,
95
+ {
96
+ command : "parallelize" ,
97
+ parameters : [ "block" , "GPU Block" , "Ignore Races" ]
98
+ } ,
99
+ {
100
+ command : "parallelize" ,
101
+ parameters : [ "warp" , "GPU Warp" , "Ignore Races" ]
102
+ } ,
103
+ {
104
+ command : "parallelize" ,
105
+ parameters : [ "thread" , "GPU Thread" , "Atomics" ]
106
+ }
107
+ ] ,
108
+ add : [ ] ,
109
+ ttv : [
110
+ {
111
+ command : "fuse" ,
112
+ parameters : [ "j" , "k" , "jk" ]
113
+ } ,
114
+ {
115
+ command : "fuse" ,
116
+ parameters : [ "i" , "jk" , "f" ]
117
+ } ,
118
+ {
119
+ command : "pos" ,
120
+ parameters : [ "f" , "fpos" , "B" ]
121
+ } ,
122
+ {
123
+ command : "split" ,
124
+ parameters : [ "fpos" , "block" , "fpos1" , 2048 ]
125
+ } ,
126
+ {
127
+ command : "split" ,
128
+ parameters : [ "fpos1" , "warp" , "fpos2" , 256 ]
129
+ } ,
130
+ {
131
+ command : "split" ,
132
+ parameters : [ "fpos2" , "thread" , "thr_nz" , 8 ]
133
+ } ,
134
+ {
135
+ command : "reorder" ,
136
+ parameters : [ "block" , "warp" , "thread" , "thr_nz" ]
137
+ } ,
138
+ {
139
+ command : "precompute" ,
140
+ parameters : [ "B(i, j, k) * c(k)" , "thr_nz" , "thr_nz_pre" ]
97
141
} ,
98
142
{
99
143
command : "unroll" ,
100
- parameters : [ "thr_nz_pre" , NNZ_PER_THREAD ]
144
+ parameters : [ "thr_nz_pre" , 8 ]
101
145
} ,
102
146
{
103
147
command : "parallelize" ,
@@ -111,5 +155,55 @@ var default_GPU_schedules = {
111
155
command : "parallelize" ,
112
156
parameters : [ "thread" , "GPU Thread" , "Atomics" ]
113
157
}
114
- ]
158
+ ] ,
159
+ mttkrp : [
160
+ {
161
+ command : "reorder" ,
162
+ parameters : [ "i" , "k" , "l" , "j" ]
163
+ } ,
164
+ {
165
+ command : "fuse" ,
166
+ parameters : [ "k" , "l" , "kl" ]
167
+ } ,
168
+ {
169
+ command : "fuse" ,
170
+ parameters : [ "i" , "kl" , "f" ]
171
+ } ,
172
+ {
173
+ command : "pos" ,
174
+ parameters : [ "f" , "fpos" , "B" ]
175
+ } ,
176
+ {
177
+ command : "split" ,
178
+ parameters : [ "fpos" , "block" , "fpos1" , 128 ]
179
+ } ,
180
+ {
181
+ command : "split" ,
182
+ parameters : [ "fpos1" , "warp" , "nnz" , 16 ]
183
+ } ,
184
+ {
185
+ command : "split" ,
186
+ parameters : [ "j" , "dense_un" , "thread" , 32 ]
187
+ } ,
188
+ {
189
+ command : "bound" ,
190
+ parameters : [ "dense_un" , "dense_val" , 1 , "Max Exact" ]
191
+ } ,
192
+ {
193
+ command : "reorder" ,
194
+ parameters : [ "block" , "warp" , "dense_val" , "thread" , "nnz" ]
195
+ } ,
196
+ {
197
+ command : "parallelize" ,
198
+ parameters : [ "block" , "GPU Block" , "Ignore Races" ]
199
+ } ,
200
+ {
201
+ command : "parallelize" ,
202
+ parameters : [ "warp" , "GPU Warp" , "Ignore Races" ]
203
+ } ,
204
+ {
205
+ command : "parallelize" ,
206
+ parameters : [ "thread" , "GPU Thread" , "Atomics" ]
207
+ }
208
+ ] ,
115
209
}
0 commit comments