Skip to content

Commit 1e7c6c5

Browse files
author
Jessica Shi
committed
add rest of gpu schedules, make tweaks
1 parent 735be6f commit 1e7c6c5

File tree

3 files changed

+140
-61
lines changed

3 files changed

+140
-61
lines changed

codegen.html

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,7 @@ <h6 style="margin-bottom: 0px; margin-top: 18px">Input a tensor algebra expressi
151151
</button>
152152
</div>
153153
</div>
154-
<table class="mdl-data-table mdl-js-data-table" style="width: 100%;
155-
margin-bottom: 8px">
154+
<table class="mdl-data-table mdl-js-data-table" style="width: 100%; margin-bottom: 8px">
156155
<tbody id="tblSchedule">
157156
</tbody>
158157
</table>

javascripts/default-schedules.js

Lines changed: 109 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
11

2-
var NNZ_PER_THREAD = 8;
3-
var WARP_SIZE = 32;
4-
var BLOCK_SIZE = 256;
5-
62
var default_CPU_schedules = {
73
spmv: [
84
{
@@ -11,7 +7,6 @@ var default_CPU_schedules = {
117
},
128
{
139
command: "reorder",
14-
numReordered: 3,
1510
parameters: ["i0", "i1", "j"]
1611
},
1712
{
@@ -35,7 +30,6 @@ var default_CPU_schedules = {
3530
},
3631
{
3732
command: "reorder",
38-
numReordered: 3,
3933
parameters: ["chunk", "fpos2", "k"]
4034
},
4135
{
@@ -46,12 +40,11 @@ var default_CPU_schedules = {
4640
mttkrp: [
4741
{
4842
command: "reorder",
49-
numReordered: 4,
5043
parameters: ["i", "k", "l", "j"]
5144
},
5245
{
5346
command: "precompute",
54-
parameters: ["j", "j", "B(i,k,l) * D(l,j)"]
47+
parameters: ["B(i,k,l) * D(l,j)", "j", "j"]
5548
},
5649
{
5750
command: "split",
@@ -64,6 +57,7 @@ var default_CPU_schedules = {
6457
]
6558
}
6659

60+
6761
var default_GPU_schedules = {
6862
spmv: [
6963
{
@@ -76,28 +70,78 @@ var default_GPU_schedules = {
7670
},
7771
{
7872
command: "split",
79-
parameters: ["fpos", "block", "fpos1", NNZ_PER_THREAD * BLOCK_SIZE]
73+
parameters: ["fpos", "block", "fpos1", 2048]
8074
},
8175
{
8276
command: "split",
83-
parameters: ["fpos1", "warp", "fpos2", NNZ_PER_THREAD * WARP_SIZE]
77+
parameters: ["fpos1", "warp", "fpos2", 256]
8478
},
8579
{
8680
command: "split",
87-
parameters: ["fpos2", "thread", "thr_nz", NNZ_PER_THREAD]
81+
parameters: ["fpos2", "thread", "thr_nz", 8]
8882
},
8983
{
9084
command: "reorder",
91-
numReordered: 4,
9285
parameters: ["block", "warp", "thread", "thr_nz"]
9386
},
9487
{
9588
command: "precompute",
96-
parameters: ["thr_nz", "thr_nz_pre", "A(i, j) * x(j)"]
89+
parameters: ["A(i, j) * x(j)", "thr_nz", "thr_nz_pre"]
90+
},
91+
{
92+
command: "unroll",
93+
parameters: ["thr_nz_pre", 8]
94+
},
95+
{
96+
command: "parallelize",
97+
parameters: ["block", "GPU Block", "Ignore Races"]
98+
},
99+
{
100+
command: "parallelize",
101+
parameters: ["warp", "GPU Warp", "Ignore Races"]
102+
},
103+
{
104+
command: "parallelize",
105+
parameters: ["thread", "GPU Thread", "Atomics"]
106+
}
107+
],
108+
add: [],
109+
ttv: [
110+
{
111+
command: "fuse",
112+
parameters: ["j", "k", "jk"]
113+
},
114+
{
115+
command: "fuse",
116+
parameters: ["i", "jk", "f"]
117+
},
118+
{
119+
command: "pos",
120+
parameters: ["f", "fpos", "B"]
121+
},
122+
{
123+
command: "split",
124+
parameters: ["fpos", "block", "fpos1", 2048]
125+
},
126+
{
127+
command: "split",
128+
parameters: ["fpos1", "warp", "fpos2", 256]
129+
},
130+
{
131+
command: "split",
132+
parameters: ["fpos2", "thread", "thr_nz", 8]
133+
},
134+
{
135+
command: "reorder",
136+
parameters: ["block", "warp", "thread", "thr_nz"]
137+
},
138+
{
139+
command: "precompute",
140+
parameters: ["B(i, j, k) * c(k)", "thr_nz", "thr_nz_pre"]
97141
},
98142
{
99143
command: "unroll",
100-
parameters: ["thr_nz_pre", NNZ_PER_THREAD]
144+
parameters: ["thr_nz_pre", 8]
101145
},
102146
{
103147
command: "parallelize",
@@ -111,5 +155,55 @@ var default_GPU_schedules = {
111155
command: "parallelize",
112156
parameters: ["thread", "GPU Thread", "Atomics"]
113157
}
114-
]
158+
],
159+
mttkrp: [
160+
{
161+
command: "reorder",
162+
parameters: ["i", "k", "l", "j"]
163+
},
164+
{
165+
command: "fuse",
166+
parameters: ["k", "l", "kl"]
167+
},
168+
{
169+
command: "fuse",
170+
parameters: ["i", "kl", "f"]
171+
},
172+
{
173+
command: "pos",
174+
parameters: ["f", "fpos", "B"]
175+
},
176+
{
177+
command: "split",
178+
parameters: ["fpos", "block", "fpos1", 128]
179+
},
180+
{
181+
command: "split",
182+
parameters: ["fpos1", "warp", "nnz", 16]
183+
},
184+
{
185+
command: "split",
186+
parameters: ["j", "dense_un", "thread", 32]
187+
},
188+
{
189+
command: "bound",
190+
parameters: ["dense_un", "dense_val", 1, "Max Exact"]
191+
},
192+
{
193+
command: "reorder",
194+
parameters: ["block", "warp", "dense_val", "thread", "nnz"]
195+
},
196+
{
197+
command: "parallelize",
198+
parameters: ["block", "GPU Block", "Ignore Races"]
199+
},
200+
{
201+
command: "parallelize",
202+
parameters: ["warp", "GPU Warp", "Ignore Races"]
203+
},
204+
{
205+
command: "parallelize",
206+
parameters: ["thread", "GPU Thread", "Atomics"]
207+
}
208+
],
115209
}

0 commit comments

Comments
 (0)