Skip to content

Commit faa3868

Browse files
committed
Updated web tool example code
1 parent 22b8ffb commit faa3868

8 files changed

+124
-59
lines changed

examples/mttkrp_full.c

+7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
#include <math.h>
1010
#include <complex.h>
1111
#include <string.h>
12+
#if _OPENMP
13+
#include <omp.h>
14+
#endif
1215
#define TACO_MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
1316
#define TACO_MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b))
1417
#define TACO_DEREF(_a) (((___context___*)(*__ctx__))->_a)
@@ -26,6 +29,10 @@ typedef struct {
2629
int32_t vals_size; // values array size
2730
} taco_tensor_t;
2831
#endif
32+
#if !_OPENMP
33+
int omp_get_thread_num() { return 0; }
34+
int omp_get_max_threads() { return 1; }
35+
#endif
2936
int cmp(const void *a, const void *b) {
3037
return *((const int*)a) - *((const int*)b);
3138
}

examples/spadd_full.c

+7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
#include <math.h>
1010
#include <complex.h>
1111
#include <string.h>
12+
#if _OPENMP
13+
#include <omp.h>
14+
#endif
1215
#define TACO_MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
1316
#define TACO_MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b))
1417
#define TACO_DEREF(_a) (((___context___*)(*__ctx__))->_a)
@@ -26,6 +29,10 @@ typedef struct {
2629
int32_t vals_size; // values array size
2730
} taco_tensor_t;
2831
#endif
32+
#if !_OPENMP
33+
int omp_get_thread_num() { return 0; }
34+
int omp_get_max_threads() { return 1; }
35+
#endif
2936
int cmp(const void *a, const void *b) {
3037
return *((const int*)a) - *((const int*)b);
3138
}

examples/spgemm_assembly.c

+18-10
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,15 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
1717
int32_t* restrict A2_nnz = 0;
1818
A2_nnz = (int32_t*)malloc(sizeof(int32_t) * B1_dimension);
1919

20+
int32_t* restrict qworkspace_index_list_all = 0;
21+
qworkspace_index_list_all = (int32_t*)malloc(sizeof(int32_t) * (C2_dimension * omp_get_max_threads()));
22+
bool* restrict qworkspace_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool));
23+
2024
#pragma omp parallel for schedule(runtime)
2125
for (int32_t i = 0; i < B1_dimension; i++) {
2226
int32_t qworkspace_index_list_size = 0;
23-
int32_t* restrict qworkspace_index_list = 0;
24-
qworkspace_index_list = (int32_t*)malloc(sizeof(int32_t) * C2_dimension);
25-
bool* restrict qworkspace_already_set = calloc(C2_dimension, sizeof(bool));
27+
int32_t* restrict qworkspace_index_list = qworkspace_index_list_all + C2_dimension * omp_get_thread_num();
28+
bool* restrict qworkspace_already_set = qworkspace_already_set_all + C2_dimension * omp_get_thread_num();
2629
for (int32_t kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) {
2730
int32_t k = B2_crd[kB];
2831
for (int32_t jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) {
@@ -41,10 +44,11 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
4144
qworkspace_already_set[j] = 0;
4245
}
4346
A2_nnz[i] = tjA2_nnz_val;
44-
free(qworkspace_index_list);
45-
free(qworkspace_already_set);
4647
}
4748

49+
free(qworkspace_index_list_all);
50+
free(qworkspace_already_set_all);
51+
4852
A2_pos = (int32_t*)malloc(sizeof(int32_t) * (A1_dimension + 1));
4953
A2_pos[0] = 0;
5054
for (int32_t i = 0; i < A1_dimension; i++) {
@@ -53,12 +57,15 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
5357
A2_crd = (int32_t*)malloc(sizeof(int32_t) * A2_pos[A1_dimension]);
5458
A_vals = (double*)malloc(sizeof(double) * A2_pos[A1_dimension]);
5559

60+
int32_t* restrict workspace_index_list_all = 0;
61+
workspace_index_list_all = (int32_t*)malloc(sizeof(int32_t) * (C2_dimension * omp_get_max_threads()));
62+
bool* restrict workspace_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool));
63+
5664
#pragma omp parallel for schedule(runtime)
5765
for (int32_t i = 0; i < B1_dimension; i++) {
5866
int32_t workspace_index_list_size = 0;
59-
int32_t* restrict workspace_index_list = 0;
60-
workspace_index_list = (int32_t*)malloc(sizeof(int32_t) * C2_dimension);
61-
bool* restrict workspace_already_set = calloc(C2_dimension, sizeof(bool));
67+
int32_t* restrict workspace_index_list = workspace_index_list_all + C2_dimension * omp_get_thread_num();
68+
bool* restrict workspace_already_set = workspace_already_set_all + C2_dimension * omp_get_thread_num();
6269
for (int32_t kB0 = B2_pos[i]; kB0 < B2_pos[(i + 1)]; kB0++) {
6370
int32_t k = B2_crd[kB0];
6471
for (int32_t jC0 = C2_pos[k]; jC0 < C2_pos[(k + 1)]; jC0++) {
@@ -79,10 +86,11 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
7986
A2_crd[pA2] = j;
8087
workspace_already_set[j] = 0;
8188
}
82-
free(workspace_index_list);
83-
free(workspace_already_set);
8489
}
8590

91+
free(workspace_index_list_all);
92+
free(workspace_already_set_all);
93+
8694
for (int32_t p = 0; p < A1_dimension; p++) {
8795
A2_pos[A1_dimension - p] = A2_pos[((A1_dimension - p) - 1)];
8896
}

examples/spgemm_compute.c

+13-8
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,18 @@ int compute(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
1515
int* restrict C2_crd = (int*)(C->indices[1][1]);
1616
double* restrict C_vals = (double*)(C->vals);
1717

18+
double* restrict workspace_all = 0;
19+
int32_t* restrict workspace_index_list_all = 0;
20+
workspace_index_list_all = (int32_t*)malloc(sizeof(int32_t) * (C2_dimension * omp_get_max_threads()));
21+
bool* restrict workspace_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool));
22+
workspace_all = (double*)malloc(sizeof(double) * (C2_dimension * omp_get_max_threads()));
23+
1824
#pragma omp parallel for schedule(runtime)
1925
for (int32_t i = 0; i < B1_dimension; i++) {
2026
int32_t workspace_index_list_size = 0;
21-
double* restrict workspace = 0;
22-
int32_t* restrict workspace_index_list = 0;
23-
workspace_index_list = (int32_t*)malloc(sizeof(int32_t) * C2_dimension);
24-
bool* restrict workspace_already_set = calloc(C2_dimension, sizeof(bool));
25-
workspace = (double*)malloc(sizeof(double) * C2_dimension);
27+
double* restrict workspace = workspace_all + C2_dimension * omp_get_thread_num();
28+
int32_t* restrict workspace_index_list = workspace_index_list_all + C2_dimension * omp_get_thread_num();
29+
bool* restrict workspace_already_set = workspace_already_set_all + C2_dimension * omp_get_thread_num();
2630
for (int32_t kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) {
2731
int32_t k = B2_crd[kB];
2832
for (int32_t jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) {
@@ -46,11 +50,12 @@ int compute(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
4650
A_vals[pA2] = workspace[j];
4751
workspace_already_set[j] = 0;
4852
}
49-
free(workspace_index_list);
50-
free(workspace_already_set);
51-
free(workspace);
5253
}
5354

55+
free(workspace_index_list_all);
56+
free(workspace_already_set_all);
57+
free(workspace_all);
58+
5459
for (int32_t p = 0; p < A1_dimension; p++) {
5560
A2_pos[A1_dimension - p] = A2_pos[((A1_dimension - p) - 1)];
5661
}

examples/spgemm_full.c

+60-31
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
#include <math.h>
1010
#include <complex.h>
1111
#include <string.h>
12+
#if _OPENMP
13+
#include <omp.h>
14+
#endif
1215
#define TACO_MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
1316
#define TACO_MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b))
1417
#define TACO_DEREF(_a) (((___context___*)(*__ctx__))->_a)
@@ -26,6 +29,10 @@ typedef struct {
2629
int32_t vals_size; // values array size
2730
} taco_tensor_t;
2831
#endif
32+
#if !_OPENMP
33+
int omp_get_thread_num() { return 0; }
34+
int omp_get_max_threads() { return 1; }
35+
#endif
2936
int cmp(const void *a, const void *b) {
3037
return *((const int*)a) - *((const int*)b);
3138
}
@@ -122,14 +129,18 @@ int compute(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
122129
int* restrict C2_crd = (int*)(C->indices[1][1]);
123130
double* restrict C_vals = (double*)(C->vals);
124131

132+
double* restrict workspace_all = 0;
133+
int32_t* restrict workspace_index_list_all = 0;
134+
workspace_index_list_all = (int32_t*)malloc(sizeof(int32_t) * (C2_dimension * omp_get_max_threads()));
135+
bool* restrict workspace_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool));
136+
workspace_all = (double*)malloc(sizeof(double) * (C2_dimension * omp_get_max_threads()));
137+
125138
#pragma omp parallel for schedule(runtime)
126139
for (int32_t i = 0; i < B1_dimension; i++) {
127140
int32_t workspace_index_list_size = 0;
128-
double* restrict workspace = 0;
129-
int32_t* restrict workspace_index_list = 0;
130-
workspace_index_list = (int32_t*)malloc(sizeof(int32_t) * C2_dimension);
131-
bool* restrict workspace_already_set = calloc(C2_dimension, sizeof(bool));
132-
workspace = (double*)malloc(sizeof(double) * C2_dimension);
141+
double* restrict workspace = workspace_all + C2_dimension * omp_get_thread_num();
142+
int32_t* restrict workspace_index_list = workspace_index_list_all + C2_dimension * omp_get_thread_num();
143+
bool* restrict workspace_already_set = workspace_already_set_all + C2_dimension * omp_get_thread_num();
133144
for (int32_t kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) {
134145
int32_t k = B2_crd[kB];
135146
for (int32_t jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) {
@@ -153,11 +164,12 @@ int compute(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
153164
A_vals[pA2] = workspace[j];
154165
workspace_already_set[j] = 0;
155166
}
156-
free(workspace_index_list);
157-
free(workspace_already_set);
158-
free(workspace);
159167
}
160168

169+
free(workspace_index_list_all);
170+
free(workspace_already_set_all);
171+
free(workspace_all);
172+
161173
for (int32_t p = 0; p < A1_dimension; p++) {
162174
A2_pos[A1_dimension - p] = A2_pos[((A1_dimension - p) - 1)];
163175
}
@@ -184,12 +196,15 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
184196
int32_t* restrict A2_nnz = 0;
185197
A2_nnz = (int32_t*)malloc(sizeof(int32_t) * B1_dimension);
186198

199+
int32_t* restrict qworkspace_index_list_all = 0;
200+
qworkspace_index_list_all = (int32_t*)malloc(sizeof(int32_t) * (C2_dimension * omp_get_max_threads()));
201+
bool* restrict qworkspace_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool));
202+
187203
#pragma omp parallel for schedule(runtime)
188204
for (int32_t i = 0; i < B1_dimension; i++) {
189205
int32_t qworkspace_index_list_size = 0;
190-
int32_t* restrict qworkspace_index_list = 0;
191-
qworkspace_index_list = (int32_t*)malloc(sizeof(int32_t) * C2_dimension);
192-
bool* restrict qworkspace_already_set = calloc(C2_dimension, sizeof(bool));
206+
int32_t* restrict qworkspace_index_list = qworkspace_index_list_all + C2_dimension * omp_get_thread_num();
207+
bool* restrict qworkspace_already_set = qworkspace_already_set_all + C2_dimension * omp_get_thread_num();
193208
for (int32_t kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) {
194209
int32_t k = B2_crd[kB];
195210
for (int32_t jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) {
@@ -208,10 +223,11 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
208223
qworkspace_already_set[j] = 0;
209224
}
210225
A2_nnz[i] = tjA2_nnz_val;
211-
free(qworkspace_index_list);
212-
free(qworkspace_already_set);
213226
}
214227

228+
free(qworkspace_index_list_all);
229+
free(qworkspace_already_set_all);
230+
215231
A2_pos = (int32_t*)malloc(sizeof(int32_t) * (A1_dimension + 1));
216232
A2_pos[0] = 0;
217233
for (int32_t i = 0; i < A1_dimension; i++) {
@@ -220,12 +236,15 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
220236
A2_crd = (int32_t*)malloc(sizeof(int32_t) * A2_pos[A1_dimension]);
221237
A_vals = (double*)malloc(sizeof(double) * A2_pos[A1_dimension]);
222238

239+
int32_t* restrict workspace_index_list_all = 0;
240+
workspace_index_list_all = (int32_t*)malloc(sizeof(int32_t) * (C2_dimension * omp_get_max_threads()));
241+
bool* restrict workspace_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool));
242+
223243
#pragma omp parallel for schedule(runtime)
224244
for (int32_t i = 0; i < B1_dimension; i++) {
225245
int32_t workspace_index_list_size = 0;
226-
int32_t* restrict workspace_index_list = 0;
227-
workspace_index_list = (int32_t*)malloc(sizeof(int32_t) * C2_dimension);
228-
bool* restrict workspace_already_set = calloc(C2_dimension, sizeof(bool));
246+
int32_t* restrict workspace_index_list = workspace_index_list_all + C2_dimension * omp_get_thread_num();
247+
bool* restrict workspace_already_set = workspace_already_set_all + C2_dimension * omp_get_thread_num();
229248
for (int32_t kB0 = B2_pos[i]; kB0 < B2_pos[(i + 1)]; kB0++) {
230249
int32_t k = B2_crd[kB0];
231250
for (int32_t jC0 = C2_pos[k]; jC0 < C2_pos[(k + 1)]; jC0++) {
@@ -246,10 +265,11 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
246265
A2_crd[pA2] = j;
247266
workspace_already_set[j] = 0;
248267
}
249-
free(workspace_index_list);
250-
free(workspace_already_set);
251268
}
252269

270+
free(workspace_index_list_all);
271+
free(workspace_already_set_all);
272+
253273
for (int32_t p = 0; p < A1_dimension; p++) {
254274
A2_pos[A1_dimension - p] = A2_pos[((A1_dimension - p) - 1)];
255275
}
@@ -281,12 +301,15 @@ int evaluate(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
281301
int32_t* restrict A2_nnz = 0;
282302
A2_nnz = (int32_t*)malloc(sizeof(int32_t) * B1_dimension);
283303

304+
int32_t* restrict qworkspace_index_list_all = 0;
305+
qworkspace_index_list_all = (int32_t*)malloc(sizeof(int32_t) * (C2_dimension * omp_get_max_threads()));
306+
bool* restrict qworkspace_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool));
307+
284308
#pragma omp parallel for schedule(runtime)
285309
for (int32_t i = 0; i < B1_dimension; i++) {
286310
int32_t qworkspace_index_list_size = 0;
287-
int32_t* restrict qworkspace_index_list = 0;
288-
qworkspace_index_list = (int32_t*)malloc(sizeof(int32_t) * C2_dimension);
289-
bool* restrict qworkspace_already_set = calloc(C2_dimension, sizeof(bool));
311+
int32_t* restrict qworkspace_index_list = qworkspace_index_list_all + C2_dimension * omp_get_thread_num();
312+
bool* restrict qworkspace_already_set = qworkspace_already_set_all + C2_dimension * omp_get_thread_num();
290313
for (int32_t kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) {
291314
int32_t k = B2_crd[kB];
292315
for (int32_t jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) {
@@ -305,10 +328,11 @@ int evaluate(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
305328
qworkspace_already_set[j] = 0;
306329
}
307330
A2_nnz[i] = tjA2_nnz_val;
308-
free(qworkspace_index_list);
309-
free(qworkspace_already_set);
310331
}
311332

333+
free(qworkspace_index_list_all);
334+
free(qworkspace_already_set_all);
335+
312336
A2_pos = (int32_t*)malloc(sizeof(int32_t) * (A1_dimension + 1));
313337
A2_pos[0] = 0;
314338
for (int32_t i = 0; i < A1_dimension; i++) {
@@ -317,14 +341,18 @@ int evaluate(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
317341
A2_crd = (int32_t*)malloc(sizeof(int32_t) * A2_pos[A1_dimension]);
318342
A_vals = (double*)malloc(sizeof(double) * A2_pos[A1_dimension]);
319343

344+
double* restrict workspace_all = 0;
345+
int32_t* restrict workspace_index_list_all = 0;
346+
workspace_index_list_all = (int32_t*)malloc(sizeof(int32_t) * (C2_dimension * omp_get_max_threads()));
347+
bool* restrict workspace_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool));
348+
workspace_all = (double*)malloc(sizeof(double) * (C2_dimension * omp_get_max_threads()));
349+
320350
#pragma omp parallel for schedule(runtime)
321351
for (int32_t i = 0; i < B1_dimension; i++) {
322352
int32_t workspace_index_list_size = 0;
323-
double* restrict workspace = 0;
324-
int32_t* restrict workspace_index_list = 0;
325-
workspace_index_list = (int32_t*)malloc(sizeof(int32_t) * C2_dimension);
326-
bool* restrict workspace_already_set = calloc(C2_dimension, sizeof(bool));
327-
workspace = (double*)malloc(sizeof(double) * C2_dimension);
353+
double* restrict workspace = workspace_all + C2_dimension * omp_get_thread_num();
354+
int32_t* restrict workspace_index_list = workspace_index_list_all + C2_dimension * omp_get_thread_num();
355+
bool* restrict workspace_already_set = workspace_already_set_all + C2_dimension * omp_get_thread_num();
328356
for (int32_t kB0 = B2_pos[i]; kB0 < B2_pos[(i + 1)]; kB0++) {
329357
int32_t k = B2_crd[kB0];
330358
for (int32_t jC0 = C2_pos[k]; jC0 < C2_pos[(k + 1)]; jC0++) {
@@ -350,11 +378,12 @@ int evaluate(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
350378
A_vals[pA2] = workspace[j];
351379
workspace_already_set[j] = 0;
352380
}
353-
free(workspace_index_list);
354-
free(workspace_already_set);
355-
free(workspace);
356381
}
357382

383+
free(workspace_index_list_all);
384+
free(workspace_already_set_all);
385+
free(workspace_all);
386+
358387
for (int32_t p = 0; p < A1_dimension; p++) {
359388
A2_pos[A1_dimension - p] = A2_pos[((A1_dimension - p) - 1)];
360389
}

examples/spmv_full.c

+7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
#include <math.h>
1010
#include <complex.h>
1111
#include <string.h>
12+
#if _OPENMP
13+
#include <omp.h>
14+
#endif
1215
#define TACO_MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
1316
#define TACO_MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b))
1417
#define TACO_DEREF(_a) (((___context___*)(*__ctx__))->_a)
@@ -26,6 +29,10 @@ typedef struct {
2629
int32_t vals_size; // values array size
2730
} taco_tensor_t;
2831
#endif
32+
#if !_OPENMP
33+
int omp_get_thread_num() { return 0; }
34+
int omp_get_max_threads() { return 1; }
35+
#endif
2936
int cmp(const void *a, const void *b) {
3037
return *((const int*)a) - *((const int*)b);
3138
}

examples/ttv_assembly.c

+2-4
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *c) {
2323
int32_t tjA2_nnz_val = 0;
2424
for (int32_t jB = B2_pos[iB]; jB < B2_pos[(iB + 1)]; jB++) {
2525
bool qtkA_val = 0;
26-
for (int32_t kB = B3_pos[jB]; kB < B3_pos[(jB + 1)]; kB++) {
27-
int32_t k = B3_crd[kB];
26+
if (B3_pos[jB] < B3_pos[(jB + 1)]) {
2827
qtkA_val = 1;
2928
}
3029
tjA2_nnz_val += (int32_t)qtkA_val;
@@ -47,8 +46,7 @@ int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *c) {
4746
for (int32_t jB0 = B2_pos[iB0]; jB0 < B2_pos[(iB0 + 1)]; jB0++) {
4847
int32_t j = B2_crd[jB0];
4948
bool tkA_set = 0;
50-
for (int32_t kB0 = B3_pos[jB0]; kB0 < B3_pos[(jB0 + 1)]; kB0++) {
51-
int32_t k = B3_crd[kB0];
49+
if (B3_pos[jB0] < B3_pos[(jB0 + 1)]) {
5250
tkA_set = 1;
5351
}
5452
if (tkA_set) {

0 commit comments

Comments
 (0)