Skip to content

Commit 7bc27d9

Browse files
committed
Updated examples
1 parent cefc4aa commit 7bc27d9

12 files changed

+1710
-370
lines changed

examples/add_assembly.c

Lines changed: 93 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -1,57 +1,100 @@
1 1
// Generated by the Tensor Algebra Compiler (tensor-compiler.org)
2-
/* init_alloc_size should be initialized to a power of two */
3-
int32_t init_alloc_size = 1048576;
4-
allocate A2_pos[init_alloc_size]
5-
allocate A2_idx[init_alloc_size]
6-
A2_pos[0] = 0;
7-
8-
int32_t pA1 = 0;
9-
int32_t pA2 = A2_pos[pA1];
10-
for (int32_t iB = 0; iB < B1_size; iB++) {
11-
int32_t pB2 = B2_pos[iB];
12-
int32_t pC2 = C2_pos[iB];
13-
while ((pB2 < B2_pos[iB + 1]) && (pC2 < C2_pos[iB + 1])) {
14-
int32_t jB = B2_idx[pB2];
15-
int32_t jC = C2_idx[pC2];
16-
int32_t j = min(jB, jC);
17-
if ((jB == j) && (jC == j)) {
18-
A2_idx[pA2] = j;
19-
pA2++;
20-
if ((0 == ((pA2 + 1) & pA2)) && (init_alloc_size <= (pA2 + 1)))
21-
reallocate A2_idx[(2 * (pA2 + 1))]
2+
// taco "A(i,j)=B(i,j)+C(i,j)" -f=A:ds:0,1 -f=B:ds:0,1 -f=C:ds:0,1 -write-source=taco_kernel.c -write-compute=taco_compute.c -write-assembly=taco_assembly.c
3+
4+
int assemble(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
5+
int A1_dimension = (int)(A->dimensions[0]);
6+
int* restrict A2_pos = (int*)(A->indices[1][0]);
7+
int* restrict A2_crd = (int*)(A->indices[1][1]);
8+
double* restrict A_vals = (double*)(A->vals);
9+
int B1_dimension = (int)(B->dimensions[0]);
10+
int* restrict B2_pos = (int*)(B->indices[1][0]);
11+
int* restrict B2_crd = (int*)(B->indices[1][1]);
12+
int C1_dimension = (int)(C->dimensions[0]);
13+
int* restrict C2_pos = (int*)(C->indices[1][0]);
14+
int* restrict C2_crd = (int*)(C->indices[1][1]);
15+
16+
A2_pos = (int32_t*)malloc(sizeof(int32_t) * (A1_dimension + 1));
17+
A2_pos[0] = 0;
18+
for (int32_t pA2 = 1; pA2 < (A1_dimension + 1); pA2++) {
19+
A2_pos[pA2] = 0;
20+
}
21+
int32_t A2_crd_size = 1048576;
22+
A2_crd = (int32_t*)malloc(sizeof(int32_t) * A2_crd_size);
23+
int32_t jA = 0;
24+
25+
for (int32_t i = 0; i < C1_dimension; i++) {
26+
int32_t pA2_begin = jA;
27+
28+
int32_t jB = B2_pos[i];
29+
int32_t pB2_end = B2_pos[(i + 1)];
30+
int32_t jC = C2_pos[i];
31+
int32_t pC2_end = C2_pos[(i + 1)];
32+
33+
while (jB < pB2_end && jC < pC2_end) {
34+
int32_t jB0 = B2_crd[jB];
35+
int32_t jC0 = C2_crd[jC];
36+
int32_t j = TACO_MIN(jB0,jC0);
37+
if (jB0 == j && jC0 == j) {
38+
if (A2_crd_size <= jA) {
39+
A2_crd = (int32_t*)realloc(A2_crd, sizeof(int32_t) * (A2_crd_size * 2));
40+
A2_crd_size *= 2;
41+
}
42+
A2_crd[jA] = j;
43+
jA++;
44+
}
45+
else if (jB0 == j) {
46+
if (A2_crd_size <= jA) {
47+
A2_crd = (int32_t*)realloc(A2_crd, sizeof(int32_t) * (A2_crd_size * 2));
48+
A2_crd_size *= 2;
49+
}
50+
A2_crd[jA] = j;
51+
jA++;
52+
}
53+
else {
54+
if (A2_crd_size <= jA) {
55+
A2_crd = (int32_t*)realloc(A2_crd, sizeof(int32_t) * (A2_crd_size * 2));
56+
A2_crd_size *= 2;
57+
}
58+
A2_crd[jA] = j;
59+
jA++;
60+
}
61+
jB += (int32_t)(jB0 == j);
62+
jC += (int32_t)(jC0 == j);
22 63
}
23-
else if (jB == j) {
24-
A2_idx[pA2] = j;
25-
pA2++;
26-
if ((0 == ((pA2 + 1) & pA2)) && (init_alloc_size <= (pA2 + 1)))
27-
reallocate A2_idx[(2 * (pA2 + 1))]
64+
while (jB < pB2_end) {
65+
int32_t j = B2_crd[jB];
66+
if (A2_crd_size <= jA) {
67+
A2_crd = (int32_t*)realloc(A2_crd, sizeof(int32_t) * (A2_crd_size * 2));
68+
A2_crd_size *= 2;
69+
}
70+
A2_crd[jA] = j;
71+
jA++;
72+
jB++;
28 73
}
29-
else {
30-
A2_idx[pA2] = j;
31-
pA2++;
32-
if ((0 == ((pA2 + 1) & pA2)) && (init_alloc_size <= (pA2 + 1)))
33-
reallocate A2_idx[(2 * (pA2 + 1))]
74+
while (jC < pC2_end) {
75+
int32_t j = C2_crd[jC];
76+
if (A2_crd_size <= jA) {
77+
A2_crd = (int32_t*)realloc(A2_crd, sizeof(int32_t) * (A2_crd_size * 2));
78+
A2_crd_size *= 2;
79+
}
80+
A2_crd[jA] = j;
81+
jA++;
82+
jC++;
34 83
}
35-
if (jB == j) pB2++;
36-
if (jC == j) pC2++;
37-
}
38-
while (pB2 < B2_pos[iB + 1]) {
39-
int32_t jB0 = B2_idx[pB2];
40-
A2_idx[pA2] = jB0;
41-
pA2++;
42-
if ((0 == ((pA2 + 1) & pA2)) && (init_alloc_size <= (pA2 + 1)))
43-
reallocate A2_idx[(2 * (pA2 + 1))]
44-
pB2++;
84+
85+
A2_pos[i + 1] = jA - pA2_begin;
45 86
}
46-
while (pC2 < C2_pos[iB + 1]) {
47-
int32_t jC0 = C2_idx[pC2];
48-
A2_idx[pA2] = jC0;
49-
pA2++;
50-
if ((0 == ((pA2 + 1) & pA2)) && (init_alloc_size <= (pA2 + 1)))
51-
reallocate A2_idx[(2 * (pA2 + 1))]
52-
pC2++;
87+
88+
int32_t csA2 = 0;
89+
for (int32_t pA20 = 1; pA20 < (A1_dimension + 1); pA20++) {
90+
csA2 += A2_pos[pA20];
91+
A2_pos[pA20] = csA2;
53 92
}
54-
A2_pos[(iB + 1)] = pA2;
55-
}
56 93

57-
allocate A_vals[pA2]
94+
A_vals = (double*)malloc(sizeof(double) * jA);
95+
96+
A->indices[1][0] = (uint8_t*)(A2_pos);
97+
A->indices[1][1] = (uint8_t*)(A2_crd);
98+
A->vals = (uint8_t*)A_vals;
99+
return 0;
100+
}

examples/add_compute.c

Lines changed: 50 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,55 @@
1 1
// Generated by the Tensor Algebra Compiler (tensor-compiler.org)
2-
int32_t pA1 = 0;
3-
int32_t pA2 = A2_pos[pA1];
4-
for (int32_t iB = 0; iB < B1_size; iB++) {
5-
int32_t pB2 = B2_pos[iB];
6-
int32_t pC2 = C2_pos[iB];
7-
while ((pB2 < B2_pos[iB + 1]) && (pC2 < C2_pos[iB + 1])) {
8-
int32_t jB = B2_idx[pB2];
9-
int32_t jC = C2_idx[pC2];
10-
int32_t j = min(jB, jC);
11-
if ((jB == j) && (jC == j)) {
12-
A_vals[pA2] = B_vals[pB2] + C_vals[pC2];
13-
pA2++;
2+
// taco "A(i,j)=B(i,j)+C(i,j)" -f=A:ds:0,1 -f=B:ds:0,1 -f=C:ds:0,1 -write-source=taco_kernel.c -write-compute=taco_compute.c -write-assembly=taco_assembly.c
3+
4+
int compute(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
5+
int A1_dimension = (int)(A->dimensions[0]);
6+
double* restrict A_vals = (double*)(A->vals);
7+
int B1_dimension = (int)(B->dimensions[0]);
8+
int* restrict B2_pos = (int*)(B->indices[1][0]);
9+
int* restrict B2_crd = (int*)(B->indices[1][1]);
10+
double* restrict B_vals = (double*)(B->vals);
11+
int C1_dimension = (int)(C->dimensions[0]);
12+
int* restrict C2_pos = (int*)(C->indices[1][0]);
13+
int* restrict C2_crd = (int*)(C->indices[1][1]);
14+
double* restrict C_vals = (double*)(C->vals);
15+
16+
int32_t jA = 0;
17+
18+
for (int32_t i = 0; i < C1_dimension; i++) {
19+
int32_t jB = B2_pos[i];
20+
int32_t pB2_end = B2_pos[(i + 1)];
21+
int32_t jC = C2_pos[i];
22+
int32_t pC2_end = C2_pos[(i + 1)];
23+
24+
while (jB < pB2_end && jC < pC2_end) {
25+
int32_t jB0 = B2_crd[jB];
26+
int32_t jC0 = C2_crd[jC];
27+
int32_t j = TACO_MIN(jB0,jC0);
28+
if (jB0 == j && jC0 == j) {
29+
A_vals[jA] = B_vals[jB] + C_vals[jC];
30+
jA++;
31+
}
32+
else if (jB0 == j) {
33+
A_vals[jA] = B_vals[jB];
34+
jA++;
35+
}
36+
else {
37+
A_vals[jA] = C_vals[jC];
38+
jA++;
39+
}
40+
jB += (int32_t)(jB0 == j);
41+
jC += (int32_t)(jC0 == j);
14 42
}
15-
else if (jB == j) {
16-
A_vals[pA2] = B_vals[pB2];
17-
pA2++;
43+
while (jB < pB2_end) {
44+
A_vals[jA] = B_vals[jB];
45+
jA++;
46+
jB++;
18 47
}
19-
else {
20-
A_vals[pA2] = C_vals[pC2];
21-
pA2++;
48+
while (jC < pC2_end) {
49+
A_vals[jA] = C_vals[jC];
50+
jA++;
51+
jC++;
22 52
}
23-
if (jB == j) pB2++;
24-
if (jC == j) pC2++;
25 53
}
26-
while (pB2 < B2_pos[iB + 1]) {
27-
int32_t jB0 = B2_idx[pB2];
28-
A_vals[pA2] = B_vals[pB2];
29-
pA2++;
30-
pB2++;
31-
}
32-
while (pC2 < C2_pos[iB + 1]) {
33-
int32_t jC0 = C2_idx[pC2];
34-
A_vals[pA2] = C_vals[pC2];
35-
pA2++;
36-
pC2++;
37-
}
38-
}
54+
return 0;
55+
}

0 commit comments

Comments
 (0)