1
1
// Generated by the Tensor Algebra Compiler (tensor-compiler.org)
2
- /* init_alloc_size should be initialized to a power of two */
3
- int32_t init_alloc_size = 1048576 ;
4
- allocate A2_pos [init_alloc_size ]
5
- allocate A2_idx [init_alloc_size ]
6
- A2_pos [0 ] = 0 ;
7
-
8
- int32_t pA1 = 0 ;
9
- int32_t pA2 = A2_pos [pA1 ];
10
- for (int32_t iB = 0 ; iB < B1_size ; iB ++ ) {
11
- int32_t pB2 = B2_pos [iB ];
12
- int32_t pC2 = C2_pos [iB ];
13
- while ((pB2 < B2_pos [iB + 1 ]) && (pC2 < C2_pos [iB + 1 ])) {
14
- int32_t jB = B2_idx [pB2 ];
15
- int32_t jC = C2_idx [pC2 ];
16
- int32_t j = min (jB , jC );
17
- if ((jB == j ) && (jC == j )) {
18
- A2_idx [pA2 ] = j ;
19
- pA2 ++ ;
20
- if ((0 == ((pA2 + 1 ) & pA2 )) && (init_alloc_size <= (pA2 + 1 )))
21
- reallocate A2_idx [(2 * (pA2 + 1 ))]
2
+ // taco "A(i,j)=B(i,j)+C(i,j)" -f=A:ds:0,1 -f=B:ds:0,1 -f=C:ds:0,1 -write-source=taco_kernel.c -write-compute=taco_compute.c -write-assembly=taco_assembly.c
3
+
4
+ int assemble (taco_tensor_t * A , taco_tensor_t * B , taco_tensor_t * C ) {
5
+ int A1_dimension = (int )(A -> dimensions [0 ]);
6
+ int * restrict A2_pos = (int * )(A -> indices [1 ][0 ]);
7
+ int * restrict A2_crd = (int * )(A -> indices [1 ][1 ]);
8
+ double * restrict A_vals = (double * )(A -> vals );
9
+ int B1_dimension = (int )(B -> dimensions [0 ]);
10
+ int * restrict B2_pos = (int * )(B -> indices [1 ][0 ]);
11
+ int * restrict B2_crd = (int * )(B -> indices [1 ][1 ]);
12
+ int C1_dimension = (int )(C -> dimensions [0 ]);
13
+ int * restrict C2_pos = (int * )(C -> indices [1 ][0 ]);
14
+ int * restrict C2_crd = (int * )(C -> indices [1 ][1 ]);
15
+
16
+ A2_pos = (int32_t * )malloc (sizeof (int32_t ) * (A1_dimension + 1 ));
17
+ A2_pos [0 ] = 0 ;
18
+ for (int32_t pA2 = 1 ; pA2 < (A1_dimension + 1 ); pA2 ++ ) {
19
+ A2_pos [pA2 ] = 0 ;
20
+ }
21
+ int32_t A2_crd_size = 1048576 ;
22
+ A2_crd = (int32_t * )malloc (sizeof (int32_t ) * A2_crd_size );
23
+ int32_t jA = 0 ;
24
+
25
+ for (int32_t i = 0 ; i < C1_dimension ; i ++ ) {
26
+ int32_t pA2_begin = jA ;
27
+
28
+ int32_t jB = B2_pos [i ];
29
+ int32_t pB2_end = B2_pos [(i + 1 )];
30
+ int32_t jC = C2_pos [i ];
31
+ int32_t pC2_end = C2_pos [(i + 1 )];
32
+
33
+ while (jB < pB2_end && jC < pC2_end ) {
34
+ int32_t jB0 = B2_crd [jB ];
35
+ int32_t jC0 = C2_crd [jC ];
36
+ int32_t j = TACO_MIN (jB0 ,jC0 );
37
+ if (jB0 == j && jC0 == j ) {
38
+ if (A2_crd_size <= jA ) {
39
+ A2_crd = (int32_t * )realloc (A2_crd , sizeof (int32_t ) * (A2_crd_size * 2 ));
40
+ A2_crd_size *= 2 ;
41
+ }
42
+ A2_crd [jA ] = j ;
43
+ jA ++ ;
44
+ }
45
+ else if (jB0 == j ) {
46
+ if (A2_crd_size <= jA ) {
47
+ A2_crd = (int32_t * )realloc (A2_crd , sizeof (int32_t ) * (A2_crd_size * 2 ));
48
+ A2_crd_size *= 2 ;
49
+ }
50
+ A2_crd [jA ] = j ;
51
+ jA ++ ;
52
+ }
53
+ else {
54
+ if (A2_crd_size <= jA ) {
55
+ A2_crd = (int32_t * )realloc (A2_crd , sizeof (int32_t ) * (A2_crd_size * 2 ));
56
+ A2_crd_size *= 2 ;
57
+ }
58
+ A2_crd [jA ] = j ;
59
+ jA ++ ;
60
+ }
61
+ jB += (int32_t )(jB0 == j );
62
+ jC += (int32_t )(jC0 == j );
22
63
}
23
- else if (jB == j ) {
24
- A2_idx [pA2 ] = j ;
25
- pA2 ++ ;
26
- if ((0 == ((pA2 + 1 ) & pA2 )) && (init_alloc_size <= (pA2 + 1 )))
27
- reallocate A2_idx [(2 * (pA2 + 1 ))]
64
+ while (jB < pB2_end ) {
65
+ int32_t j = B2_crd [jB ];
66
+ if (A2_crd_size <= jA ) {
67
+ A2_crd = (int32_t * )realloc (A2_crd , sizeof (int32_t ) * (A2_crd_size * 2 ));
68
+ A2_crd_size *= 2 ;
69
+ }
70
+ A2_crd [jA ] = j ;
71
+ jA ++ ;
72
+ jB ++ ;
28
73
}
29
- else {
30
- A2_idx [pA2 ] = j ;
31
- pA2 ++ ;
32
- if ((0 == ((pA2 + 1 ) & pA2 )) && (init_alloc_size <= (pA2 + 1 )))
33
- reallocate A2_idx [(2 * (pA2 + 1 ))]
74
+ while (jC < pC2_end ) {
75
+ int32_t j = C2_crd [jC ];
76
+ if (A2_crd_size <= jA ) {
77
+ A2_crd = (int32_t * )realloc (A2_crd , sizeof (int32_t ) * (A2_crd_size * 2 ));
78
+ A2_crd_size *= 2 ;
79
+ }
80
+ A2_crd [jA ] = j ;
81
+ jA ++ ;
82
+ jC ++ ;
34
83
}
35
- if (jB == j ) pB2 ++ ;
36
- if (jC == j ) pC2 ++ ;
37
- }
38
- while (pB2 < B2_pos [iB + 1 ]) {
39
- int32_t jB0 = B2_idx [pB2 ];
40
- A2_idx [pA2 ] = jB0 ;
41
- pA2 ++ ;
42
- if ((0 == ((pA2 + 1 ) & pA2 )) && (init_alloc_size <= (pA2 + 1 )))
43
- reallocate A2_idx [(2 * (pA2 + 1 ))]
44
- pB2 ++ ;
84
+
85
+ A2_pos [i + 1 ] = jA - pA2_begin ;
45
86
}
46
- while (pC2 < C2_pos [iB + 1 ]) {
47
- int32_t jC0 = C2_idx [pC2 ];
48
- A2_idx [pA2 ] = jC0 ;
49
- pA2 ++ ;
50
- if ((0 == ((pA2 + 1 ) & pA2 )) && (init_alloc_size <= (pA2 + 1 )))
51
- reallocate A2_idx [(2 * (pA2 + 1 ))]
52
- pC2 ++ ;
87
+
88
+ int32_t csA2 = 0 ;
89
+ for (int32_t pA20 = 1 ; pA20 < (A1_dimension + 1 ); pA20 ++ ) {
90
+ csA2 += A2_pos [pA20 ];
91
+ A2_pos [pA20 ] = csA2 ;
53
92
}
54
- A2_pos [(iB + 1 )] = pA2 ;
55
- }
56
93
57
- allocate A_vals [pA2 ]
94
+ A_vals = (double * )malloc (sizeof (double ) * jA );
95
+
96
+ A -> indices [1 ][0 ] = (uint8_t * )(A2_pos );
97
+ A -> indices [1 ][1 ] = (uint8_t * )(A2_crd );
98
+ A -> vals = (uint8_t * )A_vals ;
99
+ return 0 ;
100
+ }
0 commit comments