@@ -90,8 +90,8 @@ static void bench_image_xor(benchmark::State& state, const Format& f) {
90
90
for (auto & it : iterate<int64_t >(result)) {
91
91
nnz++;
92
92
}
93
- std::cout << " Result NNZ = " << nnz << std::endl;
94
- std::cout << result << std::endl;
93
+ // std::cout << "Result NNZ = " << nnz << std::endl;
94
+ // std::cout << result << std::endl;
95
95
}
96
96
}
97
97
static void CustomArguments (benchmark::internal::Benchmark* b) {
@@ -112,40 +112,40 @@ static void bench_image_fused(benchmark::State& state, const Format& f) {
112
112
// write("temp/taco-mat1-" + std::to_string(num) + ".tns", matrix1);
113
113
// write("temp/taco-mat2-" + std::to_string(num) + ".tns", matrix2);
114
114
// write("temp/taco-mat3-" + std::to_string(num) + ".tns", matrix3);
115
- int nnz = 0 ;
116
- for (auto & it : iterate<int64_t >(matrix1)) {
117
- nnz++;
118
- }
119
- std::cout << " Matrix1 NNZ = " << nnz << std::endl;
120
- nnz = 0 ;
121
- for (auto & it : iterate<int64_t >(matrix2)) {
122
- nnz++;
123
- }
124
- std::cout << " Matrix2 NNZ = " << nnz << std::endl;
125
- nnz = 0 ;
126
- for (auto & it : iterate<int64_t >(matrix3)) {
127
- nnz++;
128
- }
129
- std::cout << " Matrix3 NNZ = " << nnz << std::endl;
115
+ // int nnz = 0;
116
+ // for (auto& it : iterate<int64_t>(matrix1)) {
117
+ // nnz++;
118
+ // }
119
+ // std::cout << "Matrix1 NNZ = " << nnz << std::endl;
120
+ // nnz = 0;
121
+ // for (auto& it : iterate<int64_t>(matrix2)) {
122
+ // nnz++;
123
+ // }
124
+ // std::cout << "Matrix2 NNZ = " << nnz << std::endl;
125
+ // nnz = 0;
126
+ // for (auto& it : iterate<int64_t>(matrix3)) {
127
+ // nnz++;
128
+ // }
129
+ // std::cout << "Matrix3 NNZ = " << nnz << std::endl;
130
130
131
131
for (auto _ : state) {
132
132
state.PauseTiming ();
133
133
Tensor<int64_t > result (" result" , dims, f, 0 );
134
134
135
135
IndexVar i (" i" ), j (" j" );
136
- result (i, j) = testOp (matrix1 (i, j), matrix2 (i, j), matrix3 (i, j));
136
+ result (i, j) = xorAndOp (matrix1 (i, j), matrix2 (i, j), matrix3 (i, j));
137
137
IndexStmt stmt = result.getAssignment ().concretize ();
138
138
result.setAssembleWhileCompute (true );
139
139
result.compile ();
140
140
state.ResumeTiming ();
141
141
result.compute ();
142
142
result = result.removeExplicitZeros (result.getFormat ());
143
- int nnz = 0 ;
144
- for (auto & it : iterate<int64_t >(result)) {
145
- nnz++;
146
- }
147
143
148
- std::cout << " Result NNZ = " << nnz << std::endl;
144
+ // int nnz = 0;
145
+ // for (auto& it : iterate<int64_t>(result)) {
146
+ // nnz++;
147
+ // }
148
+ // std::cout << "Result NNZ = " << nnz << std::endl;
149
149
// write("temp/taco-result" + std::to_string(num) + ".tns", result);
150
150
// Used to print out generated TACO code
151
151
// std::shared_ptr<ir::CodeGen> codegen = ir::CodeGen::init_default(std::cout, ir::CodeGen::ImplementationGen);
@@ -154,4 +154,48 @@ static void bench_image_fused(benchmark::State& state, const Format& f) {
154
154
}
155
155
}
156
156
TACO_BENCH_ARGS (bench_image_fused, csr, CSR)->Apply(CustomArguments);
157
- TACO_BENCH_ARGS (bench_image_fused, csr, CSR)->Apply(CustomArguments);
157
+
158
+ static void bench_image_window (benchmark::State& state, const Format& f, double window_size) {
159
+ int num = state.range (0 );
160
+ auto t1 = 0.5 ;
161
+ auto t2 = 0.55 ;
162
+ Tensor<int64_t > matrix1 = castToTypeZero<int64_t >(" A" , loadImageTensor (" A" , num, f, t1, 1 /* variant */ ));
163
+ Tensor<int64_t > matrix2 = castToTypeZero<int64_t >(" B" , loadImageTensor (" B" , num, f, t2, 2 /* variant */ ));
164
+ auto dims = matrix1.getDimensions ();
165
+
166
+ int mid0 = (dims[0 ]/2.0 );
167
+ int mid1 = (dims[1 ]/2.0 );
168
+ int win_len0 = int (window_size * dims[0 ]);
169
+ int win_len1 = int (window_size * dims[1 ]);
170
+
171
+ for (auto _ : state) {
172
+ state.PauseTiming ();
173
+ Tensor<int64_t > result (" result" , {2 *win_len0, 2 *win_len1}, f, 0 );
174
+
175
+ IndexVar i (" i" ), j (" j" );
176
+ result (i, j) = xorOp1 (matrix1 (i (mid0-win_len0, mid0+win_len0), j (mid1-win_len1, mid1+win_len1)),
177
+ matrix2 (i (mid0-win_len0, mid0+win_len0), j (mid1-win_len1, mid1+win_len1)));
178
+ IndexStmt stmt = result.getAssignment ().concretize ();
179
+ result.setAssembleWhileCompute (true );
180
+ result.compile ();
181
+ state.ResumeTiming ();
182
+ result.compute ();
183
+ result = result.removeExplicitZeros (result.getFormat ());
184
+
185
+ // int nnz = 0;
186
+ // for (auto& it : iterate<int64_t>(result)) {
187
+ // nnz++;
188
+ // }
189
+ // std::cout << "Result NNZ = " << nnz << std::endl;
190
+
191
+ // write("temp/taco-result" + std::to_string(num) + ".tns", result);
192
+ // Used to print out generated TACO code
193
+ // std::shared_ptr<ir::CodeGen> codegen = ir::CodeGen::init_default(std::cout, ir::CodeGen::ImplementationGen);
194
+ // ir::Stmt compute = lower(stmt, "compute", false, true);
195
+ // codegen->compile(compute, true);
196
+ }
197
+ }
198
+ TACO_BENCH_ARGS (bench_image_window, csr/0.25 , CSR, 0.25 )->Apply(CustomArguments);
199
+ TACO_BENCH_ARGS (bench_image_window, csr/0.2 , CSR, 0.2 )->Apply(CustomArguments);
200
+ TACO_BENCH_ARGS (bench_image_window, csr/0.15 , CSR, 0.15 )->Apply(CustomArguments);
201
+ TACO_BENCH_ARGS (bench_image_window, csr/0.1 , CSR, 0.1 )->Apply(CustomArguments);
0 commit comments