Consider the following setup:

```cpp
struct GeneralAdd {
  ir::Expr operator()(const std::vector<ir::Expr> &v) {
    taco_iassert(v.size() >= 1) << "Add operator needs at least one operand";
    if (v.size() == 1)
      return ir::Add::make(v[0], ir::Literal::zero(v[0].type()));
    ir::Expr add = ir::Add::make(v[0], v[1]);
    for (size_t idx = 2; idx < v.size(); ++idx) {
      add = ir::Add::make(add, v[idx]);
    }
    return add;
  }
};

struct xorAlgebra {
  IterationAlgebra operator()(const std::vector<IndexExpr>& regions) {
    IterationAlgebra noIntersect = Complement(Intersect(regions[0], regions[1]));
    return Intersect(noIntersect, Union(regions[0], regions[1]));
  }
};

Func xorOp("logical_xor", GeneralAdd(), xorAlgebra());

static void bench_test(benchmark::State& state) {
  int dim = 5000;
  auto sparsity = 0.01;
  auto f = CSR;
  Tensor<double> matrix = loadRandomTensor("A", {dim, dim}, sparsity, f);
  Tensor<double> result("B", {dim, dim}, f);
  IndexVar i("i"), j("j");
  result(i, j) = xorOp(matrix(i, j), matrix(i, j));
  result.compile();
  std::cout << result.getSource() << std::endl;
}
```

Taco generates code like:

```c
int compute(taco_tensor_t *B, taco_tensor_t *A) {
  int B1_dimension = (int)(B->dimensions[0]);
  double* restrict B_vals = (double*)(B->vals);
  int A1_dimension = (int)(A->dimensions[0]);
  int* restrict A2_pos = (int*)(A->indices[1][0]);
  int* restrict A2_crd = (int*)(A->indices[1][1]);
  double* restrict A_vals = (double*)(A->vals);

  int32_t jB = 0;
  for (int32_t i = 0; i < A1_dimension; i++) {
    for (int32_t jA = A2_pos[i]; jA < A2_pos[(i + 1)]; jA++) {
      B_vals[jB] = A_vals[jA] + A_vals[jA];
      jB++;
    }
  }
  return 0;
}
```

This is incorrect: when a tensor is xor'd with itself, the xor iteration space (the union of the operands minus their intersection) is empty, so no data should be output at all. Instead, the generated kernel writes `A_vals[jA] + A_vals[jA]` for every stored entry of `A`.