Skip to content

Commit 644fa90

Browse files
committed
boost 32x32
1 parent cad704d commit 644fa90

File tree

5 files changed

+75691
-15143
lines changed

5 files changed

+75691
-15143
lines changed

CMakeLists.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ file(GLOB_RECURSE SOURCE_FILES
2222
"${CMAKE_CURRENT_SOURCE_DIR}/matmul.c"
2323
)
2424
add_compile_options(-std=c11)
25-
add_compile_options(-O3)
25+
add_compile_options(-O2)
2626
add_compile_options(-march=native)
2727
add_compile_options(-D_GNU_SOURCE)
2828

cmat.c

+2-2
Original file line number | Diff line number | Diff line change
@@ -389,7 +389,7 @@ int assign_double_clone(double_cmat m1, double_cmat m2) {
389389
return 0;
390390
}
391391

392-
inline int matlincomb_double_contiguous(double_cmat res, shape_uint n_mats, double_cmat* mats, int8_t* coeffs) {
392+
int matlincomb_double_contiguous(double_cmat res, shape_uint n_mats, double_cmat* mats, int8_t* coeffs) {
393393
//printf("lincomb\n");
394394
// res = coeffs[0] * mats[0] + ... + coeffs[n_mats-1] * mats[n_mats-1]
395395
// memset(&res.data[0][0], 0, sizeof(res.data[0][0])*res.shape[0]*res.shape[1]); // should not reset because it could appear in RHS
@@ -398,7 +398,7 @@ inline int matlincomb_double_contiguous(double_cmat res, shape_uint n_mats, doub
398398
}
399399
shape_uint n_elems = mats[0].shape[0] * mats[0].shape[1];
400400
for (shape_uint i = 0; i < n_mats; i++) {
401-
cblas_daxpy(n_elems, coeffs[i], &mats[i].data[0][0], 1, &res.data[0][0], 1);
401+
cblas_daxpy(n_elems, coeffs[i], mats[i].data[0], 1, res.data[0], 1);
402402
}
403403
return 0;
404404
}

0 commit comments

Comments
 (0)