|
99 | 99 |
|
100 | 100 | s = (sA[1],)
|
101 | 101 | T = promote_op(matprod, TA, Tb)
|
| 102 | + #println(T) |
102 | 103 |
|
103 | 104 | if sb[1] != sA[2]
|
104 | 105 | error("Dimension mismatch")
|
|
130 | 131 | end
|
131 | 132 | end
|
132 | 133 |
|
# Matrix-vector product of a statically sized matrix with an arbitrary vector.
# The size of the result is inferrable from `A` alone, so the product is unrolled at
# code-generation time. The length of `b` is only known at run time and is checked
# there: `error("Dimension mismatch")` is raised when it differs from `size(A, 2)`.
@generated function *(A::StaticMatrix, b::AbstractVector)
    TA = eltype(A)
    Tb = eltype(b)
    sA = size(A)

    s = (sA[1],)
    T = promote_op(matprod, TA, Tb)

    # The output container is derived from `A`, so the element type may only be left
    # implicit when the promoted type equals `A`'s own eltype. Comparing against `Tb`
    # would pick a container with eltype `TA` even though the products have type `T`.
    if T == TA
        newtype = similar_type(A, s)
    else
        newtype = similar_type(A, T, s)
    end

    if sA[2] != 0
        # Entry k of the result is A[k,1]*b[1] + ... + A[k,n]*b[n]; `A` is addressed
        # through linear indices computed here, at generation time.
        exprs = [reduce((ex1,ex2) -> :(+($ex1,$ex2)), [:(A[$(sub2ind(sA, k, j))]*b[$j]) for j = 1:sA[2]]) for k = 1:sA[1]]
    else
        # `A` has no columns: every entry is an empty sum.
        exprs = [zero(T) for k = 1:sA[1]]
    end

    return quote
        $(Expr(:meta,:inline))
        if length(b) != $(sA[2])
            error("Dimension mismatch")
        end
        @inbounds return $(Expr(:call, newtype, Expr(:tuple, exprs...)))
    end
end
| 164 | + |
133 | 165 | @generated function *(a::StaticVector, B::StaticMatrix)
|
134 | 166 | Ta = eltype(a)
|
135 | 167 | TB = eltype(B)
|
|
402 | 434 |
|
403 | 435 | # The idea here is to get pointers to stack variables and call BLAS.
|
404 | 436 | # This saves an awful lot of time compared to copying SArrays to Ref{SArray{...}}
|
405 |
| -# and should be fastest for (very) large SArrays |
| 437 | +# and using BLAS should be fastest for (very) large SArrays |
406 | 438 |
|
407 | 439 | # Here is an LLVM function that gets the pointer to its input, %x
|
408 | 440 | # After this we would make the ccall above.
|
|
413 | 445 | # ret i32* %1
|
414 | 446 | # }
|
415 | 447 |
|
# In-place matrix-vector product: overwrites `c` with `A*b` and returns `c`.
# All three sizes are available at code-generation time, so the dimension check is
# performed while generating the method and the product is unrolled.
@generated function A_mul_B!(c::StaticVector, A::StaticMatrix, b::StaticVector)
    sA = size(A)
    sb = size(b)
    s = size(c)
    T = eltype(c)

    # Raised during code generation, i.e. on the first call for these argument types.
    if sb[1] != sA[2] || s[1] != sA[1]
        error("Dimension mismatch")
    end

    if sA[2] != 0
        # c[k] = A[k,1]*b[1] + ... + A[k,n]*b[n], with `A` addressed by linear index.
        exprs = [:(c[$k] = $(reduce((ex1,ex2) -> :(+($ex1,$ex2)), [:(A[$(sub2ind(sA, k, j))]*b[$j]) for j = 1:sA[2]]))) for k = 1:sA[1]]
    else
        # `A` has no columns: every entry of `c` is an empty sum.
        exprs = [:(c[$k] = $(zero(T))) for k = 1:sA[1]]
    end

    return quote
        $(Expr(:meta,:inline))
        @inbounds $(Expr(:block, exprs...))
        # Return the destination, following the `A_mul_B!(Y, A, B) -> Y` convention,
        # rather than the value of the last assignment (or `nothing` for zero rows).
        return c
    end
end
| 469 | + |
# In-place matrix-vector product for arbitrary vectors: overwrites `c` with `A*b` and
# returns `c`. The unrolled code is inferrable from the size of `A`; the lengths of
# `b` and `c` are only known at run time and are checked there, raising
# `error("Dimension mismatch")` when they do not match `size(A)`.
@generated function A_mul_B!(c::AbstractVector, A::StaticMatrix, b::AbstractVector)
    sA = size(A)
    T = eltype(c)

    if sA[2] != 0
        # c[k] = A[k,1]*b[1] + ... + A[k,n]*b[n], with `A` addressed by linear index.
        exprs = [:(c[$k] = $(reduce((ex1,ex2) -> :(+($ex1,$ex2)), [:(A[$(sub2ind(sA, k, j))]*b[$j]) for j = 1:sA[2]]))) for k = 1:sA[1]]
    else
        # `A` has no columns: every entry of `c` is an empty sum.
        exprs = [:(c[$k] = $(zero(T))) for k = 1:sA[1]]
    end

    return quote
        $(Expr(:meta,:inline))
        if length(b) != $(sA[2]) || length(c) != $(sA[1])
            error("Dimension mismatch")
        end
        @inbounds $(Expr(:block, exprs...))
        # Return the destination, following the `A_mul_B!(Y, A, B) -> Y` convention,
        # rather than the value of the last assignment (or `nothing` for zero rows).
        return c
    end
end
| 489 | + |
416 | 490 |
|
417 | 491 | @generated function A_mul_B!(C::StaticMatrix, A::StaticMatrix, B::StaticMatrix)
|
418 | 492 | if isbits(C)
|
|
0 commit comments