Skip to content
This repository was archived by the owner on May 27, 2021. It is now read-only.

Commit 2298f57

Browse files
Revert "Remove explicit vectorisation"
This reverts commit b33cbce.
1 parent b33cbce commit 2298f57

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

src/device/matmul_kernels/layout.jl

+9 -5
Original file line number | Diff line number | Diff line change
@@ -39,31 +39,35 @@ struct AlignedColMajor{T} <: LayoutBase{T} end
3939

4040
# TODO: cleanup vectorisation
4141
@inline function load(::Type{AlignedColMajor{T}}, workspace, tile::Tile{size}) where {T, size}
42-
res = MArray{Tuple{size[1], size[2]}, T}(undef)
42+
vec_len = 16 ÷ sizeof(T)
43+
N = (sizeof(T) * vec_len) ÷ sizeof(Float32)
44+
res = MArray{Tuple{size[1] ÷ vec_len, size[2]}, NTuple{N, VecElement{Float32}}}(undef)
4345

4446
@unroll for j = 1 : size[2]
45-
@unroll for i = 1 : size[1]
47+
@unroll for i = 1 : vec_len : size[1]
4648
t = translate(tile, (i - 1, j - 1))
4749

4850
linear_base = linearise(t.base, Base.size(workspace))
4951
linear_offset = linearise(t.offset, Base.size(workspace))
5052

51-
@inbounds res[i, j] = workspace[linear_base + linear_offset - 1]
53+
@inbounds res[i, j] = vloada(Vec{vec_len, T}, pointer(workspace, linear_base), linear_offset)
5254
end
5355
end
5456

5557
return res
5658
end
5759

5860
@inline function store!(::Type{AlignedColMajor{T}}, workspace, value, tile::Tile{size}) where {T, size}
61+
vec_len = 16 ÷ sizeof(T)
62+
5963
@unroll for j = 1 : size[2]
60-
@unroll for i = 1 : size[1]
64+
@unroll for i = 1 : vec_len : size[1]
6165
t = translate(tile, (i - 1, j - 1))
6266

6367
linear_base = linearise(t.base, Base.size(workspace))
6468
linear_offset = linearise(t.offset, Base.size(workspace))
6569

66-
@inbounds workspace[linear_base + linear_offset - 1] = value[i,j]
70+
vstorea!(Vec{vec_len, T}, pointer(workspace, linear_base), value[i, j], linear_offset)
6771
end
6872
end
6973
end

0 commit comments

Comments
 (0)