@@ -39,31 +39,35 @@ struct AlignedColMajor{T} <: LayoutBase{T} end
39
39
40
40
# TODO: clean up vectorisation
41
41
"""
    load(::Type{AlignedColMajor{T}}, workspace, tile::Tile{size}) where {T, size}

Read the region of `workspace` described by `tile` in 16-byte chunks and return
the chunks as an `MArray`.

The result has `size[1] ÷ vec_len` rows and `size[2]` columns, where
`vec_len = 16 ÷ sizeof(T)` is the number of `T` elements per chunk. Each entry is
an `NTuple{N, VecElement{Float32}}` covering the same number of bytes as
`vec_len` elements of type `T`.

NOTE(review): `size[1]` is presumably a multiple of `vec_len`; otherwise the loop
visits more chunks than `res` has rows. Confirm against the callers.
"""
@inline function load(::Type{AlignedColMajor{T}}, workspace, tile::Tile{size}) where {T, size}
    # Number of `T` elements that fit in one 16-byte chunk.
    vec_len = 16 ÷ sizeof(T)
    # The same number of bytes expressed as a count of Float32 lanes.
    N = (sizeof(T) * vec_len) ÷ sizeof(Float32)
    res = MArray{Tuple{size[1] ÷ vec_len, size[2]}, NTuple{N, VecElement{Float32}}}(undef)

    @unroll for j = 1 : size[2]
        @unroll for i = 1 : vec_len : size[1]
            # `i` is the 1-based element row at which this chunk starts.
            t = translate(tile, (i - 1, j - 1))

            linear_base = linearise(t.base, Base.size(workspace))
            linear_offset = linearise(t.offset, Base.size(workspace))

            # `res` holds one row per chunk, not per element, so convert the
            # element row into a chunk row. Indexing with `i` directly would
            # write past the end of `res` for every chunk after the first, and
            # `@inbounds` would hide it.
            vec_row = (i - 1) ÷ vec_len + 1

            # `vloada` is defined elsewhere; presumably an aligned vector load
            # of `vec_len` elements at `linear_offset` relative to the pointer.
            @inbounds res[vec_row, j] = vloada(Vec{vec_len, T}, pointer(workspace, linear_base), linear_offset)
        end
    end

    return res
end
57
59
58
60
"""
    store!(::Type{AlignedColMajor{T}}, workspace, value, tile::Tile{size}) where {T, size}

Write the entries of `value` into the region of `workspace` described by `tile`,
one 16-byte chunk (`16 ÷ sizeof(T)` elements of type `T`) per `vstorea!` call.

NOTE(review): `value` is indexed with the element row of each chunk
(`1, 1 + lanes, ...`), not with a consecutive chunk index. Confirm that this
matches the shape the callers pass in.
"""
@inline function store!(::Type{AlignedColMajor{T}}, workspace, value, tile::Tile{size}) where {T, size}
    # Number of `T` elements that fit in one 16-byte chunk.
    lanes = 16 ÷ sizeof(T)
    # `size` names the tile dimensions here, hence the qualified `Base.size`.
    dims = Base.size(workspace)

    @unroll for col in 1:size[2]
        @unroll for row in 1:lanes:size[1]
            shifted = translate(tile, (row - 1, col - 1))

            base_index = linearise(shifted.base, dims)
            offset_index = linearise(shifted.offset, dims)
            dest = pointer(workspace, base_index)

            # `vstorea!` is defined elsewhere; presumably an aligned vector
            # store at `offset_index` relative to `dest`.
            vstorea!(Vec{lanes, T}, dest, value[row, col], offset_index)
        end
    end
end
0 commit comments