@@ -39,35 +39,31 @@ struct AlignedColMajor{T} <: LayoutBase{T} end
39
39
40
40
# TODO: clean up vectorisation
41
41
"""
    load(::Type{AlignedColMajor{T}}, workspace, tile::Tile{size}) where {T, size}

Read the elements addressed by `tile` from `workspace` one scalar at a time and return
them as a `size[1] × size[2]` `MArray` with element type `T`.

# Arguments
- `workspace`: indexable storage; it is read through linear indexing and its shape
  (`Base.size(workspace)`) is used to linearise the tile coordinates.
- `tile`: the tile to read; `size` (its type parameter) gives the number of elements
  along the first and second dimension.

# Returns
An `MArray{Tuple{size[1], size[2]}, T}` whose entry `[i, j]` is the element found at the
tile translated by `(i - 1, j - 1)`.
"""
@inline function load(::Type{AlignedColMajor{T}}, workspace, tile::Tile{size}) where {T, size}
    # Every entry is overwritten by the loops below, so uninitialised storage is fine.
    res = MArray{Tuple{size[1], size[2]}, T}(undef)

    # Scalar element-by-element copy. The earlier implementation stepped `i` by
    # `vec_len = 16 ÷ sizeof(T)` and used `vloada` on `Vec{vec_len, T}` instead.
    # The type parameter `size` shadows `Base.size`, hence the qualified calls.
    @unroll for j = 1 : size[2]
        @unroll for i = 1 : size[1]
            t = translate(tile, (i - 1, j - 1))

            linear_base = linearise(t.base, Base.size(workspace))
            linear_offset = linearise(t.offset, Base.size(workspace))

            # NOTE(review): the `- 1` presumably compensates for both linearised
            # indices being 1-based -- confirm against `linearise`.
            @inbounds res[i, j] = workspace[linear_base + linear_offset - 1]
        end
    end

    return res
end
59
57
60
58
"""
    store!(::Type{AlignedColMajor{T}}, workspace, value, tile::Tile{size}) where {T, size}

Write `value` into the region of `workspace` addressed by `tile`, one scalar at a time.

# Arguments
- `workspace`: indexable storage that is mutated through linear indexing; its shape
  (`Base.size(workspace)`) is used to linearise the tile coordinates.
- `value`: source of the data, indexed as `value[i, j]` for `i in 1:size[1]` and
  `j in 1:size[2]`.
- `tile`: the destination tile; `size` (its type parameter) gives the number of elements
  along the first and second dimension.
"""
@inline function store!(::Type{AlignedColMajor{T}}, workspace, value, tile::Tile{size}) where {T, size}
    # Scalar element-by-element copy. The earlier implementation stepped `i` by
    # `vec_len = 16 ÷ sizeof(T)` and used `vstorea!` on `Vec{vec_len, T}` instead.
    # The type parameter `size` shadows `Base.size`, hence the qualified calls.
    @unroll for j = 1 : size[2]
        @unroll for i = 1 : size[1]
            t = translate(tile, (i - 1, j - 1))

            linear_base = linearise(t.base, Base.size(workspace))
            linear_offset = linearise(t.offset, Base.size(workspace))

            # NOTE(review): the `- 1` presumably compensates for both linearised
            # indices being 1-based -- confirm against `linearise`.
            @inbounds workspace[linear_base + linear_offset - 1] = value[i, j]
        end
    end
end
0 commit comments