|
1 | 1 | # reversing |
2 | 2 |
|
3 | | -# the kernel works by treating the array as 1d. after reversing along dimension x, an element at
4 | | -# pos [i1, i2, i3, ... , i{x}, ..., i{n}] will be at |
5 | | -# pos [i1, i2, i3, ... , d{x} - i{x} + 1, ..., i{n}] where d{x} is the size of dimension x |
6 | | - |
7 | | -# out-of-place version, copying a single value per thread from input to output |
8 | | -function _reverse(input::AnyCuArray{T, N}, output::AnyCuArray{T, N}; |
9 | | - dims=1:ndims(input)) where {T, N} |
10 | | - @assert size(input) == size(output) |
11 | | - rev_dims = ntuple((d)-> d in dims && size(input, d) > 1, N) |
12 | | - ref = size(input) .+ 1 |
13 | | -# converts an ND-index in the input array to the linear index
14 | | -lin_idx = LinearIndices(input)
15 | | -# converts a linear index in the input array back to an ND-index
16 | | -nd_idx = CartesianIndices(input)
17 | | - |
18 | | - ## COV_EXCL_START |
19 | | - function kernel(input::AbstractArray{T, N}, output::AbstractArray{T, N}) where {T, N} |
20 | | - offset_in = blockDim().x * (blockIdx().x - 1i32) |
21 | | - index_in = offset_in + threadIdx().x |
22 | | - |
23 | | - @inbounds if index_in <= length(input) |
24 | | - idx = Tuple(nd_idx[index_in]) |
25 | | - idx = ifelse.(rev_dims, ref .- idx, idx) |
26 | | - index_out = lin_idx[idx...] |
27 | | - output[index_out] = input[index_in] |
28 | | - end |
29 | | - |
30 | | - return |
31 | | - end |
32 | | - ## COV_EXCL_STOP |
33 | | - |
34 | | - nthreads = 256 |
35 | | - nblocks = cld(length(input), nthreads) |
36 | | - |
37 | | - @cuda threads=nthreads blocks=nblocks kernel(input, output) |
38 | | -end |
39 | | - |
40 | | -# in-place version, swapping elements on half the number of threads |
41 | | -function _reverse!(data::AnyCuArray{T, N}; dims=1:ndims(data)) where {T, N} |
42 | | - rev_dims = ntuple((d)-> d in dims && size(data, d) > 1, N) |
43 | | - half_dim = findlast(rev_dims) |
44 | | - if isnothing(half_dim) |
45 | | - # no reverse operation needed at all in this case. |
46 | | - return |
47 | | - end |
48 | | - ref = size(data) .+ 1 |
49 | | - # converts an ND-index in the data array to the linear index |
50 | | - lin_idx = LinearIndices(data) |
51 | | - reduced_size = ntuple((d)->ifelse(d==half_dim, cld(size(data,d),2), size(data,d)), N) |
52 | | - reduced_length = prod(reduced_size) |
53 | | - # converts a linear index in a reduced array to an ND-index, but using the reduced size |
54 | | - nd_idx = CartesianIndices(reduced_size) |
55 | | - |
56 | | - ## COV_EXCL_START |
57 | | - function kernel(data::AbstractArray{T, N}) where {T, N} |
58 | | - offset_in = blockDim().x * (blockIdx().x - 1i32) |
59 | | - |
60 | | - index_in = offset_in + threadIdx().x |
61 | | - |
62 | | - @inbounds if index_in <= reduced_length |
63 | | - idx = Tuple(nd_idx[index_in]) |
64 | | - index_in = lin_idx[idx...] |
65 | | - idx = ifelse.(rev_dims, ref .- idx, idx) |
66 | | - index_out = lin_idx[idx...] |
67 | | - |
68 | | - if index_in < index_out |
69 | | - temp = data[index_out] |
70 | | - data[index_out] = data[index_in] |
71 | | - data[index_in] = temp |
72 | | - end |
73 | | - end |
74 | | - |
75 | | - return |
76 | | - end |
77 | | - ## COV_EXCL_STOP |
78 | | - |
79 | | -# NOTE: we launch threads for slightly more than half of the elements in the array.
80 | | -# The last non-singleton dimension being reversed determines how the array is split in half.
81 | | -# Only the middle slice of an odd-sized dimension could cause trouble (its element pairs would
82 | | -# be visited twice), but the `index_in < index_out` check above makes the redundant threads no-ops.
83 | | - |
84 | | - nthreads = 256 |
85 | | -nblocks = cld(reduced_length, nthreads)
86 | | - |
87 | | - @cuda threads=nthreads blocks=nblocks kernel(data) |
88 | | -end |
| 3 | +# # the kernel works by treating the array as 1d. after reversing along dimension x, an element at
| 4 | +# # pos [i1, i2, i3, ... , i{x}, ..., i{n}] will be at |
| 5 | +# # pos [i1, i2, i3, ... , d{x} - i{x} + 1, ..., i{n}] where d{x} is the size of dimension x |
| 6 | + |
| 7 | +# # out-of-place version, copying a single value per thread from input to output |
| 8 | +# function _reverse(input::AnyCuArray{T, N}, output::AnyCuArray{T, N}; |
| 9 | +# dims=1:ndims(input)) where {T, N} |
| 10 | +# @assert size(input) == size(output) |
| 11 | +# rev_dims = ntuple((d)-> d in dims && size(input, d) > 1, N) |
| 12 | +# ref = size(input) .+ 1 |
| 13 | +# # converts an ND-index in the input array to the linear index
| 14 | +# lin_idx = LinearIndices(input)
| 15 | +# # converts a linear index in the input array back to an ND-index
| 16 | +# nd_idx = CartesianIndices(input)
| 17 | + |
| 18 | +# ## COV_EXCL_START |
| 19 | +# function kernel(input::AbstractArray{T, N}, output::AbstractArray{T, N}) where {T, N} |
| 20 | +# offset_in = blockDim().x * (blockIdx().x - 1i32) |
| 21 | +# index_in = offset_in + threadIdx().x |
| 22 | + |
| 23 | +# @inbounds if index_in <= length(input) |
| 24 | +# idx = Tuple(nd_idx[index_in]) |
| 25 | +# idx = ifelse.(rev_dims, ref .- idx, idx) |
| 26 | +# index_out = lin_idx[idx...] |
| 27 | +# output[index_out] = input[index_in] |
| 28 | +# end |
| 29 | + |
| 30 | +# return |
| 31 | +# end |
| 32 | +# ## COV_EXCL_STOP |
| 33 | + |
| 34 | +# nthreads = 256 |
| 35 | +# nblocks = cld(length(input), nthreads) |
| 36 | + |
| 37 | +# @cuda threads=nthreads blocks=nblocks kernel(input, output) |
| 38 | +# end |
| 39 | + |
| 40 | +# # in-place version, swapping elements on half the number of threads |
| 41 | +# function _reverse!(data::AnyCuArray{T, N}; dims=1:ndims(data)) where {T, N} |
| 42 | +# rev_dims = ntuple((d)-> d in dims && size(data, d) > 1, N) |
| 43 | +# half_dim = findlast(rev_dims) |
| 44 | +# if isnothing(half_dim) |
| 45 | +# # no reverse operation needed at all in this case. |
| 46 | +# return |
| 47 | +# end |
| 48 | +# ref = size(data) .+ 1 |
| 49 | +# # converts an ND-index in the data array to the linear index |
| 50 | +# lin_idx = LinearIndices(data) |
| 51 | +# reduced_size = ntuple((d)->ifelse(d==half_dim, cld(size(data,d),2), size(data,d)), N) |
| 52 | +# reduced_length = prod(reduced_size) |
| 53 | +# # converts a linear index in a reduced array to an ND-index, but using the reduced size |
| 54 | +# nd_idx = CartesianIndices(reduced_size) |
| 55 | + |
| 56 | +# ## COV_EXCL_START |
| 57 | +# function kernel(data::AbstractArray{T, N}) where {T, N} |
| 58 | +# offset_in = blockDim().x * (blockIdx().x - 1i32) |
| 59 | + |
| 60 | +# index_in = offset_in + threadIdx().x |
| 61 | + |
| 62 | +# @inbounds if index_in <= reduced_length |
| 63 | +# idx = Tuple(nd_idx[index_in]) |
| 64 | +# index_in = lin_idx[idx...] |
| 65 | +# idx = ifelse.(rev_dims, ref .- idx, idx) |
| 66 | +# index_out = lin_idx[idx...] |
| 67 | + |
| 68 | +# if index_in < index_out |
| 69 | +# temp = data[index_out] |
| 70 | +# data[index_out] = data[index_in] |
| 71 | +# data[index_in] = temp |
| 72 | +# end |
| 73 | +# end |
| 74 | + |
| 75 | +# return |
| 76 | +# end |
| 77 | +# ## COV_EXCL_STOP |
| 78 | + |
| 79 | +# # NOTE: we launch threads for slightly more than half of the elements in the array.
| 80 | +# # The last non-singleton dimension being reversed determines how the array is split in half.
| 81 | +# # Only the middle slice of an odd-sized dimension could cause trouble (its element pairs would
| 82 | +# # be visited twice), but the `index_in < index_out` check above makes the redundant threads no-ops.
| 83 | + |
| 84 | +# nthreads = 256 |
| 85 | +# nblocks = cld(reduced_length, nthreads)
| 86 | + |
| 87 | +# @cuda threads=nthreads blocks=nblocks kernel(data) |
| 88 | +# end |
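
To make the index arithmetic in the header comment concrete, here is a minimal CPU-only sketch (plain Julia, illustrative array and dims, no GPU required) of what the out-of-place kernel does, with one loop iteration standing in for one thread. It reproduces the rev_dims/ref/lin_idx/nd_idx setup from `_reverse` and checks the result against Base's CPU reference:

    # CPU sketch of the out-of-place kernel; one loop iteration per "thread"
    A = collect(reshape(1:24, 4, 3, 2))
    dims = (1, 3)                                 # illustrative choice of dimensions
    rev_dims = ntuple(d -> d in dims && size(A, d) > 1, ndims(A))
    ref = size(A) .+ 1                            # so i{x} becomes d{x} - i{x} + 1
    lin_idx = LinearIndices(A)
    nd_idx = CartesianIndices(A)

    B = similar(A)
    for index_in in 1:length(A)
        idx = Tuple(nd_idx[index_in])
        idx = ifelse.(rev_dims, ref .- idx, idx)  # mirror only the reversed dimensions
        B[lin_idx[idx...]] = A[index_in]
    end
    @assert B == reverse(A; dims=(1, 3))          # matches the CPU reference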
89 | 89 |
|
90 | 90 |
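
The in-place strategy from `_reverse!` can be checked the same way: the reduced index space halves the last reversed non-singleton dimension with cld, and the `index_in < index_out` guard ensures each pair is swapped exactly once even when that dimension has odd size. Again an illustrative CPU-only sketch:

    # CPU sketch of the in-place kernel; the loop covers only the reduced index space
    data = collect(reshape(1:15, 5, 3))           # odd size along the halved dimension
    rev_dims = ntuple(d -> d == 1 && size(data, d) > 1, ndims(data))  # reverse dim 1 only
    half_dim = findlast(rev_dims)
    ref = size(data) .+ 1
    lin_idx = LinearIndices(data)
    reduced_size = ntuple(d -> d == half_dim ? cld(size(data, d), 2) : size(data, d), ndims(data))
    nd_idx = CartesianIndices(reduced_size)       # cld(5, 2) = 3 rows instead of 5

    for i in 1:prod(reduced_size)
        idx = Tuple(nd_idx[i])
        index_in = lin_idx[idx...]
        idx = ifelse.(rev_dims, ref .- idx, idx)
        index_out = lin_idx[idx...]
        if index_in < index_out                   # the middle row maps onto itself: no-op
            data[index_in], data[index_out] = data[index_out], data[index_in]
        end
    end
    @assert data == reverse(collect(reshape(1:15, 5, 3)); dims=1)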
|
91 | 91 | # n-dimensional API |
92 | 92 |
|
93 | | -function Base.reverse!(data::AnyCuArray{T, N}; dims=:) where {T, N} |
94 | | - if isa(dims, Colon) |
95 | | - dims = 1:ndims(data) |
96 | | - end |
97 | | - if !applicable(iterate, dims) |
98 | | - throw(ArgumentError("dimension $dims is not an iterable")) |
99 | | - end |
100 | | - if !all(1 .≤ dims .≤ ndims(data)) |
101 | | - throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(data))")) |
102 | | - end |
103 | | - |
104 | | - _reverse!(data; dims=dims) |
105 | | - |
106 | | - return data |
107 | | -end |
108 | | - |
109 | | -# out-of-place |
110 | | -function Base.reverse(input::AnyCuArray{T, N}; dims=:) where {T, N} |
111 | | - if isa(dims, Colon) |
112 | | - dims = 1:ndims(input) |
113 | | - end |
114 | | - if !applicable(iterate, dims) |
115 | | - throw(ArgumentError("dimension $dims is not an iterable")) |
116 | | - end |
117 | | - if !all(1 .≤ dims .≤ ndims(input)) |
118 | | - throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(input))")) |
119 | | - end |
120 | | - |
121 | | - if all(size(input)[[dims...]].==1) |
122 | | - # no reverse operation needed at all in this case. |
123 | | - return copy(input) |
124 | | - else |
125 | | - output = similar(input) |
126 | | - _reverse(input, output; dims=dims) |
127 | | - return output |
128 | | - end |
129 | | -end |
| 93 | +# function Base.reverse!(data::AnyCuArray{T, N}; dims=:) where {T, N} |
| 94 | +# if isa(dims, Colon) |
| 95 | +# dims = 1:ndims(data) |
| 96 | +# end |
| 97 | +# if !applicable(iterate, dims) |
| 98 | +# throw(ArgumentError("dimension $dims is not an iterable")) |
| 99 | +# end |
| 100 | +# if !all(1 .≤ dims .≤ ndims(data)) |
| 101 | +# throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(data))")) |
| 102 | +# end |
| 103 | + |
| 104 | +# _reverse!(data; dims=dims) |
| 105 | + |
| 106 | +# return data |
| 107 | +# end |
| 108 | + |
| 109 | +# # out-of-place |
| 110 | +# function Base.reverse(input::AnyCuArray{T, N}; dims=:) where {T, N} |
| 111 | +# if isa(dims, Colon) |
| 112 | +# dims = 1:ndims(input) |
| 113 | +# end |
| 114 | +# if !applicable(iterate, dims) |
| 115 | +# throw(ArgumentError("dimension $dims is not an iterable")) |
| 116 | +# end |
| 117 | +# if !all(1 .≤ dims .≤ ndims(input)) |
| 118 | +# throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(input))")) |
| 119 | +# end |
| 120 | + |
| 121 | +# if all(size(input)[[dims...]].==1) |
| 122 | +# # no reverse operation needed at all in this case. |
| 123 | +# return copy(input) |
| 124 | +# else |
| 125 | +# output = similar(input) |
| 126 | +# _reverse(input, output; dims=dims) |
| 127 | +# return output |
| 128 | +# end |
| 129 | +# end |
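
With these methods enabled (this commit comments them out) and a functional CUDA device, the n-dimensional entry points would be used as below; the array values are illustrative:

    using CUDA

    a = CuArray(reshape(1f0:12f0, 3, 4))
    b = reverse(a; dims=2)    # out-of-place: flips the column order
    reverse!(a)               # in-place: dims=: reverses every dimension
    # reverse(a; dims=5)      # would throw an ArgumentError (5 is out of range)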
130 | 130 |
|
131 | 131 |
|
132 | 132 | # 1-dimensional API |
133 | 133 |
|
134 | | -# in-place |
135 | | -Base.@propagate_inbounds function Base.reverse!(data::AnyCuVector{T}, start::Integer, |
136 | | - stop::Integer=length(data)) where {T} |
137 | | - _reverse!(view(data, start:stop)) |
138 | | - return data |
139 | | -end |
| 134 | +# # in-place |
| 135 | +# Base.@propagate_inbounds function Base.reverse!(data::AnyCuVector{T}, start::Integer, |
| 136 | +# stop::Integer=length(data)) where {T} |
| 137 | +# _reverse!(view(data, start:stop)) |
| 138 | +# return data |
| 139 | +# end |
140 | 140 |
|
141 | | -Base.reverse!(data::AnyCuVector{T}) where {T} = @inbounds reverse!(data, 1, length(data)) |
| 141 | +# Base.reverse!(data::AnyCuVector{T}) where {T} = @inbounds reverse!(data, 1, length(data)) |
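
The 1-dimensional in-place methods reverse a subrange through a view, leaving the rest of the vector untouched. Illustrative usage, assuming a working device:

    using CUDA

    v = CuArray(Float32.(1:6))
    reverse!(v, 2, 5)         # v is now [1, 5, 4, 3, 2, 6]
    reverse!(v)               # whole-vector convenience method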
142 | 142 |
|
143 | | -# out-of-place |
144 | | -Base.@propagate_inbounds function Base.reverse(input::AnyCuVector{T}, start::Integer, |
145 | | - stop::Integer=length(input)) where {T} |
146 | | - output = similar(input) |
| 143 | +# # out-of-place |
| 144 | +# Base.@propagate_inbounds function Base.reverse(input::AnyCuVector{T}, start::Integer, |
| 145 | +# stop::Integer=length(input)) where {T} |
| 146 | +# output = similar(input) |
147 | 147 |
|
148 | | - start > 1 && copyto!(output, 1, input, 1, start-1) |
149 | | - _reverse(view(input, start:stop), view(output, start:stop)) |
150 | | - stop < length(input) && copyto!(output, stop+1, input, stop+1) |
| 148 | +# start > 1 && copyto!(output, 1, input, 1, start-1) |
| 149 | +# _reverse(view(input, start:stop), view(output, start:stop)) |
| 150 | +# stop < length(input) && copyto!(output, stop+1, input, stop+1) |
151 | 151 |
|
152 | | - return output |
153 | | -end |
| 152 | +# return output |
| 153 | +# end |
154 | 154 |
|
155 | | -Base.reverse(data::AnyCuVector{T}) where {T} = @inbounds reverse(data, 1, length(data)) |
| 155 | +# Base.reverse(data::AnyCuVector{T}) where {T} = @inbounds reverse(data, 1, length(data)) |
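
The out-of-place 1-dimensional method allocates a fresh vector, copies the prefix before start and the suffix after stop with copyto!, and reverses only the start:stop window. Illustrative usage, assuming a working device:

    using CUDA

    v = CuArray(Float32.(1:6))
    w = reverse(v, 2, 5)      # w == [1, 5, 4, 3, 2, 6]; v is unchanged
    u = reverse(v)            # whole-vector convenience method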