Skip to content

Commit 827298b

Browse files
reorganise multithreaded code
1 parent 93292d2 commit 827298b

File tree

5 files changed

+211
-172
lines changed

5 files changed

+211
-172
lines changed

src/GPE.jl

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -215,20 +215,6 @@ function update_mll!(gp::GPE; noise::Bool=true, domean::Bool=true, kern::Bool=tr
215215
gp
216216
end
217217

218-
function _dmll_kern_row!(dmll, buf, k, ααinvcKI, X, data, j, dim, nparams)
219-
# diagonal
220-
dKij_dθ!(buf, k, X, X, data, j, j, dim, nparams)
221-
@inbounds for iparam in 1:nparams
222-
dmll[iparam] += buf[iparam] * ααinvcKI[j, j] / 2.0
223-
end
224-
# off-diagonal
225-
@inbounds for i in 1:j-1
226-
dKij_dθ!(buf, k, X, X, data, i, j, dim, nparams)
227-
@simd for iparam in 1:nparams
228-
dmll[iparam] += buf[iparam] * ααinvcKI[i, j]
229-
end
230-
end
231-
end
232218
"""
233219
dmll_kern!((dmll::AbstractVector, k::Kernel, X::AbstractMatrix, data::KernelData, ααinvcKI::AbstractMatrix))
234220
@@ -241,20 +227,20 @@ function dmll_kern!(dmll::AbstractVector, k::Kernel, X::AbstractMatrix, data::Ke
241227
@assert nparams == length(dmll)
242228
dK_buffer = Vector{Float64}(undef, nparams)
243229
dmll[:] .= 0.0
244-
# make a copy per thread for objects that are potentially not thread-safe:
245-
kcopies = [deepcopy(k) for _ in 1:Threads.nthreads()]
246-
buffercopies = [similar(dK_buffer) for _ in 1:Threads.nthreads()]
247-
dmllcopies = [deepcopy(dmll) for _ in 1:Threads.nthreads()]
248-
249-
@inbounds Threads.@threads for j in 1:nobs
250-
kthread = kcopies[Threads.threadid()]
251-
bufthread = buffercopies[Threads.threadid()]
252-
dmllthread = dmllcopies[Threads.threadid()]
253-
_dmll_kern_row!(dmllthread, bufthread, kthread,
254-
ααinvcKI, X, data, j, dim, nparams)
230+
@inbounds for j in 1:nobs
231+
# diagonal
232+
dKij_dθ!(dK_buffer, k, X, X, data, j, j, dim, nparams)
233+
for iparam in 1:nparams
234+
dmll[iparam] += dK_buffer[iparam] * ααinvcKI[j, j] / 2.0
235+
end
236+
# off-diagonal
237+
for i in j+1:nobs
238+
dKij_dθ!(dK_buffer, k, X, X, data, i, j, dim, nparams)
239+
@simd for iparam in 1:nparams
240+
dmll[iparam] += dK_buffer[iparam] * ααinvcKI[i, j]
241+
end
242+
end
255243
end
256-
257-
dmll[:] = sum(dmllcopies) # sum up the results from all threads
258244
return dmll
259245
end
260246

src/GaussianProcesses.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ const invΦ = norminvcdf
2727
# all package code should be included here
2828
include("means/means.jl")
2929
include("kernels/kernels.jl")
30+
include("covariance/covariance.jl")
3031
include("likelihoods/likelihoods.jl")
3132
include("common.jl")
3233
include("utils.jl")

src/covariance/covariance.jl

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""
    cov(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix[, data::KernelData=EmptyData()])

Create covariance matrix from kernel `k` and matrices of observations `X1` and `X2`, where
each column is an observation.
"""
function cov(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData=EmptyData())
    (d1, n1) = size(X1)
    (d2, n2) = size(X2)
    d1 == d2 || throw(ArgumentError("X1 and X2 must have same dimension"))
    # allocate an output whose eltype accommodates both input eltypes
    out = Array{promote_type(eltype(X1), eltype(X2))}(undef, n1, n2)
    return cov!(out, k, X1, X2, data)
end
14+
15+
"""
    cov(k::Kernel, X::AbstractMatrix[, data::KernelData = EmptyData()])

Create covariance matrix from kernel `k`, matrix of observations `X`, where each column is
an observation, and kernel data `data` constructed from input observations.
"""
function cov(k::Kernel, X::AbstractMatrix, data::KernelData=EmptyData())
    # symmetric case: both observation sets are the same matrix
    return cov(k, X, X, data)
end
22+
23+
"""
    cov!(cK::AbstractMatrix, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData=EmptyData())

Fill `cK` with the covariance between the observations (columns) of `X1` and `X2`
under kernel `k`.

Throws `ArgumentError` if `X1` and `X2` differ in dimension, or if `cK` does not
have size `(nobs1, nobs2)` — checks consistent with the multithreaded methods.
"""
function cov!(cK::AbstractMatrix, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData=EmptyData())
    dim1, nobs1 = size(X1)
    dim2, nobs2 = size(X2)
    # previously `dim` was silently taken from whichever matrix was unpacked last;
    # validate explicitly instead
    dim1 == dim2 || throw(ArgumentError("X1 and X2 must have same dimension"))
    (nobs1, nobs2) == size(cK) || throw(ArgumentError("cK has size $(size(cK)) X1 $(size(X1)) and X2 $(size(X2))"))
    cK .= cov_ij.(Ref(k), Ref(X1), Ref(X2), Ref(data), 1:nobs1, (1:nobs2)', dim1)
end
28+
# Symmetric convenience method: covariance of X with itself.
function cov!(cK::AbstractMatrix, k::Kernel, X::AbstractMatrix, data::KernelData=EmptyData())
    return cov!(cK, k, X, X, data)
end
29+
30+
# Covariance between observation `i` of `X1` and observation `j` of `X2`.
@inline function cov_ij(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, i::Int, j::Int, dim::Int)
    return cov(k, @view(X1[:, i]), @view(X2[:, j]))
end
# The default is to drop the KernelData and fall back on the plain version.
@inline function cov_ij(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData, i::Int, j::Int, dim::Int)
    return cov_ij(k, X1, X2, i, j, dim)
end
33+
34+
############################
35+
##### Kernel Gradients #####
36+
############################
37+
"""
    dKij_dθ!(dK, kern, X1, X2, data, i, j, dim, npars)

Fill `dK` with the gradient of entry `(i, j)` of the covariance matrix with respect to
each of the `npars` kernel hyperparameters.
"""
@inline @inbounds function dKij_dθ!(dK::AbstractVector, kern::Kernel, X1::AbstractMatrix, X2::AbstractMatrix,
                                    data::KernelData, i::Int, j::Int, dim::Int, npars::Int)
    for p in 1:npars
        dK[p] = dKij_dθp(kern, X1, X2, data, i, j, p, dim)
    end
end
43+
44+
"""
    grad_stack!(stack, k, X1, X2, data)

Calculate the stack `[dK/dθᵢ]` of kernel matrix gradients, writing the slice for
hyperparameter `p` into `stack[:, :, p]`. Returns `stack`.
"""
function grad_stack!(stack::AbstractArray, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData)
    nparams = num_params(k)
    @inbounds for p in 1:nparams
        grad_slice!(view(stack, :, :, p), k, X1, X2, data, p)
    end
    return stack
end
51+
52+
# Convenience method that constructs the KernelData on the fly.
function grad_stack!(stack::AbstractArray, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix)
    return grad_stack!(stack, k, X1, X2, KernelData(k, X1, X2))
end
54+
55+
# Convenience method that constructs the KernelData on the fly.
function grad_stack(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix)
    return grad_stack(k, X1, X2, KernelData(k, X1, X2))
end
56+
57+
"""
    grad_stack(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData)

Allocate and return the stack of kernel matrix gradients `[dK/dθᵢ]` as an
`nobs1 × nobs2 × num_params(k)` array.
"""
function grad_stack(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData)
    nobs1 = size(X1, 2)
    nobs2 = size(X2, 2)
    # BUG FIX: `eltype(X)` referenced an undefined variable `X` (the parameters are
    # X1/X2) and would throw UndefVarError; use the promoted eltype of both inputs,
    # matching the allocation in `cov`.
    stack = Array{promote_type(eltype(X1), eltype(X2))}(undef, nobs1, nobs2, num_params(k))
    grad_stack!(stack, k, X1, X2, data)
end
63+
64+
"""
    grad_slice!(dK::AbstractMatrix, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData, p::Int)

Fill `dK` with the gradient of the covariance matrix between `X1` and `X2` with
respect to hyperparameter `p`.

Throws `ArgumentError` if `X1` and `X2` differ in dimension — previously `dim` was
silently reassigned from `X2`, masking a mismatch.
"""
function grad_slice!(dK::AbstractMatrix, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData, p::Int)
    dim1, nobs1 = size(X1)
    dim2, nobs2 = size(X2)
    dim1 == dim2 || throw(ArgumentError("X1 and X2 must have same dimension"))
    dK .= dKij_dθp.(Ref(k), Ref(X1), Ref(X2), Ref(data), 1:nobs1, (1:nobs2)', p, dim1)
end
69+
70+
# Default: discard the KernelData and dispatch to the data-free gradient method.
@inline dKij_dθp(k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData, i::Int, j::Int, p::Int, dim::Int) =
    dKij_dθp(k, X1, X2, i, j, p, dim)

src/covariance/multithreaded.jl

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# Fill column `j` of the symmetric covariance matrix, mirroring the strictly
# lower entries into row `j`.
function _cov_row!(cK, k, X::AbstractMatrix, data, j, dim)
    cK[j, j] = cov_ij(k, X, X, data, j, j, dim)
    @inbounds for i in 1:(j - 1)
        kij = cov_ij(k, X, X, data, i, j, dim)
        cK[i, j] = kij
        cK[j, i] = kij
    end
end
8+
"""
    cov!(cK::Matrix, k::Kernel, X::Matrix, data::KernelData=EmptyData())

Multithreaded covariance of `X` with itself: each thread fills whole columns of `cK`.
Returns `cK`.
"""
function cov!(cK::Matrix, k::Kernel, X::Matrix, data::KernelData=EmptyData())
    dim, nobs = size(X)
    (nobs,nobs) == size(cK) || throw(ArgumentError("cK has size $(size(cK)) and X has size $(size(X))"))
    kcopies = [deepcopy(k) for _ in 1:Threads.nthreads()] # in case k is not threadsafe (e.g. ADkernel)
    @inbounds Threads.@threads for j in 1:nobs
        kthread = kcopies[Threads.threadid()]
        # BUG FIX: previously passed the shared `k` here instead of `kthread`,
        # leaving the per-thread copies unused and defeating their purpose.
        _cov_row!(cK, kthread, X, data, j, dim)
    end
    return cK
end
18+
# Fill row `i` of the cross-covariance matrix between `X1` and `X2`.
function _cov_row!(cK, k, X1::AbstractMatrix, X2::AbstractMatrix, data, i, dim, nobs2)
    @inbounds for col in 1:nobs2
        cK[i, col] = cov_ij(k, X1, X2, data, i, col, dim)
    end
end
23+
"""
    cov!(cK::AbstractMatrix, k::Kernel, X1::AbstractMatrix, X2::AbstractMatrix, data::KernelData=EmptyData())

Like [`cov(k, X1, X2)`](@ref), but stores the result in `cK` rather than a new matrix.
Multithreaded: each thread fills whole rows of `cK`.
"""
function cov!(cK::Matrix, k::Kernel, X1::Matrix, X2::Matrix, data::KernelData=EmptyData())
    # identical inputs: delegate to the symmetric method
    X1 === X2 && return cov!(cK, k, X1, data)
    dim1, nobs1 = size(X1)
    dim2, nobs2 = size(X2)
    dim1==dim2 || throw(ArgumentError("X1 and X2 must have same dimension"))
    (nobs1,nobs2) == size(cK) || throw(ArgumentError("cK has size $(size(cK)) X1 $(size(X1)) and X2 $(size(X2))"))
    dim = dim1
    # per-thread kernel copies in case `k` is not thread-safe
    kcopies = [deepcopy(k) for _ in 1:Threads.nthreads()]
    @inbounds Threads.@threads for row in 1:nobs1
        kthread = kcopies[Threads.threadid()]
        _cov_row!(cK, kthread, X1, X2, data, row, dim, nobs2)
    end
    return cK
end
44+
45+
# Fill column `j` of the gradient slice for parameter `p`, mirroring the strictly
# lower entries into row `j`.
function _grad_slice_row!(dK, k, X::AbstractMatrix, data, j, p, dim)
    dK[j, j] = dKij_dθp(k, X, X, data, j, j, p, dim)
    @inbounds @simd for i in 1:(j - 1)
        g = dKij_dθp(k, X, X, data, i, j, p, dim)
        dK[i, j] = g
        dK[j, i] = g
    end
end
52+
# Multithreaded gradient slice for the symmetric case: each thread fills whole
# columns of `dK`. Returns `dK`.
function grad_slice!(dK::AbstractMatrix, k::Kernel, X::Matrix, data::KernelData, p::Int)
    dim, nobs = size(X)
    (nobs,nobs) == size(dK) || throw(ArgumentError("dK has size $(size(dK)) and X has size $(size(X))"))
    # per-thread kernel copies in case `k` is not thread-safe
    kcopies = [deepcopy(k) for _ in 1:Threads.nthreads()]
    @inbounds Threads.@threads for col in 1:nobs
        kthread = kcopies[Threads.threadid()]
        _grad_slice_row!(dK, kthread, X, data, col, p, dim)
    end
    return dK
end
62+
# Fill row `i` of the gradient slice for parameter `p` in the two-matrix case.
function _grad_slice_row!(dK, k, X1::AbstractMatrix, X2::AbstractMatrix, data, i, p, dim, nobs2)
    @inbounds @simd for col in 1:nobs2
        dK[i, col] = dKij_dθp(k, X1, X2, data, i, col, p, dim)
    end
end
67+
# Multithreaded gradient slice between two observation matrices: each thread fills
# whole rows of `dK`. Returns `dK`.
function grad_slice!(dK::AbstractMatrix, k::Kernel, X1::Matrix, X2::Matrix, data::KernelData, p::Int)
    # identical inputs: delegate to the symmetric method
    X1 === X2 && return grad_slice!(dK, k, X1, data, p)
    dim1, nobs1 = size(X1)
    dim2, nobs2 = size(X2)
    dim1==dim2 || throw(ArgumentError("X1 and X2 must have same dimension"))
    (nobs1,nobs2) == size(dK) || throw(ArgumentError("dK has size $(size(dK)) X1 $(size(X1)) and X2 $(size(X2))"))
    dim = dim1
    # per-thread kernel copies in case `k` is not thread-safe
    kcopies = [deepcopy(k) for _ in 1:Threads.nthreads()]
    @inbounds Threads.@threads for row in 1:nobs1
        kthread = kcopies[Threads.threadid()]
        _grad_slice_row!(dK, kthread, X1, X2, data, row, p, dim, nobs2)
    end
    return dK
end
83+
84+
# Accumulate into `dmll` the contribution of column `j`: half the diagonal term
# plus the full strictly-upper-triangle terms of that column.
function _dmll_kern_row!(dmll, buf, k, ααinvcKI, X, data, j, dim, nparams)
    # diagonal entry, weighted by 1/2
    dKij_dθ!(buf, k, X, X, data, j, j, dim, nparams)
    @inbounds for p in 1:nparams
        dmll[p] += buf[p] * ααinvcKI[j, j] / 2.0
    end
    # entries above the diagonal
    @inbounds for i in 1:(j - 1)
        dKij_dθ!(buf, k, X, X, data, i, j, dim, nparams)
        @simd for p in 1:nparams
            dmll[p] += buf[p] * ααinvcKI[i, j]
        end
    end
end
98+
"""
    dmll_kern!(dmll::AbstractVector, k::Kernel, X::AbstractMatrix, data::KernelData, ααinvcKI::AbstractMatrix)

Derivative of the marginal log likelihood log p(Y|θ) with respect to the kernel hyperparameters.
Multithreaded: each thread accumulates into its own copy of `dmll`, and the copies are
summed at the end.
"""
function dmll_kern!(dmll::AbstractVector, k::Kernel, X::Matrix, data::KernelData,
                    ααinvcKI::Matrix{Float64}, covstrat::CovarianceStrategy)
    dim, nobs = size(X)
    nparams = num_params(k)
    @assert nparams == length(dmll)
    dK_buffer = Vector{Float64}(undef, nparams)
    dmll[:] .= 0.0
    nth = Threads.nthreads()
    # make a copy per thread for objects that are potentially not thread-safe:
    kcopies = [deepcopy(k) for _ in 1:nth]
    buffercopies = [similar(dK_buffer) for _ in 1:nth]
    dmllcopies = [deepcopy(dmll) for _ in 1:nth]
    # NOTE(review): indexing buffers by threadid() assumes each loop body stays on
    # one thread for its whole iteration — confirm against the @threads scheduler in use.
    @inbounds Threads.@threads for j in 1:nobs
        tid = Threads.threadid()
        _dmll_kern_row!(dmllcopies[tid], buffercopies[tid], kcopies[tid],
                        ααinvcKI, X, data, j, dim, nparams)
    end
    dmll[:] = sum(dmllcopies) # sum up the results from all threads
    return dmll
end

0 commit comments

Comments
 (0)