Skip to content

Commit e857158

Browse files
Merge pull request #35 from MikaelSlevinsky/feat-triangular-harmonics
Start triangular harmonics, support experimental native multithreading
2 parents bc60c12 + d308a75 commit e857158

15 files changed

+391
-37
lines changed

.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
docs/build/
2-
docs/site/
2+
docs/site/

LICENSE.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
The FastTransforms.jl package is licensed under the MIT "Expat" License:
22

3-
> Copyright (c) 2017: Richard Mikael Slevinsky.
3+
> Copyright (c) 2016-2018: Richard Mikael Slevinsky and other contributors:
4+
>
5+
> https://github.com/MikaelSlevinsky/FastTransforms.jl/graphs/contributors
46
>
57
> Permission is hereby granted, free of charge, to any person obtaining
68
> a copy of this software and associated documentation files (the

README.md

+6-4
Original file line numberDiff line numberDiff line change
@@ -166,14 +166,16 @@ As with other fast transforms, `plan_sph2fourier` saves effort by caching the pr
166166

167167
[1] B. Alpert and V. Rokhlin. <a href="http://dx.doi.org/10.1137/0912009">A fast algorithm for the evaluation of Legendre expansions</a>, *SIAM J. Sci. Stat. Comput.*, **12**:158—179, 1991.
168168

169-
[2] N. Hale and A. Townsend. <a href="http://dx.doi.org/10.1137/130932223">A fast, simple, and stable Chebyshev—Legendre transform using and asymptotic formula</a>, *SIAM J. Sci. Comput.*, **36**:A148—A167, 2014.
169+
[2] N. Hale and A. Townsend. <a href="http://dx.doi.org/10.1137/130932223">A fast, simple, and stable Chebyshev—Legendre transform using an asymptotic formula</a>, *SIAM J. Sci. Comput.*, **36**:A148—A167, 2014.
170170

171171
[3] J. Keiner. <a href="http://dx.doi.org/10.1137/070703065">Computing with expansions in Gegenbauer polynomials</a>, *SIAM J. Sci. Comput.*, **31**:2151—2171, 2009.
172172

173173
[4] D. Ruiz—Antolín and A. Townsend. <a href="https://arxiv.org/abs/1701.04492">A nonuniform fast Fourier transform based on low rank approximation</a>, arXiv:1701.04492, 2017.
174174

175-
[5] R. M. Slevinsky. <a href="https://doi.org/10.1093/imanum/drw070">On the use of Hahn's asymptotic formula and stabilized recurrence for a fast, simple, and stable Chebyshev—Jacobi transform</a>, in press at *IMA J. Numer. Anal.*, 2017.
175+
[5] R. M. Slevinsky. <a href="https://doi.org/10.1093/imanum/drw070">On the use of Hahn's asymptotic formula and stabilized recurrence for a fast, simple, and stable Chebyshev—Jacobi transform</a>, *IMA J. Numer. Anal.*, **38**:102—124, 2018.
176176

177-
[6] R. M. Slevinsky. <a href="https://arxiv.org/abs/1705.05448">Fast and backward stable transforms between spherical harmonic expansions and bivariate Fourier series</a>, arXiv:1705.05448, 2017.
177+
[6] R. M. Slevinsky. <a href="https://doi.org/10.1016/j.acha.2017.11.001">Fast and backward stable transforms between spherical harmonic expansions and bivariate Fourier series</a>, in press at *Appl. Comput. Harmon. Anal.*, 2017.
178178

179-
[7] A. Townsend, M. Webb, and S. Olver. <a href="https://doi.org/10.1090/mcom/3277">Fast polynomial transforms based on Toeplitz and Hankel matrices</a>, in press at *Math. Comp.*, 2017.
179+
[7] R. M. Slevinsky, <a href="https://arxiv.org/abs/1711.07866">Conquering the pre-computation in two-dimensional harmonic polynomial transforms</a>, arXiv:1711.07866, 2017.
180+
181+
[8] A. Townsend, M. Webb, and S. Olver. <a href="https://doi.org/10.1090/mcom/3277">Fast polynomial transforms based on Toeplitz and Hankel matrices</a>, in press at *Math. Comp.*, 2017.

REQUIRE

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
julia 0.6
22
ToeplitzMatrices 0.2
3-
HierarchicalMatrices 0.0.2
3+
HierarchicalMatrices 0.1
44
LowRankApprox 0.0.2
55
ProgressMeter 0.3.4
66
SpecialFunctions 0.3.4

src/FastTransforms.jl

+6
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ export SlowSphericalHarmonicPlan, FastSphericalHarmonicPlan, ThinSphericalHarmon
4242
export sph2fourier, fourier2sph, plan_sph2fourier
4343
export sphones, sphzeros, sphrand, sphrandn, sphevaluate
4444

45+
export SlowTriangularHarmonicPlan
46+
export tri2cheb, cheb2tri, plan_tri2cheb
47+
export triones, trizeros, trirand, trirandn, trievaluate
48+
4549
# Other module methods and constants:
4650
#export ChebyshevJacobiPlan, jac2cheb, cheb2jac
4751
#export sqrtpi, pochhammer, stirlingseries, stirlingremainder, Aratio, Cratio, Anαβ
@@ -51,6 +55,7 @@ export sphones, sphzeros, sphrand, sphrandn, sphevaluate
5155
#export fejer2, fejer_plan2, fejerweights2
5256
#export RecurrencePlan, forward_recurrence!, backward_recurrence
5357

58+
include("stepthreading.jl")
5459
include("fftBigFloat.jl")
5560
include("specialfunctions.jl")
5661
include("clenshawcurtis.jl")
@@ -82,6 +87,7 @@ jac2jac(x...)=th_jac2jac(x...)
8287

8388
include("hierarchical.jl")
8489
include("SphericalHarmonics/SphericalHarmonics.jl")
90+
include("TriangularHarmonics/TriangularHarmonics.jl")
8591

8692
include("gaunt.jl")
8793

src/SphericalHarmonics/fastplan.jl

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ struct FastSphericalHarmonicPlan{T} <: SphericalHarmonicPlan{T}
88
B::Matrix{T}
99
end
1010

11-
function FastSphericalHarmonicPlan{T}(A::Matrix{T}, L::Int; opts...)
11+
function FastSphericalHarmonicPlan(A::Matrix{T}, L::Int; opts...) where T
1212
M, N = size(A)
1313
n = (N+1)÷2
14-
RP = RotationPlan(T, n-1)
14+
RP = RotationPlan(T, M-1)
1515
p1 = plan_normleg2cheb(A)
1616
p2 = plan_normleg12cheb2(A)
1717
p1inv = plan_cheb2normleg(A)
@@ -77,7 +77,7 @@ function Base.At_mul_B!(Y::Matrix, FP::FastSphericalHarmonicPlan, X::Matrix)
7777
At_mul_B_col_J!(Y, BF[J-1], B, 2J)
7878
At_mul_B_col_J!(Y, BF[J-1], B, 2J+1)
7979
end
80-
zero_spurious_modes!(Y)
80+
sph_zero_spurious_modes!(Y)
8181
end
8282

8383
Base.Ac_mul_B!(Y::Matrix, FP::FastSphericalHarmonicPlan, X::Matrix) = At_mul_B!(Y, FP, X)

src/SphericalHarmonics/slowplan.jl

+70-9
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import Base.LinAlg: Givens, AbstractRotation
22

33
### These three A_mul_B! should be in Base, but for the time being they do not add methods to Base.A_mul_B!; they add methods to the internal A_mul_B!.
44

5-
function A_mul_B!{T<:Real}(G::Givens{T}, A::AbstractVecOrMat)
5+
function A_mul_B!(G::Givens{T}, A::AbstractVecOrMat) where T<:Real
66
m, n = size(A, 1), size(A, 2)
77
if G.i2 > m
88
throw(DimensionMismatch("column indices for rotation are outside the matrix"))
@@ -28,7 +28,7 @@ function A_mul_B!(A::AbstractMatrix, G::Givens)
2828
return A
2929
end
3030

31-
function A_mul_B!{T<:Real}(A::AbstractMatrix, G::Givens{T})
31+
function A_mul_B!(A::AbstractMatrix, G::Givens{T}) where T<:Real
3232
m, n = size(A, 1), size(A, 2)
3333
if G.i2 > n
3434
throw(DimensionMismatch("column indices for rotation are outside the matrix"))
@@ -45,7 +45,7 @@ struct Pnmp2toPlm{T} <: AbstractRotation{T}
4545
rotations::Vector{Givens{T}}
4646
end
4747

48-
function Pnmp2toPlm{T}(::Type{T}, n::Int, m::Int)
48+
function Pnmp2toPlm(::Type{T}, n::Int, m::Int) where T
4949
G = Vector{Givens{T}}(n)
5050
@inbounds for= 1:n
5151
c = sqrt(T((2m+2)*(2+2m+3))/T((ℓ+2m+2)*(ℓ+2m+3)))
@@ -75,16 +75,77 @@ end
7575

7676
struct RotationPlan{T} <: AbstractRotation{T}
7777
layers::Vector{Pnmp2toPlm{T}}
78+
snm::Vector{T}
79+
cnm::Vector{T}
7880
end
7981

80-
function RotationPlan{T}(::Type{T}, n::Int)
82+
function RotationPlan(::Type{T}, n::Int) where T
8183
layers = Vector{Pnmp2toPlm{T}}(n-1)
8284
@inbounds for m = 0:n-2
8385
layers[m+1] = Pnmp2toPlm(T, n-1-m, m)
8486
end
85-
RotationPlan(layers)
87+
n = n+1
88+
snm = zeros(T, (n*(n+1))÷2)
89+
cnm = zeros(T, (n*(n+1))÷2)
90+
@inbounds for l = 0:n-1
91+
for m = 0:n-l-1
92+
nums = T((l+1)*(l+2))
93+
numc = T((2*m+2)*(2*l+2*m+5))
94+
den = T((l+2*m+3)*(l+2*m+4))
95+
snm[l+(m*(2*n+1-m))÷2+1] = sqrt(nums/den)
96+
cnm[l+(m*(2*n+1-m))÷2+1] = sqrt(numc/den)
97+
end
98+
end
99+
RotationPlan(layers, snm, cnm)
100+
end
101+
102+
function Base.A_mul_B!(P::RotationPlan, A::AbstractMatrix)
103+
N, M = size(A)
104+
snm = P.snm
105+
cnm = P.cnm
106+
@stepthreads for m = M÷2:-1:2
107+
@inbounds for j = m:-2:2
108+
for l = N-j:-1:1
109+
s = snm[l+(j-2)*(2*N+3-j)÷2]
110+
c = cnm[l+(j-2)*(2*N+3-j)÷2]
111+
a1 = A[l+N*(2*m-1)]
112+
a2 = A[l+2+N*(2*m-1)]
113+
a3 = A[l+N*(2*m)]
114+
a4 = A[l+2+N*(2*m)]
115+
A[l+N*(2*m-1)] = c*a1 + s*a2
116+
A[l+2+N*(2*m-1)] = c*a2 - s*a1
117+
A[l+N*(2*m)] = c*a3 + s*a4
118+
A[l+2+N*(2*m)] = c*a4 - s*a3
119+
end
120+
end
121+
end
122+
A
123+
end
124+
125+
function Base.At_mul_B!(P::RotationPlan, A::AbstractMatrix)
126+
N, M = size(A)
127+
snm = P.snm
128+
cnm = P.cnm
129+
@stepthreads for m = M÷2:-1:2
130+
@inbounds for j = reverse(m:-2:2)
131+
for l = 1:N-j
132+
s = snm[l+(j-2)*(2*N+3-j)÷2]
133+
c = cnm[l+(j-2)*(2*N+3-j)÷2]
134+
a1 = A[l+N*(2*m-1)]
135+
a2 = A[l+2+N*(2*m-1)]
136+
a3 = A[l+N*(2*m)]
137+
a4 = A[l+2+N*(2*m)]
138+
A[l+N*(2*m-1)] = c*a1 - s*a2
139+
A[l+2+N*(2*m-1)] = c*a2 + s*a1
140+
A[l+N*(2*m)] = c*a3 - s*a4
141+
A[l+2+N*(2*m)] = c*a4 + s*a3
142+
end
143+
end
144+
end
145+
A
86146
end
87147

148+
#=
88149
function Base.A_mul_B!(P::RotationPlan, A::AbstractMatrix)
89150
M, N = size(A)
90151
@inbounds for m = N÷2-2:-1:0
@@ -122,6 +183,7 @@ function Base.At_mul_B!(P::RotationPlan, A::AbstractMatrix)
122183
end
123184
A
124185
end
186+
=#
125187

126188
Base.Ac_mul_B!(P::RotationPlan, A::AbstractMatrix) = At_mul_B!(P, A)
127189

@@ -135,10 +197,9 @@ struct SlowSphericalHarmonicPlan{T} <: SphericalHarmonicPlan{T}
135197
B::Matrix{T}
136198
end
137199

138-
function SlowSphericalHarmonicPlan{T}(A::Matrix{T})
200+
function SlowSphericalHarmonicPlan(A::Matrix{T}) where T
139201
M, N = size(A)
140-
n = (N+1)÷2
141-
RP = RotationPlan(T, n-1)
202+
RP = RotationPlan(T, M-1)
142203
p1 = plan_normleg2cheb(A)
143204
p2 = plan_normleg12cheb2(A)
144205
p1inv = plan_cheb2normleg(A)
@@ -177,7 +238,7 @@ function Base.At_mul_B!(Y::Matrix, SP::SlowSphericalHarmonicPlan, X::Matrix)
177238
A_mul_B_col_J!!(Y, p1inv, B, J)
178239
A_mul_B_col_J!!(Y, p1inv, B, J+1)
179240
end
180-
zero_spurious_modes!(At_mul_B!(RP, Y))
241+
sph_zero_spurious_modes!(At_mul_B!(RP, Y))
181242
end
182243

183244
Base.Ac_mul_B!(Y::Matrix, SP::SlowSphericalHarmonicPlan, X::Matrix) = At_mul_B!(Y, SP, X)

src/SphericalHarmonics/sphfunctions.jl

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
function zero_spurious_modes!(A::AbstractMatrix)
1+
function sph_zero_spurious_modes!(A::AbstractMatrix)
22
M, N = size(A)
33
n = N÷2
44
@inbounds for j = 1:n
@@ -10,7 +10,7 @@ function zero_spurious_modes!(A::AbstractMatrix)
1010
A
1111
end
1212

13-
function sphrand{T}(::Type{T}, m::Int, n::Int)
13+
function sphrand(::Type{T}, m::Int, n::Int) where T
1414
A = zeros(T, m, 2n-1)
1515
for i = 1:m
1616
A[i,1] = rand(T)
@@ -24,7 +24,7 @@ function sphrand{T}(::Type{T}, m::Int, n::Int)
2424
A
2525
end
2626

27-
function sphrandn{T}(::Type{T}, m::Int, n::Int)
27+
function sphrandn(::Type{T}, m::Int, n::Int) where T
2828
A = zeros(T, m, 2n-1)
2929
for i = 1:m
3030
A[i,1] = randn(T)
@@ -38,7 +38,7 @@ function sphrandn{T}(::Type{T}, m::Int, n::Int)
3838
A
3939
end
4040

41-
function sphones{T}(::Type{T}, m::Int, n::Int)
41+
function sphones(::Type{T}, m::Int, n::Int) where T
4242
A = zeros(T, m, 2n-1)
4343
for i = 1:m
4444
A[i,1] = one(T)
@@ -52,7 +52,7 @@ function sphones{T}(::Type{T}, m::Int, n::Int)
5252
A
5353
end
5454

55-
sphzeros{T}(::Type{T}, m::Int, n::Int) = zeros(T, m, 2n-1)
55+
sphzeros(::Type{T}, m::Int, n::Int) where T = zeros(T, m, 2n-1)
5656

5757
function normalizecolumns!(A::AbstractMatrix)
5858
m, n = size(A)

src/SphericalHarmonics/synthesisanalysis.jl

+9-9
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ struct SynthesisPlan{T, P1, P2}
55
temp::Vector{T}
66
end
77

8-
function plan_synthesis{T<:fftwNumber}(A::Matrix{T})
8+
function plan_synthesis(A::Matrix{T}) where T<:fftwNumber
99
m, n = size(A)
1010
x = FFTW.FakeArray(T, m)
1111
y = FFTW.FakeArray(T, n)
@@ -22,7 +22,7 @@ struct AnalysisPlan{T, P1, P2}
2222
temp::Vector{T}
2323
end
2424

25-
function plan_analysis{T<:fftwNumber}(A::Matrix{T})
25+
function plan_analysis(A::Matrix{T}) where T<:fftwNumber
2626
m, n = size(A)
2727
x = FFTW.FakeArray(T, m)
2828
y = FFTW.FakeArray(T, n)
@@ -32,7 +32,7 @@ function plan_analysis{T<:fftwNumber}(A::Matrix{T})
3232
AnalysisPlan(planθ, planφ, C, zeros(T, n))
3333
end
3434

35-
function Base.A_mul_B!{T}(Y::Matrix{T}, P::SynthesisPlan{T}, X::Matrix{T})
35+
function Base.A_mul_B!(Y::Matrix{T}, P::SynthesisPlan{T}, X::Matrix{T}) where T
3636
M, N = size(X)
3737

3838
# Column synthesis
@@ -73,7 +73,7 @@ function Base.A_mul_B!{T}(Y::Matrix{T}, P::SynthesisPlan{T}, X::Matrix{T})
7373
Y
7474
end
7575

76-
function Base.A_mul_B!{T}(Y::Matrix{T}, P::AnalysisPlan{T}, X::Matrix{T})
76+
function Base.A_mul_B!(Y::Matrix{T}, P::AnalysisPlan{T}, X::Matrix{T}) where T
7777
M, N = size(X)
7878

7979
# Row analysis
@@ -112,7 +112,7 @@ end
112112

113113

114114

115-
function row_analysis!{T}(P, C, vals::Vector{T})
115+
function row_analysis!(P, C, vals::Vector{T}) where T
116116
n = length(vals)
117117
cfs = scale!(two(T)/n,P*vals)
118118
cfs[1] *= half(T)
@@ -123,7 +123,7 @@ function row_analysis!{T}(P, C, vals::Vector{T})
123123
negateeven!(reverseeven!(A_mul_B!(C, cfs)))
124124
end
125125

126-
function row_synthesis!{T}(P, C, cfs::Vector{T})
126+
function row_synthesis!(P, C, cfs::Vector{T}) where T
127127
n = length(cfs)
128128
Ac_mul_B!(C, reverseeven!(negateeven!(cfs)))
129129
if iseven(n)
@@ -171,17 +171,17 @@ function negateeven!(x::Vector)
171171
x
172172
end
173173

174-
function A_mul_B_col_J!{T}(Y::Matrix{T}, P::r2rFFTWPlan{T}, X::Matrix{T}, J::Int)
174+
function A_mul_B_col_J!(Y::Matrix{T}, P::r2rFFTWPlan{T}, X::Matrix{T}, J::Int) where T
175175
unsafe_execute_col_J!(P, X, Y, J)
176176
return Y
177177
end
178178

179-
function unsafe_execute_col_J!{T<:fftwDouble}(plan::r2rFFTWPlan{T}, X::Matrix{T}, Y::Matrix{T}, J::Int)
179+
function unsafe_execute_col_J!(plan::r2rFFTWPlan{T}, X::Matrix{T}, Y::Matrix{T}, J::Int) where T<:fftwDouble
180180
M = size(X, 1)
181181
ccall((:fftw_execute_r2r, libfftw), Void, (PlanPtr, Ptr{T}, Ptr{T}), plan, pointer(X, M*(J-1)+1), pointer(Y, M*(J-1)+1))
182182
end
183183

184-
function unsafe_execute_col_J!{T<:fftwSingle}(plan::r2rFFTWPlan{T}, X::Matrix{T}, Y::Matrix{T}, J::Int)
184+
function unsafe_execute_col_J!(plan::r2rFFTWPlan{T}, X::Matrix{T}, Y::Matrix{T}, J::Int) where T<:fftwSingle
185185
M = size(X, 1)
186186
ccall((:fftwf_execute_r2r, libfftwf), Void, (PlanPtr, Ptr{T}, Ptr{T}), plan, pointer(X, M*(J-1)+1), pointer(Y, M*(J-1)+1))
187187
end

src/SphericalHarmonics/thinplan.jl

+3-3
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ struct ThinSphericalHarmonicPlan{T} <: SphericalHarmonicPlan{T}
1212
B::Matrix{T}
1313
end
1414

15-
function ThinSphericalHarmonicPlan{T}(A::Matrix{T}, L::Int; opts...)
15+
function ThinSphericalHarmonicPlan(A::Matrix{T}, L::Int; opts...) where T
1616
M, N = size(A)
1717
n = (N+1)÷2
18-
RP = RotationPlan(T, n-1)
18+
RP = RotationPlan(T, M-1)
1919
p1 = plan_normleg2cheb(A)
2020
p2 = plan_normleg12cheb2(A)
2121
p1inv = plan_cheb2normleg(A)
@@ -148,7 +148,7 @@ function Base.At_mul_B!(Y::Matrix, TP::ThinSphericalHarmonicPlan, X::Matrix)
148148
end
149149
end
150150

151-
zero_spurious_modes!(Y)
151+
sph_zero_spurious_modes!(Y)
152152
end
153153

154154
Base.Ac_mul_B!(Y::Matrix, TP::ThinSphericalHarmonicPlan, X::Matrix) = At_mul_B!(Y, TP, X)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
@compat abstract type TriangularHarmonicPlan{T} end
2+
3+
function *(P::TriangularHarmonicPlan, X::AbstractMatrix)
4+
A_mul_B!(zero(X), P, X)
5+
end
6+
7+
function \(P::TriangularHarmonicPlan, X::AbstractMatrix)
8+
At_mul_B!(zero(X), P, X)
9+
end
10+
11+
include("trifunctions.jl")
12+
include("slowplan.jl")
13+
14+
function plan_tri2cheb(A::AbstractMatrix, α, β, γ; opts...)
15+
M, N = size(A)
16+
if M 1023
17+
SlowTriangularHarmonicPlan(A, α, β, γ)
18+
else
19+
ThinTriangularHarmonicPlan(A, α, β, γ; opts...)
20+
end
21+
end
22+
23+
tri2cheb(A::AbstractMatrix, α, β, γ; opts...) = plan_tri2cheb(A, α, β, γ; opts...)*A
24+
cheb2tri(A::AbstractMatrix, α, β, γ; opts...) = plan_tri2cheb(A, α, β, γ; opts...)\A

0 commit comments

Comments
 (0)