Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inexact HMatrix #60

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion src/compressor.jl
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ function _aca_partial(K, irange, jrange, atol, rmax, rtol, istart, buffer_ = not
ishift, jshift = first(irange) - 1, first(jrange) - 1
# maps global indices to local indices
end
####
#Vector of residuals to execute InexactGMRES
rlist = Vector{Float64}()
####
buffer = isnothing(buffer_) ? ACABuffer(T, m, n) : buffer_
A, B, I, J = buffer.A, buffer.B, buffer.I, buffer.J # extract the individual buffers for convenience
@assert A.k == 0 && B.k == 0 "buffers must be empty"
Expand Down Expand Up @@ -158,14 +162,23 @@ function _aca_partial(K, irange, jrange, atol, rmax, rtol, istart, buffer_ = not
# estimate the norm by || K || ≈ || R_k ||
est_norm = _update_frob_norm(est_norm, A, B)
i = _aca_partial_pivot(a, I)

###Pushing residue to list
push!(rlist,(er/est_norm))

@debug r, er, m, n
end
end
# copy from buffer to matrix and create Rk matrix
R_ = RkMatrix(Matrix(A), Matrix(B))
R_ = RkMatrix(Matrix(A), Matrix(B),Vector(rlist))
# # indicate that `A` and `B` can be reused
reset!(A)
reset!(B)
## also reusing rlist, so we empty it
## could have used VectorOfVectors() but didn't think of anything better to
## adapt it a varying vector, since rlist may not be equal to rmax, the only fixed
## parameter we have
empty!(rlist)
return R_
end

Expand Down
131 changes: 131 additions & 0 deletions src/multiplication.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
"""
mutable struct ITerm{T}

Structure to be used in the inexact product.

"""
mutable struct ITerm{R,T}<:AbstractMatrix{T}
hmatrix::HMatrix{R,T}
rtol::Float64
end

Base.size(H::ITerm) = size(H.hmatrix)

Check warning on line 12 in src/multiplication.jl

View check run for this annotation

Codecov / codecov/patch

src/multiplication.jl#L12

Added line #L12 was not covered by tests

function Base.show(io::IO,hmat::ITerm)
isclean(hmat.hmatrix) || return print(io,"Dirty HMatrix in the ITerm")
print(io, "Inexact Product with tol $(hmat.rtol) of HMatrix of $(eltype(hmat.hmatrix)) with range $(rowrange(hmat.hmatrix)) × $(colrange(hmat.hmatrix))")
_show(io, hmat.hmatrix, false)
return io

Check warning on line 18 in src/multiplication.jl

View check run for this annotation

Codecov / codecov/patch

src/multiplication.jl#L14-L18

Added lines #L14 - L18 were not covered by tests
end

Base.show(io::IO, ::MIME"text/plain", hmat::ITerm) = show(io, hmat)

Check warning on line 21 in src/multiplication.jl

View check run for this annotation

Codecov / codecov/patch

src/multiplication.jl#L21

Added line #L21 was not covered by tests

"""
hmul!(C::HMatrix,A::HMatrix,B::HMatrix,a,b,compressor)

Expand Down Expand Up @@ -527,3 +549,112 @@
end
return H
end

##Inexact implementation

###Stablishes low-rank matrix-vector products with variable tolerance eps
function LinearAlgebra.mul!(
y::AbstractVector,
R::RkMatrix,
x::AbstractVector,
a::Number,
b::Number,
eps::Float64
)

@assert typeof(R)<:RkMatrix "R must be a RkMatrix"
k=findfirst(x->x<=eps, R.rel_er)
if isnothing(k) #tolerance too low for the residues in the rkmatrix, we'll use all columns
k = size(R.A,2)
end
tmp = adjoint(view(R.B,:,1:k)) * x
# tmp = mul!(R.buffer, adjoint(R.B), x)
return mul!(y, view(R.A,:,1:k), tmp, a, b)
end

### HMatrix-vector product with variable tolerance

function LinearAlgebra.mul!(
y::AbstractVector,
A::ITerm,
x::AbstractVector,
a::Number,
b::Number;
global_index = use_global_index(),
threads = use_threads(),
)
# since the HMatrix represents A = inv(Pr)*H*Pc, where Pr and Pc are row and column
# permutations, we need first to rewrite C <-- b*C + a*(inv(Pr)*H*Pc)*B as
# C <-- inv(Pr)*(b*Pr*C + a*H*(Pc*B)). Following this rewrite, the
# multiplication is performed by first defining B <-- Pc*B, and C <--
# Pr*C, doing the multiplication with the permuted entries, and then
# permuting the result back C <-- inv(Pr)*C at the end.
if global_index
# permute input
x = x[colperm(A.hmatrix)]
y = permute!(y, rowperm(A.hmatrix))
rmul!(x, a) # multiply in place since this is a new copy, so does not mutate exterior x
elseif a != 1
x = a * x # new copy of x since we should not mutate the external x in mul!
end
iszero(b) ? fill!(y, zero(eltype(y))) : rmul!(y, b)
# offset in case A is not indexed starting at (1,1); e.g. A is not the root
# of and HMatrix
offset = pivot(A.hmatrix) .- 1
if threads
_hgemv_threads!(y, x, leaves(A.hmatrix), offset,A.rtol) # threaded implementation
else
_hgemv_recursive!(y, A.hmatrix, x, offset,A.rtol) # serial implementation
end
#_hgemv_recursive!(y, A, x, offset,tol) # serial implementation
# permute output
global_index && invpermute!(y, rowperm(A.hmatrix))
return y
end


function _hgemv_recursive!(C::AbstractVector, A::HTypes, B::AbstractVector, offset,tol::Float64)
if isleaf(A)
irange = rowrange(A) .- offset[1]
jrange = colrange(A) .- offset[2]
d = data(A)
if isadmissible(A)
mul!(view(C, irange), d, view(B, jrange), 1, 1,tol)
else
mul!(view(C, irange), d, view(B, jrange), 1, 1)
end
else
for block in children(A)
_hgemv_recursive!(C, block, B, offset,tol)
end
end
return C
end

function _hgemv_threads!(C::AbstractVector, B::AbstractVector, leaves, offset, tol::Float64)
acc = Threads.Atomic{Int}(1)
lck = ReentrantLock()
# spawn np workers to assemble the leaves in parallel
np = Threads.nthreads()
nl = length(leaves)
@sync for _ in 1:np
Threads.@spawn begin
buf = zero(C)
while true
i = Threads.atomic_add!(acc, 1)
i > nl && break
leaf = leaves[i]
irange = rowrange(leaf) .- offset[1]
jrange = colrange(leaf) .- offset[2]
if isadmissible(leaf)
mul!(view(buf, irange), data(leaf), view(B, jrange), 1, 1,tol)
else
mul!(view(buf, irange), data(leaf), view(B, jrange), 1, 1)
end
end
# add the local buffer to the global buffer
@lock lck axpy!(true, buf, C)
end
end

Check warning on line 658 in src/multiplication.jl

View check run for this annotation

Codecov / codecov/patch

src/multiplication.jl#L658

Added line #L658 was not covered by tests
return C
end
13 changes: 10 additions & 3 deletions src/rkmatrix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,25 @@ to get the respective adjoints.
"""
mutable struct RkMatrix{T} <: AbstractMatrix{T}
A::Matrix{T}
B::Matrix{T}
function RkMatrix(A::Matrix{T}, B::Matrix{T}) where {T}
B::Matrix{T}
rel_er :: Vector{Float64}
function RkMatrix(A::Matrix{T}, B::Matrix{T}, rlist::Vector{Float64}) where {T}
@assert size(A, 2) == size(B, 2) "second dimension of `A` and `B` must match"
m, r = size(A)
n = size(B, 1)
if r * (m + n) >= m * n
@debug "Inefficient RkMatrix:" size(A) size(B)
end
return new{T}(A, B)
return new{T}(A, B, rlist)
end
end

function RkMatrix(A::Matrix{T}, B::Matrix{T}) where {T}
rk_matrix = RkMatrix(A,B,Vector{Float64}(undef,0))

return rk_matrix
end

function Base.setproperty!(R::RkMatrix, s::Symbol, mat::Base.Matrix)
setfield!(R, s, mat)
return R
Expand Down
3 changes: 2 additions & 1 deletion test/compressor_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ Random.seed!(1)

@testset "Scalar" begin
T = ComplexF64
m, n = 100, 100
c= 1000
m, n = c, c
X = rand(SVector{3,Float64}, m)
Y = map(i -> SVector(10, 0, 0) + rand(SVector{3,Float64}), 1:n)
K = helmholtz_matrix(X, Y, 1.0)
Expand Down
36 changes: 35 additions & 1 deletion test/multiplication_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ using Random
using StaticArrays

using HMatrices: RkMatrix

using HMatrices: ITerm
include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl"))

Random.seed!(1)
Expand Down Expand Up @@ -86,6 +86,38 @@ end
@test exact ≈ approx
end

@testset "exact vs inexact HMatrix-vector products" begin
h_term_test = ITerm(H,1.0e-5)

#tests with and without global indexes

exact = mul!(copy(y), H, x, α, β; threads = false, global_index = false)
#approx= mul!(copy(y), H, x, α, β,1.0e-5; threads = false, global_index = false)
approx= mul!(copy(y), h_term_test, x, α, β; threads = false, global_index = false)
@test isapprox(exact,approx;rtol=1.0e-5)
exact = mul!(copy(y), H, x, α, β;threads=false)
#approx = mul!(copy(y), H, x, α, β,1e-5;threads=false)
approx= mul!(copy(y), h_term_test, x, α, β; threads = false)
@test isapprox(exact,approx;rtol=1.0e-5)

#no mentinon to threads, so it's on by default
exact = mul!(copy(y), H, x, α, β; global_index = false)
#approx= mul!(copy(y), H, x, α, β,1.0e-5; global_index = false)
approx= mul!(copy(y), h_term_test, x, α, β;global_index = false)
@test isapprox(exact,approx;rtol=1.0e-5)

exact = mul!(copy(y), H, x, α, β)
#approx = mul!(copy(y), H, x, α, β,1e-5)
approx= mul!(copy(y), h_term_test, x, α, β)
@test isapprox(exact,approx;rtol=1.0e-5)

#last test checks that if the desired tolerance is to small, we'll use the full low rank matrix instead
exact = mul!(copy(y), H, x, α, β)
h_term_test.rtol=0.0
approx = mul!(copy(y), h_term_test, x, α, β)
@test isapprox(exact,approx;rtol=1.0e-5)
end

@testset "hermitian" begin
threads = false
for threads in (true, false)
Expand Down Expand Up @@ -126,4 +158,6 @@ end
approx = mul!(copy(y), H, x, α, β; threads = false, global_index = true)
@test exact ≈ approx
end


end
Loading