IntegralEquations · maltezfaria · Jun 28, 2024 · Jun 28, 2024 · Jun 28, 2024 · Jun 28, 2024
diff --git a/src/compressor.jl b/src/compressor.jl
@@ -100,6 +100,10 @@ function _aca_partial(K, irange, jrange, atol, rmax, rtol, istart, buffer_ = not
         ishift, jshift = first(irange) - 1, first(jrange) - 1
         # maps global indices to local indices
     end
+    ####
+    #Vector of residuals to execute InexactGMRES
+    rlist = Vector{Float64}()
+    ####
     buffer = isnothing(buffer_) ? ACABuffer(T, m, n) : buffer_
     A, B, I, J = buffer.A, buffer.B, buffer.I, buffer.J # extract the individual buffers for convenience
     @assert A.k == 0 && B.k == 0 "buffers must be empty"
@@ -158,14 +162,23 @@ function _aca_partial(K, irange, jrange, atol, rmax, rtol, istart, buffer_ = not
             # estimate the norm by || K || ≈ || R_k ||
             est_norm = _update_frob_norm(est_norm, A, B)
             i = _aca_partial_pivot(a, I)
+
+            ###Pushing residue to list
+            push!(rlist,(er/est_norm))
+
             @debug r, er, m, n
         end
     end
     # copy from buffer to matrix and create Rk matrix
-    R_ = RkMatrix(Matrix(A), Matrix(B))
+    R_ = RkMatrix(Matrix(A), Matrix(B),Vector(rlist))
     # # indicate that `A` and `B` can be reused
     reset!(A)
     reset!(B)
+    ## also reusing rlist, so we empty it
+    ## could have used VectorOfVectors() but didn't think of anything better to
+    ## adapt it a varying vector, since rlist may not be equal to rmax, the only fixed
+    ## parameter we have
+    empty!(rlist)
     return R_
 end
 

diff --git a/src/multiplication.jl b/src/multiplication.jl
@@ -1,3 +1,25 @@
+"""
+    mutable struct ITerm{T}
+
+Structure to be used in the inexact product. 
+
+"""
+mutable struct ITerm{R,T}<:AbstractMatrix{T}
+    hmatrix::HMatrix{R,T}
+    rtol::Float64
+end
+
+Base.size(H::ITerm) = size(H.hmatrix)
+
+function Base.show(io::IO,hmat::ITerm)
+    isclean(hmat.hmatrix) || return print(io,"Dirty HMatrix in the ITerm")
+    print(io, "Inexact Product with tol $(hmat.rtol) of HMatrix of $(eltype(hmat.hmatrix)) with range $(rowrange(hmat.hmatrix)) × $(colrange(hmat.hmatrix))")
+    _show(io, hmat.hmatrix, false)
+    return io
+end
+
+Base.show(io::IO, ::MIME"text/plain", hmat::ITerm) = show(io, hmat)
+
 """
     hmul!(C::HMatrix,A::HMatrix,B::HMatrix,a,b,compressor)
 
@@ -527,3 +549,112 @@
     end
     return H
 end
+
+##Inexact implementation
+
+###Stablishes low-rank matrix-vector products with variable tolerance eps
+function LinearAlgebra.mul!(
+    y::AbstractVector,
+    R::RkMatrix,
+    x::AbstractVector,
+    a::Number,
+    b::Number,
+    eps::Float64
+)
+
+    @assert typeof(R)<:RkMatrix "R must be a RkMatrix"
+    k=findfirst(x->x<=eps, R.rel_er)
+    if isnothing(k) #tolerance too low for the residues in the rkmatrix, we'll use all columns
+        k = size(R.A,2)
+    end
+    tmp = adjoint(view(R.B,:,1:k)) * x
+    # tmp = mul!(R.buffer, adjoint(R.B), x)
+    return mul!(y, view(R.A,:,1:k), tmp, a, b)
+end
+
+### HMatrix-vector product with variable tolerance
+
+function LinearAlgebra.mul!(
+    y::AbstractVector,
+    A::ITerm,
+    x::AbstractVector,
+    a::Number,
+    b::Number;
+    global_index = use_global_index(),
+    threads = use_threads(),
+)
+    # since the HMatrix represents A = inv(Pr)*H*Pc, where Pr and Pc are row and column
+    # permutations, we need first to rewrite C <-- b*C + a*(inv(Pr)*H*Pc)*B as
+    # C <-- inv(Pr)*(b*Pr*C + a*H*(Pc*B)). Following this rewrite, the
+    # multiplication is performed by first defining B <-- Pc*B, and C <--
+    # Pr*C, doing the multiplication with the permuted entries, and then
+    # permuting the result  back C <-- inv(Pr)*C at the end.
+    if global_index
+        # permute input
+        x = x[colperm(A.hmatrix)]
+        y = permute!(y, rowperm(A.hmatrix))
+        rmul!(x, a) # multiply in place since this is a new copy, so does not mutate exterior x
+    elseif a != 1
+        x = a * x # new copy of x since we should not mutate the external x in mul!
+    end
+    iszero(b) ? fill!(y, zero(eltype(y))) : rmul!(y, b)
+    # offset in case A is not indexed starting at (1,1); e.g. A is not the root
+    # of and HMatrix
+    offset = pivot(A.hmatrix) .- 1
+    if threads
+        _hgemv_threads!(y, x, leaves(A.hmatrix), offset,A.rtol)  # threaded implementation
+    else
+        _hgemv_recursive!(y, A.hmatrix, x, offset,A.rtol) # serial implementation
+    end
+    #_hgemv_recursive!(y, A, x, offset,tol) # serial implementation
+    # permute output
+    global_index && invpermute!(y, rowperm(A.hmatrix))
+    return y
+end
+
+
+function _hgemv_recursive!(C::AbstractVector, A::HTypes, B::AbstractVector, offset,tol::Float64)
+    if isleaf(A)
+        irange = rowrange(A) .- offset[1]
+        jrange = colrange(A) .- offset[2]
+        d = data(A)
+        if isadmissible(A)
+            mul!(view(C, irange), d, view(B, jrange), 1, 1,tol)
+        else
+            mul!(view(C, irange), d, view(B, jrange), 1, 1)
+        end
+    else
+        for block in children(A)
+            _hgemv_recursive!(C, block, B, offset,tol)
+        end
+    end
+    return C
+end
+
+function _hgemv_threads!(C::AbstractVector, B::AbstractVector, leaves, offset, tol::Float64)
+    acc = Threads.Atomic{Int}(1)
+    lck = ReentrantLock()
+    # spawn np workers to assemble the leaves in parallel
+    np = Threads.nthreads()
+    nl = length(leaves)
+    @sync for _ in 1:np
+        Threads.@spawn begin
+            buf = zero(C)
+            while true
+                i = Threads.atomic_add!(acc, 1)
+                i > nl && break
+                leaf = leaves[i]
+                irange = rowrange(leaf) .- offset[1]
+                jrange = colrange(leaf) .- offset[2]
+                if isadmissible(leaf)
+                    mul!(view(buf, irange), data(leaf), view(B, jrange), 1, 1,tol)
+                else
+                    mul!(view(buf, irange), data(leaf), view(B, jrange), 1, 1)
+                end
+            end
+            # add the local buffer to the global buffer
+            @lock lck axpy!(true, buf, C)
+        end
+    end
+    return C
+end
diff --git a/src/rkmatrix.jl b/src/rkmatrix.jl
@@ -9,18 +9,25 @@ to get the respective adjoints.
 """
 mutable struct RkMatrix{T} <: AbstractMatrix{T}
     A::Matrix{T}
-    B::Matrix{T}
-    function RkMatrix(A::Matrix{T}, B::Matrix{T}) where {T}
+    B::Matrix{T}    
+    rel_er :: Vector{Float64}
+    function RkMatrix(A::Matrix{T}, B::Matrix{T}, rlist::Vector{Float64}) where {T}
         @assert size(A, 2) == size(B, 2) "second dimension of `A` and `B` must match"
         m, r = size(A)
         n = size(B, 1)
         if r * (m + n) >= m * n
             @debug "Inefficient RkMatrix:" size(A) size(B)
         end
-        return new{T}(A, B)
+        return new{T}(A, B, rlist)
     end
 end
 
+function RkMatrix(A::Matrix{T}, B::Matrix{T}) where {T}
+     rk_matrix = RkMatrix(A,B,Vector{Float64}(undef,0))
+
+     return rk_matrix
+end
+
 function Base.setproperty!(R::RkMatrix, s::Symbol, mat::Base.Matrix)
     setfield!(R, s, mat)
     return R

diff --git a/test/compressor_test.jl b/test/compressor_test.jl
@@ -11,7 +11,8 @@ Random.seed!(1)
 
 @testset "Scalar" begin
     T = ComplexF64
-    m, n = 100, 100
+    c= 1000
+    m, n = c, c
     X = rand(SVector{3,Float64}, m)
     Y = map(i -> SVector(10, 0, 0) + rand(SVector{3,Float64}), 1:n)
     K = helmholtz_matrix(X, Y, 1.0)

diff --git a/test/multiplication_test.jl b/test/multiplication_test.jl
@@ -5,7 +5,7 @@ using Random
 using StaticArrays
 
 using HMatrices: RkMatrix
-
+using HMatrices: ITerm
 include(joinpath(HMatrices.PROJECT_ROOT, "test", "testutils.jl"))
 
 Random.seed!(1)
@@ -86,6 +86,38 @@ end
         @test exact ≈ approx
     end
 
+    @testset "exact vs inexact HMatrix-vector products" begin
+        h_term_test = ITerm(H,1.0e-5)
+
+        #tests with and without global indexes
+
+        exact = mul!(copy(y), H, x, α, β; threads = false, global_index = false)
+        #approx= mul!(copy(y), H, x, α, β,1.0e-5; threads = false, global_index = false)
+        approx= mul!(copy(y), h_term_test, x, α, β; threads = false, global_index = false)
+        @test isapprox(exact,approx;rtol=1.0e-5)
+        exact = mul!(copy(y), H, x, α, β;threads=false)  
+        #approx = mul!(copy(y), H, x, α, β,1e-5;threads=false)
+        approx= mul!(copy(y), h_term_test, x, α, β; threads = false)
+        @test isapprox(exact,approx;rtol=1.0e-5)
+
+        #no mentinon to threads, so it's on by default
+        exact = mul!(copy(y), H, x, α, β;  global_index = false)
+        #approx= mul!(copy(y), H, x, α, β,1.0e-5; global_index = false)
+        approx= mul!(copy(y), h_term_test, x, α, β;global_index = false)
+        @test isapprox(exact,approx;rtol=1.0e-5)
+
+        exact = mul!(copy(y), H, x, α, β)   
+        #approx = mul!(copy(y), H, x, α, β,1e-5)
+        approx= mul!(copy(y), h_term_test, x, α, β)
+        @test isapprox(exact,approx;rtol=1.0e-5)
+
+        #last test checks that if the desired tolerance is to small, we'll use the full low rank matrix instead
+        exact = mul!(copy(y), H, x, α, β)
+        h_term_test.rtol=0.0
+        approx = mul!(copy(y), h_term_test, x, α, β)
+        @test isapprox(exact,approx;rtol=1.0e-5)
+    end
+
     @testset "hermitian" begin
         threads = false
         for threads in (true, false)
@@ -126,4 +158,6 @@ end
         approx = mul!(copy(y), H, x, α, β; threads = false, global_index = true)
         @test exact ≈ approx
     end
+
+
 end