Skip to content

Commit ff7b123

Browse files
committed
added GammaNormal tests and fixed some deprecations
1 parent 0358537 commit ff7b123

File tree

5 files changed

+133
-93
lines changed

5 files changed

+133
-93
lines changed

src/distributions.jl

Lines changed: 120 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -13,136 +13,179 @@ abstract type ContinuousMultivariateConjugatePostDistribution <: MultivariateCon
1313
# Gaussian with Normal Inverse Wishart Prior
1414
mutable struct WishartGaussian <: ContinuousMultivariateConjugatePostDistribution
1515

16-
D::Int
16+
D::Int
1717

18-
# sufficient statistics
19-
n::Int
20-
sums::Vector{Float64}
21-
ssums::Array{Float64}
18+
# sufficient statistics
19+
n::Int
20+
sums::Vector{Float64}
21+
ssums::Array{Float64}
2222

23-
# base model parameters
24-
μ0::Vector{Float64}
25-
κ0::Float64
26-
ν0::Float64
27-
Σ0::Array{Float64}
23+
# base model parameters
24+
μ0::Vector{Float64}
25+
κ0::Float64
26+
ν0::Float64
27+
Σ0::Array{Float64}
2828

29-
function WishartGaussian(μ0::Vector{Float64}, κ0::Float64,
30-
ν0::Float64, Σ0::Array{Float64})
29+
end
30+
31+
"""
32+
WishartGaussian(μ0, κ0, ν0, Σ0)
33+
34+
## Gaussian-inverse-Wishart distribution
35+
A Gaussian-inverse-Wishart distribution is the conjugate prior of a multivariate normal distribution with unknown mean and covariance matrix.
36+
37+
## Parameters
38+
* `μ0, Dx1`: location
39+
* `κ0 > 0`: number of pseudo-observations
40+
* `ν0 > D-1`: degrees of freedom
41+
* `Σ0 > 0, DxD`: scale matrix
3142
32-
d = length(μ0)
33-
new(d, 0, zeros(d), zeros(d, d), μ0, κ0, ν0, Σ0)
34-
end
43+
## Example
44+
```julia-repl
45+
julia> (N, D) = size(X)
46+
julia> μ0 = vec(mean(X, dims = 1))
47+
julia> d = WishartGaussian(μ0, 1.0, 2*D, cov(X))
48+
```
3549
50+
"""
51+
function WishartGaussian(μ0::Vector{Float64}, κ0::Float64,
52+
ν0::Float64, Σ0::Array{Float64})
53+
54+
d = length(μ0)
55+
(D1, D2) = size(Σ0)
56+
@assert D1 == D2
57+
@assert D1 == d
58+
59+
WishartGaussian(d, 0, zeros(d), zeros(d, d), μ0, κ0, ν0, Σ0)
3660
end
3761

62+
3863
# Normal with Gamma prior
3964
mutable struct GammaNormal <: ContinuousUnivariateConjugatePostDistribution
4065

41-
# sufficient statistics
42-
n::Int
43-
sums::Float64
44-
ssums::Float64
66+
# sufficient statistics
67+
n::Int
68+
sums::Float64
69+
ssums::Float64
4570

46-
# model parameters
47-
μ0::Float64
48-
λ0::Float64
49-
α0::Float64
50-
β0::Float64
71+
# model parameters
72+
μ0::Float64
73+
λ0::Float64
74+
α0::Float64
75+
β0::Float64
5176

52-
function GammaNormal(;μ0 = 0.0, λ0 = 1.0, α0 = 1.0, β0 = 1.0)
53-
new(0, 0.0, 0.0, μ0, λ0, α0, β0)
54-
end
77+
end
5578

79+
"""
80+
GammaNormal(; μ0 = 0.0, λ0 = 1.0, α0 = 1.0, β0 = 1.0)
81+
82+
## Normal-Gamma distribution
83+
A Normal-Gamma distribution is the conjugate prior of a Normal distribution
84+
with unknown mean and precision.
85+
86+
## Parameters
87+
* `μ0`: location
88+
* `λ0 > 0`: number of pseudo-observations
89+
* `α0 > 0`
90+
* `β0 > 0`
91+
92+
## Example
93+
```julia
94+
d = GammaNormal()
95+
```
96+
"""
97+
function GammaNormal(;μ0 = 0.0, λ0 = 1.0, α0 = 1.0, β0 = 1.0)
98+
GammaNormal(0, 0.0, 0.0, μ0, λ0, α0, β0)
5699
end
57100

58101
# Normal with Normal prior
59102
mutable struct NormalNormal <: ContinuousUnivariateConjugatePostDistribution
60103

61-
# sufficient statistics
62-
n::Int
63-
sums::Float64
64-
ssums::Float64
104+
# sufficient statistics
105+
n::Int
106+
sums::Float64
107+
ssums::Float64
65108

66-
# model parameters
67-
μ0::Float64
68-
σ0::Float64
109+
# model parameters
110+
μ0::Float64
111+
σ0::Float64
69112

70-
function NormalNormal(;μ0 = 0.0, σ0 = 1.0)
71-
new(0, 0.0, 0.0, μ0, σ0)
72-
end
113+
function NormalNormal(;μ0 = 0.0, σ0 = 1.0)
114+
new(0, 0.0, 0.0, μ0, σ0)
115+
end
73116

74117
end
75118

76119
# Gaussian with Diagonal Covariance
77120
mutable struct GaussianDiagonal{T <: ContinuousUnivariateConjugatePostDistribution} <: ContinuousMultivariateConjugatePostDistribution
78121

79-
# sufficient statistics
80-
dists::Vector{T}
122+
# sufficient statistics
123+
dists::Vector{T}
81124

82-
# isn't the default constructor sufficient here?
83-
#function GaussianDiagonal(dists::Vector{T})
84-
# new(dists)
85-
#end
125+
# isn't the default constructor sufficient here?
126+
#function GaussianDiagonal(dists::Vector{T})
127+
# new(dists)
128+
#end
86129

87130
end
88131

89132
# Multinomial with Dirichlet Prior
90133
mutable struct DirichletMultinomial <: DiscreteMultivariateConjugatePostDistribution
91134

92-
D::Int
135+
D::Int
93136

94-
# sufficient statistics
95-
n::Int
96-
counts::SparseMatrixCSC{Int,Int}
137+
# sufficient statistics
138+
n::Int
139+
counts::SparseMatrixCSC{Int,Int}
97140

98-
# base model parameters
99-
α0::Float64
141+
# base model parameters
142+
α0::Float64
100143

101-
# cache
102-
dirty::Bool
103-
Z2::Float64
104-
Z3::Array{Float64}
144+
# cache
145+
dirty::Bool
146+
Z2::Float64
147+
Z3::Array{Float64}
105148

106-
function DirichletMultinomial(D::Int, α0::Float64)
107-
new(D, 0, sparsevec(zeros(D)), α0, true, 0.0, Array{Float64}(0))
108-
end
149+
function DirichletMultinomial(D::Int, α0::Float64)
150+
new(D, 0, sparsevec(zeros(D)), α0, true, 0.0, Array{Float64}(0))
151+
end
109152

110153
end
111154

112155
# Categorical with Dirichlet Prior
113156
mutable struct DirichletCategorical <: DiscreteUnivariateConjugatePostDistribution
114157

115-
# sufficient statistics
116-
n::Int
117-
counts::SparseMatrixCSC{Int,Int}
158+
# sufficient statistics
159+
n::Int
160+
counts::SparseMatrixCSC{Int,Int}
118161

119-
# base model parameters
120-
α0::Float64
162+
# base model parameters
163+
α0::Float64
121164

122-
# cache
123-
dirty::Bool
124-
Z2::Float64
125-
Z3::Array{Float64}
165+
# cache
166+
dirty::Bool
167+
Z2::Float64
168+
Z3::Array{Float64}
126169

127-
function DirichletMultinomial(D::Int, α0::Float64)
128-
new(D, 0, sparsevec(zeros(D)), α0, true, 0.0, Array{Float64}(0))
129-
end
170+
function DirichletMultinomial(D::Int, α0::Float64)
171+
new(D, 0, sparsevec(zeros(D)), α0, true, 0.0, Array{Float64}(0))
172+
end
130173

131174
end
132175

133176
# Bernoulli with Beta Prior
134177
mutable struct BetaBernoulli <: DiscreteUnivariateConjugatePostDistribution
135178

136-
# sufficient statistics
137-
successes::Int
138-
n::Int
179+
# sufficient statistics
180+
successes::Int
181+
n::Int
139182

140-
# beta distribution parameters
141-
α0::Float64
142-
β0::Float64
183+
# beta distribution parameters
184+
α0::Float64
185+
β0::Float64
143186

144-
function BetaBernoulli(;α0 = 1.0, β0 = 1.0)
145-
new(0, 0, α0, β0)
146-
end
187+
function BetaBernoulli(;α0 = 1.0, β0 = 1.0)
188+
new(0, 0, α0, β0)
189+
end
147190

148191
end

src/dpmm.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,8 @@ function gibbs!(B::DPMBuffer)
211211
if k > length(B.G)
212212
# add new cluster
213213
Gk = add(B.G0, x)
214-
B.G = cat(1, B.G, Gk)
215-
B.C = cat(1, B.C, 0)
214+
B.G = vcat(B.G, Gk)
215+
B.C = vcat(B.C, 0)
216216
else
217217
# add to cluster
218218
add!(B.G[k], x)
@@ -234,7 +234,7 @@ end
234234
"Compute Energy of model for given data"
235235
function updateenergy!(B::DPMData, X::AbstractArray)
236236

237-
E = 0.00001
237+
E = eps() * 10^10
238238

239239
for xi in 1:size(X, 1)
240240

src/hdp.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -203,12 +203,12 @@ function gibbs!(B::HDPBuffer)
203203
if c > B.K
204204
# create new cluster
205205
B.K += 1
206-
B.G = cat(1, B.G, deepcopy(B.G0))
206+
B.G = vcat(B.G, deepcopy(B.G0))
207207
b = rand(Dirichlet([1, B.γ]))
208208
b = b * B.β[end]
209-
B.β = cat(1, B.β, 1)
209+
B.β = vcat(B.β, 1)
210210
B.β[end-1:end] = b
211-
B.C = cat(1, B.C, zeros(Int, 1, B.N0))
211+
B.C = vcat(B.C, zeros(Int, 1, B.N0))
212212
prob = zeros(B.K + 1) * -Inf
213213
end
214214

@@ -221,7 +221,7 @@ function gibbs!(B::HDPBuffer)
221221

222222
# sample number of tables
223223
kk = maximum([0, B.K - length(B.totalnt)])
224-
B.totalnt = cat(2, B.totalnt - sum(B.classnt, 1), zeros(Int, 1, kk))
224+
B.totalnt = hcat(B.totalnt - sum(B.classnt, 1), zeros(Int, 1, kk))
225225
B.classnt = randnumtable(B.α .* B.β[:,ones(Int, B.N0)]', B.C')
226226
B.totalnt = B.totalnt + sum(B.classnt, 1)
227227

test/distributionTests.jl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ using LinearAlgebra
6565
N = length(x)
6666

6767
# distribution
68-
d = GammaNormal(μ0, λ0, α0, β0)
68+
d = GammaNormal(μ0 = μ0, λ0 = λ0, α0 = α0, β0 = β0)
6969

7070
# test prior
7171
(μ, λ, α, β) = BayesianNonparametrics.posteriorParameters(d)
@@ -79,13 +79,14 @@ using LinearAlgebra
7979
BayesianNonparametrics.add!(d, x)
8080

8181
# test posterior paramters
82+
# see: https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf page 8.
8283
(μ, λ, α, β) = BayesianNonparametrics.posteriorParameters(d)
8384

84-
#@test μ ==
8585
@test λ == λ0 + N
86-
@test α = α0 + (N / 2)
87-
#@test β ==
88-
86+
@test α == α0 + (N / 2)
87+
@test μ == (λ0 * μ0 + N * mean(x)) / (λ0 + N)
88+
@test β == β0 + 1/2 * sum( (x .- mean(x)).^2 ) + ( λ0 * N * (mean(x) - μ0)^2 ) / (2 * λ)
89+
8990
# remove data
9091
BayesianNonparametrics.remove!(d, x)
9192

test/dpmTests.jl

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,8 @@ modelBuffer = init(X, model, initialisation)
1717
model0 = BayesianNonparametrics.extractpointestimate(modelBuffer)
1818
model1 = train(modelBuffer, DPMHyperparam(), Gibbs(maxiter = 1))[end]
1919

20-
@test model0.energy < model1.energy
21-
2220
initialisation = KMeansInitialisation(k = 10)
2321
modelBuffer = init(X, model, initialisation)
2422

2523
model0 = BayesianNonparametrics.extractpointestimate(modelBuffer)
2624
model1 = train(modelBuffer, DPMHyperparam(), Gibbs(maxiter = 1))[end]
27-
28-
@test model0.energy < model1.energy

0 commit comments

Comments
 (0)