Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HyperLaplace distribution implemented #194

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
4 changes: 3 additions & 1 deletion copulas/univariate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from copulas.univariate.gamma import GammaUnivariate
from copulas.univariate.gaussian import GaussianUnivariate
from copulas.univariate.gaussian_kde import GaussianKDE
from copulas.univariate.hyper_laplace import HyperLaplace
from copulas.univariate.log_laplace import LogLaplace
from copulas.univariate.student_t import StudentTUnivariate
from copulas.univariate.truncated_gaussian import TruncatedGaussian
Expand All @@ -19,5 +20,6 @@
'ParametricType',
'BoundedType',
'UniformUnivariate',
'LogLaplace'
'LogLaplace',
'HyperLaplace'
)
300 changes: 300 additions & 0 deletions copulas/univariate/hyper_laplace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,300 @@
from random import random

import numpy as np
import scipy.stats
from scipy.special import gamma, psi

from copulas.univariate.base import BoundedType, ParametricType, Univariate


def gamma_d(x):
"""
Compute the derivative of gamma function

Arguments:
x (float)

Returns:
float:
The derivative of gamma function at x
"""
return gamma(x) * psi(x)


def g3g1_over_g22(x):
"""
Compute the value of Gamma(3x)Gamma(x)/Gamma^2(2x)

Arguments:
x (float)

Returns:
float:
The value of Gamma(3x)Gamma(x)/Gamma^2(2x)
"""
return gamma(3 * x) * gamma(x) / gamma(2 * x) ** 2


def g3g1_over_g22_d(x):
"""
Compute the derivative of Gamma(3x)Gamma(x)/Gamma^2(2x)

Arguments:
x (float)

Returns:
float:
The derivative of Gamma(3x)Gamma(x)/Gamma^2(2x) at x
"""
x1 = gamma(x)
x2 = gamma(2 * x)
x3 = gamma(3 * x)

x3_d = 3 * gamma_d(3 * x) * x1 * (x2 ** 2)
x1_d = x3 * gamma_d(x) * (x2 ** 2)
x2_d = 4 * x3 * x1 * gamma_d(2 * x) * x2
return (x3_d + x1_d - x2_d) / (x2 ** 4)


def solve_gamma_equation(y, num_iter=10, upper_threshold=2, lower_threshold=0.5):
"""
Solve for the equation Gamma(3x)Gamma(x)/Gamma^2(2x) = y using Newton's method

Arguments:
y (float)

num_iter (int):
number of iterations when using Newton's method

upper_threshold (float):
Maximum value of x_new/x_old allowed in Newton's method. Should be > 1

lower_threshold (float):
Minimum value of x_new/x_old allowed in Newton's method. Should be in (0,1)

Returns:
float:
The solution to Gamma(3x)Gamma(x)/Gamma^2(2x) = y
"""
x = 1
for i in range(num_iter):
x_new = x + (y - g3g1_over_g22(x)) / g3g1_over_g22_d(x)
if x_new > upper_threshold * x:
x = upper_threshold * x
elif x_new < lower_threshold * x:
x = lower_threshold * x
else:
x = x_new
return x


class HyperLaplace(Univariate):
"""
An HyperLaplace model object, implemented via a wrapper around scipy.stats.gamma

Documentation: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html

Math derivation: HyperLaplace(k, alpha) =
(gamma(loc = 0, scale = 1/k, a = 1/alpha))**(1/alpha) * Unif({-1,1})
"""

PARAMETRIC = ParametricType.PARAMETRIC
BOUNDED = BoundedType.UNBOUNDED

MODEL_CLASS = scipy.stats.gamma

_params = None
_model = None

alpha = None
k = None

def probability_density(self, X):
"""Compute the probability density for each point in X.

Arguments:
X (numpy.ndarray):
Values for which the probability density will be computed.
It must have shape (n, 1).

Returns:
numpy.ndarray:
Probability density values for points in X.

Raises:
NotFittedError:
if the model is not fitted.
"""
self.check_fit()
return self._model.pdf(abs(X) ** self.alpha) / 2 * self.alpha * abs(X) ** (self.alpha - 1)

def log_probability_density(self, X):
"""Compute the log of the probability density for each point in X.

Arguments:
X (numpy.ndarray):
Values for which the log probability density will be computed.
It must have shape (n, 1).

Returns:
numpy.ndarray:
Log probability density values for points in X.

Raises:
NotFittedError:
if the model is not fitted.
"""
self.check_fit()

return np.log(self.probability_density(X))

def cumulative_distribution(self, X):
"""Compute the cumulative distribution value for each point in X.

Arguments:
X (numpy.ndarray):
Values for which the cumulative distribution will be computed.
It must have shape (n, 1).

Returns:
numpy.ndarray:
Cumulative distribution values for points in X.

Raises:
NotFittedError:
if the model is not fitted.
"""
self.check_fit()
gamma_cdf = self._model.cdf(abs(X) ** self.alpha)
for i in range(len(X)):
cd, x = gamma_cdf[i], X[i]
if x > 0:
gamma_cdf[i] = (cd + 1) / 2
else:
gamma_cdf[i] = (1 - cd) / 2

return gamma_cdf

def percent_point(self, U):
"""Compute the inverse cumulative distribution value for each point in U.

Arguments:
U (numpy.ndarray):
Values for which the cumulative distribution will be computed.
It must have shape (n, 1) and values must be in [0,1].

Returns:
numpy.ndarray:
Inverse cumulative distribution values for points in U.

Raises:
NotFittedError:
if the model is not fitted.
"""
self.check_fit()
gamma_ppf = self._model.ppf(abs(U - 0.5) * 2) ** (1 / self.alpha)
for i in range(len(U)):
pp, u = gamma_ppf[i], U[i]
if u < 0.5:
gamma_ppf[i] = - pp

return gamma_ppf

def sample(self, n_samples=1):
"""Sample values from this model.

Argument:
n_samples (int):
Number of values to sample

Returns:
numpy.ndarray:
Array of shape (n_samples, 1) with values randomly
sampled from this model distribution.

Raises:
NotFittedError:
if the model is not fitted.
"""
self.check_fit()
gamma_rvs = self._model.rvs(n_samples) ** (1 / self.alpha)
for i in range(n_samples):
if random() > 0.5:
gamma_rvs[i] = - gamma_rvs[i]
return gamma_rvs

def _fit(self, X):
"""Fit the model to a non-constant random variable.

This fitting method is implemented by matching the theoritical mean
and variance with the emperical mean and variance.

Arguments:
X (numpy.ndarray):
Values of the random variable. It must have shape (n, 1).
"""
mean = np.mean(abs(X))
square = np.mean(X ** 2)
a = solve_gamma_equation(square / (mean ** 2))
self.alpha = 1 / a
self.k = (gamma(2 * a) / (gamma(a) * mean)) ** self.alpha

self._params = {
'loc': 0,
'scale': 1 / self.k,
'a': a
}

def _get_model(self):
return self.MODEL_CLASS(**self._params)

def _fit_constant(self, X):
self._params = {
'loc': np.unique(X)[0],
'scale': 0,
'a': 1
}

def _is_constant(self):
return self._params['scale'] == 0

def fit(self, X):
"""Fit the model to a random variable.

Arguments:
X (numpy.ndarray):
Values of the random variable. It must have shape (n, 1).
"""
if self._check_constant_value(X):
self._fit_constant(X)
else:
self._fit(X)
self._model = self._get_model()

self.fitted = True

def _get_params(self):
"""Return attributes from self._model to serialize.

Must be implemented in all the subclasses.

Returns:
dict:
Parameters to recreate self._model in its current fit status.
"""
return self._params.copy()

def _set_params(self, params):
"""Set the parameters of this univariate.

Args:
params (dict):
Parameters to recreate this instance.
"""
self._params = params.copy()
self.k = 1 / self._params['scale']
self.alpha = 1 / self._params['a']
if self._is_constant():
self._replace_constant_methods()
else:
self._model = self._get_model()
10 changes: 7 additions & 3 deletions tests/unit/univariate/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from copulas.univariate.gamma import GammaUnivariate
from copulas.univariate.gaussian import GaussianUnivariate
from copulas.univariate.gaussian_kde import GaussianKDE
from copulas.univariate.hyper_laplace import HyperLaplace
from copulas.univariate.log_laplace import LogLaplace
from copulas.univariate.student_t import StudentTUnivariate
from copulas.univariate.truncated_gaussian import TruncatedGaussian
Expand All @@ -29,7 +30,8 @@ def test__select_candidates(self):
GammaUnivariate,
StudentTUnivariate,
UniformUnivariate,
LogLaplace
LogLaplace,
HyperLaplace
}

def test__select_candidates_parametric(self):
Expand All @@ -44,7 +46,8 @@ def test__select_candidates_parametric(self):
GammaUnivariate,
StudentTUnivariate,
UniformUnivariate,
LogLaplace
LogLaplace,
HyperLaplace
}

def test__select_candidates_non_parametric(self):
Expand Down Expand Up @@ -73,7 +76,8 @@ def test__select_candidates_unbounded(self):
assert set(candidates) == {
GaussianKDE,
GaussianUnivariate,
StudentTUnivariate
StudentTUnivariate,
HyperLaplace
}

def test__select_candidates_semibounded(self):
Expand Down
44 changes: 44 additions & 0 deletions tests/unit/univariate/test_hyper_laplace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from unittest import TestCase

import numpy as np
from scipy.stats import norm

from copulas.univariate.hyper_laplace import HyperLaplace


class TestHyperLaplace(TestCase):

def test__fit_constant(self):
distribution = HyperLaplace()

distribution._fit_constant(np.array([1, 1, 1, 1]))

assert distribution._params == {
'loc': 1,
'scale': 0,
'a': 1
}

def test__fit(self):
distribution = HyperLaplace()

data = norm.rvs(size=100000, loc=0, scale=1)
distribution._fit(data)

assert distribution._params['loc'] == 0
assert distribution._params['scale'] >= 1.90 and distribution._params['scale'] <= 2.10
assert distribution._params['a'] >= 0.48 and distribution._params['a'] <= 0.52

def test__is_constant_true(self):
distribution = HyperLaplace()

distribution.fit(np.array([1, 1, 1, 1]))

assert distribution._is_constant()

def test__is_constant_false(self):
distribution = HyperLaplace()

distribution.fit(np.array([1, 2, 3, 4]))

assert not distribution._is_constant()