Skip to content

Commit

Permalink
Version bump to 1.2.
Browse files Browse the repository at this point in the history
JIT-compiled some components of linear, periodic and sigmoidal kernels with numba where faster than numpy (between 1.45x and 4.6x); left some benchmarking code.
Removed the now-obsolete overflow-preventing versions of methods in periodicKernel.py.
Improved numerical stability due to the above and the updated package requirements.
Changed paper link to accepted-papers page
  • Loading branch information
T-Flet committed Jul 30, 2023
1 parent 075d9c5 commit e06a4bc
Show file tree
Hide file tree
Showing 15 changed files with 469 additions and 233 deletions.
50 changes: 36 additions & 14 deletions GPy_ABCD/Kernels/linearOffsetKernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
from paramz.transformations import Logexp
from paramz.caching import Cache_this

from GPy_ABCD.Util.numba_types import *


class LinearWithOffset(Kern):
"""
'''
Linear kernel with horizontal offset
.. math::
Expand All @@ -25,7 +27,7 @@ class LinearWithOffset(Kern):
:param name: Name of the kernel for output
:type String
:rtype: Kernel object
"""
'''

def __init__(self, input_dim: int, variance: float = 1., offset: float = 0., active_dims: int = None, name: str = 'linear_with_offset') -> None:
super(LinearWithOffset, self).__init__(input_dim, active_dims, name)
Expand All @@ -40,35 +42,55 @@ def __init__(self, input_dim: int, variance: float = 1., offset: float = 0., act

self.link_parameters(self.variance, self.offset)


def to_dict(self):
input_dict = super(LinearWithOffset, self)._save_to_input_dict()
input_dict["class"] = "LinearWithOffset"
input_dict["variance"] = self.variance.values.tolist()
input_dict["offset"] = self.offset
input_dict['class'] = 'LinearWithOffset'
input_dict['variance'] = self.variance.values.tolist()
input_dict['offset'] = self.offset
return input_dict


@staticmethod
def _build_from_input_dict(kernel_class, input_dict):
useGPU = input_dict.pop('useGPU', None)
return LinearWithOffset(**input_dict)


@Cache_this(limit=3)
def K(self, X, X2=None):
@Cache_this(limit = 3)
def K(self, X, X2 = None):
if X2 is None: X2 = X
return self.variance * (X - self.offset) * (X2 - self.offset).T

# return _K(self.variance, self.offset, X, X if X2 is None else X2)

def Kdiag(self, X):
return np.sum(self.variance * np.square(X - self.offset), -1)

# return np.sum(self.variance * np.square(X - self.offset), -1)
return _Kdiag(self.variance, self.offset, X)

def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: X2 = X
dK_dV = (X - self.offset) * (X2 - self.offset).T
dK_do = self.variance * (2 * self.offset - X - X2)

self.variance.gradient = np.sum(dL_dK * dK_dV)
self.offset.gradient = np.sum(dL_dK * dK_do)
self.offset.gradient = np.sum(dL_dK * dK_do)
# self.variance.gradient, self.offset.gradient = _update_gradients_full(self.offset, self.variance, dL_dK, X, X if X2 is None else X2)



## numba versions of some methods

# from GPy_ABCD.Util.benchmarking import numba_comparisons

# # @njit(f8A2(f8A, f8A, f8A2, f8A2)) # The numpy version is faster
# def _K(variance, offset, X, X2):
# return variance * (X - offset) * (X2 - offset).T

@njit(f8A(f8A, f8A, f8A2)) # 2x faster than numpy version
def _Kdiag(variance, offset, X):
return np.sum(variance * np.square(X - offset), -1)

# # @njit(nTup(f8, f8)(f8A, f8A, f8A2, f8A2, f8A2)) # The numpy version is faster
# def _update_gradients_full(offset, variance, dL_dK, X, X2):
# dK_dV = (X - offset) * (X2 - offset).T
# dK_do = variance * (2 * offset - X - X2)
# return np.sum(dL_dK * dK_dV), np.sum(dL_dK * dK_do) # variance.gradient and offset.gradient


201 changes: 122 additions & 79 deletions GPy_ABCD/Kernels/periodicKernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,30 @@
from paramz.transformations import Logexp
from paramz.caching import Cache_this

from GPy_ABCD.Util.numba_types import *
from GPy_ABCD.Util.benchmarking import compare_implementations, numba_comparisons


@njit(f8A(f8A))
def _embi0(x): # == exp(-x) * besseli(0, x) => 9.8.2 Abramowitz & Stegun (http://people.math.sfu.ca/~cbm/aands/page_378.htm)
y = 3.75 / x # The below is an efficient polynomial computation
f = 0.39894228 + (0.01328592 + (0.00225319 + (-0.00157565 + (0.00916281 + (-0.02057706 + (0.02635537 + (-0.01647633 + (0.00392377)*y)*y)*y)*y)*y)*y)*y)*y
return f / np.sqrt(x)
@njit(f8A(f8A))
def _embi1(x): # == exp(-x) * besseli(1, x) => 9.8.4 Abramowitz & Stegun (http://people.math.sfu.ca/~cbm/aands/page_378.htm)
y = 3.75 / x # The below is an efficient polynomial computation
f = 0.39894228 + (-0.03988024 + (-0.00362018 + (0.00163801 + (-0.01031555 + (0.02282967 + (-0.02895312 + (0.01787654 + (-0.00420059)*y)*y)*y)*y)*y)*y)*y)*y
return f / np.sqrt(x)
@njit(f8A(f8A))
def _embi0min1(x): # == embi0(x) - embi1(x)
y = 3.75 / x # The below is an efficient polynomial computation (with the 0-th power term equal to 0)
f = (0.05316616 + (0.00587337 + (-0.00321366 + (0.01947836 + (-0.04340673 + (0.05530849 + (-0.03435287 + (0.00812436)*y)*y)*y)*y)*y)*y)*y)*y
return f / np.sqrt(x)


# Based on GPy's StdPeriodic kernel and gaussianprocess.org/gpml/code/matlab/cov/covPeriodicNoDC.m
class PureStdPeriodicKernel(Kern):
"""
'''
The standard periodic kernel due to MacKay (1998) can be decomposed into a sum of a periodic and a constant component;
this kernel is the purely periodic component, as mentioned in:
Lloyd, James Robert; Duvenaud, David Kristjanson; Grosse, Roger Baker; Tenenbaum, Joshua B.; Ghahramani, Zoubin (2014):
Expand Down Expand Up @@ -37,7 +57,7 @@ class PureStdPeriodicKernel(Kern):
:param name: Name of the kernel for output
:type String
:rtype: Kernel object
"""
'''

def __init__(self, input_dim: int, variance: float = 1., period: float = 2.*np.pi, lengthscale: float = 2.*np.pi,
active_dims: int = None, name: str = 'pure_std_periodic') -> None:
Expand All @@ -47,48 +67,32 @@ def __init__(self, input_dim: int, variance: float = 1., period: float = 2.*np.p

if period is not None:
period = np.asarray(period)
assert period.size == input_dim, "bad number of periods"
assert period.size == input_dim, 'bad number of periods'
else:
period = 2.*np.pi * np.ones(input_dim)
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == input_dim, "bad number of lengthscales"
assert lengthscale.size == input_dim, 'bad number of lengthscales'
else:
lengthscale = 2.*np.pi * np.ones(input_dim)

self.variance = Param('variance', variance, Logexp())
assert self.variance.size == 1, "Variance size must be one"
assert self.variance.size == 1, 'Variance size must be one'
self.period = Param('period', period, Logexp())
self.lengthscale = Param('lengthscale', lengthscale, Logexp())

self.link_parameters(self.variance, self.period, self.lengthscale)

def to_dict(self):
input_dict = super(PureStdPeriodicKernel, self)._save_to_input_dict()
input_dict["class"] = "BesselShiftedPeriodic"
input_dict["variance"] = self.variance.values.tolist()
input_dict["period"] = self.period.values.tolist()
input_dict["lengthscale"] = self.lengthscale.values.tolist()
input_dict['class'] = 'BesselShiftedPeriodic'
input_dict['variance'] = self.variance.values.tolist()
input_dict['period'] = self.period.values.tolist()
input_dict['lengthscale'] = self.lengthscale.values.tolist()
return input_dict

@staticmethod
def embi0(x): # == exp(-x) * besseli(0, x) => 9.8.2 Abramowitz & Stegun (http://people.math.sfu.ca/~cbm/aands/page_378.htm)
y = 3.75 / x # The below is an efficient polynomial computation
f = 0.39894228 + (0.01328592 + (0.00225319 + (-0.00157565 + (0.00916281 + (-0.02057706 + (0.02635537 + (-0.01647633 + (0.00392377)*y)*y)*y)*y)*y)*y)*y)*y
return f / np.sqrt(x)
@staticmethod
def embi1(x): # == exp(-x) * besseli(1, x) => 9.8.4 Abramowitz & Stegun (http://people.math.sfu.ca/~cbm/aands/page_378.htm)
y = 3.75 / x # The below is an efficient polynomial computation
f = 0.39894228 + (-0.03988024 + (-0.00362018 + (0.00163801 + (-0.01031555 + (0.02282967 + (-0.02895312 + (0.01787654 + (-0.00420059)*y)*y)*y)*y)*y)*y)*y)*y
return f / np.sqrt(x)
@staticmethod
def embi0min1(x): # == embi0(x) - embi1(x)
y = 3.75 / x # The below is an efficient polynomial computation (with the 0-th power term equal to 0)
f = (0.05316616 + (0.00587337 + (-0.00321366 + (0.01947836 + (-0.04340673 + (0.05530849 + (-0.03435287 + (0.00812436)*y)*y)*y)*y)*y)*y)*y)*y
return f / np.sqrt(x)

@Cache_this(limit = 3)
def K(self, X, X2=None):
def K(self, X, X2 = None):
if X2 is None: X2 = X
cos_term = np.cos((2 * np.pi / self.period) * (X - X2.T))
invL2 = 1 / self.lengthscale ** 2
Expand All @@ -101,21 +105,11 @@ def K(self, X, X2=None):
return self.variance * ((exp_term - bessel0) / (np.exp(invL2) - bessel0)) # The brackets prevent an overflow; want division first
else:
exp_term = np.exp((cos_term - 1) * invL2)
embi0 = self.embi0(invL2)
return self.variance * ((exp_term - embi0) / (1 - embi0)) # The brackets prevent an overflow; want division first
# @Cache_this(limit = 3)
# def K(self, X, X2 = None):
# if X2 is None: X2 = X
# cos_term = np.cos(2 * np.pi * (X - X2.T) / self.period)
#
# if np.any(self.lengthscale > 1e4): # Limit for l -> infinity
# return self.variance * cos_term
# else:
# # Overflow on exp and i0 by going backwards from sys.float_info.max (1.7976931348623157e+308): 1/l^2 < 709.782712893384
# invL2 = np.clip(1 / np.where(self.lengthscale < 1e-100, 1e-200, self.lengthscale ** 2), 0, 705)
# exp_term = np.exp(cos_term * invL2)
# bessel0 = i0(invL2)
# return self.variance * ((exp_term - bessel0) / (np.exp(invL2) - bessel0)) # The brackets prevent an overflow; want division first
embi0 = _embi0(invL2)
return self.variance * ((exp_term - embi0) / (1 - embi0)) # The brackets prevent an overflow; want division first
# invL2 = 1 / self.lengthscale ** 2
# bessel0 = i0(invL2)
# return _K(self.variance, self.period, self.lengthscale, invL2, bessel0, X, X if X2 is None else X2)

def Kdiag(self, X): # Correct; nice and fast
ret = np.empty(X.shape[0])
Expand All @@ -135,7 +129,8 @@ def update_gradients_full(self, dL_dK, X, X2 = None):
dK_dp = (self.variance / self.period) * trig_arg * sin_term

# This is 0 in the limit, but best to set it to a small non-0 value
dK_dl = 1e-4 / self.lengthscale
dK_dl = np.empty_like(dL_dK)
dK_dl[:, :] = 1e-4 / self.lengthscale[0]
elif np.any(invL2 < 3.75):
bessel0 = i0(invL2)
bessel1 = i1(invL2)
Expand All @@ -153,10 +148,10 @@ def update_gradients_full(self, dL_dK, X, X2 = None):

dK_dl = dInvL2_dl * self.variance * ( (cos_term * exp_term - bessel1) - K_no_Var * (eInvL2 - bessel1) ) / denom
else:
embi0 = self.embi0(invL2)
# embi1 = self.embi1(invL2)
embi0 = _embi0(invL2)
# embi1 = _embi1(invL2)
# embi0min1 = embi0 - embi1
embi0min1 = self.embi0min1(invL2)
embi0min1 = _embi0min1(invL2)
dInvL2_dl = -2 * invL2 / self.lengthscale # == -2 / l^3

denom = 1 - embi0
Expand All @@ -173,40 +168,10 @@ def update_gradients_full(self, dL_dK, X, X2 = None):
self.variance.gradient = np.sum(dL_dK * dK_dV)
self.period.gradient = np.sum(dL_dK * dK_dp)
self.lengthscale.gradient = np.sum(dL_dK * dK_dl)
# def update_gradients_full(self, dL_dK, X, X2 = None):
# if X2 is None: X2 = X
# trig_arg = 2 * np.pi * (X - X2.T) / self.period
# cos_term = np.cos(trig_arg)
# sin_term = np.sin(trig_arg)
#
# if np.any(self.lengthscale > 1e4): # Limit for l -> infinity
# dK_dV = cos_term # K / V
#
# dK_dp = self.variance * trig_arg * sin_term / self.period
#
# # This is 0 in the limit, but best to set it to a small non-0 value
# dK_dl = 1e-4 / self.lengthscale
# else:
# # Overflow on exp and i0 by going backwards from sys.float_info.max (1.7976931348623157e+308): 1/l^2 < 709.782712893384
# invL2 = np.clip(1 / np.where(self.lengthscale < 1e-100, 1e-200, self.lengthscale ** 2), 0, 705)
# bessel0 = i0(invL2)
# bessel1 = i1(invL2)
# eInvL2 = np.exp(invL2)
# denom = eInvL2 - bessel0
# exp_term = np.exp(cos_term * invL2)
#
# dK_dV = (exp_term - bessel0) / denom # = K / V
#
# dK_dp = (self.variance * invL2 * trig_arg * sin_term / self.period) * (exp_term / denom) # exp terms division separate because of overflow risk
#
# K = self.variance * dK_dV
# dInvL2_dl = -2 / np.where(self.lengthscale < 1e-100, 1e-300, self.lengthscale ** 3)
# dK_dl = dInvL2_dl * ( self.variance * ((cos_term * exp_term - bessel1) / denom) - K * ((eInvL2 - bessel1) / denom) ) # The brackets prevent some overflows; want those divisions first
#
# self.variance.gradient = np.sum(dL_dK * dK_dV)
# self.period.gradient = np.sum(dL_dK * dK_dp)
# self.lengthscale.gradient = np.sum(dL_dK * dK_dl)

# invL2 = 1 / self.lengthscale ** 2
# bessel0 = i0(invL2)
# bessel1 = i1(invL2)
# self.variance.gradient, self.period.gradient, self.lengthscale.gradient = _update_gradients_full(self.variance, self.period, self.lengthscale, invL2, bessel0, bessel1, dL_dK, X, X if X2 is None else X2)


# Unused functions for a possible version of this class as a subclass of Stationary
Expand Down Expand Up @@ -236,3 +201,81 @@ def update_gradients_full(self, dL_dK, X, X2 = None):
# return - self.variance * invL2 * (trig_arg / r) * sin_term * exp_term / (np.exp(invL2) - i0(invL2))



## numba versions of some methods

# from GPy_ABCD.Util.benchmarking import numba_comparisons
# numba_comparisons(_embi0, f8A(f8A), n = 200, args = [x])


# # if X2 is None: X2 = X
# # invL2 = 1 / lengthscale ** 2
# # bessel0 = i0(invL2)
# # @njit(f8A2(f8A, f8A, f8A, f8A, f8A, f8A2, f8A2)) # The numpy version is faster
# def _K(variance, period, lengthscale, invL2, bessel0, X, X2):
# cos_term = np.cos((2 * np.pi / period) * (X - X2.T))
#
# if np.any(lengthscale > 1e4): # Limit for l -> infinity
# return variance * cos_term
# elif np.any(invL2 < 3.75):
# exp_term = np.exp(cos_term * invL2)
# return variance * ((exp_term - bessel0) / (np.exp(invL2) - bessel0)) # The brackets prevent an overflow; want division first
# else:
# exp_term = np.exp((cos_term - 1) * invL2)
# embi0 = _embi0(invL2)
# return variance * ((exp_term - embi0) / (1 - embi0)) # The brackets prevent an overflow; want division first



# # invL2 = 1 / lengthscale ** 2
# # bessel0 = i0(invL2)
# # bessel1 = i1(invL2)
# # @njit(nTup(f8, f8, f8)(f8A, f8A, f8A, f8A, f8A, f8A, f8A2, f8A2, f8A2)) # The numpy version is faster
# def _update_gradients_full(variance, period, lengthscale, invL2, bessel0, bessel1, dL_dK, X, X2):
# trig_arg = (2 * np.pi / period) * (X - X2.T)
# cos_term = np.cos(trig_arg)
# sin_term = np.sin(trig_arg)
#
# if np.any(lengthscale > 1e4): # Limit for l -> infinity
# dK_dV = cos_term # K / V
#
# dK_dp = (variance / period) * trig_arg * sin_term
#
# # This is 0 in the limit, but best to set it to a small non-0 value
# dK_dl = np.empty_like(dL_dK)
# dK_dl[:,:] = 1e-4 / lengthscale[0]
# elif np.any(invL2 < 3.75):
# eInvL2 = np.exp(invL2)
# dInvL2_dl = -2 * invL2 / lengthscale # == -2 / l^3
#
# denom = eInvL2 - bessel0
# exp_term = np.exp(cos_term * invL2)
# K_no_Var = (exp_term - bessel0) / denom # == K / V; here just for clarity of further expressions
#
#
# dK_dV = K_no_Var
#
# dK_dp = (variance / period) * invL2 * trig_arg * sin_term * exp_term / denom
#
# dK_dl = dInvL2_dl * variance * ( (cos_term * exp_term - bessel1) - K_no_Var * (eInvL2 - bessel1) ) / denom
# else:
# embi0 = _embi0(invL2)
# # embi1 = _embi1(invL2)
# # embi0min1 = embi0 - embi1
# embi0min1 = _embi0min1(invL2)
# dInvL2_dl = -2 * invL2 / lengthscale # == -2 / l^3
#
# denom = 1 - embi0
# exp_term = np.exp((cos_term - 1) * invL2)
# K_no_Var = (exp_term - embi0) / denom # == K / V; here just for clarity of further expressions
#
#
# dK_dV = K_no_Var
#
# dK_dp = (variance / period) * invL2 * trig_arg * sin_term * exp_term / denom # I.e. SAME as the above case at this abstraction level
#
# dK_dl = dInvL2_dl * variance * ( (cos_term - 1) * exp_term + embi0min1 - K_no_Var * embi0min1 ) / denom
#
# return np.sum(dL_dK * dK_dV), np.sum(dL_dK * dK_dp), np.sum(dL_dK * dK_dl) # variance.gradient, period.gradient, lengthscale.gradient


Loading

0 comments on commit e06a4bc

Please sign in to comment.