diff --git a/GPy_ABCD/Kernels/linearOffsetKernel.py b/GPy_ABCD/Kernels/linearOffsetKernel.py
index 752daa0..7eece4e 100644
--- a/GPy_ABCD/Kernels/linearOffsetKernel.py
+++ b/GPy_ABCD/Kernels/linearOffsetKernel.py
@@ -4,9 +4,11 @@
 from paramz.transformations import Logexp
 from paramz.caching import Cache_this
 
+from GPy_ABCD.Util.numba_types import *
+
 
 class LinearWithOffset(Kern):
-    """
+    '''
     Linear kernel with horizontal offset
 
     .. math::
@@ -25,7 +27,7 @@ class LinearWithOffset(Kern):
     :param name: Name of the kernel for output
     :type String
     :rtype: Kernel object
-    """
+    '''
 
     def __init__(self, input_dim: int, variance: float = 1., offset: float = 0., active_dims: int = None, name: str = 'linear_with_offset') -> None:
         super(LinearWithOffset, self).__init__(input_dim, active_dims, name)
@@ -40,30 +42,27 @@ def __init__(self, input_dim: int, variance: float = 1., offset: float = 0., act
 
         self.link_parameters(self.variance, self.offset)
 
-
     def to_dict(self):
         input_dict = super(LinearWithOffset, self)._save_to_input_dict()
-        input_dict["class"] = "LinearWithOffset"
-        input_dict["variance"] = self.variance.values.tolist()
-        input_dict["offset"] = self.offset
+        input_dict['class'] = 'LinearWithOffset'
+        input_dict['variance'] = self.variance.values.tolist()
+        input_dict['offset'] = self.offset
         return input_dict
 
-
     @staticmethod
     def _build_from_input_dict(kernel_class, input_dict):
         useGPU = input_dict.pop('useGPU', None)
         return LinearWithOffset(**input_dict)
 
-
-    @Cache_this(limit=3)
-    def K(self, X, X2=None):
+    @Cache_this(limit = 3)
+    def K(self, X, X2 = None):
         if X2 is None: X2 = X
         return self.variance * (X - self.offset) * (X2 - self.offset).T
-
+        # return _K(self.variance, self.offset, X, X if X2 is None else X2)
 
     def Kdiag(self, X):
-        return np.sum(self.variance * np.square(X - self.offset), -1)
-
+        # return np.sum(self.variance * np.square(X - self.offset), -1)
+        return _Kdiag(self.variance, self.offset, X)
 
     def update_gradients_full(self, dL_dK, X, X2=None):
         if X2 is None: X2 = X
@@ -71,4 +70,27 @@ def update_gradients_full(self, dL_dK, X, X2=None):
         dK_do = self.variance * (2 * self.offset - X - X2)
 
         self.variance.gradient = np.sum(dL_dK * dK_dV)
-        self.offset.gradient = np.sum(dL_dK * dK_do)
+        self.offset.gradient = np.sum(dL_dK * dK_do)
+        # self.variance.gradient, self.offset.gradient = _update_gradients_full(self.offset, self.variance, dL_dK, X, X if X2 is None else X2)
+
+
+
+## numba versions of some methods
+
+# from GPy_ABCD.Util.benchmarking import numba_comparisons
+
+# # @njit(f8A2(f8A, f8A, f8A2, f8A2)) # The numpy version is faster
+# def _K(variance, offset, X, X2):
+#     return variance * (X - offset) * (X2 - offset).T
+
+@njit(f8A(f8A, f8A, f8A2)) # 2x faster than numpy version
+def _Kdiag(variance, offset, X):
+    return np.sum(variance * np.square(X - offset), -1)
+
+# # @njit(nTup(f8, f8)(f8A, f8A, f8A2, f8A2, f8A2)) # The numpy version is faster
+# def _update_gradients_full(offset, variance, dL_dK, X, X2):
+#     dK_dV = (X - offset) * (X2 - offset).T
+#     dK_do = variance * (2 * offset - X - X2)
+#     return np.sum(dL_dK * dK_dV), np.sum(dL_dK * dK_do) # variance.gradient and offset.gradient
+
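
A quick sanity check (not part of the diff) that the new numba _Kdiag matches the numpy expression it replaces; the (n, 1) shape of X follows GPy's convention, and the 1-element parameter arrays mirror the f8A signature:

    import numpy as np
    from GPy_ABCD.Kernels.linearOffsetKernel import _Kdiag

    variance, offset = np.array([2.0]), np.array([1.5])
    X = np.random.randn(100, 1)
    assert np.allclose(_Kdiag(variance, offset, X), np.sum(variance * np.square(X - offset), -1))
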
diff --git a/GPy_ABCD/Kernels/periodicKernel.py b/GPy_ABCD/Kernels/periodicKernel.py
index c81be21..8a4e80f 100644
--- a/GPy_ABCD/Kernels/periodicKernel.py
+++ b/GPy_ABCD/Kernels/periodicKernel.py
@@ -5,10 +5,30 @@
 from paramz.transformations import Logexp
 from paramz.caching import Cache_this
 
+from GPy_ABCD.Util.numba_types import *
+from GPy_ABCD.Util.benchmarking import compare_implementations, numba_comparisons
+
+
+@njit(f8A(f8A))
+def _embi0(x): # == exp(-x) * besseli(0, x) => 9.8.2 Abramowitz & Stegun (http://people.math.sfu.ca/~cbm/aands/page_378.htm)
+    y = 3.75 / x # The below is an efficient polynomial computation
+    f = 0.39894228 + (0.01328592 + (0.00225319 + (-0.00157565 + (0.00916281 + (-0.02057706 + (0.02635537 + (-0.01647633 + (0.00392377)*y)*y)*y)*y)*y)*y)*y)*y
+    return f / np.sqrt(x)
+@njit(f8A(f8A))
+def _embi1(x): # == exp(-x) * besseli(1, x) => 9.8.4 Abramowitz & Stegun (http://people.math.sfu.ca/~cbm/aands/page_378.htm)
+    y = 3.75 / x # The below is an efficient polynomial computation
+    f = 0.39894228 + (-0.03988024 + (-0.00362018 + (0.00163801 + (-0.01031555 + (0.02282967 + (-0.02895312 + (0.01787654 + (-0.00420059)*y)*y)*y)*y)*y)*y)*y)*y
+    return f / np.sqrt(x)
+@njit(f8A(f8A))
+def _embi0min1(x): # == embi0(x) - embi1(x)
+    y = 3.75 / x # The below is an efficient polynomial computation (with the 0-th power term equal to 0)
+    f = (0.05316616 + (0.00587337 + (-0.00321366 + (0.01947836 + (-0.04340673 + (0.05530849 + (-0.03435287 + (0.00812436)*y)*y)*y)*y)*y)*y)*y)*y
+    return f / np.sqrt(x)
+
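
A hypothetical check (not part of the diff): these polynomials approximate scipy's exponentially scaled Bessel functions, and x >= 3.75 is the only regime in which K and update_gradients_full call them (the Abramowitz & Stegun approximations hold there with absolute error below about 1e-6):

    import numpy as np
    from scipy.special import i0e, i1e
    from GPy_ABCD.Kernels.periodicKernel import _embi0, _embi1

    x = np.linspace(3.75, 100, 500)
    assert np.allclose(_embi0(x), i0e(x), atol = 1e-6)
    assert np.allclose(_embi1(x), i1e(x), atol = 1e-6)
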
 
 # Based on GPy's StdPeriodic kernel and gaussianprocess.org/gpml/code/matlab/cov/covPeriodicNoDC.m
 class PureStdPeriodicKernel(Kern):
-    """
+    '''
     The standard periodic kernel due to MacKay (1998) can be decomposed into a sum of a periodic and a constant component;
     this kernel is the purely periodic component, as mentioned in:
     Lloyd, James Robert; Duvenaud, David Kristjanson; Grosse, Roger Baker; Tenenbaum, Joshua B.; Ghahramani, Zoubin (2014):
@@ -37,7 +57,7 @@ class PureStdPeriodicKernel(Kern):
     :param name: Name of the kernel for output
     :type String
     :rtype: Kernel object
-    """
+    '''
 
     def __init__(self, input_dim: int, variance: float = 1., period: float = 2.*np.pi, lengthscale: float = 2.*np.pi, active_dims: int = None, name: str = 'pure_std_periodic') -> None:
@@ -47,17 +67,17 @@ def __init__(self, input_dim: int, variance: float = 1., period: float = 2.*np.p
 
         if period is not None:
             period = np.asarray(period)
-            assert period.size == input_dim, "bad number of periods"
+            assert period.size == input_dim, 'bad number of periods'
         else:
             period = 2.*np.pi * np.ones(input_dim)
         if lengthscale is not None:
             lengthscale = np.asarray(lengthscale)
-            assert lengthscale.size == input_dim, "bad number of lengthscales"
+            assert lengthscale.size == input_dim, 'bad number of lengthscales'
         else:
             lengthscale = 2.*np.pi * np.ones(input_dim)
 
         self.variance = Param('variance', variance, Logexp())
-        assert self.variance.size == 1, "Variance size must be one"
+        assert self.variance.size == 1, 'Variance size must be one'
         self.period = Param('period', period, Logexp())
         self.lengthscale = Param('lengthscale', lengthscale, Logexp())
@@ -65,30 +85,14 @@ def __init__(self, input_dim: int, variance: float = 1., period: float = 2.*np.p
 
     def to_dict(self):
         input_dict = super(PureStdPeriodicKernel, self)._save_to_input_dict()
-        input_dict["class"] = "BesselShiftedPeriodic"
-        input_dict["variance"] = self.variance.values.tolist()
-        input_dict["period"] = self.period.values.tolist()
-        input_dict["lengthscale"] = self.lengthscale.values.tolist()
+        input_dict['class'] = 'BesselShiftedPeriodic'
+        input_dict['variance'] = self.variance.values.tolist()
+        input_dict['period'] = self.period.values.tolist()
+        input_dict['lengthscale'] = self.lengthscale.values.tolist()
         return input_dict
 
-    @staticmethod
-    def embi0(x): # == exp(-x) * besseli(0, x) => 9.8.2 Abramowitz & Stegun (http://people.math.sfu.ca/~cbm/aands/page_378.htm)
-        y = 3.75 / x # The below is an efficient polynomial computation
-        f = 0.39894228 + (0.01328592 + (0.00225319 + (-0.00157565 + (0.00916281 + (-0.02057706 + (0.02635537 + (-0.01647633 + (0.00392377)*y)*y)*y)*y)*y)*y)*y)*y
-        return f / np.sqrt(x)
-    @staticmethod
-    def embi1(x): # == exp(-x) * besseli(1, x) => 9.8.4 Abramowitz & Stegun (http://people.math.sfu.ca/~cbm/aands/page_378.htm)
-        y = 3.75 / x # The below is an efficient polynomial computation
-        f = 0.39894228 + (-0.03988024 + (-0.00362018 + (0.00163801 + (-0.01031555 + (0.02282967 + (-0.02895312 + (0.01787654 + (-0.00420059)*y)*y)*y)*y)*y)*y)*y)*y
-        return f / np.sqrt(x)
-    @staticmethod
-    def embi0min1(x): # == embi0(x) - embi1(x)
-        y = 3.75 / x # The below is an efficient polynomial computation (with the 0-th power term equal to 0)
-        f = (0.05316616 + (0.00587337 + (-0.00321366 + (0.01947836 + (-0.04340673 + (0.05530849 + (-0.03435287 + (0.00812436)*y)*y)*y)*y)*y)*y)*y)*y
-        return f / np.sqrt(x)
-
     @Cache_this(limit = 3)
-    def K(self, X, X2=None):
+    def K(self, X, X2 = None):
         if X2 is None: X2 = X
         cos_term = np.cos((2 * np.pi / self.period) * (X - X2.T))
         invL2 = 1 / self.lengthscale ** 2
@@ -101,21 +105,11 @@ def K(self, X, X2=None):
             return self.variance * ((exp_term - bessel0) / (np.exp(invL2) - bessel0)) # The brackets prevent an overflow; want division first
         else:
             exp_term = np.exp((cos_term - 1) * invL2)
-            embi0 = self.embi0(invL2)
-            return self.variance * ((exp_term - embi0) / (1 - embi0)) # The brackets prevent an overflow; want division first
-    # @Cache_this(limit = 3)
-    # def K(self, X, X2 = None):
-    #     if X2 is None: X2 = X
-    #     cos_term = np.cos(2 * np.pi * (X - X2.T) / self.period)
-    #
-    #     if np.any(self.lengthscale > 1e4): # Limit for l -> infinity
-    #         return self.variance * cos_term
-    #     else:
-    #         # Overflow on exp and i0 by going backwards from sys.float_info.max (1.7976931348623157e+308): 1/l^2 < 709.782712893384
-    #         invL2 = np.clip(1 / np.where(self.lengthscale < 1e-100, 1e-200, self.lengthscale ** 2), 0, 705)
-    #         exp_term = np.exp(cos_term * invL2)
-    #         bessel0 = i0(invL2)
-    #         return self.variance * ((exp_term - bessel0) / (np.exp(invL2) - bessel0)) # The brackets prevent an overflow; want division first
+            embi0 = _embi0(invL2)
+            return self.variance * ((exp_term - embi0) / (1 - embi0)) # The brackets prevent an overflow; want division first
+        # invL2 = 1 / self.lengthscale ** 2
+        # bessel0 = i0(invL2)
+        # return _K(self.variance, self.period, self.lengthscale, invL2, bessel0, X, X if X2 is None else X2)
 
     def Kdiag(self, X): # Correct; nice and fast
         ret = np.empty(X.shape[0])
@@ -135,7 +129,8 @@ def update_gradients_full(self, dL_dK, X, X2 = None):
             dK_dp = (self.variance / self.period) * trig_arg * sin_term
 
             # This is 0 in the limit, but best to set it to a small non-0 value
-            dK_dl = 1e-4 / self.lengthscale
+            dK_dl = np.empty_like(dL_dK)
+            dK_dl[:, :] = 1e-4 / self.lengthscale[0]
         elif np.any(invL2 < 3.75):
             bessel0 = i0(invL2)
             bessel1 = i1(invL2)
@@ -153,10 +148,10 @@ def update_gradients_full(self, dL_dK, X, X2 = None):
             dK_dl = dInvL2_dl * self.variance * ( (cos_term * exp_term - bessel1) - K_no_Var * (eInvL2 - bessel1) ) / denom
         else:
-            embi0 = self.embi0(invL2)
-            # embi1 = self.embi1(invL2)
+            embi0 = _embi0(invL2)
+            # embi1 = _embi1(invL2)
             # embi0min1 = embi0 - embi1
-            embi0min1 = self.embi0min1(invL2)
+            embi0min1 = _embi0min1(invL2)
             dInvL2_dl = -2 * invL2 / self.lengthscale # == -2 / l^3
 
             denom = 1 - embi0
@@ -173,40 +168,10 @@ def update_gradients_full(self, dL_dK, X, X2 = None):
         self.variance.gradient = np.sum(dL_dK * dK_dV)
         self.period.gradient = np.sum(dL_dK * dK_dp)
         self.lengthscale.gradient = np.sum(dL_dK * dK_dl)
-    # def update_gradients_full(self, dL_dK, X, X2 = None):
-    #     if X2 is None: X2 = X
-    #     trig_arg = 2 * np.pi * (X - X2.T) / self.period
-    #     cos_term = np.cos(trig_arg)
-    #     sin_term = np.sin(trig_arg)
-    #
-    #     if np.any(self.lengthscale > 1e4): # Limit for l -> infinity
-    #         dK_dV = cos_term # K / V
-    #
-    #         dK_dp = self.variance * trig_arg * sin_term / self.period
-    #
-    #         # This is 0 in the limit, but best to set it to a small non-0 value
-    #         dK_dl = 1e-4 / self.lengthscale
-    #     else:
-    #         # Overflow on exp and i0 by going backwards from sys.float_info.max (1.7976931348623157e+308): 1/l^2 < 709.782712893384
-    #         invL2 = np.clip(1 / np.where(self.lengthscale < 1e-100, 1e-200, self.lengthscale ** 2), 0, 705)
-    #         bessel0 = i0(invL2)
-    #         bessel1 = i1(invL2)
-    #         eInvL2 = np.exp(invL2)
-    #         denom = eInvL2 - bessel0
-    #         exp_term = np.exp(cos_term * invL2)
-    #
-    #         dK_dV = (exp_term - bessel0) / denom # = K / V
-    #
-    #         dK_dp = (self.variance * invL2 * trig_arg * sin_term / self.period) * (exp_term / denom) # exp terms division separate because of overflow risk
-    #
-    #         K = self.variance * dK_dV
-    #         dInvL2_dl = -2 / np.where(self.lengthscale < 1e-100, 1e-300, self.lengthscale ** 3)
-    #         dK_dl = dInvL2_dl * ( self.variance * ((cos_term * exp_term - bessel1) / denom) - K * ((eInvL2 - bessel1) / denom) ) # The brackets prevent some overflows; want those divisions first
-    #
-    #     self.variance.gradient = np.sum(dL_dK * dK_dV)
-    #     self.period.gradient = np.sum(dL_dK * dK_dp)
-    #     self.lengthscale.gradient = np.sum(dL_dK * dK_dl)
-
+        # invL2 = 1 / self.lengthscale ** 2
+        # bessel0 = i0(invL2)
+        # bessel1 = i1(invL2)
+        # self.variance.gradient, self.period.gradient, self.lengthscale.gradient = _update_gradients_full(self.variance, self.period, self.lengthscale, invL2, bessel0, bessel1, dL_dK, X, X if X2 is None else X2)
 
 
 # Unused functions for a possible version of this class as a subclass of Stationary
@@ -236,3 +201,81 @@ def update_gradients_full(self, dL_dK, X, X2 = None):
 #     return - self.variance * invL2 * (trig_arg / r) * sin_term * exp_term / (np.exp(invL2) - i0(invL2))
+
+
+## numba versions of some methods
+
+# from GPy_ABCD.Util.benchmarking import numba_comparisons
+# numba_comparisons(_embi0, f8A(f8A), n = 200, args = [x])
+
+
+# # if X2 is None: X2 = X
+# # invL2 = 1 / lengthscale ** 2
+# # bessel0 = i0(invL2)
+# # @njit(f8A2(f8A, f8A, f8A, f8A, f8A, f8A2, f8A2)) # The numpy version is faster
+# def _K(variance, period, lengthscale, invL2, bessel0, X, X2):
+#     cos_term = np.cos((2 * np.pi / period) * (X - X2.T))
+#
+#     if np.any(lengthscale > 1e4): # Limit for l -> infinity
+#         return variance * cos_term
+#     elif np.any(invL2 < 3.75):
+#         exp_term = np.exp(cos_term * invL2)
+#         return variance * ((exp_term - bessel0) / (np.exp(invL2) - bessel0)) # The brackets prevent an overflow; want division first
+#     else:
+#         exp_term = np.exp((cos_term - 1) * invL2)
+#         embi0 = _embi0(invL2)
+#         return variance * ((exp_term - embi0) / (1 - embi0)) # The brackets prevent an overflow; want division first
+
+
+
+# # invL2 = 1 / lengthscale ** 2
+# # bessel0 = i0(invL2)
+# # bessel1 = i1(invL2)
+# # @njit(nTup(f8, f8, f8)(f8A, f8A, f8A, f8A, f8A, f8A, f8A2, f8A2, f8A2)) # The numpy version is faster
+# def _update_gradients_full(variance, period, lengthscale, invL2, bessel0, bessel1, dL_dK, X, X2):
+#     trig_arg = (2 * np.pi / period) * (X - X2.T)
+#     cos_term = np.cos(trig_arg)
+#     sin_term = np.sin(trig_arg)
+#
+#     if np.any(lengthscale > 1e4): # Limit for l -> infinity
+#         dK_dV = cos_term # K / V
+#
+#         dK_dp = (variance / period) * trig_arg * sin_term
+#
+#         # This is 0 in the limit, but best to set it to a small non-0 value
+#         dK_dl = np.empty_like(dL_dK)
+#         dK_dl[:,:] = 1e-4 / lengthscale[0]
+#     elif np.any(invL2 < 3.75):
+#         eInvL2 = np.exp(invL2)
+#         dInvL2_dl = -2 * invL2 / lengthscale # == -2 / l^3
+#
+#         denom = eInvL2 - bessel0
+#         exp_term = np.exp(cos_term * invL2)
+#         K_no_Var = (exp_term - bessel0) / denom # == K / V; here just for clarity of further expressions
+#
+#
+#         dK_dV = K_no_Var
+#
+#         dK_dp = (variance / period) * invL2 * trig_arg * sin_term * exp_term / denom
+#
+#         dK_dl = dInvL2_dl * variance * ( (cos_term * exp_term - bessel1) - K_no_Var * (eInvL2 - bessel1) ) / denom
+#     else:
+#         embi0 = _embi0(invL2)
+#         # embi1 = _embi1(invL2)
+#         # embi0min1 = embi0 - embi1
+#         embi0min1 = _embi0min1(invL2)
+#         dInvL2_dl = -2 * invL2 / lengthscale # == -2 / l^3
+#
+#         denom = 1 - embi0
+#         exp_term = np.exp((cos_term - 1) * invL2)
+#         K_no_Var = (exp_term - embi0) / denom # == K / V; here just for clarity of further expressions
+#
+#
+#         dK_dV = K_no_Var
+#
+#         dK_dp = (variance / period) * invL2 * trig_arg * sin_term * exp_term / denom # I.e. SAME as the above case at this abstraction level
+#
+#         dK_dl = dInvL2_dl * variance * ( (cos_term - 1) * exp_term + embi0min1 - K_no_Var * embi0min1 ) / denom
+#
+#     return np.sum(dL_dK * dK_dV), np.sum(dL_dK * dK_dp), np.sum(dL_dK * dK_dl) # variance.gradient, period.gradient, lengthscale.gradient
+
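
A minimal usage sketch (not part of the diff, and only an assumption about a typical workflow): PureStdPeriodicKernel behaves like any other GPy kernel, so it can be dropped straight into a GPy regression:

    import numpy as np, GPy
    from GPy_ABCD.Kernels.periodicKernel import PureStdPeriodicKernel

    X = np.linspace(0, 10, 101)[:, None]
    Y = np.cos(2 * np.pi * X / 3) + np.random.randn(101, 1) * 0.1
    m = GPy.models.GPRegression(X, Y, PureStdPeriodicKernel(1))
    m.optimize()
    print(m.kern.period) # Should recover a period of roughly 3
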
diff --git a/GPy_ABCD/Kernels/sigmoidalKernels.py b/GPy_ABCD/Kernels/sigmoidalKernels.py
index 803be49..feb5ae0 100644
--- a/GPy_ABCD/Kernels/sigmoidalKernels.py
+++ b/GPy_ABCD/Kernels/sigmoidalKernels.py
@@ -5,12 +5,41 @@
 from paramz.transformations import Logexp
 from paramz.caching import Cache_this
 
+from GPy_ABCD.Util.numba_types import *
+# from GPy_ABCD.Util.benchmarking import numba_comparisons
+
+
+@njit(f8A2(f8A2))
+def _sigmoid_function(arg): return (1 + arg / (1 + np.abs(arg))) / 2
+@njit(f8A2(f8A2))
+def _sigmoid_function_d(arg): return 0.5 / (1 + np.abs(arg)) ** 2
+# Since arg is usually (x - l) / s, derivatives by the constituent terms are just _sigmoid_function_d(arg) * ...
+#   x: * 1/s
+#   l: * -1/s
+#   s: * -(x-l)/s^2, i.e. * -arg/s
+# When arg is (x - l - w) / (-s) [NOTE THE -s], derivatives by the constituent terms are just _sigmoid_function_d(arg) * ...
+#   x: * -1/s
+#   l: * 1/s
+#   w: * 1/s
+#   s: * (x-l-w)/(-s)^2, i.e. * -argW/s
+
+@njit(f8(f8))
+def _core_sigmoid_function(arg): return arg / (1 + np.abs(arg))
+# NOTE: This function can be used to compute the maximum height of the sum of opposite sigmoidals - 1 by feeding it arg = w / (2 * s)
+#   Let arg = (x - l) / s and argW = (x - l - w) / (-s) [NOTE THE -s]
+#   This is the height of sig(arg) + sig(argW) - 1 for midpoint x: l + w/2 in both args (which become w/2s),
+#   so 2*sig(w/2s) - 1 = the core sigmoid ([-1,1]) on +ve values because of +ve w and s: core_sig(w/2s)
+@njit(f8(f8))
+def _inv_core_sigmoid_d(arg): return - 1 / arg ** 2
+# NOTE: This function comes up in computing the derivatives of the scaled (to 1) version of the above sum of sigmoidals
+# Since arg is always w / (2 * s), derivatives are just _inv_core_sigmoid_d(arg) * ...
+#   w: * 1/2s
+#   s: * -w/(2s^2), i.e. * -arg/s
+
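
A quick numerical check (not part of the diff): _sigmoid_function_d should be the derivative of _sigmoid_function, since d/dx [x / (1 + |x|)] = 1 / (1 + |x|)^2 and the (1 + ...) / 2 rescaling to [0, 1] halves it:

    import numpy as np
    from GPy_ABCD.Kernels.sigmoidalKernels import _sigmoid_function, _sigmoid_function_d

    arg, h = np.linspace(-5, 5, 11)[:, None], 1e-6
    fd = (_sigmoid_function(arg + h) - _sigmoid_function(arg - h)) / (2 * h)
    assert np.allclose(fd, _sigmoid_function_d(arg), atol = 1e-8)
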
 
 class SigmoidalKernelBase(BasisFuncKernel):
-    """
-    Abstract superclass for sigmoidal kernels
-    """
+    '''Abstract superclass for sigmoidal kernels'''
 
     def __init__(self, input_dim: int, reverse: bool = False, variance: float = 1., location: float = 0., slope: float = 0.2,
                  active_dims: int = None, name: str = 'sigmoidal_kernel_base', fixed_slope = False) -> None:
@@ -30,33 +59,6 @@ def __init__(self, input_dim: int, reverse: bool = False, variance: float = 1.,
         self.slope = Param('slope', slope, Logexp()) # This +ve constraint makes non-reverse sigmoids only fit (+ve or -ve) curves going away from 0; similarly for other kernels
         self.link_parameter(self.slope)
 
-    @staticmethod
-    def _sigmoid_function(arg): return (1 + arg / (1 + abs(arg))) / 2
-    @staticmethod
-    def _sigmoid_function_d(arg): return 0.5 / (1 + abs(arg)) ** 2
-    # Since arg is usually (x - l) / s, derivatives by the constituent terms are just _sigmoid_function_d(arg) * ...
-    #   x: * 1/s
-    #   l: * -1/s
-    #   s: * -(x-l)/s^2, i.e. * -arg/s
-    # When arg is (x - l - w) / (-s) [NOTE THE -s], derivatives by the constituent terms are just _sigmoid_function_d(arg) * ...
-    #   x: * -1/s
-    #   l: * 1/s
-    #   w: * 1/s
-    #   s: * (x-l-w)/(-s)^2, i.e. * -argW/s
-
-    @staticmethod
-    def _core_sigmoid_function(arg): return arg / (1 + abs(arg))
-    # NOTE: This function can be used to compute the maximum height of the sum of opposite sigmoidals - 1 by feeding it arg = w / (2 * s)
-    #   Let arg = (x - l) / s and argW (x - l - w) / (-s) [NOTE THE -s]
-    #   This is the height of sig(arg) + sig(argW) - 1 for midpoint x: l + w/2 in both args (which become w/2s)
-    #   so 2*sig(w/2s) - 1 = the core sigmoid ([-1,1]) on +ve values because of +ve w and s: core_sig(w/2s)
-    @staticmethod
-    def _inv_core_sigmoid_d(arg): return - 1 / arg ** 2
-    # NOTE: This function comes up in computing the derivatives of the scaled (to 1) version of the above sum of sigmoidals
-    # Since arg is always w / (2 * s), derivatives are just _inv_core_sigmoid_d(arg) * ...
-    #   w: * 1/2s
-    #   s: * -w/(2s^2), i.e. * -arg/s
-
     def update_gradients_diag(self, dL_dKdiag, X):
         raise NotImplementedError
 
@@ -64,68 +66,84 @@ def update_gradients_diag(self, dL_dKdiag, X):
 
 # Based on LogisticBasisFuncKernel
 class SigmoidalKernel(SigmoidalKernelBase):
-    """
-    Sigmoidal kernel
-    (ascending by default; descending by reverse=True)
-    """
+    '''Sigmoidal kernel
+    (ascending by default; descending by reverse = True)'''
 
     def __init__(self, input_dim: int, reverse: bool = False, variance: float = 1., location: float = 0., slope: float = 0.2,
                  active_dims: int = None, name: str = 'sigmoidal', fixed_slope = False) -> None:
         super(SigmoidalKernel, self).__init__(input_dim, reverse, variance, location, slope, active_dims, name, fixed_slope)
 
-    @Cache_this(limit=3, ignore_args=())
+    @Cache_this(limit = 3)
     def _phi(self, X):
-        asc = self._sigmoid_function((X - self.location) / self.slope)
-        return 1 - asc if self.reverse else asc
+        return _phi_S(self.location[0], self.slope if self._fixed_slope else self.slope[0], self.reverse, X)
 
-    def update_gradients_full(self, dL_dK, X, X2=None):
+    def update_gradients_full(self, dL_dK, X, X2 = None):
         super(SigmoidalKernel, self).update_gradients_full(dL_dK, X, X2)
 
-        if X2 is None or X is X2:
-            phi1 = self.phi(X)
-            if phi1.ndim != 2:
-                phi1 = phi1[:, None]
-
-            arg = (X - self.location) / self.slope
-            sig_d = self._sigmoid_function_d(arg)
-
-            dphi1_dl = sig_d / (-self.slope)
-            self.location.gradient = np.sum(self.variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum())
+        phi1 = self.phi(X)
+        if phi1.ndim != 2: phi1 = phi1[:, None]
 
-            if not self._fixed_slope:
-                dphi1_ds = sig_d * (-arg / self.slope)
-                self.slope.gradient = np.sum(self.variance * 2 * (dL_dK * phi1.dot(dphi1_ds.T)).sum())
+        if X2 is None or X is X2:
+            self.location.gradient, slope_gradient = _update_gradients_full_S_1(self.variance[0], self.location[0],
+                self.slope if self._fixed_slope else self.slope[0], self._fixed_slope, phi1, dL_dK, X)
+            if not self._fixed_slope: self.slope.gradient = slope_gradient
         else:
-            phi1 = self.phi(X)
             phi2 = self.phi(X2)
-            if phi1.ndim != 2:
-                phi1 = phi1[:, None]
-                phi2 = phi2[:, None]
-
-            arg, arg2 = (X - self.location) / self.slope, (X2 - self.location) / self.slope
-            sig_d, sig_d2 = self._sigmoid_function_d(arg), self._sigmoid_function_d(arg2)
+            if phi1.ndim != 2: phi2 = phi2[:, None]
 
-            dphi1_dl, dphi2_dl = sig_d / (-self.slope), sig_d2 / (-self.slope)
-            self.location.gradient = np.sum(self.variance * (dL_dK * (phi1.dot(dphi2_dl.T) + phi2.dot(dphi1_dl.T))).sum())
-            self.location.gradient = np.where(np.isnan(self.location.gradient), 0, self.location.gradient)
-
-            if not self._fixed_slope:
-                dphi1_ds = sig_d * (-arg / self.slope)
-                dphi2_ds = sig_d2 * (-arg2 / self.slope)
-                self.slope.gradient = np.sum(self.variance * (dL_dK * (phi1.dot(dphi2_ds.T) + phi2.dot(dphi1_ds.T))).sum())
-                self.slope.gradient = np.where(np.isnan(self.slope.gradient), 0, self.slope.gradient)
+            self.location.gradient, slope_gradient = _update_gradients_full_S_2(self.variance[0], self.location[0],
+                self.slope if self._fixed_slope else self.slope[0], self._fixed_slope, phi1, phi2, dL_dK, X, X2)
+            if not self._fixed_slope: self.slope.gradient = slope_gradient
 
         if self.reverse: # Works this simply (a single sign flip per product above)
             self.location.gradient = - self.location.gradient
             if not self._fixed_slope: self.slope.gradient = - self.slope.gradient
 
+
+@njit(f8A2(f8, f8, b1, f8A2)) # Almost 2x faster than numpy version
+def _phi_S(location, slope, reverse, X):
+    asc = _sigmoid_function((X - location) / slope)
+    return 1 - asc if reverse else asc
+
+@njit(nTup(f8, f8)(f8, f8, f8, b1, f8A2, f8A2, f8A2)) # 1.5x faster than numpy version
+def _update_gradients_full_S_1(variance, location, slope, _fixed_slope, phi1, dL_dK, X):
+    arg = (X - location) / slope
+    sig_d = _sigmoid_function_d(arg)
+
+    dphi1_dl = sig_d / (-slope)
+    location_gradient = variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum()
+    if np.isnan(location_gradient): location_gradient = 0.0
+
+    if not _fixed_slope:
+        dphi1_ds = sig_d * (-arg / slope)
+        slope_gradient = variance * 2 * (dL_dK * phi1.dot(dphi1_ds.T)).sum()
+        if np.isnan(slope_gradient): slope_gradient = 0.0
+    else: slope_gradient = 1.0 # Going to discard it anyway
+
+    return location_gradient, slope_gradient
+
+@njit(nTup(f8, f8)(f8, f8, f8, b1, f8A2, f8A2, f8A2, f8A2, f8A2))
+def _update_gradients_full_S_2(variance, location, slope, _fixed_slope, phi1, phi2, dL_dK, X, X2):
+    arg, arg2 = (X - location) / slope, (X2 - location) / slope
+    sig_d, sig_d2 = _sigmoid_function_d(arg), _sigmoid_function_d(arg2)
+
+    dphi1_dl, dphi2_dl = sig_d / (-slope), sig_d2 / (-slope)
+    location_gradient = variance * (dL_dK * (phi1.dot(dphi2_dl.T) + phi2.dot(dphi1_dl.T))).sum()
+    if np.isnan(location_gradient): location_gradient = 0.0
+
+    if not _fixed_slope:
+        dphi1_ds = sig_d * (-arg / slope)
+        dphi2_ds = sig_d2 * (-arg2 / slope)
+        slope_gradient = variance * (dL_dK * (phi1.dot(dphi2_ds.T) + phi2.dot(dphi1_ds.T))).sum()
+        if np.isnan(slope_gradient): slope_gradient = 0.0
+    else: slope_gradient = 1.0 # Going to discard it anyway
+
+    return location_gradient, slope_gradient
+
 
 class SigmoidalIndicatorKernel(SigmoidalKernelBase):
-    """
-    Sigmoidal indicator function kernel with parametrised start location and width:
+    '''Sigmoidal indicator function kernel with parametrised start location and width:
     ascendingSigmoid(location) + (1 - ascendingSigmoid(location + width)) - 1, i.e. ascendingSigmoid(location) - ascendingSigmoid(location + width)
-    (hat if width > 0, and positive-well otherwise; can flip from one to the other by reverse=True)
-    """
+    (hat if width > 0, and positive-well otherwise; can flip from one to the other by reverse=True)'''
 
     def __init__(self, input_dim: int, reverse: bool = False, variance: float = 1., location: float = 0., slope: float = 0.2, width: float = 1.,
                  active_dims: int = None, name: str = 'sigmoidal_indicator', fixed_slope = False) -> None:
@@ -135,13 +153,9 @@ def __init__(self, input_dim: int, reverse: bool = False, variance: float = 1.,
 
     @Cache_this(limit = 3)
     def _phi(self, X):
-        asc = self._sigmoid_function((X - self.location) / self.slope)
-        desc = self._sigmoid_function((X - self.location - self.width) / (-self.slope))
-        height = self._core_sigmoid_function(self.width / (2 * self.slope))
-        hat = (asc + desc - 1) / height
-        return 1 - hat if self.reverse else hat
+        return _phi_SI(self.location[0], self.slope if self._fixed_slope else self.slope[0], self.width[0], self.reverse, X)
 
-    def update_gradients_full(self, dL_dK, X, X2=None):
+    def update_gradients_full(self, dL_dK, X, X2 = None):
         super(SigmoidalIndicatorKernel, self).update_gradients_full(dL_dK, X, X2)
         if X2 is None: X2 = X
 
@@ -151,51 +165,68 @@ def update_gradients_full(self, dL_dK, X, X2=None):
             phi1 = phi1[:, None]
             phi2 = phi2[:, None] if X2 is not X else phi1
 
-        arg, arg2 = (X - self.location) / self.slope, (X2 - self.location) / self.slope
-        argW, argW2 = (X - self.location - self.width) / (-self.slope), (X2 - self.location - self.width) / (-self.slope)
-        sig_d, sig_d2 = self._sigmoid_function_d(arg), self._sigmoid_function_d(arg2)
-        sig_dW, sig_dW2 = self._sigmoid_function_d(argW), self._sigmoid_function_d(argW2)
-
-        numerator1 = self._sigmoid_function(arg) + self._sigmoid_function(argW) - 1 # asc1 + desc1 - 1
-        numerator2 = self._sigmoid_function(arg2) + self._sigmoid_function(argW2) - 1 # asc2 + desc2 - 1
-
-        height_arg = self.width / (2 * self.slope)
-        inv_height = 1 / self._core_sigmoid_function(height_arg)
-        inv_height_d = self._inv_core_sigmoid_d(height_arg)
-        dinvheight_dw = inv_height_d / (2 * self.slope)
-        dinvheight_ds = inv_height_d * (-height_arg / self.slope)
-
-        # Repeated _sigmoid_function_d reference:
-        # Since arg is usually (x - l) / s, derivatives by the constituent terms are just _sigmoid_function_d(arg) * ...
-        #   x: * 1/s
-        #   l: * -1/s
-        #   s: * -(x-l)/s^2, i.e. * -arg/s
-        # When arg is (x - l - w) / (-s) [NOTE THE -s], derivatives by the constituent terms are just _sigmoid_function_d(arg) * ...
-        #   x: * -1/s
-        #   l: * 1/s
-        #   w: * 1/s
-        #   s: * (x-l-w)/(-s)^2, i.e. * -argW/s
-
-        dphi1_dl = ((- sig_d + sig_dW) / self.slope) * inv_height #+ (asc + desc) * dinvheight_dl, which is 0
-        dphi2_dl = ((- sig_d2 + sig_dW2) / self.slope) * inv_height #+ (asc + desc) * dinvheight_dl, which is 0
-        self.location.gradient = np.sum(self.variance * (dL_dK * (phi1.dot(dphi2_dl.T) + phi2.dot(dphi1_dl.T))).sum())
-        self.location.gradient = np.where(np.isnan(self.location.gradient), 0, self.location.gradient)
-
-        if not self._fixed_slope:
-            dphi1_ds = ((sig_d * arg + sig_dW * argW) / (-self.slope)) * inv_height + numerator1 * dinvheight_ds
-            dphi2_ds = ((sig_d2 * arg2 + sig_dW2 * argW2) / (-self.slope)) * inv_height + numerator2 * dinvheight_ds
-            self.slope.gradient = np.sum(self.variance * (dL_dK * (phi1.dot(dphi2_ds.T) + phi2.dot(dphi1_ds.T))).sum())
-            self.slope.gradient = np.where(np.isnan(self.slope.gradient), 0, self.slope.gradient)
-
-        dphi1_dw = (sig_dW / self.slope) * inv_height + numerator1 * dinvheight_dw # d(asc)_dw = 0
-        dphi2_dw = (sig_dW2 / self.slope) * inv_height + numerator2 * dinvheight_dw # d(asc)_dw = 0
-        self.width.gradient = np.sum(self.variance * (dL_dK * (phi1.dot(dphi2_dw.T) + phi2.dot(dphi1_dw.T))).sum())
-        self.width.gradient = np.where(np.isnan(self.width.gradient), 0, self.width.gradient)
-
-        if self.reverse: # Works this simply (a single sign flip per product above)
-            self.location.gradient = - self.location.gradient
-            if not self._fixed_slope: self.slope.gradient = - self.slope.gradient
-            self.width.gradient = - self.width.gradient
+        self.location.gradient, slope_gradient, self.width.gradient = _update_gradients_full_SI(
+            self.variance[0], self.location[0], self.slope if self._fixed_slope else self.slope[0], self.width[0],
+            self.reverse, self._fixed_slope, phi1, phi2, dL_dK, X, X2)
+        if not self._fixed_slope: self.slope.gradient = slope_gradient
+
+
+@njit(f8A2(f8, f8, f8, b1, f8A2)) # 4.6x faster than numpy version
+def _phi_SI(location, slope, width, reverse, X):
+    asc = _sigmoid_function((X - location) / slope)
+    desc = _sigmoid_function((X - location - width) / (-slope))
+    height = _core_sigmoid_function(width / (2 * slope))
+    hat = (asc + desc - 1) / height
+    return 1 - hat if reverse else hat
+
+@njit(nTup(f8, f8, f8)(f8, f8, f8, f8, b1, b1, f8A2, f8A2, f8A2, f8A2, f8A2)) # 1.45x faster than numpy version
+def _update_gradients_full_SI(variance, location, slope, width, reverse, _fixed_slope, phi1, phi2, dL_dK, X, X2):
+    arg, arg2 = (X - location) / slope, (X2 - location) / slope
+    argW, argW2 = (X - location - width) / (-slope), (X2 - location - width) / (-slope)
+    sig_d, sig_d2 = _sigmoid_function_d(arg), _sigmoid_function_d(arg2)
+    sig_dW, sig_dW2 = _sigmoid_function_d(argW), _sigmoid_function_d(argW2)
+
+    numerator1 = _sigmoid_function(arg) + _sigmoid_function(argW) - 1 # asc1 + desc1 - 1
+    numerator2 = _sigmoid_function(arg2) + _sigmoid_function(argW2) - 1 # asc2 + desc2 - 1
+
+    height_arg = width / (2 * slope)
+    inv_height = 1 / _core_sigmoid_function(height_arg)
+    inv_height_d = _inv_core_sigmoid_d(height_arg)
+    dinvheight_dw = inv_height_d / (2 * slope)
+    dinvheight_ds = inv_height_d * (-height_arg / slope)
+
+    # Repeated _sigmoid_function_d reference:
+    # Since arg is usually (x - l) / s, derivatives by the constituent terms are just _sigmoid_function_d(arg) * ...
+    #   x: * 1/s
+    #   l: * -1/s
+    #   s: * -(x-l)/s^2, i.e. * -arg/s
+    # When arg is (x - l - w) / (-s) [NOTE THE -s], derivatives by the constituent terms are just _sigmoid_function_d(arg) * ...
+    #   x: * -1/s
+    #   l: * 1/s
+    #   w: * 1/s
+    #   s: * (x-l-w)/(-s)^2, i.e. * -argW/s
+
+    dphi1_dl = ((- sig_d + sig_dW) / slope) * inv_height #+ (asc + desc) * dinvheight_dl, which is 0
+    dphi2_dl = ((- sig_d2 + sig_dW2) / slope) * inv_height #+ (asc + desc) * dinvheight_dl, which is 0
+    location_gradient = variance * (dL_dK * (phi1.dot(dphi2_dl.T) + phi2.dot(dphi1_dl.T))).sum()
+    if np.isnan(location_gradient): location_gradient = 0.0
+
+    if not _fixed_slope:
+        dphi1_ds = ((sig_d * arg + sig_dW * argW) / (-slope)) * inv_height + numerator1 * dinvheight_ds
+        dphi2_ds = ((sig_d2 * arg2 + sig_dW2 * argW2) / (-slope)) * inv_height + numerator2 * dinvheight_ds
+        slope_gradient = variance * (dL_dK * (phi1.dot(dphi2_ds.T) + phi2.dot(dphi1_ds.T))).sum()
+        if np.isnan(slope_gradient): slope_gradient = 0.0
+    else: slope_gradient = 1.0 # Going to discard it anyway
+
+    dphi1_dw = (sig_dW / slope) * inv_height + numerator1 * dinvheight_dw # d(asc)_dw = 0
+    dphi2_dw = (sig_dW2 / slope) * inv_height + numerator2 * dinvheight_dw # d(asc)_dw = 0
+    width_gradient = variance * (dL_dK * (phi1.dot(dphi2_dw.T) + phi2.dot(dphi1_dw.T))).sum()
+    if np.isnan(width_gradient): width_gradient = 0.0
+
+    if reverse: # Works this simply (a single sign flip per product above)
+        location_gradient = - location_gradient
+        if not _fixed_slope: slope_gradient = - slope_gradient
+        width_gradient = - width_gradient
+
+    return location_gradient, slope_gradient, width_gradient
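
A small numerical check (not part of the diff): since the hat in _phi_SI is rescaled by core_sig(w / (2s)), its peak at the midpoint x = l + w/2 should be exactly 1:

    import numpy as np
    from GPy_ABCD.Kernels.sigmoidalKernels import _phi_SI

    location, slope, width = 2.0, 0.5, 3.0
    midpoint = np.array([[location + width / 2]])
    assert np.isclose(_phi_SI(location, slope, width, False, midpoint)[0, 0], 1.0)
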
diff --git a/GPy_ABCD/Models/modelSearch.py b/GPy_ABCD/Models/modelSearch.py
index effb5c2..047edf8 100644
--- a/GPy_ABCD/Models/modelSearch.py
+++ b/GPy_ABCD/Models/modelSearch.py
@@ -39,8 +39,8 @@ def fit_mods_parallel_processes(X, Y, k_exprs, restarts = 5, optimiser = GPy_opt
 
 
 def explore_model_space(X, Y,
-        start_kernels: List[Union[str, KernelExpression]] = start_kernels['Default'], p_rules: List[Callable] = production_rules['Default'], utility_function: Callable = BIC,
-        rounds: int = 2, beam: Union[int, List[int]] = [3, 2, 1], restarts: int = 5,
+        start_kernels: List[Union[str, KernelExpression]] = start_kernels['Default'], p_rules: List[Callable] = production_rules['Default'],
+        utility_function: Callable = BIC, rounds: int = 2, beam: Union[int, List[int]] = [3, 2, 1], restarts: int = 5,
         model_list_fitter: Callable = fit_mods_parallel_processes, optimiser: str = GPy_optimisers[0], verbose: bool = True) -> Tuple[List[GPModel], List[List[GPModel]], List[KernelExpression], List[GPModel], List[GPModel]]:
     '''Perform `rounds` rounds of kernel expansion followed by model fit starting from the given `start_kernels` and expanding the best `beam` of them with `p_rules` production rules
diff --git a/GPy_ABCD/Util/benchmarking.py b/GPy_ABCD/Util/benchmarking.py
new file mode 100644
index 0000000..5bf37ad
--- /dev/null
+++ b/GPy_ABCD/Util/benchmarking.py
@@ -0,0 +1,100 @@
+'''This file is an old, reduced, Python-3.8-compatible version of the one in https://github.com/T-Flet/Python-Generic-Util'''
+import time
+from pandas import DataFrame, concat
+from numba import njit, vectorize
+
+from typing import Callable, Dict, List, Tuple
+
+
+def timethis(f: Callable, n = 2, *args, **kwargs):
+    '''Return n execution times for the given function with given arguments'''
+    ts = []
+    for i in range(n):
+        ts.append(time.perf_counter())
+        f(*args, **kwargs)
+        ts[-1] = time.perf_counter() - ts[-1]
+    return ts
+
+
+def compare_implementations(fs_with_shared_args: Dict[str, Callable], n = 200, wait = 1, verbose = True,
+                            fs_with_own_args: Dict[str, Tuple[Callable, List, Dict]] = None, args: List = None, kwargs: Dict = None):
+    '''Benchmark multiple implementations of the same function called n times each with the same args and kwargs, with a break between functions.
+    fs_with_own_args is meant for additional functions taking different *args and **kwargs.
+    Recommended output view if not verbose: print(table.to_markdown(index = False)).'''
+    assert n >= 3
+    table = []
+    for name, f in fs_with_shared_args.items():
+        time.sleep(wait)
+        if args:
+            if kwargs: times = timethis(f, n, *args, **kwargs)
+            else: times = timethis(f, n, *args)
+        elif kwargs: times = timethis(f, n, **kwargs)
+        else: times = timethis(f, n)
+        table.append([name, sum(times) / len(times), sum(times[1:]) / (len(times)-1), times[0], times[1], times[2]])
+        if verbose: print(f'Benchmarked {name} - mean {table[-1][1]} and mean excluding 1st run {table[-1][2]}')
+
+    if fs_with_own_args:
+        for name, f_a_k in fs_with_own_args.items():
+            f, args, kwargs = f_a_k
+            time.sleep(wait)
+            if args:
+                if kwargs: times = timethis(f, n, *args, **kwargs)
+                else: times = timethis(f, n, *args)
+            elif kwargs: times = timethis(f, n, **kwargs)
+            else: times = timethis(f, n)
+            table.append([name, sum(times) / len(times), sum(times[1:]) / (len(times)-1), times[0], times[1], times[2]])
+            if verbose: print(f'Benchmarked {name} - mean {table[-1][1]} and mean excluding 1st run {table[-1][2]}')
+
+    table = sorted(table, key = lambda row: row[1])
+
+    last, last1 = table[0][1], table[0][2]
+    table = [(name, m0, m1, m0 / table[0][1], m1 / table[0][2], next_mean_ratio, next_mean_ratio1, t0, t1, t2)
+             for name, m0, m1, t0, t1, t2 in table
+             if (next_mean_ratio := m0 / last) if (next_mean_ratio1 := m1 / last1)
+             if (last := m0) if (last1 := m1)]
+
+    df = DataFrame(table, columns = ['f', 'mean', 'mean excl. 1st', 'best mean ratio', 'best mean1 ratio', 'next mean ratio', 'next mean1 ratio', 't0', 't1', 't2'])
+    if verbose: print('\n', df.to_markdown(index = False), sep = '')
+    return df
+
+
+def numba_comparisons(f: Callable = None, numba_signature = None, separate_numba_signatures = None,
+                      f_scalar: Callable = None, numba_signatures_scalar = None, separate_numba_signaturess_scalar = None, parallel = False,
+                      fs_with_own_args: Dict[str, Tuple[Callable, List, Dict]] = None, n = 200, wait = 1, prefix = None, verbose = True,
+                      args: List = None, kwargs: dict = None):
+    '''Combine available ingredients to benchmark possible numba versions of the given function.
+    numba_signature and parallel are only used if f is given.
+    numba_signatures_scalar is only used if f_scalar is given.
+    separate_numba_signatures and separate_numba_signaturess_scalar are, respectively, a list and a list of lists of
+    additional signatures to be tried in addition to, respectively, numba_signature and numba_signatures_scalar.'''
+    f_dict = dict()
+    if f:
+        f_dict['Base'], f_dict['Lazy'] = f, njit(f)
+        if parallel: f_dict['Parallel'] = njit(parallel = True)(f)
+        if numba_signature:
+            f_dict['Eager'] = njit(numba_signature)(f)
+            if parallel: f_dict['Parallel Eager'] = njit(numba_signature, parallel = True)(f)
+        if separate_numba_signatures:
+            for i, sig in enumerate(separate_numba_signatures):
+                f_dict[f'Eager{i+2}'] = njit(sig)(f)
+                if parallel: f_dict[f'Parallel Eager{i+2}'] = njit(sig, parallel = True)(f)
+    if f_scalar:
+        f_dict['Vec Lazy'] = vectorize(f_scalar)
+        if numba_signatures_scalar: f_dict['Vec Eager'] = vectorize(numba_signatures_scalar)(f_scalar)
+        if separate_numba_signaturess_scalar:
+            for i, sigs in enumerate(separate_numba_signaturess_scalar): f_dict[f'Vec Eager{i+2}'] = vectorize(sigs)(f_scalar)
+    if prefix: f_dict = {(prefix + k): v for k, v in f_dict.items()}
+    return compare_implementations(f_dict, n, wait, verbose, fs_with_own_args, args, kwargs)
+
+
+def merge_bench_tables(*tables, verbose = True):
+    table = concat(tables).sort_values(by = 'mean').reset_index(drop = True)
+    table['best mean ratio'] = table['mean'] / table['mean'].values[0]
+    table['best mean1 ratio'] = table['mean excl. 1st'] / table['mean excl. 1st'].values[0]
+    table['next mean ratio'] = table['mean'] / table['mean'].shift()
+    table['next mean1 ratio'] = table['mean excl. 1st'] / table['mean excl. 1st'].shift()
+    table.loc[0, 'next mean ratio'], table.loc[0, 'next mean1 ratio'] = 1, 1
+    if verbose: print('\n', table.to_markdown(index = False), sep = '')
+    return table
+
diff --git a/GPy_ABCD/Util/modelUtil.py b/GPy_ABCD/Util/modelUtil.py
index 8d5ad03..1df490e 100644
--- a/GPy_ABCD/Util/modelUtil.py
+++ b/GPy_ABCD/Util/modelUtil.py
@@ -56,7 +56,7 @@ def fit_GPy_kern(X, Y, kernel, restarts, score = BIC, **kwargs):
     m.plot()
     print(m.kern)
     print(f'Log-Likelihood: {m.log_likelihood()}')
-    print(f'{score.__name__}: {score(m.log_likelihood(), len(X), m._size_transformed())}')
+    print(f'{score.__name__}: {score(m, m.log_likelihood(), len(X), m._size_transformed())}')
     plt.show()
diff --git a/GPy_ABCD/Util/numba_types.py b/GPy_ABCD/Util/numba_types.py
new file mode 100644
index 0000000..84ab29d
--- /dev/null
+++ b/GPy_ABCD/Util/numba_types.py
@@ -0,0 +1,27 @@
+'''This file is an old, reduced, Python-3.8-compatible version of the one in https://github.com/T-Flet/Python-Generic-Util'''
+from numba import njit, vectorize, guvectorize, stencil, typeof, b1, f8, i8
+from numba.core.types import Array
+from numba.core.types.containers import Tuple
+
+from typing import Union
+
+
+## This script serves to export common numba functions and type signatures easily and without namespace pollution
+__all__ = ['njit', 'typeof',
+           'b1', 'f8', 'i8', # The basic types used in the project
+           'b1A', 'f8A', 'i8A', # Shorthand for their C-consecutive 1d arrays
+           'b1A2', 'f8A2', 'i8A2', # Shorthand for their C-consecutive 2d arrays
+           'nTup', 'Union'] # numba's Tuple (renamed to nTup to avoid clashes) and typing's Union
+
+
+b1A = b1[::1]
+f8A = f8[::1]
+i8A = i8[::1]
+
+b1A2 = b1[:, ::1]
+f8A2 = f8[:, ::1]
+i8A2 = i8[:, ::1]
+
+def nTup(*args): return Tuple(args)
+
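
A hypothetical example (not part of the diff) of how the exported shorthands compose: eagerly compile a function taking a 2d float array and returning a tuple of two floats, mirroring the signatures used in the kernel modules above:

    import numpy as np
    from GPy_ABCD.Util.numba_types import njit, nTup, f8, f8A2

    @njit(nTup(f8, f8)(f8A2))
    def min_max(X): # A toy stand-in for the kernels' gradient helpers
        return X.min(), X.max()

    print(min_max(np.random.randn(5, 3)))
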
"""GPy-ABCD - Basic implementation with GPy of an Automatic Bayesian Covariance Discovery (ABCD) system""" -__version__ = '1.1' # Change it in setup.py too +__version__ = '1.2' # Change it in setup.py too __author__ = 'Thomas Fletcher ' # __all__ = [] diff --git a/README.rst b/README.rst index 7e2407b..57d334e 100644 --- a/README.rst +++ b/README.rst @@ -31,7 +31,7 @@ fits based on the identified functional shapes. See the picture in `Usage` below for an example input/output and read the paper for further details: `Fletcher, T., Bundy, A., & Nuamah, K. . GPy-ABCD: A Configurable Automatic Bayesian Covariance Discovery Implementation. -8th ICML Workshop on Automated Machine Learning (2021) `_ +8th ICML Workshop on Automated Machine Learning (2021) `_ diff --git a/Tests/regressLinearKernels.py b/Tests/regressLinearKernels.py index b353937..e776ec2 100644 --- a/Tests/regressLinearKernels.py +++ b/Tests/regressLinearKernels.py @@ -37,7 +37,11 @@ plt.show() - # VERDICT: LinearWithOffset is just better: better fits (even accounting for extra params) which is interpretable (the offsets are just roots) - # And having a single variance per product is even better + # VERDICT: LinearWithOffset is just better: better fits (even accounting for extra params), is interpretable (the offsets are just roots), + # and having a single variance per product is even better + +# from GPy_ABCD.Util.benchmarking import timethis +# times = timethis(fit_GPy_kern, 5, X, Y, kernel, 20, optimizer = GPy_optimisers[0]) +# print(sum(times) / len(times)) diff --git a/Tests/regressPeriodicKernel.py b/Tests/regressPeriodicKernel.py index 6d56c61..aecff22 100644 --- a/Tests/regressPeriodicKernel.py +++ b/Tests/regressPeriodicKernel.py @@ -37,3 +37,7 @@ plt.show() +# from GPy_ABCD.Util.benchmarking import timethis +# times = timethis(fit_kex, 5, X, Y, correct_k, 10) +# print(sum(times) / len(times)) + diff --git a/Tests/regressSigmoidalKernels.py b/Tests/regressSigmoidalKernels.py index db509a9..a33474f 100644 --- a/Tests/regressSigmoidalKernels.py +++ b/Tests/regressSigmoidalKernels.py @@ -20,19 +20,19 @@ def sigTwoLocIndicatorHeight(location, stop_location, slope): # YS = sig((X - 5) / 10) * 5 + np.random.randn(101, 1) * 0.5 #- 100 # fit_GPy_kern(X, YS, S(), 5) # fit_GPy_kern(X, YS, Sr(), 5) -# fit_GPy_kern(X, YS, S + C, 5) +# fit_GPy_kern(X, YS, S() + C(), 5) # YS = -YS # fit_GPy_kern(X, YS, S(), 5) # fit_GPy_kern(X, YS, Sr(), 5) -# fit_GPy_kern(X, YS, S + C, 5) +# fit_GPy_kern(X, YS, S() + C(), 5) # YS = sig((X - 5) / (-10)) * 5 + np.random.randn(101, 1) * 0.5 #- 100 # fit_GPy_kern(X, YS, S(), 5) # fit_GPy_kern(X, YS, Sr(), 5) -# fit_GPy_kern(X, YS, S + C, 5) +# fit_GPy_kern(X, YS, S() + C(), 5) # YS = -YS # fit_GPy_kern(X, YS, S(), 5) # fit_GPy_kern(X, YS, Sr(), 5) -# fit_GPy_kern(X, YS, S + C, 5) +# fit_GPy_kern(X, YS, S() + C(), 5) # I.e: S only fits functions moving from 0 to +ve or -ve values; Sr ones going TO 0 # Also, adding a constant makes either version fit any vaguely sigmoidal shape diff --git a/Tests/testData.py b/Tests/testData.py index 4e2466c..c5fe708 100644 --- a/Tests/testData.py +++ b/Tests/testData.py @@ -15,6 +15,7 @@ # Only 1, 2, 10 and 11 have published analyses, and their identified formulae are (deciphered from component descriptions): # 1: LIN + PER * LIN + SE + WN * LIN # Default Rules: (PER + C + LIN * (PER + C)) * (PER + PER + WN) * (C + LIN) + # or LIN + PER * LIN * (C + WN) * (PER + PER + C), LIN + PER * (C + WN) * (PER + C) * (PER + LIN) # 2: C + CW_1643_1716(PER + SE + RQ + WN * LIN + WN * 
 #   2: C + CW_1643_1716(PER + SE + RQ + WN * LIN + WN * LIN, C + WN) # Default Rules: C + (PER + C) * (PER + PER + PER + WN)
 #   10: PER + SE + CP_64(PER + WN, CW_69_77(SE, SE) + CP_90(C + WN, WN))
diff --git a/requirements.txt b/requirements.txt
index 57f21ab..c80a5d0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,9 @@
-# paramz>=0.9.5
-plotnine>=0.7.1
-numpy>=1.19.2
-matplotlib>=3.3.2
+numpy==1.23.5 # Because GPy 1.12 uses np.int, which was removed in numpy 1.24
+numba>=0.55.2
+GPy>=1.12.0
 scipy>=1.6.2
 pandas>=1.2.4
-GPy>=1.10.0
+matplotlib>=3.3.2
+plotnine>=0.7.1
 # ipython>=7.25.0
+# paramz>=0.9.5
diff --git a/setup.py b/setup.py
index c0e0e0b..3a658c9 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,7 @@ def read_requirements():
 
 setup(
     name = 'GPy-ABCD',
-    version = '1.1', # Change it in __init__.py too
+    version = '1.2', # Change it in __init__.py too
 
     url = 'https://github.com/T-Flet/GPy-ABCD',
     license = 'BSD 3-Clause',
@@ -37,6 +37,9 @@ def read_requirements():
         'Development Status :: 4 - Beta', # 'Development Status :: 5 - Production/Stable',
         'Programming Language :: Python',
-        'Programming Language :: Python :: 3.8'
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+        'Programming Language :: Python :: 3.11'
     ],
 )
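
A hedged sketch (not part of the diff) of how the new benchmarking utilities might be combined to produce speed-up figures like the ones quoted in the kernel comments (e.g. '2x faster than numpy version' for _Kdiag); the two zero-argument wrappers are hypothetical names introduced here for illustration:

    import numpy as np
    from GPy_ABCD.Util.benchmarking import compare_implementations
    from GPy_ABCD.Kernels.linearOffsetKernel import _Kdiag

    variance, offset = np.array([1.0]), np.array([0.5])
    X = np.random.randn(1000, 1)

    def _Kdiag_numpy(): return np.sum(variance * np.square(X - offset), -1)
    def _Kdiag_numba(): return _Kdiag(variance, offset, X)

    compare_implementations({'numpy': _Kdiag_numpy, 'numba': _Kdiag_numba}, n = 200, wait = 1)
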