diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 9d27ef6..27f5125 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -10,7 +10,7 @@ jobs: max-parallel: 4 matrix: os: [windows-latest, ubuntu-latest, macOS-latest] - python-version: [3.8] + python-version: ["3.10"] steps: - uses: actions/checkout@v1 diff --git a/GPy_ABCD/KernelExpansion/kernelOperations.py b/GPy_ABCD/KernelExpansion/kernelOperations.py index fb99bf9..55c927c 100644 --- a/GPy_ABCD/KernelExpansion/kernelOperations.py +++ b/GPy_ABCD/KernelExpansion/kernelOperations.py @@ -1,3 +1,5 @@ +from copy import deepcopy + from GPy_ABCD.Kernels.baseKernels import * from GPy_ABCD.Kernels.baseKernels import __FIX_SIGMOIDAL_KERNELS_SLOPE @@ -33,7 +35,8 @@ def fit_ker_to_kex_with_params(ker, kex, verbose = False): return kex.match_up_fit_parameters(param_dict, 'GP_regression.') -def init_rand_params(kex, verbose = True): # A testing function to initialise a kernel with random parameters +def init_rand_params(kex, verbose = True): + '''A testing function to initialise a kernel with random parameters''' if verbose: print(kex) ker = kex._initialise().to_kernel() ker.randomize() diff --git a/GPy_ABCD/KernelExpressions/base.py b/GPy_ABCD/KernelExpressions/base.py index b6fcbb8..b77015a 100644 --- a/GPy_ABCD/KernelExpressions/base.py +++ b/GPy_ABCD/KernelExpressions/base.py @@ -53,10 +53,10 @@ def simplify(self): def extract_if_singleton(self): pass - # NOTE: both traverse and reduce ignore raw-string leaves (which can only happen in ChangeKEs); - # care has to be taken to perform required operations on them from their parent @abstractmethod def traverse(self): + '''NOTE: both traverse and reduce ignore raw-string leaves (which can only happen in ChangeKEs); + care has to be taken to perform required operations on them from their parent''' pass # Same note as traverse; see Test.checkKernelExpressions for an example func @@ -98,7 +98,8 @@ 
def new_tree_with_self_replaced(self, replacement_node): # NOTE: replacement_nod return copied_replacement.parent.reassign_child(copied_self, copied_replacement) @abstractmethod - def reassign_child(self, old_child, new_child): # NOTE: has to return new_child (used by new_tree_with_self_replaced) + def reassign_child(self, old_child, new_child): + '''NOTE: has to return new_child (used by new_tree_with_self_replaced)''' pass @abstractmethod @@ -128,11 +129,16 @@ def _new_parameters(self, new_parameters): return self @abstractmethod - def match_up_fit_parameters(self, fit_ker, prefix): # Note: the prefix has to already contain THIS node's name followed by a dot at the end + def match_up_fit_parameters(self, fit_ker, prefix): + '''NOTE: the prefix has to already contain THIS node's name followed by a dot at the end''' pass @abstractmethod - def sum_of_prods_form(self): # Return either a ProductKE or a SumKE whose composite_terms are only ProductKEs + def sum_of_prods_form(self): + '''Return either a ProductKE or a SumKE whose composite_terms are only ProductKEs. + + NOTE: this method CAN only be called when parameters are present (i.e. after .match_up_fit_parameters has been called), + and SHOULD only be called indirectly through GPModel.sum_of_prods_kex or GPModel.interpret()''' pass def get_interpretation(self, sops = None): diff --git a/GPy_ABCD/KernelExpressions/change.py b/GPy_ABCD/KernelExpressions/change.py index d235a52..0c8afcc 100644 --- a/GPy_ABCD/KernelExpressions/change.py +++ b/GPy_ABCD/KernelExpressions/change.py @@ -115,6 +115,7 @@ def add_sum_of_prods_terms(k1, k2): return res._set_all_parents() def sum_of_prods_form(self): + assert self.parameters, 'A sum-of-products form can only be generated when parameters are present (i.e. 
after .match_up_fit_parameters has been triggered), and should only be called indirectly through GPModel.sum_of_prods_kex or GPModel.interpret()' new_children = [] for branch, kex in (('left', self.left), ('right', self.right)): sigmoid_parameters = (change_k_sigmoid_names[self.CP_or_CW][branch], self.parameters[self.CP_or_CW][0]) diff --git a/GPy_ABCD/KernelExpressions/commutatives.py b/GPy_ABCD/KernelExpressions/commutatives.py index a647467..46bedf9 100644 --- a/GPy_ABCD/KernelExpressions/commutatives.py +++ b/GPy_ABCD/KernelExpressions/commutatives.py @@ -36,6 +36,7 @@ def simplify_base_terms_params(self): return self def sum_of_prods_form(self): + assert self.parameters, 'A sum-of-products form can only be generated when parameters are present (i.e. after .match_up_fit_parameters has been triggered), and should only be called indirectly through GPModel.sum_of_prods_kex or GPModel.interpret()' cts = [ct.sum_of_prods_form() for ct in self.composite_terms] self.composite_terms.clear() for ct in cts: # Only SumKEs or ProductKEs now @@ -133,6 +134,7 @@ def multiply_pure_prods_with_params(k0, ks): # k0 is meant to be the ProductKE c return ProductKE([]).new_bases_with_parameters([(key, p) for kex in [k0] + ks for key, ps in list(kex.parameters.items()) for p in ps]) def sum_of_prods_form(self): + assert self.parameters, 'A sum-of-products form can only be generated when parameters are present (i.e. 
after .match_up_fit_parameters has been triggered), and should only be called indirectly through GPModel.sum_of_prods_kex or GPModel.interpret()' sops = SumKE([]) if not self.composite_terms: sops.composite_terms.append(ProductKE(self.base_terms)._new_parameters(self.parameters)) # Avoid triggering simplify() diff --git a/GPy_ABCD/Models/model.py b/GPy_ABCD/Models/model.py index d3e6dba..37762bd 100644 --- a/GPy_ABCD/Models/model.py +++ b/GPy_ABCD/Models/model.py @@ -12,6 +12,7 @@ def __init__(self, X, Y, kernel_expression = SumKE(['WN'])._initialise()): self.X = X self.Y = Y self.kernel_expression = kernel_expression + self._sum_of_prods_kex = None self.restarts = None self.model = None self.cached_utility_function = None @@ -30,7 +31,20 @@ def fit(self, restarts = None, optimiser = 'lbfgsb', verbose = False, robust = F self.model.optimize_restarts(num_restarts = self.restarts, verbose = verbose, robust = robust, optimizer = optimiser, **kwargs) return self - def interpret(self): return fit_ker_to_kex_with_params(self.model.kern, deepcopy(self.kernel_expression)).get_interpretation() + @property + def sum_of_prods_kex(self): + '''The canonical kernel form (the one described in .interpret). + + NOTE: this property/method can only be called after the model has been fitted.''' + if self.model is None: raise ValueError('No parameters to insert into the kernel expression since the model has not yet been fitted') + elif self._sum_of_prods_kex is None: self._sum_of_prods_kex = fit_ker_to_kex_with_params(self.model.kern, deepcopy(self.kernel_expression)).sum_of_prods_form() + return self._sum_of_prods_kex + + def interpret(self): + '''Describe the model with a few sentences (which break down the expanded kernel form, i.e. .sum_of_prods_kex). 
+ + NOTE: this method can only be called after the model has been fitted.''' + return self.sum_of_prods_kex.get_interpretation(sops = self._sum_of_prods_kex) def predict(self, X, quantiles = (2.5, 97.5), full_cov = False, Y_metadata = None, kern = None, likelihood = None, include_likelihood = True): mean, cov = self.model.predict(X, full_cov, Y_metadata, kern, likelihood, include_likelihood) diff --git a/GPy_ABCD/__init__.py b/GPy_ABCD/__init__.py index fff6c1b..a8663b6 100644 --- a/GPy_ABCD/__init__.py +++ b/GPy_ABCD/__init__.py @@ -1,6 +1,6 @@ """GPy-ABCD - Basic implementation with GPy of an Automatic Bayesian Covariance Discovery (ABCD) system""" -__version__ = '1.2.1' # Change it in setup.py too +__version__ = '1.2.2' # Change it in setup.py too __author__ = 'Thomas Fletcher ' # __all__ = [] diff --git a/README.rst b/README.rst index b3af015..4185e77 100644 --- a/README.rst +++ b/README.rst @@ -21,17 +21,25 @@ GPy-ABCD :target: https://github.com/T-Flet/GPy-ABCD/blob/master/LICENSE :alt: License -Basic implementation with GPy of an Automatic Bayesian Covariance Discovery (ABCD) system +*(Temporary note: the "failing" build badge above is due to the workflow pip not finding GPy 1.12.0 for some reason; the tests are successful)* -Briefly: ABCD is a modelling system which consists in exploring a space of compositional kernels -(i.e. covariances of Gaussian Processes) constructed by iteratively combining a small set of base ones, -returning the best fitting models using them, and capable of generating simple text descriptions of the +GPy-ABCD is a basic implementation with GPy of an Automatic Bayesian Covariance Discovery (ABCD) system. + +Briefly, ABCD is a (Gaussian Process) modelling method which consists in exploring a space of modular kernels +(i.e. 
covariances) constructed by iteratively combining a small set of simple ones, +and returning the best fitting models using them; +due to its modularity, it is capable of generating simple text descriptions of the fits based on the identified functional shapes. +The usefulness of ABCD is in identifying the underlying shape of data, but the process is +computationally expensive; therefore, a typical use for it is in initial data overviews +(possibly on subsampled datasets for efficiency), followed by more direct exploration +of its top few results' kernels (on the full dataset if subsampled before). + See the picture in `Usage` below for an example input/output and read the paper for further details: `Fletcher, T., Bundy, A., & Nuamah, K. . GPy-ABCD: A Configurable Automatic Bayesian Covariance Discovery Implementation. -8th ICML Workshop on Automated Machine Learning (2021) `_ +8th ICML Workshop on Automated Machine Learning (2021) `_ @@ -159,7 +167,10 @@ Further Notes Generic: - Please reach out if you have successfully used this project in your own research -- Feel free to fork and expand this project (pull requests are welcome) since it is not the focus of my research; it was written just because I needed to use it in a broader adaptive statistical modelling context and therefore I have no need to expand its functionality in the near future +- Feel free to fork and expand this project (pull requests are welcome) since it + was only written because its particular features were required in a broader + adaptive statistical modelling context, + and therefore it is unlikely that its functionality will be expanded in the near future Code-related: diff --git a/Tests/test_kernelExpressions.py b/Tests/test_kernelExpressions.py index 22b1460..895781a 100644 --- a/Tests/test_kernelExpressions.py +++ b/Tests/test_kernelExpressions.py @@ -73,8 +73,8 @@ def testFunc(node, acc): # Sets arbitrary root, adds LIN base term and returns a return acc res = 
test_expr.reduce(testFunc, []) - assert res == ['PER', 'LIN', 'WN', 'C', 'LIN', 'SE', 'LIN', 'WN', 'LIN'] # This is reduce handles str ChangeKE branches - # assert res == ['PER', 'LIN', 'WN', 'C', 'LIN', 'SE', 'WN'] # This one if it does not + assert res == ['PER', 'LIN', 'WN', 'LIN', 'SE', 'LIN', 'WN', 'LIN'] # This if reduce handles str ChangeKE branches + # assert res == ['PER', 'LIN', 'WN', 'LIN', 'SE', 'WN'] # This one if it does not assert [kex.root == 'HI' for kex in test_expr.traverse()] assert test_expr == ChangeKE('CP', ProductKE(['PER', 'LIN'], [SumKE(['WN', 'C', 'LIN'])]), ChangeKE('CW', 'SE', 'WN')) diff --git a/setup.py b/setup.py index 779f15b..e9b4cc4 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ def read_requirements(): setup( name = 'GPy-ABCD', - version = '1.2.1', # Change it in __init__.py too + version = '1.2.2', # Change it in __init__.py too url = 'https://github.com/T-Flet/GPy-ABCD', license = 'BSD 3-Clause', @@ -40,6 +40,7 @@ def read_requirements(): 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11' + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12' ], )