Fix covariance overparameterisation (GPflow#150)

GPflow often optimizes positive-definite matrices. To maintain positive-definiteness without constrained optimization, a lower-triangular matrix is optimized. Sigma + L L ^T The previous approach to optimizing L was to ignore the upper half. The mean that there were some extra variables in the optimization vector, which did nothing. This PR implements a tensorflow op which transforms back-and-forth between triangular matrix L and a 'packed' vector representation. The result is that there are no redundant parameters in the optimization vector.
Bonnevie · Aug 9, 2016 · 5726dd4 · 5726dd4
1 parent 9c2faf6
commit 5726dd4
Show file tree

Hide file tree

Showing 11 changed files with 437 additions and 24 deletions.
diff --git a/.gitignore b/.gitignore
@@ -55,3 +55,12 @@ docs/_build/
 
 # PyBuilder
 target/
+
+# Emacs backups
+*~
+
+# Pycharm IDE directory
+.idea
+
+# IPython Notebooks
+*/.ipynb_checkpoints
diff --git a/GPflow/param.py b/GPflow/param.py
@@ -213,17 +213,15 @@ def make_tf_array(self, free_array):
         Then we return the number of elements that we've used to construct the
         array, so that it can be sliced for the next Param.
         """
-
-        # TODO what about constraints that change the size ??
-
         if self.fixed:
             # fixed parameters are treated by tf.placeholder
             return 0
-        x_free = free_array[:self.size]
+        free_size = self.transform.free_state_size(self.shape)
+        x_free = free_array[:free_size]
         mapped_array = self.transform.tf_forward(x_free)
         self._tf_array = tf.reshape(mapped_array, self.shape)
         self._log_jacobian = self.transform.tf_log_jacobian(x_free)
-        return self.size
+        return free_size
 
     def get_free_state(self):
         """
@@ -255,10 +253,11 @@ def set_state(self, x):
         """
         if self.fixed:
             return 0
-        new_array = self.transform.forward(x[:self.size]).reshape(self.shape)
+        free_size = self.transform.free_state_size(self.shape)
+        new_array = self.transform.forward(x[:free_size]).reshape(self.shape)
         assert new_array.shape == self.shape
         self._array[...] = new_array
-        return self.size
+        return free_size
 
     def build_prior(self):
         """

diff --git a/GPflow/svgp.py b/GPflow/svgp.py
@@ -102,7 +102,7 @@ def __init__(self, X, Y, kern, likelihood, Z, mean_function=Zero(),
         else:
             q_sqrt = np.array([np.eye(self.num_inducing)
                                for _ in range(self.num_latent)]).swapaxes(0, 2)
-            self.q_sqrt = Param(q_sqrt)
+            self.q_sqrt = Param(q_sqrt, transforms.LowerTriangular(q_sqrt.shape[2]))
 
     def build_prior_KL(self):
         if self.whiten:

diff --git a/GPflow/tf_hacks.py b/GPflow/tf_hacks.py
@@ -1,11 +1,11 @@
 # Copyright 2016 James Hensman, alexggmatthews
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 # http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -19,8 +19,31 @@
 Hopefully we can remove these as the library matures
 """
 
+import os
 import tensorflow as tf
 
 
 def eye(N):
     return tf.diag(tf.ones(tf.pack([N, ]), dtype='float64'))
+
+
+_custom_op_module = tf.load_op_library(os.path.join(os.path.dirname(__file__), 'tfops', 'matpackops.so'))
+vec_to_tri = _custom_op_module.vec_to_tri
+tri_to_vec = _custom_op_module.tri_to_vec
+
+
+@tf.python.framework.ops.RegisterGradient("VecToTri")
+def _vec_to_tri_grad(op, grad):
+    return [tri_to_vec(grad)]
+
+
+@tf.RegisterShape("VecToTri")
+def _vec_to_tri_shape(op):
+    in_shape = op.inputs[0].get_shape().with_rank(2)
+    M = in_shape[1].value
+    if M is None:
+        k = None
+    else:
+        k = int((M * 8 + 1) ** 0.5 / 2.0 - 0.5)
+    shape = tf.TensorShape([in_shape[0], k, k])
+    return [shape]
diff --git a/GPflow/tfops/tri_to_vec.cc b/GPflow/tfops/tri_to_vec.cc
@@ -0,0 +1,91 @@
+// Copyright 2016 Mark van der Wilk
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cmath>
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+
+
+REGISTER_OP("TriToVec")
+.Attr("T: realnumbertype")
+.Input("trimat: T")
+.Output("vec: T")
+.Doc(R"doc(
+Converts a series of triangular matrices to a series of vectors (i.e. a matrix). 
+
+If the input is D x N x N, then the output is D x M, where the lower
+triangle of each N x N matrix has been packed into an M-vector.
+)doc");
+
+using namespace tensorflow;
+
+template <typename T>
+class TriToVecOp : public OpKernel {
+public:
+  explicit TriToVecOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Grab the input tensor
+    const Tensor& input_tensor = context->input(0);
+
+    const TensorShape& input_shape = input_tensor.shape();
+    const int rank = input_shape.dims();
+
+    // For now, keep it as just a matrix
+    OP_REQUIRES(context, rank == 3,
+		errors::InvalidArgument("TriToVec expects a rank-3 tensor, received shape: ",
+		input_shape.DebugString()));
+
+    const int k = input_shape.dim_size(rank - 1);  // Matrix size
+    OP_REQUIRES(context, k == input_shape.dim_size(rank - 2),
+        errors::InvalidArgument("input's last two dimensions must be equal, received shape: ",
+        input_shape.DebugString()));
+
+    auto f = input_tensor.flat_inner_dims<T, 3>();
+
+    // Create an output tensor
+    TensorShape out_shape({input_shape.dim_size(rank - 3), k * (k+1) / 2});
+    Tensor* output_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape,
+	                                                 &output_tensor));
+
+
+    auto output = output_tensor->template flat<T>();
+    int i = 0;
+    for (int z = 0; z != f.dimension(0); z++) {
+      for (int y = 0; y != f.dimension(1); y++) {
+        for (int x = 0; x != f.dimension(2); x++) {
+          if (y >= x) {
+            output(i) = f(z, y, x);
+            i++;
+          }
+        }
+      }
+    }
+  }
+};
+
+#define REGISTER_KERNEL(type)             \
+  REGISTER_KERNEL_BUILDER(                \
+      Name("TriToVec")                    \
+      .Device(DEVICE_CPU)                 \
+      .TypeConstraint<type>("T"),         \
+      TriToVecOp<type>                    \
+  );
+
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);
+
+#undef REGISTER_KERNEL
diff --git a/GPflow/tfops/vec_to_tri.cc b/GPflow/tfops/vec_to_tri.cc
@@ -0,0 +1,93 @@
+// Copyright 2016 Mark van der Wilk
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cmath>
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+
+
+REGISTER_OP("VecToTri")
+.Attr("T: realnumbertype")
+.Input("vec: T")
+.Output("matrix: T")
+.Doc(R"doc(
+Converts a matrix into a series of triangular matrices.
+
+If the input is D x M, then the output is D x N x N, where the lower
+triangle of each N x N matrix is constructed by unpacking each M-vector.
+
+See also: TriToVec.
+)doc");
+
+
+
+using namespace tensorflow;
+
+template <typename T>
+class VecToTriOp : public OpKernel {
+public:
+  explicit VecToTriOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Grab the input tensor
+    const Tensor& input_tensor = context->input(0);
+    auto input = input_tensor.flat<T>();
+
+    OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_tensor.shape()),
+		errors::InvalidArgument("VecToTri expects a 2-D matrix."));
+
+    auto ds = input_tensor.shape().dim_sizes();
+
+    int matsize = (int)std::floor(std::sqrt(ds[1] * 8 + 1) / 2.0 - 0.5);  // Deduce square matrix size
+    int recvecsize = (int)std::round(0.5*matsize*(matsize+1));            // Reconstruct number of required vector elements
+
+    OP_REQUIRES(context, recvecsize == ds[1],
+	        errors::InvalidArgument("Must have triangle number of elements in the input vector.")
+	        );
+
+    // Create an output tensor
+    TensorShape out_shape({ds[0], matsize, matsize});
+    Tensor* output_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape,
+						     &output_tensor));
+
+    auto output = output_tensor->template flat<T>();
+    const int N = output.size();
+    for (int i = 0; i != N; i++) {
+      int output_mat = i / (matsize * matsize);
+      int x = i % matsize;
+      int y = (i / matsize) % matsize;
+      if (x > y) {
+        output(i) = (T)0;
+      } else {
+        int idx = (i % (matsize*matsize)) - (int)std::round(matsize*y-0.5*y*y-0.5*y);
+        output(i) = input(idx + ds[1] * output_mat);
+      }
+    }
+  }
+};
+
+#define REGISTER_KERNEL(type)             \
+  REGISTER_KERNEL_BUILDER(                \
+      Name("VecToTri")                    \
+      .Device(DEVICE_CPU)                 \
+      .TypeConstraint<type>("T"),         \
+      VecToTriOp<type>                    \
+  );
+
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);
+
+#undef REGISTER_KERNEL