Skip to content

Commit

Permalink
Fix covariance overparameterisation (GPflow#150)
Browse files Browse the repository at this point in the history
GPflow often optimizes positive-definite matrices. To maintain positive-definiteness without constrained optimization, a lower-triangular matrix is optimized. 

  Sigma + L L ^T

The previous approach to optimizing L was to ignore the upper half. The mean  that there were some extra variables in the optimization vector, which did nothing. This PR implements a tensorflow op which transforms back-and-forth between triangular matrix L and a 'packed' vector representation. The result is that there are no redundant parameters in the optimization vector.
  • Loading branch information
Mark van der Wilk authored and jameshensman committed Aug 9, 2016
1 parent 9c2faf6 commit 5726dd4
Show file tree
Hide file tree
Showing 11 changed files with 437 additions and 24 deletions.
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,12 @@ docs/_build/

# PyBuilder
target/

# Emacs backups
*~

# Pycharm IDE directory
.idea

# IPython Notebooks
*/.ipynb_checkpoints
13 changes: 6 additions & 7 deletions GPflow/param.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,17 +213,15 @@ def make_tf_array(self, free_array):
Then we return the number of elements that we've used to construct the
array, so that it can be sliced for the next Param.
"""

# TODO what about constraints that change the size ??

if self.fixed:
# fixed parameters are treated by tf.placeholder
return 0
x_free = free_array[:self.size]
free_size = self.transform.free_state_size(self.shape)
x_free = free_array[:free_size]
mapped_array = self.transform.tf_forward(x_free)
self._tf_array = tf.reshape(mapped_array, self.shape)
self._log_jacobian = self.transform.tf_log_jacobian(x_free)
return self.size
return free_size

def get_free_state(self):
"""
Expand Down Expand Up @@ -255,10 +253,11 @@ def set_state(self, x):
"""
if self.fixed:
return 0
new_array = self.transform.forward(x[:self.size]).reshape(self.shape)
free_size = self.transform.free_state_size(self.shape)
new_array = self.transform.forward(x[:free_size]).reshape(self.shape)
assert new_array.shape == self.shape
self._array[...] = new_array
return self.size
return free_size

def build_prior(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion GPflow/svgp.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def __init__(self, X, Y, kern, likelihood, Z, mean_function=Zero(),
else:
q_sqrt = np.array([np.eye(self.num_inducing)
for _ in range(self.num_latent)]).swapaxes(0, 2)
self.q_sqrt = Param(q_sqrt)
self.q_sqrt = Param(q_sqrt, transforms.LowerTriangular(q_sqrt.shape[2]))

def build_prior_KL(self):
if self.whiten:
Expand Down
29 changes: 26 additions & 3 deletions GPflow/tf_hacks.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Copyright 2016 James Hensman, alexggmatthews
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand All @@ -19,8 +19,31 @@
Hopefully we can remove these as the library matures
"""

import os
import tensorflow as tf


def eye(N):
return tf.diag(tf.ones(tf.pack([N, ]), dtype='float64'))


_custom_op_module = tf.load_op_library(os.path.join(os.path.dirname(__file__), 'tfops', 'matpackops.so'))
vec_to_tri = _custom_op_module.vec_to_tri
tri_to_vec = _custom_op_module.tri_to_vec


@tf.python.framework.ops.RegisterGradient("VecToTri")
def _vec_to_tri_grad(op, grad):
return [tri_to_vec(grad)]


@tf.RegisterShape("VecToTri")
def _vec_to_tri_shape(op):
in_shape = op.inputs[0].get_shape().with_rank(2)
M = in_shape[1].value
if M is None:
k = None
else:
k = int((M * 8 + 1) ** 0.5 / 2.0 - 0.5)
shape = tf.TensorShape([in_shape[0], k, k])
return [shape]
91 changes: 91 additions & 0 deletions GPflow/tfops/tri_to_vec.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Copyright 2016 Mark van der Wilk
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"


REGISTER_OP("TriToVec")
.Attr("T: realnumbertype")
.Input("trimat: T")
.Output("vec: T")
.Doc(R"doc(
Converts a series of triangular matrices to a series of vectors (i.e. a matrix).
If the input is D x N x N, then the output is D x M, where the lower
triangle of each N x N matrix has been packed into an M-vector.
)doc");

using namespace tensorflow;

template <typename T>
class TriToVecOp : public OpKernel {
public:
explicit TriToVecOp(OpKernelConstruction* context) : OpKernel(context) {}

void Compute(OpKernelContext* context) override {
// Grab the input tensor
const Tensor& input_tensor = context->input(0);

const TensorShape& input_shape = input_tensor.shape();
const int rank = input_shape.dims();

// For now, keep it as just a matrix
OP_REQUIRES(context, rank == 3,
errors::InvalidArgument("TriToVec expects a rank-3 tensor, received shape: ",
input_shape.DebugString()));

const int k = input_shape.dim_size(rank - 1); // Matrix size
OP_REQUIRES(context, k == input_shape.dim_size(rank - 2),
errors::InvalidArgument("input's last two dimensions must be equal, received shape: ",
input_shape.DebugString()));

auto f = input_tensor.flat_inner_dims<T, 3>();

// Create an output tensor
TensorShape out_shape({input_shape.dim_size(rank - 3), k * (k+1) / 2});
Tensor* output_tensor = NULL;
OP_REQUIRES_OK(context, context->allocate_output(0, out_shape,
&output_tensor));


auto output = output_tensor->template flat<T>();
int i = 0;
for (int z = 0; z != f.dimension(0); z++) {
for (int y = 0; y != f.dimension(1); y++) {
for (int x = 0; x != f.dimension(2); x++) {
if (y >= x) {
output(i) = f(z, y, x);
i++;
}
}
}
}
}
};

#define REGISTER_KERNEL(type) \
REGISTER_KERNEL_BUILDER( \
Name("TriToVec") \
.Device(DEVICE_CPU) \
.TypeConstraint<type>("T"), \
TriToVecOp<type> \
);


TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);

#undef REGISTER_KERNEL
93 changes: 93 additions & 0 deletions GPflow/tfops/vec_to_tri.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright 2016 Mark van der Wilk
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"


REGISTER_OP("VecToTri")
.Attr("T: realnumbertype")
.Input("vec: T")
.Output("matrix: T")
.Doc(R"doc(
Converts a matrix into a series of triangular matrices.
If the input is D x M, then the output is D x N x N, where the lower
triangle of each N x N matrix is constructed by unpacking each M-vector.
See also: TriToVec.
)doc");



using namespace tensorflow;

template <typename T>
class VecToTriOp : public OpKernel {
public:
explicit VecToTriOp(OpKernelConstruction* context) : OpKernel(context) {}

void Compute(OpKernelContext* context) override {
// Grab the input tensor
const Tensor& input_tensor = context->input(0);
auto input = input_tensor.flat<T>();

OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_tensor.shape()),
errors::InvalidArgument("VecToTri expects a 2-D matrix."));

auto ds = input_tensor.shape().dim_sizes();

int matsize = (int)std::floor(std::sqrt(ds[1] * 8 + 1) / 2.0 - 0.5); // Deduce square matrix size
int recvecsize = (int)std::round(0.5*matsize*(matsize+1)); // Reconstruct number of required vector elements

OP_REQUIRES(context, recvecsize == ds[1],
errors::InvalidArgument("Must have triangle number of elements in the input vector.")
);

// Create an output tensor
TensorShape out_shape({ds[0], matsize, matsize});
Tensor* output_tensor = NULL;
OP_REQUIRES_OK(context, context->allocate_output(0, out_shape,
&output_tensor));

auto output = output_tensor->template flat<T>();
const int N = output.size();
for (int i = 0; i != N; i++) {
int output_mat = i / (matsize * matsize);
int x = i % matsize;
int y = (i / matsize) % matsize;
if (x > y) {
output(i) = (T)0;
} else {
int idx = (i % (matsize*matsize)) - (int)std::round(matsize*y-0.5*y*y-0.5*y);
output(i) = input(idx + ds[1] * output_mat);
}
}
}
};

#define REGISTER_KERNEL(type) \
REGISTER_KERNEL_BUILDER( \
Name("VecToTri") \
.Device(DEVICE_CPU) \
.TypeConstraint<type>("T"), \
VecToTriOp<type> \
);


TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);

#undef REGISTER_KERNEL
Loading

0 comments on commit 5726dd4

Please sign in to comment.