Compatibility with next version of TensorFlow (GPflow#316)
* Compatibility with next version of TensorFlow.
- Remove references to batch_matmul.
- Rename various functions to their new TensorFlow names.
Mark van der Wilk authored and alexggmatthews committed Jan 13, 2017
1 parent 252baea commit 0e4b3c4
Showing 18 changed files with 83 additions and 84 deletions.
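Before the per-file diffs, here is a minimal sketch (not part of the commit) of the simple renames applied throughout, assuming a TensorFlow version in which the new names exist (roughly 0.12 onwards); the example tensors are made up for illustration.

import tensorflow as tf

X = tf.ones([3, 2])
Y = tf.zeros([3, 2])

# tf.pack -> tf.stack: stack a list of tensors along a new axis; GPflow uses
# this mostly to build dynamic shape vectors from tf.shape components.
shape = tf.stack([tf.shape(X)[0], 1])

# tf.select -> tf.where: element-wise choice between two tensors.
chosen = tf.where(tf.equal(X, 1.0), X, Y)

# tf.neg -> tf.negative: element-wise negation.
negated = tf.negative(X)

# The tf.fill(tf.stack([...]), value) pattern seen in the Kdiag methods below.
filled = tf.fill(shape, 0.5)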
10 changes: 5 additions & 5 deletions GPflow/conditionals.py
@@ -71,10 +71,10 @@ def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False):
     # compute the covariance due to the conditioning
     if full_cov:
         fvar = kern.K(Xnew) - tf.matmul(A, A, transpose_a=True)
-        shape = tf.pack([tf.shape(f)[1], 1, 1])
+        shape = tf.stack([tf.shape(f)[1], 1, 1])
     else:
         fvar = kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
-        shape = tf.pack([tf.shape(f)[1], 1])
+        shape = tf.stack([tf.shape(f)[1], 1])
     fvar = tf.tile(tf.expand_dims(fvar, 0), shape)  # D x N x N or D x N

     # another backsubstitution in the unwhitened case
@@ -89,13 +89,13 @@ def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False):
             LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # D x M x N
         elif q_sqrt.get_shape().ndims == 3:
             L = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # D x M x M
-            A_tiled = tf.tile(tf.expand_dims(A, 0), tf.pack([tf.shape(f)[1], 1, 1]))
-            LTA = tf.batch_matmul(L, A_tiled, adj_x=True)  # D x M x N
+            A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([tf.shape(f)[1], 1, 1]))
+            LTA = tf.matmul(L, A_tiled, transpose_a=True)  # D x M x N
         else:  # pragma: no cover
             raise ValueError("Bad dimension for q_sqrt: %s" %
                              str(q_sqrt.get_shape().ndims))
         if full_cov:
-            fvar = fvar + tf.batch_matmul(LTA, LTA, adj_x=True)  # D x N x N
+            fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True)  # D x N x N
         else:
             fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # D x N
     fvar = tf.transpose(fvar)  # N x D or N x N x D
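The tf.batch_matmul removals above (and the similar ones in ekernels.py and quadrature.py below) follow one pattern: tf.matmul now accepts batched, rank-3 operands directly, and the adj_x/adj_y flags map to transpose_a/transpose_b, which is equivalent here because the adjoint of a real-valued matrix is just its transpose. A hedged sketch with made-up shapes:

import tensorflow as tf

D, M, N = 4, 3, 5
L = tf.ones([D, M, M])   # batch of D square factors (placeholder values)
A = tf.ones([D, M, N])   # batch of D M-by-N matrices

# Old API: LTA = tf.batch_matmul(L, A, adj_x=True)
# New API: tf.matmul handles the leading batch dimension itself.
LTA = tf.matmul(L, A, transpose_a=True)   # D x M x N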
2 changes: 1 addition & 1 deletion GPflow/densities.py
@@ -30,7 +30,7 @@ def lognormal(x, mu, var):


 def bernoulli(p, y):
-    return tf.log(tf.select(tf.equal(y, 1), p, 1-p))
+    return tf.log(tf.where(tf.equal(y, 1), p, 1-p))


 def poisson(lamb, y):
23 changes: 11 additions & 12 deletions GPflow/ekernels.py
@@ -64,10 +64,10 @@ def exKxz(self, Z, Xmu, Xcov):
         M = tf.shape(Z)[0]
         N = tf.shape(Xmu)[0] - 1
         D = tf.shape(Xmu)[1]
-        Xsigmb = tf.slice(Xcov, [0, 0, 0, 0], tf.pack([-1, N, -1, -1]))
+        Xsigmb = tf.slice(Xcov, [0, 0, 0, 0], tf.stack([-1, N, -1, -1]))
         Xsigm = Xsigmb[0, :, :, :]  # NxDxD
         Xsigmc = Xsigmb[1, :, :, :]  # NxDxD
-        Xmum = tf.slice(Xmu, [0, 0], tf.pack([N, -1]))
+        Xmum = tf.slice(Xmu, [0, 0], tf.stack([N, -1]))
         Xmup = Xmu[1:, :]
         lengthscales = self.lengthscales if self.ARD else tf.zeros((D,), dtype=float_type) + self.lengthscales
         scalemat = tf.expand_dims(tf.diag(lengthscales ** 2.0), 0) + Xsigm  # NxDxD
@@ -82,10 +82,10 @@ def exKxz(self, Z, Xmu, Xcov):
         smIvec = tf.matrix_solve(rsm, tf.expand_dims(vec, 3))[:, :, :, 0]  # NxMxDx1
         q = tf.reduce_sum(smIvec * vec, [2])  # NxM

-        addvec = tf.batch_matmul(
+        addvec = tf.matmul(
             tf.tile(tf.expand_dims(Xsigmc, 1), (1, M, 1, 1)),
             tf.expand_dims(smIvec, 3),
-            adj_x=True
+            transpose_a=True
         )[:, :, :, 0] + tf.expand_dims(Xmup, 1)  # NxMxD

         return self.variance * addvec * tf.reshape(det ** -0.5, (N, 1, 1)) * tf.expand_dims(tf.exp(-0.5 * q), 2)
@@ -135,7 +135,7 @@ def eKxz(self, Z, Xmu, Xcov):
             raise NotImplementedError
         # use only active dimensions
         Z, Xmu = self._slice(Z, Xmu)
-        return self.variance * tf.batch_matmul(Xmu, tf.transpose(Z))
+        return self.variance * tf.matmul(Xmu, tf.transpose(Z))

     def exKxz(self, Z, Xmu, Xcov):
         with tf.control_dependencies([
@@ -149,7 +149,7 @@ def exKxz(self, Z, Xmu, Xcov):
         Xmum = Xmu[:-1, :]
         Xmup = Xmu[1:, :]
         op = tf.expand_dims(Xmum, 2) * tf.expand_dims(Xmup, 1) + Xcov[1, :-1, :, :]  # NxDxD
-        return self.variance * tf.batch_matmul(tf.tile(tf.expand_dims(Z, 0), (N, 1, 1)), op)
+        return self.variance * tf.matmul(tf.tile(tf.expand_dims(Z, 0), (N, 1, 1)), op)

     def eKzxKxz(self, Z, Xmu, Xcov):
         """
@@ -165,7 +165,7 @@ def eKzxKxz(self, Z, Xmu, Xcov):
         N = tf.shape(Xmu)[0]
         mom2 = tf.expand_dims(Xmu, 1) * tf.expand_dims(Xmu, 2) + Xcov  # NxDxD
         eZ = tf.tile(tf.expand_dims(Z, 0), (N, 1, 1))  # NxMxD
-        return self.variance ** 2.0 * tf.batch_matmul(tf.batch_matmul(eZ, mom2), eZ, adj_y=True)
+        return self.variance ** 2.0 * tf.matmul(tf.matmul(eZ, mom2), eZ, transpose_b=True)


 class Add(kernels.Add):
@@ -247,10 +247,10 @@ def Linear_RBF_eKxzKzx(self, Ka, Kb, Z, Xmu, Xcov):
         vecplus = (Z[None, :, :, None] / lengthscales2[None, None, :, None] +
                    tf.matrix_solve(Xcov, Xmu[:, :, None])[:, None, :, :])  # NxMxDx1
         mean = tf.cholesky_solve(tcgm,
-                                 tf.batch_matmul(tf.tile(Xcov[:, None, :, :], [1, M, 1, 1]), vecplus)
+                                 tf.matmul(tf.tile(Xcov[:, None, :, :], [1, M, 1, 1]), vecplus)
                                  )[:, :, :, 0] * lengthscales2[None, None, :]  # NxMxD
-        a = tf.batch_matmul(tf.tile(Z[None, :, :], [N, 1, 1]),
-                            mean * exp[:, :, None] * det[:, None, None] * const, adj_y=True)
+        a = tf.matmul(tf.tile(Z[None, :, :], [N, 1, 1]),
+                      mean * exp[:, :, None] * det[:, None, None] * const, transpose_b=True)
         return a + tf.transpose(a, [0, 2, 1])

     def quad_eKzx1Kxz2(self, Ka, Kb, Z, Xmu, Xcov):
@@ -264,8 +264,7 @@ def quad_eKzx1Kxz2(self, Ka, Kb, Z, Xmu, Xcov):

         # transform points based on Gaussian parameters
         cholXcov = tf.cholesky(Xcov)  # NxDxD
-        Xt = tf.batch_matmul(cholXcov, tf.tile(xn[None, :, :], (N, 1, 1)),
-                             adj_y=True)  # NxDxH**D
+        Xt = tf.matmul(cholXcov, tf.tile(xn[None, :, :], (N, 1, 1)), transpose_b=True)  # NxDxH**D

         X = 2.0 ** 0.5 * Xt + tf.expand_dims(Xmu, 2)  # NxDxH**D
         Xr = tf.reshape(tf.transpose(X, [2, 0, 1]), (-1, self.input_dim))  # (H**D*N)xD
4 changes: 2 additions & 2 deletions GPflow/gplvm.py
@@ -179,11 +179,11 @@ def build_predict(self, Xnew, full_cov=False):
         if full_cov:
             var = self.kern.K(Xnew) + tf.matmul(tf.transpose(tmp2), tmp2) \
                 - tf.matmul(tf.transpose(tmp1), tmp1)
-            shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
+            shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
             var = tf.tile(tf.expand_dims(var, 2), shape)
         else:
             var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
                 - tf.reduce_sum(tf.square(tmp1), 0)
-            shape = tf.pack([1, tf.shape(self.Y)[1]])
+            shape = tf.stack([1, tf.shape(self.Y)[1]])
             var = tf.tile(tf.expand_dims(var, 1), shape)
         return mean + self.mean_function(Xnew), var
2 changes: 1 addition & 1 deletion GPflow/gpr.py
@@ -80,7 +80,7 @@ def build_predict(self, Xnew, full_cov=False):
         fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)
         if full_cov:
             fvar = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
-            shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
+            shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
             fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
         else:
             fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
24 changes: 12 additions & 12 deletions GPflow/kernels.py
@@ -107,7 +107,7 @@ def _slice_cov(self, cov):
             gather1 = tf.gather(tf.transpose(covr, [2, 1, 0]), self.active_dims)
             gather2 = tf.gather(tf.transpose(gather1, [1, 0, 2]), self.active_dims)
             cov = tf.reshape(tf.transpose(gather2, [2, 0, 1]),
-                             tf.concat(0, [cov_shape[:-2], [len(self.active_dims), len(self.active_dims)]]))
+                             tf.concat_v2([cov_shape[:-2], [len(self.active_dims), len(self.active_dims)]], 0))
         return cov

     def __add__(self, other):
@@ -208,10 +208,10 @@ def exKxz(self, Z, Xmu, Xcov):

         # First, transform the compact representation of Xmu and Xcov into a
         # list of full distributions.
-        fXmu = tf.concat(1, (Xmu[:-1, :], Xmu[1:, :]))  # Nx2D
-        fXcovt = tf.concat(2, (Xcov[0, :-1, :, :], Xcov[1, :-1, :, :]))  # NxDx2D
-        fXcovb = tf.concat(2, (tf.transpose(Xcov[1, :-1, :, :], (0, 2, 1)), Xcov[0, 1:, :, :]))
-        fXcov = tf.concat(1, (fXcovt, fXcovb))
+        fXmu = tf.concat_v2((Xmu[:-1, :], Xmu[1:, :]), 1)  # Nx2D
+        fXcovt = tf.concat_v2((Xcov[0, :-1, :, :], Xcov[1, :-1, :, :]), 2)  # NxDx2D
+        fXcovb = tf.concat_v2((tf.transpose(Xcov[1, :-1, :, :], (0, 2, 1)), Xcov[0, 1:, :, :]), 2)
+        fXcov = tf.concat_v2((fXcovt, fXcovb), 1)
         return mvnquad(lambda x: tf.expand_dims(self.K(x[:, :D], Z), 2) *
                        tf.expand_dims(x[:, D:], 1),
                        fXmu, fXcov, self.num_gauss_hermite_points,
@@ -250,7 +250,7 @@ def __init__(self, input_dim, variance=1.0, active_dims=None):
         self.variance = Param(variance, transforms.positive)

     def Kdiag(self, X):
-        return tf.fill(tf.pack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+        return tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))


 class White(Static):
@@ -260,10 +260,10 @@ class White(Static):

     def K(self, X, X2=None, presliced=False):
         if X2 is None:
-            d = tf.fill(tf.pack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+            d = tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
             return tf.diag(d)
         else:
-            shape = tf.pack([tf.shape(X)[0], tf.shape(X2)[0]])
+            shape = tf.stack([tf.shape(X)[0], tf.shape(X2)[0]])
             return tf.zeros(shape, float_type)


@@ -274,9 +274,9 @@ class Constant(Static):

     def K(self, X, X2=None, presliced=False):
         if X2 is None:
-            shape = tf.pack([tf.shape(X)[0], tf.shape(X)[0]])
+            shape = tf.stack([tf.shape(X)[0], tf.shape(X)[0]])
         else:
-            shape = tf.pack([tf.shape(X)[0], tf.shape(X2)[0]])
+            shape = tf.stack([tf.shape(X)[0], tf.shape(X2)[0]])
         return tf.fill(shape, tf.squeeze(self.variance))


@@ -344,7 +344,7 @@ def euclid_dist(self, X, X2):
         return tf.sqrt(r2 + 1e-12)

     def Kdiag(self, X, presliced=False):
-        return tf.fill(tf.pack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+        return tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))


 class RBF(Stationary):
@@ -503,7 +503,7 @@ def __init__(self, input_dim, period=1.0, variance=1.0,
         self.period = Param(period, transforms.positive)

     def Kdiag(self, X, presliced=False):
-        return tf.fill(tf.pack([tf.shape(X)[0]]), tf.squeeze(self.variance))
+        return tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))

     def K(self, X, X2=None, presliced=False):
         if not presliced:
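The tf.concat changes above (and the matching ones in likelihoods.py further down) are about argument order rather than behaviour: the old tf.concat(axis, values) signature is replaced by the transitional tf.concat_v2(values, axis), which puts the values first; in TensorFlow 1.0 that argument order became the signature of tf.concat itself. A hedged sketch with made-up tensors:

import tensorflow as tf

a = tf.ones([2, 3])
b = tf.zeros([2, 3])

# Old API:          ab = tf.concat(1, (a, b))      # axis comes first
# Transitional API: ab = tf.concat_v2((a, b), 1)   # values first, as in this diff
# TensorFlow >= 1.0: values first, concat_v2 no longer needed.
ab = tf.concat((a, b), axis=1)   # shape [2, 6]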
2 changes: 1 addition & 1 deletion GPflow/kullback_leiblers.py
@@ -131,7 +131,7 @@ def gauss_kl(q_mu, q_sqrt, K):
     KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]), float_type)  # constant term
     Lq = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # force lower triangle
     KL += -0.5*tf.reduce_sum(tf.log(tf.square(tf.matrix_diag_part(Lq))))  # logdet
-    L_tiled = tf.tile(tf.expand_dims(L, 0), tf.pack([tf.shape(Lq)[0], 1, 1]))
+    L_tiled = tf.tile(tf.expand_dims(L, 0), tf.stack([tf.shape(Lq)[0], 1, 1]))
     LiLq = tf.matrix_triangular_solve(L_tiled, Lq, lower=True)
     KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
     return KL
18 changes: 9 additions & 9 deletions GPflow/likelihoods.py
@@ -418,7 +418,7 @@ def logp(self, F, Y):
             hits = tf.equal(tf.expand_dims(tf.argmax(F, 1), 1), Y)
             yes = tf.ones(tf.shape(Y), dtype=float_type) - self.invlink.epsilon
             no = tf.zeros(tf.shape(Y), dtype=float_type) + self.invlink._eps_K1
-            p = tf.select(hits, yes, no)
+            p = tf.where(hits, yes, no)
             return tf.log(p)
         else:
             raise NotImplementedError
@@ -434,10 +434,10 @@ def variational_expectations(self, Fmu, Fvar, Y):
     def predict_mean_and_var(self, Fmu, Fvar):
         if isinstance(self.invlink, RobustMax):
             # To compute this, we'll compute the density for each possible output
-            possible_outputs = [tf.fill(tf.pack([tf.shape(Fmu)[0], 1]), np.array(i, dtype=np.int64)) for i in
+            possible_outputs = [tf.fill(tf.stack([tf.shape(Fmu)[0], 1]), np.array(i, dtype=np.int64)) for i in
                                 range(self.num_classes)]
             ps = [self.predict_density(Fmu, Fvar, po) for po in possible_outputs]
-            ps = tf.transpose(tf.pack([tf.reshape(p, (-1,)) for p in ps]))
+            ps = tf.transpose(tf.stack([tf.reshape(p, (-1,)) for p in ps]))
             return ps, ps - tf.square(ps)
         else:
             raise NotImplementedError
@@ -510,8 +510,8 @@ def variational_expectations(self, Fmu, Fvar, Y):

     def predict_mean_and_var(self, Fmu, Fvar):
         mu_list, var_list = zip(*[lik.predict_mean_and_var(Fmu, Fvar) for lik in self.likelihood_list])
-        mu = tf.concat(1, mu_list)
-        var = tf.concat(1, var_list)
+        mu = tf.concat_v2(mu_list, 1)
+        var = tf.concat_v2(var_list, 1)
         return mu, var


@@ -555,8 +555,8 @@ def __init__(self, bin_edges):

     def logp(self, F, Y):
         Y = tf.cast(Y, tf.int32)
-        scaled_bins_left = tf.concat(0, [self.bin_edges/self.sigma, np.array([np.inf])])
-        scaled_bins_right = tf.concat(0, [np.array([-np.inf]), self.bin_edges/self.sigma])
+        scaled_bins_left = tf.concat_v2([self.bin_edges/self.sigma, np.array([np.inf])], 0)
+        scaled_bins_right = tf.concat_v2([np.array([-np.inf]), self.bin_edges/self.sigma], 0)
         selected_bins_left = tf.gather(scaled_bins_left, Y)
         selected_bins_right = tf.gather(scaled_bins_right, Y)

@@ -571,8 +571,8 @@ def _make_phi(self, F):
         Note that a matrix of F values is flattened.
         """
-        scaled_bins_left = tf.concat(0, [self.bin_edges/self.sigma, np.array([np.inf])])
-        scaled_bins_right = tf.concat(0, [np.array([-np.inf]), self.bin_edges/self.sigma])
+        scaled_bins_left = tf.concat_v2([self.bin_edges/self.sigma, np.array([np.inf])], 0)
+        scaled_bins_right = tf.concat_v2([np.array([-np.inf]), self.bin_edges/self.sigma], 0)
         return probit(scaled_bins_left - tf.reshape(F, (-1, 1)) / self.sigma)\
             - probit(scaled_bins_right - tf.reshape(F, (-1, 1)) / self.sigma)

4 changes: 2 additions & 2 deletions GPflow/mean_functions.py
@@ -45,7 +45,7 @@ def __mul__(self, other):

 class Zero(MeanFunction):
     def __call__(self, X):
-        return tf.zeros(tf.pack([tf.shape(X)[0], 1]), dtype=float_type)
+        return tf.zeros(tf.stack([tf.shape(X)[0], 1]), dtype=float_type)


 class Linear(MeanFunction):
@@ -80,7 +80,7 @@ def __init__(self, c=None):
         self.c = Param(c)

     def __call__(self, X):
-        shape = tf.pack([tf.shape(X)[0], 1])
+        shape = tf.stack([tf.shape(X)[0], 1])
         return tf.tile(tf.reshape(self.c, (1, -1)), shape)


8 changes: 4 additions & 4 deletions GPflow/model.py
@@ -128,8 +128,8 @@ def _compile(self, optimizer=None):
         f = self.build_likelihood() + self.build_prior()
         g, = tf.gradients(f, self._free_vars)

-        self._minusF = tf.neg(f, name='objective')
-        self._minusG = tf.neg(g, name='grad_objective')
+        self._minusF = tf.negative(f, name='objective')
+        self._minusG = tf.negative(g, name='grad_objective')

         # The optimiser needs to be part of the computational graph, and needs
         # to be initialised before tf.initialise_all_variables() is called.
@@ -384,10 +384,10 @@ def predict_f_samples(self, Xnew, num_samples):
         samples = []
         for i in range(self.num_latent):
             L = tf.cholesky(var[:, :, i] + jitter)
-            shape = tf.pack([tf.shape(L)[0], num_samples])
+            shape = tf.stack([tf.shape(L)[0], num_samples])
             V = tf.random_normal(shape, dtype=settings.dtypes.float_type)
             samples.append(mu[:, i:i + 1] + tf.matmul(L, V))
-        return tf.transpose(tf.pack(samples))
+        return tf.transpose(tf.stack(samples))

     @AutoFlow((float_type, [None, None]))
     def predict_y(self, Xnew):
5 changes: 2 additions & 3 deletions GPflow/quadrature.py
@@ -52,13 +52,12 @@ def mvnquad(f, means, covs, H, Din, Dout=()):

     # transform points based on Gaussian parameters
     cholXcov = tf.cholesky(covs)  # NxDxD
-    Xt = tf.batch_matmul(cholXcov, tf.tile(xn[None, :, :], (N, 1, 1)),
-                         adj_y=True)  # NxDxH**D
+    Xt = tf.matmul(cholXcov, tf.tile(xn[None, :, :], (N, 1, 1)), transpose_b=True)  # NxDxH**D
     X = 2.0 ** 0.5 * Xt + tf.expand_dims(means, 2)  # NxDxH**D
     Xr = tf.reshape(tf.transpose(X, [2, 0, 1]), (-1, Din))  # (H**D*N)xD

     # perform quadrature
     fX = tf.reshape(f(Xr), (H ** Din, N,) + Dout)
     wr = np.reshape(wn * np.pi ** (-Din * 0.5),
-                    (-1,) + (1,)*(1+len(Dout)))
+                    (-1,) + (1,) * (1 + len(Dout)))
     return tf.reduce_sum(fX * wr, 0)