[FIX] numerical errors in ot.gmm (#690)

samuelbx · Samuel Boïté · rflamary · web-flow · commit 6311e256dd59 · 2024-11-19T09:15:55.000+01:00
* fixed numerical errors in density computations

* lint

* hotfix

* cholesky not useful anymore

* vectorization

* backend tests for slogdet

* update releases.md

* added contribution

---------

Co-authored-by: Samuel Boïté <samuel.boite@polytechnique.edu>
Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
@@ -54,6 +54,7 @@ The contributors to this library are:
 * [Sonia Mazelet](https://github.com/SoniaMaz8) (Template based GNN layers)
 * [Laurène David](https://github.com/laudavid) (Low rank sinkhorn, Low rank Gromov-Wasserstein samples)
 * [Julie Delon](https://judelo.github.io/) (GMM OT)
+* [Samuel Boïté](https://samuelbx.github.io/) (GMM OT)
 
 ## Acknowledgments
 
diff --git a/RELEASES.md b/RELEASES.md
@@ -7,6 +7,7 @@
 
 #### Closed issues
 - Fixed `ot.mapping` solvers which depended on deprecated `cvxpy` `ECOS` solver (PR #692, Issue #668)
+- Fixed numerical errors in `ot.gmm` (PR #690, Issue #689)
 
 
 ## 0.9.5
diff --git a/ot/backend.py b/ot/backend.py
@@ -1073,6 +1073,14 @@ def det(self, a):
         """
         raise NotImplementedError()
 
+    def slogdet(self, a):
+        r"""
+        Compute the sign and (natural) logarithm of the determinant of an array.
+
+        See: https://numpy.org/doc/stable/reference/generated/numpy.linalg.slogdet.html
+        """
+        raise NotImplementedError()
+
 
 class NumpyBackend(Backend):
     """
@@ -1433,6 +1441,9 @@ def nan_to_num(self, x, copy=True, nan=0.0, posinf=None, neginf=None):
     def det(self, a):
         return np.linalg.det(a)
 
+    def slogdet(self, a):
+        return np.linalg.slogdet(a)
+
 
 _register_backend_implementation(NumpyBackend)
 
@@ -1826,6 +1837,9 @@ def nan_to_num(self, x, copy=True, nan=0.0, posinf=None, neginf=None):
     def det(self, x):
         return jnp.linalg.det(x)
 
+    def slogdet(self, a):
+        return jnp.linalg.slogdet(a)
+
 
 if jax:
     # Only register jax backend if it is installed
@@ -2359,6 +2373,9 @@ def nan_to_num(self, x, copy=True, nan=0.0, posinf=None, neginf=None):
     def det(self, x):
         return torch.linalg.det(x)
 
+    def slogdet(self, a):
+        return torch.linalg.slogdet(a)
+
 
 if torch:
     # Only register torch backend if it is installed
@@ -2767,6 +2784,9 @@ def nan_to_num(self, x, copy=True, nan=0.0, posinf=None, neginf=None):
     def det(self, x):
         return cp.linalg.det(x)
 
+    def slogdet(self, a):
+        return cp.linalg.slogdet(a)
+
 
 if cp:
     # Only register cp backend if it is installed
@@ -3205,6 +3225,9 @@ def nan_to_num(self, x, copy=True, nan=0.0, posinf=None, neginf=None):
     def det(self, x):
         return tf.linalg.det(x)
 
+    def slogdet(self, a):
+        return tf.linalg.slogdet(a)
+
 
 if tf:
     # Only register tensorflow backend if it is installed
diff --git a/ot/gmm.py b/ot/gmm.py
@@ -16,9 +16,9 @@
 from .gaussian import bures_wasserstein_mapping
 
 
-def gaussian_pdf(x, m, C):
+def gaussian_logpdf(x, m, C):
     r"""
-    Compute the probability density function of a multivariate
+    Compute the log of the probability density function of a multivariate
     Gaussian distribution.
 
     Parameters
@@ -40,10 +40,35 @@ def gaussian_pdf(x, m, C):
         x.shape[-1] == m.shape[-1] == C.shape[-1] == C.shape[-2]
     ), "Dimension mismatch"
     nx = get_backend(x, m, C)
-    d = x.shape[-1]
-    z = (2 * np.pi) ** (-d / 2) * nx.det(C) ** (-0.5)
-    exp = nx.exp(-0.5 * nx.sum(((x - m) @ nx.inv(C)) * (x - m), axis=-1))
-    return z * exp
+    d = m.shape[0]
+    diff = x - m
+    inv_C = nx.inv(C)
+    z = nx.sum(diff * (diff @ inv_C), axis=-1)
+    _, log_det_C = nx.slogdet(C)
+    return -0.5 * (d * np.log(2 * np.pi) + log_det_C + z)
+
+
+def gaussian_pdf(x, m, C):
+    r"""
+    Compute the probability density function of a multivariate
+    Gaussian distribution.
+
+    Parameters
+    ----------
+    x : array-like, shape (..., d)
+        The input samples.
+    m : array-like, shape (d,)
+        The mean vector of the Gaussian distribution.
+    C : array-like, shape (d, d)
+        The covariance matrix of the Gaussian distribution.
+
+    Returns
+    -------
+    pdf : array-like, shape (...,)
+        The probability density function evaluated at each sample.
+
+    """
+    return get_backend(x, m, C).exp(gaussian_logpdf(x, m, C))
 
 
 def gmm_pdf(x, m, C, w):
@@ -281,25 +306,28 @@ def gmm_ot_apply_map(
     n_samples = x.shape[0]
 
     if method == "bary":
-        normalization = gmm_pdf(x, m_s, C_s, w_s)[:, None]
         out = nx.zeros(x.shape)
-        print("where plan > 0", nx.where(plan > 0))
+        logpdf = nx.stack(
+            [gaussian_logpdf(x, m_s[k], C_s[k])[:, None] for k in range(k_s)]
+        )
 
         # only need to compute for non-zero plan entries
         for i, j in zip(*nx.where(plan > 0)):
             Cs12 = nx.sqrtm(C_s[i])
             Cs12inv = nx.inv(Cs12)
-            g = gaussian_pdf(x, m_s[i], C_s[i])[:, None]
 
             M0 = nx.sqrtm(Cs12 @ C_t[j] @ Cs12)
             A = Cs12inv @ M0 @ Cs12inv
             b = m_t[j] - A @ m_s[i]
 
             # gaussian mapping between components i and j applied to x
             T_ij_x = x @ A + b
-            out = out + plan[i, j] * g * T_ij_x
+            z = w_s[:, None, None] * nx.exp(logpdf - logpdf[i][None, :, :])
+            denom = nx.sum(z, axis=0)
 
-        return out / normalization
+            out = out + plan[i, j] * T_ij_x / denom
+
+        return out
 
     else:  # rand
         # A[i, j] is the linear part of the gaussian mapping between components
@@ -318,13 +346,19 @@ def gmm_ot_apply_map(
             A[i, j] = Cs12inv @ M0 @ Cs12inv
             b[i, j] = m_t[j] - A[i, j] @ m_s[i]
 
-        normalization = gmm_pdf(x, m_s, C_s, w_s)  # (n_samples,)
-        gs = np.stack([gaussian_pdf(x, m_s[i], C_s[i]) for i in range(k_s)], axis=-1)
+        logpdf = nx.stack(
+            [gaussian_logpdf(x, m_s[k], C_s[k]) for k in range(k_s)], axis=-1
+        )
         # (n_samples, k_s)
         out = nx.zeros(x.shape)
 
         for i_sample in range(n_samples):
-            p_mat = plan * gs[i_sample][:, None] / normalization[i_sample]
+            log_g = logpdf[i_sample]
+            log_diff = log_g[:, None] - log_g[None, :]
+            weighted_exp = w_s[:, None] * nx.exp(log_diff)
+            denom = nx.sum(weighted_exp, axis=0)[:, None] * nx.ones(plan.shape[1])
+            p_mat = plan / denom
+
             p = p_mat.reshape(k_s * k_t)  # stack line-by-line
             # sample between 0 and k_s * k_t - 1
             ij_mat = rng.choice(k_s * k_t, p=p)
diff --git a/test/test_backend.py b/test/test_backend.py
@@ -271,6 +271,8 @@ def test_empty_backend():
         nx.eigh(M)
     with pytest.raises(NotImplementedError):
         nx.det(M)
+    with pytest.raises(NotImplementedError):
+        nx.slogdet(M)
 
 
 def test_func_backends(nx):
@@ -691,6 +693,11 @@ def test_func_backends(nx):
         lst_b.append(nx.to_numpy(d))
         lst_name.append("det")
 
+        s, logabsd = nx.slogdet(M1b)
+        s, logabsd = nx.to_numpy(s), nx.to_numpy(logabsd)
+        lst_b.append(np.array([s, logabsd]))
+        lst_name.append("slogdet")
+
         assert not nx.array_equal(Mb, vb), "array_equal (shape)"
         assert nx.array_equal(Mb, Mb), "array_equal (elements) - expected true"
         assert not nx.array_equal(