wasserstein_1d now returns the cost itself (the transportation matrix is no longer returned); wasserstein2_1d is removed

rtavenar · rtavenar · commit c92e595009ad · 2019-06-27T11:08:15.000+02:00
diff --git a/ot/__init__.py b/ot/__init__.py
@@ -23,7 +23,7 @@
 from . import unbalanced
 
 # OT functions
-from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d, wasserstein2_1d
+from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d
 from .bregman import sinkhorn, sinkhorn2, barycenter
 from .unbalanced import sinkhorn_unbalanced, barycenter_unbalanced
 from .da import sinkhorn_lpl1_mm
@@ -35,6 +35,6 @@
 
 __all__ = ["emd", "emd2", 'emd_1d', "sinkhorn", "sinkhorn2", "utils", 'datasets',
            'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov',
-           'emd_1d', 'emd2_1d', 'wasserstein_1d', 'wasserstein2_1d',
+           'emd_1d', 'emd2_1d', 'wasserstein_1d',
            'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim',
            'sinkhorn_unbalanced', "barycenter_unbalanced"]
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
@@ -21,7 +21,7 @@
 from ..utils import dist
 
 __all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
-         'emd_1d', 'emd2_1d', 'wasserstein_1d', 'wasserstein2_1d']
+         'emd_1d', 'emd2_1d', 'wasserstein_1d']
 
 
 def emd(a, b, M, numItermax=100000, log=False):
@@ -529,9 +529,9 @@ def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
     return cost
 
 
-def wasserstein_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False):
+def wasserstein_1d(x_a, x_b, a=None, b=None, p=1.):
     """Solves the p-Wasserstein distance problem between 1d measures and returns
-    the OT matrix
+    the distance
 
 
     .. math::
@@ -560,22 +560,11 @@ def wasserstein_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False):
         Target histogram (default is uniform weight)
     p: float, optional (default=1.0)
          The order of the p-Wasserstein distance to be computed
-    dense: boolean, optional (default=True)
-        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
-        Otherwise returns a sparse representation using scipy's `coo_matrix`
-        format. Due to implementation details, this function runs faster when
-        `'sqeuclidean'`, `'minkowski'`, `'cityblock'`,  or `'euclidean'` metrics
-        are used.
-    log: boolean, optional (default=False)
-        If True, returns a dictionary containing the cost.
-        Otherwise returns only the optimal transportation matrix.
 
     Returns
     -------
-    gamma: (ns, nt) ndarray
-        Optimal transportation matrix for the given parameters
-    log: dict
-        If input log is True, a dictionary containing the cost
+    dist: float
+        p-Wasserstein distance
 
 
     Examples
@@ -590,96 +579,8 @@ def wasserstein_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False):
     >>> x_a = [2., 0.]
     >>> x_b = [0., 3.]
     >>> ot.wasserstein_1d(x_a, x_b, a, b)
-    array([[0. ,  0.5],
-           [0.5,  0. ]])
-    >>> ot.wasserstein_1d(x_a, x_b)
-    array([[0. ,  0.5],
-           [0.5,  0. ]])
-
-    References
-    ----------
-
-    .. [1]  Peyré, G., & Cuturi, M. (2017). "Computational Optimal
-        Transport", 2018.
-
-    See Also
-    --------
-    ot.lp.emd_1d : EMD for 1d distributions
-    ot.lp.wasserstein2_1d : Wasserstein for 1d distributions (returns the cost
-        instead of the transportation matrix)
-    """
-    if log:
-        G, log = emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
-                        dense=dense, log=log)
-        log['cost'] = np.power(log['cost'], 1. / p)
-        return G, log
-    return emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
-                  dense=dense, log=log)
-
-
-def wasserstein2_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False):
-    """Solves the p-Wasserstein distance problem between 1d measures and returns
-    the loss
-
-
-    .. math::
-        \gamma = arg\min_\gamma \left( \sum_i \sum_j \gamma_{ij}
-            |x_a[i] - x_b[j]|^p \\right)^{1/p}
-
-        s.t. \gamma 1 = a,
-             \gamma^T 1= b,
-             \gamma\geq 0
-    where :
-
-    - x_a and x_b are the samples
-    - a and b are the sample weights
-
-    Uses the algorithm detailed in [1]_
-
-    Parameters
-    ----------
-    x_a : (ns,) or (ns, 1) ndarray, float64
-        Source dirac locations (on the real line)
-    x_b : (nt,) or (ns, 1) ndarray, float64
-        Target dirac locations (on the real line)
-    a : (ns,) ndarray, float64, optional
-        Source histogram (default is uniform weight)
-    b : (nt,) ndarray, float64, optional
-        Target histogram (default is uniform weight)
-    p: float, optional (default=1.0)
-         The order of the p-Wasserstein distance to be computed
-    dense: boolean, optional (default=True)
-        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
-        Otherwise returns a sparse representation using scipy's `coo_matrix`
-        format. Only used if log is set to True. Due to implementation details,
-        this function runs faster when dense is set to False.
-    log: boolean, optional (default=False)
-        If True, returns a dictionary containing the transportation matrix.
-        Otherwise returns only the loss.
-
-    Returns
-    -------
-    loss: float
-        Cost associated to the optimal transportation
-    log: dict
-        If input log is True, a dictionary containing the Optimal transportation
-        matrix for the given parameters
-
-
-    Examples
-    --------
-
-    Simple example with obvious solution. The function wasserstein2_1d accepts
-    lists and performs automatic conversion to numpy arrays
-
-    >>> import ot
-    >>> a=[.5, .5]
-    >>> b=[.5, .5]
-    >>> x_a = [2., 0.]
-    >>> x_b = [0., 3.]
-    >>> ot.wasserstein2_1d(x_a, x_b, a, b)
     0.5
-    >>> ot.wasserstein2_1d(x_a, x_b)
+    >>> ot.wasserstein_1d(x_a, x_b)
     0.5
 
     References
@@ -690,14 +591,8 @@ def wasserstein2_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False):
 
     See Also
     --------
-    ot.lp.emd2_1d : EMD for 1d distributions
-    ot.lp.wasserstein_1d : Wasserstein for 1d distributions (returns the
-        transportation matrix instead of the cost)
+    ot.lp.emd_1d : EMD for 1d distributions
     """
-    if log:
-        cost, log = emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
-                            dense=dense, log=log)
-        cost = np.power(cost, 1. / p)
-        return cost, log
-    return np.power(emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
-                            dense=dense, log=log), 1. /  p)
+    cost_emd = emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
+                       dense=False, log=False)
+    return np.power(cost_emd, 1. / p)
diff --git a/test/test_ot.py b/test/test_ot.py
@@ -98,15 +98,11 @@ def test_wass_1d():
     G, log = ot.emd([], [], M, log=True)
     wass = log["cost"]
 
-    G_1d, log = ot.wasserstein_1d(u, v, [], [], p=2., log=True)
-    wass1d = log["cost"]
+    wass1d = ot.wasserstein_1d(u, v, [], [], p=2.)
 
     # check loss is similar
     np.testing.assert_allclose(np.sqrt(wass), wass1d)
 
-    # check G is similar
-    np.testing.assert_allclose(G, G_1d)
-
 
 def test_emd_empty():
     # test emd and emd2 for simple identity