
Commit 8fc28ef

Author: Jairo Rojas Delgado
Initial commit

0 parents  commit 8fc28ef
5 files changed: +1448 -0 lines

mallows_hamming.py

+230
@@ -0,0 +1,230 @@
import numpy as np
import itertools as it
from scipy.optimize import linear_sum_assignment
import mallows_model as mm


#************* Distance **************#

def distance(A, B=None):
    """Compute the Hamming distance between two permutations.

    If only one permutation is given, the distance is computed with respect
    to the identity permutation.

    Parameters
    ----------
    A: ndarray
        The first permutation
    B: ndarray, optional
        The second permutation (default is None, i.e. the identity)

    Returns
    -------
    int
        Hamming distance between A and B
    """
    if B is None:
        B = np.arange(len(A))
    # Count the positions at which the two permutations disagree
    return np.sum(A != B)
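# Example (illustrative only, not part of the original file): the Hamming
# distance counts the positions where the two permutations disagree.
#   distance(np.array([1, 0, 2]), np.array([0, 1, 2]))   # -> 2
#   distance(np.array([2, 1, 0]))   # against the identity [0, 1, 2] -> 2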


def dist_at_uniform(n): return n


#************ Sampling ************#

def sample(m, n, *, theta=None, phi=None, s0=None):
    """Generate m permutations (rankings) according to a Mallows Model.

    Parameters
    ----------
    m: int
        Number of rankings to generate
    n: int
        Length of the rankings
    theta: float, optional (if phi is given)
        Dispersion parameter theta
    phi: float, optional (if theta is given)
        Dispersion parameter phi
    s0: ndarray, optional
        Consensus ranking (default is the identity)

    Returns
    -------
    ndarray
        The m x n matrix of generated rankings.
    """
    sample = np.zeros((m, n))
    theta, phi = mm.check_theta_phi(theta, phi)

    # facts_[i] = i! and deran_num_[i] = number of derangements of i items
    facts_ = np.array([1, 1] + [0] * (n - 1), dtype=float)
    deran_num_ = np.array([1, 0] + [0] * (n - 1), dtype=float)
    for i in range(2, n + 1):
        facts_[i] = facts_[i - 1] * i
        deran_num_[i] = deran_num_[i - 1] * (i - 1) + deran_num_[i - 2] * (i - 1)
    # hamm_count_[d] = number of permutations at Hamming distance d from s0
    hamm_count_ = np.array([deran_num_[d] * facts_[n] / (facts_[d] * facts_[n - d]) for d in range(n + 1)], dtype=float)
    # Unnormalized probability of each distance under the model
    probsd = np.array([hamm_count_[d] * np.exp(-theta * d) for d in range(n + 1)], dtype=float)

    for m_ in range(m):
        # Draw a target distance, then a random permutation at that distance
        target_distance = np.random.choice(n + 1, p=probsd / probsd.sum())
        sample[m_, :] = sample_at_dist(n, target_distance, s0)

    return sample
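# Example (illustrative sketch; the values are assumed, and
# mallows_model.check_theta_phi is expected to derive theta from phi):
#   rankings = sample(m=100, n=5, phi=0.5)   # 100 rankings of length 5 around the identity
#   rankings.shape                           # -> (100, 5)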


def sample_at_dist(n, dist, sigma0=None):
    """Randomly generate a permutation of length n at Hamming distance dist
    from a given permutation sigma0.

    Parameters
    ----------
    n: int
        Length of the permutations
    dist: int
        Distance between the randomly generated permutation and the known
        permutation sigma0
    sigma0: ndarray, optional
        A known permutation (if not given, the identity is used)

    Returns
    -------
    ndarray
        A random permutation at distance dist from sigma0.
    """
    if sigma0 is None:
        sigma0 = np.arange(n)
    sigma = np.zeros(n) - 1
    # Choose n - dist positions to keep fixed; the rest are deranged
    fixed_points = np.random.choice(n, n - dist, replace=False)
    sigma[fixed_points] = fixed_points
    unfix = np.setdiff1d(np.arange(n), fixed_points)
    unfix = np.random.permutation(unfix)
    # Arrange the unfixed points in a single cycle so none stays in place
    for i in range(len(unfix) - 1):
        sigma[unfix[i]] = unfix[i + 1]
    if len(unfix) > 0:
        sigma[unfix[-1]] = unfix[0]
    # Compose with sigma0 so the result lies at distance dist from sigma0
    return sigma[sigma0].astype(int)
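# Example (illustrative sketch, parameters assumed): a random permutation of
# length 6 at Hamming distance 3 from the identity.
#   p = sample_at_dist(6, 3)
#   distance(p)   # -> 3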


#********* Expected distance *********#

def expected_dist_mm(n, theta=None, phi=None):
    """Compute the expected Hamming distance under a Mallows Model (MM).

    Parameters
    ----------
    n: int
        Length of the permutations in the considered model
    theta: float, optional (if phi is given)
        Real dispersion parameter
    phi: float, optional (if theta is given)
        Real dispersion parameter

    Returns
    -------
    float
        The expected distance under the MM.
    """
    theta, phi = mm.check_theta_phi(theta, phi)

    facts_ = np.array([1, 1] + [0] * (n - 1), dtype=float)
    for i in range(2, n + 1):
        facts_[i] = facts_[i - 1] * i
    # x_n   = sum_{k=0}^{n}   (e^theta - 1)^k / k!
    # x_n_1 = sum_{k=0}^{n-1} (e^theta - 1)^k / k!
    x_n_1, x_n = 0, 0
    for k in range(n + 1):
        aux = (np.exp(theta) - 1) ** k / facts_[k]
        x_n += aux
        if k < n:
            x_n_1 += aux
    return (n * x_n - x_n_1 * np.exp(theta)) / x_n
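# Example (illustrative sketch, values assumed): larger theta concentrates the
# model around the consensus, so the expected distance shrinks.
#   expected_dist_mm(5, theta=0.1)   # close to the near-uniform value (roughly 3.9)
#   expected_dist_mm(5, theta=2.0)   # much smaller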


#************ Learning ************#

def median(sample, ws=1):
    """Compute the central permutation (consensus ranking) of several
    permutations using the Hungarian algorithm.

    Parameters
    ----------
    sample: ndarray
        Matrix of permutations, one per row
    ws: float or ndarray, optional
        Weights of the permutations (unweighted by default)

    Returns
    -------
    ndarray
        The central permutation of the given permutations
    """
    m, n = sample.shape
    # wmarg[i, j] = (weighted) number of permutations placing item j at position i
    wmarg = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            freqs = (sample[:, i] == j)
            wmarg[i, j] = (freqs * ws).sum()
    # Maximize the total first-order marginals via a minimum-cost assignment
    row_ind, col_ind = linear_sum_assignment(-wmarg)
    return col_ind
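# Example (illustrative sketch, data assumed): consensus of three rankings.
#   rankings = np.array([[0, 1, 2, 3],
#                        [0, 1, 3, 2],
#                        [1, 0, 2, 3]])
#   median(rankings)   # -> array([0, 1, 2, 3])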


def prob(sigma, sigma0, theta=None, phi=None):
    """Probability mass function of an MM with central ranking sigma0 and
    dispersion parameter theta/phi.

    Parameters
    ----------
    sigma: ndarray
        A permutation
    sigma0: ndarray
        Central permutation
    theta: float, optional (if phi is given)
        Dispersion parameter
    phi: float, optional (if theta is given)
        Dispersion parameter

    Returns
    -------
    float
        Probability of sigma under the model.
    """
    theta, phi = mm.check_theta_phi(theta, phi)
    d = distance(sigma, sigma0)
    n = len(sigma)
    facts_ = np.array([1, 1] + [0] * (n - 1), dtype=float)
    for i in range(2, n + 1):
        facts_[i] = facts_[i - 1] * i
    # psi is the normalization constant of the MM under the Hamming distance
    acc = 0
    for i in range(n + 1):
        acc += ((np.exp(theta) - 1) ** i) / facts_[i]
    psi = acc * np.exp(-n * theta) * facts_[n]
    return np.exp(-d * theta) / psi
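# Example (illustrative sketch, values assumed): over all permutations of a
# small n the probabilities should sum to 1, with sigma0 the most likely.
#   sigma0 = np.arange(3)
#   ps = [prob(np.array(s), sigma0, theta=1.0) for s in it.permutations(range(3))]
#   sum(ps)   # -> approximately 1.0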


def find_phi(n, dmin, dmax):
    """Find a dispersion parameter phi whose expected distance lies between
    dmin and dmax for rankings of length n, by bisection on phi in (0, 1).

    Parameters
    ----------
    n: int
        Length of the permutations
    dmin: int
        Minimum acceptable expected distance
    dmax: int
        Maximum acceptable expected distance

    Returns
    -------
    float
        The value of phi.
    """
    assert dmin < dmax
    imin, imax = 0.0, 1.0
    iterat = 0
    while iterat < 500:
        med = (imax + imin) / 2
        d = expected_dist_mm(n, phi=med)
        # The expected distance grows with phi, so bisect accordingly
        if d < dmin:
            imin = med
        elif d > dmax:
            imax = med
        else:
            return med
        iterat += 1
    assert False, "Max iterations reached"
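# Example (illustrative sketch, values assumed): a phi whose expected Hamming
# distance lies between 2 and 3 for rankings of length 10.
#   phi = find_phi(10, 2, 3)
#   expected_dist_mm(10, phi=phi)   # -> between 2 and 3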
# end
