
Commit 8fc28ef

Author: Jairo Rojas Delgado
Initial commit

0 parents  commit 8fc28ef
5 files changed: +1448 -0 lines

mallows_hamming.py

+230
@@ -0,0 +1,230 @@
import numpy as np
import itertools as it
from scipy.optimize import linear_sum_assignment
import mallows_model as mm


#************* Distance **************#

def distance(A, B=None):
    """Compute the Hamming distance between two permutations.

    If only one permutation is given, the distance is computed with respect
    to the identity permutation.

    Parameters
    ----------
    A: ndarray
        The first permutation
    B: ndarray, optional
        The second permutation (default is None, i.e. the identity)

    Returns
    -------
    int
        Hamming distance between A and B
    """
    if B is None:
        B = np.arange(len(A))
    # Count the positions at which the two permutations disagree
    return np.sum(A != B)
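# Example (illustrative only, not part of the original file): the Hamming
# distance counts the positions where the two permutations disagree.
#   distance(np.array([1, 0, 2]), np.array([0, 1, 2]))   # -> 2
#   distance(np.array([2, 1, 0]))   # against the identity [0, 1, 2] -> 2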


def dist_at_uniform(n): return n


#************ Sampling ************#

def sample(m, n, *, theta=None, phi=None, s0=None):
    """Generate m permutations (rankings) according to a Mallows Model.

    Parameters
    ----------
    m: int
        Number of rankings to generate
    n: int
        Length of the rankings
    theta: float, optional (if phi is given)
        Dispersion parameter theta
    phi: float, optional (if theta is given)
        Dispersion parameter phi
    s0: ndarray, optional
        Consensus ranking (default is the identity)

    Returns
    -------
    ndarray
        The m x n matrix of generated rankings.
    """
    sample = np.zeros((m, n))
    theta, phi = mm.check_theta_phi(theta, phi)

    # facts_[i] = i! and deran_num_[i] = number of derangements of i items
    facts_ = np.array([1, 1] + [0] * (n - 1), dtype=float)
    deran_num_ = np.array([1, 0] + [0] * (n - 1), dtype=float)
    for i in range(2, n + 1):
        facts_[i] = facts_[i - 1] * i
        deran_num_[i] = deran_num_[i - 1] * (i - 1) + deran_num_[i - 2] * (i - 1)
    # hamm_count_[d] = number of permutations at Hamming distance d from s0
    hamm_count_ = np.array([deran_num_[d] * facts_[n] / (facts_[d] * facts_[n - d]) for d in range(n + 1)], dtype=float)
    # Unnormalized probability of each distance under the model
    probsd = np.array([hamm_count_[d] * np.exp(-theta * d) for d in range(n + 1)], dtype=float)

    for m_ in range(m):
        # Draw a target distance, then a random permutation at that distance
        target_distance = np.random.choice(n + 1, p=probsd / probsd.sum())
        sample[m_, :] = sample_at_dist(n, target_distance, s0)

    return sample
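# Example (illustrative sketch; the values are assumed, and
# mallows_model.check_theta_phi is expected to derive theta from phi):
#   rankings = sample(m=100, n=5, phi=0.5)   # 100 rankings of length 5 around the identity
#   rankings.shape                           # -> (100, 5)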


def sample_at_dist(n, dist, sigma0=None):
    """Randomly generate a permutation of length n at Hamming distance dist
    from a given permutation sigma0.

    Parameters
    ----------
    n: int
        Length of the permutations
    dist: int
        Distance between the randomly generated permutation and the known
        permutation sigma0
    sigma0: ndarray, optional
        A known permutation (if not given, the identity is used)

    Returns
    -------
    ndarray
        A random permutation at distance dist from sigma0.
    """
    if sigma0 is None:
        sigma0 = np.arange(n)
    sigma = np.zeros(n) - 1
    # Choose n - dist positions to keep fixed; the rest are deranged
    fixed_points = np.random.choice(n, n - dist, replace=False)
    sigma[fixed_points] = fixed_points
    unfix = np.setdiff1d(np.arange(n), fixed_points)
    unfix = np.random.permutation(unfix)
    # Arrange the unfixed points in a single cycle so none stays in place
    for i in range(len(unfix) - 1):
        sigma[unfix[i]] = unfix[i + 1]
    if len(unfix) > 0:
        sigma[unfix[-1]] = unfix[0]
    # Compose with sigma0 so the result lies at distance dist from sigma0
    return sigma[sigma0].astype(int)
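# Example (illustrative sketch, parameters assumed): a random permutation of
# length 6 at Hamming distance 3 from the identity.
#   p = sample_at_dist(6, 3)
#   distance(p)   # -> 3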


#********* Expected distance *********#

def expected_dist_mm(n, theta=None, phi=None):
    """Compute the expected Hamming distance under a Mallows Model (MM).

    Parameters
    ----------
    n: int
        Length of the permutations in the considered model
    theta: float, optional (if phi is given)
        Real dispersion parameter
    phi: float, optional (if theta is given)
        Real dispersion parameter

    Returns
    -------
    float
        The expected distance under the MM.
    """
    theta, phi = mm.check_theta_phi(theta, phi)

    facts_ = np.array([1, 1] + [0] * (n - 1), dtype=float)
    for i in range(2, n + 1):
        facts_[i] = facts_[i - 1] * i
    # x_n   = sum_{k=0}^{n}   (e^theta - 1)^k / k!
    # x_n_1 = sum_{k=0}^{n-1} (e^theta - 1)^k / k!
    x_n_1, x_n = 0, 0
    for k in range(n + 1):
        aux = (np.exp(theta) - 1) ** k / facts_[k]
        x_n += aux
        if k < n:
            x_n_1 += aux
    return (n * x_n - x_n_1 * np.exp(theta)) / x_n
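# Example (illustrative sketch, values assumed): larger theta concentrates the
# model around the consensus, so the expected distance shrinks.
#   expected_dist_mm(5, theta=0.1)   # close to the near-uniform value (roughly 3.9)
#   expected_dist_mm(5, theta=2.0)   # much smaller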


#************ Learning ************#

def median(sample, ws=1):
    """Compute the central permutation (consensus ranking) of several
    permutations using the Hungarian algorithm.

    Parameters
    ----------
    sample: ndarray
        Matrix of permutations, one per row
    ws: float or ndarray, optional
        Weights of the permutations (unweighted by default)

    Returns
    -------
    ndarray
        The central permutation of the given permutations
    """
    m, n = sample.shape
    # wmarg[i, j] = (weighted) number of permutations placing item j at position i
    wmarg = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            freqs = (sample[:, i] == j)
            wmarg[i, j] = (freqs * ws).sum()
    # Maximize the total first-order marginals via a minimum-cost assignment
    row_ind, col_ind = linear_sum_assignment(-wmarg)
    return col_ind
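# Example (illustrative sketch, data assumed): consensus of three rankings.
#   rankings = np.array([[0, 1, 2, 3],
#                        [0, 1, 3, 2],
#                        [1, 0, 2, 3]])
#   median(rankings)   # -> array([0, 1, 2, 3])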


def prob(sigma, sigma0, theta=None, phi=None):
    """Probability mass function of an MM with central ranking sigma0 and
    dispersion parameter theta/phi.

    Parameters
    ----------
    sigma: ndarray
        A permutation
    sigma0: ndarray
        Central permutation
    theta: float, optional (if phi is given)
        Dispersion parameter
    phi: float, optional (if theta is given)
        Dispersion parameter

    Returns
    -------
    float
        Probability of sigma under the model.
    """
    theta, phi = mm.check_theta_phi(theta, phi)
    d = distance(sigma, sigma0)
    n = len(sigma)
    facts_ = np.array([1, 1] + [0] * (n - 1), dtype=float)
    for i in range(2, n + 1):
        facts_[i] = facts_[i - 1] * i
    # psi is the normalization constant of the MM under the Hamming distance
    acc = 0
    for i in range(n + 1):
        acc += ((np.exp(theta) - 1) ** i) / facts_[i]
    psi = acc * np.exp(-n * theta) * facts_[n]
    return np.exp(-d * theta) / psi
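# Example (illustrative sketch, values assumed): over all permutations of a
# small n the probabilities should sum to 1, with sigma0 the most likely.
#   sigma0 = np.arange(3)
#   ps = [prob(np.array(s), sigma0, theta=1.0) for s in it.permutations(range(3))]
#   sum(ps)   # -> approximately 1.0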


def find_phi(n, dmin, dmax):
    """Find a dispersion parameter phi whose expected distance lies between
    dmin and dmax for rankings of length n, by bisection on phi in (0, 1).

    Parameters
    ----------
    n: int
        Length of the permutations
    dmin: int
        Minimum acceptable expected distance
    dmax: int
        Maximum acceptable expected distance

    Returns
    -------
    float
        The value of phi.
    """
    assert dmin < dmax
    imin, imax = 0.0, 1.0
    iterat = 0
    while iterat < 500:
        med = (imax + imin) / 2
        d = expected_dist_mm(n, phi=med)
        # The expected distance grows with phi, so bisect accordingly
        if d < dmin:
            imin = med
        elif d > dmax:
            imax = med
        else:
            return med
        iterat += 1
    assert False, "Max iterations reached"
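# Example (illustrative sketch, values assumed): a phi whose expected Hamming
# distance lies between 2 and 3 for rankings of length 10.
#   phi = find_phi(10, 2, 3)
#   expected_dist_mm(10, phi=phi)   # -> between 2 and 3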
# end
