1
+ import numpy as np
2
+ import itertools as it
3
+ from scipy .optimize import linear_sum_assignment
4
+ import mallows_model as mm
5
+
6
+
7
+
8
+ #************* Distance **************#
9
+
10
+ def distance (A , B = None ):
11
+ """
12
+ This function computes Hamming distance between two permutations.
13
+ If only one permutation is given, the distance will be computed with the
14
+ identity permutation as the second permutation.
15
+ Parameters
16
+ ----------
17
+ A: ndarray
18
+ The first permutation
19
+ B: ndarray, optional
20
+ The second permutation (default is None)
21
+ Returns
22
+ -------
23
+ int
24
+ Hamming distance between A and B
25
+ """
26
+ if B is None : B = np .arange (len (A ))
27
+
28
+ return sum (A != B )
29
+
30
+
31
+ def dist_at_uniform (n ): return n
32
+
33
+
34
+ #************ Sampling ************#
35
+
36
+ def sample (m , n , * , theta = None , phi = None , s0 = None ):
37
+ """This function generates m permutations (rankings) according to Mallows Models.
38
+ Parameters
39
+ ----------
40
+ m: int
41
+ Number of rankings to generate
42
+ n: int
43
+ Length of rankings
44
+ theta: float, optional (if phi given)
45
+ Dispersion parameter theta
46
+ phi: float, optional (if theta given)
47
+ Dispersion parameter phi
48
+ s0: ndarray
49
+ Consensus ranking
50
+ Returns
51
+ -------
52
+ ndarray
53
+ The rankings generated.
54
+ """
55
+ sample = np .zeros ((m , n ))
56
+ theta , phi = mm .check_theta_phi (theta , phi )
57
+
58
+ facts_ = np .array ([1 , 1 ]+ [0 ]* (n - 1 ), dtype = np .float )
59
+ deran_num_ = np .array ([1 , 0 ]+ [0 ]* (n - 1 ), dtype = np .float )
60
+ for i in range (2 , n + 1 ):
61
+ facts_ [i ] = facts_ [i - 1 ] * i
62
+ deran_num_ [i ] = deran_num_ [i - 1 ]* (i - 1 ) + deran_num_ [i - 2 ]* (i - 1 );
63
+ hamm_count_ = np .array ([ deran_num_ [d ]* facts_ [n ] / (facts_ [d ] * facts_ [n - d ]) for d in range (n + 1 )], dtype = np .float )
64
+ probsd = np .array ([hamm_count_ [d ] * np .exp (- theta * d ) for d in range (n + 1 )], dtype = np .float )
65
+
66
+ for m_ in range (m ):
67
+ target_distance = np .random .choice (n + 1 ,p = probsd / probsd .sum ())
68
+ sample [m_ ,:] = sample_at_dist (n , target_distance , s0 )
69
+
70
+ return sample
71
+
72
+
73
+ def sample_at_dist (n , dist , sigma0 = None ):
74
+ """This function randomly generates a permutation with length n at distance
75
+ dist to a given permutation sigma0.
76
+ Parameters
77
+ ----------
78
+ n: int
79
+ Length of the permutations
80
+ dist: int
81
+ Distance between the permutation generated randomly and a known
82
+ permutation sigma0
83
+ sigma0: ndarray, optional
84
+ A known permutation (If not given, then it equals the identity)
85
+ Returns
86
+ -------
87
+ ndarray
88
+ A random permutation at distance dist to sigma0.
89
+ """
90
+ if sigma0 is None : sigma0 = np .arange (n )
91
+ sigma = np .zeros (n )- 1
92
+ fixed_points = np .random .choice (n , n - dist , replace = False )
93
+ sigma [fixed_points ] = fixed_points
94
+ unfix = np .setdiff1d (np .arange (n ), fixed_points )
95
+ unfix = np .random .permutation (unfix )
96
+ for i in range (len (unfix )- 1 ):
97
+ sigma [unfix [i ]] = unfix [i + 1 ]
98
+ if len (unfix ) > 0 : sigma [unfix [- 1 ]] = unfix [0 ]
99
+ return sigma [sigma0 ].astype (int )
100
+
101
+ #********* Expected distance *********#
102
+
103
+ def expected_dist_mm (n , theta = None , phi = None ):
104
+ """The function computes the expected value of Hamming distance under Mallows Models (MMs).
105
+ Parameters
106
+ ----------
107
+ n: int
108
+ Length of the permutation in the considered model
109
+ theta: float
110
+ Real dispersion parameter, optional (if phi is given)
111
+ phi: float
112
+ Real dispersion parameter, optional (if theta is given)
113
+ Returns
114
+ -------
115
+ float
116
+ The expected distance under MMs.
117
+ """
118
+ theta , phi = mm .check_theta_phi (theta , phi )
119
+
120
+ facts_ = np .array ([1 ,1 ] + [0 ]* (n - 1 ), dtype = np .float )
121
+ for i in range (2 , n + 1 ):
122
+ facts_ [i ] = facts_ [i - 1 ] * i
123
+ x_n_1 , x_n = 0 , 0
124
+
125
+ for k in range (n + 1 ):
126
+ aux = (np .exp (theta )- 1 )** k / facts_ [k ]
127
+ x_n += aux
128
+ if k < n : x_n_1 += aux
129
+ return (n * x_n - x_n_1 * np .exp ( theta )) / x_n
130
+
131
+
132
+ #************ Learning ************#
133
+
134
+ def median (sample , ws = 1 ):
135
+ """This function computes the central permutation (consensus ranking) given
136
+ several permutations using Hungarian algorithm.
137
+ Parameters
138
+ ----------
139
+ sample: ndarray
140
+ Matrix of several permutations
141
+ ws: float optional
142
+ weight (not weighted by default)
143
+ Returns
144
+ -------
145
+ ndarray
146
+ The central permutation of permutations given
147
+ """
148
+ m , n = sample .shape
149
+ wmarg = np .zeros ((n , n ))
150
+ for i in range (n ):
151
+ for j in range (n ):
152
+ freqs = (sample [:, i ]== j )
153
+ wmarg [i , j ] = (freqs * ws ).sum ()
154
+ row_ind , col_ind = linear_sum_assignment ( - wmarg )
155
+
156
+ return col_ind
157
+
158
+
159
+ def prob (sigma , sigma0 , theta = None , phi = None ):
160
+ """ Probability mass function of a MM with central ranking sigma0 and
161
+ dispersion parameter theta/phi.
162
+ Parameters
163
+ ----------
164
+ sigma: ndarray
165
+ A pemutation
166
+ sigma0: ndarray
167
+ central permutation
168
+ theta: float
169
+ Dispersion parameter (optional, if phi is given)
170
+ phi: float
171
+ Dispersion parameter (optional, if theta is given)
172
+ Returns
173
+ -------
174
+ float
175
+ Probability mass function.
176
+ """
177
+ theta , phi = mm .check_theta_phi (theta , phi )
178
+ d = distance (sigma , sigma0 )
179
+ n = len (sigma )
180
+ facts_ = np .array ([1 , 1 ] + [0 ]* (n - 1 ), dtype = np .float )
181
+
182
+ for i in range (2 , n + 1 ):
183
+ facts_ [i ] = facts_ [i - 1 ] * i
184
+ sum = 0
185
+ for i in range (n + 1 ):
186
+ sum += (((np .exp (theta )- 1 )** i )/ facts_ [ i ])
187
+ psi = sum * np .exp (- n * theta )* facts_ [ n ]
188
+ return np .exp (- d * theta ) / psi
189
+
190
+
191
+
192
+ def find_phi (n , dmin , dmax ):
193
+ """ Find the dispersion parameter phi that gives an expected distance between
194
+ dmin and dmax where the length of rankings is n.
195
+ Parameters
196
+ ----------
197
+ n: int
198
+ Length of permutations
199
+ dmin: int
200
+ Minimum of expected distance
201
+ dmax: int
202
+ Maximum of expected distance
203
+ Returns
204
+ -------
205
+ float
206
+ The value of phi.
207
+ """
208
+ assert dmin < dmax
209
+ imin , imax = 0.0 , 1.0
210
+ iterat = 0
211
+ while iterat < 500 :
212
+ med = (imax + imin ) / 2
213
+ d = expected_dist_mm (n , phi = med )
214
+
215
+ if d < dmin : imin = med
216
+ elif d > dmax : imax = med
217
+ else : return med
218
+ iterat += 1
219
+
220
+ assert False , "Max iterations reached"
221
+
222
+
223
+
224
+
225
+
226
+
227
+
228
+
229
+
230
+ # end
0 commit comments