-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdfMax.c
305 lines (247 loc) · 10 KB
/
pdfMax.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
/*----------------------------------------------------------------------+
| |
| pdfMax.c -- Maximum of multiple Gaussians |
| |
+----------------------------------------------------------------------*/
/*
* Copyright (C) 2015, Marcel van Kervinck
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*----------------------------------------------------------------------+
| Includes |
+----------------------------------------------------------------------*/
#define _XOPEN_SOURCE // for M_SQRT2
// Standard includes
#include <assert.h>
#include <float.h>
#include <math.h>
#include <string.h>
// Own include
#include "pdfMax.h"
/*----------------------------------------------------------------------+
| Functions |
+----------------------------------------------------------------------*/
/*----------------------------------------------------------------------+
| PDF / PDF1 |
+----------------------------------------------------------------------*/
static const double normC = -0.91893853320467267; // -log(sqrt(2.0 * M_PI))
#if 0
/*
* Normal distribution with mu = 0
*/
static double PDF(double x, double sigma)
{
double xs = x / sigma;
return exp(normC - 0.5 * xs * xs) / sigma;
}
#endif
/*
* Standard normal distribution
*/
static double PDF1(double x)
{
return exp(normC - 0.5 * x * x);
}
/*----------------------------------------------------------------------+
| CDF / CDF1 |
+----------------------------------------------------------------------*/
/*
* Cumulative normal distribution for mu = 0
*/
static double CDF(double x, double sigma)
{
return 0.5 * erfc(-x / (sigma * M_SQRT2));
}
/*
* Cumulative standard normal distribution
*/
static double CDF1(double x)
{
return 0.5 * erfc(-x / M_SQRT2);
}
/*----------------------------------------------------------------------+
| pdfMax |
+----------------------------------------------------------------------*/
/*
* Helper to perform numeric integration for pdfMax
*/
static int calcMomentsAndOdds(
double pdfList[][2], int n, double epsilon,
double a, double CDFa[],
double b, double CDFb[],
double moments[3], double odds[])
{
assert(a < b);
double m = 0.5 * (a + b);
double CDFm[n];
double sumErrors = 0.0;
for (int i=0; i<n; i++) {
CDFm[i] = CDF(m - pdfList[i][0], pdfList[i][1]);
// Use sum as good estimate of "1 - prod(1 - e for e errors)"
sumErrors += fabs(0.5 * (CDFa[i] + CDFb[i]) - CDFm[i]);
}
if (sumErrors <= epsilon) {
double S[n];
double productS = 1.0;
for (int i=0; i<n; i++) {
S[i] = (CDFa[i] + 4.0 * CDFm[i] + CDFb[i]) / 6.0; // Simpson's rule
productS *= S[i];
}
double Ex0 = 0.0;
for (int i=0; i<n; i++) {
/*
* Effectively calculate
* p = PDF(m - mu_i, sigma_i)
* * prod(CDF(m - mu_j, sigma_j) for j != i)
* * (b - a)
*/
double p = (CDFb[i] - CDFa[i]) * productS / S[i];
odds[i] += p;
Ex0 += p;
}
moments[0] += Ex0;
moments[1] += Ex0 * m;
moments[2] += Ex0 * m * m;
return 1;
} else {
// Recursion on the split interval
return calcMomentsAndOdds(pdfList, n, epsilon, a, CDFa, m, CDFm, moments, odds)
+ calcMomentsAndOdds(pdfList, n, epsilon, m, CDFm, b, CDFb, moments, odds);
}
}
/*
* Calculate the PDF of the max and the odds that each item is best.
* Give an exact result for n<=2, and use numerical integration upto the
* given epsilon for n>=3.
* Return the number of segments used (0 means exact result). This can be
* used to learn about the runtime.
*/
int pdfMax(double pdfList[][2], int n, double epsilon,
double *mu, double *sigma, double odds[])
{
assert(n > 0);
assert(epsilon > 0.0);
if (n == 1) {
/*
* The most basic case
*/
*mu = pdfList[0][0];
*sigma = pdfList[0][1];
odds[0] = 1.0;
return 0;
}
if (n == 2) {
/*
* Clark's two-value exact formula, see also
* http://www.eecs.berkeley.edu/~alanmi/research/timing/papers/clark1961.pdf
*/
double mu1 = pdfList[0][0];
double sigma1 = pdfList[0][1];
double mu2 = pdfList[1][0];
double sigma2 = pdfList[1][1];
double theta = sqrt(sigma1*sigma1 + sigma2*sigma2);
double alpha = (mu1 - mu2) / theta;
double Ex1 = mu1 * CDF1( alpha)
+ mu2 * CDF1(-alpha)
+ theta * PDF1(alpha);
double Ex2 = (sigma1*sigma1 + mu1*mu1) * CDF1( alpha)
+ (sigma2*sigma2 + mu2*mu2) * CDF1(-alpha)
+ (mu1 + mu2) * theta * PDF1(alpha);
*mu = Ex1;
*sigma = sqrt(Ex2 - Ex1*Ex1);
odds[0] = CDF(mu1 - mu2, theta); // First odds is simply P(A-B) > 0
odds[1] = 1.0 - odds[0];
return 0;
}
/*
* Estimate relative size of required integration range for given epsilon
*/
double k = 2.0 + (log(n) - log(epsilon)) / 5.0;
if (k < 1.0) k = 1.0;
assert(n * CDF1(-k) <= epsilon);
/*
* Determine the boundaries of the integration intervals
*/
double a = -DBL_MAX, b = -DBL_MAX, m = -DBL_MAX;
for (int i=0; i<n; i++) {
double kSigma = k * pdfList[i][1];
// Lower bound
double ai = pdfList[i][0] - kSigma;
if (ai > a) a = ai;
// Upper bound
double bi = pdfList[i][0] + kSigma;
if (bi > b) b = bi;
// Rough center of mass
double mi = pdfList[i][0];
if (mi > m) m = mi;
}
/*
* Shift pdfList by -m to reduce the error of moments[2]
*/
double shifted[n][2];
for (int i=0; i<n; i++) {
shifted[i][0] = pdfList[i][0] - m;
shifted[i][1] = pdfList[i][1];
}
/*
* CDFs at the interval ends
*/
double CDFa[n], CDFb[n], CDFm[n];
for (int i=0; i<n; i++) {
CDFa[i] = CDF(a - pdfList[i][0], pdfList[i][1]);
CDFb[i] = CDF(b - pdfList[i][0], pdfList[i][1]);
CDFm[i] = CDF(m - pdfList[i][0], pdfList[i][1]);
}
/*
* Reset the result accumulators
*/
double moments[] = {0.0, 0.0, 0.0};
memset(odds, 0, n * sizeof(odds[0]));
/*
* Calculate the integral numerically
* Pre-splitting at max(mu) avoids some of the trivial early termination
* problems due to the point symmetry of the CDF function around 0.
* Clip epsilon to avoid sqrt to fail
*/
if (epsilon > 0.25) epsilon = 0.25;
int nrSegments = calcMomentsAndOdds(shifted, n, epsilon, a - m, CDFa, 0.0, CDFm, moments, odds)
+ calcMomentsAndOdds(shifted, n, epsilon, 0.0, CDFm, b - m, CDFb, moments, odds);
/*
* Convert the outputs and return
*/
*mu = moments[1] / moments[0] + m;
*sigma = sqrt(moments[2] - moments[1] * moments[1]);
for (int i=0; i<n; i++) {
odds[i] /= moments[0];
}
return nrSegments;
}
/*----------------------------------------------------------------------+
| |
+----------------------------------------------------------------------*/