Skip to content

Commit 2cfe33c

Browse files
rcalvo12 and jmshapir authored
Pull Request for #19: Confidence interval for SK distance (#20)
* Methods for #19 * #19 Add get_sk_ci() * #19 Adding unit test for get_sk_ci() * Ceiling for #20 * #19 Apply suggestions from jms review Co-authored-by: jmshapir <[email protected]> * #19 Capping ub at 1 --------- Co-authored-by: jmshapir <[email protected]>
1 parent 1f327a7 commit 2cfe33c

File tree

4 files changed

+78
-1
lines changed

4 files changed

+78
-1
lines changed

BootstrapReport.lyx

+50
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,56 @@ Here
294294
\end_layout
295295

296296
\end_deeper
297+
\begin_layout Section*
298+
Confidence interval for SK distance
299+
\end_layout
300+
301+
\begin_layout Itemize
302+
For a value
303+
\begin_inset Formula $x\ge0$
304+
\end_inset
305+
306+
let
307+
\begin_inset Formula
308+
\begin{align*}
309+
\overline{x}_{\alpha} & =\min\left\{ x+\sqrt{\frac{\ln\left(2/\alpha\right)}{2J}},1\right\} \\
310+
\underline{x}_{\alpha} & =\max\left\{ x-\sqrt{\frac{\ln\left(2/\alpha\right)}{2J}},0\right\}
311+
\end{align*}
312+
313+
\end_inset
314+
315+
316+
\end_layout
317+
318+
\begin_layout Itemize
319+
We calculate the
320+
\begin_inset CommandInset href
321+
LatexCommand href
322+
name "DKW"
323+
target "https://en.wikipedia.org/wiki/Dvoretzky%E2%80%93Kiefer%E2%80%93Wolfowitz_inequality"
324+
literal "false"
325+
326+
\end_inset
327+
328+
confidence interval for the SK distance, whose upper and lower bounds are, respectively,
329+
\begin_inset Formula
330+
\[
331+
\min\left\{ \overline{SK}_{\alpha}^{+}\left(N\left(\mu,\sigma^{2}\right),\hat{\eta}\right)+\overline{SK}_{\alpha}^{-}\left(N\left(\mu,\sigma^{2}\right),\hat{\eta}\right),1\right\}
332+
\]
333+
334+
\end_inset
335+
336+
and
337+
\begin_inset Formula
338+
\[
339+
\underline{SK}_{\alpha}^{+}\left(N\left(\mu,\sigma^{2}\right),\hat{\eta}\right)+\underline{SK}_{\alpha}^{-}\left(N\left(\mu,\sigma^{2}\right),\hat{\eta}\right)
340+
\]
341+
342+
\end_inset
343+
344+
345+
\end_layout
346+
297347
\begin_layout Section*
298348
Number of changes in direction in the difference in CDFs
299349
\end_layout

src/BootstrapReport/helpers.py

+16
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,22 @@ def get_sk_dist(rep, normal, sep = False):
138138
else:
139139
return -neg_dist + pos_dist
140140

141+
def get_sk_ci(num_replicates, neg_dist, pos_dist, alpha = 0.05):
    '''calculates a DKW-based confidence interval for the sk distance

    Each one-sided distance is shifted by the margin
    sqrt(ln(2 / alpha) / (2 * num_replicates)) and clipped to [0, 1];
    the two clipped sides are then summed, and the upper bound is capped at 1.

    :param num_replicates: number of replicates (must be positive)
    :param neg_dist: maximum negative distance
    :param pos_dist: maximum positive distance
    :param alpha: 1 - alpha = confidence level for confidence bands (0 < alpha <= 1)
    :return: tuple (sk_ci_lb, sk_ci_ub) of floats
    :raises ValueError: if num_replicates is not positive or alpha is outside (0, 1]
    '''
    # Reject inputs for which the margin is undefined (division by zero,
    # log or sqrt of a non-positive number) instead of propagating NaN.
    if num_replicates <= 0:
        raise ValueError('num_replicates must be positive')
    if not 0 < alpha <= 1:
        raise ValueError('alpha must lie in (0, 1]')

    # The margin is shared by all four one-sided bounds, so compute it once.
    margin = float(np.sqrt(np.log(2 / alpha) / (2 * num_replicates)))

    sk_ci_lb = max(pos_dist - margin, 0.0) + max(neg_dist - margin, 0.0)

    sk_ci_ub = min(pos_dist + margin, 1.0) + min(neg_dist + margin, 1.0)
    # The sk distance itself cannot exceed 1, so cap the summed upper bound.
    sk_ci_ub = min(sk_ci_ub, 1.0)

    # Always hand back floats, regardless of which clamp was active.
    return float(sk_ci_lb), float(sk_ci_ub)
156+
141157
def select_bandwidth(rot_se, max_grid, min_grid, num_gridpoints,
142158
estimate, se, num_sets, num_replicates, lbound, rbound, first_seed,
143159
num_expansions, max_subdivisions):

src/BootstrapReport/main.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ def __init__(self, estimate, se, replicates):
2525
self.best_bandwidth_value = np.NaN
2626
self.bias_value = None
2727
self.crossings = None
28-
self.sk_dist = helpers.get_sk_dist(self.replicates, norm(loc = estimate, scale = se), sep = False)
28+
self.sk_neg, self.sk_pos = helpers.get_sk_dist(self.replicates, norm(loc = self.estimate, scale = self.se), sep = True)
29+
self.sk_dist = self.sk_neg + self.sk_pos
30+
self.sk_ci_lb, self.sk_ci_ub = helpers.get_sk_ci(len(self.replicates), self.sk_neg, self.sk_pos)
2931

3032
def get_bias_corrected_tvd(self, num_gridpoints=10, num_sets=100, min_grid=-1, max_grid=1,
3133
bounds_of_integration=np.inf, first_seed=42, second_seed=11,

tests/test_BootstrapReport.py

+9
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@ def test_sk_distance():
1717
assert test.neg_dist <= 1 and test.neg_dist >= 0
1818
assert test.pos_dist <= 1 and test.pos_dist >= 0
1919

20+
def test_sk_ci():
    '''checks that the sk confidence interval contains the sk distance and lies within [0, 1]'''
    replicate_frame = pd.read_csv('examples/gamma_replicates.csv')
    test_replicates = replicate_frame['replicate_value'].values
    test = ObjectOfInterest(estimate = 0, se = 1, replicates = test_replicates)

    lower, upper = test.sk_ci_lb, test.sk_ci_ub

    # The point estimate must sit inside its own confidence interval.
    assert lower <= test.sk_dist <= upper
    # Both bounds must stay inside the range of the sk distance.
    assert upper <= 1
    assert lower >= 0
28+
2029
def test_density_plot():
2130
test_replicates = pd.read_csv('examples/gamma_replicates.csv')['replicate_value'].values
2231
estimate, standard_error = 0, 1

0 commit comments

Comments
 (0)