@@ -3,13 +3,7 @@ library("repr")
3
3
options(warn=-1)# repr.plot.width=14, repr.plot.height=8)
4
4
```
5
5
6
- TO DO
7
-
8
- - introduction about methods, few formulas
9
- - algorithm presentation
10
- - generate data, comparison with "normal" histogram
11
- - application on spectrum
12
- - different prior studies
6
+ Algorithm
13
7
14
8
``` {r}
15
9
bayesian_blocks <- function(xs, prior=7.61, nn_vec=rep(1,length(xs))){
@@ -73,6 +67,8 @@ bayesian_blocks <- function(xs, prior=7.61, nn_vec=rep(1,length(xs))){
73
67
}
74
68
```
75
69
70
+ Test dataset
71
+
76
72
``` {r}
77
73
test <- c(rnorm(1000, 0, 1), rgamma(1000, 9, 2))
78
74
test <- test[(test > -5) & (test < 10)]
@@ -94,19 +90,7 @@ h_bb <- hist(test, breaks=cp_h, freq=FALSE, col=rgb(1,0,0,0), add=TRUE)
94
90
95
91
```
96
92
97
- Prior studies
98
-
99
- The needed ncp_prior–p0 relationship is easily found by
100
- noting that the rates of correct and incorrect responses to
101
- fluctuations in simulated pure noise can be controlled by
102
- adjusting the value of ncp_prior. The procedure is: generate a
103
- synthetic pure noise time series; apply the algorithm for a range
104
- of ncp_prior; and select the smallest value that yields false
105
- detection frequency equal or less than the desired rate, such
106
- as 0.05. The values of ncp_prior determined in this way are
107
- averaged over a large number of realizations of the random data.
108
- The result depends on only the number of data points and the
109
- adopted value of p0:
93
+ Test priors
110
94
111
95
``` {r}
112
96
ncp_prior <- function(p0, N){ 4 - log(73.53 * p0 * N^(-0.478))}
@@ -126,11 +110,7 @@ for (i in 1:length(p0)){
126
110
127
111
```
128
112
129
- ``` {r}
130
-
131
- ```
132
-
133
- Import data
113
+ Import "real" dataset
134
114
135
115
``` {r}
136
116
data <- read.table("./Data/B19036_AmCsCo_20180316.dat", skip=2)
@@ -159,37 +139,7 @@ ncp_prior <- c(1, 1.3, 2, 2.5, 3.2, 4)
159
139
160
140
```
161
141
162
- Algorithm
163
-
164
- ``` {r}
165
- #create histogram
166
-
167
- rebin_bb <- function(bins, counts, change_points){
168
-
169
- rebin <- NULL
170
- y <- 0
171
- dn <- bins[2]-bins[1]
172
- n <- 1
173
-
174
- N <- length(bins)
175
- start <- bins[1]
176
- stop <- bins[N]
177
-
178
- mids <- c(0.5*(bins[2:N]+bins[1:N-1]), stop)
179
-
180
- for (i in 1:(length(counts))){
181
- y <- y + (dn*counts[i])
182
- ifelse( mids[i] %in% change_points,
183
- {y <- y/n
184
- rebin <- c(rebin, y)
185
- y <- 0
186
- n <- 1},
187
- n <- n+1
188
- )
189
- }
190
- return(rebin)
191
- }
192
- ```
142
+ Algorithm to rebin
193
143
194
144
``` {r}
195
145
#create histogram
@@ -229,181 +179,3 @@ lines(cp, rebin, type='s', col='red')
229
179
``` {r}
230
180
231
181
```
232
-
233
- ``` {r}
234
-
235
- ```
236
-
237
- ``` {r}
238
-
239
- ```
240
-
241
- ``` {r}
242
- par(mfrow=c(3,2), mar=c(3.5,3.5,0.5,0.5), oma=c(0.1,0.1,0.1,0.5), mgp=c(2.0,0.8,0))
243
-
244
- for(i in 1:length(ncp_prior)) {
245
- cp <- bayesian_blocks(blocks = block_length, data = nn_vec, prior = ncp_prior[i])
246
- h <- rebin_bb(data = data[,1], change_points = cp)
247
- plot(block_length[length(block_length):2], data[,1], col= 'grey',
248
- type='s', log='y', lwd=0.1, main=ncp_prior[i])
249
- lines(cp, h, col='red', type='s', lwd=2)
250
- }
251
-
252
- ```
253
-
254
- ``` {r}
255
-
256
- ```
257
-
258
- ``` {r}
259
-
260
- ```
261
-
262
- ``` {r}
263
-
264
- ```
265
-
266
- ``` {r}
267
-
268
- ```
269
-
270
- ``` {r}
271
-
272
- ```
273
-
274
- ``` {r}
275
-
276
- ```
277
-
278
- ``` {r}
279
-
280
- ```
281
-
282
- ``` {r}
283
- # ---------------------------------------------
284
- # Start with first data cell; add one cell at
285
- # each iteration
286
- # ---------------------------------------------
287
- best <- NULL
288
- last <- NULL
289
- supp <- NULL
290
-
291
- for (R in 1:8193){
292
- # Compute fit_vec : fitness of putative last block (end at R)
293
- arg_log <- block_length[1:R] - block_length[R+1]
294
- arg_log[arg_log <= 0] <- Inf
295
-
296
- nn_cum_vec <- cumsum(nn_vec[R:1])
297
- nn_cum_vec <- nn_cum_vec[R:1]
298
-
299
- fit_vec <- nn_cum_vec * (log(nn_cum_vec) - log(arg_log))
300
-
301
- supp <- c(0, best) + fit_vec - ncp_prior
302
-
303
- best <- c(best, max(supp))
304
- last <- c(last, which.max(supp))
305
- }
306
- ```
307
-
308
- ``` {r}
309
- last
310
- ```
311
-
312
- ``` {r}
313
- # #---------------------------------------------
314
- # # Now find changepoints by iteratively peeling
315
- # off the last block
316
- # #---------------------------------------------
317
- index <- last[length(nn_vec)]
318
- change_points <- NULL
319
-
320
- while (index > 1){
321
- change_points <- c(index, change_points)
322
- index <- last[index - 1]
323
- }
324
- change_points <- c(change_points, 8191)
325
- ```
326
-
327
- ``` {r}
328
- plot(block_length[length(block_length):2], data[,1], col= 'red', type='s', log='y')
329
- lines(change_points, c(rebin, rebin[length(rebin)]), col='green', type='s')
330
-
331
- #plot(change_points, c(rebin, rebin[length(rebin)]), col='green', type='s')
332
-
333
- ```
334
-
335
- ``` {r}
336
-
337
- ```
338
-
339
- ``` {r}
340
- bayesian_blocks <- function(blocks, data, prior=7.61){
341
-
342
- #data <- sort(data)
343
- #N <- length(data)
344
-
345
- # ---------------------------------------------
346
- # Start with first data cell; add one cell at
347
- # each iteration
348
- # ---------------------------------------------
349
-
350
- best <- NULL
351
- last <- NULL
352
- supp <- NULL
353
-
354
- for (R in 1:length(blocks)){
355
- # Compute fit_vec : fitness of putative last block (end at R)
356
- arg_log <- blocks[1:R] - blocks[R+1]
357
- arg_log[arg_log <= 0] <- Inf
358
-
359
- nn_cum_vec <- cumsum(data[R:1])
360
- nn_cum_vec <- nn_cum_vec[R:1]
361
-
362
- fit_vec <- nn_cum_vec * (log(nn_cum_vec) - log(arg_log))
363
-
364
- supp <- c(0, best) + fit_vec - prior
365
-
366
- best <- c(best, max(supp))
367
- last <- c(last, which.max(supp))
368
-
369
- }
370
-
371
- # #---------------------------------------------
372
- # # Now find changepoints by iteratively peeling
373
- # off the last block
374
- # #---------------------------------------------
375
- index <- last[length(data)]
376
- change_points <- NULL
377
-
378
- while(index > 1){
379
- change_points <- c(index, change_points)
380
- index <- last[index - 1]
381
- }
382
-
383
- change_points <- c(change_points, blocks[1])
384
-
385
- return(change_points)
386
- }
387
- ```
388
-
389
- ``` {r}
390
- #create histogram
391
-
392
- rebin_bb <- function(data, change_points){
393
- rebin <- NULL
394
- y <- NULL
395
- n <- 1
396
-
397
- for (i in 1:length(data)){
398
- y <- y + data[i]
399
- ifelse( i %in% change_points,
400
- {y <- y/n
401
- rebin <- c(rebin, y)
402
- y <- 0
403
- n <- 1},
404
- n <- n+1
405
- )
406
- }
407
- return(c(rebin, rebin[length(rebin)]))
408
- }
409
- ```
0 commit comments