-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmax min simulation.R
120 lines (94 loc) · 3 KB
/
max min simulation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# Copyright 2021 Twitter, Inc.
# SPDX-License-Identifier: Apache-2.0
require(dplyr)
require(reshape2)
require(ggplot2)
mean_pairwise_abs_diff <- function(x){
out <- mean(as.numeric(dist(x, method = "manhattan")))
return(out)
}
gini <- function(x){
out <- mean(as.numeric(dist(x, method = "manhattan")))/2*mean(x)
return(out)
}
mean_abs_diff <- function(x){
out <- mean(abs(x - mean(x)))
return(out)
}
maxmin_diff <- function(x){
return(max(x) - min(x))
}
maxmin_ratio <- function(x, eps = .05){
return(max(x + eps)/min(x + eps))
}
max_abs_diff <- function(x){
return(max(abs(x - mean(x))))
}
generalized_entropy <- function(x, alpha = 1){
K <- length(x)
if(alpha != 0 & alpha != 1){
out <- 1/(K*alpha*(alpha - 1))*sum((x/mean(x))^alpha-1)
}
if(alpha == 1){
out <- 1/K*sum(x/mean(x)*log(x/mean(x)))
}
if(alpha == 0){
out <- -1/K*sum(log(x/mean(x)))
}
return(out)
}
Ks <- seq(5, 150, 5)
lowers <- seq(.1, .9, .1)
n <- 5000
results <- list()
for(lower in lowers){
biases <- NULL
for(K in Ks){
mu_k <- seq(lower, .9, length.out = K)
nks <- rep(round(n/K), K)
mmd <- NULL
mmr <- NULL
mads <- NULL
pw_abs <- NULL
max_ads <- NULL
vars <- NULL
geis <- NULL
ginis <- NULL
for(i in 1:1000){
Y_k <- rbinom(length(mu_k), nks, mu_k)/nks
mmd <- c(mmd, maxmin_diff(Y_k))
mmr <- c(mmr, maxmin_ratio(Y_k))
mads <- c(mads, mean_abs_diff(Y_k))
pw_abs <- c(pw_abs, mean_pairwise_abs_diff(Y_k))
vars <- c(vars, var(Y_k))
max_ads <- c(max_ads, max_abs_diff(Y_k))
geis <- c(geis, generalized_entropy(Y_k, alpha = .5))
ginis <- c(ginis, gini(Y_k))
}
bias <- c(mean(mmd) - maxmin_diff(mu_k),
mean(mmr) - maxmin_ratio(mu_k),
mean(mads) - mean_abs_diff(mu_k),
mean(pw_abs) - mean_pairwise_abs_diff(mu_k),
mean(vars) - var(mu_k),
mean(max_ads) - max_abs_diff(mu_k),
mean(geis - generalized_entropy(mu_k, alpha = .5)),
mean(ginis) - gini(mu_k))
biases <- rbind(biases, bias)
}
results[[as.character(lower)]] <- biases
}
all.biases <- do.call("rbind", results)
allKs <- rep(Ks, length(lowers))
allLowers <- rep(lowers, each = length(Ks))
df <- data.frame(maxmin_diff = all.biases[,1], max_min_ratio = all.biases[,2],
max_abs_diff = all.biases[,6], mean_abs_dev = all.biases[,3], var = all.biases[,5], gei = all.biases[,7], K = allKs, lower = allLowers) #
df <- df %>% melt(id.vars=c("K", "lower"))
ggplot(df, aes(x = K, y = value, color = factor(lower), group = factor(lower))) +
geom_line() +
theme_bw() +
ggtitle("Statistical Bias of Model ``Bias'' Estimators") +
theme(text = element_text(size = 25)) +
guides(color=guide_legend(title="Lower Bound")) +
facet_wrap(vars(variable), scales = 'free') +
ylab(expression(paste("E[M(Y)] - E[M(", mu, ")]", sep='')))
ggsave("~/Desktop/max-min.png", width = 12, height = 7, units = 'in')