# semiParam3.jl
using CSV
using GLM
using RCall
using Term
using QuadGK
using DataFrames
using StatsFuns
using Statistics
using StatsPlots
using Distributions
using PrettyTables
using DataFramesMeta
#########################################################################
# IMPLEMENTATIONS FOR SIMULATION STUDY
#########################################################################
function expit(x)
1 / (1 + exp(-x))
end
# Derivative of expit; used in sandwich variance estimation
function expit2(x)
exp(x) / (1 + exp(x))^2
end
# Simulate data from a binary-outcome RCT with true conditional values
# α = -0.5 (intercept) and β = 0.3 (treatment effect); γ scales the covariate effect
function simData(n, γ)
R = rand(Bernoulli(0.5), n)
X = randn(n)
condMean = -0.5 .+ 0.3*R + γ*X
μ = expit.(condMean)
y = @. rand(Bernoulli(μ))
return (R=R, Y=y, X = X)
end
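# Quick sanity check (an illustrative sketch, not part of the original pipeline):
# with γ = 0 the covariate drops out, so the group-wise outcome means should be
# close to expit(-0.5 + 0.3) ≈ 0.45 and expit(-0.5) ≈ 0.38.
# R, Y, X = simData(10_000, 0.0)
# (mean(Y[R .== 1]), mean(Y[R .== 0]))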
# The estimand is the marginal log odds ratio g(p0, p1) = logit(p1) - logit(p0),
# with p_r = P(Y = 1 | R = r); the two functions below are its partial derivatives.
# ∂g/∂p0 = 1 / (p0*(p0 - 1))
function gprime0(p0)
    1 / (p0*(p0 - 1))
end
# ∂g/∂p1 = 1 / (p1*(1 - p1))
function gprime1(p1)
    1 / (p1*(1 - p1))
end
# Naive (unadjusted) estimator and its variance: βhat = logit(p1hat) - logit(p0hat)
# from the group proportions alone; the standard error uses the empirical influence function
function naiveEst(R, Y, X)
    n = length(R)
    p0hat = sum((1 .- R) .* Y) / sum(1 .- R)
    p1hat = sum(R .* Y) / sum(R)
    βhat = log(p1hat / (1 - p1hat)) - log(p0hat / (1 - p0hat))
    δhat = mean(R)
    influenceVec = [(1 - R[i])*(Y[i] - p0hat) * gprime0(p0hat) / (1 - δhat) +
                    R[i] * (Y[i] - p1hat) * gprime1(p1hat) / δhat for i in 1:n]
    estSD = mean(influenceVec .* influenceVec) |> sqrt
    (βhat = βhat, ϕϕhat = estSD / sqrt(n))
end
# The efficient estimator
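# Augments the naive influence function with the regression terms
# (R - δhat) * (pred0 - p0hat) and (R - δhat) * (pred1 - p1hat), where pred0 and pred1
# are the fitted P(Y = 1 | R = 0, X) and P(Y = 1 | R = 1, X) from a working logistic
# model; this covariate-adjustment (augmentation) term is what yields the efficiency gain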
function effEst(R,Y,X)
n = length(R)
p0hat = sum((1 .- R) .* Y) / sum(1 .- R)
p1hat = sum(R .* Y) / sum(R)
δhat = mean(R)
βhat = log(p1hat / (1 - p1hat)) - log(p0hat / (1 - p0hat))
fit = glm(@formula(Y ~ R + X), (;R, Y, X ), Binomial(), LogitLink())
pred0 = predict(fit, (;R = zeros(Int, size(R)), X = X ))
pred1 = predict(fit, (;R = ones(Int, size(R)), X = X ))
influenceVec = zeros(n)
for i in 1:n
influenceVec[i] += gprime0(p0hat) * (1 - R[i]) * (Y[i] - p0hat) / (1-δhat)
influenceVec[i] += gprime1(p1hat) * R[i] * (Y[i] - p1hat) / δhat
influenceVec[i] += gprime0(p0hat) * (R[i] - δhat) * (pred0[i] - p0hat) / (1-δhat)
influenceVec[i] -= gprime1(p1hat) * (R[i] - δhat) * (pred1[i] - p1hat) / δhat
end
βhatEff = βhat + mean(influenceVec)
estSD = mean(influenceVec .* influenceVec) |> sqrt
(βhat = βhatEff, ϕϕhat = estSD/sqrt(n))
end
# Efficient estimator with misspecified means
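# Same augmentation as effEst, but the working outcome model is an ordinary linear
# regression, i.e. deliberately misspecified for a binary outcome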
function effMisEst(R,Y,X)
n = length(R)
p0hat = sum((1 .- R) .* Y) / sum(1 .- R)
p1hat = sum(R .* Y) / sum(R)
δhat = mean(R)
βhat = log(p1hat / (1 - p1hat)) - log(p0hat / (1 - p0hat))
fit = lm(@formula(Y ~ R + X), (;R, Y, X ))
pred0 = predict(fit, (;R = zeros(Int, size(R)), X = X ))
pred1 = predict(fit, (;R = ones(Int, size(R)), X = X ))
influenceVec = zeros(n)
for i in 1:n
influenceVec[i] += gprime0(p0hat) * (1 - R[i]) * (Y[i] - p0hat) / (1-δhat)
influenceVec[i] += gprime1(p1hat) * R[i] * (Y[i] - p1hat) / δhat
influenceVec[i] += gprime0(p0hat) * (R[i] - δhat) * (pred0[i] - p0hat) / (1-δhat)
influenceVec[i] -= gprime1(p1hat) * (R[i] - δhat) * (pred1[i] - p1hat) / δhat
end
βhatEff = βhat + mean(influenceVec)
estSD = mean(influenceVec .* influenceVec) |> sqrt
(βhat = βhatEff, ϕϕhat = estSD / sqrt(n))
end
# Polynomial basis expansion q(X) = (1, X, X², X³), returned as a 4 × n matrix
function qExp(X)
[ones(Float64, size(X)) X X.^2 X.^3]'
end
# Polynomial estimator
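# Projects the naive influence function onto the span of (R - δhat) * q(X):
# θhat0 is the corresponding least-squares coefficient, and subtracting the fitted
# projection gives the adjusted influence function ϕtilde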
function polEst(R,Y,X)
n = length(R)
p0hat = sum((1 .- R) .* Y) / sum(1 .- R)
p1hat = sum(R .* Y) / sum(R)
δhat = mean(R)
βhat = log(p1hat / (1 - p1hat)) - log(p0hat / (1 - p0hat))
influenceVec = [(1 - R[i])*(Y[i] - p0hat) * gprime0(p0hat) / (1 - δhat) +
R[i] * (Y[i] - p1hat) * gprime1(p1hat) / δhat for i in 1:n]
# Compute θ₀
θ0hat1 = inv(qExp(X) * qExp(X)')
θ0hat2 = qExp(X) * ((R .- δhat) .* influenceVec)
θhat0 = 1 / (δhat*(1 - δhat)) * θ0hat1' * θ0hat2
ϕtilde = influenceVec .- ((θhat0' * qExp(X))' .* (R .- δhat))
βtilde = βhat + mean(ϕtilde)
    estSD = mean(ϕtilde .* ϕtilde) |> sqrt
(βtilde = βtilde, ϕϕhat = estSD/sqrt(n))
end
# Returns a closure that computes the theoretical marginal log OR as a function of γ
function logORMargTheo()
α = -0.5
β = 0.3
σX = 1
function logORZ(γ)
        # Standard normal density of X
        normalpdf(x) = pdf(Normal(0, σX), x)
        # Integrand for P(Y = 1 | R = 1): density of X times conditional success probability
        pY1R1(x) = normalpdf(x) * expit(α + β + γ*x)
        # Integrand for P(Y = 1 | R = 0)
        pY1R0(x) = normalpdf(x) * expit(α + γ*x)
        # Marginal probabilities under treatment (numerator odds)
        numpY1R1 = first(quadgk(pY1R1, -Inf, Inf, rtol = 1e-3))
        numpY0R1 = 1 - numpY1R1
        # Marginal probabilities under control (denominator odds)
        denumpY1R0 = first(quadgk(pY1R0, -Inf, Inf, rtol = 1e-3))
        denumpY0R0 = 1 - denumpY1R0
        # Marginal log OR, i.e. log((numpY1R1 / numpY0R1) / (denumpY1R0 / denumpY0R0))
        log(numpY1R1) - log(numpY0R1) - (log(denumpY1R0) - log(denumpY0R0))
end
return logORZ
end
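# Sanity check (illustrative): with γ = 0 the covariate has no effect, so the marginal
# log OR coincides with the conditional one and, up to the quadrature tolerance,
# logORMargTheo()(0.0) should be close to 0.3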
# Compute the theoretical marginal log OR for each γ in γlist
function computeTheoMargLogOr(γlist)
    saveTheoMargLogOr = Array{Float64}(undef, length(γlist))
    # Evaluate the theoretical marginal log odds ratio at each γ
    logORmarg = logORMargTheo()
    for i in eachindex(γlist)
        saveTheoMargLogOr[i] = logORmarg(γlist[i])
    end
    saveTheoMargLogOr
end
function getMargTheo()
γlist = [0, -log(4), -log(6)]
computeTheoMargLogOr(γlist)
end
# Run `num` simulated datasets for a single (n, γ) setting; returns the Monte Carlo
# mean and SD of the β estimates and the mean of the estimated standard errors
function oneSimRun(n, γ, estimator, num)
betaVec = zeros(num)
betaSDVec = zeros(num)
@inbounds for i in 1:num
R, Y, X = simData(n, γ)
betaVec[i], betaSDVec[i] = estimator(R,Y,X)
end
return (meanβ = mean(betaVec), sdβ = std(betaVec), meanSDβ = mean(betaSDVec))
end
# Run the full simulation study over n ∈ {200, 400} and γ ∈ {0, -log(4), -log(6)}
function makeSimStudy(estimator, num)
nlist = [200, 400]
γlist = [0, -log(4), -log(6)]
# γ x n matrix
saveMeanβ = Array{Float64}(undef, 3, 2)
saveSDβ = Array{Float64}(undef, 3, 2)
saveMeanSDβ = Array{Float64}(undef, 3, 2)
for i in eachindex(γlist)
for j in eachindex(nlist)
            a, b, c = oneSimRun(nlist[j], γlist[i], estimator, num)
saveMeanβ[i,j] = a
saveSDβ[i,j] = b
saveMeanSDβ[i,j] = c
end
end
return (saveMeanβ, saveSDβ, saveMeanSDβ)
end
# Helper: reshape the 3 × 2 simulation-study matrices into one long-format DataFrame
function fromMatrixToDF(saveMeanβ, saveSDβ, saveMeanSDβ)
estimate = ["meanβ", "sdβ", "meanSEβ"]
    # Column-major flattening of the 3 × 2 (γ × n) matrices: the three γ rows for
    # n = 200 come first, then the three rows for n = 400
    nlistNext = [200, 200, 200, 400, 400, 400]
γlist = ["0", "-log(4)", "-log(6)"]
df1 = DataFrame(
ests = saveMeanβ[:],
type = estimate[1],
n = nlistNext,
γ = repeat(γlist, 2)
)
df2 = DataFrame(
ests = saveSDβ[:],
type = estimate[2],
n = nlistNext,
γ = repeat(γlist, 2)
)
df3 = DataFrame(
ests = saveMeanSDβ[:],
type = estimate[3],
n = nlistNext,
γ = repeat(γlist, 2)
)
[df1 ; df2 ; df3]
end
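#########################################################################
# EXAMPLE USAGE (a sketch; the number of replications and the table
# display below are illustrative choices, not part of the original script)
#########################################################################
# nsim = 1_000
# resultsNaive = fromMatrixToDF(makeSimStudy(naiveEst, nsim)...)
# resultsEff = fromMatrixToDF(makeSimStudy(effEst, nsim)...)
# theoMargLogOR = getMargTheo()   # theoretical targets for γ ∈ {0, -log(4), -log(6)}
# pretty_table(resultsEff)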