-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathSpaceMix_workflow.R
207 lines (171 loc) · 8.28 KB
/
SpaceMix_workflow.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#!/usr/bin/Rscript
interactive_mode <- TRUE # Change to false if running as a script on a "headless" server.
# Read SNP data:
require(SpaceMix)
counts <- read.csv("~/Desktop/Qsuber_GBS/Clust5/11-SpaceMix/Qsuber_indiv90_miss80_maf06_noLD.vcf.counts", sep=" ")
sizes <- read.csv("~/Desktop/Qsuber_GBS/Clust5/11-SpaceMix/Qsuber_indiv90_miss80_maf06_noLD.vcf.size", sep=" ")
counts_matrix <- as.matrix(counts)
sizes_matrix <- as.matrix(sizes)
# Read geographic coordinates:.
geo.coords <- read.csv("~/Desktop/Qsuber_GBS/Clust5/11-SpaceMix/coords.csv", sep=",", header=TRUE, row.names=1)
# Run SpaceMix analysis:
run.spacemix.analysis(n.fast.reps = 10,
fast.MCMC.ngen = 1e5,
fast.model.option = "source_and_target",
long.model.option = "source_and_target",
data.type = "counts",
sample.frequencies=NULL,
mean.sample.sizes=NULL,
counts = counts_matrix,
sample.sizes = sizes_matrix,
sample.covariance=NULL,
target.spatial.prior.scale=NULL,
source.spatial.prior.scale=NULL,
spatial.prior.X.coordinates = geo.coords[,1],
spatial.prior.Y.coordinates = geo.coords[,2],
round.earth = FALSE,
long.run.initial.parameters=NULL,
k = 77,
loci = 1356,
ngen = 1e6,
printfreq = 1e2,
samplefreq = 1e3,
mixing.diagn.freq = 50,
savefreq = 1e5,
directory="~/Desktop/Qsuber_GBS/Clust5/11-SpaceMix/run1/",
prefix = "Qsuber_GBS_run1")
# Post prob. trace plot
load("~/Desktop/Qsuber_GBS/Clust5/11-SpaceMix/run1/Qsuber_GBS_run1_LongRun/Qsuber_GBS_run1_space_MCMC_output1.Robj")
if (interactive_mode == FALSE) {svg(filename="MCMC_iterations.svg")}
plot(Prob,
xlab="MCMC iterations",
ylab="value",
main="Posterior probability trace plot",
type='l')
if (interactive_mode == FALSE) {dev.off()}
# Trace plots of alpha parameters of the spatial covariance function
if (interactive_mode == FALSE) {svg(filename="MCMC_nugget.svg")}
matplot(t(nugget),type='l',
xlab="MCMC iterations",ylab="Parameter value",
main="Trace plot of nugget parameters")
if (interactive_mode == FALSE) {dev.off()}
# Joint marginal plot of a0 and a1
# colored by where in the MCMC these
# parameters took their values
if (interactive_mode == FALSE) {svg(filename="joint_marginal.svg")}
plot(a0, a1, xlab="a0", ylab="a1",
main="Joint marginal of a0 and a1", pch=20,
col=adjustcolor(rainbow(1000, start=4/6, end=6/6), 0.3))
legend(x="bottomright", pch=19, cex=0.8,
col=rainbow(1000, start=4/6, end=6/6)[c(1, 500, 1000)],
legend=c("Sampled MCMC iteration 1",
"Sampled MCMC iteration 500",
"Sampled MCMC iteration 1000"))
if (interactive_mode == FALSE) {dev.off()}
# Acceptance rate of a0 over the course of the
# MCMC analysis
if (interactive_mode == FALSE) {svg(filename="acceptance.svg")}
plot(accept_rates$a0_accept_rate,
xlab="MCMC iterations", ylab="Acceptance rate",
main="Acceptance rate of a0", type='l',
ylim=c(0.35, 0.6))
abline(h=0.44, col="gray", lty=2)
if (interactive_mode == FALSE) {dev.off()}
# Acceptance rates of nugget parameters over the
# course of the MCMC analysis
if (interactive_mode == FALSE) {svg(filename="acceptance_nugget.svg")}
matplot(t(accept_rates$nugget_accept_rate),
xlab="MCMC iterations", ylab="Acceptance rate",
main="Acceptance rates of nuggets", type='l',
ylim=c(0.3, 0.7))
abline(h=0.44, col="gray", lty=2)
if (interactive_mode == FALSE) {dev.off()}
## Model adequacy
load("~/Desktop/Qsuber_GBS/Clust5/11-SpaceMix/run1/Qsuber_GBS_run1_LongRun/Qsuber_GBS_run1_MCN.frequencies.list.Robj")
# Now, calculate the sample covariance from the mean centered
# and normalized sample allele frequencies.
sample.covariance <- cov(t(MCN.frequencies.list$mean.centered.normalized.sample.frequencies),
use="pairwise.complete.obs")
# Create a matrix that will perform a mean-centering
# on the parametric covariance matrix
# Then, mean-center the parametric ovariance matrix.
k <- nrow(MCN.frequencies.list$mean.centered.normalized.sample.frequencies)
MC.matrix <- diag(k) - matrix(1/last.params$inv.mean.sample.sizes /
(sum(1/last.params$inv.mean.sample.sizes)),
nrow=k, ncol=k, byrow=TRUE)
MC.parametric.covariance <- (MC.matrix) %*%
last.params$admixed.covariance %*%
t(MC.matrix)
# Finally, compare the sample covariance to the parametric
# covariance. Ideally, there will be a very tight correspondence
# between the data and the model. If there is not, it may
# be an indication either that the MCMC has not converged on
# the stationary distribution or that the process that generated
# the data is only poorly approximated by SpaceMix's model.
# The sample and parametric covariances can be plotted
# against each other (if model fit is good they should
# fall on the x=y red line)
index.matrix <- upper.tri(sample.covariance, diag=TRUE)
if (interactive_mode == FALSE) {svg(filename="model_adequacy.svg")}
plot(sample.covariance[index.matrix],
MC.parametric.covariance[index.matrix],
col=adjustcolor("black", 0.3), pch=20,
xlab="Sample covariance",
ylab="Parametric covariance",
main="Model adequacy:\n matrix comparison")
abline(0, 1, col="red")
if (interactive_mode == FALSE) {dev.off()}
# Or the patterns of decay of covariance with
# geographic distance can be compared between
# the data and the model.
if (interactive_mode == FALSE) {svg(filename="model_adequacy_IBD.svg")}
plot(last.params$D[1:k,1:k][index.matrix],
sample.covariance[index.matrix],
pch=19, col="black",
xlab="geogenetic distance",
ylab="covariance",
main="Model adequacy:\n IBD patterns")
points(last.params$D[1:k,1:k][index.matrix],
MC.parametric.covariance[index.matrix], col="red", pch=20)
legend(x="topright", pch=19, col=c(1, 2),
legend=c("observed", "model estimate"))
if (interactive_mode == FALSE) {dev.off()}
## Now, for the main course!
# Sample names
sample.names <- as.vector(rownames(counts))
N = length(sample.names)
# Make geo.coords a matrix
geo.coords <- as.matrix(geo.coords)
# And generate a vector of sample colors
sample.colors <- rainbow(n=N,start=1/6,end=6/6)[as.numeric(cut(geo.coords[,1],N))]
# And now generate a sample map list using a 95%
# credible interval on parameter estimates without
# `burning' (i.e., discarding) any sampled iterations
# of the MCMC.
run.spacemix.map.list <- make.spacemix.map.list(MCMC.output.file = "~/Desktop/Qsuber_GBS/Clust5/11-SpaceMix/run1/Qsuber_GBS_run1_LongRun/Qsuber_GBS_run1_space_MCMC_output1.Robj",
geographic.locations = geo.coords,
name.vector = sample.names,
color.vector = sample.colors,
quantile=0.95,
burnin=0)
# Now we generate a map of the output showing sample names
# at the locations of the maximum a posteriori (MAP)
# geogenetic location parameter estimates
if (interactive_mode == FALSE) {svg(filename="geogenetic_map_names.svg")}
make.spacemix.map(spacemix.map.list = run.spacemix.map.list,
text=TRUE,
ellipses=FALSE,
source.option=FALSE)
if (interactive_mode == FALSE) {dev.off()}
if (interactive_mode == FALSE) {svg(filename="geogenetic_map_elipses.svg")}
make.spacemix.map(spacemix.map.list = run.spacemix.map.list,
text=TRUE,
ellipses=TRUE,
source.option=FALSE)
if (interactive_mode == FALSE) {dev.off()}
if (interactive_mode == FALSE) {svg(filename="geogenetic_map_admix.svg")}
make.spacemix.map(spacemix.map.list = run.spacemix.map.list,
text=TRUE,
ellipses=TRUE,
source.option=TRUE)
if (interactive_mode == FALSE) {dev.off()}