-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathpuncta_thermo_calc.Rmd
280 lines (215 loc) · 10.6 KB
/
puncta_thermo_calc.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
---
title: "Compute thermodynamic features of puncta"
output:
html_document:
df_print: paged
---
```{r}
library(dplyr)
library(openxlsx)
library(plyr)
`%notin%` <- Negate(`%in%`)
```
```{r}
############################
dir0 = "../../example_data/thermodynamic_characterization/" # define directory for the input files
## cell.data is the input cell level features from punctatools: e.g., "cell_stat.csv"
cell.data <- read.csv(paste0(dir0, "cell_quants.csv"))
## puncta.data is the input puncta level features from punctatools: e.g., "puncta_stat.csv"
puncta.data <- read.csv(paste0(dir0, "puncta_quants.csv"))
## cell.list.ex is an optional user provided input list of cells (mitotic/ dead) to exclude: e.g., "cells_to_exclude.csv"
#cell.list.ex <- read.csv(paste0(dir0, "cells_to_exclude.csv")) #(optional)
## cell.channel is the user provided name of the cell channel: e.g., "Hoechst"
cell.channel <- "Hoechst"
## puncta.channel is the user provided name of the puncta channel(s): e.g., "GFP" for single channel or c("GFP", "mCherry") for two puncta channels
puncta.channel <- "GFP" #c("GFP", "mCherry")
## factor_conc is the user provided ratio between FL intensity and concentration in uM from FL calibration: e.g., 0.02 for single channel "GFP" or c(0.02, 0.03) for two channel
factor_conc <- 0.02 #c(0.02, 0.03)
## temp is the user provided temperature in K: e.g., 310
temp <- 310
##
#############################
```
```{r}
# Define a function
process.image.file=function(cell.data, # cell data data.frame
puncta.data, # puncta data data.frame
cell.list.ex, # cells (mitotic/ dead) to exclude (optional)
cell.channel, # string with cell channel
puncta.channel, # strings with names of puncta channels
factor_conc, # ratio between FL intensity and concentration in uM
temp) # temperature in degrees K
{ # BEGIN function
#=======Stop if any of the the required input is missing=========
stopifnot("cell.data is missing" = !missing(cell.data))
stopifnot("puncta.data is missing" = !missing(puncta.data))
stopifnot("cell.channel is missing" = !missing(cell.channel))
stopifnot("puncta.channel is missing" = !missing(puncta.channel))
stopifnot("factor_conc is missing" = !missing(factor_conc))
stopifnot("temp is missing" = !missing(temp))
#====================Puncta data=====================
# Calculate "total FL puncta intensity per cell"
puncta.data %>%
group_by(condition, sample, ROI.label) %>%
summarise_at(c(paste0(puncta.channel,".mean.intensity.per.puncta")), sum) -> df0.p
#====================Cell data=======================
df0.c<-cell.data
#==========Combine the puncta and cell data==========
#keep all the cells even with 0 punctum
merge(df0.c, df0.p, by=c("condition", "sample", "ROI.label"), all.x=TRUE) -> df_c
# Create an unique idetifier for each cell
df_c$position.cell_label<-paste0(df_c$sample, "_", df_c$ROI.label)
# Read the list of cells to exclude (Optional)
if (missing(cell.list.ex)){
print("Cells to exclude is empty.")
df_c -> df.c
} else {
cell.list.ex -> df.ex
df.ex$position.cell_label <- paste0(df.ex$sample, "_", df.ex$ROI.label)
list.cell.ex <- df.ex$position.cell_label
print(paste0("Cells to exclude: ", paste(list.cell.ex, collapse = ", ")))
df_c %>% filter(position.cell_label %notin% list.cell.ex) -> df.c
}
## ==============Extract and calculate puncta features==========================================
## Puncta molar fraction
df.c[, paste0("puncta.mol.fraction.", puncta.channel)] <-
df.c[ , paste0(puncta.channel, ".integrated.intensity.inside.", puncta.channel, ".puncta")]/
df.c[, paste0(puncta.channel, ".integrated.intensity.per.ROI")]
## Puncta volume fraction
df.c[, paste0("puncta.vol.fraction.", puncta.channel)] <-
df.c[, paste0("total.", puncta.channel, ".puncta.volume.pix.per.ROI")]/df.c[, "ROI.volume.pix"]
# Protein concentration in uM (micro molar)
df.c[, paste0("mean.intensity.per.cell.", puncta.channel)] <- df.c[, paste0(puncta.channel,".mean.intensity.per.ROI")]
## Convert to concentration
df.c[, paste0("protein.conc.per.cell.", puncta.channel)] <- df.c[, paste0(puncta.channel,".mean.intensity.per.ROI")]*factor_conc
# Number of FL puncta per unit nuclear volume
df.c[, paste0("puncta.number.per.nuclear.vol.", puncta.channel)] <- (df.c[, paste0("number.of.", puncta.channel, ".puncta")]/df.c$ROI.volume.um)*1000
# FL “light phase”, {[Total FL intensity of the cell – Total puncta FL intensity of the same cell]/[nucleus volume – total volume of puncta per cell]}
df.c[, paste0("light.phase.intensity.", puncta.channel)] <- (df.c[, paste0(puncta.channel, ".integrated.intensity.outside.", puncta.channel, ".puncta")]/
(df.c$ROI.volume.pix-df.c[ , paste0("total.", puncta.channel, ".puncta.volume.pix.per.ROI")]))
## Convert to concentration
df.c[, paste0("light.phase.conc.", puncta.channel)] <- df.c[, paste0("light.phase.intensity.", puncta.channel)]*factor_conc
# Calculate Vp (average puncta vol.)
df.c[, paste0("avg.puncta.vol.", puncta.channel)] <- df.c[, paste0("average.", puncta.channel, ".puncta.volume.um.per.ROI")]
# Calculate puncta dense phase conc.
df.c[, paste0("puncta.dense.phase.intensity.", puncta.channel)] <- (df.c[, paste0(puncta.channel, ".mean.intensity.per.puncta")]/
df.c[ , paste0("number.of.", puncta.channel, ".puncta")])
## Convert to concentration
df.c[, paste0("puncta.dense.phase.conc.", puncta.channel)] <- df.c[, paste0("puncta.dense.phase.intensity.", puncta.channel)]*factor_conc
# Calculate partition coefficient & transfer free energy
RT = 0.002*temp # kcal/ mol
## Kp=[DP]/[LP]
df.c[, paste0("Kp.", puncta.channel)] <- df.c[, paste0("puncta.dense.phase.conc.", puncta.channel)]/df.c[, paste0("light.phase.conc.", puncta.channel)]
## Transfer free energy
df.c[, paste0("DeltaGtr.", puncta.channel)] <- -RT*log(df.c[, paste0("Kp.", puncta.channel)])
# Keep only the relevant features
cell.data <- df.c[, c("condition", "sample", "ROI.label",
paste0("Mutual.information.", cell.channel, ".vs.", puncta.channel),
paste0("Pearson.correlation.coefficient.", cell.channel, ".vs.", puncta.channel),
paste0("puncta.mol.fraction.", puncta.channel),
paste0("puncta.vol.fraction.", puncta.channel),
paste0("mean.intensity.per.cell.", puncta.channel),
paste0("light.phase.intensity.", puncta.channel),
paste0("puncta.dense.phase.intensity.", puncta.channel),
paste0("protein.conc.per.cell.", puncta.channel),
paste0("puncta.number.per.nuclear.vol.", puncta.channel),
paste0("light.phase.conc.", puncta.channel),
paste0("avg.puncta.vol.", puncta.channel),
paste0("puncta.dense.phase.conc.", puncta.channel),
paste0("Kp.", puncta.channel),
paste0("DeltaGtr.", puncta.channel))]
#############################
# return the results
res=as.data.frame(cell.data)
attr(res,"cell.channel") = cell.channel
attr(res,"puncta.channel") = puncta.channel
attr(res,"factor_conc") = factor_conc
attr(res,"temp") = temp
return(res)
} # END function
```
```{r}
###############################
##
## Call the function with input files and arguments
##
###############################
process.image.file(cell.data = cell.data,
puncta.data = puncta.data,
#cell.list.ex = cell.list.ex, #(optional)
cell.channel = cell.channel,
puncta.channel = puncta.channel,
factor_conc = factor_conc,
temp = temp
)-> df
# Print the attributes
attr(df, "cell.channel")
attr(df, "puncta.channel")
attr(df, "factor_conc")
attr(df, "temp")
```
```{r}
head(df)
```
```{r}
df %>% names()
```
```{r}
levels(as.factor(df$condition))
```
```{r}
names(df)
```
```{r}
# write the puncta features into an excel file
write.csv(df, file = paste0(dir0, "puncta_thermo_features.csv"), row.names=FALSE)
```
```{r}
df$condition <- gsub('NHA9_WT', 'NHA9', df$condition)
```
```{r}
## make plots
library(ggplot2)
library(cowplot)
library(ggpubr)
list.const<- c("NHA9", "NHA9_21FGAA", "NHA9_DeltaDNA")
cols.const<-c("darkgreen", "darkblue", "red")
plots <- list()
for(nm in names(df)){
plots[[nm]] <- ggplot(data=df %>% filter(condition %in% list.const)) +
geom_point(aes_string(x="protein.conc.per.cell.GFP", y=nm, fill="condition", color="condition"), alpha=0.5, size=1.) +
scale_fill_manual(values=cols.const, labels = c(expression("G-NHA9"), expression("G-NHA9-21FGAA"), expression("G-NHA9-"*Delta*"DNA"))) +
scale_color_manual(values=cols.const, labels = c(expression("G-NHA9"), expression("G-NHA9-21FGAA"), expression("G-NHA9-"*Delta*"DNA"))) +
scale_y_continuous(trans='log10') +
xlab(expression("Avg. mEGFP conc. ("~mu*M~")")) +
theme_cowplot(14) +
border()
}
p1<-plots[["puncta.number.per.nuclear.vol.GFP"]] + ylab(~"Puncta # (/"~10^3~mu*m^3~")")
p2<-plots[["avg.puncta.vol.GFP"]] + ylab(V[p]~"("~mu*m^3~")")
p3<-plots[["puncta.mol.fraction.GFP"]] + ylab(expression(F[mol]))
p4<-plots[["puncta.vol.fraction.GFP"]] + ylab(expression(F[vol]))
p5<-plots[["light.phase.conc.GFP"]] + ylab(~"[LP] ("~mu*M~")")
p6<-plots[["puncta.dense.phase.conc.GFP"]] + ylab(~"[DP] ("~mu*M~")")
p7<-plots[["Kp.GFP"]] + ylab(expression(K[p]))
```
```{r}
plots1 <- list()
for(nm in names(df)){
plots1[[nm]] <- ggplot(data=df %>% filter(condition %in% list.const)) +
geom_point(aes_string(x="protein.conc.per.cell.GFP", y=nm, fill="condition", color="condition"), alpha=0.5, size=1.) +
scale_fill_manual(values=cols.const, labels = c(expression("G-NHA9"), expression("G-NHA9-21FGAA"), expression("G-NHA9-"*Delta*"DNA"))) +
scale_color_manual(values=cols.const, labels = c(expression("G-NHA9"), expression("G-NHA9-21FGAA"), expression("G-NHA9-"*Delta*"DNA"))) +
xlab(expression("Avg. mEGFP conc. ("~mu*M~")")) +
theme_cowplot(14) +
border()
}
p8<-plots1[["DeltaGtr.GFP"]] + ylab(expression(Delta*G[Tr]~"("~kcal~mol^{-1}~")"))
```
```{r, fig.width=12.5, fig.height=6}
library(patchwork)
p.c<-(p1 | p2 | p3 | p4) / (p5 | p6 | p7 | p8) & theme(legend.position = "bottom")
png(file = paste0("../../docs/", "puncta_thermo", ".png"), width=12.5, height=6, units="in", res=300)
p.c + plot_layout(guides = "collect")
dev.off()
```