scripts/thermodynamic_characterization/puncta_thermo_calc.Rmd

---
title: "Compute thermodynamic features of puncta"
output:
  html_document:
    df_print: paged
---

```{r}
library(dplyr)
library(openxlsx)
library(plyr)
`%notin%` <- Negate(`%in%`)
```

```{r}
############################

dir0 = "../../example_data/thermodynamic_characterization/" # define directory for the input files

## cell.data is the input cell level features from punctatools: e.g., "cell_stat.csv" 
cell.data <- read.csv(paste0(dir0, "cell_quants.csv"))

## puncta.data is the input puncta level features from punctatools: e.g., "puncta_stat.csv"
puncta.data <- read.csv(paste0(dir0, "puncta_quants.csv")) 

## cell.list.ex is an optional user provided input list of cells (mitotic/ dead) to exclude: e.g., "cells_to_exclude.csv"
#cell.list.ex <- read.csv(paste0(dir0, "cells_to_exclude.csv")) #(optional)

## cell.channel is the user provided name of the cell channel: e.g., "Hoechst"
cell.channel <- "Hoechst" 

## puncta.channel is the user provided name of the puncta channel(s): e.g., "GFP" for single channel or c("GFP", "mCherry") for two puncta channels
puncta.channel <- "GFP" #c("GFP", "mCherry")

## factor_conc is the user provided ratio between FL intensity and concentration in uM from FL calibration: e.g., 0.02 for single channel "GFP" or c(0.02, 0.03) for two channel
factor_conc <- 0.02 #c(0.02, 0.03)

## temp is the user provided temperature in K: e.g., 310
temp <- 310
##
#############################
```

```{r}
# Define a function 

process.image.file=function(cell.data,      # cell data data.frame
                            puncta.data,    # puncta data data.frame
                            cell.list.ex,   # cells (mitotic/ dead) to exclude (optional)
                            cell.channel,   # string with cell channel
                            puncta.channel, # strings with names of puncta channels
                            factor_conc,    # ratio between FL intensity and concentration in uM
                            temp)           # temperature in degrees K
  
{ # BEGIN function
  
  #=======Stop if any of the the required input is missing=========
  stopifnot("cell.data is missing" = !missing(cell.data))
  stopifnot("puncta.data is missing" = !missing(puncta.data))
  stopifnot("cell.channel is missing" = !missing(cell.channel))
  stopifnot("puncta.channel is missing" = !missing(puncta.channel))
  stopifnot("factor_conc is missing" = !missing(factor_conc))
  stopifnot("temp is missing" = !missing(temp))
  
  #====================Puncta data=====================
  # Calculate "total FL puncta intensity per cell"
  puncta.data %>% 
  group_by(condition, sample, ROI.label) %>% 
  summarise_at(c(paste0(puncta.channel,".mean.intensity.per.puncta")), sum) -> df0.p
  
  #====================Cell data=======================
  df0.c<-cell.data
  
  #==========Combine the puncta and cell data==========
  #keep all the cells even with 0 punctum
  merge(df0.c, df0.p, by=c("condition", "sample", "ROI.label"), all.x=TRUE) -> df_c
  
  # Create an unique idetifier for each cell 
  df_c$position.cell_label<-paste0(df_c$sample, "_", df_c$ROI.label)
  
  # Read the list of cells to exclude (Optional)
  if (missing(cell.list.ex)){
    print("Cells to exclude is empty.")
    df_c -> df.c

  } else {
    cell.list.ex -> df.ex
    df.ex$position.cell_label <- paste0(df.ex$sample, "_", df.ex$ROI.label)
    list.cell.ex <- df.ex$position.cell_label
    print(paste0("Cells to exclude: ", paste(list.cell.ex, collapse = ", ")))
    df_c %>% filter(position.cell_label %notin% list.cell.ex) -> df.c

  }
 
  ## ==============Extract and calculate puncta features==========================================
  ## Puncta molar fraction
  df.c[, paste0("puncta.mol.fraction.", puncta.channel)] <- 
    df.c[ , paste0(puncta.channel, ".integrated.intensity.inside.", puncta.channel, ".puncta")]/ 
    df.c[, paste0(puncta.channel, ".integrated.intensity.per.ROI")]
  
  ## Puncta volume fraction
  df.c[, paste0("puncta.vol.fraction.", puncta.channel)] <- 
  df.c[, paste0("total.", puncta.channel, ".puncta.volume.pix.per.ROI")]/df.c[, "ROI.volume.pix"] 
  
  # Protein concentration in uM (micro molar)
  df.c[, paste0("mean.intensity.per.cell.", puncta.channel)] <- df.c[, paste0(puncta.channel,".mean.intensity.per.ROI")]
  ## Convert to concentration
  df.c[, paste0("protein.conc.per.cell.", puncta.channel)] <- df.c[, paste0(puncta.channel,".mean.intensity.per.ROI")]*factor_conc
  
  # Number of FL puncta per unit nuclear volume
  df.c[, paste0("puncta.number.per.nuclear.vol.", puncta.channel)] <- (df.c[, paste0("number.of.", puncta.channel, ".puncta")]/df.c$ROI.volume.um)*1000
  
  # FL “light phase”, {[Total FL intensity of the cell – Total puncta FL intensity of the same cell]/[nucleus volume – total volume of puncta per cell]}
  df.c[, paste0("light.phase.intensity.", puncta.channel)] <- (df.c[, paste0(puncta.channel, ".integrated.intensity.outside.", puncta.channel, ".puncta")]/
    (df.c$ROI.volume.pix-df.c[ , paste0("total.", puncta.channel, ".puncta.volume.pix.per.ROI")]))
  ## Convert to concentration
  df.c[, paste0("light.phase.conc.", puncta.channel)] <- df.c[, paste0("light.phase.intensity.", puncta.channel)]*factor_conc
  
  # Calculate Vp (average puncta vol.)
  df.c[, paste0("avg.puncta.vol.", puncta.channel)] <- df.c[, paste0("average.", puncta.channel, ".puncta.volume.um.per.ROI")]
  
  # Calculate puncta dense phase conc.
  df.c[, paste0("puncta.dense.phase.intensity.", puncta.channel)] <- (df.c[, paste0(puncta.channel, ".mean.intensity.per.puncta")]/
    df.c[ , paste0("number.of.", puncta.channel, ".puncta")])
  ## Convert to concentration
  df.c[, paste0("puncta.dense.phase.conc.", puncta.channel)] <- df.c[, paste0("puncta.dense.phase.intensity.", puncta.channel)]*factor_conc
  
  # Calculate partition coefficient & transfer free energy
  RT = 0.002*temp # kcal/ mol
  ## Kp=[DP]/[LP]
  df.c[, paste0("Kp.", puncta.channel)] <- df.c[, paste0("puncta.dense.phase.conc.", puncta.channel)]/df.c[, paste0("light.phase.conc.", puncta.channel)] 
  ## Transfer free energy
  df.c[, paste0("DeltaGtr.", puncta.channel)] <- -RT*log(df.c[, paste0("Kp.", puncta.channel)])
  
  # Keep only the relevant features
  cell.data <- df.c[, c("condition", "sample", "ROI.label", 
                        paste0("Mutual.information.", cell.channel, ".vs.", puncta.channel), 
                        paste0("Pearson.correlation.coefficient.", cell.channel, ".vs.", puncta.channel), 
                        paste0("puncta.mol.fraction.", puncta.channel),
                        paste0("puncta.vol.fraction.", puncta.channel),
                        paste0("mean.intensity.per.cell.", puncta.channel),
                        paste0("light.phase.intensity.", puncta.channel),
                        paste0("puncta.dense.phase.intensity.", puncta.channel),
                        paste0("protein.conc.per.cell.", puncta.channel), 
                        paste0("puncta.number.per.nuclear.vol.", puncta.channel), 
                        paste0("light.phase.conc.", puncta.channel), 
                        paste0("avg.puncta.vol.", puncta.channel), 
                        paste0("puncta.dense.phase.conc.", puncta.channel), 
                        paste0("Kp.", puncta.channel), 
                        paste0("DeltaGtr.", puncta.channel))] 
  
  #############################
  # return the results
  res=as.data.frame(cell.data)
  
  attr(res,"cell.channel") = cell.channel
  attr(res,"puncta.channel") = puncta.channel
  attr(res,"factor_conc") = factor_conc
  attr(res,"temp") = temp
  
  return(res)

} # END function
```


```{r}
###############################
##
## Call the function with input files and arguments
##
###############################

process.image.file(cell.data = cell.data,
           puncta.data = puncta.data,
           #cell.list.ex = cell.list.ex, #(optional)
           cell.channel = cell.channel,
           puncta.channel = puncta.channel, 
           factor_conc = factor_conc, 
           temp = temp
           )-> df 

# Print the attributes
attr(df, "cell.channel")
attr(df, "puncta.channel")
attr(df, "factor_conc")
attr(df, "temp")
```

```{r}
head(df)
```


```{r}
df %>% names()
```


```{r}
levels(as.factor(df$condition))
```


```{r}
names(df)
```


```{r}
# write the puncta features into an excel file
write.csv(df, file = paste0(dir0, "puncta_thermo_features.csv"), row.names=FALSE)
```


```{r}
df$condition <- gsub('NHA9_WT', 'NHA9', df$condition)
```


```{r}
## make plots

library(ggplot2)
library(cowplot)
library(ggpubr)

list.const<- c("NHA9", "NHA9_21FGAA", "NHA9_DeltaDNA") 
cols.const<-c("darkgreen", "darkblue", "red") 

plots <- list()
for(nm in names(df)){
    plots[[nm]] <- ggplot(data=df %>% filter(condition %in% list.const)) + 
      geom_point(aes_string(x="protein.conc.per.cell.GFP", y=nm, fill="condition", color="condition"), alpha=0.5, size=1.) +
      scale_fill_manual(values=cols.const, labels = c(expression("G-NHA9"), expression("G-NHA9-21FGAA"), expression("G-NHA9-"*Delta*"DNA"))) +
      scale_color_manual(values=cols.const, labels = c(expression("G-NHA9"), expression("G-NHA9-21FGAA"), expression("G-NHA9-"*Delta*"DNA"))) +
      scale_y_continuous(trans='log10') +
      xlab(expression("Avg. mEGFP conc. ("~mu*M~")")) +
      theme_cowplot(14) +
      border()
}

p1<-plots[["puncta.number.per.nuclear.vol.GFP"]] + ylab(~"Puncta # (/"~10^3~mu*m^3~")")
p2<-plots[["avg.puncta.vol.GFP"]] + ylab(V[p]~"("~mu*m^3~")")
p3<-plots[["puncta.mol.fraction.GFP"]] + ylab(expression(F[mol]))
p4<-plots[["puncta.vol.fraction.GFP"]] + ylab(expression(F[vol]))
p5<-plots[["light.phase.conc.GFP"]] + ylab(~"[LP] ("~mu*M~")")
p6<-plots[["puncta.dense.phase.conc.GFP"]] + ylab(~"[DP] ("~mu*M~")")
p7<-plots[["Kp.GFP"]] + ylab(expression(K[p]))
```


```{r}
plots1 <- list()
for(nm in names(df)){
    plots1[[nm]] <- ggplot(data=df %>% filter(condition %in% list.const)) + 
      geom_point(aes_string(x="protein.conc.per.cell.GFP", y=nm, fill="condition", color="condition"), alpha=0.5, size=1.) +
      scale_fill_manual(values=cols.const, labels = c(expression("G-NHA9"), expression("G-NHA9-21FGAA"), expression("G-NHA9-"*Delta*"DNA"))) +
      scale_color_manual(values=cols.const, labels = c(expression("G-NHA9"), expression("G-NHA9-21FGAA"), expression("G-NHA9-"*Delta*"DNA"))) +
      xlab(expression("Avg. mEGFP conc. ("~mu*M~")")) +
      theme_cowplot(14) +
      border()
}

p8<-plots1[["DeltaGtr.GFP"]] + ylab(expression(Delta*G[Tr]~"("~kcal~mol^{-1}~")"))
```


```{r, fig.width=12.5, fig.height=6}
library(patchwork)
p.c<-(p1 | p2 | p3 | p4) / (p5 | p6 | p7 | p8) & theme(legend.position = "bottom")

png(file = paste0("../../docs/", "puncta_thermo", ".png"), width=12.5, height=6, units="in", res=300)
p.c + plot_layout(guides = "collect") 
dev.off()
```