Pseudobulk replicates: filter and output

cengenproject · Oct 11, 2022 · ea14718 · ea14718
1 parent 4d6b69b
commit ea14718
Show file tree

Hide file tree

Showing 5 changed files with 93 additions and 67 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,6 @@ Dataset_*
 google-analytics-script2.html
 rsconnect*
 data/
+tests/*
+Rplots.pdf
+*_data
diff --git a/Functions.R b/Functions.R
@@ -53,9 +53,9 @@ mean.fxn <- function(x) {
 FoldChange <- function (cells.1, cells.2, features) {
   thresh.min <- 0
   pct.1 <- round(x = rowSums(x = allCells.data[features, cells.1, 
-                                        drop = FALSE] > thresh.min)/length(x = cells.1), digits = 3)
+                                               drop = FALSE] > thresh.min)/length(x = cells.1), digits = 3)
   pct.2 <- round(x = rowSums(x = allCells.data[features, cells.2, 
-                                        drop = FALSE] > thresh.min)/length(x = cells.2), digits = 3)
+                                               drop = FALSE] > thresh.min)/length(x = cells.2), digits = 3)
   data.1 <- mean.fxn(allCells.data[features, cells.1, drop = FALSE])
   data.2 <- mean.fxn(allCells.data[features, cells.2, drop = FALSE])
   fc <- (data.1 - data.2)
@@ -113,13 +113,17 @@ perform_de_sc <- function(ident.1 , ident.2, min.pct = 0.1, min.diff.pct = -Inf,
 
 
   p_val_adj = p.adjust(p_val, method = "bonferroni", n = nrow(allCells.data))
-
+  
   data.frame(gene = features,
              pct.1 = fc.results[features,]$pct.1,
              pct.2 = fc.results[features,]$pct.2,
              avg_logFC = fc.results[features,]$avg_log2FC,
              p_val = p_val,
-             p_val_adj = p_val_adj)
+             p_val_adj = p_val_adj) |>
+    dplyr::arrange(p_val_adj, p_val, desc(abs(avg_logFC))) |>
+    mutate(p_val = signif(p_val, 2),
+           p_val_adj = signif(p_val_adj, 2),
+           avg_logFC = round(avg_logFC, 1))
 }
 
 
@@ -146,13 +150,19 @@ perform_de_pb_wilcoxon <- function(ident.1, ident.2, ...){
                                                                   statistics = data.use[x, ])), 1)
                   })
 
-  p_val_adj <- p.adjust(p_val, method = "bonferroni")
+  FDR <- p.adjust(p_val, method = "BH")
   data.frame(gene = rownames(pseudobulk_matrix),
              mean_1 = mean_1,
              mean_2 = mean_2,
              log2FC = log2FC,
              p_val = p_val,
-             p_val_adj = p_val_adj)
+             FDR = FDR) |>
+    dplyr::arrange(FDR, p_val, desc(abs(log2FC))) |>
+    mutate(p_val = signif(p_val, 2),
+           FDR = signif(FDR, 2),
+           log2FC = round(log2FC, 1),
+           across(contains("mean"),
+                  ~ round(.x, 1)))
 }
 
 
@@ -162,14 +172,18 @@ perform_de_pb_edger <- function(ident.1, ident.2, ...){
 
   load_as_needed("edger_precomputed")
 
-  et <- exactTest(edger_precomputed, pair = c(ident.1, ident.2))
+  et <- exactTest(edger_precomputed, pair = c(ident.2, ident.1))
 
   et$table |>
     tibble::rownames_to_column() |>
-    dplyr::mutate(p_val_adj = p.adjust(PValue, method = "bonferroni")) |>
+    dplyr::mutate(p_val_adj = p.adjust(PValue, method = "BH")) |>
     dplyr::rename(gene = rowname,
                   p_val = PValue,
-                  p_val_adj = p_val_adj)
+                  FDR = p_val_adj) |>
+    dplyr::arrange(FDR, p_val) |>
+    mutate(p_val = signif(p_val, 2),
+           FDR = signif(FDR, 2),
+           logFC = round(logFC, 1))
 }
 
 
@@ -196,24 +210,7 @@ perform_de <- function(ident.1, ident.2, method, ...){
 
   # finish
 
-  tableDEX <-
-    merge(
-      tableDEX,
-      gene_list,
-      by.x = "gene",
-      by.y = "gene_id",
-      all.x = TRUE
-    ) |>
-    dplyr::arrange(p_val_adj) |>
-    mutate(across(starts_with("p_val"),
-                  ~ signif(.x, 2)),
-           across(contains("log"),
-                  ~ round(.x, 1)),
-           across(contains("mean"),
-                  ~ round(.x, 1)))
-
-
-  tableDEX
+  left_join(tableDEX, gene_list, by = c("gene" = "gene_id"))
 }
 
 # # Tests

diff --git a/save_data.R b/save_data.R
@@ -59,14 +59,24 @@ pseudobulk_matrix <- AggregateExpression(allCells,
                                   slot = "counts",
                                   group.by = c("Neuron.type", "SampleID"))[["RNA"]]
 
-pseudosamples <- as.character(colnames(pseudobulk_matrix))
-regx <- stringr::str_match(pseudosamples,
-                           "^([A-Za-z0-9\\-_]+)_([0-9]{4}-ST-[12])$")
-pseudobulk_metadata <- data.frame(sample = pseudosamples,
-                                  cell_type = regx[,2],
-                                  batch = regx[,3])
-
-rm(regx); rm(pseudosamples)
+pseudosample_counts <- allCells@meta.data |>
+  count(Neuron.type, SampleID) |>
+  mutate(sample_id = paste(Neuron.type, SampleID, sep = "_"))
+
+
+stopifnot(all.equal(pseudosample_counts$sample_id,
+                    as.character(colnames(pseudobulk_matrix))))
+
+# filter out replicates with <10 single cells
+pseudobulk_matrix <- pseudobulk_matrix[,pseudosample_counts$sample_id[pseudosample_counts$n > 10]]
+
+pseudobulk_metadata <- pseudosample_counts |>
+  filter(n > 10) |>
+  rename(sample_id = sample_id,
+         cell_type = Neuron.type,
+         batch = SampleID,
+         nb_single_cells = n)
+
 
 # Precompute edgeR object
 library(edgeR)

diff --git a/server.R b/server.R
@@ -403,6 +403,51 @@ server <- function(input, output) {
 
     } else{
 
+
+      if (any(b2 == "ALL")){
+        print("testing vs ALL")
+
+        b2 <- all_cell_types |> setdiff(b1)
+
+      }
+
+      if (any(b2 == "NEURONS")){
+        print("Testing against all neurons.")
+
+        load_as_needed("allCells.metadata")
+
+        b2 <- all_neuron_types |> setdiff(b1) #does NOT contain "_stressed" neurons
+      }
+
+      tableDEX <-
+        perform_de(
+          ident.1 = b1,
+          ident.2 = b2,
+          method = input$test
+        )
+
+
+      if(input$test == "Pseudobulk: edgeR pairwise exact test" ||
+         input$test == "Pseudobulk: Wilcoxon"){
+
+        load_as_needed("edger_precomputed")
+
+        rows_b1 <- edger_precomputed$samples$group %in% b1
+        rows_b2 <- edger_precomputed$samples$group %in% b2
+
+        nb_sc_group_1 <- edger_precomputed$samples$nb_single_cells[rows_b1] |> sum()
+        nb_sc_group_2 <- edger_precomputed$samples$nb_single_cells[rows_b2] |> sum()
+        nb_rep_group_1 <- edger_precomputed$samples$nb_single_cells[rows_b1] |> length()
+        nb_rep_group_2 <- edger_precomputed$samples$nb_single_cells[rows_b2] |> length()
+
+        output$pseudobulk_metadata <- renderText({
+          paste0("Comparing group 1 (",nb_sc_group_1," single cells in ",nb_rep_group_1,
+                 " replicates) to group 2 (",nb_sc_group_2," single cells in ",
+                 nb_rep_group_2," replicates)")
+        }, sep = "<br>")
+      } else{output$pseudobulk_metadata <- renderText({""}, sep = "<br>")}
+
+
       output$text_error_dex <- renderText({""})
       output$legend_de_columns <- renderText({
         if(input$test == "Wilcoxon on single cells"){
@@ -432,38 +477,8 @@ server <- function(input, output) {
         }
       }, sep = "<br>")
 
-      if (!any(b2 %in% c("ALL", "NEURONS"))){
-        tableDEX <-
-          perform_de(
-            ident.1 = b1,
-            ident.2 = b2,
-            method = input$test
-          )
-      }
 
-      if (any(b2 == "ALL")){
-        print("testing vs ALL")
-
-        tableDEX <-
-          perform_de(
-            ident.1 = b1,
-            ident.2 = all_cell_types |> setdiff(b1),
-            method = input$test
-          )
-      }
 
-      if (any(b2 == "NEURONS")){
-        print("Testing against all neurons.")
-
-        load_as_needed("allCells.metadata")
-
-        tableDEX <-
-          perform_de(
-            ident.1 = b1,
-            ident.2 = all_neuron_types |> setdiff(b1), #does NOT contain "_stressed" neurons
-            method = input$test
-          )
-      }
 
       # Finalize output
       if (nrow(tableDEX) > 0) {

diff --git a/ui.R b/ui.R
@@ -360,14 +360,15 @@ ui <- fluidPage(
           )
         ),
         br(),
-        htmlOutput("legend_de_columns", container = h6),
+        htmlOutput("pseudobulk_metadata", container = h6),
         br(),
 
         span(textOutput("text_error_dex"), style =
                "color:red"),
         br(),
         DT::dataTableOutput("MarkTable_Batch"),
-        downloadButton('downloadDEX', "Download table")
+        downloadButton('downloadDEX', "Download table"),
+        htmlOutput("legend_de_columns", container = h6)
       )
     ),