diff --git a/.gitignore b/.gitignore index 90e7e7e..1226dda 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ Dataset_* google-analytics-script2.html rsconnect* data/ +tests/* +Rplots.pdf +*_data diff --git a/Functions.R b/Functions.R index 3ea8574..3335796 100644 --- a/Functions.R +++ b/Functions.R @@ -53,9 +53,9 @@ mean.fxn <- function(x) { FoldChange <- function (cells.1, cells.2, features) { thresh.min <- 0 pct.1 <- round(x = rowSums(x = allCells.data[features, cells.1, - drop = FALSE] > thresh.min)/length(x = cells.1), digits = 3) + drop = FALSE] > thresh.min)/length(x = cells.1), digits = 3) pct.2 <- round(x = rowSums(x = allCells.data[features, cells.2, - drop = FALSE] > thresh.min)/length(x = cells.2), digits = 3) + drop = FALSE] > thresh.min)/length(x = cells.2), digits = 3) data.1 <- mean.fxn(allCells.data[features, cells.1, drop = FALSE]) data.2 <- mean.fxn(allCells.data[features, cells.2, drop = FALSE]) fc <- (data.1 - data.2) @@ -113,13 +113,17 @@ perform_de_sc <- function(ident.1 , ident.2, min.pct = 0.1, min.diff.pct = -Inf, p_val_adj = p.adjust(p_val, method = "bonferroni", n = nrow(allCells.data)) - + data.frame(gene = features, pct.1 = fc.results[features,]$pct.1, pct.2 = fc.results[features,]$pct.2, avg_logFC = fc.results[features,]$avg_log2FC, p_val = p_val, - p_val_adj = p_val_adj) + p_val_adj = p_val_adj) |> + dplyr::arrange(p_val_adj, p_val, desc(abs(avg_logFC))) |> + mutate(p_val = signif(p_val, 2), + p_val_adj = signif(p_val_adj, 2), + avg_logFC = round(avg_logFC, 1)) } @@ -146,13 +150,19 @@ perform_de_pb_wilcoxon <- function(ident.1, ident.2, ...){ statistics = data.use[x, ])), 1) }) - p_val_adj <- p.adjust(p_val, method = "bonferroni") + FDR <- p.adjust(p_val, method = "BH") data.frame(gene = rownames(pseudobulk_matrix), mean_1 = mean_1, mean_2 = mean_2, log2FC = log2FC, p_val = p_val, - p_val_adj = p_val_adj) + FDR = FDR) |> + dplyr::arrange(FDR, p_val, desc(abs(log2FC))) |> + mutate(p_val = signif(p_val, 2), + FDR = signif(FDR, 2), + log2FC = round(log2FC, 1), + across(contains("mean"), + ~ round(.x, 1))) } @@ -162,14 +172,18 @@ perform_de_pb_edger <- function(ident.1, ident.2, ...){ load_as_needed("edger_precomputed") - et <- exactTest(edger_precomputed, pair = c(ident.1, ident.2)) + et <- exactTest(edger_precomputed, pair = c(ident.2, ident.1)) et$table |> tibble::rownames_to_column() |> - dplyr::mutate(p_val_adj = p.adjust(PValue, method = "bonferroni")) |> + dplyr::mutate(p_val_adj = p.adjust(PValue, method = "BH")) |> dplyr::rename(gene = rowname, p_val = PValue, - p_val_adj = p_val_adj) + FDR = p_val_adj) |> + dplyr::arrange(FDR, p_val) |> + mutate(p_val = signif(p_val, 2), + FDR = signif(FDR, 2), + logFC = round(logFC, 1)) } @@ -196,24 +210,7 @@ perform_de <- function(ident.1, ident.2, method, ...){ # finish - tableDEX <- - merge( - tableDEX, - gene_list, - by.x = "gene", - by.y = "gene_id", - all.x = TRUE - ) |> - dplyr::arrange(p_val_adj) |> - mutate(across(starts_with("p_val"), - ~ signif(.x, 2)), - across(contains("log"), - ~ round(.x, 1)), - across(contains("mean"), - ~ round(.x, 1))) - - - tableDEX + left_join(tableDEX, gene_list, by = c("gene" = "gene_id")) } # # Tests diff --git a/save_data.R b/save_data.R index 2f0733c..7e8e838 100644 --- a/save_data.R +++ b/save_data.R @@ -59,14 +59,24 @@ pseudobulk_matrix <- AggregateExpression(allCells, slot = "counts", group.by = c("Neuron.type", "SampleID"))[["RNA"]] -pseudosamples <- as.character(colnames(pseudobulk_matrix)) -regx <- stringr::str_match(pseudosamples, - "^([A-Za-z0-9\\-_]+)_([0-9]{4}-ST-[12])$") -pseudobulk_metadata <- data.frame(sample = pseudosamples, - cell_type = regx[,2], - batch = regx[,3]) - -rm(regx); rm(pseudosamples) +pseudosample_counts <- allCells@meta.data |> + count(Neuron.type, SampleID) |> + mutate(sample_id = paste(Neuron.type, SampleID, sep = "_")) + + +stopifnot(all.equal(pseudosample_counts$sample_id, + as.character(colnames(pseudobulk_matrix)))) + +# filter out replicates with <10 single cells +pseudobulk_matrix <- pseudobulk_matrix[,pseudosample_counts$sample_id[pseudosample_counts$n > 10]] + +pseudobulk_metadata <- pseudosample_counts |> + filter(n > 10) |> + rename(sample_id = sample_id, + cell_type = Neuron.type, + batch = SampleID, + nb_single_cells = n) + # Precompute edgeR object library(edgeR) diff --git a/server.R b/server.R index 3492b19..5d51c68 100644 --- a/server.R +++ b/server.R @@ -403,6 +403,51 @@ server <- function(input, output) { } else{ + + if (any(b2 == "ALL")){ + print("testing vs ALL") + + b2 <- all_cell_types |> setdiff(b1) + + } + + if (any(b2 == "NEURONS")){ + print("Testing against all neurons.") + + load_as_needed("allCells.metadata") + + b2 <- all_neuron_types |> setdiff(b1) #does NOT contain "_stressed" neurons + } + + tableDEX <- + perform_de( + ident.1 = b1, + ident.2 = b2, + method = input$test + ) + + + if(input$test == "Pseudobulk: edgeR pairwise exact test" || + input$test == "Pseudobulk: Wilcoxon"){ + + load_as_needed("edger_precomputed") + + rows_b1 <- edger_precomputed$samples$group %in% b1 + rows_b2 <- edger_precomputed$samples$group %in% b2 + + nb_sc_group_1 <- edger_precomputed$samples$nb_single_cells[rows_b1] |> sum() + nb_sc_group_2 <- edger_precomputed$samples$nb_single_cells[rows_b2] |> sum() + nb_rep_group_1 <- edger_precomputed$samples$nb_single_cells[rows_b1] |> length() + nb_rep_group_2 <- edger_precomputed$samples$nb_single_cells[rows_b2] |> length() + + output$pseudobulk_metadata <- renderText({ + paste0("Comparing group 1 (",nb_sc_group_1," single cells in ",nb_rep_group_1, + " replicates) to group 2 (",nb_sc_group_2," single cells in ", + nb_rep_group_2," replicates)") + }, sep = "
") + } else{output$pseudobulk_metadata <- renderText({""}, sep = "
")} + + output$text_error_dex <- renderText({""}) output$legend_de_columns <- renderText({ if(input$test == "Wilcoxon on single cells"){ @@ -432,38 +477,8 @@ server <- function(input, output) { } }, sep = "
") - if (!any(b2 %in% c("ALL", "NEURONS"))){ - tableDEX <- - perform_de( - ident.1 = b1, - ident.2 = b2, - method = input$test - ) - } - if (any(b2 == "ALL")){ - print("testing vs ALL") - - tableDEX <- - perform_de( - ident.1 = b1, - ident.2 = all_cell_types |> setdiff(b1), - method = input$test - ) - } - if (any(b2 == "NEURONS")){ - print("Testing against all neurons.") - - load_as_needed("allCells.metadata") - - tableDEX <- - perform_de( - ident.1 = b1, - ident.2 = all_neuron_types |> setdiff(b1), #does NOT contain "_stressed" neurons - method = input$test - ) - } # Finalize output if (nrow(tableDEX) > 0) { diff --git a/ui.R b/ui.R index 85d2541..f9afe3d 100644 --- a/ui.R +++ b/ui.R @@ -360,14 +360,15 @@ ui <- fluidPage( ) ), br(), - htmlOutput("legend_de_columns", container = h6), + htmlOutput("pseudobulk_metadata", container = h6), br(), span(textOutput("text_error_dex"), style = "color:red"), br(), DT::dataTableOutput("MarkTable_Batch"), - downloadButton('downloadDEX', "Download table") + downloadButton('downloadDEX', "Download table"), + htmlOutput("legend_de_columns", container = h6) ) ),