Skip to content

Commit

Permalink
Pseudobulk replicates: filter and output
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexWeinreb committed Oct 11, 2022
1 parent 4d6b69b commit ea14718
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 67 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ Dataset_*
google-analytics-script2.html
rsconnect*
data/
tests/*
Rplots.pdf
*_data
51 changes: 24 additions & 27 deletions Functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ mean.fxn <- function(x) {
FoldChange <- function (cells.1, cells.2, features) {
thresh.min <- 0
pct.1 <- round(x = rowSums(x = allCells.data[features, cells.1,
drop = FALSE] > thresh.min)/length(x = cells.1), digits = 3)
drop = FALSE] > thresh.min)/length(x = cells.1), digits = 3)
pct.2 <- round(x = rowSums(x = allCells.data[features, cells.2,
drop = FALSE] > thresh.min)/length(x = cells.2), digits = 3)
drop = FALSE] > thresh.min)/length(x = cells.2), digits = 3)
data.1 <- mean.fxn(allCells.data[features, cells.1, drop = FALSE])
data.2 <- mean.fxn(allCells.data[features, cells.2, drop = FALSE])
fc <- (data.1 - data.2)
Expand Down Expand Up @@ -113,13 +113,17 @@ perform_de_sc <- function(ident.1 , ident.2, min.pct = 0.1, min.diff.pct = -Inf,


p_val_adj = p.adjust(p_val, method = "bonferroni", n = nrow(allCells.data))

data.frame(gene = features,
pct.1 = fc.results[features,]$pct.1,
pct.2 = fc.results[features,]$pct.2,
avg_logFC = fc.results[features,]$avg_log2FC,
p_val = p_val,
p_val_adj = p_val_adj)
p_val_adj = p_val_adj) |>
dplyr::arrange(p_val_adj, p_val, desc(abs(avg_logFC))) |>
mutate(p_val = signif(p_val, 2),
p_val_adj = signif(p_val_adj, 2),
avg_logFC = round(avg_logFC, 1))
}


Expand All @@ -146,13 +150,19 @@ perform_de_pb_wilcoxon <- function(ident.1, ident.2, ...){
statistics = data.use[x, ])), 1)
})

p_val_adj <- p.adjust(p_val, method = "bonferroni")
FDR <- p.adjust(p_val, method = "BH")
data.frame(gene = rownames(pseudobulk_matrix),
mean_1 = mean_1,
mean_2 = mean_2,
log2FC = log2FC,
p_val = p_val,
p_val_adj = p_val_adj)
FDR = FDR) |>
dplyr::arrange(FDR, p_val, desc(abs(log2FC))) |>
mutate(p_val = signif(p_val, 2),
FDR = signif(FDR, 2),
log2FC = round(log2FC, 1),
across(contains("mean"),
~ round(.x, 1)))
}


Expand All @@ -162,14 +172,18 @@ perform_de_pb_edger <- function(ident.1, ident.2, ...){

load_as_needed("edger_precomputed")

et <- exactTest(edger_precomputed, pair = c(ident.1, ident.2))
et <- exactTest(edger_precomputed, pair = c(ident.2, ident.1))

et$table |>
tibble::rownames_to_column() |>
dplyr::mutate(p_val_adj = p.adjust(PValue, method = "bonferroni")) |>
dplyr::mutate(p_val_adj = p.adjust(PValue, method = "BH")) |>
dplyr::rename(gene = rowname,
p_val = PValue,
p_val_adj = p_val_adj)
FDR = p_val_adj) |>
dplyr::arrange(FDR, p_val) |>
mutate(p_val = signif(p_val, 2),
FDR = signif(FDR, 2),
logFC = round(logFC, 1))
}


Expand All @@ -196,24 +210,7 @@ perform_de <- function(ident.1, ident.2, method, ...){

# finish

tableDEX <-
merge(
tableDEX,
gene_list,
by.x = "gene",
by.y = "gene_id",
all.x = TRUE
) |>
dplyr::arrange(p_val_adj) |>
mutate(across(starts_with("p_val"),
~ signif(.x, 2)),
across(contains("log"),
~ round(.x, 1)),
across(contains("mean"),
~ round(.x, 1)))


tableDEX
left_join(tableDEX, gene_list, by = c("gene" = "gene_id"))
}

# # Tests
Expand Down
26 changes: 18 additions & 8 deletions save_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,24 @@ pseudobulk_matrix <- AggregateExpression(allCells,
slot = "counts",
group.by = c("Neuron.type", "SampleID"))[["RNA"]]

pseudosamples <- as.character(colnames(pseudobulk_matrix))
regx <- stringr::str_match(pseudosamples,
"^([A-Za-z0-9\\-_]+)_([0-9]{4}-ST-[12])$")
pseudobulk_metadata <- data.frame(sample = pseudosamples,
cell_type = regx[,2],
batch = regx[,3])

rm(regx); rm(pseudosamples)
pseudosample_counts <- allCells@meta.data |>
count(Neuron.type, SampleID) |>
mutate(sample_id = paste(Neuron.type, SampleID, sep = "_"))


stopifnot(all.equal(pseudosample_counts$sample_id,
as.character(colnames(pseudobulk_matrix))))

# filter out replicates with <10 single cells
pseudobulk_matrix <- pseudobulk_matrix[,pseudosample_counts$sample_id[pseudosample_counts$n > 10]]

pseudobulk_metadata <- pseudosample_counts |>
filter(n > 10) |>
rename(sample_id = sample_id,
cell_type = Neuron.type,
batch = SampleID,
nb_single_cells = n)


# Precompute edgeR object
library(edgeR)
Expand Down
75 changes: 45 additions & 30 deletions server.R
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,51 @@ server <- function(input, output) {

} else{


if (any(b2 == "ALL")){
print("testing vs ALL")

b2 <- all_cell_types |> setdiff(b1)

}

if (any(b2 == "NEURONS")){
print("Testing against all neurons.")

load_as_needed("allCells.metadata")

b2 <- all_neuron_types |> setdiff(b1) #does NOT contain "_stressed" neurons
}

tableDEX <-
perform_de(
ident.1 = b1,
ident.2 = b2,
method = input$test
)


if(input$test == "Pseudobulk: edgeR pairwise exact test" ||
input$test == "Pseudobulk: Wilcoxon"){

load_as_needed("edger_precomputed")

rows_b1 <- edger_precomputed$samples$group %in% b1
rows_b2 <- edger_precomputed$samples$group %in% b2

nb_sc_group_1 <- edger_precomputed$samples$nb_single_cells[rows_b1] |> sum()
nb_sc_group_2 <- edger_precomputed$samples$nb_single_cells[rows_b2] |> sum()
nb_rep_group_1 <- edger_precomputed$samples$nb_single_cells[rows_b1] |> length()
nb_rep_group_2 <- edger_precomputed$samples$nb_single_cells[rows_b2] |> length()

output$pseudobulk_metadata <- renderText({
paste0("Comparing group 1 (",nb_sc_group_1," single cells in ",nb_rep_group_1,
" replicates) to group 2 (",nb_sc_group_2," single cells in ",
nb_rep_group_2," replicates)")
}, sep = "<br>")
} else{output$pseudobulk_metadata <- renderText({""}, sep = "<br>")}


output$text_error_dex <- renderText({""})
output$legend_de_columns <- renderText({
if(input$test == "Wilcoxon on single cells"){
Expand Down Expand Up @@ -432,38 +477,8 @@ server <- function(input, output) {
}
}, sep = "<br>")

if (!any(b2 %in% c("ALL", "NEURONS"))){
tableDEX <-
perform_de(
ident.1 = b1,
ident.2 = b2,
method = input$test
)
}

if (any(b2 == "ALL")){
print("testing vs ALL")

tableDEX <-
perform_de(
ident.1 = b1,
ident.2 = all_cell_types |> setdiff(b1),
method = input$test
)
}

if (any(b2 == "NEURONS")){
print("Testing against all neurons.")

load_as_needed("allCells.metadata")

tableDEX <-
perform_de(
ident.1 = b1,
ident.2 = all_neuron_types |> setdiff(b1), #does NOT contain "_stressed" neurons
method = input$test
)
}

# Finalize output
if (nrow(tableDEX) > 0) {
Expand Down
5 changes: 3 additions & 2 deletions ui.R
Original file line number Diff line number Diff line change
Expand Up @@ -360,14 +360,15 @@ ui <- fluidPage(
)
),
br(),
htmlOutput("legend_de_columns", container = h6),
htmlOutput("pseudobulk_metadata", container = h6),
br(),

span(textOutput("text_error_dex"), style =
"color:red"),
br(),
DT::dataTableOutput("MarkTable_Batch"),
downloadButton('downloadDEX', "Download table")
downloadButton('downloadDEX', "Download table"),
htmlOutput("legend_de_columns", container = h6)
)
),

Expand Down

0 comments on commit ea14718

Please sign in to comment.