-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
470 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
##### Name: composition.R | ||
##### Version: 1.0 | ||
##### Description: Composition plots by sample | ||
##### Date of Creation: Jan-2023 | ||
##### Author: Natália Faraj Murad | ||
##### E-MAIL: [email protected] | ||
##### PROJECT: https://github.com/natmurad/amplicon16S | ||
|
||
|
||
############################ | ||
###--- Load libraries ---### | ||
############################ | ||
library(tidyverse) # data manipulation | ||
library(dplyr) # data manipulation | ||
library(tidyr) # data manipulation | ||
library(ggrepel) | ||
library(ggtree) # phylogenetic trees visualization | ||
library(ape) # phylogenetic trees manipulation | ||
library(ggplot2) | ||
library(ggthemes) # colors | ||
library(RColorBrewer) | ||
library(qiime2R) # read qiime2 outputs | ||
library(phyloseq) | ||
############################ | ||
|
||
|
||
###--- Graph config ---### | ||
theme_set(theme_bw()) | ||
|
||
###--- Set directory ---### | ||
setwd("/Users/natmurad/Documents/") | ||
outDir <- "/Users/natmurad/Documents/results/" | ||
|
||
###--- Read files ---### | ||
# Create a Phyloseq Object | ||
physeq<-qza_to_phyloseq( | ||
features="./DADA2_denoising_output/table.qza", | ||
tree="rooted_tree.qza", | ||
"taxonomy.qza", | ||
metadata = "q2metadata.tsv" | ||
) | ||
|
||
###--- Sample Composition ---### | ||
|
||
###--- Filtering ---### | ||
# Remove OTUs that do not show appear more than 5 times in more than half the samples | ||
wh0 = genefilter_sample(physeq, filterfun_sample(function(x) x > 5), A=0.5*nsamples(physeq)) | ||
physeq_filt = prune_taxa(wh0, physeq) | ||
|
||
# Transform to even sampling depth. | ||
physeq_filt = transform_sample_counts(physeq_filt, function(x) 1E6 * x/sum(x)) | ||
|
||
# Keep only the most abundant five phyla. | ||
phylum.sum = tapply(taxa_sums(physeq_filt), tax_table(physeq_filt)[, "Phylum"], sum, na.rm=TRUE) | ||
top5phyla = names(sort(phylum.sum, TRUE))[1:5] | ||
physeq_filt = prune_taxa((tax_table(physeq_filt)[, "Phylum"] %in% top5phyla), physeq_filt) | ||
|
||
### dendrogram | ||
tree <- plot_tree(physeq_filt, color = "Subgroup1", label.tips = "Genus", size = "abundance", | ||
plot.margin = 0.2, ladderize = TRUE, sizebase = 2) + | ||
scale_colour_tableau("Classic 10") + | ||
ggtitle("Phylogenetic Tree - Genus") + | ||
theme(plot.title = element_text(hjust = 0.5)) + labs(colour = "Condition") | ||
|
||
ggsave("treeGenus.pdf", plot = tree, | ||
device = "pdf", path = outDir, width = 15, height = 15, | ||
units = "in") | ||
|
||
###--- barplot composition ---### | ||
# scaling | ||
# set seed | ||
set.seed(1) | ||
|
||
# scaling the mouse data to the smallest samples. Note: rngseed is similar to set.seed | ||
physeq_scaled <- rarefy_even_depth(physeq_filt,sample.size=19000, replace=FALSE, rngseed = 1) | ||
|
||
###--- Organizing treatments in an order ---### | ||
sample_data(physeq_scaled)$Subgroup1 <- factor((sample_data(physeq_scaled)$Subgroup1), levels=c("Control","AD","NDGA","AD_NDGA")) | ||
|
||
# labels for the samples | ||
rownames <- sample_data(physeq_scaled)[,"customer_label"] | ||
dim(otu_table(physeq_scaled)) | ||
colourCount <- length(unique(tax_table(physeq_scaled)[,"Genus"])) # number of genus for colors | ||
|
||
getPalette = colorRampPalette(brewer.pal(10, "Paired")) # create color palette | ||
|
||
###--- bar plot ---### | ||
composition <- plot_bar(physeq_scaled, x = "customer_label", fill = "Genus") + | ||
facet_grid(~Subgroup1, scales="free_x") + | ||
scale_fill_manual(values=getPalette(colourCount))+ | ||
ggtitle("Sample Composition - Genus") + | ||
theme(plot.title = element_text(hjust = 0.5)) | ||
|
||
ggsave("sampleComposition.pdf", plot = composition, | ||
device = "pdf", path = outDir, width = 12, height = 8.5, | ||
units = "in") | ||
|
||
|
||
#p <- plot_composition(physeq, | ||
# "Family", | ||
# numberOfTaxa = 30, fill = Subgroup1) | ||
## plot facetting | ||
#p <- p + facet_wrap(~EnvType, | ||
#scales = "free_x" | ||
#, nrow = 1) | ||
#plot(p) | ||
|
||
|
||
###--- Heatmap ---### | ||
|
||
# from here: https://forum.qiime2.org/t/tutorial-integrating-qiime2-and-r-for-data-visualization-and-analysis-using-qiime2r/4121 | ||
|
||
metadata<-read_q2metadata("q2metadata.tsv") | ||
countsTable<-read_qza("./DADA2_denoising_output/table.qza")$data | ||
taxonomy<-read_qza("taxonomy.qza")$data | ||
|
||
countsTable<-apply(countsTable, 2, function(x) x/sum(x)*100) # convert to percent | ||
|
||
countsToPlot<- | ||
data.frame(MeanAbundance=rowMeans(countsTable)) %>% # find the average abundance of a SV | ||
rownames_to_column("Feature.ID") %>% | ||
arrange(desc(MeanAbundance)) %>% | ||
top_n(40, MeanAbundance) %>% | ||
pull(Feature.ID) # extract only the names from the table | ||
|
||
heatmap <- countsTable %>% | ||
as.data.frame() %>% | ||
rownames_to_column("Feature.ID") %>% | ||
gather(-Feature.ID, key="SampleID", value="Abundance") %>% | ||
mutate(Feature.ID=if_else(Feature.ID %in% countsToPlot, Feature.ID, "Remainder")) %>% # flag features to be collapsed | ||
group_by(SampleID, Feature.ID) %>% | ||
summarize(Abundance=sum(Abundance)) %>% | ||
left_join(metadata) %>% | ||
mutate(NormAbundance=log10(Abundance+0.01)) %>% # do a log10 transformation after adding a 0.01% pseudocount. Could also add 1 read before transformation to percent | ||
left_join(taxonomy) %>% | ||
mutate(Feature=paste(Feature.ID, Taxon)) %>% | ||
mutate(Feature=gsub("[kpcofgs]__", "", Feature)) %>% # trim out leading text from taxonomy string | ||
ggplot(aes(x=customer_label, y=Taxon, fill=NormAbundance)) + | ||
geom_tile() + | ||
facet_grid(~Subgroup1, scales="free_x") + | ||
theme(axis.text.x=element_text(angle=45, hjust=1)) + | ||
scale_fill_viridis_c(name="log10(% Abundance)") | ||
|
||
ggsave("heatmapSpecies.pdf", plot = heatmap, | ||
device = "pdf", path = outDir, width = 14, height = 8.5, | ||
units = "in") | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
##### Name: differentalAbundance.R | ||
##### Version: 1.0 | ||
##### Description: Differential abundance analysis across groups/samples | ||
##### Date of Creation: Jan-2023 | ||
##### Author: Natália Faraj Murad | ||
##### E-MAIL: [email protected] | ||
##### PROJECT: https://github.com/natmurad/amplicon16S | ||
|
||
###--- Load libraries ---### | ||
library(tidyverse) # data manipulation | ||
library(dplyr) # data manipulation | ||
library(tidyr) # data manipulation | ||
library(EnhancedVolcano) # plot volcano | ||
library(DT) # visualize tables | ||
library(htmltools) # export html | ||
library(ggrepel) # for offset labels | ||
library(ggplot2) | ||
library(ggthemes) # colors | ||
library(ggpubr) # add stats on plots | ||
library(qiime2R) # read qiime2 outputs | ||
library(metagenomeSeq) # for differential abundance analysis - zero inflated model | ||
|
||
###--- Graph config ---### | ||
theme_set(theme_bw()) | ||
|
||
###--- Set directory ---### | ||
###--- Set directory ---### | ||
setwd("/Users/natmurad/Documents/") | ||
outDir <- "/Users/natmurad/Documents/results/" | ||
|
||
# Creating a Phyloseq Object | ||
physeq<-qza_to_phyloseq( | ||
features="./DADA2_denoising_output/table.qza", | ||
tree="rooted_tree.qza", | ||
"taxonomy.qza", | ||
metadata = "q2metadata.tsv" | ||
) | ||
|
||
###--- Conversions needed ---### | ||
##--- metagenomeseq ---## | ||
mtseq <- phyloseq_to_metagenomeSeq(physeq) | ||
|
||
#################################### | ||
###--- Differential Abundance ---### | ||
#################################### | ||
|
||
###--- Filtering ---### | ||
#filterData(obj, present = 10, depth = 1000) | ||
|
||
###--- Calculating normalization factors ---### | ||
p = cumNormStatFast(mtseq) | ||
mtseq = cumNorm(mtseq, p = p) | ||
|
||
###--- Statistical testing ---### | ||
###--- Zero-inflated Gaussian Mixture model ---### | ||
|
||
|
||
normFactor = normFactors(mtseq) | ||
normFactor = log2(normFactor/median(normFactor) + 1) | ||
|
||
# maxit=1 is for demonstration purposes | ||
settings = zigControl(maxit = 1, verbose = FALSE) | ||
mod = model.matrix(~ 0 + Subgroup1, pData(mtseq)) | ||
#colnames(mod) = levels(as.factor(treat)) | ||
# fitting the ZIG model | ||
res = fitZig(obj = mtseq, mod = mod, control = settings) | ||
|
||
# The output of fitZig contains a list of various useful | ||
# items. hint: names(res). Probably the most useful is the | ||
# limma 'MLArrayLM' object called fit. | ||
zigFit = slot(res, "fit") | ||
finalMod = slot(res, "fit")$design | ||
|
||
# Ad x Control; Ad_Drug x Ad; Drug x Control | ||
|
||
contrasts <- combn(colnames(finalMod)[1:length(colnames(finalMod))-1], 2) | ||
updn_cols <- c(RColorBrewer::brewer.pal(6, 'Blues')[3], RColorBrewer::brewer.pal(6, 'Reds')[3]) | ||
|
||
for(i in 1:ncol(contrasts)){ | ||
x <- paste0(contrasts[2,i], "-", contrasts[1,i]) | ||
contrast.matrix = makeContrasts(x, levels = finalMod) | ||
fit2 = contrasts.fit(zigFit, contrast.matrix) | ||
fit2 = eBayes(fit2) | ||
table <- topTable(fit2, sort="none",n=Inf) | ||
table$seq <- rownames(table) | ||
table <- merge(table, fData(mtseq)[,c("Genus", "Species")], by = "row.names") | ||
write.csv(table, paste0(outDir, gsub("Subgroup1", "", paste0(contrasts[2,i], "vs", contrasts[1,i])), '.csv')) | ||
html <- table %>% | ||
datatable(caption = gsub("Subgroup1", "", paste0(contrasts[2,i], " vs ", contrasts[1,i]))) %>% | ||
DT::formatStyle('logFC', | ||
valueColumns = 'logFC', | ||
backgroundColor = DT::styleInterval(0, rev(updn_cols))) %>% | ||
DT::formatSignif(1:6, digits = 4) | ||
save_html(html, file = paste0(outDir, gsub("Subgroup1", "", paste0(contrasts[2,i], "vs", contrasts[1,i])), '.html'), background = "white", libdir = "lib", lang = "en") | ||
|
||
###--- Volcano ---### | ||
volcano <- EnhancedVolcano(table, | ||
lab = paste0(table[,"Genus"], " ", table[,"Species"]), | ||
x = 'logFC', | ||
y = 'adj.P.Val', | ||
labSize = 3, | ||
xlim = c(-10, 10), | ||
title = gsub("Subgroup1", "", paste0(contrasts[2,i], " vs ", contrasts[1,i])), | ||
subtitle = "LogFC vs -Log10Pvalue", | ||
pCutoff = 0.05, | ||
col=c('#030000', '#15B01A', '#0343DF', '#E50000') | ||
) | ||
file <- gsub("Subgroup1", "", paste0("volcano", contrasts[2,i], "vs", contrasts[1,i], ".pdf")) | ||
ggsave(file, plot = volcano, | ||
device = "pdf", path = outDir, width = 7, height = 7, | ||
units = "in") | ||
} | ||
|
||
for(i in 1:ncol(contrasts)){ | ||
x <- paste0(contrasts[1,i], "-", contrasts[2,i]) | ||
contrast.matrix = makeContrasts(x, levels = finalMod) | ||
fit2 = contrasts.fit(zigFit, contrast.matrix) | ||
fit2 = eBayes(fit2) | ||
table <- topTable(fit2, sort="none",n=Inf) | ||
table$seq <- rownames(table) | ||
table <- merge(table, fData(mtseq)[,c("Genus", "Species")], by = "row.names") | ||
write.csv(table, paste0(outDir, gsub("Subgroup1", "", paste0(contrasts[1,i], "vs", contrasts[2,i])), '.csv')) | ||
html <- table %>% | ||
datatable(caption = gsub("Subgroup1", "", paste0(contrasts[1,i], " vs ", contrasts[2,i]))) %>% | ||
DT::formatStyle('logFC', | ||
valueColumns = 'logFC', | ||
backgroundColor = DT::styleInterval(0, rev(updn_cols))) %>% | ||
DT::formatSignif(1:6, digits = 4) | ||
save_html(html, file = paste0(outDir, gsub("Subgroup1", "", paste0(contrasts[1,i], "vs", contrasts[2,i])), '.html'), background = "white", libdir = "lib", lang = "en") | ||
|
||
###--- Volcano ---### | ||
volcano <- EnhancedVolcano(table, | ||
lab = paste0(table[,"Genus"], " ", table[,"Species"]), | ||
x = 'logFC', | ||
y = 'adj.P.Val', | ||
labSize = 3, | ||
xlim = c(-10, 10), | ||
title = gsub("Subgroup1", "", paste0(contrasts[1,i], " vs ", contrasts[2,i])), | ||
subtitle = "LogFC vs -Log10Pvalue", | ||
pCutoff = 0.05, | ||
col=c('#030000', '#15B01A', '#0343DF', '#E50000') | ||
) | ||
file <- gsub("Subgroup1", "", paste0("volcano", contrasts[1,i], "vs", contrasts[2,i], ".pdf")) | ||
ggsave(file, plot = volcano, | ||
device = "pdf", path = outDir, width = 7, height = 7, | ||
units = "in") | ||
} |
Oops, something went wrong.