Skip to content

Commit

Permalink
rscripts addition
Browse files Browse the repository at this point in the history
  • Loading branch information
natmurad committed Feb 2, 2023
1 parent b907686 commit aaf09e4
Show file tree
Hide file tree
Showing 4 changed files with 470 additions and 0 deletions.
Binary file modified .DS_Store
Binary file not shown.
147 changes: 147 additions & 0 deletions RScripts/composition.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
##### Name: composition.R
##### Version: 1.0
##### Description: Composition plots by sample
##### Date of Creation: Jan-2023
##### Author: Natália Faraj Murad
##### E-MAIL: [email protected]
##### PROJECT: https://github.com/natmurad/amplicon16S


############################
###--- Load libraries ---###
############################
library(tidyverse) # data manipulation
library(dplyr) # data manipulation
library(tidyr) # data manipulation
library(ggrepel)
library(ggtree) # phylogenetic trees visualization
library(ape) # phylogenetic trees manipulation
library(ggplot2)
library(ggthemes) # colors
library(RColorBrewer)
library(qiime2R) # read qiime2 outputs
library(phyloseq)
############################


###--- Graph config ---###
theme_set(theme_bw())

###--- Set directory ---###
setwd("/Users/natmurad/Documents/")
outDir <- "/Users/natmurad/Documents/results/"

###--- Read files ---###
# Create a Phyloseq Object
physeq<-qza_to_phyloseq(
features="./DADA2_denoising_output/table.qza",
tree="rooted_tree.qza",
"taxonomy.qza",
metadata = "q2metadata.tsv"
)

###--- Sample Composition ---###

###--- Filtering ---###
# Remove OTUs that do not show appear more than 5 times in more than half the samples
wh0 = genefilter_sample(physeq, filterfun_sample(function(x) x > 5), A=0.5*nsamples(physeq))
physeq_filt = prune_taxa(wh0, physeq)

# Transform to even sampling depth.
physeq_filt = transform_sample_counts(physeq_filt, function(x) 1E6 * x/sum(x))

# Keep only the most abundant five phyla.
phylum.sum = tapply(taxa_sums(physeq_filt), tax_table(physeq_filt)[, "Phylum"], sum, na.rm=TRUE)
top5phyla = names(sort(phylum.sum, TRUE))[1:5]
physeq_filt = prune_taxa((tax_table(physeq_filt)[, "Phylum"] %in% top5phyla), physeq_filt)

### dendrogram
tree <- plot_tree(physeq_filt, color = "Subgroup1", label.tips = "Genus", size = "abundance",
plot.margin = 0.2, ladderize = TRUE, sizebase = 2) +
scale_colour_tableau("Classic 10") +
ggtitle("Phylogenetic Tree - Genus") +
theme(plot.title = element_text(hjust = 0.5)) + labs(colour = "Condition")

ggsave("treeGenus.pdf", plot = tree,
device = "pdf", path = outDir, width = 15, height = 15,
units = "in")

###--- barplot composition ---###
# scaling
# set seed
set.seed(1)

# scaling the mouse data to the smallest samples. Note: rngseed is similar to set.seed
physeq_scaled <- rarefy_even_depth(physeq_filt,sample.size=19000, replace=FALSE, rngseed = 1)

###--- Organizing treatments in an order ---###
sample_data(physeq_scaled)$Subgroup1 <- factor((sample_data(physeq_scaled)$Subgroup1), levels=c("Control","AD","NDGA","AD_NDGA"))

# labels for the samples
rownames <- sample_data(physeq_scaled)[,"customer_label"]
dim(otu_table(physeq_scaled))
colourCount <- length(unique(tax_table(physeq_scaled)[,"Genus"])) # number of genus for colors

getPalette = colorRampPalette(brewer.pal(10, "Paired")) # create color palette

###--- bar plot ---###
composition <- plot_bar(physeq_scaled, x = "customer_label", fill = "Genus") +
facet_grid(~Subgroup1, scales="free_x") +
scale_fill_manual(values=getPalette(colourCount))+
ggtitle("Sample Composition - Genus") +
theme(plot.title = element_text(hjust = 0.5))

ggsave("sampleComposition.pdf", plot = composition,
device = "pdf", path = outDir, width = 12, height = 8.5,
units = "in")


#p <- plot_composition(physeq,
# "Family",
# numberOfTaxa = 30, fill = Subgroup1)
## plot facetting
#p <- p + facet_wrap(~EnvType,
#scales = "free_x"
#, nrow = 1)
#plot(p)


###--- Heatmap ---###

# from here: https://forum.qiime2.org/t/tutorial-integrating-qiime2-and-r-for-data-visualization-and-analysis-using-qiime2r/4121

metadata<-read_q2metadata("q2metadata.tsv")
countsTable<-read_qza("./DADA2_denoising_output/table.qza")$data
taxonomy<-read_qza("taxonomy.qza")$data

countsTable<-apply(countsTable, 2, function(x) x/sum(x)*100) # convert to percent

countsToPlot<-
data.frame(MeanAbundance=rowMeans(countsTable)) %>% # find the average abundance of a SV
rownames_to_column("Feature.ID") %>%
arrange(desc(MeanAbundance)) %>%
top_n(40, MeanAbundance) %>%
pull(Feature.ID) # extract only the names from the table

heatmap <- countsTable %>%
as.data.frame() %>%
rownames_to_column("Feature.ID") %>%
gather(-Feature.ID, key="SampleID", value="Abundance") %>%
mutate(Feature.ID=if_else(Feature.ID %in% countsToPlot, Feature.ID, "Remainder")) %>% # flag features to be collapsed
group_by(SampleID, Feature.ID) %>%
summarize(Abundance=sum(Abundance)) %>%
left_join(metadata) %>%
mutate(NormAbundance=log10(Abundance+0.01)) %>% # do a log10 transformation after adding a 0.01% pseudocount. Could also add 1 read before transformation to percent
left_join(taxonomy) %>%
mutate(Feature=paste(Feature.ID, Taxon)) %>%
mutate(Feature=gsub("[kpcofgs]__", "", Feature)) %>% # trim out leading text from taxonomy string
ggplot(aes(x=customer_label, y=Taxon, fill=NormAbundance)) +
geom_tile() +
facet_grid(~Subgroup1, scales="free_x") +
theme(axis.text.x=element_text(angle=45, hjust=1)) +
scale_fill_viridis_c(name="log10(% Abundance)")

ggsave("heatmapSpecies.pdf", plot = heatmap,
device = "pdf", path = outDir, width = 14, height = 8.5,
units = "in")

147 changes: 147 additions & 0 deletions RScripts/differentialAbundance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
##### Name: differentalAbundance.R
##### Version: 1.0
##### Description: Differential abundance analysis across groups/samples
##### Date of Creation: Jan-2023
##### Author: Natália Faraj Murad
##### E-MAIL: [email protected]
##### PROJECT: https://github.com/natmurad/amplicon16S

###--- Load libraries ---###
library(tidyverse) # data manipulation
library(dplyr) # data manipulation
library(tidyr) # data manipulation
library(EnhancedVolcano) # plot volcano
library(DT) # visualize tables
library(htmltools) # export html
library(ggrepel) # for offset labels
library(ggplot2)
library(ggthemes) # colors
library(ggpubr) # add stats on plots
library(qiime2R) # read qiime2 outputs
library(metagenomeSeq) # for differential abundance analysis - zero inflated model

###--- Graph config ---###
theme_set(theme_bw())

###--- Set directory ---###
###--- Set directory ---###
setwd("/Users/natmurad/Documents/")
outDir <- "/Users/natmurad/Documents/results/"

# Creating a Phyloseq Object
physeq<-qza_to_phyloseq(
features="./DADA2_denoising_output/table.qza",
tree="rooted_tree.qza",
"taxonomy.qza",
metadata = "q2metadata.tsv"
)

###--- Conversions needed ---###
##--- metagenomeseq ---##
mtseq <- phyloseq_to_metagenomeSeq(physeq)

####################################
###--- Differential Abundance ---###
####################################

###--- Filtering ---###
#filterData(obj, present = 10, depth = 1000)

###--- Calculating normalization factors ---###
p = cumNormStatFast(mtseq)
mtseq = cumNorm(mtseq, p = p)

###--- Statistical testing ---###
###--- Zero-inflated Gaussian Mixture model ---###


normFactor = normFactors(mtseq)
normFactor = log2(normFactor/median(normFactor) + 1)

# maxit=1 is for demonstration purposes
settings = zigControl(maxit = 1, verbose = FALSE)
mod = model.matrix(~ 0 + Subgroup1, pData(mtseq))
#colnames(mod) = levels(as.factor(treat))
# fitting the ZIG model
res = fitZig(obj = mtseq, mod = mod, control = settings)

# The output of fitZig contains a list of various useful
# items. hint: names(res). Probably the most useful is the
# limma 'MLArrayLM' object called fit.
zigFit = slot(res, "fit")
finalMod = slot(res, "fit")$design

# Ad x Control; Ad_Drug x Ad; Drug x Control

contrasts <- combn(colnames(finalMod)[1:length(colnames(finalMod))-1], 2)
updn_cols <- c(RColorBrewer::brewer.pal(6, 'Blues')[3], RColorBrewer::brewer.pal(6, 'Reds')[3])

for(i in 1:ncol(contrasts)){
x <- paste0(contrasts[2,i], "-", contrasts[1,i])
contrast.matrix = makeContrasts(x, levels = finalMod)
fit2 = contrasts.fit(zigFit, contrast.matrix)
fit2 = eBayes(fit2)
table <- topTable(fit2, sort="none",n=Inf)
table$seq <- rownames(table)
table <- merge(table, fData(mtseq)[,c("Genus", "Species")], by = "row.names")
write.csv(table, paste0(outDir, gsub("Subgroup1", "", paste0(contrasts[2,i], "vs", contrasts[1,i])), '.csv'))
html <- table %>%
datatable(caption = gsub("Subgroup1", "", paste0(contrasts[2,i], " vs ", contrasts[1,i]))) %>%
DT::formatStyle('logFC',
valueColumns = 'logFC',
backgroundColor = DT::styleInterval(0, rev(updn_cols))) %>%
DT::formatSignif(1:6, digits = 4)
save_html(html, file = paste0(outDir, gsub("Subgroup1", "", paste0(contrasts[2,i], "vs", contrasts[1,i])), '.html'), background = "white", libdir = "lib", lang = "en")

###--- Volcano ---###
volcano <- EnhancedVolcano(table,
lab = paste0(table[,"Genus"], " ", table[,"Species"]),
x = 'logFC',
y = 'adj.P.Val',
labSize = 3,
xlim = c(-10, 10),
title = gsub("Subgroup1", "", paste0(contrasts[2,i], " vs ", contrasts[1,i])),
subtitle = "LogFC vs -Log10Pvalue",
pCutoff = 0.05,
col=c('#030000', '#15B01A', '#0343DF', '#E50000')
)
file <- gsub("Subgroup1", "", paste0("volcano", contrasts[2,i], "vs", contrasts[1,i], ".pdf"))
ggsave(file, plot = volcano,
device = "pdf", path = outDir, width = 7, height = 7,
units = "in")
}

for(i in 1:ncol(contrasts)){
x <- paste0(contrasts[1,i], "-", contrasts[2,i])
contrast.matrix = makeContrasts(x, levels = finalMod)
fit2 = contrasts.fit(zigFit, contrast.matrix)
fit2 = eBayes(fit2)
table <- topTable(fit2, sort="none",n=Inf)
table$seq <- rownames(table)
table <- merge(table, fData(mtseq)[,c("Genus", "Species")], by = "row.names")
write.csv(table, paste0(outDir, gsub("Subgroup1", "", paste0(contrasts[1,i], "vs", contrasts[2,i])), '.csv'))
html <- table %>%
datatable(caption = gsub("Subgroup1", "", paste0(contrasts[1,i], " vs ", contrasts[2,i]))) %>%
DT::formatStyle('logFC',
valueColumns = 'logFC',
backgroundColor = DT::styleInterval(0, rev(updn_cols))) %>%
DT::formatSignif(1:6, digits = 4)
save_html(html, file = paste0(outDir, gsub("Subgroup1", "", paste0(contrasts[1,i], "vs", contrasts[2,i])), '.html'), background = "white", libdir = "lib", lang = "en")

###--- Volcano ---###
volcano <- EnhancedVolcano(table,
lab = paste0(table[,"Genus"], " ", table[,"Species"]),
x = 'logFC',
y = 'adj.P.Val',
labSize = 3,
xlim = c(-10, 10),
title = gsub("Subgroup1", "", paste0(contrasts[1,i], " vs ", contrasts[2,i])),
subtitle = "LogFC vs -Log10Pvalue",
pCutoff = 0.05,
col=c('#030000', '#15B01A', '#0343DF', '#E50000')
)
file <- gsub("Subgroup1", "", paste0("volcano", contrasts[1,i], "vs", contrasts[2,i], ".pdf"))
ggsave(file, plot = volcano,
device = "pdf", path = outDir, width = 7, height = 7,
units = "in")
}
Loading

0 comments on commit aaf09e4

Please sign in to comment.