Skip to content

Commit

Permalink
readme
Browse files Browse the repository at this point in the history
  • Loading branch information
noriakis committed Mar 21, 2024
1 parent 0acf99b commit 3dec3fa
Show file tree
Hide file tree
Showing 14 changed files with 263 additions and 48 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export(checkPATRIC)
export(checkPATRICSimple)
export(cnDiscretize)
export(combineGenes)
export(combineMaf)
export(combineSeqs)
export(compareGenes)
export(consensusSeq)
Expand All @@ -22,6 +23,7 @@ export(consensusSeqMIDAS1)
export(consensusSeqMIDAS1Slow)
export(consensusSeqMIDAS2)
export(consensusSeqMIDAS2Slow)
export(convertWideToMaf)
export(doAdonis)
export(doBoruta)
export(doGSEA)
Expand Down Expand Up @@ -58,6 +60,7 @@ export(plotHeatmap)
export(plotKEGGPathway)
export(plotMAF)
export(plotPCA)
export(plotSNVInfo)
export(returnGenes)
export(reverseAnnot)
export(setAnnotation)
Expand Down
8 changes: 5 additions & 3 deletions R/NMF.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ NMF <- function(stana, species, rank=3, target="KO", seed=53, method="snmf/r",
cat("Filtered samples:", dim(mat)[2], "\n")

## Test multiple ranks
if (estimate_rank) {
if (estimate) {
test <- nmfEstimateRank(mat, range=estimate_range, method=method)
val <- test$measures[, "cophenetic"]
b <- -1
Expand Down Expand Up @@ -142,8 +142,10 @@ plotAbundanceWithinSpecies <- function(stana, species, tss=TRUE, return_data=FAL
#' @param species species ID
#' @param tss perform total sum scaling to the resulting matrix
#' @param change_name change pathway names to description
#' @param summarize summarizing function, default to base::sum
#' @export
pathwayWithFactor <- function(stana, species, tss=FALSE, change_name=FALSE) {
pathwayWithFactor <- function(stana, species, tss=FALSE, change_name=FALSE,
summarize=sum) {
dat <- stana@NMF[[species]]
dat <- basis(dat)

Expand All @@ -161,7 +163,7 @@ pathwayWithFactor <- function(stana, species, tss=FALSE, change_name=FALSE) {
tmp <- summed[summed$V1==i, ]
int <- length(intersect(row.names(dat), tmp$V2))
if (int>1) {
tmpsum <- apply(dat[intersect(row.names(dat), tmp$V2),], 2, sum)
tmpsum <- apply(dat[intersect(row.names(dat), tmp$V2),], 2, summarize)
return(c(i, tmpsum))
} else if (int==1) {
return(c(i, dat[intersect(row.names(dat), tmp$V2),]))
Expand Down
105 changes: 105 additions & 0 deletions R/convertWideToMaf.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#' convertWideToMaf
#'
#' should be six column layout with the column corresponding to
#' "species_id","snv_id","ref_allele","alt_allele","ref_count","alt_count"
#' @return species-named list of MAF data.frame
#' @export
convertWideToMaf <- function(df) {
colnames(df) <- c("species_id","snv_id","ref_allele","alt_allele","ref_count","alt_count")
df$ref_count <- as.numeric(df$ref_count)
df$alt_count <- as.numeric(df$alt_count)

df$count <- df$ref_count + df$alt_count
df$maf <- apply(df, 1, function(x) {
if (as.numeric(x[5]) > as.numeric(x[6])) {
as.numeric(x[6]) / as.numeric(x[7])
} else {
as.numeric(x[5]) / as.numeric(x[7])
}
})
df$major <- apply(df, 1, function(x) {
if (as.numeric(x[5]) > as.numeric(x[6])) {
x[3]
} else {
x[4]
}
})
df$minor <- apply(df, 1, function(x) {
if (as.numeric(x[5]) > as.numeric(x[6])) {
x[4]
} else {
x[3]
}
})
sp <- unique(df$species_id)
ret <- lapply(sp, function(s) {
tmp <- df %>% dplyr::filter(species_id == s)
tmp <- tmp[,c("snv_id", "maf")]
row.names(tmp) <- tmp$snv_id
tmp$snv_id <- NULL
tmp
})
names(ret) <- sp

summary <- lapply(sp, function(s) {
tmp <- df %>% dplyr::filter(species_id == s)
tmp <- tmp[,c("snv_id", "major", "minor")]
row.names(tmp) <- tmp$snv_id
tmp$snv_id <- NULL
colnames(tmp) <- c("major_allele","minor_allele")
tmp$species_id <- s
tmp
})
names(ret) <- sp
names(summary) <- sp

return(list(ret, summary))

}

#' combineMaf
#' combine the maf table produced by convertWideToMaf()
#' @param mafs named list of the results of convertWideToMaf()
#' @export
combineMaf <- function(mafs) {
if (is.null(names(mafs))) {stop("The list must be named")}
sps <- do.call(union, lapply(names(mafs), function(s) {
## Species
names(mafs[[s]][[1]])
}))
snvls <- lapply(sps, function(s) {
tmpdf <- do.call(rbind, lapply(names(mafs), function(sample) {
tmp <- mafs[[sample]][[1]][[s]]
if (is.null(tmp)) {return(tmp)}
tmp$sample <- sample
tmp$snv_id <- row.names(tmp)
return(tmp)
}))
tbl <- tidyr::pivot_wider(tmpdf,
id_cols=snv_id,
names_from=sample,
values_from = maf)
tbl <- data.frame(tbl)
row.names(tbl) <- tbl$snv_id
tbl$snv_id <- NULL
tbl
})

snvsu <- lapply(sps, function(s) {
tmpdf <- do.call(rbind, lapply(names(mafs), function(sample) {
tmp <- mafs[[sample]][[2]][[s]]
if (is.null(tmp)) {return(tmp)}
tmp$sample_name <- sample
return(tmp)
}))
tbl <- data.frame(tmpdf)
tbl
})

names(snvls) <- sps
stana <- new("stana")
stana@type <- "manual"
stana@snps <- snvls
stana@snpsInfo <- snvsu
stana
}
38 changes: 23 additions & 15 deletions R/doAdonis.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ doAdonis <- function(stana, specs, cl=NULL,
snps <- stana@genes[[sp]]
}

if (!target %in% c("tree","fasta")) {
if (!(target %in% c("tree","fasta"))) {
if (maj & target=="snps" & stana@type=="MIDAS1") {
chk <- read.table(paste0(stana@mergeDir,
"/",sp,"/snps_info.txt"),
Expand Down Expand Up @@ -85,22 +85,30 @@ doAdonis <- function(stana, specs, cl=NULL,
distArg[["method"]] <- distMethod
d <- do.call(dist, distArg)
gr <- NULL
for (cn in colnames(snps)){
for (clm in seq_along(cl)){
if (cn %in% cl[[clm]]) {
gr <- c(gr, names(cl)[clm])
}
}
for (cn in sn){
if (cn %in% unlist(cl)) {
for (clm in seq_along(cl)){
if (cn %in% cl[[clm]]) {
gr <- c(gr, names(cl)[clm])
}
}
} else {
gr <- c(gr, NA)
}
}
} else {
gr <- NULL
for (cn in sn){
for (clm in seq_along(cl)){
if (cn %in% cl[[clm]]) {
gr <- c(gr, names(cl)[clm])
}
}
}
if (cn %in% unlist(cl)) {
for (clm in seq_along(cl)){
if (cn %in% cl[[clm]]) {
gr <- c(gr, names(cl)[clm])
}
}
} else {
gr <- c(gr, NA)
}
}
d <- as.dist(d)
}

Expand All @@ -111,15 +119,15 @@ doAdonis <- function(stana, specs, cl=NULL,
formulaPass <- as.formula(formula)
pr <- FALSE
}

argList[["na.action"]] <- na.omit
argList[["formula"]] <- formulaPass
adores <- do.call("adonis2", argList)

if (pr) {
pr <- adores$`Pr(>F)`
pr <- pr[!is.na(pr)]
r2 <- adores$R2[1]
qqcat(" R2: @{r2}, Pr: @{pr}\n")
qqcat(" F: @{adores$F[1]}, R2: @{r2}, Pr: @{pr}\n")
}
stana@adonisList[[sp]] <- adores
}
Expand Down
4 changes: 3 additions & 1 deletion R/doBoruta.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ doBoruta <- function(stana, sp, cl=NULL, doFix=TRUE,
qqcat("Using grouping from the slot\n")
cl <- stana@cl
}
if (target!="snps" & is.null(mat)){
if (target=="genes" & is.null(mat)){
qqcat("If needed, please provide preprocessed matrix of genes to `mat`\n")
filtDf <- stana@genes[[sp]]
} else if (target=="kos") {
filtDf <- stana@kos[[sp]]
} else if (target=="snps"){
filtDf <- stana@snps[[sp]]
if (deleteZeroDepth) {
Expand Down
11 changes: 6 additions & 5 deletions R/stana.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ setClass("stana", slots=list(
#' cat_subtle
#' @noRd
cat_subtle <- function(...) cat(pillar::style_subtle(paste0(...)))

#' show
#' print the description of stana
#' @importFrom dplyr group_by mutate summarise n
Expand Down Expand Up @@ -94,15 +95,15 @@ setMethod("show",
cat_subtle("# Inferred fasta: ", length(object@kos), " ID: ", paste0(names(object@kos)[1], collapse="/"), "\n", sep="")
}
cat_subtle("# Size:", object.size(object), " B\n", sep="")
cat_subtle("# \n")
cat_subtle("# SNV description\n")
if (object@type %in% c("MIDAS", "MIDAS2")) {
df <- stana@snpsSummary %>%
dplyr::filter(species_id %in% names(object@snps))
cat_subtle("# \n")
cat_subtle("# SNV description\n")
df <- object@snpsSummary %>%
dplyr::filter(.data$species_id %in% names(object@snps))
if (object@type=="MIDAS2") {
df$species_id <- loadDic()[[object@db]][as.character(df$species_id)]
}
if (length(stana@cl)!=0) {
if (length(object@cl)!=0) {
print(df %>% mutate(group=listToNV(object@cl)[sample_name]) %>%
group_by(group, species_id) %>%
summarise(n=n()))
Expand Down
24 changes: 23 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ returnGenes <- function(stana, species, snvs) {
}
}


#' @noRd
listToNV <- function(l) {
nm <- NULL
val <- NULL
Expand All @@ -60,3 +60,25 @@ listToNV <- function(l) {
names(nm) <- val
return(nm)
}


#' plotSNVinfo
#'
#' @param stana stana object
#' @param sp candidate species
#' @export
#' @return ggplot
plotSNVInfo <- function(stana, sp) {
d <- stana@snpsInfo[[sp]]
tbl <- data.frame(table(paste0(d$major_allele,"/",d$minor_allele)))
if ("locus_type" %in% colnames(d)) {
cdsc <- data.frame(table(d$locus_type))
cdsc <- paste(paste0(cdsc$Var1, " ", cdsc$Freq), collapse=" ")
title <- paste0("Total: ",dim(d)," (", cdsc, ")")
}
ggplot(tbl, aes(x=Var1, y=Freq)) +
geom_col()+
xlab("major_allele/minor_allele")+
cowplot::theme_cowplot()+
ggtitle(title)
}
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ getID(stana)
samples <- getSlot(stana, "snps")[[1]] |> colnames()
metadata <- data.frame(
row.names=samples,
treatment=sample(1:3, length(samples), replace=TRUE),
treatment=factor(sample(1:3, length(samples), replace=TRUE)),
marker=runif(length(samples))
)
Expand Down
Loading

0 comments on commit 3dec3fa

Please sign in to comment.