readme

noriakis · Mar 21, 2024 · 3dec3fa · 3dec3fa
1 parent 0acf99b
commit 3dec3fa
Show file tree

Hide file tree

Showing 14 changed files with 263 additions and 48 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -14,6 +14,7 @@ export(checkPATRIC)
 export(checkPATRICSimple)
 export(cnDiscretize)
 export(combineGenes)
+export(combineMaf)
 export(combineSeqs)
 export(compareGenes)
 export(consensusSeq)
@@ -22,6 +23,7 @@ export(consensusSeqMIDAS1)
 export(consensusSeqMIDAS1Slow)
 export(consensusSeqMIDAS2)
 export(consensusSeqMIDAS2Slow)
+export(convertWideToMaf)
 export(doAdonis)
 export(doBoruta)
 export(doGSEA)
@@ -58,6 +60,7 @@ export(plotHeatmap)
 export(plotKEGGPathway)
 export(plotMAF)
 export(plotPCA)
+export(plotSNVInfo)
 export(returnGenes)
 export(reverseAnnot)
 export(setAnnotation)

diff --git a/R/NMF.R b/R/NMF.R
@@ -41,7 +41,7 @@ NMF <- function(stana, species, rank=3, target="KO", seed=53, method="snmf/r",
     cat("Filtered samples:", dim(mat)[2], "\n")
 
     ## Test multiple ranks
-    if (estimate_rank) {
+    if (estimate) {
     	test <- nmfEstimateRank(mat, range=estimate_range, method=method)
     	val <- test$measures[, "cophenetic"]
         b <- -1
@@ -142,8 +142,10 @@ plotAbundanceWithinSpecies <- function(stana, species, tss=TRUE, return_data=FAL
 #' @param species species ID
 #' @param tss perform total sum scaling to the resulting matrix
 #' @param change_name change pathway names to description
+#' @param summarize summarizing function, default to base::sum
 #' @export
-pathwayWithFactor <- function(stana, species, tss=FALSE, change_name=FALSE) {
+pathwayWithFactor <- function(stana, species, tss=FALSE, change_name=FALSE,
+	summarize=sum) {
   dat <- stana@NMF[[species]]
   dat <- basis(dat)
 
@@ -161,7 +163,7 @@ pathwayWithFactor <- function(stana, species, tss=FALSE, change_name=FALSE) {
     tmp <- summed[summed$V1==i, ]
     int <- length(intersect(row.names(dat), tmp$V2))
     if (int>1) {
-      tmpsum <- apply(dat[intersect(row.names(dat), tmp$V2),], 2, sum)
+      tmpsum <- apply(dat[intersect(row.names(dat), tmp$V2),], 2, summarize)
       return(c(i, tmpsum))
     } else if (int==1) {
       return(c(i, dat[intersect(row.names(dat), tmp$V2),]))

diff --git a/R/convertWideToMaf.R b/R/convertWideToMaf.R
@@ -0,0 +1,105 @@
+#' convertWideToMaf
+#' 
+#' should be six column layout with the column corresponding to
+#' "species_id","snv_id","ref_allele","alt_allele","ref_count","alt_count"
+#' @return species-named list of MAF data.frame
+#' @export
+convertWideToMaf <- function(df) {
+    colnames(df) <- c("species_id","snv_id","ref_allele","alt_allele","ref_count","alt_count")
+    df$ref_count <- as.numeric(df$ref_count)
+    df$alt_count <- as.numeric(df$alt_count)
+
+    df$count <- df$ref_count + df$alt_count
+    df$maf <- apply(df, 1, function(x) {
+        if (as.numeric(x[5]) > as.numeric(x[6])) {
+            as.numeric(x[6]) / as.numeric(x[7])
+        } else {
+            as.numeric(x[5]) / as.numeric(x[7])
+        }
+    })
+    df$major <- apply(df, 1, function(x) {
+        if (as.numeric(x[5]) > as.numeric(x[6])) {
+            x[3]
+        } else {
+            x[4]
+        }
+    })
+    df$minor <- apply(df, 1, function(x) {
+        if (as.numeric(x[5]) > as.numeric(x[6])) {
+            x[4]
+        } else {
+            x[3]
+        }
+    })
+    sp <- unique(df$species_id)
+    ret <- lapply(sp, function(s) {
+        tmp <- df %>% dplyr::filter(species_id == s)
+        tmp <- tmp[,c("snv_id", "maf")]
+        row.names(tmp) <- tmp$snv_id
+        tmp$snv_id <- NULL
+        tmp
+    })
+    names(ret) <- sp
+
+    summary <- lapply(sp, function(s) {
+        tmp <- df %>% dplyr::filter(species_id == s)
+        tmp <- tmp[,c("snv_id", "major", "minor")]
+        row.names(tmp) <- tmp$snv_id
+        tmp$snv_id <- NULL
+        colnames(tmp) <- c("major_allele","minor_allele")
+        tmp$species_id <- s
+        tmp
+    })
+    names(ret) <- sp
+    names(summary) <- sp
+
+    return(list(ret, summary))
+
+}
+
+#' combineMaf
+#' combine the maf table produced by convertWideToMaf()
+#' @param mafs named list of the results of convertWideToMaf()
+#' @export
+combineMaf <- function(mafs) {
+  if (is.null(names(mafs))) {stop("The list must be named")}
+  sps <- do.call(union, lapply(names(mafs), function(s) {
+    ## Species
+    names(mafs[[s]][[1]])
+  }))
+  snvls <- lapply(sps, function(s) {
+    tmpdf <- do.call(rbind, lapply(names(mafs), function(sample) {
+      tmp <- mafs[[sample]][[1]][[s]]
+      if (is.null(tmp)) {return(tmp)}
+      tmp$sample <- sample
+      tmp$snv_id <- row.names(tmp)
+      return(tmp)
+    }))
+    tbl <- tidyr::pivot_wider(tmpdf,
+                       id_cols=snv_id,
+                       names_from=sample,
+                       values_from = maf)
+    tbl <- data.frame(tbl)
+    row.names(tbl) <- tbl$snv_id
+    tbl$snv_id <- NULL
+    tbl
+  })
+
+  snvsu <- lapply(sps, function(s) {
+    tmpdf <- do.call(rbind, lapply(names(mafs), function(sample) {
+      tmp <- mafs[[sample]][[2]][[s]]
+      if (is.null(tmp)) {return(tmp)}
+      tmp$sample_name <- sample
+      return(tmp)
+    }))
+    tbl <- data.frame(tmpdf)
+    tbl
+  })
+
+  names(snvls) <- sps
+  stana <- new("stana")
+  stana@type <- "manual"
+  stana@snps <- snvls
+  stana@snpsInfo <- snvsu
+  stana
+}
diff --git a/R/doAdonis.R b/R/doAdonis.R
@@ -57,7 +57,7 @@ doAdonis <- function(stana, specs, cl=NULL,
             snps <- stana@genes[[sp]]          
         }
 
-        if (!target %in% c("tree","fasta")) {
+        if (!(target %in% c("tree","fasta"))) {
           if (maj & target=="snps" & stana@type=="MIDAS1") {
               chk <- read.table(paste0(stana@mergeDir,
                 "/",sp,"/snps_info.txt"),
@@ -85,22 +85,30 @@ doAdonis <- function(stana, specs, cl=NULL,
           distArg[["method"]] <- distMethod
           d <- do.call(dist, distArg)
           gr <- NULL
-          for (cn in colnames(snps)){
-            for (clm in seq_along(cl)){
-              if (cn %in% cl[[clm]]) {
-                gr <- c(gr, names(cl)[clm])
-              }
-            }
+          for (cn in sn){
+          	if (cn %in% unlist(cl)) {
+	            for (clm in seq_along(cl)){
+	              if (cn %in% cl[[clm]]) {
+	                gr <- c(gr, names(cl)[clm])
+	              }
+	            }          		
+          	} else {
+          		gr <- c(gr, NA)
+          	}
           }       
         } else {
           gr <- NULL
           for (cn in sn){
-            for (clm in seq_along(cl)){
-              if (cn %in% cl[[clm]]) {
-                gr <- c(gr, names(cl)[clm])
-              }
-            }
-          }  
+          	if (cn %in% unlist(cl)) {
+	            for (clm in seq_along(cl)){
+	              if (cn %in% cl[[clm]]) {
+	                gr <- c(gr, names(cl)[clm])
+	              }
+	            }          		
+          	} else {
+          		gr <- c(gr, NA)
+          	}
+          }
           d <- as.dist(d)         
         }
 
@@ -111,15 +119,15 @@ doAdonis <- function(stana, specs, cl=NULL,
             formulaPass <- as.formula(formula)
             pr <- FALSE
         }
-
+        argList[["na.action"]] <- na.omit
         argList[["formula"]] <- formulaPass
         adores <- do.call("adonis2", argList)
 
         if (pr) {
             pr <- adores$`Pr(>F)`
             pr <- pr[!is.na(pr)]
             r2 <- adores$R2[1]
-            qqcat("  R2: @{r2}, Pr: @{pr}\n")
+            qqcat("  F: @{adores$F[1]}, R2: @{r2}, Pr: @{pr}\n")
         }
         stana@adonisList[[sp]] <- adores
       }

diff --git a/R/doBoruta.R b/R/doBoruta.R
@@ -25,9 +25,11 @@ doBoruta <- function(stana, sp, cl=NULL, doFix=TRUE,
     qqcat("Using grouping from the slot\n")
     cl <- stana@cl
   }
-  if (target!="snps" & is.null(mat)){
+  if (target=="genes" & is.null(mat)){
     qqcat("If needed, please provide preprocessed matrix of genes to `mat`\n")
     filtDf <- stana@genes[[sp]]
+  } else if (target=="kos") {
+    filtDf <- stana@kos[[sp]]  	
   } else if (target=="snps"){
     filtDf <- stana@snps[[sp]]
     if (deleteZeroDepth) {

diff --git a/R/stana.R b/R/stana.R
@@ -53,6 +53,7 @@ setClass("stana", slots=list(
 #' cat_subtle
 #' @noRd
 cat_subtle <- function(...) cat(pillar::style_subtle(paste0(...)))
+
 #' show
 #' print the description of stana
 #' @importFrom dplyr group_by mutate summarise n
@@ -94,15 +95,15 @@ setMethod("show",
       cat_subtle("# Inferred fasta: ", length(object@kos), " ID: ", paste0(names(object@kos)[1], collapse="/"), "\n", sep="")
     }
     cat_subtle("# Size:", object.size(object), " B\n", sep="")
-    cat_subtle("# \n")
-    cat_subtle("# SNV description\n")
     if (object@type %in% c("MIDAS", "MIDAS2")) {
-    	df <- stana@snpsSummary %>%
-    	    dplyr::filter(species_id %in% names(object@snps))
+	    cat_subtle("# \n")
+	    cat_subtle("# SNV description\n")
+    	df <- object@snpsSummary %>%
+    	    dplyr::filter(.data$species_id %in% names(object@snps))
     	if (object@type=="MIDAS2") {
     		df$species_id <- loadDic()[[object@db]][as.character(df$species_id)]
     	}
-    	if (length(stana@cl)!=0) {
+    	if (length(object@cl)!=0) {
     		print(df %>% mutate(group=listToNV(object@cl)[sample_name]) %>%
     		group_by(group, species_id) %>%
     		summarise(n=n()))

diff --git a/R/utils.R b/R/utils.R
@@ -49,7 +49,7 @@ returnGenes <- function(stana, species, snvs) {
   }
 }
 
-
+#' @noRd
 listToNV <- function(l) {
     nm <- NULL
     val <- NULL
@@ -60,3 +60,25 @@ listToNV <- function(l) {
     names(nm) <- val
     return(nm)
 }
+
+
+#' plotSNVinfo
+#' 
+#' @param stana stana object
+#' @param sp candidate species
+#' @export
+#' @return ggplot
+plotSNVInfo <- function(stana, sp) {
+    d <- stana@snpsInfo[[sp]]
+    tbl <- data.frame(table(paste0(d$major_allele,"/",d$minor_allele)))
+    if ("locus_type" %in% colnames(d)) {
+        cdsc <- data.frame(table(d$locus_type))
+        cdsc <- paste(paste0(cdsc$Var1, " ", cdsc$Freq), collapse=" ")
+        title <- paste0("Total: ",dim(d)," (", cdsc, ")")
+    }
+    ggplot(tbl, aes(x=Var1, y=Freq)) +
+        geom_col()+
+        xlab("major_allele/minor_allele")+
+        cowplot::theme_cowplot()+
+        ggtitle(title)
+}
diff --git a/README.Rmd b/README.Rmd
@@ -56,7 +56,7 @@ getID(stana)
 samples <- getSlot(stana, "snps")[[1]] |> colnames()
 metadata <- data.frame(
     row.names=samples,
-    treatment=sample(1:3, length(samples), replace=TRUE),
+    treatment=factor(sample(1:3, length(samples), replace=TRUE)),
     marker=runif(length(samples))
 )