Skip to content

Commit 666f9b5

Browse files
committed
NMF
1 parent 7080794 commit 666f9b5

File tree

2 files changed

+58
-25
lines changed

2 files changed

+58
-25
lines changed

R/NMF.R

Lines changed: 56 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
#' @param estimate estimate rank
1717
#' @param estimate_range range of ranks for the estimation
1818
#' @param nnlm_flag if TRUE, use NNLM package which can handle missing values in the data.
19-
#' The different approach for estimating k is taken.
19+
#' The different approach for estimating k is taken when estimate is TRUE.
20+
#' When estimate is TRUE, only the error matrix is returned.
2021
#' @param nnlm_args arguments passed to NNLM functions
2122
#' @import NMF
2223
#' @export
@@ -49,35 +50,57 @@ NMF <- function(stana, species, rank=3, target="KO", seed=53, method="snmf/r",
4950
nnlm_flag <- TRUE
5051
}
5152
}
52-
cat("Original features:", dim(mat)[1], "\n")
53-
cat("Original samples:", dim(mat)[2], "\n")
53+
cat_subtle("# Original features: ", dim(mat)[1], "\n", sep="")
54+
cat_subtle("# Original samples: ", dim(mat)[2], "\n", sep="")
5455
if (!nnlm_flag) {
5556
mat <- mat[apply(mat, 1, function(x) sum(x)!=0),]
5657
mat <- mat[,apply(mat, 2, function(x) sum(x)!=0)]
5758

58-
cat("Filtered features:", dim(mat)[1], "\n")
59-
cat("Filtered samples:", dim(mat)[2], "\n")
59+
cat_subtle("# Filtered features:", dim(mat)[1], "\n", sep="")
60+
cat_subtle("# Filtered samples:", dim(mat)[2], "\n", sep="")
6061
}
6162

6263
## Test multiple ranks
6364
if (estimate) {
64-
test <- nmfEstimateRank(as.matrix(mat),
65-
range=estimate_range, method=method)
66-
val <- test$measures[, "cophenetic"]
67-
b <- -1
68-
for (i in seq_along(val)) {
69-
if (is.na(val[i])) {
70-
next
71-
} else {
72-
if (val[i] > b) {
73-
b <- val[i]
74-
} else {
75-
break
76-
}
77-
}
78-
}
79-
rank <- estimate_range[i]
80-
cat("Chosen rank:", rank, "\n")
65+
if (nnlm_flag) {
66+
cat_subtle("# NNLM flag enabled, the cross-validation error matrix only will be returned.\n")
67+
## Following the vignette procedures in NNLM
68+
## Comparing the MSE and randomly assigned missing values
69+
A <- as.matrix(mat)
70+
already_na <- which(is.na(A))
71+
allind <- seq_len(length(A))
72+
newind <- allind[!(allind %in% already_na)]
73+
ind <- sample(newind, 0.1*length(newind));
74+
A2 <- A;
75+
A2[ind] <- NA;
76+
77+
err <- sapply(X = estimate_range,
78+
FUN = function(k) {
79+
z <- nnmf(A2, k);
80+
c(mean((with(z, W %*% H)[ind] - A[ind])^2), tail(z$mse, 1));
81+
}
82+
);
83+
return(err)
84+
} else {
85+
## Following the cophenetic correlation coefficient drop procedure
86+
test <- nmfEstimateRank(as.matrix(mat),
87+
range=estimate_range, method=method)
88+
val <- test$measures[, "cophenetic"]
89+
b <- -1
90+
for (i in seq_along(val)) {
91+
if (is.na(val[i])) {
92+
next
93+
} else {
94+
if (val[i] > b) {
95+
b <- val[i]
96+
} else {
97+
break
98+
}
99+
}
100+
}
101+
rank <- estimate_range[i]
102+
cat("Chosen rank:", rank, "\n")
103+
}
81104
}
82105

83106
cat("Rank", rank, "\n")
@@ -142,6 +165,12 @@ plotStackedBarPlot <- function(stana, sp, by="NMF") {
142165
}
143166

144167
#' alphaDiversityWithinSpecies
168+
#' @param stana stana object
169+
#' @param species species
170+
#' @param method method for vegan::diversity
171+
#' if `spc`, factor count will be returned.
172+
#' @param rank if NMF is not performed, this performs the NMF beforehand.
173+
#' rank can be specified here.
145174
#' @export
146175
alphaDiversityWithinSpecies <- function(stana, species, method="shannon", rank=5) {
147176
if (is.null(stana@NMF[[species]])) {
@@ -172,6 +201,7 @@ alphaDiversityWithinSpecies <- function(stana, species, method="shannon", rank=5
172201
#' @param species species ID
173202
#' @param tss perform total sum scaling
174203
#' @param return_data return only the data, not plot
204+
#' @param by NMF or coef matrix set to `coefMat` slot
175205
#' @export
176206
plotAbundanceWithinSpecies <- function(stana, species, tss=TRUE, return_data=FALSE, by="NMF") {
177207
if (by=="NMF") {
@@ -180,8 +210,10 @@ plotAbundanceWithinSpecies <- function(stana, species, tss=TRUE, return_data=FAL
180210
}
181211
res <- stana@NMF[[species]]
182212
H <- coef(res)
183-
} else {
213+
} else if (by=="coef") {
184214
H <- stana@coefMat[[species]]
215+
} else {
216+
stop("NMF or coef should be specified in `by`")
185217
}
186218

187219
if (tss) {
@@ -248,7 +280,7 @@ pathwayWithFactor <- function(stana, species, tss=FALSE, change_name=FALSE,
248280
})) %>% data.frame()
249281
row.names(pathdf) <- pathdf[,1]
250282
pathdf[,1] <- NULL
251-
colnames(pathdf) <- as.character(paste0("factor",seq_len(ncol(pathdf))))
283+
# colnames(pathdf) <- as.character(paste0("factor",seq_len(ncol(pathdf))))
252284
pathdf <- dplyr::mutate_all(pathdf, as.numeric)
253285
if (tss) {
254286
pathdf <- apply(pathdf, 2, function(x) x / sum(x))

man/NMF.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)