16
16
# ' @param estimate estimate rank
17
17
# ' @param estimate_range range of ranks for the estimation
18
18
# ' @param nnlm_flag if TRUE, use NNLM package which can handle missing values in the data.
19
- # ' The different approach for estimating k is taken.
19
+ # ' The different approach for estimating k is taken when estimate is TRUE.
20
+ # ' When estimate is TRUE, only the error matrix is returned.
20
21
# ' @param nnlm_args arguments passed to NNLM functions
21
22
# ' @import NMF
22
23
# ' @export
@@ -49,35 +50,57 @@ NMF <- function(stana, species, rank=3, target="KO", seed=53, method="snmf/r",
49
50
nnlm_flag <- TRUE
50
51
}
51
52
}
52
- cat( " Original features:" , dim(mat )[1 ], " \n " )
53
- cat( " Original samples:" , dim(mat )[2 ], " \n " )
53
+ cat_subtle( " # Original features: " , dim(mat )[1 ], " \n " , sep = " " )
54
+ cat_subtle( " # Original samples: " , dim(mat )[2 ], " \n " , sep = " " )
54
55
if (! nnlm_flag ) {
55
56
mat <- mat [apply(mat , 1 , function (x ) sum(x )!= 0 ),]
56
57
mat <- mat [,apply(mat , 2 , function (x ) sum(x )!= 0 )]
57
58
58
- cat( " Filtered features:" , dim(mat )[1 ], " \n " )
59
- cat( " Filtered samples:" , dim(mat )[2 ], " \n " )
59
+ cat_subtle( " # Filtered features:" , dim(mat )[1 ], " \n " , sep = " " )
60
+ cat_subtle( " # Filtered samples:" , dim(mat )[2 ], " \n " , sep = " " )
60
61
}
61
62
62
63
# # Test multiple ranks
63
64
if (estimate ) {
64
- test <- nmfEstimateRank(as.matrix(mat ),
65
- range = estimate_range , method = method )
66
- val <- test $ measures [, " cophenetic" ]
67
- b <- - 1
68
- for (i in seq_along(val )) {
69
- if (is.na(val [i ])) {
70
- next
71
- } else {
72
- if (val [i ] > b ) {
73
- b <- val [i ]
74
- } else {
75
- break
76
- }
77
- }
78
- }
79
- rank <- estimate_range [i ]
80
- cat(" Chosen rank:" , rank , " \n " )
65
+ if (nnlm_flag ) {
66
+ cat_subtle(" # NNLM flag enabled, the cross-validation error matrix only will be returned.\n " )
67
+ # # Following the vignette procedures in NNLM
68
+ # # Comparing the MSE and randomly assigned missing values
69
+ A <- as.matrix(mat )
70
+ already_na <- which(is.na(A ))
71
+ allind <- seq_len(length(A ))
72
+ newind <- allind [! (allind %in% already_na )]
73
+ ind <- sample(newind , 0.1 * length(newind ));
74
+ A2 <- A ;
75
+ A2 [ind ] <- NA ;
76
+
77
+ err <- sapply(X = estimate_range ,
78
+ FUN = function (k ) {
79
+ z <- nnmf(A2 , k );
80
+ c(mean((with(z , W %*% H )[ind ] - A [ind ])^ 2 ), tail(z $ mse , 1 ));
81
+ }
82
+ );
83
+ return (err )
84
+ } else {
85
+ # # Following the cophenetic correlation coefficient drop procedure
86
+ test <- nmfEstimateRank(as.matrix(mat ),
87
+ range = estimate_range , method = method )
88
+ val <- test $ measures [, " cophenetic" ]
89
+ b <- - 1
90
+ for (i in seq_along(val )) {
91
+ if (is.na(val [i ])) {
92
+ next
93
+ } else {
94
+ if (val [i ] > b ) {
95
+ b <- val [i ]
96
+ } else {
97
+ break
98
+ }
99
+ }
100
+ }
101
+ rank <- estimate_range [i ]
102
+ cat(" Chosen rank:" , rank , " \n " )
103
+ }
81
104
}
82
105
83
106
cat(" Rank" , rank , " \n " )
@@ -142,6 +165,12 @@ plotStackedBarPlot <- function(stana, sp, by="NMF") {
142
165
}
143
166
144
167
# ' alphaDiversityWithinSpecies
168
+ # ' @param stana stana object
169
+ # ' @param species species
170
+ # ' @param method method for vegan::diversity
171
+ # ' if `spc`, factor count will be returned.
172
+ # ' @param rank if NMF is not performed, this performs the NMF beforehand.
173
+ # ' rank can be specified here.
145
174
# ' @export
146
175
alphaDiversityWithinSpecies <- function (stana , species , method = " shannon" , rank = 5 ) {
147
176
if (is.null(stana @ NMF [[species ]])) {
@@ -172,6 +201,7 @@ alphaDiversityWithinSpecies <- function(stana, species, method="shannon", rank=5
172
201
# ' @param species species ID
173
202
# ' @param tss perform total sum scaling
174
203
# ' @param return_data return only the data, not plot
204
+ # ' @param by NMF or coef matrix set to `coefMat` slot
175
205
# ' @export
176
206
plotAbundanceWithinSpecies <- function (stana , species , tss = TRUE , return_data = FALSE , by = " NMF" ) {
177
207
if (by == " NMF" ) {
@@ -180,8 +210,10 @@ plotAbundanceWithinSpecies <- function(stana, species, tss=TRUE, return_data=FAL
180
210
}
181
211
res <- stana @ NMF [[species ]]
182
212
H <- coef(res )
183
- } else {
213
+ } else if ( by == " coef " ) {
184
214
H <- stana @ coefMat [[species ]]
215
+ } else {
216
+ stop(" NMF or coef should be specified in `by`" )
185
217
}
186
218
187
219
if (tss ) {
@@ -248,7 +280,7 @@ pathwayWithFactor <- function(stana, species, tss=FALSE, change_name=FALSE,
248
280
})) %> % data.frame ()
249
281
row.names(pathdf ) <- pathdf [,1 ]
250
282
pathdf [,1 ] <- NULL
251
- colnames(pathdf ) <- as.character(paste0(" factor" ,seq_len(ncol(pathdf ))))
283
+ # colnames(pathdf) <- as.character(paste0("factor",seq_len(ncol(pathdf))))
252
284
pathdf <- dplyr :: mutate_all(pathdf , as.numeric )
253
285
if (tss ) {
254
286
pathdf <- apply(pathdf , 2 , function (x ) x / sum(x ))
0 commit comments