Skip to content

Commit

Permalink
added R package files
Browse files Browse the repository at this point in the history
  • Loading branch information
elior rahmani authored and elior rahmani committed May 15, 2019
1 parent 6dc0b33 commit 61df06d
Show file tree
Hide file tree
Showing 56 changed files with 910,190 additions and 0 deletions.
16 changes: 16 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Package: TCA
Type: Package
Title: Tensor Composition Analysis
Version: 1.0.0
Authors@R: person("Elior", "Rahmani", email = "[email protected]", role = c("aut", "cre"))
Author: Elior Rahmani [aut, cre]
Maintainer: Elior Rahmani <[email protected]>
Description: Tensor Composition Analysis (TCA) allows the deconvolution of two-dimensional data (features by observations) coming from a mixture of sources into a three-dimensional matrix of signals (features by observations by sources). TCA further allows to test the features in the data for different statistical relations with an outcome of interest while modeling source-specific effects (TCA regression); particularly, it allows to look for statistical relations between source-specific signals and an outcome. For example, TCA can deconvolve bulk tissue-level DNA methylation data (methylation sites by individuals) into a tensor of cell-type-specific methylation levels for each individual (methylation sites by individuals by cell types) and it allows to detect cell-type-specific relations (associations) with an outcome of interest.
License: GPL-3
Encoding: UTF-8
LazyData: true
Imports: config, data.table, futile.logger, gmodels, Matrix, matrixcalc, matrixStats, nloptr, parallel, pbapply, pracma, rsvd, stats, quadprog
RoxygenNote: 6.1.1
Depends: R (>= 3.4.0)
Suggests: testthat
URL: https://www.biorxiv.org/content/10.1101/437368v1
40 changes: 40 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Generated by roxygen2: do not edit by hand

export(refactor)
export(tca)
export(tcareg)
export(tcasub)
export(tensor)
export(test_data)
importFrom(data.table,fread)
importFrom(data.table,fwrite)
importFrom(futile.logger,appender.console)
importFrom(futile.logger,appender.tee)
importFrom(futile.logger,flog.appender)
importFrom(futile.logger,flog.debug)
importFrom(futile.logger,flog.info)
importFrom(futile.logger,flog.threshold)
importFrom(gmodels,fast.prcomp)
importFrom(matrixStats,colVars)
importFrom(matrixcalc,hadamard.prod)
importFrom(matrixcalc,matrix.inverse)
importFrom(nloptr,nloptr)
importFrom(parallel,clusterEvalQ)
importFrom(parallel,clusterExport)
importFrom(parallel,detectCores)
importFrom(parallel,makeCluster)
importFrom(parallel,stopCluster)
importFrom(pbapply,pblapply)
importFrom(pbapply,pboptions)
importFrom(pracma,Reshape)
importFrom(pracma,lsqlincon)
importFrom(pracma,repmat)
importFrom(rsvd,rpca)
importFrom(stats,lm)
importFrom(stats,logLik)
importFrom(stats,p.adjust)
importFrom(stats,pchisq)
importFrom(stats,residuals)
importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(stats,sd)
471 changes: 471 additions & 0 deletions R/TCA.R

Large diffs are not rendered by default.

710 changes: 710 additions & 0 deletions R/model_fit.R

Large diffs are not rendered by default.

229 changes: 229 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@

#' @importFrom futile.logger flog.appender
#' @importFrom futile.logger appender.tee
#' @importFrom futile.logger appender.console
#' @importFrom futile.logger flog.threshold
#' @importFrom futile.logger flog.debug
#' @importFrom parallel makeCluster
#' @importFrom parallel clusterEvalQ
#' @importFrom data.table fread
#' @importFrom data.table fwrite
#' @importFrom parallel stopCluster
#' @importFrom parallel detectCores
#' @importFrom stats lm
#' @importFrom stats logLik
#' @importFrom stats p.adjust
#' @importFrom stats pchisq
#' @importFrom stats residuals
#' @importFrom stats rnorm
#' @importFrom stats runif
#' @importFrom stats sd
start_logger <- function(log_file, debug){
config_level <- if (debug) "debug" else "default"
Sys.setenv(R_CONFIG_ACTIVE = config_level)
if (is.null(log_file)){
invisible(flog.appender(appender.console()))
}else{
invisible(flog.appender(appender.tee(log_file)))
}
invisible(flog.threshold(if(debug) "DEBUG" else "INFO"))
}


init_cluster <- function(num_cores = NULL){
flog.debug("Initiate cluster...")
cl <- makeCluster(get_num_cores(num_cores))
flog.debug("Parallel is on with %s nodes.",get_num_cores(num_cores))
invisible(clusterEvalQ(cl, c(library("pracma"), library("matrixcalc"), library("nloptr"), library("glmnet") )))
flog.debug("Packages were loaded into the cluster nodes.")
return(cl)
}


stop_cluster <- function(cl){
flog.debug("Stop cluster")
stopCluster(cl)
}


get_num_cores <- function(num_cores){
if (is.null(num_cores)){
return (detectCores() - 1)
}else{
return (num_cores)
}
}


assert <- function (expr, error) {
if (! expr) stop(error, call. = FALSE)
}


tca.validate_input <- function(X, W, C1, C2, refit_W, refit_features, refit_t, parallel, num_cores, max_iters, log_file, debug){

flog.debug("Validating input types...")
assert(is.matrix(X), "X must be of class 'matrix'")
assert(is.matrix(W), "W must be of class 'matrix'")
assert(is.null(C1) | is.matrix(C1), "C1 must be of class 'matrix' or NULL")
assert(is.null(C2) | is.matrix(C2), "C2 must be of class 'matrix' or NULL")
assert(is.null(refit_features) | is.character(refit_features), "refit_features must be of class 'character' or NULL")

assert(is.numeric(refit_t), "refit_t must be of class 'logical' or 'numeric'")
assert(is.numeric(max_iters), "max_iters must be of class 'numeric'")

assert(is.logical(refit_W), "refit_W must be of class 'logical'")
assert(is.logical(parallel), "parallel must be of class 'logical'")
assert(is.logical(debug), "debug must be of class 'logical'")
assert(is.character(log_file) | is.null(log_file), "log_file must be of class 'character' or NULL")

flog.debug("Validating input stucture and values...")
assert(!is.null(rownames(X)) | !is.null(colnames(X)), "X must have row names and column names")
assert(!is.null(rownames(W)) | !is.null(colnames(W)), "W must have row names and column names")
if (!is.null(C1)) assert(!is.null(rownames(C1)) | !is.null(colnames(C1)), "C1 must have row names and column names")
if (!is.null(C2)) assert(!is.null(rownames(C2)) | !is.null(colnames(C2)), "C2 must have row names and column names")

flog.debug("Validating input conditions...")
if (refit_W) assert(refit_t <= nrow(X) , "argument refit_t must satisfy refit_t <= nrow(X)")

flog.debug("Validating matrix dimensions...")
assert(dim(X)[2] == dim(W)[1] , "The number of columns in X is inconsistent with the number of rows in W")
if (!is.null(C1)) assert(dim(X)[2] == dim(C1)[1] , "The number of columns in X is inconsistent with the number of rows in C1")
if (!is.null(C2)) assert(dim(X)[2] == dim(C2)[1] , "The number of columns in X is inconsistent with the number of rows in C2")

flog.debug("Validating the order of observations across matrices...")
assert(all(colnames(X) == rownames(W)), "The order of observations in W (in the rows) must match the order of the observations in X (in the columns).")
if (!is.null(C1)) assert(all(colnames(X) == rownames(C1)), "The order of observations in C1 (in the rows) must match the order of the observations in X (in the columns).")
if (!is.null(C2)) assert(all(colnames(X) == rownames(C2)), "The order of observations in C2 (in the rows) must match the order of the observations in X (in the columns).")

flog.debug("Validating that W is non-negative and each row sums up to 1...")
assert(all(W >= 0), "The entries of W must be non-negative.")
assert(all(abs(rowSums(W) - 1) < 0.0001), "Each row in W must sum up to 1.")

}


tcasub.validate_input <- function(tca.mdl, features, log_file, debug){
flog.debug("Validating input...")
assert(is.list(tca.mdl) & is.matrix(tca.mdl[["W"]]) &
is.matrix(tca.mdl[["mus_hat"]]) &
is.matrix(tca.mdl[["sigmas_hat"]]) &
is.numeric(tca.mdl[["tau_hat"]]) &
is.matrix(tca.mdl[["deltas_hat"]]) &
is.matrix(tca.mdl[["gammas_hat"]]) &
is.matrix(tca.mdl[["C1"]]) &
is.matrix(tca.mdl[["C2"]]), "'tca.mdl' must be a returned value of the function 'tca'")
assert((!is.null(features)) & length(intersect(rownames(tca.mdl$mus), features)) == length(features), "'features' must be a subset of the features in 'tca.mdl'")
assert(is.logical(debug), "debug must be of class 'logical'")
assert(is.character(log_file) | is.null(log_file), "log_file must be of class 'character' or NULL")
}


tcareg.validate_input <- function(X, W, y, C3, test, null_model, alternative_model, save_results, output, sort_results, parallel, num_cores, log_file, features_metadata ,debug){

flog.debug("Validating input type for tcareg...")
options <- c("marginal","marginal_conditional","joint","single_effect","custom")
flog.debug("Validating input type...")
assert(is.matrix(X), "argument X must be of class 'matrix'")
assert(is.matrix(y), "argument y must be of class 'matrix'")
assert(is.null(C3) | is.matrix(C3), "argument C3 must be of class 'matrix' or NULL")

assert(is.element(test,options), paste("argument test must be one of the following options: ", paste(options, collapse=', ' ),sep = ""))

assert(is.null(num_cores) | is.numeric(num_cores), "argument num_cores must take a numric value or NULL")
assert(is.null(null_model) | is.character(null_model), "argument null_model must take a character value or NULL")
assert(is.null(alternative_model) | is.character(alternative_model), "argument alternative_model must take a character value or NULL")
assert(is.null(features_metadata) | is.character(features_metadata), "argument features_metadata must take a character value or NULL")
assert(is.character(output), "argument output must take a character value")

assert(is.logical(save_results), "argument save_results must take a logical value")
assert(is.logical(sort_results), "argument sort_results must take a logical value")
assert(is.logical(parallel), "argument parallel must take a logical value")
assert(is.logical(debug), "argument debug must take a logical value")
assert(is.character(log_file) | is.null(log_file), "argument log_file must take a character value or NULL")

flog.debug("Validating input stucture and values...")
assert(!is.null(rownames(X)) | !is.null(colnames(X)), "X must have row names and column names")
if (!is.null(C3)) assert(!is.null(rownames(C3)) | !is.null(colnames(C3)), "C3 must have row names and column names")
if (test == "custom") assert( (!is.null(alternative_model)) & (!is.null(null_model)), "arguments null_model and alternative_model cannot be NULL when test=custom")
if ( (!is.null(alternative_model)) | (!is.null(null_model))){
assert(test == "custom", "argument test must be set to 'custom' if arguments null_model and alternative_model are not NULL")
# make sure that null_model and alternative_model include labels that exist in the column names of W
assert(all(is.element(null_model, colnames(W))) & all(is.element(alternative_model, colnames(W))), "null_model and alternative_model must include values that exist in the column names of W")
# make sure the two models are nested
assert(length(setdiff(alternative_model, null_model)) > 0 & length(setdiff(null_model, alternative_model)) == 0, "null_model must be nested within alternative_model")
}

flog.debug("Validating matrix dimensions...")
if (!is.null(C3)) assert(dim(X)[2] == dim(C3)[1] , "the number of columns in X is inconsistent with the number of rows in C3")
assert(dim(X)[2] == length(y) , "the number of columns in X is inconsistent with the number of rows in y")

flog.debug("Validating the order of observations across matrices...")
assert(all(colnames(X) == rownames(W)), "The order of observations in W (in the rows) must match the order of the observations in X (in the columns).")
if (!is.null(C3)) assert(all(colnames(X) == rownames(C3)), "The order of observations in C3 (in the rows) must match the order of the observations in X (in the columns).")
assert(all(colnames(X) == rownames(y)), "The order of observations in y (in the rows) must match the order of the observations in X (in the columns).")

}


# log likelihood ratio test
# ll0 - vector with the log likelihood values for m tests under the null model
# ll1 - vector with the log likelihood values for m tests under the alternative model
# df - vector with the degrees of freedom for each of the m tests
lrt.test <- function(ll0, ll1, df){
flog.debug("Running lrt.test...")
m <- length(ll1)
stats = numeric(m)
pvals = numeric(m)
for (j in 1:m){
stats[j] <- -2*(ll0[j]-ll1[j])
pvals[j] <- pchisq(stats[j], df = df, lower.tail=FALSE)
}
return(list("stats" = stats, "pvals" = pvals))
}


save_association_results <- function(res, output, test, alternative_model, feature_ids, W_names, C3_names, sort_results, features_metadata){
flog.debug("Running save_association_results...")
m <- length(feature_ids)
metadata <- if (is.null(features_metadata)) matrix(0,m,0) else parse_features_metadata(feature_ids, features_metadata)
if (test == "marginal" | test == "marginal_conditional"){
# marginal or marginal_conditional
for (i in 1:length(res)){
filename <- paste(output, ".", test, ".", W_names[i], ".txt", sep ="")
wnames <- if (test == "marginal") W_names[i] else W_names
save_association_results.save(res[[i]], filename, feature_ids, sort_results, wnames, C3_names, metadata)
}
}else{
# joint, single_effect, or custom
filename <- paste(output, ".", test, ".txt", sep ="")
if (test == "custom") keep <- alternative_model
if (test == "joint") keep <- 1:length(W_names)
if (test == "single_effect") keep <- 1
save_association_results.save(res, filename, feature_ids, sort_results, W_names[keep], C3_names, metadata)
}
}


save_association_results.save <- function(res, filename, feature_ids, sort_results, W_names, C3_names, metadata){
flog.debug("save_association_results.save...")
#config <- config::get()
config <- config::get(file = system.file("extdata", "config.yml", package = "TCA"), use_parent = FALSE)
m <- length(feature_ids)
data <- data.frame(feature_ids, metadata, res$pvals, res$qvals, res$beta, res$null_ll, res$alternative_ll, res$stats, res$df, res$intercept, res$alpha)
betas <- if(length(W_names) == 1) "beta" else lapply(1:length(W_names), function(i) paste("beta.",W_names[i],sep=""))
colnames(data) <- c("ID", colnames(metadata), "pval", "qval", betas, "null_ll", "alternative_ll", "chi_squared", "df", "intercept", C3_names)
if (sort_results){
# sort by p-value
data <- data[order(data$pval),]
}
fwrite(data, file = filename, row.names = FALSE, col.names = TRUE, quote = FALSE, sep = config$sep, showProgress = config$show_progress, verbose = config$verbose)
}


parse_features_metadata <- function(feature_ids, features_metadata_file){
flog.debug("parse_features_metadata...")
metadata <- data.frame(fread(file = features_metadata_file, header=TRUE, showProgress=FALSE))
rownames(metadata) <- metadata[,1]
return(metadata[feature_ids,2:ncol(metadata)])
}
Loading

0 comments on commit 61df06d

Please sign in to comment.