diff --git a/DESCRIPTION b/DESCRIPTION
index 44bccd9..66e7023 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,17 +1,18 @@
 Package: TCA
 Type: Package
 Title: Tensor Composition Analysis
-Version: 1.1.0
-Authors@R: person("Elior", "Rahmani", email = "elior.rahmani@gmail.com", role = c("aut", "cre"))
-Author: Elior Rahmani [aut, cre]
-Maintainer: Elior Rahmani <elior.rahmani@gmail.com>
-Description: Tensor Composition Analysis (TCA) allows the deconvolution of two-dimensional data (features by observations) coming from a mixture of sources into a three-dimensional matrix of signals (features by observations by sources). TCA further allows to test the features in the data for different statistical relations with an outcome of interest while modeling source-specific effects (TCA regression); particularly, it allows to look for statistical relations between source-specific signals and an outcome. For example, TCA can deconvolve bulk tissue-level DNA methylation data (methylation sites by individuals) into a tensor of cell-type-specific methylation levels for each individual (methylation sites by individuals by cell types) and it allows to detect cell-type-specific relations (associations) with an outcome of interest. For more details see Rahmani et al. (2018).
+Version: 1.2.1
+Authors@R: c(
+    person("Elior", "Rahmani", role=c("aut", "cre"), email = "elior.rahmani@gmail.com"),
+    person("Brandon", "Jew", role=c("ctb"))
+    )
+Description: Tensor Composition Analysis (TCA) allows the deconvolution of two-dimensional data (features by observations) coming from a mixture of heterogeneous sources into a three-dimensional matrix of signals (features by observations by sources). The TCA framework further allows testing the features in the data for different statistical relations with an outcome of interest while modeling source-specific effects; in particular, it allows testing for statistical relations between source-specific signals and an outcome. For example, TCA can deconvolve bulk tissue-level DNA methylation data (methylation sites by individuals) into a three-dimensional tensor of cell-type-specific methylation levels for each individual (i.e. methylation sites by individuals by cell types), and it allows detecting cell-type-specific statistical relations (associations) with phenotypes. For more details see Rahmani et al. (2019).
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
 Imports: config, data.table, futile.logger, gmodels, Matrix, matrixcalc, matrixStats, nloptr, parallel, pbapply, pracma, rsvd, stats, quadprog, glmnet
-RoxygenNote: 6.1.1
-Depends: R (>= 3.4.0)
+RoxygenNote: 7.1.1
+Depends: R (>= 3.5.0)
 Suggests: testthat,
     knitr,
diff --git a/NAMESPACE b/NAMESPACE
index 40c282c..4b73fe2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -30,7 +30,9 @@ importFrom(pbapply,pboptions)
 importFrom(pracma,Reshape)
 importFrom(pracma,lsqlincon)
 importFrom(pracma,repmat)
+importFrom(pracma,sqrtm)
 importFrom(rsvd,rpca)
+importFrom(stats,anova)
 importFrom(stats,lm)
 importFrom(stats,logLik)
 importFrom(stats,p.adjust)
diff --git a/NEWS.md b/NEWS.md
index c4ab15e..a18feab 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,23 @@
+TCA v1.2.1 (Release date: 02-13-2021)
+==============
+
+*NOTE* v1.2.1 is not backward compatible with earlier versions
+
+Changes:
+
+* A new argument 'vars.mle' in 'tca' controls the method used for optimization; setting it to FALSE allows a much faster optimization.
+
+* A new argument 'fast_mode' in 'tcareg' allows a much faster optimization.
+
+* A new argument 'constrain_mu' in 'tca' allows determining whether to constrain the mean parameters in the optimization; setting it to FALSE now allows obtaining p-values for the covariates in 'C1' and 'C2'.
+
+* A new 'scale' argument in 'tensor' for dividing estimates by estimated standard deviations.
+
+* Default values of several arguments have been changed.
+
+* Switched the default FDR method used in 'tcareg' back from "BY" to "BH" (can be changed by editing the config file).
+
+
 TCA v1.1.0 (Release date: 11-14-2019)
 ==============
diff --git a/R/TCA.R b/R/TCA.R
index d6418b5..cdcb0da 100644
--- a/R/TCA.R
+++ b/R/TCA.R
@@ -1,55 +1,60 @@
 #' @title Fitting the TCA model
 #'
-#' @description Fits the TCA model for an input matrix of observations coming from a mixture of \code{k} sources, under the assumption that each observation is a mixture of unique source-specific values (in each feature in the data). For example, in the context of tissue-level bulk DNA methylation data coming from a mixture of cell types (i.e. the input is methylation sites by individuals), \code{tca} allows to model the methylation of each individual as a mixture of cell-type-specific methylation levels that are unique to the individual.
+#' @description Fits the TCA model for an input matrix of features by observations coming from a mixture of \code{k} sources, under the assumption that each observation is a mixture of unique (unobserved) source-specific values (in each feature in the data). This function further allows statistically testing the effect of covariates on source-specific values. For example, in the context of tissue-level bulk DNA methylation data coming from a mixture of cell types (i.e. the input is methylation sites by individuals), \code{tca} allows modeling the methylation of each individual as a mixture of cell-type-specific methylation levels that are unique to the individual. In addition, it allows statistically testing the effects of covariates and phenotypes on methylation at the cell-type level.
 #'
-#' @param X An \code{m} by \code{n} matrix of measurements of \code{m} features for \code{n} observations. Each column in \code{X} is assumed to be a mixture of \code{k} different sources. Note that \code{X} must include row names and column names and that NA values are currently not supported. \code{X} should not include features that are constant across all observations.
-#' @param W An \code{n} by \code{k} matrix of weights - the weights of \code{k} sources for each of the \code{n} mixtures (observations). All the weights must be positive and each row, corresponding to the weights of a single observation, must sum up to 1. Note that \code{W} must include row names and column names and that NA values are currently not supported. In case where only initial estimates of \code{W} are available, \code{tca} can be set to re-estimate \code{W} (see \code{refit_W}).
-#' @param C1 An \code{n} by \code{p1} design matrix of covariates that may affect the hidden source-specific values (possibly a different effect on each source). Note that \code{C1} must include row names and column names and should not include an intercept term. NA values are currently not supported.
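Before moving through the updated roxygen documentation below, here is a minimal hedged sketch of a v1.2.1-style call that exercises the new arguments listed in the NEWS entries above (simulated data via the package's own test_data(), as used in the examples; all argument values are illustrative):

    library(TCA)
    # simulate n=100 observations, m=20 features, k=3 sources, p1=p2=1 covariates:
    data <- test_data(100, 20, 3, 1, 1, 0.01)
    # vars.mle = FALSE uses the faster non-negative least-squares variance
    # estimation; constrain_mu = FALSE leaves the mean parameters unconstrained,
    # so covariate p-values are returned:
    tca.mdl <- tca(X = data$X, W = data$W, C1 = data$C1, C2 = data$C2,
                   vars.mle = FALSE, constrain_mu = FALSE)
    head(tca.mdl$gammas_hat_pvals)  # would be NULL had constrain_mu been TRUE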
-#' @param C1.map An \code{p1} by \code{k} matrix of 0/1 values, indicating for each of the \code{p1} covariates in \code{C1} whether to consider its potential effects on the values of each of the \code{k} sources (e.g., if position \code{i,j} in \code{C1.map} is 1 then the potential effect of the \code{i}-th covariate in \code{C1} on the {j}-th source will be considered). If \code{C1.map == NULL} then effects for all covariates in \code{C1} in each of the sources will be considered.
+#' @param X An \code{m} by \code{n} matrix of measurements of \code{m} features for \code{n} observations. Each column in \code{X} is assumed to be a mixture of \code{k} sources. Note that \code{X} must include row names and column names and that NA values are currently not supported. \code{X} should not include features that are constant across all observations.
+#' @param W An \code{n} by \code{k} matrix of weights: the weights of the \code{k} sources for each of the \code{n} mixtures (observations). All the weights must be positive and each row, corresponding to the weights of a single observation, must sum to 1. Note that \code{W} must include row names and column names and that NA values are currently not supported. In cases where only noisy estimates of \code{W} are available, \code{tca} can be set to re-estimate \code{W} (see \code{refit_W}).
+#' @param C1 An \code{n} by \code{p1} design matrix of covariates that may affect the hidden source-specific values (possibly a different effect size in each source). Note that \code{C1} must include row names and column names and should not include an intercept term. NA values are currently not supported. Note that each covariate in \code{C1} results in \code{k} additional parameters in the model of each feature; therefore, in order to alleviate the possibility of model overfitting, it is advised to keep the size of \code{C1} small relative to the sample size in \code{X}.
+#' @param C1.map A \code{p1} by \code{k} matrix of 0/1 values, indicating for each of the \code{p1} covariates in \code{C1} whether to consider its potential effects on the values of each of the \code{k} sources (e.g., if position \code{i,j} in \code{C1.map} is 1 then the potential effect of the \code{i}-th covariate in \code{C1} on the \eqn{j}-th source will be considered). If \code{C1.map == NULL} then effects for all covariates in \code{C1} will be considered in each of the sources. Note that \code{C1.map} is available only if \code{constrain_mu == TRUE}.
 #' @param C2 An \code{n} by \code{p2} design matrix of covariates that may affect the mixture (i.e. rather than directly the sources of the mixture; for example, variables that capture biases in the collection of the measurements). Note that \code{C2} must include row names and column names and should not include an intercept term. NA values are currently not supported.
 #' @param refit_W A logical value indicating whether to re-estimate the input \code{W} under the TCA model.
-#' @param refit_W.features A vector with the names of the features in \code{X} to consider when re-estimating \code{W} (i.e. when \code{refit_W == TRUE}). This is useful since oftentimes just a subset of the features in \code{X} will be informative for estimating \code{W}. If \code{refit_W.features == NULL} then the ReFACTor algorithm will be used for performing feature selection (see also \code{refit_W.sparsity, refit_W.sd_threshold}).
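As a hedged illustration of the \code{C1.map} encoding documented above, a hypothetical matrix for \code{p1 = 2} covariates and \code{k = 3} sources (values made up for illustration):

    # Consider covariate 1 in sources 1 and 2 only, covariate 2 in all sources;
    # per the documentation above, C1.map is considered only when constrain_mu == TRUE.
    C1.map <- matrix(c(1, 1, 0,
                       1, 1, 1),
                     nrow = 2, byrow = TRUE)  # p1 = 2 rows, k = 3 columns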
+#' @param refit_W.features A vector with the names of the features in \code{X} to consider when re-estimating \code{W} (i.e. when \code{refit_W == TRUE}). This is useful since in general only a subset of the features in \code{X} are expected to be highly informative for estimating \code{W}. If \code{refit_W.features == NULL} then the ReFACTor algorithm will be used for performing feature selection (see also \code{refit_W.sparsity, refit_W.sd_threshold}).
 #' @param refit_W.sparsity A numeric value indicating the number of features to select using the ReFACTor algorithm when re-estimating \code{W} (activated only if \code{refit_W == TRUE} and \code{refit_W.features == NULL}). Note that \code{refit_W.sparsity} must be lower than or equal to the number of features in \code{X}. For more information, see the argument \code{sparsity} in \link{refactor}.
 #' @param refit_W.sd_threshold A numeric value indicating a standard deviation threshold to be used for excluding low-variance features in \code{X} (activated only if \code{refit_W == TRUE} and \code{refit_W.features == NULL}). For more information, see the argument \code{sd_threshold} in \link{refactor}.
 #' @param tau A non-negative numeric value of the standard deviation of the measurement noise (i.e. the i.i.d. component of variation in the model). If \code{tau == NULL} then \code{tca} will estimate \code{tau}.
+#' @param vars.mle A logical value indicating whether to use maximum likelihood estimation when learning the variances in the model. If \code{vars.mle == FALSE} then \code{tca} will use a non-negative least-squares optimization for learning the variances. In practice, both approaches appear to provide highly correlated models; however, setting \code{vars.mle == FALSE} allows a substantial speedup.
+#' @param constrain_mu A logical value indicating whether to constrain the estimates of the mean parameters (i.e. \eqn{\{\mu_{hj}\}}; see details below), in which case they will be constrained to the range of the values in \code{X}. Note that if \code{constrain_mu == TRUE} then \code{tca} will not output p-values for the effect sizes of the covariates in \code{C1} and in \code{C2}.
 #' @param parallel A logical value indicating whether to use parallel computing (possible when using a multi-core machine).
 #' @param num_cores A numeric value indicating the number of cores to use (activated only if \code{parallel == TRUE}). If \code{num_cores == NULL} then all available cores except for one will be used.
-#' @param max_iters A numeric value indicating the maximal number of iterations to use in the optimization of the TCA model (\code{max_iters} iterations will be used as long as the optimization does not converge in earlier iterations).
-#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == TRUE} then no output file will be generated regardless of the value of \code{log_file}.
-#' @param debug A logical value indicating whether to set the logger to a more detailed debug level; please set \code{debug} to \code{TRUE} before reporting issues.
+#' @param max_iters A numeric value indicating the maximal number of iterations to use in the optimization of the TCA model (\code{max_iters} iterations will be used as long as the optimization does not converge earlier).
+#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == FALSE} then no output file will be generated regardless of the value of \code{log_file}.
+#' @param debug A logical value indicating whether to set the logger to a more detailed debug level; set \code{debug} to \code{TRUE} before reporting issues.
 #' @param verbose A logical value indicating whether to print logs.
 #'
 #' @importFrom futile.logger flog.info
 #' @importFrom pbapply pboptions
 #'
-#' @details The TCA model assumes that the hidden source-specific values are random variables. Formally, denote by \eqn{Z_{hj}^i} the source-specific value of observation \eqn{i} in feature \eqn{j} source \eqn{h}, the TCA model assumes: \deqn{Z_{hj}^i \sim N(\mu_{hj},\sigma_{hj}^2)} where \eqn{\mu_{hj},\sigma_{hj}} represent the mean and standard deviation that are specific to feature \eqn{j} source \eqn{h}. The model further assumes that the observed value of observation \eqn{i} in feature \eqn{j} is a mixture of \eqn{k} different sources: \deqn{X_{ji} = \sum_{h=1}^k W_{ih}Z_{hj}^i + \epsilon_{ji}} where \eqn{W_{ih}} is the non-negative proportion of source \eqn{h} in the mixture of observation \eqn{i} such that \eqn{\sum_{h=1}^kW_{ih} = 1}, and \eqn{\epsilon_{ji} \sim N(0,\tau^2)} is an i.i.d. component of variation that models measurement noise. Note that the mixture proportions in \eqn{W} are, in general, unique for each individual, therefore each entry in the data matrix \eqn{X} is coming from a unique distribution (i.e. a different mean and a different variance).
+#' @details The TCA model assumes that the hidden source-specific values are random variables. Formally, denoting by \eqn{Z_{hj}^i} the source-specific value of observation \eqn{i} in feature \eqn{j}, source \eqn{h}, the TCA model assumes: \deqn{Z_{hj}^i \sim N(\mu_{hj},\sigma_{hj}^2)} where \eqn{\mu_{hj},\sigma_{hj}} represent the mean and standard deviation that are specific to feature \eqn{j}, source \eqn{h}. The model further assumes that the observed value of observation \eqn{i} in feature \eqn{j} is a mixture of \eqn{k} different sources: \deqn{X_{ji} = \sum_{h=1}^k W_{ih}Z_{hj}^i + \epsilon_{ji}} where \eqn{W_{ih}} is the non-negative proportion of source \eqn{h} in the mixture of observation \eqn{i} such that \eqn{\sum_{h=1}^kW_{ih} = 1}, and \eqn{\epsilon_{ji} \sim N(0,\tau^2)} is an i.i.d. component of variation that models measurement noise. Note that the mixture proportions in \eqn{W} are, in general, unique for each individual; therefore, each entry in \eqn{X} comes from a unique distribution (i.e. a different mean and a different variance).
-#' In cases where the true \code{W} is unknown, \code{tca} can be provided with initial estimates of \code{W} and then re-estimate \code{W} as part of the optimization procedure (see argument \code{refit_W}). These initial estimates should not be random but rather capture the information in \code{W} to some extent. When the argument \code{refit_W} is used, it is typically the case that only a subset of the features should be used for re-estimating \code{W}. Therefore, when re-estimating \code{W}, \code{tca} performs feature selection using the ReFACTor algorithm; alternatively, it can also be provided with a user-specified list of features to be used in the re-estimation (see argument \code{refit_W.features}).
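The generative model spelled out in the details above is easy to simulate directly, which can help build intuition; a small sketch for a single feature, with all parameter values made up:

    set.seed(1)
    n <- 100; k <- 3
    W <- matrix(rexp(n * k), n, k)
    W <- W / rowSums(W)                                   # proportions; each row sums to 1
    mu <- c(0.2, 0.5, 0.8); sigma <- c(0.05, 0.10, 0.07)  # source-specific means and sds
    Z <- sapply(1:k, function(h) rnorm(n, mu[h], sigma[h]))  # Z_hj^i ~ N(mu_hj, sigma_hj^2)
    tau <- 0.01
    x_j <- rowSums(W * Z) + rnorm(n, 0, tau)              # X_ji = sum_h W_ih Z_hj^i + eps_ji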
+#' In cases where the true \code{W} is unknown, \code{tca} can be provided with noisy estimates of \code{W} and then re-estimate \code{W} as part of the optimization procedure (see argument \code{refit_W}). These initial estimates should not be random but rather capture the information in \code{W} to some extent. When the argument \code{refit_W} is used, it is typically the case that only a subset of the features should be used for re-estimating \code{W}. Therefore, when re-estimating \code{W}, \code{tca} performs feature selection using the ReFACTor algorithm; alternatively, it can also be provided with a user-specified list of features to be used in the re-estimation, assuming that such a list of features that are most informative for estimating \eqn{W} exists (see argument \code{refit_W.features}).
 #'
-#' Factors that systematically affect the source-specific values \eqn{Z_{hj}^i} can be further considered (see argument \code{C1}). In that case, we assume: \deqn{Z_{hj}^i \sim N(\mu_{hj}+c^{(1)}_i \gamma_j^h,\sigma_{hj}^2)} where \eqn{c^{(1)}_i} is a row vector from \code{C1}, corresponding to the values of the \eqn{p_1} factors for observation \eqn{i}, and \eqn{\gamma_j^h} is a vector of \eqn{p_1} corresponding effect sizes.
+#' Covariates that systematically affect the source-specific values \eqn{Z_{hj}^i} can be further considered (see argument \code{C1}). In that case, we assume: \deqn{Z_{hj}^i \sim N(\mu_{hj}+c^{(1)}_i \gamma_j^h,\sigma_{hj}^2)} where \eqn{c^{(1)}_i} and \eqn{\gamma_j^h} correspond to the \eqn{p_1} covariate values of observation \eqn{i} (i.e. a row vector from \code{C1}) and their effect sizes, respectively.
 #'
-#' Factors that systematically affect the mixture values \eqn{X_{ji}}, such as variables that capture biases in the collection of the measurements, can also be considered (see argument \code{C2}). In that case, we assume: \deqn{X_{ji} \sim \sum_{h=1}^k W_{ih}Z_{hj}^i + c^{(2)}_i \delta_j + \epsilon_{ij}} where \eqn{c^{(2)}_i} is a row vector from \code{C2}, corresponding to the values of the \eqn{p_2} factors for observation \eqn{i}, and \eqn{\delta_j} is a vector of \eqn{p_2} corresponding effect sizes.
+#' Covariates that systematically affect the mixture values \eqn{X_{ji}}, such as variables that capture technical biases in the collection of the measurements, can also be considered (see argument \code{C2}). In that case, we assume: \deqn{X_{ji} = \sum_{h=1}^k W_{ih}Z_{hj}^i + c^{(2)}_i \delta_j + \epsilon_{ij}} where \eqn{c^{(2)}_i} and \eqn{\delta_j} correspond to the \eqn{p_2} covariate values of observation \eqn{i} (i.e. a row vector from \code{C2}) and their effect sizes, respectively.
 #'
+#' Since the standard deviation of \eqn{X_{ji}} is specific to observation \eqn{i} and feature \eqn{j}, we can obtain p-values for the estimates of \eqn{\gamma_j^h} and \eqn{\delta_j} by dividing each observed data point \eqn{x_{ji}} by its estimated standard deviation and calculating T-statistics under a standard linear regression framework.
 #'
-#' @return A list with the estimated parameters of the model. This list can be then used as the input to other functions such as \code{tcareg}.
-#' \item{W}{An \code{n} by \code{k} matrix of weights. If \code{refit_W == TRUE} then this is the re-estimated \code{W}; otherwise this is the input \code{W}}
+#'
+#' @return A list with the estimated parameters of the model. This list can then be used as the input to other functions such as \link{tcareg}.
+#' \item{W}{An \code{n} by \code{k} matrix of weights. If \code{refit_W == TRUE} then this is the re-estimated \code{W}; otherwise this is the input \code{W}}
 #' \item{mus_hat}{An \code{m} by \code{k} matrix of estimates for the mean of each source in each feature.}
 #' \item{sigmas_hat}{An \code{m} by \code{k} matrix of estimates for the standard deviation of each source in each feature.}
 #' \item{tau_hat}{An estimate of the standard deviation of the i.i.d. component of variation in \code{X}. If an input value was provided for \code{tau} (i.e. \code{tau != NULL}) then \code{tau_hat == tau}.}
-#' \item{gammas_hat}{An \code{m} by \code{k*p1} matrix of the estimated effects of the \code{p1} factors in \code{C1} on each of the \code{m} features in \code{X}, where the first \code{p1} columns are the source-specific effects of the \code{p1} factors on the first source, the following \code{p1} columns are the source-specific effects on the second source and so on.}
-#' \item{deltas_hat}{An \code{m} by \code{p2} matrix of the estimated effects of the \code{p2} factors in \code{C2} on the mixture values of each of the \code{m} features in \code{X}.}
+#' \item{gammas_hat}{An \code{m} by \code{k*p1} matrix of the estimated effects of the \code{p1} covariates in \code{C1} on each of the \code{m} features in \code{X}, where the first \code{p1} columns are the source-specific effects of the \code{p1} covariates on the first source, the following \code{p1} columns are the source-specific effects on the second source and so on.}
+#' \item{deltas_hat}{An \code{m} by \code{p2} matrix of the estimated effects of the \code{p2} covariates in \code{C2} on the mixture values of each of the \code{m} features in \code{X}.}
+#' \item{gammas_hat_pvals}{An \code{m} by \code{k*p1} matrix of p-values for the estimates in \code{gammas_hat} (based on a T-test). Note that \code{gammas_hat_pvals} is set to NULL if \code{constrain_mu == TRUE}.}
+#' \item{gammas_hat_pvals.joint}{An \code{m} by \code{p1} matrix of p-values for the joint effects (i.e. across all \code{k} sources) of each of the \code{p1} covariates in \code{C1} on each of the \code{m} features in \code{X} (based on a partial F-test). In other words, these are p-values for the combined statistical effects of each one of the \code{p1} covariates on each of the \code{m} features under the TCA model. Note that \code{gammas_hat_pvals.joint} is set to NULL if \code{constrain_mu == TRUE}.}
+#' \item{deltas_hat_pvals}{An \code{m} by \code{p2} matrix of p-values for the estimates in \code{deltas_hat} (based on a T-test). Note that \code{deltas_hat_pvals} is set to NULL if \code{constrain_mu == TRUE}.}
 #'
 #' @examples
 #' data <- test_data(100, 20, 3, 1, 1, 0.01)
 #' tca.mdl <- tca(X = data$X, W = data$W, C1 = data$C1, C2 = data$C2)
 #'
-#' @section Note: The function \code{tca} may require a long running time when the input matrix \code{X} is very large; to alleviate this, it is strongly advised to use the \code{parallel} argument, given that a multi-core machine is available.
-#'
-#' @references Rahmani E, Schweiger R, Rhead B, Criswell LA, Barcellos LF, Eskin E, Rosset S, Sankararaman S, Halperin E. Cell-type-specific resolution epigenetics without the need for cell sorting or single-cell biology. Nature Communications 2018.
+#' @references Rahmani E, Schweiger R, Rhead B, Criswell LA, Barcellos LF, Eskin E, Rosset S, Sankararaman S, Halperin E. Cell-type-specific resolution epigenetics without the need for cell sorting or single-cell biology. Nature Communications 2019.
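To illustrate the scaling idea behind the covariate p-values described in the details above: under the model, the variance of \eqn{X_{ji}} is \eqn{\sum_h W_{ih}^2\sigma_{hj}^2 + \tau^2}, so dividing each data point by its standard deviation yields a homoskedastic regression to which standard T-tests apply. A hedged toy sketch of this idea follows (a single shared covariate effect is used as a simplification; this is not the package's internal implementation):

    set.seed(1)
    n <- 200; k <- 3
    W <- matrix(rexp(n * k), n, k); W <- W / rowSums(W)
    mu <- c(0.2, 0.5, 0.8); sigma <- c(0.05, 0.10, 0.07); tau <- 0.01
    c1 <- rnorm(n)                                 # one covariate (shared effect; a simplification)
    sd_j <- sqrt((W^2 %*% sigma^2) + tau^2)[, 1]   # observation-specific sd of x_j
    x_j <- (W %*% mu)[, 1] + 0.05 * c1 + rnorm(n, 0, sd_j)
    # divide the response and the design by the sd, then use ordinary lm():
    fit <- lm(I(x_j / sd_j) ~ 0 + I(W / sd_j) + I(c1 / sd_j))
    summary(fit)                                   # T-statistic of the scaled c1 term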
#' @references Rahmani E, Zaitlen N, Baran Y, Eng C, Hu D, Galanter J, Oh S, Burchard EG, Eskin E, Zou J, Halperin E. Sparse PCA corrects for cell type heterogeneity in epigenome-wide association studies. Nature Methods 2016. #' #' @export tca -tca <- function(X, W, C1 = NULL, C1.map = NULL, C2 = NULL, refit_W = FALSE, refit_W.features = NULL, refit_W.sparsity = 500, refit_W.sd_threshold = 0.02, tau = NULL, parallel = FALSE, num_cores = NULL, max_iters = 10, log_file = "TCA.log", debug = FALSE, verbose = TRUE){ +tca <- function(X, W, C1 = NULL, C1.map = NULL, C2 = NULL, refit_W = FALSE, refit_W.features = NULL, refit_W.sparsity = 500, refit_W.sd_threshold = 0.02, tau = NULL, vars.mle = FALSE, constrain_mu = FALSE, parallel = FALSE, num_cores = NULL, max_iters = 10, log_file = "TCA.log", debug = FALSE, verbose = TRUE){ start_logger(log_file, debug, verbose) @@ -66,7 +71,7 @@ tca <- function(X, W, C1 = NULL, C1.map = NULL, C2 = NULL, refit_W = FALSE, refi C2 <- if (is.matrix(C2) | is.null(C2)) C2 else as.matrix(C2) flog.info("Validating input...") - tca.validate_input(X, W, C1, C1.map, C2, refit_W, refit_W.features, refit_W.sparsity, refit_W.sd_threshold, tau, parallel, num_cores, max_iters, log_file, debug) + tca.validate_input(X, W, C1, C1.map, C2, refit_W, refit_W.features, refit_W.sparsity, refit_W.sd_threshold, tau, constrain_mu, parallel, num_cores, max_iters, log_file, debug) if (is.null(C1)) C1 <- matrix(0, nrow=ncol(X), ncol=0) if (is.null(C2)) C2 <- matrix(0, nrow=ncol(X), ncol=0) @@ -77,27 +82,29 @@ tca <- function(X, W, C1 = NULL, C1.map = NULL, C2 = NULL, refit_W = FALSE, refi flog.info("Starting re-estimation of W...") if (is.null(refit_W.features)){ flog.info("Performing feature selection using refactor...") - #ref <- refactor(X, ncol(W), sparsity = refit_W.sparsity, C = if (ncol(cbind(C1,C2))) cbind(C1,C2) else NULL, sd_threshold = refit_W.sd_threshold, rand_svd = config$rand_svd, log_file = NULL) ref <- refactor.run(X = X, k = ncol(W), sparsity = refit_W.sparsity, C = if (ncol(cbind(C1,C2))) cbind(C1,C2) else NULL, C.remove = FALSE, sd_threshold = refit_W.sd_threshold, num_comp = NULL, rand_svd = config$rand_svd, log_file = NULL, logger_on = FALSE, verbose = verbose) refit_W.features <- ref$ranked_list[1:refit_W.sparsity] } X_sub <- subset(X, subset = rownames(X) %in% refit_W.features) flog.info("Fitting the TCA model using the selected features for re-estimating W...") - mdl0 <- tca.fit(X = t(X_sub), W = W, C1 = C1, C1.map = C1.map, C2 = C2, refit_W = TRUE, tau = tau, parallel = parallel, num_cores = num_cores, max_iters = max_iters) + mdl0 <- tca.fit(X = t(X_sub), W = W, C1 = C1, C1.map = C1.map, C2 = C2, refit_W = TRUE, tau = tau, vars.mle = vars.mle, constrain_mu = constrain_mu, parallel = parallel, num_cores = num_cores, max_iters = max_iters) W <- mdl0[["W"]] msg <- "Fitting the TCA model given the updated W..." 
  }
 
   X <- t(X)
   flog.info(msg)
-  mdl <- tca.fit(X = X, W = W, C1 = C1, C1.map = C1.map, C2 = C2, refit_W = FALSE, tau = tau, parallel = parallel, num_cores = num_cores, max_iters = max_iters)
+  mdl <- tca.fit(X = X, W = W, C1 = C1, C1.map = C1.map, C2 = C2, refit_W = FALSE, tau = tau, vars.mle = vars.mle, constrain_mu = constrain_mu, parallel = parallel, num_cores = num_cores, max_iters = max_iters)
   W <- mdl[["W"]]
   mus_hat <- mdl[["mus_hat"]]
   sigmas_hat <- mdl[["sigmas_hat"]]
   tau_hat <- mdl[["tau_hat"]]
   deltas_hat <- mdl[["deltas_hat"]]
   gammas_hat <- mdl[["gammas_hat"]]
+  deltas_hat_pvals <- mdl[["deltas_hat_pvals"]]
+  gammas_hat_pvals <- mdl[["gammas_hat_pvals"]]
+  gammas_hat_pvals.joint <- mdl[["gammas_hat_pvals.joint"]]
   flog.info("Finished tca.")
-  return( list("W" = W, "mus_hat" = mus_hat, "sigmas_hat" = sigmas_hat, "tau_hat" = tau_hat, "deltas_hat" = deltas_hat, "gammas_hat" = gammas_hat, "C1" = C1, "C2" = C2) )
+  return (list("W" = W, "mus_hat" = mus_hat, "sigmas_hat" = sigmas_hat, "tau_hat" = tau_hat, "deltas_hat" = deltas_hat, "gammas_hat" = gammas_hat, "deltas_hat_pvals" = deltas_hat_pvals, "gammas_hat_pvals" = gammas_hat_pvals, "gammas_hat_pvals.joint" = gammas_hat_pvals.joint, "C1" = C1, "C2" = C2) )
 
 }
 
@@ -107,8 +114,8 @@ tca <- function(X, W, C1 = NULL, C1.map = NULL, C2 = NULL, refit_W = FALSE, refi
 #'
 #' @param tca.mdl The value returned by applying the function \code{tca} to some data matrix \code{X}.
 #' @param features A vector with the identifiers of the features to extract (as they appear in the rows of \code{X}).
-#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == TRUE} then no output file will be generated regardless of the value of \code{log_file}.
-#' @param debug A logical value indicating whether to set the logger to a more detailed debug level; please set \code{debug} to \code{TRUE} before reporting issues.
+#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == FALSE} then no output file will be generated regardless of the value of \code{log_file}.
+#' @param debug A logical value indicating whether to set the logger to a more detailed debug level; set \code{debug} to \code{TRUE} before reporting issues.
 #' @param verbose A logical value indicating whether to print logs.
 #'
 #' @importFrom futile.logger flog.info
@@ -122,14 +129,18 @@ tca <- function(X, W, C1 = NULL, C1.map = NULL, C2 = NULL, refit_W = FALSE, refi
 #' \item{tau_hat}{Equals \code{tca.mdl$tau_hat}}
 #' \item{gammas_hat}{A \code{q} by \code{k*p1} matrix which is a subset of the matrix \code{tca.mdl$gammas_hat}, where \code{q} is the number of features in the argument \code{features}.}
 #' \item{deltas_hat}{A \code{q} by \code{p2} matrix which is a subset of the matrix \code{tca.mdl$deltas_hat}, where \code{q} is the number of features in the argument \code{features}.}
+#' \item{gammas_hat_pvals}{A \code{q} by \code{k*p1} matrix which is a subset of the matrix \code{tca.mdl$gammas_hat_pvals}, where \code{q} is the number of features in the argument \code{features}. Note that if \code{tca.mdl$gammas_hat_pvals == NULL} then \code{gammas_hat_pvals} is also set to NULL.}
+#' \item{gammas_hat_pvals.joint}{A \code{q} by \code{p1} matrix which is a subset of the matrix \code{tca.mdl$gammas_hat_pvals.joint}, where \code{q} is the number of features in the argument \code{features}. Note that if \code{tca.mdl$gammas_hat_pvals.joint == NULL} then \code{gammas_hat_pvals.joint} is also set to NULL.}
+#' \item{deltas_hat_pvals}{A \code{q} by \code{p2} matrix which is a subset of the matrix \code{tca.mdl$deltas_hat_pvals}, where \code{q} is the number of features in the argument \code{features}. Note that if \code{tca.mdl$deltas_hat_pvals == NULL} then \code{deltas_hat_pvals} is also set to NULL.}
 #'
 #' @examples
 #' data <- test_data(50, 20, 3, 0, 0, 0.01)
 #' tca.mdl <- tca(X = data$X, W = data$W)
 #' tca.mdl.subset <- tcasub(tca.mdl, rownames(data$X)[1:10])
 #' y <- matrix(rexp(50, rate=.1), ncol=1)
+#' rownames(y) <- rownames(data$W)
 #' # run tcareg test with an outcome y:
-#' res <- tcareg(data$X[1:10,], tca.mdl.subset, y, test = "joint", save_results = FALSE)
+#' res <- tcareg(data$X[1:10,], tca.mdl.subset, y, test = "joint")
 #'
 #' @export tcasub
 tcasub <- function(tca.mdl, features, log_file = "TCA.log", debug = FALSE, verbose = TRUE){
@@ -138,18 +149,16 @@ tcasub <- function(tca.mdl, features, log_file = "TCA.log", debug = FALSE, verbo
   flog.info("Starting tcasub...")
   tcasub.validate_input(tca.mdl, features, log_file, debug)
   flog.info("Finished tcasub.")
-  if (length(features) == 1){
-    mus_hat <- t(as.matrix(tca.mdl[["mus_hat"]][features,]))
-    sigmas_hat <- t(as.matrix(tca.mdl[["sigmas_hat"]][features,]))
-    deltas_hat <- t(as.matrix(tca.mdl[["deltas_hat"]][features,]))
-    gammas_hat <- t(as.matrix(tca.mdl[["gammas_hat"]][features,]))
-  }else{
-    mus_hat <- as.matrix(tca.mdl[["mus_hat"]][features,])
-    sigmas_hat <- as.matrix(tca.mdl[["sigmas_hat"]][features,])
-    deltas_hat <- as.matrix(tca.mdl[["deltas_hat"]][features,])
-    gammas_hat <- as.matrix(tca.mdl[["gammas_hat"]][features,])
-  }
-  return( list("W" = tca.mdl[["W"]], "mus_hat" = mus_hat, "sigmas_hat" = sigmas_hat, "tau_hat" = tca.mdl[["tau_hat"]], "deltas_hat" = deltas_hat, "gammas_hat" = gammas_hat, "C1" = as.matrix(tca.mdl[["C1"]]), "C2" = as.matrix(tca.mdl[["C2"]])) )
+
+  mus_hat <- as.matrix(tca.mdl[["mus_hat"]][features,,drop=F])
+  sigmas_hat <- as.matrix(tca.mdl[["sigmas_hat"]][features,,drop=F])
+  deltas_hat <- as.matrix(tca.mdl[["deltas_hat"]][features,,drop=F])
+  gammas_hat <- as.matrix(tca.mdl[["gammas_hat"]][features,,drop=F])
+  deltas_hat_pvals <- if(is.null(tca.mdl[["deltas_hat_pvals"]])) NULL else as.matrix(tca.mdl[["deltas_hat_pvals"]][features,,drop=F])
+  gammas_hat_pvals <- if(is.null(tca.mdl[["gammas_hat_pvals"]])) NULL else as.matrix(tca.mdl[["gammas_hat_pvals"]][features,,drop=F])
+  gammas_hat_pvals.joint <- if (is.null(tca.mdl[["gammas_hat_pvals.joint"]])) NULL else as.matrix(tca.mdl[["gammas_hat_pvals.joint"]][features,,drop=F])
+
+  return( list("W" = tca.mdl[["W"]], "mus_hat" = mus_hat, "sigmas_hat" = sigmas_hat, "tau_hat" = tca.mdl[["tau_hat"]], "deltas_hat" = deltas_hat, "gammas_hat" = gammas_hat, "deltas_hat_pvals" = deltas_hat_pvals, "gammas_hat_pvals" = gammas_hat_pvals, "gammas_hat_pvals.joint" = gammas_hat_pvals.joint, "C1" = as.matrix(tca.mdl[["C1"]]), "C2" = as.matrix(tca.mdl[["C2"]])) )
 }
 
@@ -158,15 +167,16 @@ tcasub <- function(tca.mdl, features, log_file = "TCA.log", debug = FALSE, verbo
 #'
 #' @description Estimates 3-dimensional signals (features by observations by sources) from input of mixtures (features by observations), under the assumption of the TCA model that each observation is a mixture of unique source-specific values (in each feature in the data). For example, in the context of tissue-level bulk DNA methylation data coming from a mixture of cell types (i.e. the input is methylation sites by individuals), \code{tensor} allows estimating a tensor of cell-type-specific levels for each individual in each methylation site (i.e. a tensor of methylation sites by individuals by cell types).
 #'
-#' @param X An \code{m} by \code{n} matrix of measurements of \code{m} features for \code{n} observations. Each column in \code{X} is assumed to be a mixture of \code{k} different sources. Note that \code{X} must include row names and column names and that NA values are currently not supported. \code{X} should not include features that are constant across all observations.
+#' @param X An \code{m} by \code{n} matrix of measurements of \code{m} features for \code{n} observations. Each column in \code{X} is assumed to be a mixture of \code{k} sources. Note that \code{X} must include row names and column names and that NA values are currently not supported. \code{X} should not include features that are constant across all observations.
 #' @param tca.mdl The value returned by applying the function \code{tca} to \code{X}.
+#' @param scale A logical value indicating whether to divide the estimate of each entry in the tensor by its estimated standard deviation.
 #' @param parallel A logical value indicating whether to use parallel computing (possible when using a multi-core machine).
 #' @param num_cores A numeric value indicating the number of cores to use (activated only if \code{parallel == TRUE}). If \code{num_cores == NULL} then all available cores except for one will be used.
-#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == TRUE} then no output file will be generated regardless of the value of \code{log_file}.
-#' @param debug A logical value indicating whether to set the logger to a more detailed debug level; please set \code{debug} to \code{TRUE} before reporting issues.
+#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == FALSE} then no output file will be generated regardless of the value of \code{log_file}.
+#' @param debug A logical value indicating whether to set the logger to a more detailed debug level; set \code{debug} to \code{TRUE} before reporting issues.
 #' @param verbose A logical value indicating whether to print logs.
 #'
-#' @details See \link{tca} for notations and details about the TCA model. Given estimates of the parameters of the model (given by \link{tca}), \code{tensor} uses the conditional distribution \eqn{Z_{hj}^i|X_{ji}} for estimating the \eqn{k} source-specific levels of each observation \eqn{i} in each feature \eqn{j}.
+#' @details See \link{tca} for notations and details about the TCA model. Given estimates of the parameters of the model (given by \link{tca}), \code{tensor} uses the conditional distribution \eqn{Z_{hj}^i|X_{ji}=x_{ji}} for estimating the \eqn{k} source-specific levels of each observation \eqn{i} in each feature \eqn{j}.
 #'
 #' @return A list with the estimated source-specific values. The first element in the list is an \code{m} by \code{n} matrix (features by observations) corresponding to the estimated values coming from the first source, the second element in the list is another \code{m} by \code{n} matrix (features by observations) corresponding to the estimated values coming from the second source and so on.
 #'
@@ -175,15 +185,18 @@ tcasub <- function(tca.mdl, features, log_file = "TCA.log", debug = FALSE, verbo
 #' tca.mdl <- tca(X = data$X, W = data$W, C1 = data$C1, C2 = data$C2)
 #' Z_hat <- tensor(data$X, tca.mdl)
 #'
-#' @references Rahmani E, Schweiger R, Rhead B, Criswell LA, Barcellos LF, Eskin E, Rosset S, Sankararaman S, Halperin E. Cell-type-specific resolution epigenetics without the need for cell sorting or single-cell biology. Nature Communications 2018.
+#' @references Rahmani E, Schweiger R, Rhead B, Criswell LA, Barcellos LF, Eskin E, Rosset S, Sankararaman S, Halperin E. Cell-type-specific resolution epigenetics without the need for cell sorting or single-cell biology. Nature Communications 2019.
 #'
 #' @export tensor
-tensor <- function(X, tca.mdl, parallel = FALSE, num_cores = NULL, log_file = "TCA.log", debug = FALSE, verbose = TRUE){
+tensor <- function(X, tca.mdl, scale = FALSE, parallel = FALSE, num_cores = NULL, log_file = "TCA.log", debug = FALSE, verbose = TRUE){
 
   start_logger(log_file, debug, verbose)
 
   config <- config::get(file = system.file("extdata", "config.yml", package = "TCA"), use_parent = FALSE)
 
+  flog.info("Validating input...")
+  tensor.validate_input(X, scale, parallel, num_cores, log_file, debug)
+
   # progress bar options
   op <- if (verbose) pboptions(nout = config$nout, type = config[["type"]]) else pboptions(nout = config$nout, type = "none")
 
@@ -200,39 +213,43 @@ tensor <- function(X, tca.mdl, parallel = FALSE, num_cores = NULL, log_file = "T
   C1 <- tca.mdl[["C1"]]
   C2 <- tca.mdl[["C2"]]
 
-  return( estimate_Z(t(X), W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, gammas_hat, parallel, num_cores) )
+  return( estimate_Z(t(X), W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, gammas_hat, scale, parallel, num_cores) )
 
 }
 
 #' @title Fitting a TCA regression model
 #'
-#' @description TCA regression allows to test for several types of statistical relations between source-specific values and an outcome of interest. For example, in the context of tissue-level bulk DNA methylation data coming from a mixture of cell types (i.e. the input is methylation sites by individuals), \code{tcareg} allows to test for cell-type-specific effects of methylation on an outcome of interest.
+#' @description TCA regression allows testing, under several types of statistical tests, the effects of source-specific values on an outcome of interest (or on mediating components thereof). For example, in the context of tissue-level bulk DNA methylation data coming from a mixture of cell types (i.e. the input is methylation sites by individuals), \code{tcareg} allows testing for cell-type-specific effects of methylation on outcomes of interest (or on mediating components thereof).
 #'
-#' @param X An \code{m} by \code{n} matrix of measurements of \code{m} features for \code{n} observations. 
Each column in \code{X} is assumed to be a mixture of \code{k} different sources. Note that \code{X} must include row names and column names and that NA values are currently not supported. \code{X} should not include features that are constant across all observations. -#' @param tca.mdl The value returned by applying the function \code{tca} to \code{X}. +#' @param X An \code{m} by \code{n} matrix of measurements of \code{m} features for \code{n} observations. Each column in \code{X} is assumed to be a mixture of \code{k} sources. Note that \code{X} must include row names and column names and that NA values are currently not supported. \code{X} should not include features that are constant across all observations. +#' @param tca.mdl The value returned by applying \link{tca} to \code{X}. #' @param y An \code{n} by 1 matrix of an outcome of interest for each of the \code{n} observations in \code{X}. Note that \code{y} must include row names and column names and that NA values are currently not supported. #' @param C3 An \code{n} by \code{p3} design matrix of covariates that may affect \code{y}. Note that \code{C3} must include row names and column names and should not include an intercept term. NA values are currently not supported. #' @param test A character vector with the type of test to perform on each of the features in \code{X}; one of the following options: \code{'marginal'}, \code{'marginal_conditional'}, \code{'joint'}, \code{'single_effect'}, or \code{'custom'}. Setting \code{'marginal'} or \code{'marginal_conditional'} corresponds to testing each feature in \code{X} for a statistical relation between \code{y} and each of the \code{k} sources separately; for any particular source under test, the \code{marginal_conditional} option further accounts for possible effects of the rest of the \code{k-1} sources (\code{'marginal'} will therefore tend to be more powerful in discovering truly related features, but at the same time more prone to falsely tagging the correct related sources if sources are highly correlated). Setting \code{'joint'} or \code{'single_effect'} corresponds to testing each feature for an overall statistical relation with \code{y}, while modeling source-specific effects; the latter option further assumes that the source-specific effects are the same within each feature (\code{'single_effect'} means only one degree of freedom and will therefore be more powerful when the assumption of a single effect within a feature holds). Finally, \code{'custom'} corresponds to testing each feature in \code{X} for a statistical relation with \code{y} under a user-specified model (alternative model) with respect to a null model (null model); for example, for testing for relation of the combined (potentially different) effects of features 1 and 2 while accounting for the (potentially different) effects of 3 and 4, set the null model to be sources 3, 4 and the alternative model to be sources 1, 2, 3, 4. Indicating that \code{null_model} assumes no effect for any of the sources can be done by setting it to \code{NULL}. #' @param null_model A vector with a subset of the names of the sources in \code{tca.mdl$W} to be used as a null model (activated only if \code{test == 'custom'}). Note that the null model must be nested within the alternative model; set \code{null_model} to be \code{NULL} for indicating no effect for any of the sources under the null model. 
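To illustrate how \code{null_model} above and \code{alternative_model} (next) interact, a sketch of a custom test mirroring the examples further below (source names "1", "2", "3" as generated by the package's test_data(); illustrative values only):

    data <- test_data(50, 20, 3, 1, 1, 0.01)
    tca.mdl <- tca(X = data$X, W = data$W, C1 = data$C1, C2 = data$C2)
    y <- matrix(rexp(50, rate = .1), ncol = 1)
    rownames(y) <- rownames(data$W)
    # null model ("3") nested within the alternative model ("1","2","3"):
    res <- tcareg(data$X, tca.mdl, y, test = "custom",
                  null_model = c("3"),
                  alternative_model = c("1", "2", "3"))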
 #' @param alternative_model A vector with a subset (or all) of the names of the sources in \code{tca.mdl$W} to be used as an alternative model (activated only if \code{test == 'custom'}).
-#' @param save_results A logical value indicating whether to save the returned results in a file. If \code{TRUE} and \code{test == 'marginal'} or \code{test == 'marginal_conditional'} then \code{k} files will be saved (one for the results of each source).
+#' @param save_results A logical value indicating whether to save the returned results in a file. If \code{test == 'marginal'} or (\code{fast_mode == TRUE} and \code{test == 'marginal_conditional'}) then \code{k} files will be saved (one for the results of each source).
+#' @param fast_mode A logical value indicating whether to use a fast version of TCA regression, in which source-specific values are first estimated using the \code{tensor} function and then tested under a standard regression framework (see more details below).
 #' @param output Prefix for output files (activated only if \code{save_results == TRUE}).
-#' @param sort_results A logical value indicating whether to sort the results by their p-value (i.e. features with lower p-value will appear first in the results).
+#' @param sort_results A logical value indicating whether to sort the results by their p-value (i.e. features with lower p-value will appear first in the results). This option is not available if \code{fast_mode == TRUE} and \code{test == "marginal_conditional"}.
 #' @param parallel A logical value indicating whether to use parallel computing (possible when using a multi-core machine).
 #' @param num_cores A numeric value indicating the number of cores to use (activated only if \code{parallel == TRUE}). If \code{num_cores == NULL} then all available cores except for one will be used.
-#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == TRUE} then no output file will be generated regardless of the value of \code{log_file}.
-#' @param features_metadata A path to a csv file containing metadata about the features in \code{X} that will be added to the output files (activated only if \code{save_results == TRUE}). Each row in the metadata file should correspond to one feature (with the row name being the feature identifier, as it appears in the rows of \code{X}) and each column should correspond to one metadata descriptor (with an appropriate column name). Features that do not exist in \code{X} will be ignored and features in \code{X} with missing metadata information will show missing values.
-#' @param debug A logical value indicating whether to set the logger to a more detailed debug level; please set \code{debug} to \code{TRUE} before reporting issues.
+#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == FALSE} then no output file will be generated regardless of the value of \code{log_file}.
+#' @param features_metadata A path to a csv file containing metadata about the features in \code{X} that will be added to the output files (activated only if \code{save_results == TRUE}). Each row in the metadata file should correspond to one feature in \code{X} (with the row name being the feature identifier, as it appears in the rows of \code{X}) and each column should correspond to one metadata descriptor (with an appropriate column name). Features that do not exist in \code{X} will be ignored and features in \code{X} with missing metadata information will show missing values.
+#' @param debug A logical value indicating whether to set the logger to a more detailed debug level; set \code{debug} to \code{TRUE} before reporting issues.
 #' @param verbose A logical value indicating whether to print logs.
 #'
-#' @details TCA models \eqn{Z_{hj}^i} as the source-specific value of observation \eqn{i} in feature \eqn{j} coming from source \eqn{h} (see \link{tca} for more details). A TCA regression model tests an outcome \eqn{Y} for a linear statistical relation with the source-specific values of a feature \eqn{j} by assuming: \deqn{Y_i = \sum_{h=1}^k \beta_{hj} Z_{hj}^i + e_i} where \eqn{e_i \sim N(0,\phi^2)}. In practice, \code{tcareg} fits this model using the conditional distribution \eqn{Y|X}, which, effectively, integrates over the latent \eqn{Z_{hj}^i} parameters. Statistical significance is then calculated using a likelihood ratio test (LRT). Note that the null and alternative models will be set automatically, except when \code{test == 'custom'}, in which case they will be set according to the user-specified null and alternative hypotheses.
+#' @details TCA models \eqn{Z_{hj}^i} as the source-specific value of observation \eqn{i} in feature \eqn{j} coming from source \eqn{h} (see \link{tca} for more details). A TCA regression model tests an outcome \eqn{Y} for a linear statistical relation with the source-specific values of a feature \eqn{j} by assuming: \deqn{Y_i = \alpha_{j,0} + \sum_{h=1}^k\beta_{hj} Z_{hj}^i + c_i^{(3)}\alpha_{j} + e_i} where \eqn{\alpha_{j,0}} is an intercept term, \eqn{\beta_{hj}} is the effect of source \eqn{h}, \eqn{c_i^{(3)}} and \eqn{\alpha_j} correspond to the \eqn{p_3} covariate values of observation \eqn{i} (i.e. a row vector from \code{C3}) and their effect sizes, respectively, and \eqn{e_i \sim N(0,\phi^2)}. In practice, if \code{fast_mode == FALSE} then \code{tcareg} fits this model using the conditional distribution \eqn{Y|X}, which, effectively, integrates over the random \eqn{Z_{hj}^i}. Statistical significance is then calculated using a likelihood ratio test (LRT).
+#' Alternatively, if \code{fast_mode == TRUE} then the above model is fitted by first learning point estimates for \eqn{Z_{hj}^i} using the \link{tensor} function and then assessing statistical significance using T-tests and partial F-tests under a standard regression framework. This alternative provides a substantial boost in speed.
 #'
-#' Under the TCA regression model, several statistical tests can be performed by setting the argument \code{test} according to one of the following options.
+#' Note that the null and alternative models will be set automatically, except when \code{test == 'custom'}, in which case they will be set according to the user-specified null and alternative hypotheses.
+#'
+#' Under the TCA regression model, several statistical tests can be performed by setting the argument \code{test} according to one of the following options:
 #'
 #' 1. If \code{test == 'marginal'}, \code{tcareg} will perform the following for each source \eqn{l}. For each feature \eqn{j}, \eqn{\beta_{lj}} will be estimated and tested for a non-zero effect, while assuming \eqn{\beta_{hj}=0} for all other sources \eqn{h\neq l}.
 #'
-#' 2. If \code{test == 'marginal_conditional'}, \code{tcareg} will perform the following for each source \eqn{l}. For each feature \eqn{j}, \eqn{\beta_{lj}} will be estimated and tested for a non-zero effect, while also estimating the effect sizes \eqn{\beta_{hj}} for all other sources \eqn{h\neq l}.
+#' 2. If \code{test == 'marginal_conditional'}, \code{tcareg} will perform the following for each source \eqn{l}. For each feature \eqn{j}, \eqn{\beta_{lj}} will be estimated and tested for a non-zero effect, while also estimating the effect sizes \eqn{\beta_{hj}} for all other sources \eqn{h\neq l} (thus accounting for covariances between the estimated effects of different sources).
 #'
 #' 3. If \code{test == 'joint'}, \code{tcareg} will estimate for each feature \eqn{j} the effect sizes of all \eqn{k} sources \eqn{\beta_{1j},\ldots,\beta_{kj}} and then test the set of \eqn{k} estimates of each feature \eqn{j} for a joint effect.
 #'
@@ -240,19 +257,17 @@ tensor <- function(X, tca.mdl, parallel = FALSE, num_cores = NULL, log_file = "T
 #'
 #' 5. If \code{test == 'custom'}, \code{tcareg} will estimate for each feature \eqn{j} the effect sizes of a predefined set of sources (defined by a user-specified alternative model) and then test their estimates for a joint effect, while accounting for a nested predefined set of sources (defined by a user-specified null model).
 #'
-#' @return A list with the results of applying the TCA regression model to each of the features in the data. If \code{test == 'marginal'} or \code{test == 'marginal_conditional'} then a list of \code{k} such lists of results are returned, one for the results of each source.
+#' @return A list with the results of applying the TCA regression model to each of the features in \code{X}. If \code{test == 'marginal'} or (\code{test == 'marginal_conditional'} and \code{fast_mode == FALSE}) then a list of \code{k} such lists of results is returned, one for the results of each source.
 #' \item{phi}{An estimate of the standard deviation of the i.i.d. component of variation in the TCA regression model.}
-#' \item{beta}{A matrix of effect size estimates for the source-specific effects, such that each row corresponds to the estimated effect sizes of one feature. The number of columns corresponds to the number of estimated effects (e.g., if \code{test} is set to \code{marginal} then \code{beta} will include a single column, if \code{test} is set to \code{joint} then \code{beta} will include \code{k} columns and so on).}
-#' \item{intercept}{An \code{m} by \code{1} matrix of estimates for the intercept of each feature.}
-#' \item{alpha}{An \code{m} by \code{p3} matrix of effect size estimates for the \code{p3} covariates in \code{C3}, such that each row corresponds to the estimated effect sizes of one feature.}
-#' \item{null_ll}{An \code{m} by \code{1} matrix of the log-likelihood of the model under the null hypothesis.}
+#' \item{beta}{A matrix of effect size estimates for the source-specific effects, such that each row corresponds to the estimated effect sizes of one feature in \code{X}. The number of columns corresponds to the number of estimated effects (e.g., if \code{test} is set to \code{marginal} then \code{beta} will include a single column, if \code{test} is set to \code{joint} then \code{beta} will include \code{k} columns etc.).}
+#' \item{intercept}{An \code{m} by \code{1} matrix of estimates for the intercept term of each feature.}
+#' \item{alpha}{An \code{m} by \code{p3} matrix of effect size estimates for the \code{p3} covariates in \code{C3}, such that each row corresponds to the estimated effect sizes of one feature in \code{X}.}
+#' \item{null_ll}{An \code{m} by \code{1} matrix of the log-likelihood of the model under the null hypothesis. Returned only if \code{fast_mode == FALSE}.}
 #' \item{alternative_ll}{An \code{m} by \code{1} matrix of the log-likelihood of the model under the alternative hypothesis.}
-#' \item{stats}{An \code{m} by \code{1} matrix of the LRT statistic for each feature in the data.}
-#' \item{df}{The degrees of freedom for deriving p-values using LRT.}
-#' \item{pvals}{An \code{m} by \code{1} matrix of the p-value for each feature in the data.}
-#' \item{qvals}{An \code{m} by \code{1} matrix of the q-value (FDR-adjusted p-values) for each feature in the data.}
-#'
-#' @section Note: The function \code{tcareg} may require a long running time when the input matrix \code{X} is very large; to alleviate this, it is strongly advised to use the \code{parallel} argument, given that a multi-core machine is available.
+#' \item{stats}{An \code{m} by \code{k} matrix of T statistics for each source in each feature in \code{X} if \code{test == "marginal_conditional"} and \code{fast_mode == TRUE}; otherwise, an \code{m} by \code{1} matrix of a (partial) F statistic (if \code{fast_mode == TRUE}) or a likelihood-ratio test statistic (if \code{fast_mode == FALSE}) for each feature in \code{X}.}
+#' \item{df}{The degrees of freedom for deriving p-values.}
+#' \item{pvals}{An \code{m} by \code{k} matrix of p-values for each source in each feature in \code{X} if \code{test == "marginal_conditional"} and \code{fast_mode == TRUE}; otherwise, an \code{m} by \code{1} matrix of the p-value for each feature in \code{X}.}
+#' \item{qvals}{An \code{m} by \code{k} matrix of q-values (FDR-adjusted p-values) for each source in each feature in \code{X} if \code{test == "marginal_conditional"} and \code{fast_mode == TRUE}; otherwise, an \code{m} by \code{1} matrix of the q-value for each feature in \code{X}. 
Note that if \code{test == "marginal_conditional"} and \code{fast_mode == TRUE} then q-values are calculated for each source separately.} #' #' @examples #' n <- 50 @@ -263,20 +278,23 @@ tensor <- function(X, tca.mdl, parallel = FALSE, num_cores = NULL, log_file = "T #' data <- test_data(n, m, k, p1, p2, 0.01) #' tca.mdl <- tca(X = data$X, W = data$W, C1 = data$C1, C2 = data$C2) #' y <- matrix(rexp(n, rate=.1), ncol=1) +#' rownames(y) <- rownames(data$W) +#' # marginal conditional test: +#' res0 <- tcareg(data$X, tca.mdl, y) #' # joint test: -#' res1 <- tcareg(data$X, tca.mdl, y, test = "joint", save_results = FALSE) +#' res1 <- tcareg(data$X, tca.mdl, y, test = "joint") #' # custom test, testing for a joint effect of sources 1,2 while accounting for source 3 #' res2 <- tcareg(data$X, tca.mdl, y, test = "custom", null_model = c("3"), -#' alternative_model = c("1","2","3"), save_results = FALSE) +#' alternative_model = c("1","2","3")) #' # custom test, testing for a joint effect of sources 1,2 assuming no effects under the null #' res3 <- tcareg(data$X, tca.mdl, y, test = "custom", null_model = NULL, -#' alternative_model = c("1","2"), save_results = FALSE) +#' alternative_model = c("1","2")) #' -#' @references Rahmani E, Schweiger R, Rhead B, Criswell LA, Barcellos LF, Eskin E, Rosset S, Sankararaman S, Halperin E. Cell-type-specific resolution epigenetics without the need for cell sorting or single-cell biology. Nature Communications 2018. +#' @references Rahmani E, Schweiger R, Rhead B, Criswell LA, Barcellos LF, Eskin E, Rosset S, Sankararaman S, Halperin E. Cell-type-specific resolution epigenetics without the need for cell sorting or single-cell biology. Nature Communications 2019. #' #' @export tcareg #' -tcareg <- function(X, tca.mdl, y, C3 = NULL, test = "marginal", null_model = NULL, alternative_model = NULL, save_results = TRUE, output = "TCA", sort_results = TRUE, parallel = FALSE, num_cores = NULL, log_file = "TCA.log", features_metadata = NULL, debug = FALSE, verbose = TRUE){ +tcareg <- function(X, tca.mdl, y, C3 = NULL, test = "marginal_conditional", null_model = NULL, alternative_model = NULL, save_results = FALSE, fast_mode = TRUE, output = "TCA", sort_results = FALSE, parallel = FALSE, num_cores = NULL, log_file = "TCA.log", features_metadata = NULL, debug = FALSE, verbose = TRUE){ start_logger(log_file, debug, verbose) @@ -293,7 +311,7 @@ tcareg <- function(X, tca.mdl, y, C3 = NULL, test = "marginal", null_model = NUL W <- tca.mdl[['W']] flog.info("Validating input...") - tcareg.validate_input(X, W, y, C3, test, null_model, alternative_model, save_results, output, sort_results, parallel, num_cores, log_file, features_metadata, debug) + tcareg.validate_input(X, W, y, C3, test, null_model, alternative_model, save_results, fast_mode, output, sort_results, parallel, num_cores, log_file, features_metadata, debug) flog.info("Preparing data...") X <- t(X) @@ -305,17 +323,16 @@ tcareg <- function(X, tca.mdl, y, C3 = NULL, test = "marginal", null_model = NUL C1 <- tca.mdl[["C1"]] gammas_hat <- tca.mdl[["gammas_hat"]] C3 <- if(!is.null(C3)) C3 else matrix(0, nrow=nrow(X), ncol=0) - #null_model <- match(null_model,colnames(W)) - null_model <- if (is.null(null_model)) NULL else match(null_model,colnames(W)) - alternative_model <- match(alternative_model,colnames(W)) + null_model <- if (is.null(null_model)) NULL else sort(match(null_model,colnames(W))) + alternative_model <- sort(match(alternative_model,colnames(W))) k <- ncol(W) flog.info("Running TCA regression...") - res <- 
tcareg.fit(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, test, null_model, alternative_model, parallel, num_cores) + res <- tcareg.fit(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, test, null_model, alternative_model, fast_mode, parallel, num_cores) if (save_results){ flog.info("Saving results...") - save_association_results(res, output, test, alternative_model, colnames(X), colnames(W), colnames(C3), sort_results, features_metadata) + save_association_results(res, output, test, fast_mode, alternative_model, colnames(X), colnames(W), colnames(C3), sort_results, features_metadata) } flog.info("Finished tcareg.") @@ -328,16 +345,16 @@ tcareg <- function(X, tca.mdl, y, C3 = NULL, test = "marginal", null_model = NUL #' #' @description Performs unsupervised feature selection followed by principal component analysis (PCA) under a row-sparse model using the ReFACTor algorithm. For example, in the context of tissue-level bulk DNA methylation data coming from a mixture of cell types (i.e. the input is methylation sites by individuals), \code{refactor} allows to capture the variation in cell-type composition, which was shown to be a dominant sparse signal in methylation data. #' -#' @param X An \code{m} by \code{n} matrix of measurements of \code{m} features for \code{n} observations. Each column in \code{X} is assumed to be a mixture of \code{k} different sources. Note that \code{X} must include row names and column names and that NA values are currently not supported. \code{X} should not include features that are constant across all observations. -#' @param k A numeric value indicating the dimension of the signal in the data (i.e. the number of sources). -#' @param sparsity A numeric value indicating the sparsity of the signal in the data (the number of signal rows). -#' @param C An \code{n} by \code{p} design matrix of covariates that will be accounted for in the feature selection step. Note that \code{C} must include row names and column names and that NA values are currently not supported; ; set \code{C} to be \code{NULL} if there are no such covariates. -#' @param C.remove A logical value indicating whether the covariates in X should be accounted for not only in the feature selection step, but also in the final calculation of the principal components (i.e. if \code{C.remove == TRUE} then the selected features will be adjusted for the covariates in \code{C} prior to calculating principal components). Note that setting \code{C.remove} to be \code{TRUE} is desired when ReFACTor is intended to be used for correction in downstream analysis, whereas setting \code{C.remove} to be \code{FALSE} is desired when ReFACTor is merely used for capturing the sparse signals in the data (i.e. regardless of correction). +#' @param X An \code{m} by \code{n} matrix of measurements of \code{m} features for \code{n} observations. Each column in \code{X} is assumed to be a mixture of \code{k} sources. Note that \code{X} must include row names and column names and that NA values are currently not supported. \code{X} should not include features that are constant across all observations. +#' @param k A numeric value indicating the dimension of the signal in \code{X} (i.e. the number of sources). +#' @param sparsity A numeric value indicating the sparsity of the signal in \code{X} (the number of signal rows). +#' @param C An \code{n} by \code{p} design matrix of covariates that will be accounted for in the feature selection step. 
An intercept term will be included automatically. Note that \code{C} must include row names and column names and that NA values are currently not supported; set \code{C} to be \code{NULL} if there are no such covariates. +#' @param C.remove A logical value indicating whether the covariates in \code{C} should be accounted for not only in the feature selection step, but also in the final calculation of the principal components (i.e. if \code{C.remove == TRUE} then the selected features will be adjusted for the covariates in \code{C} prior to calculating principal components). Note that setting \code{C.remove} to be \code{TRUE} is desired when ReFACTor is intended to be used for correction in downstream analysis, whereas setting \code{C.remove} to be \code{FALSE} is desired when ReFACTor is merely used for capturing the sparse signals in \code{X} (i.e. regardless of correction). #' @param sd_threshold A numeric value indicating a standard deviation threshold to be used for excluding low-variance features in \code{X} (i.e. features with standard deviation lower than \code{sd_threshold} will be excluded). Set \code{sd_threshold} to be \code{NULL} for turning off this filter. Note that removing features with very low variability tends to improve speed and performance. #' @param num_comp A numeric value indicating the number of ReFACTor components to return. #' @param rand_svd A logical value indicating whether to use random svd for estimating the low-rank structure of the data in the first step of the algorithm; random svd can result in a substantial speedup for large data. -#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == TRUE} then no output file will be generated regardless of the value of \code{log_file}. -#' @param debug A logical value indicating whether to set the logger to a more detailed debug level; please set \code{debug} to \code{TRUE} before reporting issues. +#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == FALSE} then no output file will be generated regardless of the value of \code{log_file}. +#' @param debug A logical value indicating whether to set the logger to a more detailed debug level; set \code{debug} to \code{TRUE} before reporting issues. #' @param verbose A logical value indicating whether to print logs. #' #' @importFrom rsvd rpca @@ -350,7 +367,7 @@ tcareg <- function(X, tca.mdl, y, C3 = NULL, test = "marginal", null_model = NUL #' @return A list with the estimated components of the ReFACTor model. #' \item{scores}{An \code{n} by \code{num_comp} matrix of the ReFACTor components (the projection scores).} #' \item{coeffs}{A \code{sparsity} by \code{num_comp} matrix of the coefficients of the ReFACTor components (the projection loadings).} -#' \item{ranked_list}{A vector with the features in the data, ranked by their scores in the feature selection step of the algorithm; the top scoring features (set according to the argument \code{sparsity}) are used for calculating the ReFACTor components. 
Note that features that were excluded according to \code{sd_threshold} will not appear in this \code{ranked_list}.} +#' \item{ranked_list}{A vector with the features in \code{X}, ranked by their scores in the feature selection step of the algorithm; the top scoring features (set according to the argument \code{sparsity}) are used for calculating the ReFACTor components. Note that features that were excluded according to \code{sd_threshold} will not appear in this \code{ranked_list}.} #' #' @section Note: For very large input matrices it is advised to use random svd for speeding up the feature selection step (see argument \code{rand_svd}). #' @@ -380,7 +397,7 @@ refactor <- function(X, k, sparsity = 500, C = NULL, C.remove = FALSE, sd_thresh #' @param p1 The number of covariates that affect the source-specific values to simulate. #' @param p2 The number of covariates that affect the mixture values to simulate. #' @param tau The variance of the i.i.d. component of variation to add on top of the simulated mixture values. -#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == TRUE} then no output file will be generated regardless of the value of \code{log_file}. +#' @param log_file A path to an output log file. Note that if the file \code{log_file} already exists then logs will be appended to the end of the file. Set \code{log_file} to \code{NULL} to prevent output from being saved into a file; note that if \code{verbose == FALSE} then no output file will be generated regardless of the value of \code{log_file}. #' @param verbose A logical value indicating whether to print logs. 
#' #' @importFrom futile.logger flog.info @@ -396,8 +413,8 @@ refactor <- function(X, k, sparsity = 500, C = NULL, C.remove = FALSE, sd_thresh #' \item{sigmas}{An \code{m} by \code{k} matrix of the standard variation of each of the \code{m} features for each of the \code{k} sources.} #' \item{C1}{ An \code{n} by \code{p1} design matrix of simulated covariates that affect the hidden source-specific values.} #' \item{C2}{ An \code{n} by \code{p2} design matrix of simulated covariates that affect the mixture.} -#' \item{gammas}{An \code{m} by \code{k*p1} matrix of the effects of the \code{p1} factors in \code{C1} on each of the \code{m} features in \code{X}, where the first \code{p1} columns are the source-specific effects of the \code{p1} factors on the first source, the following \code{p1} columns are the source-specific effects on the second source and so on.} -#' \item{deltas}{An \code{m} by \code{p2} matrix of the effects of the \code{p2} factors in \code{C2} on the mixture values of each of the \code{m} features in \code{X}.} +#' \item{gammas}{An \code{m} by \code{k*p1} matrix of the effects of the \code{p1} covariates in \code{C1} on each of the \code{m} features in \code{X}, where the first \code{p1} columns are the source-specific effects of the \code{p1} covariates on the first source, the following \code{p1} columns are the source-specific effects on the second source and so on.} +#' \item{deltas}{An \code{m} by \code{p2} matrix of the effects of the \code{p2} covariates in \code{C2} on the mixture values of each of the \code{m} features in \code{X}.} #' #' @examples #' data <- test_data(100, 50, 3, 2, 2, 0.01) diff --git a/R/model_fit.R b/R/model_fit.R index cbcbc21..d11b5de 100644 --- a/R/model_fit.R +++ b/R/model_fit.R @@ -1,6 +1,9 @@ # Fits the TCA model -tca.fit <- function(X, W, C1, C1.map, C2, refit_W, tau, parallel, num_cores, max_iters){ +#' @importFrom parallel clusterExport +#' @importFrom pbapply pblapply +#' @importFrom stats anova +tca.fit <- function(X, W, C1, C1.map, C2, refit_W, tau, vars.mle, constrain_mu, parallel, num_cores, max_iters){ flog.debug("Starting function 'tca.fit'...") @@ -19,6 +22,9 @@ tca.fit <- function(X, W, C1, C1.map, C2, refit_W, tau, parallel, num_cores, max deltas_hat <- init[["deltas_hat"]] gammas_hat <- init[["gammas_hat"]] tau_hat <- init[["tau_hat"]] + deltas_hat_pvals <- if(constrain_mu) NULL else init[["deltas_hat_pvals"]] + gammas_hat_pvals <- if(constrain_mu) NULL else init[["gammas_hat_pvals"]] + gammas_hat_pvals.joint <- if(constrain_mu) NULL else init[["gammas_hat_pvals.joint"]] const <- -n*log(2*pi) ll_prev <- -Inf @@ -26,7 +32,7 @@ tca.fit <- function(X, W, C1, C1.map, C2, refit_W, tau, parallel, num_cores, max if (refit_W) flog.info("Iteration %s out of %s external iterations (fitting all parameters including W)...", iter, max_iters) flog.info("Fitting means and variances...") - res <- tca.fit_means_vars(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, gammas_hat, C1.map, tau, max_iters, parallel, num_cores) + res <- tca.fit_means_vars(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, gammas_hat, C1.map, tau, vars.mle, constrain_mu, max_iters, parallel, num_cores) mus_hat <- res[["mus_hat"]] sigmas_hat <- res[["sigmas_hat"]] deltas_hat <- res[["deltas_hat"]] @@ -55,13 +61,64 @@ tca.fit <- function(X, W, C1, C1.map, C2, refit_W, tau, parallel, num_cores, max } - return (list("W" = W, "mus_hat" = mus_hat, "sigmas_hat" = sigmas_hat, "tau_hat" = tau_hat, "deltas_hat" = deltas_hat, "gammas_hat" = gammas_hat)) - + 
if(!constrain_mu){ + flog.info("Calculate p-values for deltas and gammas.") + C1_ <- calc_C1_W_interactions(W,C1) + W_norms <- (tcrossprod(W**2,sigmas_hat**2)+tau_hat**2 )**0.5 + X_tilde <- X/W_norms + cl <- NULL + if (parallel){ + cl <- init_cluster(num_cores) + clusterExport(cl, varlist = c("C1_","W_norms","X_tilde","W","k","p2","C2","p1","p1","lm"), envir=environment()) + } + res <- pblapply(1:m,function(j) { + df = data.frame(y = X_tilde[,j], cbind(W/t(repmat(W_norms[,j],k,1)),if (p2>0) C2/t(repmat(W_norms[,j],p2,1)) else C2,if (p1>0) C1_/t(repmat(W_norms[,j],k*p1,1)) else C1_)); + mdl1.fit <- lm(y~., data = df) + mdl1.coef <- summary(mdl1.fit)$coefficients + mdl1.cov.names <- colnames(df)[colnames(df) != 'y'] + deltas_gammas_hat_pvals <- sapply(mdl1.cov.names, function(x){ + if (x %in% rownames(mdl1.coef)){ + return(mdl1.coef[x,'Pr(>|t|)']) + } + else { + return(NA) + } + }) + #deltas_gammas_hat_pvals <- summary(mdl1.fit)$coefficients[2:(1+k+p1*k+p2),4]; + gammas_hat_pvals.joint <- numeric(p1)+1 + if (p1){ + C1_alt <- C1_/t(repmat(W_norms[,j],k*p1,1)) + for (d in 1:p1){ + C1_null <- C1_alt[,setdiff(1:(p1*k),seq(d,k*p1,p1))] + df = data.frame(y = X_tilde[,j], cbind(W/t(repmat(W_norms[,j],k,1)),if (p2>0) C2/t(repmat(W_norms[,j],p2,1)) else C2,C1_null)); + mdl0.fit <- lm(y~., data = df) + anova.fit <- anova(mdl0.fit,mdl1.fit); + gammas_hat_pvals.joint[d] <- anova.fit$`Pr(>F)`[2]; + } + } + return(c(deltas_gammas_hat_pvals,gammas_hat_pvals.joint)); + }, cl = cl ) + if (parallel) stop_cluster(cl) + for (j in 1:m){ + deltas_hat_pvals[j,] <- res[[j]][(k+1):(k+p2)] + gammas_hat_pvals[j,] <- res[[j]][(k+p2+1):(k+p2+p1*k)] + gammas_hat_pvals.joint[j,] <- res[[j]][(k+p2+p1*k+1):(k+p2+p1*k+p1)] + } + # res <- pblapply(1:m,function(j) { + # df = data.frame(y = X_tilde[,j], cbind(W/t(repmat(W_norms[,j],k,1)),if (p2>0) C2/t(repmat(W_norms[,j],p2,1)) else C2,if (p1>0) C1_/t(repmat(W_norms[,j],k*p1,1)) else C1_)); + # return(summary(lm(y~., data = df))$coefficients[2:(1+k+p1*k+p2),4]); + # }, cl = cl ) + # if (parallel) stop_cluster(cl) + # for (j in 1:m){ + # deltas_hat_pvals[j,] <- res[[j]][(k+1):(k+p2)] + # gammas_hat_pvals[j,] <- res[[j]][(k+p2+1):(k+p2+p1*k)] + # } + } + return (list("W" = W, "mus_hat" = mus_hat, "sigmas_hat" = sigmas_hat, "tau_hat" = tau_hat, "deltas_hat" = deltas_hat, "gammas_hat" = gammas_hat, "deltas_hat_pvals" = deltas_hat_pvals, "gammas_hat_pvals"= gammas_hat_pvals, "gammas_hat_pvals.joint" = gammas_hat_pvals.joint)) } init_means_vars <- function(C1_names, C2_names, feature_ids, source_ids, tau){ - #config <- config::get() config <- config::get(file = system.file("extdata", "config.yml", package = "TCA"), use_parent = FALSE) k <- length(source_ids) m <- length(feature_ids) @@ -78,6 +135,9 @@ init_means_vars <- function(C1_names, C2_names, feature_ids, source_ids, tau){ sigmas_hat <- matrix(0, nrow=m, ncol=k) deltas_hat <- matrix(0, nrow=m, ncol=p2) gammas_hat <- matrix(0, nrow=m, ncol=p1*k) + deltas_hat_pvals <- matrix(1, nrow=m, ncol=p2) + gammas_hat_pvals <- matrix(1, nrow=m, ncol=p1*k) + gammas_hat_pvals.joint <- matrix(1, nrow=m, ncol=p1) if (is.null(tau)){ tau_hat <- config[["tau_hat_init"]] }else{ @@ -92,22 +152,25 @@ init_means_vars <- function(C1_names, C2_names, feature_ids, source_ids, tau){ colnames(deltas_hat) <- C2_names rownames(gammas_hat) <- feature_ids colnames(gammas_hat) <- C1_names_ - - return( list("mus_hat" = mus_hat, "sigmas_hat" = sigmas_hat, "gammas_hat" = gammas_hat, "deltas_hat"= deltas_hat, "tau_hat" = tau_hat) ) + rownames(deltas_hat_pvals) <- 
feature_ids + colnames(deltas_hat_pvals) <- C2_names + rownames(gammas_hat_pvals) <- feature_ids + colnames(gammas_hat_pvals) <- C1_names_ + rownames(gammas_hat_pvals.joint) <- feature_ids + colnames(gammas_hat_pvals.joint) <- C1_names + + return( list("mus_hat" = mus_hat, "sigmas_hat" = sigmas_hat, "gammas_hat" = gammas_hat, "deltas_hat"= deltas_hat, "deltas_hat_pvals" = deltas_hat_pvals, "gammas_hat_pvals" = gammas_hat_pvals, "gammas_hat_pvals.joint" = gammas_hat_pvals.joint, "tau_hat" = tau_hat) ) } -#' @importFrom parallel clusterExport -#' @importFrom pbapply pblapply #' @importFrom pracma repmat #' @importFrom pracma lsqlincon #' @importFrom nloptr nloptr #' @importFrom matrixStats colVars -tca.fit_means_vars <- function(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, gammas_hat, C1.map, tau, max_iters, parallel, num_cores){ +tca.fit_means_vars <- function(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, gammas_hat, C1.map, tau, vars.mle, constrain_mu, max_iters, parallel, num_cores){ flog.debug("Starting function 'tca.fit_means_vars'...") - #config <- config::get() config <- config::get(file = system.file("extdata", "config.yml", package = "TCA"), use_parent = FALSE) nloptr_opts = list("algorithm"=config[["nloptr_opts_algorithm"]], "xtol_rel"=config[["nloptr_opts_xtol_rel"]], "print_level" = config[["nloptr_opts_print_level"]], "check_derivatives" = config[["nloptr_opts_check_derivatives"]]) @@ -123,6 +186,7 @@ tca.fit_means_vars <- function(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_ha const <- -n*log(2*pi) W_squared <- W**2 + W_squared_ <- cbind(W_squared,numeric(n) + 1) ll_prev <- -Inf # Perform an alternative optimization of the means (mus, deltas, gammas) and variances (sigmas and tau) @@ -133,11 +197,13 @@ tca.fit_means_vars <- function(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_ha # (1) Estimate the means (mus, deltas, gammas) ub <- numeric(k+p2+p1*k)+config[["lsqlincon_inf"]] - ub[1:k] <- if(is.null(config[["mu_max"]])) max(X) else config[["mu_max"]] - ub[1:k] <- ub[1:k] - config[["mu_epsilon"]] lb <- numeric(k+p2+p1*k)-config[["lsqlincon_inf"]] - lb[1:k] <- if(is.null(config[["mu_min"]])) min(X) else config[["mu_min"]] - lb[1:k] <- lb[1:k] + config[["mu_epsilon"]] + if (constrain_mu){ + ub[1:k] <- max(X) + ub[1:k] <- ub[1:k] - config[["mu_epsilon"]] + lb[1:k] <- min(X) + lb[1:k] <- lb[1:k] + config[["mu_epsilon"]] + } if (p1){ # All zeros will get ub and lb set to 0 (i.e. effect sizes will not be estimated); use a small value (config[["nloptr_opts_xtol_rel"]]) for stability; otherwise nloptr may return an error in some cases. 
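To make the new weighted-regression logic above concrete: under `constrain_mu == FALSE` (and similarly in the GMM variance step below), each observation of a feature is scaled by its model-implied standard deviation so that ordinary `lm` machinery (coefficient t-tests, `anova` F-tests on nested fits) applies. The following is a minimal sketch of that idea on hypothetical toy data; it is not the package's internal code, and for brevity the design omits the C2 covariates and W-by-C1 interaction terms that the package includes.
```
# Minimal sketch (hypothetical toy data): the GLS-style weighting behind the
# covariate p-values computed when constrain_mu == FALSE.
set.seed(1)
n <- 100; k <- 3
W <- matrix(runif(n * k), n, k)
W <- W / rowSums(W)                       # mixture proportions; rows sum to 1
sigmas <- c(0.3, 0.2, 0.1); tau <- 0.05   # per-source and i.i.d. sds
x_j <- W %*% rnorm(k, mean = 0.5) + rnorm(n, sd = 0.1)   # one feature
# model-implied sd of observation i: sqrt(sum_h W[i,h]^2 * sigmas[h]^2 + tau^2)
w_norms <- sqrt((W^2) %*% (sigmas^2) + tau^2)
# divide the response and the design by the per-observation sd, then fit;
# homoskedastic t-tests (and anova() on nested fits) are then applicable
dat <- data.frame(y = x_j / w_norms, W / as.vector(w_norms))
fit <- lm(y ~ ., data = dat)
summary(fit)$coefficients[, "Pr(>|t|)"]   # per-coefficient p-values
```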
@@ -185,94 +251,78 @@ tca.fit_means_vars <- function(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_ha # Calculate some quantities that will be repeatedly used throughout the optimization in this step U <- (tcrossprod(W,mus_hat) + tcrossprod(C2,deltas_hat) + tcrossprod(C1_,gammas_hat) - X)**2 - if (sum(colSums(sigmas_hat)) == 0){ - # Set a starting point for the optimization - flog.debug("Get initial estimates of sigmas") - row_names <- rownames(sigmas_hat) - col_names <- colnames(sigmas_hat) - sigmas_hat <- t(repmat((colVars(X)/k)**0.5,k,1)) - rownames(sigmas_hat) <- row_names - colnames(sigmas_hat) <- col_names - } - - # # (2.1) Get initial estimate of sigmas, tau under the assumption that tau is feature-specific - # - # flog.debug("Estimate sigmas, tau under the assumption that tau is feature-specific.") - # lb <- numeric(k+1)+config$min_var - # ub <- numeric(k+1)+Inf - # if (parallel){ - # clusterExport(cl, c("W_tilde","C2_tilde","C1_tilde","X_tilde","lb","ub","n","k","p1","p2","U","const","W_squared","sigmas_hat","tau_hat","nloptr_opts")) - # res <- parLapply(cl, 1:m,function(j) nloptr( x0=c(sigmas_hat[j,],tau_hat), - # eval_f=function(x,U_j,W_squared,const) - # {k<-length(x)-1; - # V <- tcrossprod(W_squared,t(x[1:k]**2))+x[k+1]**2; - # V_squared <- V**2; - # g<-numeric(k+1) - # g[1:k] <- -(colSums(W_squared*repmat(x[1:k],n,1)*t(repmat(U_j,k,1))/repmat(V_squared,1,k)) - colSums(W_squared*repmat(x[1:k],n,1)/repmat(V,1,k))) - # g[k+1] <- -(x[k+1]*(sum(U_j/V_squared)-sum(1./V))) - # return(list("objective"= -0.5*(const-sum(log(V))-sum(U_j/V)), - # "gradient" = g))}, - # lb = lb, - # ub = ub, - # opts = nloptr_opts, - # U_j = U[,j], - # W_squared = W_squared, - # const = const)$solution[1:k] ) - # }else{ - # res <- lapply(1:m,function(j) nloptr( x0=c(sigmas_hat[j,],tau_hat), - # eval_f=function(x,U_j,W_squared,const) - # {k<-length(x)-1; - # V <- tcrossprod(W_squared,t(x[1:k]**2))+x[k+1]**2; - # V_squared <- V**2; - # g<-numeric(k+1) - # g[1:k] <- -(colSums(W_squared*repmat(x[1:k],n,1)*t(repmat(U_j,k,1))/repmat(V_squared,1,k)) - colSums(W_squared*repmat(x[1:k],n,1)/repmat(V,1,k))) - # g[k+1] <- -(x[k+1]*(sum(U_j/V_squared)-sum(1./V))) - # return(list("objective"= -0.5*(const-sum(log(V))-sum(U_j/V)), - # "gradient" = g))}, - # lb = lb, - # ub = ub, - # opts = nloptr_opts, - # U_j = U[,j], - # W_squared = W_squared, - # const = const)$solution[1:k] ) - # } - # for (j in 1:m){ - # sigmas_hat[j,] = res[[j]] - # } + if (vars.mle){ + + if (sum(colSums(sigmas_hat)) == 0){ + # Set a starting point for the optimization + flog.debug("Get initial estimates of sigmas") + row_names <- rownames(sigmas_hat) + col_names <- colnames(sigmas_hat) + sigmas_hat <- t(repmat((colVars(X)/k)**0.5,k,1)) + rownames(sigmas_hat) <- row_names + colnames(sigmas_hat) <- col_names + } + + # (2.2) Estimate sigmas + flog.debug("Estimate sigmas.") + lb <- numeric(k)+config[["min_sd"]] + ub <- numeric(k)+Inf + if (parallel) clusterExport(cl, c("lb","ub","n","k","U","const","W_squared","sigmas_hat","tau_hat","nloptr_opts","minus_log_likelihood_sigmas"), envir=environment()) + res <- pblapply(1:m,function(j) nloptr( x0=sigmas_hat[j,], + eval_f = function(x,U_j,W_squared,const,tau_hat) minus_log_likelihood_sigmas(x,U_j,W_squared,const,tau_hat), + lb = lb, + ub = ub, + opts = nloptr_opts, + U_j = U[,j], + W_squared = W_squared, + const = const, + tau_hat = tau_hat)$solution, cl = cl ) + + for (j in 1:m){ + sigmas_hat[j,] = res[[j]] + } + + # (2.3) Estimate tau + if (is.null(tau)){ + flog.debug("Estimate tau.") + lb <- 
config[["min_sd"]] + ub <- Inf + tau_hat = nloptr(x0=tau_hat, + eval_f = function(x,U,W_squared,sigmas_hat,const) minus_log_likelihood_tau(U,W_squared,sigmas_hat,const,x), + lb = lb, + ub = ub, + opts = nloptr_opts, + U = U, + W_squared = W_squared, + sigmas_hat = sigmas_hat, + const = const)$solution + } - # (2.2) Estimate sigmas - flog.debug("Estimate sigmas.") - lb <- numeric(k)+config[["min_sd"]] - ub <- numeric(k)+Inf - if (parallel) clusterExport(cl, c("lb","ub","n","k","U","const","W_squared","sigmas_hat","tau_hat","nloptr_opts","minus_log_likelihood_sigmas"), envir=environment()) - res <- pblapply(1:m,function(j) nloptr( x0=sigmas_hat[j,], - eval_f = function(x,U_j,W_squared,const,tau_hat) minus_log_likelihood_sigmas(x,U_j,W_squared,const,tau_hat), - lb = lb, - ub = ub, - opts = nloptr_opts, - U_j = U[,j], - W_squared = W_squared, - const = const, - tau_hat = tau_hat)$solution, cl = cl ) - - for (j in 1:m){ - sigmas_hat[j,] = res[[j]] - } - - # (2.3) Estimate tau - if (is.null(tau)){ - flog.debug("Estimate tau.") - lb <- config[["min_sd"]] - ub <- Inf - tau_hat = nloptr(x0=tau_hat, - eval_f = function(x,U,W_squared,sigmas_hat,const) minus_log_likelihood_tau(U,W_squared,sigmas_hat,const,x), - lb = lb, - ub = ub, - opts = nloptr_opts, - U = U, - W_squared = W_squared, - sigmas_hat = sigmas_hat, - const = const)$solution + }else{ + # Learn the variances using gmm + flog.debug("Estimate sigmas, tau") + lb <- numeric(k+1)+config[["min_sd"]] + # calculate weights matrix V + if (sum(colSums(sigmas_hat)) == 0){ + V <- matrix(1,n,m) + }else{ + V <- abs(U - tcrossprod(W_squared_, cbind(sigmas_hat**2, matrix(tau_hat**2,m,1)))) + V[V < config[["V_weight_limit"]]] <- config[["V_weight_limit"]] + } + if (parallel) clusterExport(cl, c("lb","U","W_squared_","lsqlincon","V","n"), envir=environment()) + res <- pblapply(1:m,function(j) { + x <- W_squared_/t(repmat(V[,j],ncol(W_squared_),1)); + # For numeric stability, normalize the design matrix and adjust the final estimates accordingly + norms <- (colSums(x**2))**0.5; + x <- x/repmat(norms,n,1); + lsqlincon(x, U[,j]/V[,j], lb = lb*norms)/norms + }, cl = cl) + tau_squared_hat <- 0 + for (j in 1:m){ + sigmas_hat[j,] <- sqrt(res[[j]][1:k]) + tau_squared_hat <- tau_squared_hat + res[[j]][k+1] + } + tau_hat <- sqrt(tau_squared_hat/m) } flog.debug("Test for convergence.") @@ -300,7 +350,6 @@ tca.fit_W <- function(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, ga flog.debug("Starting function 'tca.fit_W'...") - #config <- config::get() config <- config::get(file = system.file("extdata", "config.yml", package = "TCA"), use_parent = FALSE) nloptr_opts = list("algorithm"=config[["nloptr_opts_fit_W_algorithm"]], "xtol_rel"=config[["nloptr_opts_xtol_rel"]], "print_level" = config[["nloptr_opts_print_level"]], "check_derivatives" = config[["nloptr_opts_check_derivatives"]]) @@ -417,7 +466,7 @@ minus_log_likelihood_w <- function(w_i, x_i, c1_i, mus, tau, gammas, const, C_ti # X is n by m -estimate_Z <- function(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, gammas_hat, parallel, num_cores){ +estimate_Z <- function(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, gammas_hat, scale, parallel, num_cores){ flog.info("Estimate tensor...") # init n <- dim(X)[1] @@ -439,7 +488,7 @@ estimate_Z <- function(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, g cl <- if (parallel) init_cluster(num_cores) else NULL if (parallel) clusterExport(cl, c("W","mus_hat","sigmas_hat","tau_hat","C1","gammas_hat","W_prime","C2_prime","estimate_Z_j"), 
envir=environment()) # Estimate Z - res <- pblapply(1:m, function(j) estimate_Z_j(W, mus_hat[j,], sigmas_hat[j,], tau_hat, C1, gammas_hat[j,], W_prime, C2_prime[,j]), cl = cl ) + res <- pblapply(1:m, function(j) estimate_Z_j(W, mus_hat[j,], sigmas_hat[j,], tau_hat, C1, gammas_hat[j,], W_prime, C2_prime[,j], scale), cl = cl ) if (parallel) stop_cluster(cl) for (j in 1:m){ for (h in 1:k){ @@ -459,53 +508,234 @@ estimate_Z <- function(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, g # Estimate Z for one feature j #' @importFrom matrixcalc matrix.inverse -estimate_Z_j <- function(W, mus_hat_j, sigmas_hat_j, tau_hat, C1, gammas_hat_j, W_prime, C2_prime_j){ +#' @importFrom pracma sqrtm +estimate_Z_j <- function(W, mus_hat_j, sigmas_hat_j, tau_hat, C1, gammas_hat_j, W_prime, C2_prime_j, scale){ n <- nrow(W) k <- ncol(W) p1 <- ncol(C1) Z_j_hat <- matrix(0,n,k) - Sig_j <- matrix.inverse(diag(sigmas_hat_j**2)) + Sig_j_orig <- diag(sigmas_hat_j**2) + Sig_j <- matrix.inverse(Sig_j_orig) C1_prime <- tcrossprod(C1, t(Reshape(gammas_hat_j,p1,k))) for (i in 1:n){ - Z_j_hat[i,] = crossprod(matrix.inverse(W_prime[[i]]+Sig_j), ( tcrossprod(Sig_j,t(mus_hat_j+C1_prime[i,])) + W[i,]*C2_prime_j[i] ) ) + #S_ij_inv <- W_prime[[i]]+Sig_j + #S_ij <- matrix.inverse(S_ij_inv) + ## the above two lines are the straightforward (slower) way to calculate S_ij; calculate 'matrix.inverse(W_prime[[i]]+Sig_j)' using the lemma from Miller 1981: + BA_inv <- W_prime[[i]] %*% Sig_j_orig + g <- sum(diag(BA_inv)) + S_ij <- Sig_j_orig - ((1/(1+g))*(Sig_j_orig %*% BA_inv)) + Z_j_hat[i,] = crossprod(S_ij, ( tcrossprod(Sig_j,t(mus_hat_j+C1_prime[i,])) + W[i,]*C2_prime_j[i] ) ) + if (scale) Z_j_hat[i,] <- Z_j_hat[i,] / diag(S_ij)**0.5 } return(Z_j_hat) } -tcareg.fit <- function(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, test, null_model, alternative_model, parallel, num_cores){ +tcareg.fit <- function(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, test, null_model, alternative_model, fast_mode, parallel, num_cores){ flog.debug("Starting function 'tcareg.fit'...") - single_effect <- if (test == "single_effect") TRUE else FALSE - - if (test == "joint" | test == "single_effect") results <- tcareg.fit_joint(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, parallel, num_cores, single_effect) - if (test == "marginal") results <- tcareg.fit_marginal(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, parallel, num_cores) - if (test == "marginal_conditional") results <- tcareg.fit_marginal_conditional(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, parallel, num_cores) - if (test == "custom") results <- tcareg.fit_custom(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, parallel, num_cores, null_model, alternative_model) + k <- ncol(W) + if (fast_mode){ + if (test == "joint" | test == "single_effect" | test == "marginal_conditional") alternative_model <- 1:k + if (test == "joint" | test == "single_effect") null_model <- c() + if (test == "marginal"){ + results <- vector(mode="list", k) + for (h in 1:k) results[[h]] <- tcareg.fit.fast(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, test, null_model, alternative_model = h, parallel, num_cores) + }else{ + results <- tcareg.fit.fast(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, test, null_model, alternative_model, parallel, num_cores) + } + }else{ + single_effect <- if(test == 
"single_effect") TRUE else FALSE + if (test == "joint" | test == "single_effect") results <- tcareg.fit_joint(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, parallel, num_cores, single_effect) + if (test == "marginal") results <- tcareg.fit_marginal(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, parallel, num_cores) + if (test == "marginal_conditional") results <- tcareg.fit_marginal_conditional(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, parallel, num_cores) + if (test == "custom") results <- tcareg.fit_custom(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, parallel, num_cores, null_model, alternative_model) + } return(results) +} + + +#' @importFrom stats anova +tcareg.fit.fast <- function(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, test, null_model, alternative_model, parallel, num_cores){ + + flog.debug("Starting function 'tcareg.fit.fast'...") + + config <- config::get(file = system.file("extdata", "config.yml", package = "TCA"), use_parent = FALSE) + + m <- ncol(X) + n <- nrow(X) + k <- ncol(W) + + p3 <- ncol(C3) + num_betas <- if (test == "custom" | test == "joint" | test == "marginal_conditional") length(alternative_model) else 1 + num_pvals <- if (test == "marginal_conditional") length(alternative_model) else 1 + Z_hat <- estimate_Z(X, W, mus_hat, sigmas_hat, tau_hat, C2, deltas_hat, C1, gammas_hat, scale = FALSE, parallel, num_cores) + lm0.joint.fit <- if (p3) lm(y~.,data = data.frame(C3)) else lm(y ~ 1) + cl <- if (parallel) init_cluster(num_cores) else NULL + if (parallel) clusterExport(cl, c("Z_hat","n","k","y","test","null_model","alternative_model","C3","p3","num_betas","lm0.joint.fit"), envir=environment()) + res <- pblapply(1:m, function(j){ + r <- list(); + D <- matrix(0,n,k); + for (h in 1:k) D[,h] <- Z_hat[[h]][j,]; + if (test == "single_effect") D <- as.matrix(rowSums(D)); + if (test == "custom"){ + D <- D[,alternative_model,drop=F]; + } + if (test == "marginal"){ + D <- D[,alternative_model,drop=F]; + } + colnames(D) <- paste("X",1:ncol(D),sep="") + D <- cbind(C3,D) + lm.fit <- lm(y~.,data = data.frame(D)); + coefficients <- summary(lm.fit)$coefficients; + r[["intercept"]] <- coefficients[1,1]; + if (p3) r[["alpha"]] <- coefficients[2:(1+p3),1]; + r[["phi"]] <- summary(lm.fit)$sigma; + r[["alternative_ll"]] <- as.numeric(logLik(lm.fit)); + if (test == "custom"){ + lm0.fit <- if (is.null(null_model)) lm0.joint.fit else lm(y~.,data = data.frame(cbind(C3,D[,null_model]))); + anova.fit <- anova(lm0.fit,lm.fit); + stats <- anova.fit$F[2]; + pvals <- anova.fit$`Pr(>F)`[2]; + } + if (test == "joint"){ + anova.fit <- anova(lm0.joint.fit, lm.fit); + stats <- anova.fit$F[2]; + pvals <- anova.fit$`Pr(>F)`[2]; + #stats <- summary(lm.fit)$fstatistic[1]; + #pvals <- pf(summary(lm.fit)$fstatistic[1],summary(lm.fit)$fstatistic[2],summary(lm.fit)$fstatistic[3],lower.tail=FALSE); + } + l <- nrow(coefficients); + beta <- matrix(NA,1,num_betas); + v <- if (max(0,nrow(coefficients)-1-p3)) match(rownames(coefficients)[(p3+2):nrow(coefficients)], colnames(D)[(p3+1):ncol(D)]) else NULL + + if(!is.null(v)) beta[v[!is.na(v)]] <- coefficients[(p3+2):l,1]; + if (test == "single_effect" | test == "marginal_conditional" | test == "marginal"){ + stats <- matrix(NA,1,num_betas); + pvals <- matrix(1,1,num_betas); + if(!is.null(v)){ + stats[v[!is.na(v)]] <- coefficients[(p3+2):l,3]; + pvals[v[!is.na(v)]] <- coefficients[(p3+2):l,4]; + } + } + r[["beta"]] <- 
beta; + r[["stats"]] <- stats; + r[["pvals"]] <- pvals; + return(r) }, cl = cl) + if (parallel) stop_cluster(cl) + + lst <- init_fast_tcareg_output(colnames(X), rownames(X), colnames(W), colnames(C3), num_pvals, num_betas, alternative_model, test) + phi <- lst[["phi"]] + beta <- lst[["beta"]] + intercept <- lst[["intercept"]] + alpha <- lst[["alpha"]] + alternative_ll <- lst[["alternative_ll"]] + stats <- lst[["stats"]] + pvals <- lst[["pvals"]] + qvals <- lst[["qvals"]] + for (j in 1:m){ + phi[j,1] <- res[[j]][["phi"]] + beta[j,] <- res[[j]][["beta"]] + intercept[j,1] <- res[[j]][["intercept"]] + alpha[j,] <- res[[j]][["alpha"]] + alternative_ll[j,1] <- res[[j]][["alternative_ll"]] + stats[j,] <- res[[j]][["stats"]] + pvals[j,] <- res[[j]][["pvals"]] + } + for (i in 1:num_pvals) qvals[,i] <- cbind(p.adjust(pvals[,i], method = config[["fdr_method"]])) + + deg_freedom <- if (test == "marginal" | test == "marginal_conditional" | test == "single_effect") 1 else (length(alternative_model)-length(null_model)) + + return( list("phi" = phi, "beta" = beta, "intercept" = intercept, "alpha" = alpha, "null_ll" = NA, "alternative_ll" = alternative_ll, "pvals" = pvals, "qvals" = qvals, "stats" = stats, "df" = deg_freedom) ) + + +} + +init_fast_tcareg_output <- function(feature_ids, sample_ids, source_ids, C3_names, num_pvals, num_betas, alternative_model, test){ + + m <- length(feature_ids) + p3 <- length(C3_names) + + phi <- matrix(0, m, 1) + beta <- matrix(0, m, num_betas) + intercept <- matrix(0, m, 1) + alpha <- matrix(0, m, p3) + alternative_ll <- matrix(0, m, 1) + stats <- matrix(0, m, num_pvals) + pvals <- matrix(1, m, num_pvals) + qvals <- matrix(1, m, num_pvals) + + rownames(phi) <- feature_ids + rownames(beta) <- feature_ids + rownames(intercept) <- feature_ids + rownames(alpha) <- feature_ids + rownames(alternative_ll) <- feature_ids + rownames(stats) <- feature_ids + rownames(qvals) <- feature_ids + rownames(pvals) <- feature_ids + + if(p3) colnames(alpha) <- C3_names + colnames(beta) <- if (test != "single_effect") source_ids[alternative_model] else "beta" + colnames(phi) <- "phi" + colnames(intercept) <- "intercept" + colnames(alternative_ll) <- "alternative_ll" + + if (test == "marginal_conditional"){ + colnames(stats) <- source_ids + colnames(pvals) <- source_ids + colnames(qvals) <- source_ids + }else{ + colnames(stats) <- "stats" + colnames(pvals) <- "pvals" + colnames(qvals) <- "qvals" + } + + return( list("phi" = phi, "beta" = beta, "intercept" = intercept, "alpha"= alpha, "alternative_ll" = alternative_ll, "stats" = stats, "pvals" = pvals, "qvals" = qvals) ) } tcareg.fit_joint <- function(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, parallel, num_cores, single_effect){ + + flog.debug("Starting function 'tcareg.fit_joint'...") + config <- config::get(file = system.file("extdata", "config.yml", package = "TCA"), use_parent = FALSE) k <- ncol(W) + + lst <- init_tcareg.fit_model(colnames(X), colnames(W), colnames(C3), 1:k, single_effect) + phi <- lst[["phi"]] + beta <- lst[["beta"]] + intercept <- lst[["intercept"]] + alpha <- lst[["alpha"]] + null_ll <- lst[["null_ll"]] + alternative_ll <- lst[["alternative_ll"]] + pvals <- lst[["pvals"]] + qvals <- lst[["qvals"]] + stats <- lst[["stats"]] + mdl1 <- tcareg.optimize(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, 1:k, parallel, num_cores, single_effect) - ll1 <- mdl1[["ll"]] - #d <- data.frame(y,C3) - #mdl0 <- lm(y~.,data = d) # the null log-likelihood under the model y ~ C3 
(i.e. the null under the case of no source-specific effects) + phi[,1] <- mdl1[["phi"]] + beta[,] <- mdl1[["beta"]] + intercept[,1] <- mdl1[["intercept"]] + alpha[,] <- mdl1[["alpha"]] + alternative_ll[,] <- mdl1[["ll"]] + if (ncol(C3) == 0){ mdl0 <- lm(y ~ 1) }else{ mdl0 <- lm(y~.,data = data.frame(C3)) } - ll0 <- numeric(length(mdl1[["ll"]])) + as.numeric(logLik(mdl0)) + null_ll[,1] <- numeric(length(mdl1[["ll"]])) + as.numeric(logLik(mdl0)) + df <- if(single_effect) 1 else ncol(W) - lrt <- lrt.test(ll0, mdl1[["ll"]], df = df) - qvals <- p.adjust(lrt[["pvals"]], method = config[["fdr_method"]]) - return( list("phi" = mdl1[["phi"]], "beta" = mdl1[["beta"]], "intercept" = mdl1[["intercept"]], "alpha" = mdl1[["alpha"]], "null_ll" = ll0, "alternative_ll" = ll1, "pvals" = lrt[["pvals"]], "qvals" = qvals, "stats" = lrt[["stats"]], "df" = df) ) + lrt <- lrt.test(null_ll[,1], alternative_ll[,1], df = df) + pvals[,1] <- lrt[["pvals"]] + qvals[,1] <- p.adjust(lrt[["pvals"]], method = config[["fdr_method"]]) + stats[,1] <- lrt[["stats"]] + + return( list("phi" = phi, "beta" = beta, "intercept" = intercept, "alpha" = alpha, "null_ll" = null_ll, "alternative_ll" = alternative_ll, "pvals" = pvals, "qvals" = qvals, "stats" = stats, "df" = df) ) } @@ -514,21 +744,40 @@ tcareg.fit_marginal <- function(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1 m <- ncol(X) k <- ncol(W) results <- list() - #d <- data.frame(y,C3) - #mdl0 <- lm(y~.,data = d) # the null log-likelihood under the model y ~ C3 (i.e. the null under the case of no source-specific effects) + if (ncol(C3) == 0){ mdl0 <- lm(y ~ 1) }else{ mdl0 <- lm(y~.,data = data.frame(C3)) } for (h in 1:k){ + lst <- init_tcareg.fit_model(colnames(X), colnames(W), colnames(C3), h, FALSE) + phi <- lst[["phi"]] + beta <- lst[["beta"]] + intercept <- lst[["intercept"]] + alpha <- lst[["alpha"]] + null_ll <- lst[["null_ll"]] + alternative_ll <- lst[["alternative_ll"]] + pvals <- lst[["pvals"]] + qvals <- lst[["qvals"]] + stats <- lst[["stats"]] + mdl1 <- tcareg.optimize(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, h, parallel, num_cores, FALSE) - ll1 <- mdl1[["ll"]] - ll0 <- numeric(length(mdl1[["ll"]])) + as.numeric(logLik(mdl0)) + alternative_ll[,1] <- mdl1[["ll"]] + null_ll[,1] <- numeric(length(mdl1[["ll"]])) + as.numeric(logLik(mdl0)) + phi[,1] <- mdl1[["phi"]] + beta[,] <- mdl1[["beta"]] + intercept[,1] <- mdl1[["intercept"]] + alpha[,] <- mdl1[["alpha"]] + df <- 1 - lrt <- lrt.test(ll0, ll1, df = df) - qvals <- p.adjust(lrt[["pvals"]], method = config[["fdr_method"]]) - results[[length(results)+1]] = list("phi" = mdl1[["phi"]], "beta" = mdl1[["beta"]], "intercept" = mdl1[["intercept"]], "alpha" = mdl1[["alpha"]], "null_ll" = ll0, "alternative_ll" = ll1, "pvals" = lrt[["pvals"]], "qvals" = qvals, "stats" = lrt[["stats"]], "df" = df) + lrt <- lrt.test(null_ll[,1], alternative_ll[,1], df = df) + + pvals[,1] <- lrt[["pvals"]] + qvals[,1] <- p.adjust(lrt[["pvals"]], method = config[["fdr_method"]]) + stats[,1] <- lrt[["stats"]] + + results[[length(results)+1]] <- list("phi" = phi, "beta" = beta, "intercept" = intercept, "alpha" = alpha, "null_ll" = null_ll, "alternative_ll" = alternative_ll, "pvals" = pvals, "qvals" = qvals, "stats" = stats, "df" = df) } return(results) } @@ -539,49 +788,135 @@ tcareg.fit_marginal_conditional <- function(X, y, W, mus_hat, sigmas_hat, C2, de m <- ncol(X) k <- ncol(W) results <- list() + # Calculate the likelihood of the alternative model mdl1 <- tcareg.optimize(X, y, W, mus_hat, sigmas_hat, C2, 
deltas_hat, C1, gammas_hat, tau_hat, C3, 1:k, parallel, num_cores, FALSE) - ll1 <- mdl1[["ll"]] + # Calculate the likelihood of each of the null models + df <- 1 for (h in 1:k){ + lst <- init_tcareg.fit_model(colnames(X), colnames(W), colnames(C3), 1:k, FALSE) + phi <- lst[["phi"]] + beta <- lst[["beta"]] + intercept <- lst[["intercept"]] + alpha <- lst[["alpha"]] + null_ll <- lst[["null_ll"]] + alternative_ll <- lst[["alternative_ll"]] + pvals <- lst[["pvals"]] + qvals <- lst[["qvals"]] + stats <- lst[["stats"]] + phi[,1] <- mdl1[["phi"]] + beta[,] <- mdl1[["beta"]] + + intercept[,1] <- mdl1[["intercept"]] + alpha[,] <- mdl1[["alpha"]] + mdl0 <- tcareg.optimize(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, setdiff(1:k,h), parallel, num_cores, FALSE) - ll0 <- mdl0[["ll"]] + null_ll[,1] <- mdl0[["ll"]] + alternative_ll[,1] <- mdl1[["ll"]] + df <- 1 - lrt <- lrt.test(ll0, ll1, df = df) - qvals <- p.adjust(lrt[["pvals"]], method = config[["fdr_method"]]) - results[[length(results)+1]] = list("phi" = mdl1[["phi"]], "beta" = mdl1[["beta"]], "intercept" = mdl1[["intercept"]], "alpha" = mdl1[["alpha"]], "null_ll" = ll0, "alternative_ll" = ll1, "pvals" = lrt[["pvals"]], "qvals" = qvals, "stats" = lrt[["stats"]], "df" = df) + lrt <- lrt.test(null_ll[,1], alternative_ll[,1], df = df) + pvals[,1] <- lrt[["pvals"]] + qvals[,1] <- p.adjust(lrt[["pvals"]], method = config[["fdr_method"]]) + stats[,1] <- lrt[["stats"]] + + results[[length(results)+1]] <- list("phi" = phi, "beta" = beta, "intercept" = intercept, "alpha" = alpha, "null_ll" = null_ll, "alternative_ll" = alternative_ll, "pvals" = pvals, "qvals" = qvals, "stats" = stats, "df" = df) } return(results) } tcareg.fit_custom <- function(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, parallel, num_cores, null_model, alternative_model){ + config <- config::get(file = system.file("extdata", "config.yml", package = "TCA"), use_parent = FALSE) + + lst <- init_tcareg.fit_model(colnames(X), colnames(W), colnames(C3), alternative_model, FALSE) + phi <- lst[["phi"]] + beta <- lst[["beta"]] + intercept <- lst[["intercept"]] + alpha <- lst[["alpha"]] + null_ll <- lst[["null_ll"]] + alternative_ll <- lst[["alternative_ll"]] + pvals <- lst[["pvals"]] + qvals <- lst[["qvals"]] + stats <- lst[["stats"]] + + mdl1 <- tcareg.optimize(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, alternative_model, parallel, num_cores, FALSE) + alternative_ll[,1] <- mdl1[["ll"]] + phi[,1] <- mdl1[["phi"]] + beta[,] <- mdl1[["beta"]] + intercept[,1] <- mdl1[["intercept"]] + alpha[,] <- mdl1[["alpha"]] + if (is.null(null_model)){ - #d <- data.frame(y,C3) - #mdl0 <- lm(y~.,data = d) # the null log-likelihood under the model y ~ C3 (i.e. 
the null under the case of no source-specific effects) if (ncol(C3) == 0){ mdl0 <- lm(y ~ 1) }else{ mdl0 <- lm(y~.,data = data.frame(C3)) } - ll0 <- numeric(ncol(X)) + as.numeric(logLik(mdl0)) + null_ll[,1] <- numeric(ncol(X)) + as.numeric(logLik(mdl0)) }else{ + # mdl0.ll <- conditional_model_log_likelihood.all(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, mdl1$phi, mdl1$beta[,match(null_model,alternative_model),drop=F], mdl1$alpha, mdl1$intercept, null_model, parallel, num_cores) mdl0 <- tcareg.optimize(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, null_model, parallel, num_cores, FALSE) - ll0 <- mdl0[["ll"]] + null_ll[,1] <- mdl0[["ll"]] } - mdl1 <- tcareg.optimize(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, alternative_model, parallel, num_cores, FALSE) - ll1 <- mdl1[["ll"]] + df <- length(alternative_model) - length(null_model) - lrt <- lrt.test(ll0, ll1, df = df) - qvals <- p.adjust(lrt[["pvals"]], method = config[["fdr_method"]]) - return( list("phi" = mdl1[["phi"]], "beta" = mdl1[["beta"]], "intercept" = mdl1[["intercept"]], "alpha" = mdl1[["alpha"]], "null_ll" = ll0, "alternative_ll" = ll1, "pvals" = lrt[["pvals"]], "qvals" = qvals, "stats" = lrt[["stats"]], "df" = df) ) + lrt <- lrt.test(null_ll[,1], alternative_ll[,1], df = df) + + pvals[,1] <- lrt[["pvals"]] + qvals[,1] <- p.adjust(lrt[["pvals"]], method = config[["fdr_method"]]) + stats[,1] <- lrt[["stats"]] + + return( list("phi" = phi, "beta" = beta, "intercept" = intercept, "alpha" = alpha, "null_ll" = null_ll, "alternative_ll" = alternative_ll, "pvals" = pvals, "qvals" = qvals, "stats" = stats, "df" = df) ) } +init_tcareg.fit_model <- function(feature_ids, source_ids, C3_names, mdl, single_effect){ + + flog.debug("Starting function init_tcareg.fit_model...") + + m <- length(feature_ids) + p3 <- length(C3_names) + num_beta <- length(mdl) + + phi <- matrix(0,m,1) + beta <- if (single_effect) matrix(0,m,1) else matrix(0,m,num_beta) + intercept <- matrix(0,m,1) + alpha <- matrix(0,m,p3) + null_ll <- matrix(0,m,1) + alternative_ll <- matrix(0,m,1) + + pvals <- matrix(1,m,1) + qvals <- matrix(1,m,1) + stats <- matrix(1,m,1) + + rownames(phi) <- feature_ids + rownames(beta) <- feature_ids + rownames(intercept) <- feature_ids + rownames(alpha) <- feature_ids + rownames(null_ll) <- feature_ids + rownames(alternative_ll) <- feature_ids + rownames(pvals) <- feature_ids + rownames(qvals) <- feature_ids + rownames(stats) <- feature_ids + + colnames(beta) <- if(single_effect) "beta" else source_ids[mdl] + colnames(phi) <- "phi" + colnames(intercept) <- "intercept" + colnames(alpha) <- C3_names + colnames(null_ll) <- "null_ll" + colnames(alternative_ll) <- "alternative_ll" + colnames(pvals) <- "pvals" + colnames(qvals) <- "qvals" + colnames(stats) <- "stats" + + return(list("phi" = phi, "beta" = beta, "intercept" = intercept, "alpha" = alpha, "null_ll" = null_ll, "alternative_ll" = alternative_ll,"pvals" = pvals, "qvals" = qvals, "stats" = stats)) + +} tcareg.optimize <- function(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, mdl, parallel, num_cores, single_effect){ flog.debug("...") - #config <- config::get() config <- config::get(file = system.file("extdata", "config.yml", package = "TCA"), use_parent = FALSE) nloptr_opts = list("algorithm"=config[["nloptr_opts_fit_conditional_algorithm"]], "xtol_rel"=config[["nloptr_opts_xtol_rel"]], "print_level" = config[["nloptr_opts_print_level"]], "check_derivatives" = 
config[["nloptr_opts_check_derivatives"]]) @@ -604,8 +939,6 @@ tcareg.optimize <- function(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, ga # X0 <- get_initial_estimates(Z_hat, y, C3, mdl, lambda, single_effect, parallel, num_cores) # Set initial estimates for phi and alpha for each feature j using the model y~C3; initialize all betas with 0 - #d <- data.frame(y,C3) - #mdl0 <- lm(y~.,data = d) if (p3 == 0){ mdl0 <- lm(y ~ 1) }else{ @@ -630,11 +963,13 @@ tcareg.optimize <- function(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, ga if (parallel) clusterExport(cl, c("lb","ub","p1","k","W","y","C1","C3","n","l","X0","const","sigmas_squared","X_tilde","V","mdl","gammas_hat","sigmas_squared","mus_hat","lambda","single_effect","nloptr_opts","conditional_model_minus_log_likelihood","tcareg.optimize_j"), envir=environment()) res <- pblapply(1:m, function(j) tcareg.optimize_j(j, y, W, C1, C3, mus_hat, gammas_hat, const, sigmas_squared, V, X_tilde, X0, mdl, lb, ub, nloptr_opts, single_effect, lambda), cl = cl) if (parallel) stop_cluster(cl) + intercept <- matrix(0, m, 1) alpha <- matrix(0, m, p3) beta <- matrix(0, m, num_betas) phi <- matrix(0, m, 1) ll <- matrix(0, m, 1) + for (j in 1:m){ phi[j,] <- res[[j]][["solution"]][1] beta[j,] <- res[[j]][["solution"]][2:(1+num_betas)] @@ -681,6 +1016,55 @@ tcareg.optimize_j <- function(j, y, W, C1, C3, mus_hat, gammas_hat, const, sigma return( list("solution" = nloptr_res[["solution"]], "objective" = nloptr_res[["objective"]]) ) } +# +# conditional_model_log_likelihood.all <- function(X, y, W, mus_hat, sigmas_hat, C2, deltas_hat, C1, gammas_hat, tau_hat, C3, phis, betas, alphas, intercepts, mdl, parallel, num_cores){ +# +# flog.debug("Starting function 'conditional_model_log_likelihood.all'...") +# +# config <- config::get(file = system.file("extdata", "config.yml", package = "TCA"), use_parent = FALSE) +# lambda <- config[["lambda"]] +# +# l <- length(mdl) +# k <- ncol(W) +# n <- nrow(W) +# m <- ncol(X) +# p1 <- ncol(C1) +# C1_ <- calc_C1_W_interactions(W,C1) +# X_tilde = X - (tcrossprod(C2,deltas_hat) + tcrossprod(C1_,gammas_hat) + tcrossprod(W,mus_hat)) +# const <- -n*log(2*pi) +# sigmas_squared <- sigmas_hat**2 +# V <- tau_hat**2 + tcrossprod(W**2,sigmas_squared) +# C3 <- cbind(as.matrix(numeric(n)+1), C3) +# cl <- NULL +# if (parallel){ +# cl <- init_cluster(num_cores) +# clusterExport(cl, varlist = c("l","k","n","m","p1","C1_","X_tilde","const","V","X","y","W","mus_hat","sigmas_squared","C1","deltas_hat","C1","gammas_hat","tau_hat", "C3", "mdl", "phis", "alphas", "betas", "intercepts"), envir=environment()) +# } +# flog.debug("Calculate the likelihood for every feature...") +# res <- pblapply(1:m,function(j){ +# gammas_hat_j_tilde <- t(Reshape(gammas_hat[j,],p1,k)[,mdl]); +# gammas_hat_j_tilde <- if (p1 == 1) t(gammas_hat_j_tilde) else gammas_hat_j_tilde; +# e1 <- V[,j]; +# e2 <- W[,mdl]*repmat(sigmas_squared[j,mdl],n,1); +# e3 <- t(repmat(e1,l,1)); +# e4 <- repmat(mus_hat[j,mdl],n,1) + tcrossprod(C1, gammas_hat_j_tilde) + (e2 * t(repmat(X_tilde[,j],l,1)) )/e1; +# e5 <- repmat(sigmas_squared[j,mdl],n,1); +# e6 <- e2/e3; +# sigmas_squared_j <- sigmas_squared[j,]; +# phi_j <- phis[j]; +# beta_j <- betas[j,]; +# alpha_j <- alphas[j,]; +# intercept_j <- intercepts[j,]; +# x <- c(phi_j,beta_j,alpha_j,intercept_j) +# neg_ll <- conditional_model_minus_log_likelihood(x, y, C3, const, sigmas_squared_j, e1, e2, e3, e4, e5, e6, mdl, lambda); +# return(-neg_ll$objective); +# }, cl = cl ) +# if (parallel) stop_cluster(cl) +# ll0 <- numeric(m) +# for (j in 
1:m) ll0[j] <- res[[j]] +# return(ll0) +# } + conditional_model_minus_log_likelihood <- function(x, y, C3, const, sigmas_squared_j, e1, e2, e3, e4, e5, e6, mdl, lambda){ l <- length(mdl) @@ -690,7 +1074,6 @@ conditional_model_minus_log_likelihood <- function(x, y, C3, const, sigmas_squar alpha_j = x[-1:-(1+l)] s1 <- beta_j**2 s2 <- phi**2 + sum((s1)*(sigmas_squared_j[mdl])) - (tcrossprod(e2,t(beta_j))**2)/e1 # sigma_ij_tilde_squared - #s3 <- tcrossprod(C3,t(alpha_j)) + tcrossprod(e4,t(beta_j)) - y # mu_ij_tilde - y s3 <- C3 %*% as.matrix(alpha_j) + tcrossprod(e4,t(beta_j)) - y # mu_ij_tilde - y s4 <- s3/s2 s5 <- s3*s4 diff --git a/R/refactor.R b/R/refactor.R index 7a2f26f..7e4e75b 100644 --- a/R/refactor.R +++ b/R/refactor.R @@ -27,7 +27,8 @@ refactor.run <- function(X, k, sparsity, C, C.remove, sd_threshold, num_comp, ra X_adj <- X if (!is.null(C)){ flog.info("Adjusting the data for covariates...") - X_adj <- X - tcrossprod(tcrossprod(X,tcrossprod(matrix.inverse(crossprod(C,C)),C)),C) + C_ <- cbind(numeric(nrow(C)) + 1, C) + X_adj <- X - tcrossprod(tcrossprod(X,tcrossprod(matrix.inverse(crossprod(C_,C_)),C_)),C_) } flog.info("Running PCA on X using rand_svd == %s...", rand_svd) diff --git a/R/utils.R b/R/utils.R index 82442cb..bae0f04 100644 --- a/R/utils.R +++ b/R/utils.R @@ -61,8 +61,21 @@ assert <- function (expr, error) { if (! expr) stop(error, call. = FALSE) } +tensor.validate_input <- function(X, scale, parallel, num_cores, log_file, debug){ -tca.validate_input <- function(X, W, C1, C1.map, C2, refit_W, refit_W.features, refit_W.sparsity, refit_W.sd_threshold, tau, parallel, num_cores, max_iters, log_file, debug){ + flog.debug("Validating input types...") + + assert(is.matrix(X), "X must be of class 'matrix'") + assert(!is.null(rownames(X)) & !is.null(colnames(X)), "X must have row names and column names") + assert(is.logical(scale), "scale must be of class 'logical'") + assert(is.logical(debug), "debug must be of class 'logical'") + assert(is.logical(parallel), "parallel must be of class 'logical'") + assert(is.null(num_cores) | is.numeric(num_cores), "argument num_cores must take a numeric value or NULL") + assert(is.character(log_file) | is.null(log_file), "log_file must be of class 'character' or NULL") + +} + +tca.validate_input <- function(X, W, C1, C1.map, C2, refit_W, refit_W.features, refit_W.sparsity, refit_W.sd_threshold, tau, constrain_mu, parallel, num_cores, max_iters, log_file, debug){ flog.debug("Validating input types...") @@ -85,14 +98,15 @@ tca.validate_input <- function(X, W, C1, C1.map, C2, refit_W, refit_W.features, assert(is.character(log_file) | is.null(log_file), "log_file must be of class 'character' or NULL") flog.debug("Validating input stucture and values...") - assert(!is.null(rownames(X)) | !is.null(colnames(X)), "X must have row names and column names") - assert(!is.null(rownames(W)) | !is.null(colnames(W)), "W must have row names and column names") - if (!is.null(C1)) assert(!is.null(rownames(C1)) | !is.null(colnames(C1)), "C1 must have row names and column names") - if (!is.null(C2)) assert(!is.null(rownames(C2)) | !is.null(colnames(C2)), "C2 must have row names and column names") + assert(!is.null(rownames(X)) & !is.null(colnames(X)), "X must have row names and column names") + assert(!is.null(rownames(W)) & !is.null(colnames(W)), "W must have row names and column names") + if (!is.null(C1)) assert(!is.null(rownames(C1)) & !is.null(colnames(C1)), "C1 must have row names and column names") + if (!is.null(C2)) assert(!is.null(rownames(C2)) & 
!is.null(colnames(C2)), "C2 must have row names and column names") flog.debug("Validating input conditions...") if (refit_W) assert(refit_W.sparsity <= nrow(X) , "argument refit_W.sparsity must satisfy refit_W.sparsity <= nrow(X)") if (refit_W) assert(refit_W.sd_threshold >= 0 , "argument refit_W.sd_threshold must satisfy refit_W.sd_threshold >= 0") + if (!(is.null(C1.map))) assert(constrain_mu , "argument C1.map cannot be used with constrain_mu set to FALSE") flog.debug("Validating matrix dimensions...") assert(dim(X)[2] == dim(W)[1] , "The number of columns in X is inconsistent with the number of rows in W") @@ -134,7 +148,7 @@ tcasub.validate_input <- function(tca.mdl, features, log_file, debug){ } -tcareg.validate_input <- function(X, W, y, C3, test, null_model, alternative_model, save_results, output, sort_results, parallel, num_cores, log_file, features_metadata ,debug){ +tcareg.validate_input <- function(X, W, y, C3, test, null_model, alternative_model, save_results, fast_mode, output, sort_results, parallel, num_cores, log_file, features_metadata ,debug){ flog.debug("Validating input type for tcareg...") @@ -155,14 +169,17 @@ tcareg.validate_input <- function(X, W, y, C3, test, null_model, alternative_mod assert(is.character(output), "argument output must take a character value") assert(is.logical(save_results), "argument save_results must take a logical value") + assert(is.logical(fast_mode), "argument fast_mode must take a logical value") assert(is.logical(sort_results), "argument sort_results must take a logical value") assert(is.logical(parallel), "argument parallel must take a logical value") assert(is.logical(debug), "argument debug must take a logical value") assert(is.character(log_file) | is.null(log_file), "argument log_file must take a character value or NULL") flog.debug("Validating input stucture and values...") - assert(!is.null(rownames(X)) | !is.null(colnames(X)), "X must have row names and column names") - if (!is.null(C3)) assert(!is.null(rownames(C3)) | !is.null(colnames(C3)), "C3 must have row names and column names") + assert(!is.null(rownames(X)) & !is.null(colnames(X)), "X must have row names and column names") + assert(!is.null(rownames(W)) & !is.null(colnames(W)), "W must have row names and column names") + assert(!is.null(rownames(y)), "y must have row names") + if (!is.null(C3)) assert(!is.null(rownames(C3)) & !is.null(colnames(C3)), "C3 must have row names and column names") if (test == "custom") assert( (!is.null(alternative_model)), "argument alternative_model cannot be NULL when test=custom") if ( (!is.null(alternative_model))){ assert(test == "custom", "argument test must be set to 'custom' if argument alternative_model is not NULL") @@ -173,6 +190,7 @@ tcareg.validate_input <- function(X, W, y, C3, test, null_model, alternative_mod assert(length(setdiff(alternative_model, null_model)) > 0 & length(setdiff(null_model, alternative_model)) == 0, "null_model must be nested within alternative_model") } } + assert(!( (sort_results & test == "marginal_conditional") & fast_mode), "sort_results cannot be set to TRUE when fast_mode == TRUE and test == 'marginal_conditional'") flog.debug("Validating matrix dimensions...") if (!is.null(C3)) assert(dim(X)[2] == dim(C3)[1] , "the number of columns in X is inconsistent with the number of rows in C3") @@ -206,36 +224,44 @@ lrt.test <- function(ll0, ll1, df){ } -save_association_results <- function(res, output, test, alternative_model, feature_ids, W_names, C3_names, sort_results, features_metadata){ 
+save_association_results <- function(res, output, test, fast_mode, alternative_model, feature_ids, W_names, C3_names, sort_results, features_metadata){ flog.debug("Running save_association_results...") m <- length(feature_ids) metadata <- if (is.null(features_metadata)) matrix(0,m,0) else parse_features_metadata(feature_ids, features_metadata) - if (test == "marginal" | test == "marginal_conditional"){ - # marginal or marginal_conditional + if (test == "marginal" | (!fast_mode & test == "marginal_conditional") ){ for (i in 1:length(res)){ filename <- paste(output, ".", test, ".", W_names[i], ".txt", sep ="") wnames <- if (test == "marginal") W_names[i] else W_names - save_association_results.save(res[[i]], filename, feature_ids, sort_results, wnames, C3_names, metadata) + save_association_results.save(res[[i]], test, fast_mode, filename, feature_ids, sort_results, wnames, C3_names, metadata) } }else{ - # joint, single_effect, or custom filename <- paste(output, ".", test, ".txt", sep ="") + keep <- 1:length(W_names) if (test == "custom") keep <- alternative_model - if (test == "joint") keep <- 1:length(W_names) if (test == "single_effect") keep <- 1 - save_association_results.save(res, filename, feature_ids, sort_results, W_names[keep], C3_names, metadata) + save_association_results.save(res, test, fast_mode, filename, feature_ids, sort_results, W_names[keep], C3_names, metadata) } } -save_association_results.save <- function(res, filename, feature_ids, sort_results, W_names, C3_names, metadata){ +save_association_results.save <- function(res, test, fast_mode, filename, feature_ids, sort_results, W_names, C3_names, metadata){ flog.debug("save_association_results.save...") - #config <- config::get() config <- config::get(file = system.file("extdata", "config.yml", package = "TCA"), use_parent = FALSE) m <- length(feature_ids) - data <- data.frame(feature_ids, metadata, res$pvals, res$qvals, res$beta, res$null_ll, res$alternative_ll, res$stats, res$df, res$intercept, res$alpha) - betas <- if(length(W_names) == 1) "beta" else lapply(1:length(W_names), function(i) paste("beta.",W_names[i],sep="")) - colnames(data) <- c("ID", colnames(metadata), "pval", "qval", betas, "null_ll", "alternative_ll", "chi_squared", "df", "intercept", C3_names) + betas <- if(length(W_names) == 1) "beta" else unlist(lapply(1:length(W_names), function(i) paste("beta.",W_names[i],sep=""))) + pvals <- if(test == "marginal_conditional" & fast_mode) unlist(lapply(1:length(W_names), function(i) paste("pval.",W_names[i],sep=""))) else "pval" + qvals <- if(test == "marginal_conditional" & fast_mode) unlist(lapply(1:length(W_names), function(i) paste("qval.",W_names[i],sep=""))) else "qval" + alpha <- if (length(C3_names)) unlist(lapply(1:length(C3_names), function(i) paste("alpha.",C3_names[i],sep=""))) else c() + df <- if(is.null(res$df)) c() else "df" + df.values <- if(is.null(res$df)) matrix(0,m,0) else matrix(0,m,1)+res$df + + if (fast_mode){ + data <- data.frame(feature_ids, metadata, res$pval, res$qval, res$beta, res$alternative_ll, res$stat, df.values, res$intercept, res$alpha, res$phi) + colnames(data) <- c("ID", colnames(metadata), pvals, qvals, betas, "alternative_ll", "stat", df, "intercept", alpha, "phi") + }else{ + data <- data.frame(feature_ids, metadata, res$pval, res$qval, res$beta, res$null_ll, res$alternative_ll, res$stat, res$df, res$intercept, res$alpha, res$phi) + colnames(data) <- c("ID", colnames(metadata), "pval", "qval", betas, "null_ll", "alternative_ll", "chi_squared", "df", "intercept", alpha, "phi") + } if (sort_results){ # sort by p-value data <- data[order(data$pval),]
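Note that under fast_mode with test == "marginal_conditional", the table written above carries one p-value and one q-value column per cell type instead of a single pval/qval pair, which is also why sort_results is rejected for that combination in tcareg.validate_input. A small sketch of the resulting column names, assuming hypothetical cell types CD4T and CD8T:

```r
W_names <- c("CD4T", "CD8T")  # hypothetical cell-type names
unlist(lapply(1:length(W_names), function(i) paste("pval.", W_names[i], sep = "")))
# [1] "pval.CD4T" "pval.CD8T"
unlist(lapply(1:length(W_names), function(i) paste("qval.", W_names[i], sep = "")))
# [1] "qval.CD4T" "qval.CD8T"
```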
"phi") + } if (sort_results){ # sort by p-value data <- data[order(data$pval),] diff --git a/README.md b/README.md index f678cbd..28c3642 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ The R package of TCA is available on CRAN. Run the following in R to install TCA install.packages("TCA") ``` -The full documentation of the TCA R package can be found here. +The manual of the TCA R package can be found here. You can also find a full working example of TCA in this vignette about cell-type-specific resolution epigenetics using TCA in R. @@ -23,7 +23,7 @@ You can also find a full working example of TCA in 1: Rahmani et al. "Cell-type-specific resolution epigenetics without the need for cell sorting or single-cell biology." Nature Communications 2019. +1: Rahmani et al. "Cell-type-specific resolution epigenetics without the need for cell sorting or single-cell biology." Nature communications 10.1 (2019): 1-11. + +2: Houseman et al. "DNA methylation arrays as surrogate measures of cell mixture distribution." BMC bioinformatics 13.1 (2012): 86. + +3: Rahmani et al. "BayesCCE: a Bayesian framework for estimating cell-type composition from DNA methylation without the need for methylation reference." Genome biology 19.1 (2018): 1-18. + +4: Hannum et al. "Genome-wide methylation profiles reveal quantitative views of human aging rates." Molecular cell 49.2 (2013): 359-367. + +5: Liu et al. "Blood monocyte transcriptome and epigenome analyses reveal loci associated with human atherosclerosis." Nature communications 8.1 (2017): 1-12. + +6: Teschendorff et al. "A comparison of reference-based algorithms for correcting cell-type heterogeneity in Epigenome-Wide Association Studies." BMC bioinformatics 18.1 (2017): 105. + +7: Lehne et al. "A coherent approach for analysis of the Illumina HumanMethylation450 BeadChip improves data quality and performance in epigenome-wide association studies." Genome biology 16.1 (2015): 37. + +8: Bell et al. "Epigenome-wide scans identify differentially methylated regions for age and age-related phenotypes in a healthy ageing population." PLoS genetics 8.4 (2012). + +9: Jaffe and Irizarry. "Accounting for cellular heterogeneity is critical in epigenome-wide association studies." Genome biology 15.2 (2014): R31. + +10: Rahmani et al. "Sparse PCA corrects for cell type heterogeneity in epigenome-wide association studies." Nature methods 13.5 (2016): 443. -2: Liu et al. "Epigenome-wide association data implicate DNA methylation as an intermediary of genetic risk in rheumatoid arthritis." Nature biotechnology. 2013 Feb;31(2):142. +11: Rahmani et al. "Correcting for cell-type heterogeneity in DNA methylation: a comprehensive evaluation." Nature methods 14.3 (2017): 218-219. -3: Rahmani et al. "GLINT: a user-friendly toolset for the analysis of high-throughput DNA-methylation array data." Bioinformatics. 2017 Feb 8;33(12):1870-2. +12: Rahmani et al. "GLINT: a user-friendly toolset for the analysis of high-throughput DNA-methylation array data." Bioinformatics 33.12 (2017): 1870-1872. -4: Houseman et al. "DNA methylation arrays as surrogate measures of cell mixture distribution." BMC bioinformatics. 2012 Dec;13(1):86. +13: Chen et al. "Discovery of cross-reactive probes and polymorphic CpGs in the Illumina Infinium HumanMethylation450 microarray." Epigenetics 8.2 (2013): 203-209. -5: Rahmani et al. "BayesCCE: a Bayesian framework for estimating cell-type composition from DNA methylation without the need for methylation reference." Genome biology. 2018 Dec;19(1):141. 
+14: McCartney et al. "Identification of polymorphic and off-target probe binding sites on the Illumina Infinium MethylationEPIC BeadChip." Genomics data 9 (2016): 22-24. -6: Horvath "DNA methylation age of human tissues and cell types. Genome biology." 2013 Oct;14(10):3156. +15: Paquette et al. "Regions of variable DNA methylation in human placenta associated with newborn neurobehavior." Epigenetics 11.8 (2016): 603-613. -7: Rahmani et al. "Sparse PCA corrects for cell type heterogeneity in epigenome-wide association studies." Nature methods. 2016 May;13(5):443. +16: Xu et al. "ENmix: a novel background correction method for Illumina HumanMethylation450 BeadChip." Nucleic acids research 44.3 (2016): e20-e20. -8: Rahmani et al. "Correcting for cell-type heterogeneity in DNA methylation: a comprehensive evaluation." Nature methods. 2017 Mar;14(3):218. +17: Bakulski et al. "DNA methylation of cord blood cell types: applications for mixed cell birth studies." Epigenetics 11.5 (2016): 354-362. diff --git a/vignettes/vignette_analysis.R new file mode 100644 index 0000000..7c14583 --- /dev/null +++ b/vignettes/vignette_analysis.R @@ -0,0 +1,427 @@ +library("TCA") +library("ggplot2") +library("ggpubr") +library("pracma") +library("matrixStats") + +prep_data <- function(data_path){ + + file_name1 <- paste(data_path,.Platform$file.sep,"hannum.chr22.RData",sep="") + file_name2 <- paste(data_path,.Platform$file.sep,"liu.cd4.chr22.RData",sep="") + file_name3 <- paste(data_path,.Platform$file.sep,"paquette.chr22.RData",sep="") + + file_names <- c(file_name1, file_name2, file_name3) + + if(any(!file.exists(file_name1, file_name2, file_name3))){ + library("GEOquery") + library("data.table") + library("EpiDISH") + } + + if (!file.exists(file_name1)){ + + # Download the Hannum et al. data + gse <- GEOquery::getGEO("GSE40279", destdir = data_path, GSEMatrix = TRUE) + + # Extract methylation data + X.hannum <- Biobase::exprs(gse[[1]]) + + # Covariates; note that there is also an 'ethnicity' covariate in the data; however, it is perfectly captured by the plate information + plate.hannum <- as.numeric(as.factor((Biobase::pData(gse[[1]])[["plate:ch1"]]))) + cov.hannum <- cbind(as.numeric((Biobase::pData(gse[[1]])[["age (y):ch1"]])), as.numeric(as.factor((Biobase::pData(gse[[1]])[["gender:ch1"]]))), indicator_vars(plate.hannum)) + rownames(cov.hannum) <- colnames(X.hannum) + colnames(cov.hannum) <- c("age", "gender", "plate1", "plate2", "plate3", "plate4", "plate5", "plate6", "plate7", "plate8")
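+  # (Illustration: indicator_vars(), defined at the bottom of this file, encodes a
+  # categorical vector as 0/1 columns, dropping its last unique value. For a
+  # hypothetical plate vector c(1,2,1,3) it returns:
+  #      [,1] [,2]
+  # [1,]    1    0
+  # [2,]    0    1
+  # [3,]    1    0
+  # [4,]    0    0
+  # hence the 8 plate indicator columns above encode 9 distinct plates.)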
+ + # Calculate principal components from control probes that reflect technical variability; since we are not working with IDAT files we don't have "real" control probes - instead, we use low variance sites that are not expected to capture any true biological variability. + low_var_pcs.hannum <- low_var_pcs(X.hannum, rank = 10, p = 1000) + cov.hannum <- cbind(cov.hannum, low_var_pcs.hannum) + + # estimate cell-type proportions using a reference-based approach + ref <- as.matrix(EpiDISH::centDHSbloodDMC.m[,c("Neutro","Eosino","CD4T","CD8T","Mono","B","NK")]) + W.hannum <- EpiDISH::epidish(X.hannum, ref)$estF + # merge Neutro and Eosino + W.hannum.gran <- W.hannum[,"Neutro",drop=F] + W.hannum[,"Eosino",drop=F] + colnames(W.hannum.gran) <- "Gran" + W.hannum <- cbind(W.hannum.gran,W.hannum[,c("CD4T","CD8T","Mono","B","NK")]) + + # remove polymorphic sites, cross-reactive sites, and non-autosomal sites according to Chen et al.; in addition, remove low variance sites, as those are unlikely to demonstrate biological signal. + X.hannum.processed <- filter_data(X.hannum) + + # keep only sites on chromosome 22 (so that tutorial can run quickly) + map <- read.table("https://github.com/cozygene/glint/blob/master/utils/assets/HumanMethylationSites?raw=true",sep=",",row.names = 1) + chrs <- map[rownames(X.hannum.processed),1] + chr22sites <- which(chrs == 22) + X.hannum.processed.22 <- X.hannum.processed[chr22sites,] + + hannum <- list(X = X.hannum.processed.22, cov = cov.hannum, W = W.hannum) + save(hannum, file = file_name1, compress = "bzip2") + + rm(hannum, gse, X.hannum, X.hannum.processed) + file.remove(paste(data_path,.Platform$file.sep,"GPL13534.soft",sep="")) + file.remove(paste(data_path,.Platform$file.sep,"GSE40279_series_matrix.txt.gz",sep="")) + + } + + if (!file.exists(file_name2)){ + + # Download the Liu et al. CD4 data + + # covariates + gse <- GEOquery::getGEO("GSE56581", destdir = data_path, GSEMatrix = TRUE) + liu.cd4.age <- as.matrix(as.numeric(Biobase::pData(gse[[1]])[["age (yrs):ch1"]])) + colnames(liu.cd4.age) <- "age" + + # methylation data + liu.cd4.methfile <- paste(data_path,.Platform$file.sep,"GSE56581_methylome_normalized.txt",sep="") + download.file("ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE56nnn/GSE56581/suppl/GSE56581_methylome_normalized.txt.gz", paste(liu.cd4.methfile,".gz",sep="")) + gunzip(paste(liu.cd4.methfile,".gz",sep="")) + X.liu.cd4 <- fread(liu.cd4.methfile) + + X.liu.cd4.rownames <- X.liu.cd4$ID_REF + X.liu.cd4 <- as.matrix(X.liu.cd4[,2:ncol(X.liu.cd4)]) + rownames(X.liu.cd4) <- X.liu.cd4.rownames + X.liu.cd4 <- X.liu.cd4[,seq(1,ncol(X.liu.cd4),2)] + + # Calculate principal components from control probes that reflect technical variability; since we are not working with IDAT files we don't have "real" control probes - instead, we use low variance sites that are not expected to capture any true biological variability. + low_var_pcs.liu.cd4 <- low_var_pcs(X.liu.cd4, rank = 10) + cov.liu.cd4 <- cbind(liu.cd4.age, low_var_pcs.liu.cd4) + + # keep only sites on chromosome 22 (so that tutorial can run quickly) + map <- read.table("https://github.com/cozygene/glint/blob/master/utils/assets/HumanMethylationSites?raw=true",sep=",",row.names = 1) + chrs <- map[rownames(X.liu.cd4),1] + chr22sites <- which(chrs == 22) + X.liu.cd4.22 <- X.liu.cd4[chr22sites,] + + liu.cd4 <- list(X = X.liu.cd4.22, cov = cov.liu.cd4) + save(liu.cd4, file = file_name2) + + rm(liu.cd4, gse, X.liu.cd4.22, X.liu.cd4) + file.remove(paste(data_path,.Platform$file.sep,"GSE56581_methylome_normalized.txt",sep="")) + file.remove(paste(data_path,.Platform$file.sep,"GSE56581_series_matrix.txt.gz",sep="")) + + } + + if (!file.exists(file_name3)){ + # Paquette et al.
(Epigenetics 2016) + gse <- GEOquery::getGEO("GSE75248", destdir = data_path, GSEMatrix = TRUE) + + # methylation data + X.paquette <- Biobase::exprs(gse[[1]]) + + # Calculate principal components from control probes that reflect technical variability; since we are not working with IDAT files we don't have "real" control probes - instead, we use low variance sites that are not expected to capture any true biological variability. + low_var_pcs.paquette <- low_var_pcs(X.paquette, rank = 10, p = 1000) + + # remove polymorphic sites, cross-reactive sites, and non-autosomal sites according to Chen et al.; in addition, remove low variance sites, as those are unlikely to demonstrate biological signal. + X.paquette.processed <- filter_data(X.paquette) + + # keep only sites on chromosome 22 (so that tutorial can run quickly); Paquette et al. reported some signal in chromosome 16 + map <- read.table("https://github.com/cozygene/glint/blob/master/utils/assets/HumanMethylationSites?raw=true",sep=",",row.names = 1) + chrs <- as.character(map[rownames(X.paquette.processed),1]) + chr22sites <- which(chrs == "22") + X.paquette.processed.22 <- X.paquette.processed[chr22sites,] + + # Extract covariates + gestational_age <- as.numeric(unlist(lapply(Biobase::pData(gse[[1]])[["gestational age:ch1"]], function(x) strsplit(x,";")[[1]][1]))) + arousal <- as.numeric(Biobase::pData(gse[[1]])[["arousal:ch1"]]) + batch <- as.numeric(as.factor((Biobase::pData(gse[[1]])[["batch:ch1"]]))) + batch[is.na(batch)] <- 3 + batch <- indicator_vars(batch) + # since gender information is not available on the GEO record, get gender information that was inferred from the IDAT files based on X chromosome methylation + gender <- read.table("https://raw.githubusercontent.com/cozygene/TCA/master/vignettes/gender.paquette.txt", row.names = 1) + cov.paquette <- data.frame(gender,gestational_age,arousal,batch) + colnames(cov.paquette) <- c("gender","gestational_age","arousal","batch1","batch2") + rownames(cov.paquette) <- colnames(X.paquette.processed.22) + # remove NA values + keep <- setdiff(1:nrow(cov.paquette) ,which(rowSums(is.na(cov.paquette)) > 0)) + cov.paquette <- cbind(cov.paquette[keep,], low_var_pcs.paquette[keep,]) + + X.paquette.processed.22 <- X.paquette.processed.22[,keep] + + # Load cell-type proportions estimated using a reference-based approach; these were estimated using the raw IDAT files of the data (available on GEO) with the package ENmix. + W.paquette <- read.table("https://raw.githubusercontent.com/cozygene/TCA/master/vignettes/W.paquette.txt", row.names = 1) + W.paquette <- W.paquette[,2:ncol(W.paquette)] + W.paquette[W.paquette < 0] <- 0 + # remove lowly abundant cell types + W.paquette <- W.paquette[keep, colMeans(W.paquette)>=0.01] + rownames(W.paquette) <- rownames(cov.paquette) + # normalize cell type proportions (output from the ENmix package didn't require the proportions of an individual to sum up to 1) + W.paquette <- W.paquette/t(repmat(rowSums(W.paquette),ncol(W.paquette),1))
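+  # (An equivalent base-R form of the row normalization above, without pracma::repmat:
+  # W.paquette <- sweep(W.paquette, 1, rowSums(W.paquette), "/")
+  # i.e., divide each row by its sum so that every individual's proportions sum to 1.)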
+ + # get the set of cord blood reference CpGs that were used for estimating cell-type proportions; we will use these CpGs for re-estimating W + library("FlowSorted.CordBlood.450k") + ref.cord <- get("FlowSorted.CordBlood.450k") + ref.cord <- preprocessRaw(ref.cord) + ref.cord.cpgs <- rownames(FlowSorted.CordBlood.450k.ModelPars) + ref.cord.cpgs.intersect <- intersect(ref.cord.cpgs, rownames(X.paquette)) + + paquette <- list(X = X.paquette.processed.22, cov = cov.paquette, W = W.paquette, X.ref_cpgs = X.paquette[ref.cord.cpgs.intersect,keep]) + save(paquette, file = file_name3) + + rm(paquette, gse, X.paquette, X.paquette.processed, X.paquette.processed.22) + file.remove(paste(data_path,.Platform$file.sep,"GSE75248_series_matrix.txt.gz",sep="")) + + ## note - the files W.paquette.txt and gender.paquette.txt were generated using the original IDAT files of the Paquette data. + # Gender information was inferred from methylation levels from the X chromosome as it is not available on the GEO record. + # Code attached below; in order to run it, download the IDAT files from GEO accession number GSE75248 and set 'path' in 'readidat' below to the location of the files. + # require("ENmix") + # rgdat <- readidat(path = ".", manifestfile=NULL, recursive = FALSE, verbose = TRUE) + # meth <- getmeth(rgdat); rm(rgdat) + # W <- estimateCellProp(meth, refdata="FlowSorted.CordBlood.450k", cellTypes=NULL, nonnegative = TRUE, nProbes=50, normalize=TRUE, refplot=FALSE) + # write.table(W, file = "W.paquette.txt", quote = FALSE, sep = " ") + # beta <- getB(meth) + # map <- read.table("https://github.com/cozygene/glint/blob/master/utils/assets/HumanMethylationSites?raw=true",sep=",",row.names = 1) + # chrs <- as.character(map[rownames(beta),1]) + # sites.X <- which(chrs == "X") + # sex_cpgs.pca.paquette <- prcomp(t(beta[sites.X,]), center=TRUE, scale=TRUE, rank = 2) + # write.table(sex_cpgs.pca.paquette$x[,1:2], file = "sex_cpgs.pca.paquette.txt", quote = FALSE, sep = " ") + # plot(sex_cpgs.pca.paquette$x[,1],sex_cpgs.pca.paquette$x[,2]) # shows a separation between males and females; note that the males/females ratio here perfectly matches the one in the Paquette et al. paper.
+ # df <- data.frame(as.numeric(sex_cpgs.pca.paquette$x[,1] > 0)) + # colnames(df) <- "gender"; rownames(df) <- rownames(sex_cpgs.pca.paquette$x) + # write.table(df,file = "gender.paquette.txt", quote = FALSE, sep = " ") + + } + + return(file_names) + +} + + +indicator_vars <- function(x){ + x.indicators <- matrix(0,length(x),length(unique(x))-1) + counter <- 1 + u <- unique(x) + for (i in setdiff(u,u[length(u)])){ + x.indicators[,counter] <- as.numeric(x == i) + counter <- counter + 1 + } + return(x.indicators) +} + +low_var_pcs <- function(X, rank = 10, p = 1000){ + site.variances <- matrixStats::rowVars(X) + names(site.variances) <- rownames(X) + low.var.sites <- names(head(sort(site.variances), p)) + low.var.pca <- prcomp(t(X[low.var.sites,]), center=TRUE, scale=TRUE, rank = rank) + return(low.var.pca$x) +} + +filter_data <- function(X, var_th = 0.0001){ + nonspecific_probes <- read.table("https://raw.githubusercontent.com/cozygene/glint/master/parsers/assets/nonspecific_probes.txt")[,1] + XY_probes <- read.table("https://raw.githubusercontent.com/cozygene/glint/master/parsers/assets/HumanMethylationSites_X_Y.txt")[,1] + polymorphic_probes <- read.table("https://raw.githubusercontent.com/cozygene/glint/master/parsers/assets/polymorphic_cpgs.txt")[,1] + # remove sites with very low variance that are unlikely to exhibit biological signal + site.variances <- matrixStats::rowVars(X) + names(site.variances) <- rownames(X) + low_var_sites <- names(which(site.variances < var_th)) + exclude <- union(nonspecific_probes,union(XY_probes,union(low_var_sites,polymorphic_probes))) + return(X[setdiff(rownames(X),exclude),]) +} + +plot_qq <- function(pvals, labels, ggarrange.nrow = 1, ggarrange.ncol = 1, alpha = 0.05, experiment_wide_line = TRUE){ + significance_th <- list(alpha/length(pvals[[1]])) + if(length(pvals)-1) significance_th[[2]] <- alpha/(length(pvals)*length(pvals[[1]])) + qqplots <- lapply(1:length(pvals), function(p){ + df <- data.frame(pvals.obs = -log10(sort(pvals[[p]])), pvals.exp = -log10(sort((1:length(pvals[[1]]))/length(pvals[[1]])))); + qqplot <- ggplot(df, aes(x = pvals.exp, y = pvals.obs)) + + stat_binhex(geom = "point", bins=1000, size=1) + + geom_abline() + + ggtitle(labels[p]) + + xlab(expression(Expected~-log[10](P))) + ylab(expression(Observed~-log[10](P))) + + theme_bw() + + guides(fill="none") + + geom_hline(yintercept=-log10(significance_th[[1]]), linetype="dashed", color = "red", size=1) + if (length(significance_th)-1 & experiment_wide_line) qqplot <- qqplot + geom_hline(yintercept=-log10(significance_th[[2]]), linetype="dashed", color = "red", size=0.5) + return(qqplot) + }) + ggarrange(plotlist = qqplots, ncol = ggarrange.ncol, nrow = ggarrange.nrow) +} + +plot_scatter <- function(dfs, ggarrange.ncol, ggarrange.nrow, xlab, ylab, titles){ + plots <- vector("list", length = length(dfs)) + for (i in 1:length(dfs)){ + df <- data.frame(y = dfs[[i]]$y, x = dfs[[i]]$x) + plots[[i]] <- ggplot(df, aes(x = x, y = y)) + + geom_point(alpha = 0.6) + + geom_smooth(method=lm) + + stat_cor(method = "pearson", colour = "blue") + + xlab(xlab) + + ylab(ylab) + + ggtitle(titles[i]) + } + ggarrange(plotlist = plots, ncol = ggarrange.ncol, nrow = ggarrange.nrow) +} + + +if(FALSE){ + + # Set a path for storing data + data_path <- "./" + + # Load the data + filenames <- prep_data(data_path) + for (filename in filenames) load(filename) + + ## Experiment #1: detecting CD4 differential methylation with age; working under the assumption X|Y (i.e. age affects methylation levels)
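+  # (Here, under X|Y, age enters the TCA model itself through C1, and cell-type-level
+  # p-values are read off the fitted model via gammas_hat_pvals; in Experiment #2 below,
+  # which works under Y|X, the phenotype is instead regressed on the cell-type-specific
+  # signals with tcareg().)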
+ + # Apply the TCA model to the Hannum whole-blood data + tca.mdl.hannum <- tca(X = hannum$X, + W = hannum$W, + C1 = hannum$cov[,c("gender","age")], + C2 = hannum$cov[,3:ncol(hannum$cov)]) + + # Extract p-values of a joint test + tca.mdl.hannum.pvals.joint <- tca.mdl.hannum$gammas_hat_pvals.joint[,"age"] + + # Extract p-values for each cell type, under a marginal conditional test + tca.mdl.hannum.pvals.marg_cond <- tca.mdl.hannum$gammas_hat_pvals[,paste(colnames(hannum$W),".age",sep="")] + + # qq-plots - for the p-values of the joint test, and for the p-values in CD4, under a marginal conditional test + plot_qq(list(tca.mdl.hannum.pvals.joint, tca.mdl.hannum.pvals.marg_cond[,"CD4T.age"]), + labels = c("Joint test with age", "CD4 marginal conditional test with age"), + ggarrange.nrow = 1, + ggarrange.ncol = 2, + experiment_wide_line = FALSE) + + # Run ReFACTor to capture more of the variation in cell-type composition + refactor.mdl.hannum <- refactor(X = hannum$X, + k = 6, + C = hannum$cov[,3:ncol(hannum$cov)]) + + # Rerun TCA, this time including the ReFACTor components as additional covariates + tca.mdl.hannum.2 <- tca(X = hannum$X, + W = hannum$W, + C1 = hannum$cov[,c("gender","age")], + C2 = cbind(hannum$cov[,3:ncol(hannum$cov)],refactor.mdl.hannum$scores)) + + # Extract the updated p-values of a joint test + tca.mdl.hannum.2.pvals.joint <- tca.mdl.hannum.2$gammas_hat_pvals.joint[,"age"] + + # Extract the updated marginal conditional p-values + tca.mdl.hannum.2.pvals.marg_cond <- tca.mdl.hannum.2$gammas_hat_pvals[,paste(colnames(hannum$W),".age",sep="")] + + # qq-plots - for the p-values of the joint test, and for the p-values in CD4, under a marginal conditional test + plot_qq(list(tca.mdl.hannum.2.pvals.joint, tca.mdl.hannum.2.pvals.marg_cond[,"CD4T.age"]), + labels = c("Joint test with age", "CD4 marginal conditional test with age"), + ggarrange.nrow = 1, + ggarrange.ncol = 2, + experiment_wide_line = FALSE) + + # Extract the hits based on the joint test + hits.joint <- names(which(tca.mdl.hannum.2.pvals.joint < 0.05/nrow(hannum$X))) + + # Extract the hits from hits.joint where CD4 cells demonstrate the lowest p-value across all cell types + cd4.hits <- names(which(tca.mdl.hannum.2.pvals.marg_cond[hits.joint,"CD4T.age"] == rowMins(tca.mdl.hannum.2.pvals.marg_cond[hits.joint,]))) + + sprintf("Detected %d associations using a joint test, %d associations using a marginal conditional test, and %d associations in CD4 cells using a marginal conditional test.", + sum(tca.mdl.hannum.2.pvals.joint <= 0.05/nrow(hannum$X)), + sum(tca.mdl.hannum.2.pvals.marg_cond <= 0.05/(nrow(hannum$X)*ncol(hannum$W))), + sum(tca.mdl.hannum.2.pvals.marg_cond[,"CD4T.age"] <= 0.05/(nrow(hannum$X)*ncol(hannum$W)))) + + # Replicate the CD4 hits in the Liu purified CD4 data + cd4.hits.liu.pvals <- unlist(lapply(1:length(cd4.hits), + function(x) summary(lm(y~., data.frame(y=liu.cd4$X[cd4.hits[x],], liu.cd4$cov)))$coefficients["age","Pr(>|t|)"]))
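+  # (One reasonable replication criterion, assuming Bonferroni correction over the
+  # candidate CD4 hits rather than the full array, would be:
+  # cd4.hits.replicated <- cd4.hits[cd4.hits.liu.pvals < 0.05/length(cd4.hits)]
+  # The plots below assume that four of the CD4 hits replicate.)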
+ + # Plot adjusted methylation (i.e. adjusted for covariates) as a function of age in all four replicated CD4 associations + dfs <- vector("list", length = 4) + for (i in 1:4){ + r <- scale(residuals(lm(y~., data.frame(y=liu.cd4$X[cd4.hits[i],], liu.cd4$cov[,2:ncol(liu.cd4$cov)])))) + dfs[[i]] <- data.frame(x = liu.cd4$cov[,"age"], y = r) + } + plot_scatter(dfs = dfs, + ggarrange.ncol = 2, + ggarrange.nrow = 2, + xlab = "Age", + ylab = "Adjusted methylation level", + titles = paste("CD4 methylation in ",cd4.hits,sep="")) + + # Verify that p-values are calibrated in the Liu data + liu.cd4.regression.pvals <- unlist(lapply(1:nrow(liu.cd4$X), + function(x) summary(lm(y~.,data.frame(y = liu.cd4$X[x,],liu.cd4$cov)))$coefficients["age","Pr(>|t|)"])) + plot_qq(list(liu.cd4.regression.pvals), labels = "Linear regression (sorted CD4)") + + + + + + ## Experiment #2: detecting differential methylation with infant arousal in granulocytes; working under the assumption Y|X (i.e. methylation levels affect or mediate components affecting infant arousal). + + # Apply the TCA model to the Paquette data + tca.mdl.paquette <- tca(X = paquette$X, + W = paquette$W, + C1 = paquette$cov[,c("gender","gestational_age")], + C2 = paquette$cov[,4:ncol(paquette$cov)], + constrain_mu = TRUE) + + # Run tcareg with a joint test and extract p-values; generate a qq-plot + C3_names <- c("gender","gestational_age","batch1","batch2") + tcareg.mdl.paquette.joint <- tcareg(X = paquette$X, + tca.mdl = tca.mdl.paquette, + y = paquette$cov[,"arousal",drop=F], + C3 = paquette$cov[,C3_names], + test = "joint") + plot_qq(list(tcareg.mdl.paquette.joint$pvals), labels = "Joint test with infant arousal") + + # Since there is inflation in the qq-plot, we try to re-estimate the cell-type proportions + # First, run TCA using only the reference sites to get a new estimate of W + tca.mdl.paquette.refit_W <- tca(X = paquette$X.ref_cpgs, + W = paquette$W, + C1 = paquette$cov[,c("gender","gestational_age")], + C2 = paquette$cov[,4:ncol(paquette$cov)], + constrain_mu = TRUE, + refit_W = TRUE, + refit_W.features = rownames(paquette$X.ref_cpgs)) + # Use the updated estimate of W in a new execution of TCA on the data + tca.mdl.paquette.2 <- tca(X = paquette$X, + W = tca.mdl.paquette.refit_W$W, + C1 = paquette$cov[,c("gender","gestational_age")], + C2 = paquette$cov[,4:ncol(paquette$cov)], + constrain_mu = TRUE) + + # Run tcareg with a joint test and extract p-values; generate a qq-plot + tcareg.mdl.paquette.joint.2 <- tcareg(X = paquette$X, + tca.mdl = tca.mdl.paquette.2, + y = paquette$cov[,"arousal",drop=F], + C3 = paquette$cov[,C3_names], + test = "joint") + plot_qq(list(tcareg.mdl.paquette.joint.2$pvals), labels = "Joint test with infant arousal") + + # Run tcareg with a marginal conditional test; generate qq plots + tcareg.mdl.paquette.2.marg_cond <- tcareg(X = paquette$X, + tca.mdl = tca.mdl.paquette.2, + y = paquette$cov[,"arousal",drop=F], + C3 = paquette$cov[,C3_names], + test = "marginal_conditional") + plot_qq(split(tcareg.mdl.paquette.2.marg_cond$pvals,rep(1:ncol(paquette$W), each = nrow(paquette$X))), + labels = paste(colnames(paquette$W)," marginal conditional with arousal", sep=""), + ggarrange.nrow = 2, + ggarrange.ncol = 2) + + # Extract the hit we got from the joint test + hit.joint <- rownames(paquette$X)[which(tcareg.mdl.paquette.joint.2$pvals < 0.05/nrow(paquette$X))] + + # The p-values of the marginal conditional test in the hit we found with the joint test suggest that the association is in granulocytes + tcareg.mdl.paquette.2.marg_cond$pvals[hit.joint,]
+ + # Extract from the tca model the part that is relevant for the hit found + tcasub.mdl.paquette.2 <- tcasub(tca.mdl = tca.mdl.paquette.2, + features = hit.joint) + + # Calculate cell-type-specific methylation for the samples at the detected CpG + tensor.mdl.hit <- tensor(tca.mdl = tcasub.mdl.paquette.2, + X = paquette$X[hit.joint,,drop=F]) + + # Plot the estimated granulocyte-specific methylation with arousal; adjust methylation levels for covariates + # Compare with using the bulk data + r.gran <- scale(residuals(lm(y~., data.frame(y=tensor.mdl.hit[[2]][1,], paquette$cov[,setdiff(1:ncol(paquette$cov),3)])))) + df.gran <- data.frame(y = r.gran, x = paquette$cov[,"arousal"]) + # The bulk data should be further adjusted for cell-type composition (i.e. tca.mdl.paquette.2$W) + r.bulk <- scale(residuals(lm(y~., data.frame(y=paquette$X[hit.joint,], tca.mdl.paquette.2$W, paquette$cov[,setdiff(1:ncol(paquette$cov),3)])))) + df.bulk <- data.frame(y = r.bulk, x = paquette$cov[,"arousal"]) + plot_scatter(dfs = list(df.bulk, df.gran), + ggarrange.ncol = 2, + ggarrange.nrow = 1, + xlab = "Infant arousal", + ylab = "Adjusted methylation level", + titles = paste(c("Whole-blood methylation in ", "Granulocyte methylation in "),hit.joint,sep="")) + +}
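+
+# (Note that the analysis above sits inside if(FALSE) and therefore does not run when this
+# file is sourced; to reproduce it, source the helper functions, set data_path, and execute
+# the block manually.)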