MangiolaLaboratory · stemangiola · Mar 13, 2025 · Feb 19, 2025 · Mar 13, 2025 · Mar 13, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: sccomp
 Title: Tests differences in cell-type proportion for single-cell data, robust to outliers
-Version: 1.99.16
+Version: 1.99.17
 Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
                   role = c("aut", "cre"))
                   )

diff --git a/R/functions_multi_beta_binomial.R b/R/functions_multi_beta_binomial.R
@@ -26,7 +26,9 @@ sccomp_glm_data_frame_raw = function(.data,
                                      cores = 4,
                                      mcmc_seed = sample(1e5, 1),
                                      max_sampling_iterations = 20000,
-                                     pass_fit = TRUE , ...) {
+                                     pass_fit = TRUE,
+                                     sig_figs = 9,
+                                     ...) {
 
   # See https://community.rstudio.com/t/how-to-make-complete-nesting-work-with-quosures-and-tidyeval/16473
   # See https://github.com/tidyverse/tidyr/issues/506
@@ -99,7 +101,9 @@ sccomp_glm_data_frame_raw = function(.data,
       output_directory = output_directory,
       mcmc_seed = mcmc_seed,
       max_sampling_iterations = max_sampling_iterations,
-      pass_fit = pass_fit, ...
+      pass_fit = pass_fit,
+      sig_figs = sig_figs,
+      ...
     )
 }
 
@@ -130,6 +134,7 @@ sccomp_glm_data_frame_counts = function(.data,
                                         mcmc_seed = sample(1e5, 1),
                                         max_sampling_iterations = 20000,
                                         pass_fit = TRUE,
+                                        sig_figs = 9,
                                         ...) {
 
   # Prepare column same enquo
@@ -306,6 +311,7 @@ sccomp_glm_data_frame_counts = function(.data,
       seed = mcmc_seed,
       max_sampling_iterations = max_sampling_iterations,
       pars = c("beta", "alpha", "prec_coeff","prec_sd",   "alpha_normalised", "random_effect", "random_effect_2", "log_lik"),
+      sig_figs = sig_figs,
       ...
     )
 

diff --git a/R/methods.R b/R/methods.R
@@ -1,5 +1,3 @@
-
-
 #' Main Function for SCCOMP Estimate
 #'
 #' @description
@@ -45,9 +43,10 @@
 #' @param mcmc_seed An integer seed for MCMC reproducibility.
 #' @param max_sampling_iterations Integer to limit the maximum number of iterations for large datasets.
 #' @param pass_fit Logical, whether to include the Stan fit as an attribute in the output.
-#' @param .count **DEPRECATED**. Use `.abundance` instead.
-#' @param approximate_posterior_inference **DEPRECATED**. Use `inference_method` instead.
-#' @param variational_inference **DEPRECATED**. Use `inference_method` instead.
+#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
+#' @param .count DEPRECATED. Use .abundance instead.
+#' @param approximate_posterior_inference DEPRECATED. Use inference_method instead.
+#' @param variational_inference DEPRECATED. Use inference_method instead.
 #' @param ... Additional arguments passed to the `cmdstanr::sample` function.
 #'
 #' @return A tibble (`tbl`) with the following columns:
@@ -136,6 +135,7 @@ sccomp_estimate <- function(.data,
                             mcmc_seed = sample(1e5, 1),
                             max_sampling_iterations = 20000,
                             pass_fit = TRUE,
+                            sig_figs = 9,
                             ...,
 
                             # DEPRECATED
@@ -187,6 +187,7 @@ sccomp_estimate.Seurat <- function(.data,
                                    mcmc_seed = sample(1e5, 1),
                                    max_sampling_iterations = 20000,
                                    pass_fit = TRUE,
+                                   sig_figs = 9,
                                    ...,
 
                                    # DEPRECATED
@@ -242,7 +243,9 @@ sccomp_estimate.Seurat <- function(.data,
       use_data = use_data,
       mcmc_seed = mcmc_seed,
       max_sampling_iterations = max_sampling_iterations,
-      pass_fit = pass_fit, ...
+      pass_fit = pass_fit,
+      sig_figs = sig_figs,
+      ...
     )
 }
 
@@ -275,6 +278,7 @@ sccomp_estimate.SingleCellExperiment <- function(.data,
                                                  mcmc_seed = sample(1e5, 1),
                                                  max_sampling_iterations = 20000,
                                                  pass_fit = TRUE,
+                                                 sig_figs = 9,
                                                  ...,
 
                                                  # DEPRECATED
@@ -331,7 +335,9 @@ sccomp_estimate.SingleCellExperiment <- function(.data,
       use_data = use_data,
       mcmc_seed = mcmc_seed,
       max_sampling_iterations = max_sampling_iterations,
-      pass_fit = pass_fit, ...
+      pass_fit = pass_fit,
+      sig_figs = sig_figs,
+      ...
     )
 }
 
@@ -408,7 +414,9 @@ sccomp_estimate.DFrame <- function(.data,
       use_data = use_data,
       mcmc_seed = mcmc_seed,
       max_sampling_iterations = max_sampling_iterations,
-      pass_fit = pass_fit, ...
+      pass_fit = pass_fit,
+      sig_figs = 9,
+      ...
     )
 }
 
@@ -442,6 +450,7 @@ sccomp_estimate.data.frame <- function(.data,
                                        mcmc_seed = sample(1e5, 1),
                                        max_sampling_iterations = 20000,
                                        pass_fit = TRUE,
+                                       sig_figs = 9,
                                        ...,
 
                                        # DEPRECATED
@@ -500,7 +509,9 @@ sccomp_estimate.data.frame <- function(.data,
       use_data = use_data,
       mcmc_seed = mcmc_seed,
       max_sampling_iterations = max_sampling_iterations,
-      pass_fit = pass_fit, ...
+      pass_fit = pass_fit,
+      sig_figs = sig_figs,
+      ...
     )
 
   else 
@@ -527,7 +538,9 @@ sccomp_estimate.data.frame <- function(.data,
       use_data = use_data,
       mcmc_seed = mcmc_seed,
       max_sampling_iterations = max_sampling_iterations,
-      pass_fit = pass_fit, ...
+      pass_fit = pass_fit,
+      sig_figs = sig_figs,
+      ...
     )
 
   message("sccomp says: to do hypothesis testing run `sccomp_test()`,
@@ -562,8 +575,9 @@ sccomp_estimate.data.frame <- function(.data,
 #' @param mcmc_seed Integer, used for Markov-chain Monte Carlo reproducibility. By default, a random number is sampled from 1 to 999999.
 #' @param max_sampling_iterations Integer, limits the maximum number of iterations in case a large dataset is used, to limit computation time.
 #' @param enable_loo Logical, whether to enable model comparison using the R package LOO. This is useful for comparing fits between models, similar to ANOVA.
+#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
 #' @param approximate_posterior_inference DEPRECATED, use the `variational_inference` argument.
-#' @param variational_inference Logical, whether to use variational Bayes for posterior inference. It is faster and convenient. Setting this argument to `FALSE` runs full Bayesian (Hamiltonian Monte Carlo) inference, which is slower but the gold standard.
+#' @param variational_inference DEPRECATED. Use `inference_method` instead. Logical, whether to use variational Bayes for posterior inference. It is faster and convenient. Setting this argument to `FALSE` runs full Bayesian (Hamiltonian Monte Carlo) inference, which is slower but the gold standard.
 #' @param ... Additional arguments passed to the `cmdstanr::sample` function.
 #' 
 #' @return A tibble (`tbl`), with the following columns:
@@ -615,6 +629,7 @@ sccomp_remove_outliers <- function(.estimate,
                                    mcmc_seed = sample(1e5, 1),
                                    max_sampling_iterations = 20000,
                                    enable_loo = FALSE,
+                                   sig_figs = 9,
 
                                    # DEPRECATED
                                    approximate_posterior_inference = NULL,
@@ -645,6 +660,7 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,
                                              mcmc_seed = sample(1e5, 1),
                                              max_sampling_iterations = 20000,
                                              enable_loo = FALSE,
+                                             sig_figs = 9,
 
                                              # DEPRECATED
                                              approximate_posterior_inference = NULL,
@@ -733,14 +749,17 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,
 
         create_intercept = FALSE
       )),
+
     parallel_chains = ifelse(
       inference_method %in% c("variational", "pathfinder") | 
         attr(.estimate , "fit") |> is("CmdStanPathfinder"),
         1, 
        attr(.estimate , "fit")$num_chains()
       ), 
-    threads_per_chain = cores
+    threads_per_chain = cores,
+    sig_figs = sig_figs
 
+
   )
 
   # Free memory
@@ -824,11 +843,13 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,
       verbose = verbose,
       seed = mcmc_seed,
       max_sampling_iterations = max_sampling_iterations,
-      pars = c("beta", "alpha", "prec_coeff", "prec_sd",   "alpha_normalised", "random_effect", "random_effect_2"),
+      pars = c("beta", "alpha", "prec_coeff", "prec_sd", "alpha_normalised", "random_effect", "random_effect_2"),
+      sig_figs = sig_figs,
       ...
     )
+
+  rng2 = mod_rng |> sample_safe(
 
-  rng2 =  mod_rng |> sample_safe(
     generate_quantities_fx,
     fit2$draws(format = "matrix"),
 
@@ -856,8 +877,10 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,
       create_intercept = FALSE
 
     )),
+
     parallel_chains = ifelse(inference_method %in% c("variational", "pathfinder"), 1, fit2$num_chains()), 
-    threads_per_chain = cores
+    threads_per_chain = cores,
+    sig_figs = sig_figs
 
   )
 
@@ -1657,6 +1680,7 @@ sccomp_remove_unwanted_variation.sccomp_tbl = function(.data,
 #' @param number_of_draws An integer. How may copies of the data you want to draw from the model joint posterior distribution.
 #' @param mcmc_seed An integer. Used for Markov-chain Monte Carlo reproducibility. By default a random number is sampled from 1 to 999999. This itself can be controlled by set.seed()#' @param cores Integer, the number of cores to be used for parallel calculations.
 #' @param cores Integer, the number of cores to be used for parallel calculations.
+#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
 #' 
 #' @return A tibble (`tbl`) with the following columns:
 #' \itemize{
@@ -1708,7 +1732,8 @@ simulate_data <- function(.data,
                        variability_multiplier = 5,
                        number_of_draws = 1,
                        mcmc_seed = sample(1e5, 1),
-                       cores = detectCores()) {
+                       cores = detectCores(),
+                       sig_figs = 9) {
 
   # Run the function
   check_and_install_cmdstanr()
@@ -1734,7 +1759,8 @@ simulate_data.tbl = function(.data,
                                     variability_multiplier = 5,
                                     number_of_draws = 1,
                                     mcmc_seed = sample(1e5, 1),
-                             cores = detectCores()){
+                             cores = detectCores(),
+                             sig_figs = 9) {
 
 
   .sample = enquo(.sample)
@@ -1781,8 +1807,8 @@ simulate_data.tbl = function(.data,
     data = data_for_model |> c(original_data) |> c(list(variability_multiplier = variability_multiplier)),
     seed = mcmc_seed,
     parallel_chains = attr(.estimate_object , "fit")$metadata()$threads_per_chain, 
-    threads_per_chain = cores
-
+    threads_per_chain = cores,
+    sig_figs = sig_figs
   )
 
   parsed_fit =

diff --git a/R/utilities.R b/R/utilities.R
@@ -250,6 +250,7 @@ as_matrix <- function(tbl, rownames = NULL) {
 #' @param additional_parameters_to_save A character vector
 #' @param data A data frame
 #' @param seed An integer
+#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
 #' @param ... List of paramaters for vb function of Stan
 #'
 #' @return A Stan fit object
@@ -267,6 +268,7 @@ vb_iterative = function(model,
                         cores = 1, 
                         verbose = TRUE,
                         psis_resample = FALSE,
+                        sig_figs = 9,
                         ...) {
   res = NULL
   i = 0
@@ -289,6 +291,7 @@ vb_iterative = function(model,
             history_size = 100, 
             show_messages = verbose,
             psis_resample = psis_resample,
+            sig_figs = sig_figs,
             ...
           )
 
@@ -305,6 +308,7 @@ vb_iterative = function(model,
             init = init,
             show_messages = verbose,
             threads = cores,
+            sig_figs = sig_figs,
             ...
           )
 
@@ -459,6 +463,7 @@ fit_model = function(
     warmup_samples = 300, approximate_posterior_inference = NULL, inference_method, verbose = TRUE,
     seed , pars = c("beta", "alpha", "prec_coeff","prec_sd"), output_samples = NULL, chains=NULL, max_sampling_iterations = 20000, 
     output_directory = "sccomp_draws_files",
+    sig_figs = 9,
     ...
 )
 {
@@ -563,6 +568,7 @@ fit_model = function(
         init = init,
         output_dir = output_directory,
         show_messages = verbose,
+        sig_figs = sig_figs,
         ...
       ) |> 
         suppressWarnings()
@@ -595,7 +601,9 @@ fit_model = function(
       inference_method = inference_method, 
       cores = cores,
       psis_resample = FALSE, 
-      verbose = verbose
+      verbose = verbose,
+      sig_figs = sig_figs,
+      ...
     ) %>%
       suppressWarnings()
 

diff --git a/man/plot_1D_intervals.Rd b/man/plot_1D_intervals.Rd
diff --git a/man/sccomp_estimate.Rd b/man/sccomp_estimate.Rd