Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

9 significant figures #182

Merged
merged 9 commits into from
Mar 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sccomp
Title: Tests differences in cell-type proportion for single-cell data, robust to outliers
Version: 1.99.16
Version: 1.99.17
Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
role = c("aut", "cre"))
)
Expand Down
10 changes: 8 additions & 2 deletions R/functions_multi_beta_binomial.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ sccomp_glm_data_frame_raw = function(.data,
cores = 4,
mcmc_seed = sample(1e5, 1),
max_sampling_iterations = 20000,
pass_fit = TRUE , ...) {
pass_fit = TRUE,
sig_figs = 9,
...) {

# See https://community.rstudio.com/t/how-to-make-complete-nesting-work-with-quosures-and-tidyeval/16473
# See https://github.com/tidyverse/tidyr/issues/506
Expand Down Expand Up @@ -99,7 +101,9 @@ sccomp_glm_data_frame_raw = function(.data,
output_directory = output_directory,
mcmc_seed = mcmc_seed,
max_sampling_iterations = max_sampling_iterations,
pass_fit = pass_fit, ...
pass_fit = pass_fit,
sig_figs = sig_figs,
...
)
}

Expand Down Expand Up @@ -130,6 +134,7 @@ sccomp_glm_data_frame_counts = function(.data,
mcmc_seed = sample(1e5, 1),
max_sampling_iterations = 20000,
pass_fit = TRUE,
sig_figs = 9,
...) {

# Prepare column same enquo
Expand Down Expand Up @@ -306,6 +311,7 @@ sccomp_glm_data_frame_counts = function(.data,
seed = mcmc_seed,
max_sampling_iterations = max_sampling_iterations,
pars = c("beta", "alpha", "prec_coeff","prec_sd", "alpha_normalised", "random_effect", "random_effect_2", "log_lik"),
sig_figs = sig_figs,
...
)

Expand Down
64 changes: 45 additions & 19 deletions R/methods.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


#' Main Function for SCCOMP Estimate
#'
#' @description
Expand Down Expand Up @@ -45,9 +43,10 @@
#' @param mcmc_seed An integer seed for MCMC reproducibility.
#' @param max_sampling_iterations Integer to limit the maximum number of iterations for large datasets.
#' @param pass_fit Logical, whether to include the Stan fit as an attribute in the output.
#' @param .count **DEPRECATED**. Use `.abundance` instead.
#' @param approximate_posterior_inference **DEPRECATED**. Use `inference_method` instead.
#' @param variational_inference **DEPRECATED**. Use `inference_method` instead.
#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
#' @param .count DEPRECATED. Use .abundance instead.
#' @param approximate_posterior_inference DEPRECATED. Use inference_method instead.
#' @param variational_inference DEPRECATED. Use inference_method instead.
#' @param ... Additional arguments passed to the `cmdstanr::sample` function.
#'
#' @return A tibble (`tbl`) with the following columns:
Expand Down Expand Up @@ -136,6 +135,7 @@ sccomp_estimate <- function(.data,
mcmc_seed = sample(1e5, 1),
max_sampling_iterations = 20000,
pass_fit = TRUE,
sig_figs = 9,
...,

# DEPRECATED
Expand Down Expand Up @@ -187,6 +187,7 @@ sccomp_estimate.Seurat <- function(.data,
mcmc_seed = sample(1e5, 1),
max_sampling_iterations = 20000,
pass_fit = TRUE,
sig_figs = 9,
...,

# DEPRECATED
Expand Down Expand Up @@ -242,7 +243,9 @@ sccomp_estimate.Seurat <- function(.data,
use_data = use_data,
mcmc_seed = mcmc_seed,
max_sampling_iterations = max_sampling_iterations,
pass_fit = pass_fit, ...
pass_fit = pass_fit,
sig_figs = sig_figs,
...
)
}

Expand Down Expand Up @@ -275,6 +278,7 @@ sccomp_estimate.SingleCellExperiment <- function(.data,
mcmc_seed = sample(1e5, 1),
max_sampling_iterations = 20000,
pass_fit = TRUE,
sig_figs = 9,
...,

# DEPRECATED
Expand Down Expand Up @@ -331,7 +335,9 @@ sccomp_estimate.SingleCellExperiment <- function(.data,
use_data = use_data,
mcmc_seed = mcmc_seed,
max_sampling_iterations = max_sampling_iterations,
pass_fit = pass_fit, ...
pass_fit = pass_fit,
sig_figs = sig_figs,
...
)
}

Expand Down Expand Up @@ -408,7 +414,9 @@ sccomp_estimate.DFrame <- function(.data,
use_data = use_data,
mcmc_seed = mcmc_seed,
max_sampling_iterations = max_sampling_iterations,
pass_fit = pass_fit, ...
pass_fit = pass_fit,
sig_figs = 9,
...
)
}

Expand Down Expand Up @@ -442,6 +450,7 @@ sccomp_estimate.data.frame <- function(.data,
mcmc_seed = sample(1e5, 1),
max_sampling_iterations = 20000,
pass_fit = TRUE,
sig_figs = 9,
...,

# DEPRECATED
Expand Down Expand Up @@ -500,7 +509,9 @@ sccomp_estimate.data.frame <- function(.data,
use_data = use_data,
mcmc_seed = mcmc_seed,
max_sampling_iterations = max_sampling_iterations,
pass_fit = pass_fit, ...
pass_fit = pass_fit,
sig_figs = sig_figs,
...
)

else
Expand All @@ -527,7 +538,9 @@ sccomp_estimate.data.frame <- function(.data,
use_data = use_data,
mcmc_seed = mcmc_seed,
max_sampling_iterations = max_sampling_iterations,
pass_fit = pass_fit, ...
pass_fit = pass_fit,
sig_figs = sig_figs,
...
)

message("sccomp says: to do hypothesis testing run `sccomp_test()`,
Expand Down Expand Up @@ -562,8 +575,9 @@ sccomp_estimate.data.frame <- function(.data,
#' @param mcmc_seed Integer, used for Markov-chain Monte Carlo reproducibility. By default, a random number is sampled from 1 to 100000 (`sample(1e5, 1)`).
#' @param max_sampling_iterations Integer, limits the maximum number of iterations in case a large dataset is used, to limit computation time.
#' @param enable_loo Logical, whether to enable model comparison using the R package LOO. This is useful for comparing fits between models, similar to ANOVA.
#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
#' @param approximate_posterior_inference DEPRECATED, use the `variational_inference` argument.
#' @param variational_inference Logical, whether to use variational Bayes for posterior inference. It is faster and convenient. Setting this argument to `FALSE` runs full Bayesian (Hamiltonian Monte Carlo) inference, which is slower but the gold standard.
#' @param variational_inference DEPRECATED Logical, whether to use variational Bayes for posterior inference. It is faster and convenient. Setting this argument to `FALSE` runs full Bayesian (Hamiltonian Monte Carlo) inference, which is slower but the gold standard.
#' @param ... Additional arguments passed to the `cmdstanr::sample` function.
#'
#' @return A tibble (`tbl`), with the following columns:
Expand Down Expand Up @@ -615,6 +629,7 @@ sccomp_remove_outliers <- function(.estimate,
mcmc_seed = sample(1e5, 1),
max_sampling_iterations = 20000,
enable_loo = FALSE,
sig_figs = 9,

# DEPRECATED
approximate_posterior_inference = NULL,
Expand Down Expand Up @@ -645,6 +660,7 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,
mcmc_seed = sample(1e5, 1),
max_sampling_iterations = 20000,
enable_loo = FALSE,
sig_figs = 9,

# DEPRECATED
approximate_posterior_inference = NULL,
Expand Down Expand Up @@ -733,14 +749,17 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,

create_intercept = FALSE
)),

parallel_chains = ifelse(
inference_method %in% c("variational", "pathfinder") |
attr(.estimate , "fit") |> is("CmdStanPathfinder"),
1,
attr(.estimate , "fit")$num_chains()
),
threads_per_chain = cores
threads_per_chain = cores,
sig_figs = sig_figs


)

# Free memory
Expand Down Expand Up @@ -824,11 +843,13 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,
verbose = verbose,
seed = mcmc_seed,
max_sampling_iterations = max_sampling_iterations,
pars = c("beta", "alpha", "prec_coeff", "prec_sd", "alpha_normalised", "random_effect", "random_effect_2"),
pars = c("beta", "alpha", "prec_coeff", "prec_sd", "alpha_normalised", "random_effect", "random_effect_2"),
sig_figs = sig_figs,
...
)

rng2 = mod_rng |> sample_safe(

rng2 = mod_rng |> sample_safe(
generate_quantities_fx,
fit2$draws(format = "matrix"),

Expand Down Expand Up @@ -856,8 +877,10 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,
create_intercept = FALSE

)),

parallel_chains = ifelse(inference_method %in% c("variational", "pathfinder"), 1, fit2$num_chains()),
threads_per_chain = cores
threads_per_chain = cores,
sig_figs = sig_figs

)

Expand Down Expand Up @@ -1657,6 +1680,7 @@ sccomp_remove_unwanted_variation.sccomp_tbl = function(.data,
#' @param number_of_draws An integer. How many copies of the data to draw from the model joint posterior distribution.
#' @param mcmc_seed An integer. Used for Markov-chain Monte Carlo reproducibility. By default a random number is sampled from 1 to 100000 (`sample(1e5, 1)`). This itself can be controlled by set.seed().
#' @param cores Integer, the number of cores to be used for parallel calculations.
#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
#'
#' @return A tibble (`tbl`) with the following columns:
#' \itemize{
Expand Down Expand Up @@ -1708,7 +1732,8 @@ simulate_data <- function(.data,
variability_multiplier = 5,
number_of_draws = 1,
mcmc_seed = sample(1e5, 1),
cores = detectCores()) {
cores = detectCores(),
sig_figs = 9) {

# Run the function
check_and_install_cmdstanr()
Expand All @@ -1734,7 +1759,8 @@ simulate_data.tbl = function(.data,
variability_multiplier = 5,
number_of_draws = 1,
mcmc_seed = sample(1e5, 1),
cores = detectCores()){
cores = detectCores(),
sig_figs = 9) {


.sample = enquo(.sample)
Expand Down Expand Up @@ -1781,8 +1807,8 @@ simulate_data.tbl = function(.data,
data = data_for_model |> c(original_data) |> c(list(variability_multiplier = variability_multiplier)),
seed = mcmc_seed,
parallel_chains = attr(.estimate_object , "fit")$metadata()$threads_per_chain,
threads_per_chain = cores

threads_per_chain = cores,
sig_figs = sig_figs
)

parsed_fit =
Expand Down
10 changes: 9 additions & 1 deletion R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ as_matrix <- function(tbl, rownames = NULL) {
#' @param additional_parameters_to_save A character vector
#' @param data A data frame
#' @param seed An integer
#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
#' @param ... List of parameters for vb function of Stan
#'
#' @return A Stan fit object
Expand All @@ -267,6 +268,7 @@ vb_iterative = function(model,
cores = 1,
verbose = TRUE,
psis_resample = FALSE,
sig_figs = 9,
...) {
res = NULL
i = 0
Expand All @@ -289,6 +291,7 @@ vb_iterative = function(model,
history_size = 100,
show_messages = verbose,
psis_resample = psis_resample,
sig_figs = sig_figs,
...
)

Expand All @@ -305,6 +308,7 @@ vb_iterative = function(model,
init = init,
show_messages = verbose,
threads = cores,
sig_figs = sig_figs,
...
)

Expand Down Expand Up @@ -459,6 +463,7 @@ fit_model = function(
warmup_samples = 300, approximate_posterior_inference = NULL, inference_method, verbose = TRUE,
seed , pars = c("beta", "alpha", "prec_coeff","prec_sd"), output_samples = NULL, chains=NULL, max_sampling_iterations = 20000,
output_directory = "sccomp_draws_files",
sig_figs = 9,
...
)
{
Expand Down Expand Up @@ -563,6 +568,7 @@ fit_model = function(
init = init,
output_dir = output_directory,
show_messages = verbose,
sig_figs = sig_figs,
...
) |>
suppressWarnings()
Expand Down Expand Up @@ -595,7 +601,9 @@ fit_model = function(
inference_method = inference_method,
cores = cores,
psis_resample = FALSE,
verbose = verbose
verbose = verbose,
sig_figs = sig_figs,
...
) %>%
suppressWarnings()

Expand Down
27 changes: 24 additions & 3 deletions man/plot_1D_intervals.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions man/sccomp_estimate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading