Skip to content

Commit b3266e6

Browse files
authored
Merge pull request #182 from MangiolaLaboratory/increase-significant-figures
9 significant figures
2 parents e2209e2 + ee94294 commit b3266e6

8 files changed

+101
-31
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: sccomp
22
Title: Tests differences in cell-type proportion for single-cell data, robust to outliers
3-
Version: 1.99.16
3+
Version: 1.99.17
44
Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
55
role = c("aut", "cre"))
66
)

R/functions_multi_beta_binomial.R

+8-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ sccomp_glm_data_frame_raw = function(.data,
2626
cores = 4,
2727
mcmc_seed = sample(1e5, 1),
2828
max_sampling_iterations = 20000,
29-
pass_fit = TRUE , ...) {
29+
pass_fit = TRUE,
30+
sig_figs = 9,
31+
...) {
3032

3133
# See https://community.rstudio.com/t/how-to-make-complete-nesting-work-with-quosures-and-tidyeval/16473
3234
# See https://github.com/tidyverse/tidyr/issues/506
@@ -99,7 +101,9 @@ sccomp_glm_data_frame_raw = function(.data,
99101
output_directory = output_directory,
100102
mcmc_seed = mcmc_seed,
101103
max_sampling_iterations = max_sampling_iterations,
102-
pass_fit = pass_fit, ...
104+
pass_fit = pass_fit,
105+
sig_figs = sig_figs,
106+
...
103107
)
104108
}
105109

@@ -130,6 +134,7 @@ sccomp_glm_data_frame_counts = function(.data,
130134
mcmc_seed = sample(1e5, 1),
131135
max_sampling_iterations = 20000,
132136
pass_fit = TRUE,
137+
sig_figs = 9,
133138
...) {
134139

135140
# Prepare column same enquo
@@ -306,6 +311,7 @@ sccomp_glm_data_frame_counts = function(.data,
306311
seed = mcmc_seed,
307312
max_sampling_iterations = max_sampling_iterations,
308313
pars = c("beta", "alpha", "prec_coeff","prec_sd", "alpha_normalised", "random_effect", "random_effect_2", "log_lik"),
314+
sig_figs = sig_figs,
309315
...
310316
)
311317

R/methods.R

+45-19
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
2-
31
#' Main Function for SCCOMP Estimate
42
#'
53
#' @description
@@ -45,9 +43,10 @@
4543
#' @param mcmc_seed An integer seed for MCMC reproducibility.
4644
#' @param max_sampling_iterations Integer to limit the maximum number of iterations for large datasets.
4745
#' @param pass_fit Logical, whether to include the Stan fit as an attribute in the output.
48-
#' @param .count **DEPRECATED**. Use `.abundance` instead.
49-
#' @param approximate_posterior_inference **DEPRECATED**. Use `inference_method` instead.
50-
#' @param variational_inference **DEPRECATED**. Use `inference_method` instead.
46+
#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
47+
#' @param .count DEPRECATED. Use .abundance instead.
48+
#' @param approximate_posterior_inference DEPRECATED. Use inference_method instead.
49+
#' @param variational_inference DEPRECATED. Use inference_method instead.
5150
#' @param ... Additional arguments passed to the `cmdstanr::sample` function.
5251
#'
5352
#' @return A tibble (`tbl`) with the following columns:
@@ -136,6 +135,7 @@ sccomp_estimate <- function(.data,
136135
mcmc_seed = sample(1e5, 1),
137136
max_sampling_iterations = 20000,
138137
pass_fit = TRUE,
138+
sig_figs = 9,
139139
...,
140140

141141
# DEPRECATED
@@ -187,6 +187,7 @@ sccomp_estimate.Seurat <- function(.data,
187187
mcmc_seed = sample(1e5, 1),
188188
max_sampling_iterations = 20000,
189189
pass_fit = TRUE,
190+
sig_figs = 9,
190191
...,
191192

192193
# DEPRECATED
@@ -242,7 +243,9 @@ sccomp_estimate.Seurat <- function(.data,
242243
use_data = use_data,
243244
mcmc_seed = mcmc_seed,
244245
max_sampling_iterations = max_sampling_iterations,
245-
pass_fit = pass_fit, ...
246+
pass_fit = pass_fit,
247+
sig_figs = sig_figs,
248+
...
246249
)
247250
}
248251

@@ -275,6 +278,7 @@ sccomp_estimate.SingleCellExperiment <- function(.data,
275278
mcmc_seed = sample(1e5, 1),
276279
max_sampling_iterations = 20000,
277280
pass_fit = TRUE,
281+
sig_figs = 9,
278282
...,
279283

280284
# DEPRECATED
@@ -331,7 +335,9 @@ sccomp_estimate.SingleCellExperiment <- function(.data,
331335
use_data = use_data,
332336
mcmc_seed = mcmc_seed,
333337
max_sampling_iterations = max_sampling_iterations,
334-
pass_fit = pass_fit, ...
338+
pass_fit = pass_fit,
339+
sig_figs = sig_figs,
340+
...
335341
)
336342
}
337343

@@ -408,7 +414,9 @@ sccomp_estimate.DFrame <- function(.data,
408414
use_data = use_data,
409415
mcmc_seed = mcmc_seed,
410416
max_sampling_iterations = max_sampling_iterations,
411-
pass_fit = pass_fit, ...
417+
pass_fit = pass_fit,
418+
sig_figs = 9,
419+
...
412420
)
413421
}
414422

@@ -442,6 +450,7 @@ sccomp_estimate.data.frame <- function(.data,
442450
mcmc_seed = sample(1e5, 1),
443451
max_sampling_iterations = 20000,
444452
pass_fit = TRUE,
453+
sig_figs = 9,
445454
...,
446455

447456
# DEPRECATED
@@ -500,7 +509,9 @@ sccomp_estimate.data.frame <- function(.data,
500509
use_data = use_data,
501510
mcmc_seed = mcmc_seed,
502511
max_sampling_iterations = max_sampling_iterations,
503-
pass_fit = pass_fit, ...
512+
pass_fit = pass_fit,
513+
sig_figs = sig_figs,
514+
...
504515
)
505516

506517
else
@@ -527,7 +538,9 @@ sccomp_estimate.data.frame <- function(.data,
527538
use_data = use_data,
528539
mcmc_seed = mcmc_seed,
529540
max_sampling_iterations = max_sampling_iterations,
530-
pass_fit = pass_fit, ...
541+
pass_fit = pass_fit,
542+
sig_figs = sig_figs,
543+
...
531544
)
532545

533546
message("sccomp says: to do hypothesis testing run `sccomp_test()`,
@@ -562,8 +575,9 @@ sccomp_estimate.data.frame <- function(.data,
562575
#' @param mcmc_seed Integer, used for Markov-chain Monte Carlo reproducibility. By default, a random number is sampled from 1 to 100000 (`sample(1e5, 1)`).
563576
#' @param max_sampling_iterations Integer, limits the maximum number of iterations in case a large dataset is used, to limit computation time.
564577
#' @param enable_loo Logical, whether to enable model comparison using the R package LOO. This is useful for comparing fits between models, similar to ANOVA.
578+
#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
565579
#' @param approximate_posterior_inference DEPRECATED, use the `variational_inference` argument.
566-
#' @param variational_inference Logical, whether to use variational Bayes for posterior inference. It is faster and convenient. Setting this argument to `FALSE` runs full Bayesian (Hamiltonian Monte Carlo) inference, which is slower but the gold standard.
580+
#' @param variational_inference DEPRECATED. Logical, whether to use variational Bayes for posterior inference. It is faster and convenient. Setting this argument to `FALSE` runs full Bayesian (Hamiltonian Monte Carlo) inference, which is slower but the gold standard.
567581
#' @param ... Additional arguments passed to the `cmdstanr::sample` function.
568582
#'
569583
#' @return A tibble (`tbl`), with the following columns:
@@ -615,6 +629,7 @@ sccomp_remove_outliers <- function(.estimate,
615629
mcmc_seed = sample(1e5, 1),
616630
max_sampling_iterations = 20000,
617631
enable_loo = FALSE,
632+
sig_figs = 9,
618633

619634
# DEPRECATED
620635
approximate_posterior_inference = NULL,
@@ -645,6 +660,7 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,
645660
mcmc_seed = sample(1e5, 1),
646661
max_sampling_iterations = 20000,
647662
enable_loo = FALSE,
663+
sig_figs = 9,
648664

649665
# DEPRECATED
650666
approximate_posterior_inference = NULL,
@@ -733,14 +749,17 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,
733749

734750
create_intercept = FALSE
735751
)),
752+
736753
parallel_chains = ifelse(
737754
inference_method %in% c("variational", "pathfinder") |
738755
attr(.estimate , "fit") |> is("CmdStanPathfinder"),
739756
1,
740757
attr(.estimate , "fit")$num_chains()
741758
),
742-
threads_per_chain = cores
759+
threads_per_chain = cores,
760+
sig_figs = sig_figs
743761

762+
744763
)
745764

746765
# Free memory
@@ -824,11 +843,13 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,
824843
verbose = verbose,
825844
seed = mcmc_seed,
826845
max_sampling_iterations = max_sampling_iterations,
827-
pars = c("beta", "alpha", "prec_coeff", "prec_sd", "alpha_normalised", "random_effect", "random_effect_2"),
846+
pars = c("beta", "alpha", "prec_coeff", "prec_sd", "alpha_normalised", "random_effect", "random_effect_2"),
847+
sig_figs = sig_figs,
828848
...
829849
)
850+
851+
rng2 = mod_rng |> sample_safe(
830852

831-
rng2 = mod_rng |> sample_safe(
832853
generate_quantities_fx,
833854
fit2$draws(format = "matrix"),
834855

@@ -856,8 +877,10 @@ sccomp_remove_outliers.sccomp_tbl = function(.estimate,
856877
create_intercept = FALSE
857878

858879
)),
880+
859881
parallel_chains = ifelse(inference_method %in% c("variational", "pathfinder"), 1, fit2$num_chains()),
860-
threads_per_chain = cores
882+
threads_per_chain = cores,
883+
sig_figs = sig_figs
861884

862885
)
863886

@@ -1657,6 +1680,7 @@ sccomp_remove_unwanted_variation.sccomp_tbl = function(.data,
16571680
#' @param number_of_draws An integer. How many copies of the data you want to draw from the model joint posterior distribution.
16581681
#' @param mcmc_seed An integer. Used for Markov-chain Monte Carlo reproducibility. By default, a random number is sampled from 1 to 100000 (`sample(1e5, 1)`). This itself can be controlled by `set.seed()`.
16591682
#' @param cores Integer, the number of cores to be used for parallel calculations.
1683+
#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
16601684
#'
16611685
#' @return A tibble (`tbl`) with the following columns:
16621686
#' \itemize{
@@ -1708,7 +1732,8 @@ simulate_data <- function(.data,
17081732
variability_multiplier = 5,
17091733
number_of_draws = 1,
17101734
mcmc_seed = sample(1e5, 1),
1711-
cores = detectCores()) {
1735+
cores = detectCores(),
1736+
sig_figs = 9) {
17121737

17131738
# Run the function
17141739
check_and_install_cmdstanr()
@@ -1734,7 +1759,8 @@ simulate_data.tbl = function(.data,
17341759
variability_multiplier = 5,
17351760
number_of_draws = 1,
17361761
mcmc_seed = sample(1e5, 1),
1737-
cores = detectCores()){
1762+
cores = detectCores(),
1763+
sig_figs = 9) {
17381764

17391765

17401766
.sample = enquo(.sample)
@@ -1781,8 +1807,8 @@ simulate_data.tbl = function(.data,
17811807
data = data_for_model |> c(original_data) |> c(list(variability_multiplier = variability_multiplier)),
17821808
seed = mcmc_seed,
17831809
parallel_chains = attr(.estimate_object , "fit")$metadata()$threads_per_chain,
1784-
threads_per_chain = cores
1785-
1810+
threads_per_chain = cores,
1811+
sig_figs = sig_figs
17861812
)
17871813

17881814
parsed_fit =

R/utilities.R

+9-1
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ as_matrix <- function(tbl, rownames = NULL) {
250250
#' @param additional_parameters_to_save A character vector
251251
#' @param data A data frame
252252
#' @param seed An integer
253+
#' @param sig_figs Number of significant figures to use for Stan model output. Default is 9.
253254
#' @param ... List of parameters for vb function of Stan
254255
#'
255256
#' @return A Stan fit object
@@ -267,6 +268,7 @@ vb_iterative = function(model,
267268
cores = 1,
268269
verbose = TRUE,
269270
psis_resample = FALSE,
271+
sig_figs = 9,
270272
...) {
271273
res = NULL
272274
i = 0
@@ -289,6 +291,7 @@ vb_iterative = function(model,
289291
history_size = 100,
290292
show_messages = verbose,
291293
psis_resample = psis_resample,
294+
sig_figs = sig_figs,
292295
...
293296
)
294297

@@ -305,6 +308,7 @@ vb_iterative = function(model,
305308
init = init,
306309
show_messages = verbose,
307310
threads = cores,
311+
sig_figs = sig_figs,
308312
...
309313
)
310314

@@ -459,6 +463,7 @@ fit_model = function(
459463
warmup_samples = 300, approximate_posterior_inference = NULL, inference_method, verbose = TRUE,
460464
seed , pars = c("beta", "alpha", "prec_coeff","prec_sd"), output_samples = NULL, chains=NULL, max_sampling_iterations = 20000,
461465
output_directory = "sccomp_draws_files",
466+
sig_figs = 9,
462467
...
463468
)
464469
{
@@ -563,6 +568,7 @@ fit_model = function(
563568
init = init,
564569
output_dir = output_directory,
565570
show_messages = verbose,
571+
sig_figs = sig_figs,
566572
...
567573
) |>
568574
suppressWarnings()
@@ -595,7 +601,9 @@ fit_model = function(
595601
inference_method = inference_method,
596602
cores = cores,
597603
psis_resample = FALSE,
598-
verbose = verbose
604+
verbose = verbose,
605+
sig_figs = sig_figs,
606+
...
599607
) %>%
600608
suppressWarnings()
601609

man/plot_1D_intervals.Rd

+24-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/sccomp_estimate.Rd

+6-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)