Skip to content

Commit 19a5182

Browse files
authored
Merge pull request #183 from MangiolaLaboratory/improve-boxplot
Improve boxplot
2 parents f5c8f18 + 51408b3 commit 19a5182

12 files changed

+239
-184
lines changed

R/methods.R

+6-8
Original file line numberDiff line numberDiff line change
@@ -1877,7 +1877,8 @@ sccomp_boxplot = function(
18771877

18781878
pivot_wider(names_from = parameter, values_from = c(contains("c_"), contains("v_"))) |>
18791879
unnest(count_data) |>
1880-
with_groups(!!.sample, ~ mutate(.x, proportion = (!!.count)/sum(!!.count)) )
1880+
with_groups(!!.sample, ~ mutate(.x, proportion = (!!.count)/sum(!!.count)) ) |>
1881+
mutate(is_zero = proportion==0)
18811882

18821883
if(remove_unwanted_effects){
18831884
.data_adjusted =
@@ -1890,7 +1891,8 @@ sccomp_boxplot = function(
18901891
select(-proportion) |>
18911892
left_join(.data_adjusted, by = join_by(!!.cell_group, !!.sample))
18921893
}
1893-
1894+
else
1895+
message( "sccomp says: When visualising proportions, especially for complex models, consider setting `remove_unwanted_effects=TRUE`. This will adjust the proportions, preserving only the observed effect.")
18941896

18951897
# If the data has no `outlier` column, add one with every value set to FALSE
18961898
if(!"outlier" %in% colnames(data_proportion)) data_proportion = data_proportion |> mutate(outlier = FALSE)
@@ -2013,13 +2015,9 @@ else {
20132015
# If discrete
20142016
else
20152017
my_plot =
2016-
plot_boxplot(
2018+
sccomp_boxplot(
20172019
.data = x,
2018-
data_proportion = data_proportion,
2019-
factor_of_interest = .x,
2020-
.cell_group = !!.cell_group,
2021-
.sample = !!.sample,
2022-
my_theme = multipanel_theme,
2020+
factor = .x,
20232021
significance_threshold = significance_threshold
20242022
)
20252023

R/utilities.R

+9-6
Original file line numberDiff line numberDiff line change
@@ -2028,7 +2028,10 @@ plot_2D_intervals = function(
20282028
#' @noRd
20292029
plot_boxplot = function(
20302030
.data, data_proportion, factor_of_interest, .cell_group,
2031-
.sample, significance_threshold = 0.05, my_theme, remove_unwanted_effects = FALSE
2031+
.sample,
2032+
significance_threshold = 0.05,
2033+
my_theme,
2034+
remove_unwanted_effects = FALSE
20322035
){
20332036

20342037
# Define the variables as NULL to avoid CRAN NOTES
@@ -2123,15 +2126,15 @@ plot_boxplot = function(
21232126
simulated_proportion =
21242127
.data |>
21252128
sccomp_replicate(formula_composition = formula_composition, number_of_draws = 100) |>
2126-
left_join(data_proportion %>% distinct(!!as.symbol(factor_of_interest), !!.sample, !!.cell_group))
2129+
left_join(data_proportion %>% distinct(!!as.symbol(factor_of_interest), !!.sample, !!.cell_group, is_zero))
21272130

21282131
my_boxplot = my_boxplot +
21292132

21302133
# Add boxplot for simulated proportions
21312134
stat_summary(
21322135
aes(!!as.symbol(factor_of_interest), (generated_proportions)),
21332136
fun.data = calc_boxplot_stat, geom="boxplot",
2134-
outlier.shape = NA, outlier.color = NA,outlier.size = 0,
2137+
outlier.shape = NA, outlier.color = NA, outlier.size = 0,
21352138
fatten = 0.5, lwd=0.2,
21362139
data =
21372140
simulated_proportion %>%
@@ -2180,7 +2183,7 @@ plot_boxplot = function(
21802183

21812184
# Add jittered points for individual data
21822185
geom_jitter(
2183-
aes(!!as.symbol(factor_of_interest), proportion, shape=outlier, color=outlier, group=!!as.symbol(factor_of_interest)),
2186+
aes(!!as.symbol(factor_of_interest), proportion, shape=is_zero, color=outlier, group=!!as.symbol(factor_of_interest)),
21842187
data = data_proportion,
21852188
position=position_jitterdodge(jitter.height = 0, jitter.width = 0.2),
21862189
size = 0.5
@@ -2193,13 +2196,13 @@ plot_boxplot = function(
21932196
nrow = 4
21942197
) +
21952198
scale_color_manual(values = c("black", "#e11f28")) +
2199+
scale_shape_manual(values = c(16, 21)) +
21962200
scale_y_continuous(trans=S_sqrt_trans(), labels = dropLeadingZero) +
21972201
scale_fill_discrete(na.value = "white") +
21982202
xlab("Biological condition") +
21992203
ylab("Cell-group proportion") +
2200-
guides(color="none", alpha="none", size="none") +
2204+
guides( alpha="none", size="none") +
22012205
labs(fill="Significant difference") +
2202-
ggtitle("Note: Be careful judging significance (or outliers) visually for lowly abundant cell groups. \nVisualising proportion hides the uncertainty characteristic of count data, that a count-based statistical model can estimate.") +
22032206
my_theme +
22042207
theme(axis.text.x = element_text(angle=20, hjust = 1), title = element_text(size = 3))
22052208
}

README.md

+210-170
Large diffs are not rendered by default.

inst/figures/unnamed-chunk-12-1.png

1.5 KB
Loading

inst/figures/unnamed-chunk-13-1.png

59.7 KB
Loading

inst/figures/unnamed-chunk-14-1.png

15.5 KB
Loading

inst/figures/unnamed-chunk-15-1.png

-3.93 KB
Loading

inst/figures/unnamed-chunk-27-1.png

30.5 KB
Loading

inst/figures/unnamed-chunk-28-1.png

-38.8 KB
Loading

inst/figures/unnamed-chunk-29-1.png

79.6 KB
Loading

man/fragments/intro.Rmd

+7
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,13 @@ sccomp_result |>
170170
sccomp_boxplot(factor = "type")
171171
```
172172

173+
You can also plot proportions adjusted for unwanted effects. This is especially helpful for complex models, where multiple factors can substantially affect the proportions.
174+
175+
```{r, eval = instantiate::stan_cmdstan_exists()}
176+
sccomp_result |>
177+
sccomp_boxplot(factor = "type", remove_unwanted_effects = TRUE)
178+
```
179+
173180
A plot of estimates of differential composition (c_) on the x-axis and differential variability (v_) on the y-axis. The error bars represent 95% credible intervals. The dashed lines represent the minimal effect that the hypothesis test is based on. An effect is labelled as significant if it exceeds the minimal effect according to the 95% credible interval. Facets represent the covariates in the model.
174181

175182
```{r, eval = instantiate::stan_cmdstan_exists()}

vignettes/introduction.Rmd

+7
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,13 @@ sccomp_result |>
219219
sccomp_boxplot(factor = "type")
220220
```
221221

222+
You can also plot proportions adjusted for unwanted effects. This is especially helpful for complex models, where multiple factors can substantially affect the proportions.
223+
224+
```{r, eval = instantiate::stan_cmdstan_exists()}
225+
sccomp_result |>
226+
sccomp_boxplot(factor = "type", remove_unwanted_effects = TRUE)
227+
```
228+
222229
A plot of estimates of differential composition (c_) on the x-axis and differential variability (v_) on the y-axis. The error bars represent 95% credible intervals. The dashed lines represent the minimal effect that the hypothesis test is based on. An effect is labelled as significant if it exceeds the minimal effect according to the 95% credible interval. Facets represent the covariates in the model.
223230

224231
```{r, eval = instantiate::stan_cmdstan_exists()}

0 commit comments

Comments
 (0)