Skip to content

Commit b99992c

Browse files
authored
Merge pull request #78 from stemangiola/clean-readme-vignette-from-tidy
update README and Vignette
2 parents dc27e23 + bae44bc commit b99992c

10 files changed

+330
-337
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,5 @@ dev/*pdf
1515
dev/dplyr-master/*
1616
tidySingleCellExperiment.Rproj
1717
tidySingleCellExperiment.Rproj
18+
/doc/
19+
/Meta/

README.Rmd

+79-80
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ Website: [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCell
1818

1919
Please also have a look at
2020

21+
- [tidySummarizedExperiment](https://stemangiola.github.io/tidySummarizedExperiment/) for tidy manipulation of SummarizedExperiment objects
2122
- [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy manipulation of Seurat objects
2223
- [tidybulk](https://stemangiola.github.io/tidybulk/) for tidy bulk RNA-seq data analysis
23-
- [nanny](https://github.com/stemangiola/nanny) for tidy high-level data analysis and manipulation
2424
- [tidygate](https://github.com/stemangiola/tidygate) for adding custom gate information to your tibble
2525
- [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for heatmaps produced with tidy principles
2626

@@ -38,14 +38,13 @@ SingleCellExperiment-compatible Functions | Description
3838

3939
tidyverse Packages | Description
4040
------------ | -------------
41-
`dplyr` | All `dplyr` tibble functions (e.g. `tidySingleCellExperiment::select`)
42-
`tidyr` | All `tidyr` tibble functions (e.g. `tidySingleCellExperiment::pivot_longer`)
43-
`ggplot2` | `ggplot` (`tidySingleCellExperiment::ggplot`)
44-
`plotly` | `plot_ly` (`tidySingleCellExperiment::plot_ly`)
41+
`dplyr` | All `dplyr` tibble functions (e.g. `select`)
42+
`tidyr` | All `tidyr` tibble functions (e.g. `pivot_longer`)
43+
`ggplot2` | `ggplot` (`ggplot`)
44+
`plotly` | `plot_ly` (`plot_ly`)
4545

4646
Utilities | Description
4747
------------ | -------------
48-
`tidy` | Add `tidySingleCellExperiment` invisible layer over a SingleCellExperiment object
4948
`as_tibble` | Convert cell-wise information to a `tbl_df`
5049
`join_features` | Add feature-wise information, returns a `tbl_df`
5150
`aggregate_cells` | Aggregate cell gene-transcription abundance as pseudobulk tissue
@@ -70,15 +69,15 @@ library(SingleR)
7069
library(SingleCellSignalR)
7170
7271
# Tidyverse-compatible packages
73-
library(ggplot2)
7472
library(purrr)
73+
library(magrittr)
7574
library(tidyHeatmap)
7675
7776
# Both
7877
library(tidySingleCellExperiment)
7978
```
8079

81-
# Create `tidySingleCellExperiment`, the best of both worlds!
80+
# Data representation of `tidySingleCellExperiment`
8281

8382
This is a *SingleCellExperiment* object but it is evaluated as a tibble. So it is compatible both with SingleCellExperiment and tidyverse.
8483

@@ -111,11 +110,11 @@ We may want to extract the run/sample name out of it into a separate column. Tid
111110
```{r}
112111
# Create sample column
113112
pbmc_small_polished <-
114-
pbmc_small_tidy %>%
113+
pbmc_small_tidy |>
115114
extract(file, "sample", "../data/([a-z0-9]+)/outs.+", remove=FALSE)
116115
117116
# Reorder to have sample column up front
118-
pbmc_small_polished %>%
117+
pbmc_small_polished |>
119118
select(sample, everything())
120119
```
121120

@@ -153,17 +152,17 @@ We can treat `pbmc_small_polished` as a tibble for plotting.
153152
Here we plot number of features per cell.
154153

155154
```{r plot1}
156-
pbmc_small_polished %>%
157-
tidySingleCellExperiment::ggplot(aes(nFeature_RNA, fill=groups)) +
155+
pbmc_small_polished |>
156+
ggplot(aes(nFeature_RNA, fill=groups)) +
158157
geom_histogram() +
159158
custom_theme
160159
```
161160

162161
Here we plot total features per cell.
163162

164163
```{r plot2}
165-
pbmc_small_polished %>%
166-
tidySingleCellExperiment::ggplot(aes(groups, nCount_RNA, fill=groups)) +
164+
pbmc_small_polished |>
165+
ggplot(aes(groups, nCount_RNA, fill=groups)) +
167166
geom_boxplot(outlier.shape=NA) +
168167
geom_jitter(width=0.1) +
169168
custom_theme
@@ -172,8 +171,8 @@ pbmc_small_polished %>%
172171
Here we plot abundance of two features for each group.
173172

174173
```{r}
175-
pbmc_small_polished %>%
176-
join_features(features=c("HLA-DRA", "LYZ")) %>%
174+
pbmc_small_polished |>
175+
join_features(features=c("HLA-DRA", "LYZ")) |>
177176
ggplot(aes(groups, .abundance_counts + 1, fill=groups)) +
178177
geom_boxplot(outlier.shape=NA) +
179178
geom_jitter(aes(size=nCount_RNA), alpha=0.5, width=0.2) +
@@ -188,13 +187,13 @@ We can also treat `pbmc_small_polished` as a *SingleCellExperiment* object and p
188187
```{r preprocess}
189188
# Identify variable genes with scran
190189
variable_genes <-
191-
pbmc_small_polished %>%
192-
modelGeneVar() %>%
190+
pbmc_small_polished |>
191+
modelGeneVar() |>
193192
getTopHVGs(prop=0.1)
194193
195194
# Perform PCA with scater
196195
pbmc_small_pca <-
197-
pbmc_small_polished %>%
196+
pbmc_small_polished |>
198197
runPCA(subset_row=variable_genes)
199198
200199
pbmc_small_pca
@@ -204,9 +203,9 @@ If a tidyverse-compatible package is not included in the tidySingleCellExperimen
204203

205204
```{r pc_plot}
206205
# Create pairs plot with GGally
207-
pbmc_small_pca %>%
208-
as_tibble() %>%
209-
select(contains("PC"), everything()) %>%
206+
pbmc_small_pca |>
207+
as_tibble() |>
208+
select(contains("PC"), everything()) |>
210209
GGally::ggpairs(columns=1:5, ggplot2::aes(colour=groups)) +
211210
custom_theme
212211
```
@@ -220,41 +219,41 @@ pbmc_small_cluster <- pbmc_small_pca
220219
221220
# Assign clusters to the 'colLabels' of the SingleCellExperiment object
222221
colLabels(pbmc_small_cluster) <-
223-
pbmc_small_pca %>%
224-
buildSNNGraph(use.dimred="PCA") %>%
222+
pbmc_small_pca |>
223+
buildSNNGraph(use.dimred="PCA") |>
225224
igraph::cluster_walktrap() %$%
226-
membership %>%
225+
membership |>
227226
as.factor()
228227
229228
# Reorder columns
230-
pbmc_small_cluster %>% select(label, everything())
229+
pbmc_small_cluster |> select(label, everything())
231230
```
232231

233232
And interrogate the output as if it was a regular tibble.
234233

235234
```{r cluster count}
236235
# Count number of cells for each cluster per group
237-
pbmc_small_cluster %>%
238-
tidySingleCellExperiment::count(groups, label)
236+
pbmc_small_cluster |>
237+
count(groups, label)
239238
```
240239

241240
We can identify and visualise cluster markers combining SingleCellExperiment, tidyverse functions and tidyHeatmap [@mangiola2020tidyheatmap]
242241

243242
```{r}
244243
# Identify top 10 markers per cluster
245244
marker_genes <-
246-
pbmc_small_cluster %>%
247-
findMarkers(groups=pbmc_small_cluster$label) %>%
248-
as.list() %>%
249-
map(~ .x %>%
250-
head(10) %>%
251-
rownames()) %>%
245+
pbmc_small_cluster |>
246+
findMarkers(groups=pbmc_small_cluster$label) |>
247+
as.list() |>
248+
map(~ .x |>
249+
head(10) |>
250+
rownames()) |>
252251
unlist()
253252
254253
# Plot heatmap
255-
pbmc_small_cluster %>%
256-
join_features(features=marker_genes) %>%
257-
group_by(label) %>%
254+
pbmc_small_cluster |>
255+
join_features(features=marker_genes) |>
256+
group_by(label) |>
258257
heatmap(.feature, .cell, .abundance_counts, .scale="column")
259258
```
260259

@@ -264,14 +263,14 @@ We can calculate the first 3 UMAP dimensions using the SingleCellExperiment fram
264263

265264
```{r umap}
266265
pbmc_small_UMAP <-
267-
pbmc_small_cluster %>%
266+
pbmc_small_cluster |>
268267
runUMAP(ncomponents=3)
269268
```
270269

271270
And we can plot the result in 3D using plotly.
272271

273272
```{r umap plot, eval=FALSE}
274-
pbmc_small_UMAP %>%
273+
pbmc_small_UMAP |>
275274
plot_ly(
276275
x=~`UMAP1`,
277276
y=~`UMAP2`,
@@ -295,47 +294,47 @@ blueprint <- celldex::BlueprintEncodeData()
295294
# Infer cell identities
296295
cell_type_df <-
297296
298-
assays(pbmc_small_UMAP)$logcounts %>%
299-
Matrix::Matrix(sparse = TRUE) %>%
297+
assays(pbmc_small_UMAP)$logcounts |>
298+
Matrix::Matrix(sparse = TRUE) |>
300299
SingleR::SingleR(
301300
ref = blueprint,
302301
labels = blueprint$label.main,
303302
method = "single"
304-
) %>%
305-
as.data.frame() %>%
306-
as_tibble(rownames="cell") %>%
303+
) |>
304+
as.data.frame() |>
305+
as_tibble(rownames="cell") |>
307306
select(cell, first.labels)
308307
```
309308

310309
```{r}
311310
# Join UMAP and cell type info
312311
pbmc_small_cell_type <-
313-
pbmc_small_UMAP %>%
312+
pbmc_small_UMAP |>
314313
left_join(cell_type_df, by="cell")
315314
316315
# Reorder columns
317-
pbmc_small_cell_type %>%
318-
tidySingleCellExperiment::select(cell, first.labels, everything())
316+
pbmc_small_cell_type |>
317+
select(cell, first.labels, everything())
319318
```
320319

321320
We can easily summarise the results. For example, we can see how cell type classification overlaps with cluster classification.
322321

323322
```{r}
324323
# Count number of cells for each cell type per cluster
325-
pbmc_small_cell_type %>%
324+
pbmc_small_cell_type |>
326325
count(label, first.labels)
327326
```
328327

329328
We can easily reshape the data for building information-rich faceted plots.
330329

331330
```{r}
332-
pbmc_small_cell_type %>%
331+
pbmc_small_cell_type |>
333332
334333
# Reshape and add classifier column
335334
pivot_longer(
336335
cols=c(label, first.labels),
337336
names_to="classifier", values_to="label"
338-
) %>%
337+
) |>
339338
340339
# UMAP plots for cell type and cluster
341340
ggplot(aes(UMAP1, UMAP2, color=label)) +
@@ -347,13 +346,13 @@ pbmc_small_cell_type %>%
347346
We can easily plot gene correlation per cell category, adding multi-layer annotations.
348347

349348
```{r}
350-
pbmc_small_cell_type %>%
349+
pbmc_small_cell_type |>
351350
352351
# Add some mitochondrial abundance values
353-
mutate(mitochondrial=rnorm(dplyr::n())) %>%
352+
mutate(mitochondrial=rnorm(dplyr::n())) |>
354353
355354
# Plot correlation
356-
join_features(features=c("CST3", "LYZ"), shape="wide") %>%
355+
join_features(features=c("CST3", "LYZ"), shape="wide") |>
357356
ggplot(aes(CST3 + 1, LYZ + 1, color=groups, size=mitochondrial)) +
358357
geom_point() +
359358
facet_wrap(~first.labels, scales="free") +
@@ -368,9 +367,9 @@ A powerful tool we can use with tidySingleCellExperiment is tidyverse `nest`. We
368367

369368
```{r}
370369
pbmc_small_nested <-
371-
pbmc_small_cell_type %>%
372-
filter(first.labels != "Erythrocytes") %>%
373-
mutate(cell_class=dplyr::if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
370+
pbmc_small_cell_type |>
371+
filter(first.labels != "Erythrocytes") |>
372+
mutate(cell_class=dplyr::if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) |>
374373
nest(data=-cell_class)
375374
376375
pbmc_small_nested
@@ -380,24 +379,24 @@ Now we can independently for the lymphoid and myeloid subsets (i) find variable
380379

381380
```{r warning=FALSE}
382381
pbmc_small_nested_reanalysed <-
383-
pbmc_small_nested %>%
382+
pbmc_small_nested |>
384383
mutate(data=map(
385384
data, ~ {
386385
.x <- runPCA(.x, subset_row=variable_genes)
387386
388387
variable_genes <-
389-
.x %>%
390-
modelGeneVar() %>%
388+
.x |>
389+
modelGeneVar() |>
391390
getTopHVGs(prop=0.3)
392391
393392
colLabels(.x) <-
394-
.x %>%
395-
buildSNNGraph(use.dimred="PCA") %>%
393+
.x |>
394+
buildSNNGraph(use.dimred="PCA") |>
396395
igraph::cluster_walktrap() %$%
397-
membership %>%
396+
membership |>
398397
as.factor()
399398
400-
.x %>% runUMAP(ncomponents=3)
399+
.x |> runUMAP(ncomponents=3)
401400
}
402401
))
403402
@@ -407,14 +406,14 @@ pbmc_small_nested_reanalysed
407406
We can then unnest and plot the new classification.
408407

409408
```{r}
410-
pbmc_small_nested_reanalysed %>%
409+
pbmc_small_nested_reanalysed |>
411410
412411
# Convert to tibble otherwise SingleCellExperiment drops reduced dimensions when unifying data sets.
413-
mutate(data=map(data, ~ .x %>% as_tibble())) %>%
414-
unnest(data) %>%
412+
mutate(data=map(data, ~ .x |> as_tibble())) |>
413+
unnest(data) |>
415414
416415
# Define unique clusters
417-
unite("cluster", c(cell_class, label), remove=FALSE) %>%
416+
unite("cluster", c(cell_class, label), remove=FALSE) |>
418417
419418
# Plotting
420419
ggplot(aes(UMAP1, UMAP2, color=cluster)) +
@@ -427,32 +426,32 @@ We can perform a large number of functional analyses on data subsets. For exampl
427426

428427
```{r, eval=FALSE}
429428
pbmc_small_nested_interactions <-
430-
pbmc_small_nested_reanalysed %>%
429+
pbmc_small_nested_reanalysed |>
431430
432431
# Unnest based on cell category
433-
unnest(data) %>%
432+
unnest(data) |>
434433
435434
# Create unambiguous clusters
436-
mutate(integrated_clusters=first.labels %>% as.factor() %>% as.integer()) %>%
435+
mutate(integrated_clusters=first.labels |> as.factor() |> as.integer()) |>
437436
438437
# Nest based on sample
439-
tidySingleCellExperiment::nest(data=-sample) %>%
440-
tidySingleCellExperiment::mutate(interactions=map(data, ~ {
438+
nest(data=-sample) |>
439+
mutate(interactions=map(data, ~ {
441440
442441
# Produce variables. Yuck!
443442
cluster <- colData(.x)$integrated_clusters
444-
data <- data.frame(assays(.x) %>% as.list() %>% .[[1]] %>% as.matrix())
443+
data <- data.frame(assays(.x) |> as.list() |> extract2(1) |> as.matrix())
445444
446445
# Ligand/Receptor analysis using SingleCellSignalR
447-
data %>%
448-
cell_signaling(genes=rownames(data), cluster=cluster) %>%
449-
inter_network(data=data, signal=., genes=rownames(data), cluster=cluster) %$%
450-
`individual-networks` %>%
446+
data |>
447+
cell_signaling(genes=rownames(data), cluster=cluster) |>
448+
inter_network(data=data, signal=_, genes=rownames(data), cluster=cluster) %$%
449+
`individual-networks` |>
451450
map_dfr(~ bind_rows(as_tibble(.x)))
452451
}))
453452
454-
pbmc_small_nested_interactions %>%
455-
select(-data) %>%
453+
pbmc_small_nested_interactions |>
454+
select(-data) |>
456455
unnest(interactions)
457456
```
458457

@@ -469,6 +468,6 @@ Sometimes, it is necessary to aggregate the gene-transcript abundance from a gro
469468
In tidySingleCellExperiment, cell aggregation can be achieved using the `aggregate_cells` function.
470469

471470
```{r}
472-
pbmc_small_tidy %>%
471+
pbmc_small_tidy |>
473472
aggregate_cells(groups, assays = "counts")
474473
```

0 commit comments

Comments
 (0)