@@ -18,9 +18,9 @@ Website: [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCell
18
18
19
19
Please also have a look at
20
20
21
+ - [ tidySummarizedExperiment] ((https://stemangiola.github.io/tidySummarizedExperiment/ ) for tidy manipulation of SummarizedExperiment objects)
21
22
- [ tidyseurat] ( https://stemangiola.github.io/tidyseurat/ ) for tidy manipulation of Seurat objects
22
23
- [ tidybulk] ( https://stemangiola.github.io/tidybulk/ ) for tidy bulk RNA-seq data analysis
23
- - [ nanny] ( https://github.com/stemangiola/nanny ) for tidy high-level data analysis and manipulation
24
24
- [ tidygate] ( https://github.com/stemangiola/tidygate ) for adding custom gate information to your tibble
25
25
- [ tidyHeatmap] ( https://stemangiola.github.io/tidyHeatmap/ ) for heatmaps produced with tidy principles
26
26
@@ -38,14 +38,13 @@ SingleCellExperiment-compatible Functions | Description
38
38
39
39
tidyverse Packages | Description
40
40
------------ | -------------
41
- ` dplyr ` | All ` dplyr ` tibble functions (e.g. ` tidySingleCellExperiment:: select` )
42
- ` tidyr ` | All ` tidyr ` tibble functions (e.g. ` tidySingleCellExperiment:: pivot_longer` )
43
- ` ggplot2 ` | ` ggplot ` (` tidySingleCellExperiment:: ggplot` )
44
- ` plotly ` | ` plot_ly ` (` tidySingleCellExperiment:: plot_ly` )
41
+ ` dplyr ` | All ` dplyr ` tibble functions (e.g. ` select ` )
42
+ ` tidyr ` | All ` tidyr ` tibble functions (e.g. ` pivot_longer ` )
43
+ ` ggplot2 ` | ` ggplot ` (` ggplot ` )
44
+ ` plotly ` | ` plot_ly ` (` plot_ly ` )
45
45
46
46
Utilities | Description
47
47
------------ | -------------
48
- ` tidy ` | Add ` tidySingleCellExperiment ` invisible layer over a SingleCellExperiment object
49
48
` as_tibble ` | Convert cell-wise information to a ` tbl_df `
50
49
` join_features ` | Add feature-wise information, returns a ` tbl_df `
51
50
` aggregate_cells ` | Aggregate cell gene-transcription abundance as pseudobulk tissue
@@ -70,15 +69,15 @@ library(SingleR)
70
69
library(SingleCellSignalR)
71
70
72
71
# Tidyverse-compatible packages
73
- library(ggplot2)
74
72
library(purrr)
73
+ library(magrittr)
75
74
library(tidyHeatmap)
76
75
77
76
# Both
78
77
library(tidySingleCellExperiment)
79
78
```
80
79
81
- # Create ` tidySingleCellExperiment ` , the best of both worlds!
80
+ # Data representation of ` tidySingleCellExperiment `
82
81
83
82
This is a * SingleCellExperiment* object but it is evaluated as a tibble. So it is compatible both with SingleCellExperiment and tidyverse.
84
83
@@ -111,11 +110,11 @@ We may want to extract the run/sample name out of it into a separate column. Tid
111
110
``` {r}
112
111
# Create sample column
113
112
pbmc_small_polished <-
114
- pbmc_small_tidy %>%
113
+ pbmc_small_tidy |>
115
114
extract(file, "sample", "../data/([a-z0-9]+)/outs.+", remove=FALSE)
116
115
117
116
# Reorder to have sample column up front
118
- pbmc_small_polished %>%
117
+ pbmc_small_polished |>
119
118
select(sample, everything())
120
119
```
121
120
@@ -153,17 +152,17 @@ We can treat `pbmc_small_polished` as a tibble for plotting.
153
152
Here we plot number of features per cell.
154
153
155
154
``` {r plot1}
156
- pbmc_small_polished %>%
157
- tidySingleCellExperiment:: ggplot(aes(nFeature_RNA, fill=groups)) +
155
+ pbmc_small_polished |>
156
+ ggplot(aes(nFeature_RNA, fill=groups)) +
158
157
geom_histogram() +
159
158
custom_theme
160
159
```
161
160
162
161
Here we plot total features per cell.
163
162
164
163
``` {r plot2}
165
- pbmc_small_polished %>%
166
- tidySingleCellExperiment:: ggplot(aes(groups, nCount_RNA, fill=groups)) +
164
+ pbmc_small_polished |>
165
+ ggplot(aes(groups, nCount_RNA, fill=groups)) +
167
166
geom_boxplot(outlier.shape=NA) +
168
167
geom_jitter(width=0.1) +
169
168
custom_theme
@@ -172,8 +171,8 @@ pbmc_small_polished %>%
172
171
Here we plot abundance of two features for each group.
173
172
174
173
``` {r}
175
- pbmc_small_polished %>%
176
- join_features(features=c("HLA-DRA", "LYZ")) %>%
174
+ pbmc_small_polished |>
175
+ join_features(features=c("HLA-DRA", "LYZ")) |>
177
176
ggplot(aes(groups, .abundance_counts + 1, fill=groups)) +
178
177
geom_boxplot(outlier.shape=NA) +
179
178
geom_jitter(aes(size=nCount_RNA), alpha=0.5, width=0.2) +
@@ -188,13 +187,13 @@ We can also treat `pbmc_small_polished` as a *SingleCellExperiment* object and p
188
187
``` {r preprocess}
189
188
# Identify variable genes with scran
190
189
variable_genes <-
191
- pbmc_small_polished %>%
192
- modelGeneVar() %>%
190
+ pbmc_small_polished |>
191
+ modelGeneVar() |>
193
192
getTopHVGs(prop=0.1)
194
193
195
194
# Perform PCA with scater
196
195
pbmc_small_pca <-
197
- pbmc_small_polished %>%
196
+ pbmc_small_polished |>
198
197
runPCA(subset_row=variable_genes)
199
198
200
199
pbmc_small_pca
@@ -204,9 +203,9 @@ If a tidyverse-compatible package is not included in the tidySingleCellExperimen
204
203
205
204
``` {r pc_plot}
206
205
# Create pairs plot with GGally
207
- pbmc_small_pca %>%
208
- as_tibble() %>%
209
- select(contains("PC"), everything()) %>%
206
+ pbmc_small_pca |>
207
+ as_tibble() |>
208
+ select(contains("PC"), everything()) |>
210
209
GGally::ggpairs(columns=1:5, ggplot2::aes(colour=groups)) +
211
210
custom_theme
212
211
```
@@ -220,41 +219,41 @@ pbmc_small_cluster <- pbmc_small_pca
220
219
221
220
# Assign clusters to the 'colLabels' of the SingleCellExperiment object
222
221
colLabels(pbmc_small_cluster) <-
223
- pbmc_small_pca %>%
224
- buildSNNGraph(use.dimred="PCA") %>%
222
+ pbmc_small_pca |>
223
+ buildSNNGraph(use.dimred="PCA") |>
225
224
igraph::cluster_walktrap() %$%
226
- membership %>%
225
+ membership |>
227
226
as.factor()
228
227
229
228
# Reorder columns
230
- pbmc_small_cluster %>% select(label, everything())
229
+ pbmc_small_cluster |> select(label, everything())
231
230
```
232
231
233
232
And interrogate the output as if it was a regular tibble.
234
233
235
234
``` {r cluster count}
236
235
# Count number of cells for each cluster per group
237
- pbmc_small_cluster %>%
238
- tidySingleCellExperiment:: count(groups, label)
236
+ pbmc_small_cluster |>
237
+ count(groups, label)
239
238
```
240
239
241
240
We can identify and visualise cluster markers combining SingleCellExperiment, tidyverse functions and tidyHeatmap [ @mangiola2020tidyheatmap ]
242
241
243
242
``` {r}
244
243
# Identify top 10 markers per cluster
245
244
marker_genes <-
246
- pbmc_small_cluster %>%
247
- findMarkers(groups=pbmc_small_cluster$label) %>%
248
- as.list() %>%
249
- map(~ .x %>%
250
- head(10) %>%
251
- rownames()) %>%
245
+ pbmc_small_cluster |>
246
+ findMarkers(groups=pbmc_small_cluster$label) |>
247
+ as.list() |>
248
+ map(~ .x |>
249
+ head(10) |>
250
+ rownames()) |>
252
251
unlist()
253
252
254
253
# Plot heatmap
255
- pbmc_small_cluster %>%
256
- join_features(features=marker_genes) %>%
257
- group_by(label) %>%
254
+ pbmc_small_cluster |>
255
+ join_features(features=marker_genes) |>
256
+ group_by(label) |>
258
257
heatmap(.feature, .cell, .abundance_counts, .scale="column")
259
258
```
260
259
@@ -264,14 +263,14 @@ We can calculate the first 3 UMAP dimensions using the SingleCellExperiment fram
264
263
265
264
``` {r umap}
266
265
pbmc_small_UMAP <-
267
- pbmc_small_cluster %>%
266
+ pbmc_small_cluster |>
268
267
runUMAP(ncomponents=3)
269
268
```
270
269
271
270
And we can plot the result in 3D using plotly.
272
271
273
272
``` {r umap plot, eval=FALSE}
274
- pbmc_small_UMAP %>%
273
+ pbmc_small_UMAP |>
275
274
plot_ly(
276
275
x=~`UMAP1`,
277
276
y=~`UMAP2`,
@@ -295,47 +294,47 @@ blueprint <- celldex::BlueprintEncodeData()
295
294
# Infer cell identities
296
295
cell_type_df <-
297
296
298
- assays(pbmc_small_UMAP)$logcounts %>%
299
- Matrix::Matrix(sparse = TRUE) %>%
297
+ assays(pbmc_small_UMAP)$logcounts |>
298
+ Matrix::Matrix(sparse = TRUE) |>
300
299
SingleR::SingleR(
301
300
ref = blueprint,
302
301
labels = blueprint$label.main,
303
302
method = "single"
304
- ) %>%
305
- as.data.frame() %>%
306
- as_tibble(rownames="cell") %>%
303
+ ) |>
304
+ as.data.frame() |>
305
+ as_tibble(rownames="cell") |>
307
306
select(cell, first.labels)
308
307
```
309
308
310
309
``` {r}
311
310
# Join UMAP and cell type info
312
311
pbmc_small_cell_type <-
313
- pbmc_small_UMAP %>%
312
+ pbmc_small_UMAP |>
314
313
left_join(cell_type_df, by="cell")
315
314
316
315
# Reorder columns
317
- pbmc_small_cell_type %>%
318
- tidySingleCellExperiment:: select(cell, first.labels, everything())
316
+ pbmc_small_cell_type |>
317
+ select(cell, first.labels, everything())
319
318
```
320
319
321
320
We can easily summarise the results. For example, we can see how cell type classification overlaps with cluster classification.
322
321
323
322
``` {r}
324
323
# Count number of cells for each cell type per cluster
325
- pbmc_small_cell_type %>%
324
+ pbmc_small_cell_type |>
326
325
count(label, first.labels)
327
326
```
328
327
329
328
We can easily reshape the data for building information-rich faceted plots.
330
329
331
330
``` {r}
332
- pbmc_small_cell_type %>%
331
+ pbmc_small_cell_type |>
333
332
334
333
# Reshape and add classifier column
335
334
pivot_longer(
336
335
cols=c(label, first.labels),
337
336
names_to="classifier", values_to="label"
338
- ) %>%
337
+ ) |>
339
338
340
339
# UMAP plots for cell type and cluster
341
340
ggplot(aes(UMAP1, UMAP2, color=label)) +
@@ -347,13 +346,13 @@ pbmc_small_cell_type %>%
347
346
We can easily plot gene correlation per cell category, adding multi-layer annotations.
348
347
349
348
``` {r}
350
- pbmc_small_cell_type %>%
349
+ pbmc_small_cell_type |>
351
350
352
351
# Add some mitochondrial abundance values
353
- mutate(mitochondrial=rnorm(dplyr::n())) %>%
352
+ mutate(mitochondrial=rnorm(dplyr::n())) |>
354
353
355
354
# Plot correlation
356
- join_features(features=c("CST3", "LYZ"), shape="wide") %>%
355
+ join_features(features=c("CST3", "LYZ"), shape="wide") |>
357
356
ggplot(aes(CST3 + 1, LYZ + 1, color=groups, size=mitochondrial)) +
358
357
geom_point() +
359
358
facet_wrap(~first.labels, scales="free") +
@@ -368,9 +367,9 @@ A powerful tool we can use with tidySingleCellExperiment is tidyverse `nest`. We
368
367
369
368
``` {r}
370
369
pbmc_small_nested <-
371
- pbmc_small_cell_type %>%
372
- filter(first.labels != "Erythrocytes") %>%
373
- mutate(cell_class=dplyr::if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
370
+ pbmc_small_cell_type |>
371
+ filter(first.labels != "Erythrocytes") |>
372
+ mutate(cell_class=dplyr::if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) |>
374
373
nest(data=-cell_class)
375
374
376
375
pbmc_small_nested
@@ -380,24 +379,24 @@ Now we can independently for the lymphoid and myeloid subsets (i) find variable
380
379
381
380
``` {r warning=FALSE}
382
381
pbmc_small_nested_reanalysed <-
383
- pbmc_small_nested %>%
382
+ pbmc_small_nested |>
384
383
mutate(data=map(
385
384
data, ~ {
386
385
.x <- runPCA(.x, subset_row=variable_genes)
387
386
388
387
variable_genes <-
389
- .x %>%
390
- modelGeneVar() %>%
388
+ .x |>
389
+ modelGeneVar() |>
391
390
getTopHVGs(prop=0.3)
392
391
393
392
colLabels(.x) <-
394
- .x %>%
395
- buildSNNGraph(use.dimred="PCA") %>%
393
+ .x |>
394
+ buildSNNGraph(use.dimred="PCA") |>
396
395
igraph::cluster_walktrap() %$%
397
- membership %>%
396
+ membership |>
398
397
as.factor()
399
398
400
- .x %>% runUMAP(ncomponents=3)
399
+ .x |> runUMAP(ncomponents=3)
401
400
}
402
401
))
403
402
@@ -407,14 +406,14 @@ pbmc_small_nested_reanalysed
407
406
We can then unnest and plot the new classification.
408
407
409
408
``` {r}
410
- pbmc_small_nested_reanalysed %>%
409
+ pbmc_small_nested_reanalysed |>
411
410
412
411
# Convert to tibble otherwise SingleCellExperiment drops reduced dimensions when unifying data sets.
413
- mutate(data=map(data, ~ .x %>% as_tibble())) %>%
414
- unnest(data) %>%
412
+ mutate(data=map(data, ~ .x |> as_tibble())) |>
413
+ unnest(data) |>
415
414
416
415
# Define unique clusters
417
- unite("cluster", c(cell_class, label), remove=FALSE) %>%
416
+ unite("cluster", c(cell_class, label), remove=FALSE) |>
418
417
419
418
# Plotting
420
419
ggplot(aes(UMAP1, UMAP2, color=cluster)) +
@@ -427,32 +426,32 @@ We can perform a large number of functional analyses on data subsets. For exampl
427
426
428
427
``` {r, eval=FALSE}
429
428
pbmc_small_nested_interactions <-
430
- pbmc_small_nested_reanalysed %>%
429
+ pbmc_small_nested_reanalysed |>
431
430
432
431
# Unnest based on cell category
433
- unnest(data) %>%
432
+ unnest(data) |>
434
433
435
434
# Create unambiguous clusters
436
- mutate(integrated_clusters=first.labels %>% as.factor() %>% as.integer()) %>%
435
+ mutate(integrated_clusters=first.labels |> as.factor() |> as.integer()) |>
437
436
438
437
# Nest based on sample
439
- tidySingleCellExperiment:: nest(data=-sample) %>%
440
- tidySingleCellExperiment:: mutate(interactions=map(data, ~ {
438
+ nest(data=-sample) |>
439
+ mutate(interactions=map(data, ~ {
441
440
442
441
# Produce variables. Yuck!
443
442
cluster <- colData(.x)$integrated_clusters
444
- data <- data.frame(assays(.x) %>% as.list() %>% .[[1]] %>% as.matrix())
443
+ data <- data.frame(assays(.x) |> as.list() |> extract2(1) |> as.matrix())
445
444
446
445
# Ligand/Receptor analysis using SingleCellSignalR
447
- data %>%
448
- cell_signaling(genes=rownames(data), cluster=cluster) %>%
449
- inter_network(data=data, signal=. , genes=rownames(data), cluster=cluster) %$%
450
- `individual-networks` %>%
446
+ data |>
447
+ cell_signaling(genes=rownames(data), cluster=cluster) |>
448
+ inter_network(data=data, signal=_ , genes=rownames(data), cluster=cluster) %$%
449
+ `individual-networks` |>
451
450
map_dfr(~ bind_rows(as_tibble(.x)))
452
451
}))
453
452
454
- pbmc_small_nested_interactions %>%
455
- select(-data) %>%
453
+ pbmc_small_nested_interactions |>
454
+ select(-data) |>
456
455
unnest(interactions)
457
456
```
458
457
@@ -469,6 +468,6 @@ Sometimes, it is necessary to aggregate the gene-transcript abundance from a gro
469
468
In tidySingleCellExperiment, cell aggregation can be achieved using the ` aggregate_cells ` function.
470
469
471
470
``` {r}
472
- pbmc_small_tidy %>%
471
+ pbmc_small_tidy |>
473
472
aggregate_cells(groups, assays = "counts")
474
473
```
0 commit comments