stemangiola
diff --git a/‎.gitignore
+2 b/‎.gitignore
+2
diff --git a/‎README.Rmd
+79-80 b/‎README.Rmd
+79-80
@@ -15,3 +15,5 @@ dev/*pdf
 dev/dplyr-master/*
 tidySingleCellExperiment.Rproj
 tidySingleCellExperiment.Rproj
+/doc/
+/Meta/
@@ -18,9 +18,9 @@ Website: [tidySingleCellExperiment](https://stemangiola.github.io/tidySingleCell
 
 Please also have a look at 
 
+- [tidySummarizedExperiment](https://stemangiola.github.io/tidySummarizedExperiment/) for tidy manipulation of SummarizedExperiment objects
 - [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy manipulation of Seurat objects
 - [tidybulk](https://stemangiola.github.io/tidybulk/) for tidy bulk RNA-seq data analysis
-- [nanny](https://github.com/stemangiola/nanny) for tidy high-level data analysis and manipulation 
 - [tidygate](https://github.com/stemangiola/tidygate) for adding custom gate information to your tibble 
 - [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for heatmaps produced with tidy principles
 
@@ -38,14 +38,13 @@ SingleCellExperiment-compatible Functions | Description
 
 tidyverse Packages | Description
 ------------ | -------------
-`dplyr` | All `dplyr` tibble functions (e.g. `tidySingleCellExperiment::select`)
-`tidyr` | All `tidyr` tibble functions (e.g. `tidySingleCellExperiment::pivot_longer`)
-`ggplot2` | `ggplot` (`tidySingleCellExperiment::ggplot`)
-`plotly` | `plot_ly` (`tidySingleCellExperiment::plot_ly`)
+`dplyr` | All `dplyr` tibble functions (e.g. `select`)
+`tidyr` | All `tidyr` tibble functions (e.g. `pivot_longer`)
+`ggplot2` | `ggplot`
+`plotly` | `plot_ly`
 
 Utilities | Description
 ------------ | -------------
-`tidy` | Add `tidySingleCellExperiment` invisible layer over a SingleCellExperiment object
 `as_tibble` | Convert cell-wise information to a `tbl_df`
 `join_features` | Add feature-wise information, returns a `tbl_df`
 `aggregate_cells` | Aggregate cell gene-transcription abundance as pseudobulk tissue
@@ -70,15 +69,15 @@ library(SingleR)
 library(SingleCellSignalR)
 
 # Tidyverse-compatible packages
-library(ggplot2)
 library(purrr)
+library(magrittr)
 library(tidyHeatmap)
 
 # Both
 library(tidySingleCellExperiment)
 ```
 
-# Create `tidySingleCellExperiment`, the best of both worlds!
+# Data representation of `tidySingleCellExperiment`
 
 This is a *SingleCellExperiment* object but it is evaluated as a tibble. So it is compatible both with SingleCellExperiment and tidyverse. 
 
@@ -111,11 +110,11 @@ We may want to extract the run/sample name out of it into a separate column. Tid
 ```{r}
 # Create sample column
 pbmc_small_polished <-
-    pbmc_small_tidy %>%
+    pbmc_small_tidy |>
     extract(file, "sample", "../data/([a-z0-9]+)/outs.+", remove=FALSE)
 
 # Reorder to have sample column up front
-pbmc_small_polished %>%
+pbmc_small_polished |>
     select(sample, everything())
 ```
 
@@ -153,17 +152,17 @@ We can treat `pbmc_small_polished` as a tibble for plotting.
 Here we plot number of features per cell.
 
 ```{r plot1}
-pbmc_small_polished %>%
-    tidySingleCellExperiment::ggplot(aes(nFeature_RNA, fill=groups)) +
+pbmc_small_polished |>
+    ggplot(aes(nFeature_RNA, fill=groups)) +
     geom_histogram() +
     custom_theme
 ```
 
 Here we plot total features per cell.
 
 ```{r plot2}
-pbmc_small_polished %>%
-    tidySingleCellExperiment::ggplot(aes(groups, nCount_RNA, fill=groups)) +
+pbmc_small_polished |>
+    ggplot(aes(groups, nCount_RNA, fill=groups)) +
     geom_boxplot(outlier.shape=NA) +
     geom_jitter(width=0.1) +
     custom_theme
@@ -172,8 +171,8 @@ pbmc_small_polished %>%
 Here we plot abundance of two features for each group.
 
 ```{r}
-pbmc_small_polished %>%
-    join_features(features=c("HLA-DRA", "LYZ")) %>%
+pbmc_small_polished |>
+    join_features(features=c("HLA-DRA", "LYZ")) |>
     ggplot(aes(groups, .abundance_counts + 1, fill=groups)) +
     geom_boxplot(outlier.shape=NA) +
     geom_jitter(aes(size=nCount_RNA), alpha=0.5, width=0.2) +
@@ -188,13 +187,13 @@ We can also treat `pbmc_small_polished` as a *SingleCellExperiment* object and p
 ```{r preprocess}
 # Identify variable genes with scran
 variable_genes <-
-    pbmc_small_polished %>%
-    modelGeneVar() %>%
+    pbmc_small_polished |>
+    modelGeneVar() |>
     getTopHVGs(prop=0.1)
 
 # Perform PCA with scater
 pbmc_small_pca <-
-    pbmc_small_polished %>%
+    pbmc_small_polished |>
     runPCA(subset_row=variable_genes)
 
 pbmc_small_pca
@@ -204,9 +203,9 @@ If a tidyverse-compatible package is not included in the tidySingleCellExperimen
 
 ```{r pc_plot}
 # Create pairs plot with GGally
-pbmc_small_pca %>%
-    as_tibble() %>%
-    select(contains("PC"), everything()) %>%
+pbmc_small_pca |>
+    as_tibble() |>
+    select(contains("PC"), everything()) |>
     GGally::ggpairs(columns=1:5, ggplot2::aes(colour=groups)) +
     custom_theme
 ```
@@ -220,41 +219,41 @@ pbmc_small_cluster <- pbmc_small_pca
 
 # Assign clusters to the 'colLabels' of the SingleCellExperiment object
 colLabels(pbmc_small_cluster) <-
-    pbmc_small_pca %>%
-    buildSNNGraph(use.dimred="PCA") %>%
+    pbmc_small_pca |>
+    buildSNNGraph(use.dimred="PCA") |>
     igraph::cluster_walktrap() %$%
-    membership %>%
+    membership |>
     as.factor()
 
 # Reorder columns
-pbmc_small_cluster %>% select(label, everything())
+pbmc_small_cluster |> select(label, everything())
 ```
 
 And interrogate the output as if it was a regular tibble.
 
 ```{r cluster count}
 # Count number of cells for each cluster per group
-pbmc_small_cluster %>%
-    tidySingleCellExperiment::count(groups, label)
+pbmc_small_cluster |>
+    count(groups, label)
 ```
 
 We can identify and visualise cluster markers combining SingleCellExperiment, tidyverse functions and tidyHeatmap [@mangiola2020tidyheatmap]
 
 ```{r}
 # Identify top 10 markers per cluster
 marker_genes <-
-    pbmc_small_cluster %>%
-    findMarkers(groups=pbmc_small_cluster$label) %>%
-    as.list() %>%
-    map(~ .x %>%
-        head(10) %>%
-        rownames()) %>%
+    pbmc_small_cluster |>
+    findMarkers(groups=pbmc_small_cluster$label) |>
+    as.list() |>
+    map(~ .x |>
+        head(10) |>
+        rownames()) |>
     unlist()
 
 # Plot heatmap
-pbmc_small_cluster %>%
-    join_features(features=marker_genes) %>%
-    group_by(label) %>%
+pbmc_small_cluster |>
+    join_features(features=marker_genes) |>
+    group_by(label) |>
     heatmap(.feature, .cell, .abundance_counts, .scale="column")
 ```
 
@@ -264,14 +263,14 @@ We can calculate the first 3 UMAP dimensions using the SingleCellExperiment fram
 
 ```{r umap}
 pbmc_small_UMAP <-
-    pbmc_small_cluster %>%
+    pbmc_small_cluster |>
     runUMAP(ncomponents=3)
 ```
 
 And we can plot the result in 3D using plotly.
 
 ```{r umap plot, eval=FALSE}
-pbmc_small_UMAP %>%
+pbmc_small_UMAP |>
     plot_ly(
         x=~`UMAP1`,
         y=~`UMAP2`,
@@ -295,47 +294,47 @@ blueprint <- celldex::BlueprintEncodeData()
 # Infer cell identities
 cell_type_df <-
 
-    assays(pbmc_small_UMAP)$logcounts %>%
-    Matrix::Matrix(sparse = TRUE) %>%
+    assays(pbmc_small_UMAP)$logcounts |>
+    Matrix::Matrix(sparse = TRUE) |>
     SingleR::SingleR(
         ref = blueprint,
         labels = blueprint$label.main,
         method = "single"
-    ) %>%
-    as.data.frame() %>%
-    as_tibble(rownames="cell") %>%
+    ) |>
+    as.data.frame() |>
+    as_tibble(rownames="cell") |>
     select(cell, first.labels)
 ```
 
 ```{r}
 # Join UMAP and cell type info
 pbmc_small_cell_type <-
-    pbmc_small_UMAP %>%
+    pbmc_small_UMAP |>
     left_join(cell_type_df, by="cell")
 
 # Reorder columns
-pbmc_small_cell_type %>%
-    tidySingleCellExperiment::select(cell, first.labels, everything())
+pbmc_small_cell_type |>
+    select(cell, first.labels, everything())
 ```
 
 We can easily summarise the results. For example, we can see how cell type classification overlaps with cluster classification.
 
 ```{r}
 # Count number of cells for each cell type per cluster
-pbmc_small_cell_type %>%
+pbmc_small_cell_type |>
     count(label, first.labels)
 ```
 
 We can easily reshape the data for building information-rich faceted plots.
 
 ```{r}
-pbmc_small_cell_type %>%
+pbmc_small_cell_type |>
 
     # Reshape and add classifier column
     pivot_longer(
         cols=c(label, first.labels),
         names_to="classifier", values_to="label"
-    ) %>%
+    ) |>
 
     # UMAP plots for cell type and cluster
     ggplot(aes(UMAP1, UMAP2, color=label)) +
@@ -347,13 +346,13 @@ pbmc_small_cell_type %>%
 We can easily plot gene correlation per cell category, adding multi-layer annotations.
 
 ```{r}
-pbmc_small_cell_type %>%
+pbmc_small_cell_type |>
 
     # Add some mitochondrial abundance values
-    mutate(mitochondrial=rnorm(dplyr::n())) %>%
+    mutate(mitochondrial=rnorm(dplyr::n())) |>
 
     # Plot correlation
-    join_features(features=c("CST3", "LYZ"), shape="wide") %>%
+    join_features(features=c("CST3", "LYZ"), shape="wide") |>
     ggplot(aes(CST3 + 1, LYZ + 1, color=groups, size=mitochondrial)) +
     geom_point() +
     facet_wrap(~first.labels, scales="free") +
@@ -368,9 +367,9 @@ A powerful tool we can use with tidySingleCellExperiment is tidyverse `nest`. We
 
 ```{r}
 pbmc_small_nested <-
-    pbmc_small_cell_type %>%
-    filter(first.labels != "Erythrocytes") %>%
-    mutate(cell_class=dplyr::if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
+    pbmc_small_cell_type |>
+    filter(first.labels != "Erythrocytes") |>
+    mutate(cell_class=dplyr::if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) |>
     nest(data=-cell_class)
 
 pbmc_small_nested
@@ -380,24 +379,24 @@ Now we can independently for the lymphoid and myeloid subsets (i) find variable
 
 ```{r warning=FALSE}
 pbmc_small_nested_reanalysed <-
-    pbmc_small_nested %>%
+    pbmc_small_nested |>
     mutate(data=map(
         data, ~ {
             .x <- runPCA(.x, subset_row=variable_genes)
 
             variable_genes <-
-                .x %>%
-                modelGeneVar() %>%
+                .x |>
+                modelGeneVar() |>
                 getTopHVGs(prop=0.3)
 
             colLabels(.x) <-
-                .x %>%
-                buildSNNGraph(use.dimred="PCA") %>%
+                .x |>
+                buildSNNGraph(use.dimred="PCA") |>
                 igraph::cluster_walktrap() %$%
-                membership %>%
+                membership |>
                 as.factor()
 
-            .x %>% runUMAP(ncomponents=3)
+            .x |> runUMAP(ncomponents=3)
         }
     ))
 
@@ -407,14 +406,14 @@ pbmc_small_nested_reanalysed
 We can then unnest and plot the new classification.
 
 ```{r}
-pbmc_small_nested_reanalysed %>%
+pbmc_small_nested_reanalysed |>
 
     # Convert to tibble otherwise SingleCellExperiment drops reduced dimensions when unifying data sets.
-    mutate(data=map(data, ~ .x %>% as_tibble())) %>%
-    unnest(data) %>%
+    mutate(data=map(data, ~ .x |> as_tibble())) |>
+    unnest(data) |>
 
     # Define unique clusters
-    unite("cluster", c(cell_class, label), remove=FALSE) %>%
+    unite("cluster", c(cell_class, label), remove=FALSE) |>
 
     # Plotting
     ggplot(aes(UMAP1, UMAP2, color=cluster)) +
@@ -427,32 +426,32 @@ We can perform a large number of functional analyses on data subsets. For exampl
 
 ```{r, eval=FALSE}
 pbmc_small_nested_interactions <-
-    pbmc_small_nested_reanalysed %>%
+    pbmc_small_nested_reanalysed |>
 
     # Unnest based on cell category
-    unnest(data) %>%
+    unnest(data) |>
 
     # Create unambiguous clusters
-    mutate(integrated_clusters=first.labels %>% as.factor() %>% as.integer()) %>%
+    mutate(integrated_clusters=first.labels |> as.factor() |> as.integer()) |>
 
     # Nest based on sample
-    tidySingleCellExperiment::nest(data=-sample) %>%
-    tidySingleCellExperiment::mutate(interactions=map(data, ~ {
+    nest(data=-sample) |>
+    mutate(interactions=map(data, ~ {
 
         # Produce variables. Yuck!
         cluster <- colData(.x)$integrated_clusters
-        data <- data.frame(assays(.x) %>% as.list() %>% .[[1]] %>% as.matrix())
+        data <- data.frame(assays(.x) |> as.list() |> extract2(1) |> as.matrix())
 
         # Ligand/Receptor analysis using SingleCellSignalR
-        data %>%
-            cell_signaling(genes=rownames(data), cluster=cluster) %>%
-            inter_network(data=data, signal=., genes=rownames(data), cluster=cluster) %$%
-            `individual-networks` %>%
+        data |>
+            cell_signaling(genes=rownames(data), cluster=cluster) |>
+            inter_network(data=data, signal=_, genes=rownames(data), cluster=cluster) %$%
+            `individual-networks` |>
             map_dfr(~ bind_rows(as_tibble(.x)))
     }))
 
-pbmc_small_nested_interactions %>%
-    select(-data) %>%
+pbmc_small_nested_interactions |>
+    select(-data) |>
     unnest(interactions)
 ```
 
@@ -469,6 +468,6 @@ Sometimes, it is necessary to aggregate the gene-transcript abundance from a gro
 In tidySingleCellExperiment, cell aggregation can be achieved using the `aggregate_cells` function.
 
 ```{r}
-pbmc_small_tidy %>%
+pbmc_small_tidy |>
   aggregate_cells(groups, assays = "counts")
 ```