How to add boxplots or density plots side-by-side a scatterplot: a single cell case study

introduce ggside using single cell data

The ggside R package provides a new way to visualize data by combining the flexibility of ggplot2 with the power of side-by-side plots.

We will use a single cell dataset to demonstrate its usage.

ggside allows users to create side-by-side plots of multiple variables, such as gene expression, cell type, and experimental conditions. This can be helpful for identifying patterns and trends in scRNA-seq data that would be difficult to see in individual plots. Additionally, ggside provides a number of features that make it easy to customize the appearance of side-by-side plots, such as changing the color scheme, adding labels, and adjusting the layout.

Load libraries

# install.packages("ggside")
library(ggside)
library(Seurat)
library(dplyr)
library(SeuratData)

load the 3k pbmc dataset.

data("pbmc3k")

pbmc3k
#> An object of class Seurat 
#> 13714 features across 2700 samples within 1 assay 
#> Active assay: RNA (13714 features, 0 variable features)

2700 immune cells from blood.

routine processing

pbmc3k<- pbmc3k %>% 
  NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData() %>%
  RunPCA(verbose = FALSE) %>%
  FindNeighbors(dims = 1:10, verbose = FALSE) %>%
  FindClusters(resolution = 0.5, verbose = FALSE) %>%
  RunUMAP(dims = 1:10, verbose = FALSE)

Idents(pbmc3k)<- pbmc3k$seurat_annotations

DimPlot(pbmc3k, label = TRUE, repel=TRUE) + NoLegend()

some helper functions to extract the gene expression values from the seurat object

matrix_to_expression_df<- function(x, obj){
        df<- x %>%
                as.matrix() %>% 
                as.data.frame() %>%
                tibble::rownames_to_column(var= "gene") %>%
                tidyr::pivot_longer(cols = -1, names_to = "cell", values_to = "expression") %>%
                tidyr::pivot_wider(names_from = "gene", values_from = expression) %>%
                left_join(obj@meta.data %>% 
                                  tibble::rownames_to_column(var = "cell"))
        return(df)
}


get_expression_data<- function(obj, assay = "RNA", slot = "data", 
                               genes = NULL, cells = NULL){
        if (is.null(genes) & !is.null(cells)){
                df<- GetAssayData(obj, assay = assay, slot = slot)[, cells, drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        } else if (!is.null(genes) & is.null(cells)){
                df <- GetAssayData(obj, assay = assay, slot = slot)[genes, , drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        } else if (is.null(genes & is.null(cells))){
                df <- GetAssayData(obj, assay = assay, slot = slot)[, , drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        } else {
                df<- GetAssayData(obj, assay = assay, slot = slot)[genes, cells, drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        }
        return(df)
}

Test the function

df<- get_expression_data(obj = pbmc3k, genes = c("CD14", "FCGR3A"))

head(df)
#> # A tibble: 6 × 9
#>   cell           CD14 FCGR3A orig.ident nCount_RNA nFeature_RNA seurat_annotati…
#>   <chr>         <dbl>  <dbl> <fct>           <dbl>        <int> <fct>           
#> 1 AAACATACAACC…     0   0    pbmc3k           2419          779 Memory CD4 T    
#> 2 AAACATTGAGCT…     0   0    pbmc3k           4903         1352 B               
#> 3 AAACATTGATCA…     0   0    pbmc3k           3147         1129 Memory CD4 T    
#> 4 AAACCGTGCTTC…     0   1.57 pbmc3k           2639          960 CD14+ Mono      
#> 5 AAACCGTGTATG…     0   0    pbmc3k            980          521 NK              
#> 6 AAACGCACTGGT…     0   0    pbmc3k           2163          781 Memory CD4 T    
#> # … with 2 more variables: RNA_snn_res.0.5 <fct>, seurat_clusters <fct>

Let’s only focus on the monocytes and use CD14 and CD16/FCGR3A as an example.

A plain scatter plot:

df %>%
  filter(seurat_annotations %in% c("CD14+ Mono", "FCGR3A+ Mono")) %>%
  ggplot(aes(x= CD14, y = FCGR3A)) +
  geom_point(aes(color = seurat_annotations))

CD14+ monocytes are mostly CD14+CD16- and CD16+ monocytes are mostly CD16+CD14- which makes sense.There are also some intermedidate cells that are CD14+CD16+ in the middle.

a scatter plot adding two boxplots:

df %>%
  filter(seurat_annotations %in% c("CD14+ Mono", "FCGR3A+ Mono")) %>%
  ggplot(aes(x= CD14, y = FCGR3A)) +
  geom_point(aes(color = seurat_annotations)) +
  geom_xsideboxplot(aes(y = seurat_annotations, color = seurat_annotations), 
                    orientation = "y") +
  geom_ysideboxplot(aes(x = seurat_annotations, color = seurat_annotations), 
                    orientation = "x")+
  scale_xsidey_discrete() +
  scale_ysidex_discrete()+
  theme(ggside.panel.scale.x = 0.2,
       ggside.panel.scale.y = 0.3)

a scatterplot adding one boxplot and one density plot

df %>%
  filter(seurat_annotations %in% c("CD14+ Mono", "FCGR3A+ Mono")) %>%
  ggplot(aes(x= CD14, y = FCGR3A)) +
  geom_point(aes(color = seurat_annotations)) +
  geom_xsideboxplot(aes(y = seurat_annotations, color = seurat_annotations), 
                    orientation = "y") +
  geom_ysidedensity(aes(x = after_stat(density), color = seurat_annotations, fill = seurat_annotations), 
                    position = "stack", alpha = 0.4) +
  scale_xsidey_discrete() +
  scale_ysidex_continuous(guide = guide_axis(angle = 90), minor_breaks = NULL) +
  theme(ggside.panel.scale.x = 0.2,
       ggside.panel.scale.y = 0.4)

alternative way: use patchwork

https://patchwork.data-imaginist.com/

library(patchwork)

p1<- df %>%
  filter(seurat_annotations %in% c("CD14+ Mono", "FCGR3A+ Mono")) %>%
  ggplot(aes(x= seurat_annotations, y = CD14)) +
  geom_boxplot(aes(color = seurat_annotations)) + 
  xlab("") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    #legend.position = "none", legend.text = element_blank()
  )+
  coord_flip()

p2<- df %>%
  filter(seurat_annotations %in% c("CD14+ Mono", "FCGR3A+ Mono")) %>%
  ggplot(aes(x= CD14, y = FCGR3A)) +
  geom_point(aes(color = seurat_annotations)) +
  theme(legend.position = "none", legend.text = element_blank()) 

p3<- df %>%
  filter(seurat_annotations %in% c("CD14+ Mono", "FCGR3A+ Mono")) %>%
  ggplot(aes(x= seurat_annotations, y = FCGR3A)) +
  geom_boxplot(aes(color = seurat_annotations)) +
  theme(legend.position = "none") +
  ylab("") +
  xlab("") +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) 

p1 + plot_spacer() + p2 + p3 +
   plot_layout(widths = c(4, 2), heights = c(1, 5),
               guides = 'collect')

I hope you enjoyed this post! More later. Happy Learning!

I made a video for this in my chatomics youtube channel, check it out!

Related

Next
Previous
comments powered by Disqus