neighborhood/cellular niches analysis with spatial transcriptome data in Seurat and Bioconductor

Aug 8, 2023 21 min read bioinformatics, scRNAseq, spatial

Spatial transcriptome cellular niche analysis using 10x xenium data

go to https://www.10xgenomics.com/resources/datasets

There is a lung cancer and a breast cancer dataset. Let’s work on the lung cancer one.

https://www.10xgenomics.com/resources/datasets/xenium-human-lung-preview-data-1-standard

37G zipped file!

wget https://s3-us-west-2.amazonaws.com/10x.files/samples/xenium/1.3.0/Xenium_Preview_Human_Lung_Cancer_With_Add_on_2_FFPE/Xenium_Preview_Human_Lung_Cancer_With_Add_on_2_FFPE_outs.zip

unzip Xenium_Preview_Human_Lung_Cancer_With_Add_on_2_FFPE_outs.zip 

sudo tar xvzf cell_feature_matrix.tar.gz

cell_feature_matrix/ cell_feature_matrix/barcodes.tsv.gz cell_feature_matrix/features.tsv.gz cell_feature_matrix/matrix.mtx.gz

read in the data with Seurat

We really only care about the cell by gene count matrix which is inside the cell_feature_matrix folder, and the cell location x,y coordinates: cells.csv.gz.

The data/xenium folder below contains the cells.csv.gz file and the cell_feature_matrix folder.

The transcripts.csv.gz contains the molecule location information which is not needed for most of the analysis if you do not work on subcelluar locations of the RNAs.

library(Seurat)
library(dplyr)
library(here)
library(tictoc)

xenium_input<- ReadXenium(
  data.dir = "~/github_repos/compbio_tutorials/data/xenium",
  outs = "matrix",
  type = "centroids",
  mols.qv.threshold = 20
)

construct a seurat object. https://github.com/satijalab/seurat/blob/763259d05991d40721dee99c9919ec6d4491d15e/R/convenience.R#L186

segmentations.data <- list(
    "centroids" = CreateCentroids(xenium_input$centroids)
    #"segmentation" = CreateSegmentation(data$segmentations)
  )

coords <- CreateFOV(
    coords = segmentations.data,
    type = "centroids",
    molecules = NULL,
    assay = "Xenium"
  )

xenium.obj <- CreateSeuratObject(counts = xenium_input$matrix[["Gene Expression"]], assay = "Xenium")
xenium.obj[["NegativeControlCodeword"]] <- CreateAssayObject(counts = xenium_input$matrix[["Negative Control Codeword"]])

xenium.obj[["NegativeControlProbe"]] <- CreateAssayObject(counts = xenium_input$matrix[["Negative Control Probe"]])

xenium.obj[["fov"]] <- coords

filter the cells

# remove cells with 0 counts
xenium.obj <- subset(xenium.obj, subset = nCount_Xenium > 0)

VlnPlot(xenium.obj, features = c("nFeature_Xenium", "nCount_Xenium"), ncol = 2, pt.size = 0)

median(xenium.obj$nCount_Xenium)

#> [1] 105

median(xenium.obj$nFeature_Xenium)

#> [1] 62

median number of detected genes is around 62, and the median counts per cell is 105.

ImageFeaturePlot(xenium.obj, features = c("EPCAM", "CD4", "CD8A", "CD79A"), max.cutoff = c(20,
    15, 6, 6), size = 0.75, cols = c("white", "red"))

EPCAM mark the epithelial/tumor cells, and we also see some CD4, CD8 T cells and B cells.

standard processing

This takes 30mins

tic()
xenium.obj <- SCTransform(xenium.obj, assay = "Xenium")

#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

xenium.obj <- RunPCA(xenium.obj, npcs = 30, features = rownames(xenium.obj))
xenium.obj <- RunUMAP(xenium.obj, dims = 1:30)
xenium.obj <- FindNeighbors(xenium.obj, reduction = "pca", dims = 1:30)
xenium.obj <- FindClusters(xenium.obj, resolution = 0.3)

#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#> 
#> Number of nodes: 530893
#> Number of edges: 17467378
#> 
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.9360
#> Number of communities: 21
#> Elapsed time: 824 seconds

toc()

#> 2232.137 sec elapsed

DimPlot(xenium.obj)

FeaturePlot(xenium.obj, features = c("CD4", "CD8A", "EPCAM", "NKG7", "GZMB",
                                     "FOXP3"))

ImageDimPlot(xenium.obj, cols = "polychrome", size = 0.75)

Find all markers of the clusters and annotate the clusters with marker genes

# devtools::install_github("immunogenomics/presto")
library(presto)

all_markers<- presto::wilcoxauc(xenium.obj, assay = "data",
                                seurat_assay = "Xenium")

all_markers %>% head()

#>   feature group     avgExpr        logFC   statistic       auc          pval
#> 1     ACE     0 0.221668593 -0.253886240 15537669449 0.4531079  0.000000e+00
#> 2    ACE2     0 0.005328125 -0.002081364 17111525722 0.4990046  9.218905e-10
#> 3   ACKR1     0 0.021724399 -0.062837265 16836952359 0.4909975 2.384530e-141
#> 4   ACTR3     0 0.965692723 -0.381113472 14948197656 0.4359178  0.000000e+00
#> 5  ADAM17     0 0.339432110  0.106478761 18344176529 0.5349510  0.000000e+00
#> 6  ADAM28     0 0.036061174 -0.027966375 16783860239 0.4894492 4.246117e-134
#>            padj    pct_in    pct_out
#> 1  0.000000e+00 16.297950 24.8933350
#> 2  1.047481e-09  0.516868  0.7159286
#> 3 4.744851e-141  1.856207  3.6336342
#> 4  0.000000e+00 53.824690 62.5498209
#> 5  0.000000e+00 25.355762 18.6602346
#> 6 8.239989e-134  3.178273  5.2801384

# presto is much faster than FindAllMarkers
presto::top_markers(all_markers, n = 10, auc_min = 0.5, pct_in_min = 20)

#> # A tibble: 10 × 17
#>     rank `0`   `1`   `10`  `11`  `12`  `13`  `14`  `15`  `2`   `3`   `4`   `5`  
#>    <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#>  1     1 LTBP2 MARCO CD24  FCGBP CHIT1 MMRN1 KIT   MS4A1 SFTPD VWF   CD2   EPCAM
#>  2     2 FBN1  CD68  AGR3  AIF1  CD68  VWF   IL1R… IRF8  MUC1  GNG11 IL7R  MET  
#>  3     3 SLIT3 VSIG4 CP    CD68  LGMN  ECSCR MS4A2 FCMR  SFTA3 TMEM… KLRB1 KRT7 
#>  4     4 THBS2 ALOX5 TMC5  S100B PLA2… LYVE1 SLC1… FDCSP DMBT1 CD34  CD3E  MUC1 
#>  5     5 COL5… MCEM… FOXJ1 FCGR… MS4A… ACKR1 HPGDS IL7R  ATP1… ECSCR GZMA  BAIA…
#>  6     6 SVEP1 FCGR… TACS… MMP12 MARCO SELP  PTGS1 TNFR… RNF1… ACE   CTLA4 TACS…
#>  7     7 SFRP2 CD163 ATP1… CD14  SLC1… GNG11 ALOX5 BANK1 TMPR… SHAN… CD3D  CDH1 
#>  8     8 CTSK  CTSL  EHF   CD4   CD163 PLVAP FCER… GPR1… C16o… APOL… STAT4 RBM3 
#>  9     9 COL8… MS4A… CCDC… IGSF6 CTSK  ADGR… P2RX1 CD2   MALL  ADGR… GPR1… TOMM7
#> 10    10 NID1  AIF1  BAIA… VSIG4 IGSF6 SHAN… AREG  RARR… AGR3  FCN3  CD247 HNRN…
#> # ℹ 4 more variables: `6` <chr>, `7` <chr>, `8` <chr>, `9` <chr>

Based on the marker genes, I manually annotate the cell types.

cluster 0: fibroblast.
cluster 1: M2-like macrophage.
cluster 10: ciliated epithelial cells.
cluster 11: M2-like macrophage.
cluster 12: M2-like macrophage.
cluster 13: endothelia-1.
cluster 14: mast cell.
cluster 15: B cells.
cluster 2: alveolar epithelial cells.
cluster 3: endothelial-2.
cluster 4: T/NK cells.
cluster 5: alveolar epithelial cells.
cluster 6: smooth muscle cells.
cluster 7: M2-like macrophage.
cluster 8: M1-like macrophage.
cluster 9: plasma cells.

Add the cell annotations

annotations<- c("fibroblast", "M2-like macrophage-1", "ciliated epithelial cells", "M2-like macrophage-2", "M2-like macrophage-3", "endothelial-1", "mast cell", "B cells-1", "alveolar epithelial cells-1", "endothelial-2", "T/NK cells", "alveolar epithelial cells-2", "smooth muscle cells", "M2-like macrophage-4", "M1-like macrophage", "plasma cells")

names(annotations)<- c(0,1,10,11,12,13,14,15,2,3,4,5,6,7,8,9)

xenium.obj$annotations<- annotations[as.character(xenium.obj$seurat_clusters)]

cluster 4 is the T/NK cells, I want to subcluster it to more fine-grained clusters

table(xenium.obj$seurat_clusters)

#> 
#>     0     1     2     3     4     5     6     7     8     9    10    11    12 
#> 75261 64079 63996 61209 55166 52187 31937 31335 17504 16194 15057 13049 12487 
#>    13    14    15 
#> 12412  5525  3495

xenium.obj<- FindSubCluster(xenium.obj, cluster = 4, graph.name = "SCT_snn",
                            resolution = 0.2)

#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#> 
#> Number of nodes: 55166
#> Number of edges: 1705627
#> 
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.8845
#> Number of communities: 12
#> Elapsed time: 19 seconds

table(xenium.obj$sub.cluster)

#> 
#>     0     1    10    11    12    13    14    15     2     3   4_0   4_1   4_2 
#> 75261 64079 15057 13049 12487 12412  5525  3495 63996 61209 24009 11719  5917 
#>   4_3   4_4   4_5   4_6     5     6     7     8     9 
#>  5624  5068  1862   967 52187 31937 31335 17504 16194

Idents(xenium.obj)<- xenium.obj$sub.cluster
scCustomize::Clustered_DotPlot(xenium.obj[, xenium.obj$sub.cluster %>%
                                            stringr::str_detect("^4_")], 
                               features = c("CD4", "CD8A", "NKG7", "GZMB", 
                               "PDCD1", "FOXP3"), plot_km_elbow = FALSE)

all_markers2<- presto::wilcoxauc(xenium.obj[,xenium.obj$sub.cluster %>%
                                            stringr::str_detect("^4_")],
                                 assay = "data", seurat_assay = "Xenium")

presto::top_markers(all_markers2, n = 20, auc_min = 0.5, pct_in_min = 20)

#> # A tibble: 20 × 8
#>     rank `4_0`    `4_1`  `4_2`   `4_3`  `4_4`   `4_5`    `4_6`  
#>    <int> <chr>    <chr>  <chr>   <chr>  <chr>   <chr>    <chr>  
#>  1     1 IL7R     KLRD1  SFRP2   CXCL13 CXCL9   PRG4     CHIT1  
#>  2     2 CD3E     KLRB1  THBS2   CTLA4  CCR7    SLIT3    CD68   
#>  3     3 CD2      GZMA   CTSK    CXCL9  IDO1    LTBP2    LGMN   
#>  4     4 CTLA4    FGFBP2 LTBP2   CD2    UBD     SVEP1    CTSK   
#>  5     5 CD3D     GZMB   THY1    CD8A   FSCN1   FBN1     IGSF6  
#>  6     6 CD28     FCGR3A COL5A2  UBD    IL7R    SERPINA3 MS4A4A 
#>  7     7 GPR171   CD247  RARRES1 GPI    CD83    TGM2     PLA2G7 
#>  8     8 GPR183   KRT7   FBN1    CHI3L1 CXCL10  APOD     MARCO  
#>  9     9 CD4      MUC1   COL8A1  MUC1   IRF8    NID1     CTSA   
#> 10    10 TC2N     ADGRE5 ALDH1A3 KRT7   NFKB1   ENAH     AIF1   
#> 11    11 LCK      AREG   IGF1    AIF1   GRINA   IL7R     RARRES1
#> 12    12 FCMR     SFTPD  PLA2G2A CD3E   GPR183  EPHX1    CD4    
#> 13    13 CXCR6    EPCAM  PDPN    PSMA1  S100B   PDPN     SLC1A3 
#> 14    14 ANKRD36C SFTA3  APOD    EPCAM  CD86    HIF1A    LAMP2  
#> 15    15 GPSM3    ATP1B1 SLIT3   CD68   IRF1    RARRES1  FCGR1A 
#> 16    16 STAT4    GNG11  NID1    CD27   CMTM6   CD4      CTSL   
#> 17    17 TMEM123  DMBT1  TGM2    CTSL   ACTR3   CTSK     FCGR3A 
#> 18    18 SRSF7    VWF    ENAH    SRSF2  AIF1    COL8A1   HEXA   
#> 19    19 CD163    RUNX3  EIF3E   CD3D   ALOX5   PCBP2    FCMR   
#> 20    20 CD8A     EPHX1  SVEP1   HNRNPC TMEM123 LGALS3BP ACE

more fine-grained annotation

4_0: CD4 Tcells.
4_1: NK cells.
4_2: fibroblast.
4_3: CD8 Tcells.
4_5: PRG4 is a fibroblast marker we also see CD4 in this cluster.
4_6: macrophage.

Interesting, subcluster the NK/T cells revealed fibroblasts and macrophage (4_6) populations. My experience with spatial data from 10x, nanostring and Vizgen are similar, they all have spill out transcripts of cells that maybe contaminating the nearby cells. Also, there is no clear cut of CD4 T cells and CD8 T cells.

neighorhoods analysis in Seurat

Cellular neighborhood/niche is aggregation of cells can result in ‘cellular neighbourhoods’. A neighbourhood is defined as a group of cells that with a similar composition together. These can be homotypic, containing cells of a single class (e.g. immune cells), or heterotypic (e.g. a mixture of tumour and immune cells).

In one of my previous post, I showed you how to identify cellular neighborhood/niches within seurat.

Let’s repeat it.

xenium_input$centroids %>% head()

#>          x        y       cell
#> 1 366.5239 849.1828 aaaaaacg-1
#> 2 364.5222 863.8438 aaaaafen-1
#> 3 381.3140 863.7556 aaaaanfe-1
#> 4 388.0256 864.1576 aaaabekh-1
#> 5 394.2588 869.2458 aaaacacf-1
#> 6 390.1672 846.2146 aaaakgka-1

mat<- xenium_input$centroids[,1:2] 
mat<- as.matrix(mat)
rownames(mat)<- xenium_input$centroids$cell

Reorder the cells in the coordinates matrix as the same order as in the Seurat object.

Cells(xenium.obj) %>% head()

#> [1] "aaaaaacg-1" "aaaaafen-1" "aaaaanfe-1" "aaaabekh-1" "aaaacacf-1"
#> [6] "aaaakgka-1"

## make sure the order of the cells is the same 
all.equal(Cells(xenium.obj), rownames(mat))

#> [1] "Lengths (530893, 531165) differ (string compare on first 530893)"
#> [2] "519130 string mismatches"

head(mat)

#>                   x        y
#> aaaaaacg-1 366.5239 849.1828
#> aaaaafen-1 364.5222 863.8438
#> aaaaanfe-1 381.3140 863.7556
#> aaaabekh-1 388.0256 864.1576
#> aaaacacf-1 394.2588 869.2458
#> aaaakgka-1 390.1672 846.2146

# reorder the matrix rows
mat<- mat[Cells(xenium.obj), ]
head(mat)

#>                   x        y
#> aaaaaacg-1 366.5239 849.1828
#> aaaaafen-1 364.5222 863.8438
#> aaaaanfe-1 381.3140 863.7556
#> aaaabekh-1 388.0256 864.1576
#> aaaacacf-1 394.2588 869.2458
#> aaaakgka-1 390.1672 846.2146

all.equal(Cells(xenium.obj), rownames(mat))

#> [1] TRUE

Find the closest cells for each cell within 50 um radius.

library(dbscan)
eps <- 50
tic()
nn <- frNN(x= mat, eps = eps)
toc()

#> 21.538 sec elapsed

nn$id %>%
  head(n=3)

#> $`aaaaaacg-1`
#>  [1]     2     3     6 82996     4 82999 40480 82992  6704 82997   120     5
#> [13]     7     8 83001 82998 82991 40489     9 83009 82995 83004 83006 83007
#> 
#> $`aaaaafen-1`
#>  [1] 82996     1 82992     3 82997 82999     4  6704 82998 82991 83001   120
#> [13]     5     6 82995     7     8 83000 83006 40480 83004 83009 84185 83007
#> [25] 83002 84187  6707     9 83010
#> 
#> $`aaaaanfe-1`
#>  [1]     4  6704 82999     5     7     2 82996 83001 82997     6     1     8
#> [13] 83009 82998 83004 82992 83007 83006 40480     9 83010 83000 82995 83014
#> [25] 83002 82991 83008 83015 40489 83005    10    11   120 40481    12

It is a list of list containing the indices of the cells that are within 50um radius.

Create the neighborhood count matrix.

# the same cell order
all.equal(colnames(xenium.obj), names(nn$id))

#> [1] TRUE

nn$id %>%
  stack() %>%
  head()

#>   values        ind
#> 1      2 aaaaaacg-1
#> 2      3 aaaaaacg-1
#> 3      6 aaaaaacg-1
#> 4  82996 aaaaaacg-1
#> 5      4 aaaaaacg-1
#> 6  82999 aaaaaacg-1

## make it to a dataframe 
nn_df<- nn$id %>%
  stack()

get the annotations for cells that are within the 50 um of each cell.

cluster_ids<- xenium.obj$annotations %>% unname()

## vectorization, so much faster than the solution in my old post
nn_df$cluster_id<- cluster_ids[nn_df$values]

head(nn_df)

#>   values        ind                  cluster_id
#> 1      2 aaaaaacg-1        M2-like macrophage-2
#> 2      3 aaaaaacg-1 alveolar epithelial cells-1
#> 3      6 aaaaaacg-1                   mast cell
#> 4  82996 aaaaaacg-1                   B cells-1
#> 5      4 aaaaaacg-1                   mast cell
#> 6  82999 aaaaaacg-1                   B cells-1

nn_df$cluster_id<- factor(nn_df$cluster_id)
tic()
nn_count<- nn_df %>%
  group_by(ind) %>%
  count(cluster_id, .drop = FALSE)
toc()

#> 64.036 sec elapsed

head(nn_count)

#> # A tibble: 6 × 3
#> # Groups:   ind [1]
#>   ind        cluster_id                      n
#>   <fct>      <fct>                       <int>
#> 1 aaaaaacg-1 alveolar epithelial cells-1     1
#> 2 aaaaaacg-1 alveolar epithelial cells-2     0
#> 3 aaaaaacg-1 B cells-1                       5
#> 4 aaaaaacg-1 ciliated epithelial cells       0
#> 5 aaaaaacg-1 endothelial-1                   0
#> 6 aaaaaacg-1 endothelial-2                   0

pivot it to a wide format and create a cell x cluster_id matrix

nn_count<- nn_count %>%
  tidyr::pivot_wider(names_from = cluster_id, values_from = n)

head(nn_count)

#> # A tibble: 6 × 17
#> # Groups:   ind [6]
#>   ind        `alveolar epithelial cells-1` alveolar epithelial cel…¹ `B cells-1`
#>   <fct>                              <int>                     <int>       <int>
#> 1 aaaaaacg-1                             1                         0           5
#> 2 aaaaafen-1                             1                         0           4
#> 3 aaaaanfe-1                             0                         2           5
#> 4 aaaabekh-1                             1                         3           5
#> 5 aaaacacf-1                             1                         3           5
#> 6 aaaakgka-1                             1                         3           5
#> # ℹ abbreviated name: ¹`alveolar epithelial cells-2`
#> # ℹ 13 more variables: `ciliated epithelial cells` <int>,
#> #   `endothelial-1` <int>, `endothelial-2` <int>, fibroblast <int>,
#> #   `M1-like macrophage` <int>, `M2-like macrophage-1` <int>,
#> #   `M2-like macrophage-2` <int>, `M2-like macrophage-3` <int>,
#> #   `M2-like macrophage-4` <int>, `mast cell` <int>, `plasma cells` <int>,
#> #   `smooth muscle cells` <int>, `T/NK cells` <int>

# convert to a matrix
nn_mat<- nn_count[,-1] %>% as.matrix()
rownames(nn_mat)<- nn_count$ind

head(nn_mat)

#>            alveolar epithelial cells-1 alveolar epithelial cells-2 B cells-1
#> aaaaaacg-1                           1                           0         5
#> aaaaafen-1                           1                           0         4
#> aaaaanfe-1                           0                           2         5
#> aaaabekh-1                           1                           3         5
#> aaaacacf-1                           1                           3         5
#> aaaakgka-1                           1                           3         5
#>            ciliated epithelial cells endothelial-1 endothelial-2 fibroblast
#> aaaaaacg-1                         0             0             0          0
#> aaaaafen-1                         0             0             0          0
#> aaaaanfe-1                         0             0             0          0
#> aaaabekh-1                         0             0             0          0
#> aaaacacf-1                         0             0             0          0
#> aaaakgka-1                         1             0             0          0
#>            M1-like macrophage M2-like macrophage-1 M2-like macrophage-2
#> aaaaaacg-1                  0                    0                    3
#> aaaaafen-1                  0                    0                    2
#> aaaaanfe-1                  0                    0                    3
#> aaaabekh-1                  0                    0                    2
#> aaaacacf-1                  0                    0                    3
#> aaaakgka-1                  0                    0                    4
#>            M2-like macrophage-3 M2-like macrophage-4 mast cell plasma cells
#> aaaaaacg-1                    1                    0        14            0
#> aaaaafen-1                    1                    0        20            1
#> aaaaanfe-1                    1                    0        23            1
#> aaaabekh-1                    1                    0        24            1
#> aaaacacf-1                    1                    0        25            1
#> aaaakgka-1                    1                    0        17            1
#>            smooth muscle cells T/NK cells
#> aaaaaacg-1                   0          0
#> aaaaafen-1                   0          0
#> aaaaanfe-1                   0          0
#> aaaabekh-1                   0          0
#> aaaacacf-1                   0          0
#> aaaakgka-1                   0          0

all.equal(rownames(nn_mat), Cells(xenium.obj))

#> [1] TRUE

k-means clustering

set.seed(123)

# I cheated here to make k=13 so I can compare it with the Louvian clustering later
k_means_res<- kmeans(nn_mat, centers = 13)

k_means_id<- k_means_res$cluster %>%
  tibble::enframe(name = "cell_id", value = "kmeans_cluster")

head(k_means_id)

#> # A tibble: 6 × 2
#>   cell_id    kmeans_cluster
#>   <chr>               <int>
#> 1 aaaaaacg-1             11
#> 2 aaaaafen-1             11
#> 3 aaaaanfe-1             11
#> 4 aaaabekh-1             11
#> 5 aaaacacf-1             11
#> 6 aaaakgka-1             11

# add it to the metadata solot for nn_obj below
k_means_df<- as.data.frame(k_means_id)
rownames(k_means_df)<- k_means_id$cell_id

Louvian clustering just like scRANseq data

Create a Seurat object and do a regular single-cell count matrix analysis, but now we only have 16 features (clusters) instead of 20,000 genes.

nn_obj<- CreateSeuratObject(counts = t(nn_mat),  min.features = 5)

routine processing

nn_obj<- SCTransform(nn_obj, vst.flavor = "v2")

#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

## we only have 16 clusters/celltypes, max pc 16
nn_obj <- RunPCA(nn_obj, npcs = 15, features = rownames(nn_obj))
ElbowPlot(nn_obj)

nn_obj <- FindNeighbors(nn_obj, reduction = "pca", dims = 1:15)
nn_obj <- FindClusters(nn_obj, resolution = 0.3)

#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#> 
#> Number of nodes: 483243
#> Number of edges: 12786808
#> 
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.9186
#> Number of communities: 13
#> Elapsed time: 286 seconds

nn_obj <- RunUMAP(nn_obj, dims = 1:10)


DimPlot(nn_obj)

Suerat V5 has a function to detect neighborhood

https://satijalab.org/seurat/articles/seurat5_spatial_vignette_2

I do not have Seurat V5 installed, so the code below is not run.

xenium.obj <- BuildNicheAssay(object = xenium.obj, group.by = "annotations",
    niches.k = 5, neighbors.k = 30)

compare k-means and the Louvian clustering

nn_obj<- AddMetaData(nn_obj, metadata = k_means_df )

nn_obj@meta.data %>%
  head()

#>               orig.ident nCount_RNA nFeature_RNA nCount_SCT nFeature_SCT
#> aaaaaacg-1 SeuratProject         24            5         44            5
#> aaaaafen-1 SeuratProject         29            6         45            6
#> aaaaanfe-1 SeuratProject         35            6         44            6
#> aaaabekh-1 SeuratProject         37            7         44            7
#> aaaacacf-1 SeuratProject         39            7         44            7
#> aaaakgka-1 SeuratProject         33            8         43            8
#>            SCT_snn_res.0.3 seurat_clusters    cell_id kmeans_cluster
#> aaaaaacg-1               3               3 aaaaaacg-1             11
#> aaaaafen-1               3               3 aaaaafen-1             11
#> aaaaanfe-1               3               3 aaaaanfe-1             11
#> aaaabekh-1               3               3 aaaabekh-1             11
#> aaaacacf-1               3               3 aaaacacf-1             11
#> aaaakgka-1               3               3 aaaakgka-1             11

table(nn_obj$kmeans_cluster, nn_obj$seurat_clusters)

#>     
#>          0     1     2     3     4     5     6     7     8     9    10    11
#>   1      0     0     0     0     0     0     0     0 10316     0     0     0
#>   2  14180  4543  1409  4753 11705  2406  3303  6691  5165    13   475   395
#>   3  62713   568   884  4075  5189   685  6473  4867   607   128   102     0
#>   4   8065  1942  2310 18508  5356 12663 22928 10179  8409  1015  1219   107
#>   5     23  3466     0  1097  1528 23703  2949   711  1570  5675  1102   541
#>   6      0   494 13303     8   752     0    33   139   188     0    20     0
#>   7      0 20142     0     0     0     0     0     0   471    10     3     0
#>   8      0   405     0     3     1   101     1     0   203  9862    38     0
#>   9   1578  1986 39849  1011  5615   212  2465  8996  1369     7   331     2
#>   10     0  6485     0     0     0     0     0     0    68     0     0     0
#>   11     0   362     1 16396    49  1479     8     0  1189    67    50     9
#>   12     0 25085     0    15    11    42     0    17   874   246  1178     8
#>   13     0  1189     0   344 12245  1038    23   567   426   543 12502  2816
#>     
#>         12
#>   1      0
#>   2     43
#>   3      0
#>   4     96
#>   5      0
#>   6      0
#>   7    307
#>   8      8
#>   9     56
#>   10    10
#>   11     0
#>   12  1022
#>   13    60

Some help functions from scclusteval to calculate the jaccard distances for two difference clustering methods:

#devtools::install_github("crazyhottommy/scclusteval")

library(scclusteval)
PairWiseJaccardSetsHeatmap(Idents(nn_obj), k_means_res$cluster, best_match = TRUE)

x-axis is the k-means clustering: cluster1-13. y-axis is the Louvian clustering: cluster0-12.

There are some 1-1 cluster mapping for the two different clustering methods.

Add the niche information to the original seurat object.

nn_meta<- nn_obj@meta.data %>%
  select(cell_id, SCT_snn_res.0.3, kmeans_cluster) 

xenium.obj<- AddMetaData(xenium.obj, nn_meta)

characterize the neighborhoods/niches by looking at the composition of cell types

library(ComplexHeatmap)

# calculate the average abundance of cell types per niche
average_cell_type_abundance<- AverageExpression(
  nn_obj,
  assays = NULL,
  features = rownames(nn_obj),
  return.seurat = FALSE,
  group.by = "SCT_snn_res.0.3",
)

cell_fun = function(j, i, x, y, width, height, fill) {
                grid::grid.rect(x = x, y = y, width = width *0.99, 
                                height = height *0.99,
                                gp = grid::gpar(col = "grey", 
                                                fill = fill, lty = 1, lwd = 0.5))
}

col_fun=circlize::colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))


Heatmap(t(scale(t(average_cell_type_abundance$SCT))),
        show_row_dend = FALSE,
        show_column_dend = FALSE,
        rect_gp = grid::gpar(type = "none"),
        cell_fun = cell_fun,
        col = col_fun,
        column_names_rot = 45)

T/NK cells are mainly in niche 8 and 12.

scCustomize::Clustered_DotPlot(nn_obj, features= rownames(nn_obj), plot_km_elbow = FALSE)

scCustomize::Stacked_VlnPlot(nn_obj, features= rownames(nn_obj))

Niche 1 contains most of the endothelia-1 cells and looks likely there is a blood vessel there.

p1<- ImageDimPlot(xenium.obj, fov = "fov", cols = "polychrome", axes = TRUE, 
                  group.by = "annotations")


## the cells are colored by the clustering of the cells by neighborhood 
p2<- ImageDimPlot(xenium.obj, fov = "fov", cols = "polychrome", axes = TRUE, 
                  group.by = "SCT_snn_res.0.3" )
        
p1 + p2

or table it using the tally counts

mat2<- table(xenium.obj$SCT_snn_res.0.3, xenium.obj$annotations) 

head(mat2)

#>    
#>     alveolar epithelial cells-1 alveolar epithelial cells-2 B cells-1
#>   0                         474                         352      7004
#>   1                        2992                        2431      1870
#>   2                         447                         478      2684
#>   3                         638                         597      3232
#>   4                        3541                         456      3819
#>   5                         445                         424      2632
#>    
#>     ciliated epithelial cells endothelial-1 endothelial-2 fibroblast
#>   0                     22020            87           309      13367
#>   1                      4829         33208           700       7492
#>   2                      8012            71           140       4601
#>   3                      4798            47           903       7517
#>   4                      5621           135           647       4325
#>   5                      2184            66          8190      12937
#>    
#>     M1-like macrophage M2-like macrophage-1 M2-like macrophage-2
#>   0                817                 7618                25948
#>   1                542                 3383                 2315
#>   2                369                29140                 6498
#>   3                741                  987                 5026
#>   4                463                 4054                 4424
#>   5                775                  498                 2809
#>    
#>     M2-like macrophage-3 M2-like macrophage-4 mast cell plasma cells
#>   0                 6919                  392       487          220
#>   1                 4802                  223       759          217
#>   2                 3401                  122       526          197
#>   3                 4014                 1410     15572          163
#>   4                12405                  761       577          600
#>   5                 5486                 1372      3180          265
#>    
#>     smooth muscle cells T/NK cells
#>   0                 422        123
#>   1                 637        267
#>   2                 967        103
#>   3                 464        101
#>   4                 512        111
#>   5                 956        110

Heatmap(t(scale(mat2)),
        show_row_dend = FALSE,
        show_column_dend = FALSE,
        rect_gp = grid::gpar(type = "none"),
        cell_fun = cell_fun,
        col = col_fun,
        column_names_rot = 45)

mat3<- table(xenium.obj$kmeans_cluster, xenium.obj$annotations)
Heatmap(t(scale(mat3)),
        show_row_dend = FALSE,
        show_column_dend = FALSE,
        rect_gp = grid::gpar(type = "none"),
        cell_fun = cell_fun)

Tally the counts may obscure the fine structures of the niches. For example, it only shows T/NK cells in niche 8 but not so clear in niche 12.

neigbhorhood analysis in SpatialExperiment

The SPIAT bioconductor package provides functions to do it and more functions to analyze the spatial data. But it only works with SpatialExperiment object. Paper describing it.

Convert the seurat object to SpatialExperiment object.

https://github.com/drighelli/SpatialExperiment/issues/115

library(SpatialExperiment)
library(Seurat)
# BiocManager::install("SpatialExperiment")
library(dplyr)
# BiocManager::install("SPIAT")
library(SPIAT)


## Function
seurat_to_spe <- function(seu, sample_id, img_id) {
    ## Convert to SCE
    sce <- Seurat::as.SingleCellExperiment(seu)
    
    ## Extract spatial coordinates
    spatialCoords <- as.matrix(
        seu@images[[img_id]]@boundaries$centroids@coords[, c("x", "y")])
    
    # the column names are specific for SPAIT
    colnames(spatialCoords)<- c("Cell.X.Position", "Cell.Y.Position")
    
    ## Extract and process image data
    #img <- SpatialExperiment::SpatialImage(
    #    x = as.raster(seu@images[[img_id]]@image))
    
    #imgData <- DataFrame(
    #    sample_id = sample_id,
    #    image_id = img_id,
    #    data = I(list(img)),
    #    scaleFactor = seu@images[[img_id]]@scale.factors$lowres)
    
    # Convert to SpatialExperiment
    spe <- SpatialExperiment(
        assays = assays(sce),
        rowData = rowData(sce),
        colData = colData(sce),
        metadata = metadata(sce),
        reducedDims = reducedDims(sce),
        altExps = altExps(sce),
        sample_id = sample_id,
        spatialCoords = spatialCoords,
        #imgData = imgData
    )
    # indicate all spots are on the tissue
    spe$in_tissue <- 1
    spe$sample_id <- sample_id
    # Return Spatial Experiment object
    spe
}


spe<- seurat_to_spe(seu = xenium.obj, sample_id = "1", img_id = 1)

inspect the data

# what's the gene and cell number
dim(spe)

#> [1]    392 530893

assay(spe)[1:5, 1:5]

#> 5 x 5 sparse Matrix of class "dgCMatrix"
#>        aaaaaacg-1 aaaaafen-1 aaaaanfe-1 aaaabekh-1 aaaacacf-1
#> ACE             .          .          .          .          .
#> ACE2            .          .          .          .          .
#> ACKR1           .          .          .          .          .
#> ACTR3           1          3          .          .          .
#> ADAM17          .          .          .          .          .

colData(spe)

#> DataFrame with 530893 rows and 18 columns
#>               orig.ident nCount_Xenium nFeature_Xenium
#>                 <factor>     <numeric>       <integer>
#> aaaaaacg-1 SeuratProject           147              80
#> aaaaafen-1 SeuratProject           104              60
#> aaaaanfe-1 SeuratProject            78              56
#> aaaabekh-1 SeuratProject            51              36
#> aaaacacf-1 SeuratProject            46              36
#> ...                  ...           ...             ...
#> oilgiagl-1 SeuratProject           174              82
#> oilgidkk-1 SeuratProject           178              93
#> oilgkofb-1 SeuratProject           109              53
#> oilglapp-1 SeuratProject           183              87
#> oilhbjia-1 SeuratProject            50              39
#>            nCount_NegativeControlCodeword nFeature_NegativeControlCodeword
#>                                 <numeric>                        <integer>
#> aaaaaacg-1                              0                                0
#> aaaaafen-1                              0                                0
#> aaaaanfe-1                              0                                0
#> aaaabekh-1                              0                                0
#> aaaacacf-1                              0                                0
#> ...                                   ...                              ...
#> oilgiagl-1                              0                                0
#> oilgidkk-1                              0                                0
#> oilgkofb-1                              0                                0
#> oilglapp-1                              0                                0
#> oilhbjia-1                              0                                0
#>            nCount_NegativeControlProbe nFeature_NegativeControlProbe nCount_SCT
#>                              <numeric>                     <integer>  <numeric>
#> aaaaaacg-1                           0                             0        125
#> aaaaafen-1                           0                             0        104
#> aaaaanfe-1                           0                             0         86
#> aaaabekh-1                           0                             0         89
#> aaaacacf-1                           0                             0         89
#> ...                                ...                           ...        ...
#> oilgiagl-1                           0                             0        121
#> oilgidkk-1                           0                             0        127
#> oilgkofb-1                           0                             0        109
#> oilglapp-1                           0                             0        123
#> oilhbjia-1                           0                             0         89
#>            nFeature_SCT SCT_snn_res.0.3 seurat_clusters            annotations
#>               <integer>        <factor>        <factor>            <character>
#> aaaaaacg-1           80               3              15           plasma cells
#> aaaaafen-1           60               3              3    M2-like macrophage-2
#> aaaaanfe-1           56               3              8  alveolar epithelial ..
#> aaaabekh-1           36               3              6               mast cell
#> aaaacacf-1           37               3              6               mast cell
#> ...                 ...             ...             ...                    ...
#> oilgiagl-1           77               0               2 ciliated epithelial ..
#> oilgidkk-1           90               0               2 ciliated epithelial ..
#> oilgkofb-1           53               2               1   M2-like macrophage-1
#> oilglapp-1           83               2               1   M2-like macrophage-1
#> oilhbjia-1           39               2               1   M2-like macrophage-1
#>            sub.cluster     cell_id kmeans_cluster    ident   sample_id
#>            <character> <character>      <integer> <factor> <character>
#> aaaaaacg-1          15  aaaaaacg-1             11       15           1
#> aaaaafen-1           3  aaaaafen-1             11       3            1
#> aaaaanfe-1           8  aaaaanfe-1             11       8            1
#> aaaabekh-1           6  aaaabekh-1             11       6            1
#> aaaacacf-1           6  aaaacacf-1             11       6            1
#> ...                ...         ...            ...      ...         ...
#> oilgiagl-1           2  oilgiagl-1              3        2           1
#> oilgidkk-1           2  oilgidkk-1              3        2           1
#> oilgkofb-1           1  oilgkofb-1              4        1           1
#> oilglapp-1           1  oilglapp-1              4        1           1
#> oilhbjia-1           1  oilhbjia-1              4        1           1
#>            in_tissue
#>            <numeric>
#> aaaaaacg-1         1
#> aaaaafen-1         1
#> aaaaanfe-1         1
#> aaaabekh-1         1
#> aaaacacf-1         1
#> ...              ...
#> oilgiagl-1         1
#> oilgidkk-1         1
#> oilgkofb-1         1
#> oilglapp-1         1
#> oilhbjia-1         1

spe$seurat_clusters %>% head()

#> [1] 15 3  8  6  6  6 
#> Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15

spatialCoords(spe)[1:5, ]

#>      Cell.X.Position Cell.Y.Position
#> [1,]        366.5239        849.1828
#> [2,]        364.5222        863.8438
#> [3,]        381.3140        863.7556
#> [4,]        388.0256        864.1576
#> [5,]        394.2588        869.2458

average_minimum_distance(spe)

#> [1] 10.12947

tic()
spe <- identify_neighborhoods(
  spe, method = "dbscan", min_neighborhood_size = 100,
  cell_types_of_interest = annotations, radius = 50, 
  feature_colname = "annotations")

toc()

#> 18.81 sec elapsed

It adds a new column of the niche ID in the colData:

colData(spe) %>% head(n = 3)

#> DataFrame with 3 rows and 20 columns
#>       Cell.ID    orig.ident nCount_Xenium nFeature_Xenium
#>   <character>      <factor>     <numeric>       <integer>
#> 1  aaaaaacg-1 SeuratProject           147              80
#> 2  aaaaafen-1 SeuratProject           104              60
#> 3  aaaaanfe-1 SeuratProject            78              56
#>   nCount_NegativeControlCodeword nFeature_NegativeControlCodeword
#>                        <numeric>                        <integer>
#> 1                              0                                0
#> 2                              0                                0
#> 3                              0                                0
#>   nCount_NegativeControlProbe nFeature_NegativeControlProbe nCount_SCT
#>                     <numeric>                     <integer>  <numeric>
#> 1                           0                             0        125
#> 2                           0                             0        104
#> 3                           0                             0         86
#>   nFeature_SCT SCT_snn_res.0.3 seurat_clusters            annotations
#>      <integer>        <factor>        <factor>            <character>
#> 1           80               3              15           plasma cells
#> 2           60               3              3    M2-like macrophage-2
#> 3           56               3              8  alveolar epithelial ..
#>   sub.cluster     cell_id kmeans_cluster    ident   sample_id in_tissue
#>   <character> <character>      <integer> <factor> <character> <numeric>
#> 1          15  aaaaaacg-1             11       15           1         1
#> 2           3  aaaaafen-1             11       3            1         1
#> 3           8  aaaaanfe-1             11       8            1         1
#>   Neighborhood
#>    <character>
#> 1    Cluster_1
#> 2    Cluster_1
#> 3    Cluster_1

plot the cell type composition:

neighorhoods_vis <- 
  composition_of_neighborhoods(spe, feature_colname = "annotations")

neighorhoods_vis <- 
  neighorhoods_vis[neighorhoods_vis$Total_number_of_cells >=5,]

plot_composition_heatmap(neighorhoods_vis,
                         feature_colname="annotations")

There is no parameters exposed to fine-tune the number of neighborhoods with dbscan in the identify_neighborhoods function.

There are many other functions you can use from the SPAIT package. Make sure you go through the tutorial.

That’s it for today! Happy Learning.

bioinformatics scRNAseq spatial