How to make a triangle correlation heatmap with p-values labeled

In this blog post, I am going to show you how to make a correlation heatmap with p-values and significant values labeled in the heatmap body. Let’s use the PBMC single cell data as an example.

You may want to read my previous blog post How to do gene correlation for single-cell RNAseq data.

Load libraries

library(dplyr)
library(Seurat)
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)
library(SeuratData)
library(hdWGCNA)
library(WGCNA)
set.seed(1234)

prepare the data

data("pbmc3k")

pbmc3k
#> An object of class Seurat 
#> 13714 features across 2700 samples within 1 assay 
#> Active assay: RNA (13714 features, 0 variable features)
## routine processing
pbmc3k<- pbmc3k %>% 
  NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData() %>%
  RunPCA(verbose = FALSE) %>%
  FindNeighbors(dims = 1:10, verbose = FALSE) %>%
  FindClusters(resolution = 0.5, verbose = FALSE) %>%
  RunUMAP(dims = 1:10, verbose = FALSE)

Idents(pbmc3k)<- pbmc3k$seurat_annotations

pbmc3k<- pbmc3k[, !is.na(pbmc3k$seurat_annotations)]

construct metacell

pbmc3k <- SetupForWGCNA(
  pbmc3k,
  gene_select = "fraction", # the gene selection approach
  fraction = 0.05, # fraction of cells that a gene needs to be expressed in order to be included
  wgcna_name = "tutorial" # the name of the hdWGCNA experiment
)


# construct metacells in each group.
# choosing K is an art. 
pbmc3k <- MetacellsByGroups(
  seurat_obj = pbmc3k,
  group.by = c("seurat_annotations", "orig.ident"), # specify the columns in seurat_obj@meta.data to group by
  k = 10, # nearest-neighbors parameter
  max_shared = 5, # maximum number of shared cells between two metacells
  ident.group = 'seurat_annotations' # set the Idents of the metacell seurat object
)

# extract the metacell seurat object 
pbmc_metacell <- GetMetacellObject(pbmc3k)

pbmc_metacell@meta.data %>% head()
#>            orig.ident nCount_RNA nFeature_RNA
#> B#pbmc3k_1     pbmc3k     1794.8         3237
#> B#pbmc3k_2     pbmc3k     4170.4         4962
#> B#pbmc3k_3     pbmc3k     1765.7         3233
#> B#pbmc3k_4     pbmc3k     1731.3         3060
#> B#pbmc3k_5     pbmc3k     1982.2         3468
#> B#pbmc3k_6     pbmc3k     3084.5         4351
#>                                                                                                                                                     cells_merged
#> B#pbmc3k_1 CGCCATTGGAGCAG,TGCCGACTCTCCCA,GGTACATGAAAGCA,GATTGGACGGTGTT,ATCCCGTGGCTGAT,CCAGCTACCAGCTA,TTGGAGACTATGGC,GAAGTCACCCTGTC,CGAGGGCTACGACT,TTATCCGACTAGTG
#> B#pbmc3k_2 AAACATTGAGCTAC,TATGTCACGGAACG,AAACTTGAAAAACG,GGAGCCACCTTCTA,CATTTGTGGGATCT,GCTAGAACAGAGGC,CAGGTTGAGGATCT,ACTGAGACGTTGGT,ACGAAGCTCTGAGT,CGCGAGACAGGTCT
#> B#pbmc3k_3 CTACTATGTAAAGG,CCAGGTCTACACCA,TTGGTACTGAATCC,ATCGCGCTTTTCGT,GCACTAGAAGATGA,ATGACGTGACGACT,GGGCCAACGCGTTA,CGCCATTGGAGCAG,GAAAGCCTACGTTG,CATCATACGGAGCA
#> B#pbmc3k_4 GAGAAATGTTCTCA,ACCCAAGATTCACT,GGGCCAACGCGTTA,GAGTGGGAGTCTTT,TATTGCTGCCGTTC,GGCAATACGCTAAC,CGAGGGCTACGACT,CAGTGATGTACGCA,TCGATTTGTCGTGA,TGATCACTTCTACT
#> B#pbmc3k_5 CAGTTACTAAGGTA,AAGTGGCTTGGAGG,TGATTCACTGTCAG,TGCGAAACAGTCAC,TATACCACCTGATG,TTATTCCTGGTACT,TTGACACTGATAAG,CGAGGGCTACGACT,TACTCAACTGCTAG,TAGCTACTGAATAG
#> B#pbmc3k_6 TTCCAAACCTCCCA,CGCGATCTTTCTTG,AAATCAACAATGCC,ATCCCGTGGCTGAT,ACGAAGCTCTGAGT,TTTAGAGATCCTCG,AAACATTGAGCTAC,ACGACCCTTGACAC,GTAGCAACCATTTC,GAATTAACTGAAGA
#>            seurat_annotations
#> B#pbmc3k_1                  B
#> B#pbmc3k_2                  B
#> B#pbmc3k_3                  B
#> B#pbmc3k_4                  B
#> B#pbmc3k_5                  B
#> B#pbmc3k_6                  B

Hmisc::rcorr function gives the pair-wise correlation coefficient and p-value

genes<- c("GNLY", "NKG7", "KLRD1", "ITGB2", "PRF1", "IFNG", "NCAM1", "FCGR3A", "CCR7", "CXCR3", "IL2RB")
cells<- WhichCells(pbmc_metacell, expression = seurat_annotations == "NK")

mat<- pbmc_metacell@assays$RNA@data[genes, cells] %>% 
  as.matrix() %>%
  t()

# Hmisc package gives the pair-wise correlation coefficient and p-value
cor_res<- Hmisc::rcorr(mat) 
cor_mat<- cor_res$r

# sometimes, you may have NA in the matrix, and clustering does not play well with it
# a simple hack is to turn the NA to 0
cor_mat[is.na(cor_mat)]<- 0


cor_p<- cor_res$P
cor_p
#>                GNLY         NKG7      KLRD1     ITGB2         PRF1       IFNG
#> GNLY             NA 1.034720e-02 0.01446758 0.2740351 5.598974e-06 0.13474999
#> NKG7   1.034720e-02           NA 0.09914756 0.2480082 7.635844e-05 0.59502786
#> KLRD1  1.446758e-02 9.914756e-02         NA 0.1174652 1.951340e-01 0.54200187
#> ITGB2  2.740351e-01 2.480082e-01 0.11746519        NA 1.956304e-01 0.77673864
#> PRF1   5.598974e-06 7.635844e-05 0.19513395 0.1956304           NA 0.27047205
#> IFNG   1.347500e-01 5.950279e-01 0.54200187 0.7767386 2.704720e-01         NA
#> NCAM1  8.662654e-03 6.572153e-01 0.39486873 0.3767390 3.435796e-04 0.60012618
#> FCGR3A 2.259989e-05 4.024618e-07 0.92716018 0.3407721 3.353928e-10 0.45453683
#> CCR7   4.702197e-02 1.452948e-02 0.34889863 0.2674199 2.461824e-04 0.78811106
#> CXCR3  9.232615e-13 8.725519e-04 0.06138810 0.1045795 6.629722e-05 0.02079542
#> IL2RB  1.199037e-02 7.699923e-02 0.16780117 0.1016665 9.815965e-04 0.09288220
#>               NCAM1       FCGR3A         CCR7        CXCR3        IL2RB
#> GNLY   0.0086626537 2.259989e-05 4.702197e-02 9.232615e-13 0.0119903711
#> NKG7   0.6572153099 4.024618e-07 1.452948e-02 8.725519e-04 0.0769992304
#> KLRD1  0.3948687306 9.271602e-01 3.488986e-01 6.138810e-02 0.1678011737
#> ITGB2  0.3767389762 3.407721e-01 2.674199e-01 1.045795e-01 0.1016664987
#> PRF1   0.0003435796 3.353928e-10 2.461824e-04 6.629722e-05 0.0009815965
#> IFNG   0.6001261787 4.545368e-01 7.881111e-01 2.079542e-02 0.0928821980
#> NCAM1            NA 4.384662e-01 2.119770e-02 1.787253e-01 0.7278303881
#> FCGR3A 0.4384662288           NA 1.065590e-01 1.939171e-06 0.0523142134
#> CCR7   0.0211977006 1.065590e-01           NA 5.449032e-07 0.0040966177
#> CXCR3  0.1787252622 1.939171e-06 5.449032e-07           NA 0.0001233260
#> IL2RB  0.7278303881 5.231421e-02 4.096618e-03 1.233260e-04           NA
## add significant **
cor_p[is.nan(cor_p)]<- 1

## the diagonal are NA, make them 1 
cor_p[is.na(cor_p)]<- 1

col_fun<- circlize::colorRamp2(c(-1, 0, 1), c("green", "white", "red"))

Only lable the correlation coefficients with p-values that are <=0.05; add * for p value <=0.05 and ** for p value <=0.01

cell_fun = function(j, i, x, y, w, h, fill){
    if(as.numeric(x) <= 1 - as.numeric(y) + 1e-6) {
            grid.rect(x, y, w, h, gp = gpar(fill = fill, col = fill))
    }
  
    if (cor_p[i, j]  < 0.01 & as.numeric(x) <= 1 - as.numeric(y) + 1e-6){
      grid.text(paste0(sprintf("%.2f", cor_mat[i, j]),"**"), x, y, gp = gpar(fontsize = 10))
    } else if (cor_p[i, j]  <= 0.05 & as.numeric(x) <= 1 - as.numeric(y) + 1e-6){
      grid.text(paste0(sprintf("%.2f", cor_mat[i, j]),"*"), x, y, gp = gpar(fontsize = 10))
    }
}


hp<- ComplexHeatmap::Heatmap(cor_mat,
                        rect_gp = gpar(type = "none"),
                        column_dend_side = "bottom",
                        column_title = "NK cells",
                        name = "correlation", col = col_fun,
                        cell_fun = cell_fun,
                        cluster_rows = T, cluster_columns = T,
                        row_names_side = "left")

lgd_list = list(
    Legend( labels = c("<0.01", "<0.05"), title = "pvalue",
            graphics = list(
              function(x, y, w, h) grid.text("**", x = x, y = y,
                                               gp = gpar(fill = "black")),
              function(x, y, w, h) grid.text("*", x = x, y = y,
                                               gp = gpar(fill = "black")))
            ))


draw(hp, annotation_legend_list = lgd_list, ht_gap = unit(1, "cm") )

Related

Next
Previous
comments powered by Disqus