跟著iMeta學做圖|ggplot2繪製相關性分析線面組合熱圖

生信寶典 發佈 2023-02-09T10:54:07.023075+00:00

本文代碼已經上傳至https://github.com/iMetaScience/iMetaPlot230118corr 如果你使用本代碼,請引用:Changchao Li. 2023.

本文代碼已經上傳至https://GitHub.com/iMetaScience/iMetaPlot230118corr 如果你使用本代碼,請引用:Changchao Li. 2023. Destabilized microbial networks with distinct performances of abundant and rare biospheres in maintaining networks under increasing salinity stress. iMeta 1: e79. https://doi.org/10.1002/imt2.79

代碼編寫及注釋:農心生信工作室

寫在前面

相關性熱圖 (Correlation Heatmap) 的使用在微生物組研究中非常普遍,尤其是線面組合的相關性熱圖,其中的相關性熱圖通常表示環境因子間的pearson相關係數,連線則表示物種組成與各環境因子的Mantel相關性。本期我們挑選2023年1月9日刊登在iMeta上的Destabilized microbial networks with distinct performances of abundant and rare biospheres in maintaining networks under increasing salinity stress,選擇文章的Figure 1C進行復現,講解和探討如何基於ggplot2繪製線面組合的相關性熱圖,先上原圖:

接下來,我們將通過詳盡的代碼逐步拆解原圖,最終實現對原圖的復現。

R包檢測和安裝

01

安裝核心R包ggplot2以及一些功能輔助性R包,並載入所有R包。

options(repos = list(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
if (!require("ggplot2"))
  install.packages('ggplot2') 
if (!require("vegan"))
  install.packages('vegan')
if (!require("tidyverse"))
  install.packages('tidyverse') 
if (!require("BiocManager"))
  install.packages('BiocManager') 
if (!require("ComplexHeatmap"))
  BiocManager::install('ComplexHeatmap')
# 檢查開發者工具devtools,如沒有則安裝
if (!require("devtools"))
  install.packages("devtools")
# 加載開發者工具devtools
library(devtools)
# 檢查linkET包,沒有則通過github安裝最新版
if (!require("linkET"))
  install_github("Hy4m/linkET", force = TRUE)
# 加載包
library(ggplot2)
library(vegan)
library(linkET)
library(tidyverse)
library(ComplexHeatmap)

讀取數據及數據處理

02

繪製線面組合相關性熱圖,需要環境因子數據和物種組成數據。示例數據可在GitHub上獲取。

#讀取數據
#根據原文附表S1,獲得樣品的物理化學特性和地理分布情況,作為環境因子表
env <- read.csv("env.CSV",row.names = 1,header = T)
#生成一個物種組成的豐度表,行為樣本,列為物種,行數必須與環境因子表的行數相同
spec <- read.csv("spec.CSV",row.names = 1,header = T)

03

計算環境因子的pearson相關係數,並根據繪圖需求對數據進行處理。

corM <- cor(env,method = "pearson")#計算相關係數矩陣
#因為需要繪製的是上三角熱圖,需對矩陣進行處理,保留對角線及一半的數據即可
ncr <- nrow(corM)
for (i in 1:ncr){
  for (j in 1:ncr){
    if (j>i){
      corM[i,j] <- NA
    }
  }
}


corM <- as.data.frame(corM)%>%mutate(colID = rownames(corM))


#寬錶轉長表
corM_long <- pivot_longer(corM,cols = -"colID",names_to = "env",values_to = "cor")%>%na.omit()


#根據原圖,固定x軸和y軸的順序
corM_long$colID <- factor(corM_long$colID,levels = rownames(corM))
corM_long$env <- factor(corM_long$env,levels = rev(rownames(corM)))

繪圖預覽

04

使用ggplot2包繪製一個簡單的上三角相關性熱圖:

p <- ggplot(corM_long)+
  geom_tile(aes(colID,env),fill = "white",color = "grey")+ #添加方框
  geom_point(aes(colID,env,size = abs(cor),color = cor)) #添加散點

05

對相關性熱圖進行美化:

col_fun <- colorRampPalette(c("#FFC107","white","#3F51B5"))(50) #設置漸變色
p <- ggplot(corM_long)+
  geom_tile(aes(colID,env),fill = "white",color = "grey")+ #添加方框
  geom_point(aes(colID,env,size = abs(cor),fill = cor),color = "black",shape = 21)+ #添加散點
  scale_x_discrete(position = "top")+ #x軸移動到頂部
  scale_y_discrete(position = "right")+ #y軸移動到右側
  scale_size_continuous(range = c(1,10))+
  scale_fill_gradientn(colours = col_fun)+ #漸變色設置
  theme(axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.background = element_blank())+ #去除背景
  guides(size = "none",  #隱藏size圖例
         fill = guide_colorbar(title = "Pearson's r"))  #修改fill圖例標題

08

用Mantel test檢驗物種組成和不同環境因子之間的相關性。這裡可以使用linkET包中的mantel_test()函數輕鬆完成檢驗:

mantel <- mantel_test(spec = spec,env = env)
#spec參數後是物種組成數據,env參數後是環境因子數據

07

生成繪製連接弧線所需的數據框。包括連線的起點和終點坐標,根據mantel r的值確定線條粗細,mantel p的值確定線條顏色:

n = nrow(corM)
curve_df <- data.frame(
  x0 = rep(-1,n),
  y0 = rep(6,n),
  x1 = c(0:(n-1)-0.1),
  y1 = c(n:1)
)%>%cbind(mantel)%>%
  mutate(
    line_col = ifelse(p <= 0.001, '#D1C4E9', NA),  #根據p值不同設置連接曲線的顏色不同
    line_col = ifelse(p > 0.001 & p <=  0.01, '#607D8B', line_col),
    line_col = ifelse(p > 0.01 & p <=  0.05, '#FF5722', line_col),
    line_col = ifelse(p > 0.05, '#4CAF50', line_col),
    linewidth = ifelse(r >=  0.4, 4, NA),
    # 根據r值不同設置連接曲線的寬度不同
    linewidth = ifelse(r >=  0.2 & r < 0.4, 2, linewidth),
    linewidth = ifelse(r < 0.2, 0.8, linewidth)
  )

08

繪製帶有連線的相關性熱圖:

p <- ggplot(corM_long)+
  geom_tile(aes(colID,env),fill = "white",color = "grey")+ #添加方框
  geom_point(aes(colID,env,size = abs(cor),fill = cor),color = "black",shape = 21)+ #添加散點
  scale_x_discrete(position = "top")+ #x軸移動到頂部
  scale_y_discrete(position = "right")+ #y軸移動到右側
  scale_size_continuous(range = c(1,10))+
  scale_fill_gradientn(colours = col_fun)+ #漸變色設置
  theme(axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.background = element_blank())+ #去除背景
  guides(size = "none",  #隱藏size圖例
         fill = guide_colorbar(title = "Pearson's r"))+  #修改fill圖例標題
  geom_curve(data = curve_df,aes(x = x0,y = y0,xend = x1,yend = y1), #根據起點終點繪製弧線
             size = curve_df$linewidth, #根據r值設置線條寬度
             color = curve_df$line_col,  #根據p值設置線條顏色
             curvature = 0.2)+ #設置弧度
  geom_point(data = curve_df,aes(x = x1,y = y1),size = 3,color = "red") #添加連線終點的點

09

我們發現,目前的圖形缺少連線顏色和粗細的圖例。為此,我們利用顧祖光博士開發的ComplexHeatmap包(關於ComplexHeatmap包的使用,可以參考往期推文跟著iMeta學做圖|ComplexHeatmap繪製多樣的熱圖),繪製一個單獨的圖例,並置於畫布的左下方:

pdf("fig5.pdf",width = 10.04, height = 6.26)
grid.newpage()
#重新創建一個1行1列的布局
pushViewport(viewport(layout = grid.layout(nrow = 1, ncol = 1)))
vp_value <- function(row, col){
  viewport(layout.pos.row = row, layout.pos.col = col)
} 
p1 <- ggplot(corM_long)+
  geom_tile(aes(colID,env),fill = "white",color = "grey")+ #添加方框
  geom_point(aes(colID,env,size = abs(cor),fill = cor),color = "black",shape = 21)+ #添加散點
  scale_x_discrete(position = "top")+ #x軸移動到頂部
  scale_y_discrete(position = "right")+ #y軸移動到右側
  scale_size_continuous(range = c(1,10))+
  scale_fill_gradientn(colours = col_fun)+ #漸變色設置
  theme(axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.background = element_blank())+ #去除背景
  guides(size = "none",  #隱藏size圖例
         fill = guide_colorbar(title = "Pearson's r"))+  #修改fill圖例標題
  geom_curve(data = curve_df,aes(x = x0,y = y0,xend = x1,yend = y1), #根據起點終點繪製弧線
             size = curve_df$linewidth, #根據r值設置線條寬度
             color = curve_df$line_col,  #根據p值設置線條顏色
             curvature = 0.2)+ #設置弧度
  geom_point(data = curve_df,aes(x = x1,y = y1),size = 3,color = "red") #添加連線終點的點
#將圖p1添加進畫布
print(p1,vp = vp_value(row = 1, col = 1))


#利用ComplexHeatmap的Legend()函數繪製單獨的圖例
#繪製Mantel's p圖例
lgd1 = Legend(at = 1:4, legend_gp = gpar(fill = c('#D1C4E9','#607D8B','#FF5722','#4CAF50')), 
              title = "Mantel's p",
              labels  = c("≤ 0.001","0.001 - 0.01","0.01 - 0.05","> 0.05"),
              nr = 1)
#繪製Mantel's r圖例
lgd2 = Legend(labels  = c("< 0.02","0.02 - 0.04","≥ 0.04"),
              title = "Mantel's r",
              #graphics參數自定義圖例的高度
              graphics = list(
                function(x, y, w, h) grid.rect(x, y, w, h*0.1*0.8, gp = gpar(fill = "black")),
                function(x, y, w, h) grid.rect(x, y, w, h*0.1*2, gp = gpar(fill = "black")),
                function(x, y, w, h) grid.rect(x, y, w, h*0.1*4, gp = gpar(fill = "black"))
              ),nr = 1)
lgd = packLegend(lgd1, lgd2)


#將圖例添加進相關性熱圖中
draw(lgd, x = unit(45, "mm"), y = unit(5, "mm"), just = c( "bottom"))
dev.off()
#> quartz_off_screen 
#>                 2

完整代碼

if (!require("ggplot2"))
  install.packages('ggplot2') 
if (!require("vegan"))
  install.packages('vegan')
if (!require("tidyverse"))
  install.packages('tidyverse') 
if (!require("BiocManager"))
  install.packages('BiocManager') 
if (!require("ComplexHeatmap"))
  BiocManager::install('ComplexHeatmap')
# 檢查開發者工具devtools,如沒有則安裝
if (!require("devtools"))
  install.packages("devtools")
# 加載開發者工具devtools
library(devtools)
# 檢查linkET包,沒有則通過github安裝最新版
if (!require("linkET"))
  install_github("Hy4m/linkET", force = TRUE)
# 加載包
library(ggplot2)
library(vegan)
library(linkET)
library(tidyverse)
library(ComplexHeatmap)
#讀取數據
#根據原文附表S1,獲得樣品的物理化學特性和地理分布情況,作為環境因子表
env <- read.csv("env.CSV",row.names = 1,header = T)
#生成一個物種組成的豐度表,行為樣本,列為物種,行數必須與環境因子表的行數相同
spec <- read.csv("spec.CSV",row.names = 1,header = T)


corM <- cor(env,method = "pearson")#計算相關係數矩陣
#因為需要繪製的是上三角熱圖,需對矩陣進行處理,保留對角線及一半的數據即可
ncr <- nrow(corM)
for (i in 1:ncr){
  for (j in 1:ncr){
    if (j>i){
      corM[i,j] <- NA
    }
  }
}


corM <- as.data.frame(corM)%>%mutate(colID = rownames(corM))


#寬錶轉長表
corM_long <- pivot_longer(corM,cols = -"colID",names_to = "env",values_to = "cor")%>%na.omit()


#根據原圖,固定x軸和y軸的順序
corM_long$colID <- factor(corM_long$colID,levels = rownames(corM))
corM_long$env <- factor(corM_long$env,levels = rev(rownames(corM)))


mantel <- mantel_test(spec = spec,env = env)
#spec參數後是物種組成數據,env參數後是環境因子數據
n = nrow(corM)
curve_df <- data.frame(
  x0 = rep(-1,n),
  y0 = rep(6,n),
  x1 = c(0:(n-1)-0.1),
  y1 = c(n:1)
)%>%cbind(mantel)%>%
  mutate(
    line_col = ifelse(p <=  0.001, '#D1C4E9', NA),  #根據p值不同設置連接曲線的顏色不同
    line_col = ifelse(p > 0.001 & p <=  0.01, '#607D8B', line_col),
    line_col = ifelse(p > 0.01 & p <=  0.05, '#FF5722', line_col),
    line_col = ifelse(p > 0.05, '#4CAF50', line_col),
    linewidth = ifelse(r >=  0.4, 4, NA),
    # 根據r值不同設置連接曲線的寬度不同
    linewidth = ifelse(r >=  0.2 & r < 0.4, 2, linewidth),
    linewidth = ifelse(r < 0.2, 0.8, linewidth)
  )


pdf("Figure1A.pdf",width = 10.04, height = 6.26)
grid.newpage()
#重新創建一個1行1列的布局
pushViewport(viewport(layout = grid.layout(nrow = 1, ncol = 1)))
vp_value <- function(row, col){
  viewport(layout.pos.row = row, layout.pos.col = col)
} 
p1 <- ggplot(corM_long)+
  geom_tile(aes(colID,env),fill = "white",color = "grey")+ #添加方框
  geom_point(aes(colID,env,size = abs(cor),fill = cor),color = "black",shape = 21)+ #添加散點
  scale_x_discrete(position = "top")+ #x軸移動到頂部
  scale_y_discrete(position = "right")+ #y軸移動到右側
  scale_size_continuous(range = c(1,10))+
  scale_fill_gradientn(colours = col_fun)+ #漸變色設置
  theme(axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.background = element_blank())+ #去除背景
  guides(size = "none",  #隱藏size圖例
         fill = guide_colorbar(title = "Pearson's r"))+  #修改fill圖例標題
  geom_curve(data = curve_df,aes(x = x0,y = y0,xend = x1,yend = y1), #根據起點終點繪製弧線
             size = curve_df$linewidth, #根據r值設置線條寬度
             color = curve_df$line_col,  #根據p值設置線條顏色
             curvature = 0.2)+ #設置弧度
  geom_point(data = curve_df,aes(x = x1,y = y1),size = 3,color = "red") #添加連線終點的點
#將圖p1添加進畫布
print(p1,vp = vp_value(row = 1, col = 1))


#利用ComplexHeatmap的Legend()函數繪製單獨的圖例
#繪製Mantel's p圖例
lgd1 = Legend(at = 1:4, legend_gp = gpar(fill = c('#D1C4E9','#607D8B','#FF5722','#4CAF50')), 
              title = "Mantel's p",
              labels  = c("≤ 0.001","0.001 - 0.01","0.01 - 0.05","> 0.05"),
              nr = 1)
#繪製Mantel's r圖例
lgd2 = Legend(labels  = c("< 0.02","0.02 - 0.04","≥ 0.04"),
              title = "Mantel's r",
              #graphics參數自定義圖例的高度
              graphics = list(
                function(x, y, w, h) grid.rect(x, y, w, h*0.1*0.8, gp = gpar(fill = "black")),
                function(x, y, w, h) grid.rect(x, y, w, h*0.1*2, gp = gpar(fill = "black")),
                function(x, y, w, h) grid.rect(x, y, w, h*0.1*4, gp = gpar(fill = "black"))
              ),nr = 1)
lgd = packLegend(lgd1, lgd2)


#將圖例添加進相關性熱圖中
draw(lgd, x = unit(45, "mm"), y = unit(5, "mm"), just = c( "bottom"))


dev.off()
#> quartz_off_screen 
#>                 2

以上數據和代碼僅供大家參考,如有不完善之處,歡迎大家指正!

關鍵字: