细胞互作
单细胞测序是在一样样本中可以检测成千上万个细胞的转录组情况,所以很多情况下我们可以根据配体受体等基因的表达情况来看一下细胞之间有没有相互作用的关系,目前有这种功能的软件还是很多的,在这里我们来测试几种比较火的
1 cell chat:We construct a database of interactions among ligands, receptors and their cofactors that accurately represent known heteromeric molecular complexes. We then develop CellChat, a tool that is able to quantitatively infer and analyze intercellular communication networks from single-cell RNA-sequencing (scRNA-seq) data.
2 cellphoneDB:CellPhoneDB是一个可公开获取的受体、配体及其相互作用的存储库。配体和受体都包含亚基结构,CellPhoneDB获取信息的数据库有: UniProt, Ensembl, PDB, the IMEx consortium, IUPHAR。
3Nichnet:收集了涵盖配体-受体,信号转导和基因调控相互作用的多个互补数据源 ,细胞通讯过程是配体---受体---信号蛋白---转录调节子---靶基因,所以除了配受体,清楚转录调节因子和靶基因对我们了解机制也是非常有帮助。
测试1:cellchat
1 软件安装
devtools::install_github("sqjin/CellChat")
2 加载包
suppressMessages({
library(Seurat)
library(SeuratWrappers)
library(SeuratDisk)
library(CellChat)
library(ggplot2)
library(ggalluvial)
library(NMF)
})
3 加载数据
PRO<-readRDS('./PRO1.rds')
Idents(PRO)<-'orig.ident'
PRO<-subset(PRO,idents = 'Bong_marrow1')
Idents(PRO)<-'cluster'
#PRO1<-subset(PRO,idents = 'Neutrophils',invert = TRUE)
4 创建cellchat分析对象,导入数据库
cellchat <- createCellChat( object = PRO) #创建cellchat对象
#levels(cellchat@idents)#查看cellchat的分群情况
groupSize <- as.numeric(table(cellchat@idents))
groupSize#每个cluster中的细胞数
CellChatDB <- CellChatDB.mouse #导入需要用的数据库
#如果是人的数据 CellChatDB <- CellChatDB.human
# colnames(CellChatDB$interaction)#可以看一下cellDB$interaction中都包含什么
#Show the description of CellChatDB databse
showDatabaseCategory(CellChatDB) #可以看下数据库中包含什么
image.png
5 初步分析
#可以选择我们感兴趣的通路进行分析,在这里选择了Secreted Signaling,大家可以跟进自己的需求进行选择
CellChatDB.use <- subsetDB(CellChatDB, search = "Secreted Signaling") # use Secreted Signaling
# 选择我们想使用的数据库
cellchat@DB <- CellChatDB.use
#提取signaling gene,结果保存在data.signaling
cellchat <- subsetData(cellchat)
#head(cellchat@data.signaling)
future::plan("multiprocess", workers = 4) #实现并行,不着急的可以不用这个,一步一步跑
#寻找每个细胞中高表达的供体和受体
cellchat <- identifyOverExpressedGenes(cellchat)
cellchat <- identifyOverExpressedInteractions(cellchat)#结果存在cellchat@LR$LRsig中 #上一步运行的结果存在cellchat@LR$LRsig中
cellchat <- projectData(cellchat, PPI.mouse) #A diffusion process is used to smooth genes’ expression values based on their neighbors’ defined in a high-confidence experimentally validated protein-protein network.结果保存在@data.project
df.net<-subsetCommunication(cellchat) #it returns a data frame consisting of all the inferred cell-cell communications
#head(df.net)
write.csv(df.net,'net_lr.csv')
# 推断细胞通讯网络
cellchat <- computeCommunProb(cellchat, raw.use = TRUE) #Compute the communication probability/strength between any interacting cell groups
# Filter out the cell-cell communication if there are only few number of cells in certain cell groups
cellchat <- filterCommunication(cellchat, min.cells = 10) #过滤掉10个一下细胞的群
#推测细胞互作的概率
cellchat <- computeCommunProbPathway(cellchat) #Compute the communication probability on signaling pathway level by summarizing all related ligands/receptors
df.netp<-subsetCommunication(cellchat,slot.name='netP')
write.csv(df.netp,'net_pathway.csv')
#Calculate the aggregated network by counting the number of links or summarizing the communication probability
cellchat <- aggregateNet(cellchat)
6 细胞之间通讯的数量和强度,外圈的大小象征细胞的多小,线的粗细是相互作用的多少和强度
netVisual_circle(cellchat@net$count, vertex.weight = groupSize, weight.scale = T, label.edge= F, title.name = "Number of interactions")
netVisual_circle(cellchat@net$weight, vertex.weight = groupSize, weight.scale = T, label.edge= F, title.name = "Interaction weights/strength")
image.png
7 分着看每个cluster和其它细胞亚型的相互作用关系,可能更清楚一些
mat <- cellchat@net$weight#看每个细胞的互作情况,这个是看的强度,也可以把weigh换成count
par(mfrow = c(4,4), xpd=TRUE)
#pdf('sub_cir.pdf',width = 14,height = 6)
for (i in 1:nrow(mat)) {
mat2 <- matrix(0, nrow = nrow(mat), ncol = ncol(mat), dimnames = dimnames(mat))
mat2[i, ] <- mat[i, ]
netVisual_circle(mat2, vertex.weight = groupSize, weight.scale = T,edge.weight.max = max(mat), vertex.label.cex = 0.2,,title.name = rownames(mat)[i])
}
image.png
8 查看自己感兴趣的通路在不同cluster中的表达情况,在这里以CLL通路为例子
#查看自己关注的某个通路的情况
cellchat@netP$pathways#可以看一下有哪些重要的通路
pathways.show <- c("CCL")#选择其中的一条通路来看
#levels(cellchat@idents)
#vertex.receiver = c(1,2,4,6)
netVisual_aggregate(cellchat, signaling = pathways.show,vertex.receiver = vertex.receiver)
netVisual_aggregate(cellchat, signaling = pathways.show, layout = "chord")#以圈图的形式展示
netVisual_heatmap(cellchat, signaling = pathways.show, color.heatmap = "Reds")#以热图的形式展示
image.png
9 计算每个供受体对选定的信号通路的贡献值
pathways.show <- c("CCL")#选择其中的一条通路来看
netAnalysis_contribution(cellchat, signaling = pathways.show) #计算每个配体受体对整体信号通路的贡献,并可视化由单个配体受体对调节的细胞通信
image.png
10 单个配受体介导的细胞互作,左部分显示自分泌和旁分泌向我们选定的细胞组发出信号,右部分显示向我们选定的组发出信号。
pairLR.CCL <- extractEnrichedLR(cellchat, signaling = pathways.show, geneLR.return = FALSE)
head(pairLR.CCL)
LR.show <- pairLR.CXCL[1,] # 选择了第一行的供受体对
# Hierarchy plot
vertex.receiver = seq(1,4) # a numeric vector
netVisual_individual(cellchat, signaling = pathways.show, pairLR.use = LR.show, vertex.receiver = vertex.receiver,layout = "hierarchy")#如果想把图画成其它的形状,可以在layout参数中进行修改
image.png
11 某个特定细胞类型和某系细胞类型的互作
netVisual_bubble(cellchat, sources.use = 4, targets.use = c(5:11), remove.isolate = FALSE) #4,5,11分别是levels(cellchat@idents)中细胞类型的序列号
netVisual_bubble(cellchat, sources.use = 4, targets.use = c(5:11), signaling ='CCL',remove.isolate = FALSE)#可以用signaling的参数指定自己关注的通路
#netVisual_chord_gene(cellchat, sources.use = 4, targets.use = c(5:11), lab.cex = 0.5,legend.pos.y = 30) #也可以画成圆形的圈图
image.png
12 查看关注的通路基因在不同cluster中的表达情况
image.png
13 判定在某个通路中,每种细胞类型扮演的角色
cellchat <- netAnalysis_computeCentrality(cellchat, slot.name = "netP") # the slot 'netP' means the inferred intercellular communication network of signaling pathways
# Visualize the computed centrality scores using heatmap, allowing ready identification of major signaling roles of cell groups
netAnalysis_signalingRole_network(cellchat, signaling = pathways.show, width = 8, height = 2.5, font.size = 10)
#看一下每种细胞类型扮演的是输出还是靶细胞的角色
gg1 <- netAnalysis_signalingRole_scatter(cellchat)
#
gg2 <- netAnalysis_signalingRole_scatter(cellchat, signaling = "CCL")#CCL互作中各个细胞类型扮演的角色
#> Signaling role analysis on the cell-cell communication network from user's input
gg1 + gg2
# Signaling role analysis on the aggregated cell-cell communication network from all signaling pathways
ht1 <- netAnalysis_signalingRole_heatmap(cellchat, pattern = "outgoing")
ht2 <- netAnalysis_signalingRole_heatmap(cellchat, pattern = "incoming")
ht1 + ht2
image.png
14 细胞和通路之间的协同关系
selectK(cellchat, pattern = "outgoing")
image.png
nPatterns = 3#从上图来看3的时候2个指标就开始下降,所以我们这里选的3
cellchat <- identifyCommunicationPatterns(cellchat, pattern = "outgoing", k = nPatterns)#看一下不同的细胞类型和通路被分配到哪个模式
#netAnalysis_river(cellchat, pattern = "outgoing")#可以使用桑基图的形式进行展示
netAnalysis_dot(cellchat, pattern = "outgoing")#使用dotplot的形式进行展示
image.png
#使用相同的方法分析incoming(传入信号模式)
selectK(cellchat, pattern = "incoming")
nPatterns = 5
cellchat <- identifyCommunicationPatterns(cellchat, pattern = "incoming", k = nPatterns)
#netAnalysis_river(cellchat, pattern = "incoming")
#netAnalysis_dot(cellchat, pattern = "incoming")
15 量化细胞之间的相似性
#根据信号组的结果相似性识别信号组,功能相似性将type改为'functional'
cellchat <- computeNetSimilarity(cellchat, type = "structural")
cellchat <- netEmbedding(cellchat, type = "structural")
#> Manifold learning of the signaling networks for a single dataset
cellchat <- netClustering(cellchat, type = "structural")
#> Classification learning of the signaling networks for a single dataset
# Visualization in 2D-space
netVisual_embedding(cellchat, type = "structural", label.size = 3.5)
netVisual_embeddingZoomIn(cellchat, type = "structural", nCol = 2)
image.png
测试2 cellphoneDB
1 软件安装
pip install cellphonedb
2 cellphoneDB目前只能做人的细胞互作,如果要做其它的物种的,要进行基因转换,为了测试软件,我们找了几个人的样本进行测试,首先我们先用R来生成cellphoneDB需要用到的数据
PRO<-readRDS('./lung.diff_PRO.rds')
write.table(as.matrix(PRO@assays$RNA@data), 'lungcellphonedb_count.txt', sep='\t', quote=F)
meta_data <- cbind(rownames(PRO@meta.data), PRO@meta.data[,'new_ident', drop=F])
meta_data <- as.matrix(meta_data)
meta_data[is.na(meta_data)] = "Unkown" # 细胞类型中不能有NA
write.table(meta_data, 'lungcellphonedb_meta.txt', sep='\t', quote=F, row.names=F)
3 进行cellphoneDB分析
cellphonedb method statistical_analysis lungcellphonedb_meta.txt lungcellphonedb_count.txt --counts-data=gene_name
cellphonedb plot dot_plot
cellphonedb plot heatmap_plot cellphonedb_meta.txt
跑完之后文件夹下面大概有以下几个文件
image.png
4 在R中画图,加载需要用到的包
library(psych)
library(qgraph)
library(igraph)
library(RColorBrewer)
library(scales)
library(tidyverse)
5 读入数据
mynet <- read.delim('./out/count_network.txt', check.names = FALSE)
mynet%>%dplyr::filter(count>0) -> mynet #过滤掉0值
net<- graph_from_data_frame(mynet) #将数据变成我们能处理的形式
plot(net) #可以画一下细胞互作的图
#在使用igraph时候,报了GLPK is not available, Unimplemented function call的错,所以就把igraph卸载了重新安装,并在安装前设置igraph和GLPK的连接关系
#Sys.setenv(CPPFLAGS="-DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem ~/miniconda2/envs/cellphonedb/include")
#Sys.setenv(LDFLAGS="-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,-rpath,~/miniconda2/envs/cellphonedb/lib -Wl,-rpath-link,~/miniconda2/envs/cellphonedb/lib -L~/miniconda2/envs/cellphonedb/lib")
#install.packages ( "ABC" ,repos= "http://mirrors.ustc.edu.cn/CRAN/" ) #在这里如果R包安装有问题,可以尝试使用lib指定一下安装路径。
image.png
6 展示形式调整
net2<-net#这步骤是必须的,否则下边画分开的图片的时候容易出问题
karate_groups <- cluster_optimal(net)
coords <- layout_in_circle(net, order = order(membership(karate_groups))) # 设置网络布局
color1<-c("OrangeRed","SlateBlue3","DarkOrange","GreenYellow","Purple","DarkSlateGray","Gold","DeepPink2","Red4","#4682B4","#FFDAB9","#708090","#836FFF","#CDC673","#CD9B1D","#FF6EB4","#CDB5CD","DarkGreen","#008B8B","#43CD80")
E(net)$width <- E(net)$count / 30
for (i in 1: length(unique(mynet$SOURCE)) ){
E(net2)[map(unique(mynet$SOURCE),function(x) {
get.edge.ids(net2,vp = c(unique(mynet$SOURCE)[i],x))
})%>% unlist()]$color <- color1[i]
}
plot(net2, edge.arrow.size=.1,
edge.curved=0, #可调整曲线的弯度
vertex.color=color1,
vertex.frame.color="#555555", #圈的颜色
vertex.label.color="black",#标签字体的颜色
layout = coords,
vertex.label.cex=1)
image.png
par(mfrow=c(3,4), mar=c(.4,.4,.4,.4))
for (i in 1: length(unique(mynet$SOURCE)) ){
net1<-net
#添加count信息,如果不需要显示相互作用的数目,则不需要这一步
E(net1)$count <- ""
E(net1)[map(unique(mynet$SOURCE),function(x) {
get.edge.ids(net,vp = c(unique(mynet$SOURCE)[i],x))
})%>% unlist()]$count <- E(net)[map(unique(mynet$SOURCE),function(x) {
get.edge.ids(net,vp = c(unique(mynet$SOURCE)[i],x))
})%>% unlist()]$count
#分开绘图
E(net1)[map(unique(mynet$SOURCE),function(x) {
get.edge.ids(net,vp = c(unique(mynet$SOURCE)[i],x))
})%>% unlist()]$color <- color1[i]
plot(net1, edge.arrow.size=0.1,
edge.curved=0.4,
edge.label = E(net1)$count, # 绘制边的权重
vertex.color=color1,
vertex.frame.color="#555555",
vertex.label.color="black",
layout = coords,
vertex.label.cex=1 #节点字体大小
)
}
image.png
7 查看关注基因的表达情况
mypvalues=read.table("./out/pvalues.txt",header = T,sep = "\t",stringsAsFactors = F) #
mymeans=read.table("./out/means.txt",header = T,sep = "\t",stringsAsFactors = F) #
th1 <- grep("IL2|IL12|IL18|IL27|IFNG|IL10|TNF$|TNF |LTA|LTB|STAT1|CCR5|CXCR3|IL12RB1|IFNGR1|TBX21|STAT4", mymeans$interacting_pair,value = T) #输入感兴趣的基因
mymeans %>% dplyr::filter(interacting_pair %in% th1)%>%
dplyr::select("interacting_pair",starts_with("CancerCells"),ends_with("CancerCells")) %>%
reshape2::melt() -> meansdf
colnames(meansdf)<- c("interacting_pair","celltype","means")
mypvalues %>% dplyr::filter(interacting_pair %in% th1)%>%
dplyr::select("interacting_pair",starts_with("CancerCells"),ends_with("CancerCells"))%>%
reshape2::melt()-> pvalsdf
colnames(pvalsdf)<- c("interacting_pair","celltype","pvals")
pvalsdf$linker<- paste0(pvalsdf$interacting_pair,"_",pvalsdf$celltype)
meansdf$linker<- paste0(meansdf$interacting_pair,"_",meansdf$celltype)
pldf <- merge(pvalsdf,meansdf,by = "linker")
ggplot(pldf,aes(celltype.x,interacting_pair.x) )+
geom_point(aes(color=means,size=-log10(pvals+0.0001)) ) +
scale_size_continuous(range = c(1,3))+
scale_color_gradient2(high="red",mid = "yellow",low ="darkblue",midpoint = 1 )+ theme_bw()+
theme(axis.text.x = element_text(angle = -45,hjust = -0.1,vjust = 0.8))
image.png
测试3 NicheNet
1 安装包
library(devtools)
install_github("saeyslab/nichenetr")
2 加载包
library(nichenetr)
library(Seurat)
library(tidyverse)
3 读入数据库,从网上读报错,直接下载到本地进来读
#ligand_target_matrix <- readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds"))
#lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds"))
#weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds"))
# weighted_networks列表包含两个数据框,lr_sig是配体-受体权重信号网络,gr是配体-靶基因权重调控网络
#wget -c https://zenodo.org/record/3260758/files/ligand_target_matrix.rds #每次加载很慢还容易报错,所以就把需要的内容下载下来了
#wget -c https://zenodo.org/record/3260758/files/lr_network.rds
#wget -c https://zenodo.org/record/3260758/files/weighted_networks.rds
ligand_target_matrix <- readRDS("ligand_target_matrix.rds")
lr_network = readRDS("lr_network.rds")
weighted_networks = readRDS("weighted_networks.rds")
4 读入自己的数据,构建nichnet数据分析模式
PRO<-readRDS(lung.diff_PRO.rds')
nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = PRO,
top_n_ligands = 20,
receiver = "CancerCells",
sender = c("TCells","ECs","BCells","PlasmaCells","Fibroblasts","AT2"),
condition_colname = "orig.ident",
condition_oi = "Sample_3a",
condition_reference = "Sample_3b",
ligand_target_matrix = ligand_target_matrix,
lr_network = lr_network,
weighted_networks = weighted_networks,
organism = "human")
5 查看配体的在不同cluster中的表达情况
x <- nichenet_output$ligand_activities
head(x)
write.csv(x, "ligand_activities.csv", row.names = F)
p<-DotPlot(PRO, features = nichenet_output$top_ligands, cols = "RdYlBu") + RotatedAxis()
ggsave("top20_ligands.pdf", p, width = 12, height = 6)
p1 = DotPlot(PRO, features = nichenet_output$top_ligands, split.by = "orig.ident",cols = clustcol) + RotatedAxis()#可以看每个样本中的top20的配体的表达情况
ggsave("top20_ligands_compare.pdf", p1, width = 12, height = 20)
p2 = VlnPlot(PRO, features = nichenet_output$top_ligands,
split.by = "orig.ident", pt.size = 0, combine = T) #每个样本中top20的基因表达情况的小提琴图
ggsave("VlnPlot_ligands_compare.pdf", p2, width = 24, height = 24)
p
p2
6 查看配体对靶基因的调控情况
# 查看top配体调控的靶基因及其评分
x <- nichenet_output$ligand_target_matrix
head(x)
write.csv(x, "ligand_target.csv", row.names = F)
p = nichenet_output$ligand_target_heatmap
p
ggsave("Heatmap_ligand-target.png", p, width = 12, height = 6)
p1 = VlnPlot(PRO %>% subset(idents = "CancerCells"), features = nichenet_output$top_targets,
split.by = "orig.ident", pt.size = 0, combine = T, ncol = 8,cols = clustcol)
ggsave("Targets_Expression_vlnplot.pdf", p1, width = 20, height = 20)
p2<-DotPlot(PRO %>% subset(idents = "CancerCells"),
features = nichenet_output$top_targets,
split.by = "orig.ident",cols = clustcol) + RotatedAxis()
ggsave("Targets_Expression_dotplot.png", p2, width = 12, height = 6)
p
p1
p2
7 查看受体情况
## 查看受体情况
# 查看配体-受体互作
x <- nichenet_output$ligand_receptor_matrix
write.csv(x, "ligand_receptor.csv", row.names = F)
p = nichenet_output$ligand_receptor_heatmap
ggsave("Heatmap_ligand-receptor.png", p, width = 12, height = 6)
# 查看受体表达情况
p1<-DotPlot(PRO %>% subset(idents = "CancerCells"),
features = nichenet_output$top_receptors,
split.by = "orig.ident",cols = clustcol) + RotatedAxis()
ggsave("Receptors_Expression_dotplot.png", p1, width = 12, height = 6)
p2 = VlnPlot(PRO %>% subset(idents = "CancerCells"), features = nichenet_output$top_receptors,
split.by = "orig.ident", pt.size = 0, combine = T, ncol = 6,cols = clustcol)
ggsave("Receptors_Expression_vlnplot.png", p2, width = 20, height = 20)
# 有文献报道的配体-受体
# Show ‘bona fide’ ligand-receptor links that are described in the literature and not predicted based on PPI
x <- nichenet_output$ligand_receptor_matrix_bonafide
write.csv(x, "ligand_receptor_bonafide.csv", row.names = F)
p3 = nichenet_output$ligand_receptor_heatmap_bonafide
ggsave("Heatmap_ligand-receptor_bonafide.png", p3, width = 8, height = 4)
p
p1
p2
p3