单细胞的GSVA通路分析和CellPhoneDB文件的准备
2020-10-06 本文已影响0人
碌碌无为的杰少
单细胞的通路分析
在做单细胞的通路分析的时候,第一就想做GO,KEGG,当然这是对于差异基因,但是单细胞的差异基因往往没有特殊的标准,得不到自己想要的结果,所以放弃了,然后考虑做GSEA,就这样反复思考,寝食难安,非常煎熬半个月后,得到群里面小伙伴和Kinesin的指导,再弄清楚GSEA和GSVA原理后,终于明白单细胞的文章做通路分析的时候为什么都用GSVA分析,所以以后大家遇到单细胞的通路分析的时候直接用GSVA吧,对于一个临床小白也是一步一步弄清楚pathway到底是什么东西了,迫不及待拾起来了好久没写的简书,下面就复现一篇CNS里面的图吧。
图片.png
提取表达矩阵
library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)
library(dplyr)
DD1 <- SubsetData(sub_singlecell,
# 提取数据根据的组名,提取两个亚群分群
subset.name = "celltype1",
# 提取的组别
accept.value = c("Intraepithelial CD8+ T cells","Exhausted CD8+ T cells"))
expr <- as.data.frame(DD1@assays$RNA@data)
expr$ID <- rownames(expr)
s2e <- bitr(expr$ID,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)#人类 转换成ENTREZID
#其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
expr <- inner_join(expr,s2e,by=c("ID"="SYMBOL"))
rownames(expr) <- expr$ENTREZID
expr <- expr[,-524]
expr <- expr[,-523]
meta <- DD1@meta.data[,c("celltype1")]
加载GSEA通路基因集合
有C1和C2两个数据集,C1记载了50个通路,比较少,我选的C2,6000多个通路的
kegggmt <- read.gmt("c2.all.v7.2.entrez.gmt")
colnames(kegggmt)
kegg_list = split(kegggmt$gene, kegggmt$ont)
图片.png
GSVA分析
expr=as.matrix(expr)
kegg2 <- gsva(expr, kegg_list, kcdf="Gaussian",method = "gsva",parallel.sz=1)
用limma包作图并绘图
library(limma)
exprSet <- kegg2
### 差异分析
## 创建分组
group <- factor(meta,levels = c("Intraepithelial CD8+ T cells","Exhausted CD8+ T cells"),ordered = F)
## 分组变成向量,并且限定leves的顺序
## levels里面,把对照组放在前面
## 1.构建比较矩阵
design <- model.matrix(~group)
## 比较矩阵命名
colnames(design) <- levels(group)
##2.线性模型拟合
fit <- lmFit(exprSet,design)
##3.贝叶斯检验
fit2 <- eBayes(fit)
#4.输出差异分析结果,其中coef的数目不能操过design的列数
# 此处的2代表的是第二列和第一列的比较
allDiff=topTable(fit2,adjust='fdr',coef=2,number=Inf)
#根据allDiff找出你自己感兴趣的通路
up <- c("KEGG_CELL_ADHESION_MOLECULES_CAM","REACTOME_PD_1_SIGNALING",'REACTOME_INTERFERON_GAMMA_SIGNALING','REACTOME_TCR_SIGNALING','REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM','REACTOME_DNA_REPLICATION','REACTOME_CHEMOKINE_RECEPTORS_BIND_CHEMOKINES','REACTOME_MHC_CLASS_II_ANTIGEN_PRESENTATION','REACTOME_SIGNALING_BY_NOTCH','REACTOME_G2_M_CHECKPOINTS','REACTOME_CELL_CYCLE','REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA')
down <- c('REACTOME_DECTIN_2_FAMILY','REACTOME_REGULATION_OF_KIT_SIGNALING','REACTOME_ROLE_OF_LAT2_NTAL_LAB_ON_CALCIUM_MOBILIZATION','REACTOME_ANTI_INFLAMMATORY_RESPONSE_FAVOURING_LEISHMANIA_PARASITE_INFECTION','REACTOME_ACTIVATED_NTRK3_SIGNALS_THROUGH_PI3K','KEGG_FC_EPSILON_RI_SIGNALING_PATHWAY','KEGG_ERBB_SIGNALING_PATHWAY','REACTOME_PROSTANOID_LIGAND_RECEPTORS','REACTOME_PI3K_EVENTS_IN_ERBB4_SIGNALING')
TEST <- c(up,down)
p <- allDiff
p$ID <- rownames(p)
q <- p[TEST,]
group1 <- c(rep("Exhausted CD8+ T cells",12),rep("Intraepithelial CD8+ T cells",9))
df <- data.frame(ID = q$ID, score = q$t,group=group1 )
# 按照score排序
sortdf <- df[order(df$score),]
sortdf$ID <- factor(sortdf$ID, levels = sortdf$ID)#增加通路ID那一列
head(sortdf)
t <- ggplot(sortdf, aes(ID, score,fill=group)) + geom_bar(stat = 'identity') +
coord_flip() +
theme_bw() + #去除背景色
theme(panel.grid =element_blank())+
theme(panel.border = element_rect(size = 0.6))
t
图片.png
绘制通路热图
kegg3 <- as.data.frame(kegg2)
kegg4 <- kegg3[TEST,]
group <- meta
annotation_col <- data.frame(group)
rownames(annotation_col) <- colnames(kegg2)
library(pheatmap)
kk2 <- pheatmap(kegg4,
cluster_rows = F,
cluster_cols = F,
annotation_col =annotation_col,
annotation_legend=T,
show_rownames = F,
show_colnames = F,
color =colorRampPalette(c("blue", "white","red"))(100),
cellwidth = 0.5, cellheight = 13,
fontsize = 10)
pdf(("kk2.pdf"),width = 15,height = 7)
print(kk2)
dev.off()
图片.png