单细胞转录组复现

单细胞的GSVA通路分析和CellPhoneDB文件的准备

2020-10-06  本文已影响0人  碌碌无为的杰少

单细胞的通路分析

在做单细胞的通路分析的时候,第一就想做GO,KEGG,当然这是对于差异基因,但是单细胞的差异基因往往没有特殊的标准,得不到自己想要的结果,所以放弃了,然后考虑做GSEA,就这样反复思考,寝食难安,非常煎熬半个月后,得到群里面小伙伴和Kinesin的指导,再弄清楚GSEA和GSVA原理后,终于明白单细胞的文章做通路分析的时候为什么都用GSVA分析,所以以后大家遇到单细胞的通路分析的时候直接用GSVA吧,对于一个临床小白也是一步一步弄清楚pathway到底是什么东西了,迫不及待拾起来了好久没写的简书,下面就复现一篇CNS里面的图吧。


图片.png

提取表达矩阵

library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)
library(dplyr)
DD1 <-  SubsetData(sub_singlecell,
                   # 提取数据根据的组名,提取两个亚群分群
                   subset.name = "celltype1", 
                   # 提取的组别
                   accept.value = c("Intraepithelial  CD8+ T cells","Exhausted CD8+  T cells")) 
expr <- as.data.frame(DD1@assays$RNA@data)
expr$ID <- rownames(expr)
s2e <- bitr(expr$ID, 
            fromType = "SYMBOL",
            toType = "ENTREZID",
            OrgDb = org.Hs.eg.db)#人类 转换成ENTREZID
#其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
expr <- inner_join(expr,s2e,by=c("ID"="SYMBOL"))
rownames(expr) <- expr$ENTREZID
expr <- expr[,-524]
expr <- expr[,-523]
meta <- DD1@meta.data[,c("celltype1")]

加载GSEA通路基因集合

有C1和C2两个数据集,C1记载了50个通路,比较少,我选的C2,6000多个通路的

kegggmt <- read.gmt("c2.all.v7.2.entrez.gmt")
colnames(kegggmt)
kegg_list = split(kegggmt$gene, kegggmt$ont)
图片.png

GSVA分析

expr=as.matrix(expr)
kegg2 <- gsva(expr, kegg_list, kcdf="Gaussian",method = "gsva",parallel.sz=1)


用limma包作图并绘图

library(limma)
exprSet <- kegg2
### 差异分析
## 创建分组
group <- factor(meta,levels = c("Intraepithelial  CD8+ T cells","Exhausted CD8+  T cells"),ordered = F)
## 分组变成向量,并且限定leves的顺序
## levels里面,把对照组放在前面
## 1.构建比较矩阵
design <- model.matrix(~group)
## 比较矩阵命名
colnames(design) <- levels(group)
##2.线性模型拟合
fit <- lmFit(exprSet,design)
##3.贝叶斯检验
fit2 <- eBayes(fit)
#4.输出差异分析结果,其中coef的数目不能操过design的列数
# 此处的2代表的是第二列和第一列的比较
allDiff=topTable(fit2,adjust='fdr',coef=2,number=Inf)
#根据allDiff找出你自己感兴趣的通路
up <- c("KEGG_CELL_ADHESION_MOLECULES_CAM","REACTOME_PD_1_SIGNALING",'REACTOME_INTERFERON_GAMMA_SIGNALING','REACTOME_TCR_SIGNALING','REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM','REACTOME_DNA_REPLICATION','REACTOME_CHEMOKINE_RECEPTORS_BIND_CHEMOKINES','REACTOME_MHC_CLASS_II_ANTIGEN_PRESENTATION','REACTOME_SIGNALING_BY_NOTCH','REACTOME_G2_M_CHECKPOINTS','REACTOME_CELL_CYCLE','REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA')
down <- c('REACTOME_DECTIN_2_FAMILY','REACTOME_REGULATION_OF_KIT_SIGNALING','REACTOME_ROLE_OF_LAT2_NTAL_LAB_ON_CALCIUM_MOBILIZATION','REACTOME_ANTI_INFLAMMATORY_RESPONSE_FAVOURING_LEISHMANIA_PARASITE_INFECTION','REACTOME_ACTIVATED_NTRK3_SIGNALS_THROUGH_PI3K','KEGG_FC_EPSILON_RI_SIGNALING_PATHWAY','KEGG_ERBB_SIGNALING_PATHWAY','REACTOME_PROSTANOID_LIGAND_RECEPTORS','REACTOME_PI3K_EVENTS_IN_ERBB4_SIGNALING')
TEST <- c(up,down)

p <- allDiff
p$ID <- rownames(p) 
q <- p[TEST,]
group1 <- c(rep("Exhausted CD8+  T cells",12),rep("Intraepithelial  CD8+ T cells",9)) 
df <- data.frame(ID = q$ID, score = q$t,group=group1 )
# 按照score排序
sortdf <- df[order(df$score),]
sortdf$ID <- factor(sortdf$ID, levels = sortdf$ID)#增加通路ID那一列
head(sortdf)
t <- ggplot(sortdf, aes(ID, score,fill=group)) + geom_bar(stat = 'identity') + 
                    coord_flip() + 
                    theme_bw() + #去除背景色
                    theme(panel.grid =element_blank())+
                    theme(panel.border = element_rect(size = 0.6)) 
t
图片.png

绘制通路热图

kegg3 <- as.data.frame(kegg2) 
kegg4 <- kegg3[TEST,]
group <- meta
annotation_col <- data.frame(group)
rownames(annotation_col) <- colnames(kegg2)
library(pheatmap)
kk2 <- pheatmap(kegg4,
                cluster_rows = F,
                cluster_cols = F,
                annotation_col =annotation_col,
                annotation_legend=T, 
                show_rownames = F,
                show_colnames = F,
                color =colorRampPalette(c("blue", "white","red"))(100),
                cellwidth = 0.5, cellheight = 13,
                fontsize = 10)
pdf(("kk2.pdf"),width = 15,height = 7)
print(kk2)
dev.off()
图片.png
上一篇 下一篇

猜你喜欢

热点阅读