火山图
2020-01-19 本文已影响0人
生信编程日常
在分析RNAseq和microarray数据的差异基因的时候,常常用到火山图,需要的数据是包含Fold change(FC) 和pvalue的矩阵,通常横坐标用log2(FC)表示,
纵坐标用-log10(pvalue)表示。差异越大的基因位于左上角和右上角。
library(ggplot2)
# Cell-cycle positive gene set
# RNAseq_genes: table holding the fold change (FC) and P-value per gene
# rownames(RNAseq_genes)<-RNAseq_genes$Row.names
#tiff("volcanoplot_cellcycle_pos.tiff",width = 550,height = 350)
# Keep only the rows whose row name appears in pos_cellcycle.
RNAseq_genes_CC_pos <- RNAseq_genes[
  intersect(pos_cellcycle, rownames(RNAseq_genes)),
]
image.png
# Classify every gene for colouring:
#   PValue < 0.05 and |logFC| > log2(1.5)  -> "up_genes" (logFC above the
#   cutoff) or "down_genes" (otherwise); everything else -> "not_DEgenes".
RNAseq_genes_CC_pos$change <- as.factor(ifelse(
  RNAseq_genes_CC_pos$PValue < 0.05 &
    abs(RNAseq_genes_CC_pos$logFC) > log2(1.5),
  ifelse(RNAseq_genes_CC_pos$logFC > log2(1.5), "up_genes", "down_genes"),
  "not_DEgenes"
))

# Volcano plot: logFC on x, -log10(PValue) on y, points coloured by `change`.
# `values` are matched to `limits` in order: up = red, down = blue,
# not DE = grey. (The original call ended with one closing parenthesis too
# many, which made the whole expression a parse error.)
ggplot(
  data = RNAseq_genes_CC_pos,
  aes(x = logFC, y = -log10(PValue), color = change)
) +
  geom_point(alpha = 0.8, size = 1) +
  theme_bw(base_size = 15) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  ) +
  scale_color_manual(
    name = "",
    values = c("red", "blue", "grey"),
    limits = c("up_genes", "down_genes", "not_DEgenes")
  )
#dev.off()
image.png
加上显著基因的标签
library(ggrepel)
# Label column: the gene's row name when PValue < 0.05 and
# |logFC| > log2(2), otherwise NA (no label drawn for that gene).
RNAseq_genes_CC_pos$sign <- with(
  RNAseq_genes_CC_pos,
  ifelse(
    PValue < 0.05 & abs(logFC) > log2(2),
    rownames(RNAseq_genes_CC_pos),
    NA
  )
)

# Same volcano plot as before, with repelled black text labels on top of
# the points for the genes that received a label above.
ggplot(
  data = RNAseq_genes_CC_pos,
  mapping = aes(x = logFC, y = -log10(PValue), color = change)
) +
  geom_point(size = 1, alpha = 0.8) +
  theme_bw(base_size = 15) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  scale_color_manual(
    name = "",
    values = c("red", "blue", "grey"),
    limits = c("up_genes", "down_genes", "not_DEgenes")
  ) +
  geom_text_repel(aes(label = sign), size = 4.5, color = "black")
image.png
欢迎关注微信公众号:生信编程日常
公众号二维码.jpg
参考:
https://zhuanlan.zhihu.com/p/34992159
https://www.jianshu.com/p/003bdf9e6169