在火山图上标注自定义基因
2023-02-26 本文已影响0人
找兔子的小萝卜
# Open data ----
# Load the differential-expression table, classify every gene as
# Up / Down / Stable, and collect the rows of the genes to be highlighted.
#
# The workspace is intentionally NOT wiped with rm(list = ls()): every object
# used below is assigned before it is read, so a clean session is not required
# and the caller's other objects are left alone.
options(stringsAsFactors = FALSE)
library(readxl)

data <- read_excel("G1 vs G2.xlsx")
# Work on a plain data.frame copy so that row names can be assigned later on.
output <- as.data.frame(data)

# Optional matrix clean-up steps (disabled):
# output <- output[, -c(5:9)]
# output <- output[, c(1:5)]
# output$logFC <- output$logFC * (-1)

# Define `change` ----
# A gene is significant when PValue < 0.05 and |logFC| >= 1. The direction is
# taken from the sign of logFC, so a significant gene with logFC exactly 1 is
# labelled "Up" (testing `logFC > 1` here would label it "Down").
is_significant <- output$PValue < 0.05 & abs(output$logFC) >= 1
output$change <- ifelse(
  is_significant,
  ifelse(output$logFC > 0, "Up", "Down"),
  "Stable"
)

# Optional: annotate with an external table through a join (disabled):
# library(dplyr)
# colnames(list)[1] = 'NAME'
# colnames(output)[1] = 'NAME'
# exp <- inner_join(output, list, by = "NAME")

# Genes to highlight (user-defined list)
geneList0 <- c("CYR61", "CYP24A1", "ANKRD1", "CTGF", "VDR")

# Keep only the first row of every gene symbol.
output <- output[!duplicated(output$GeneSymbol), , drop = FALSE]

# Optional: label the most strongly changed genes instead (disabled):
# data$label = ifelse(data$p.value < 0.000001 & abs(log2(data$FC)) >= 1, data$symbol, "")

# Remove missing gene symbols ----
# Report how many rows have no gene symbol, then drop them: a missing value
# cannot be used as a row name, so the assignment below would fail otherwise.
sum(is.na(output$GeneSymbol))
output <- output[!is.na(output$GeneSymbol), , drop = FALSE]
row.names(output) <- output$GeneSymbol

# Rows of the genes to highlight ----
# Look the symbols up with exact matching. Indexing a data frame by row name
# (output[geneList0, ]) matches partially and returns all-NA rows for symbols
# that are absent, so absent genes are reported and skipped instead.
is_present <- geneList0 %in% output$GeneSymbol
if (!all(is_present)) {
  warning(
    "Genes not found in the data and therefore not highlighted: ",
    paste(geneList0[!is_present], collapse = ", "),
    call. = FALSE
  )
}
geneList <- output[
  match(geneList0[is_present], output$GeneSymbol), ,
  drop = FALSE
]

# Number of up- and down-regulated genes (rows with an NA class are ignored).
sum(output$change == "Up", na.rm = TRUE)
sum(output$change == "Down", na.rm = TRUE)
# Draw the base volcano plot ----
# Reads `output` (all genes, with columns logFC, PValue and change) and
# `geneList` (the rows of the genes to highlight); stores the plot in `p`
# and prints it.
library("ggplot2")

p <- ggplot(
  # Data, aesthetic mapping and colour grouping
  output, aes(x = logFC, y = -log10(PValue), colour = change)
) +
  geom_point(alpha = 0.5, size = 2) +
  # Colours are bound to the class names explicitly. An unnamed vector is
  # assigned by position, so a data set without e.g. any "Down" gene would
  # silently shift the colours of the remaining classes.
  scale_color_manual(
    values = c(Down = "#2d7287", Stable = "#d2dae2", Up = "#990715")
  ) +
  # Emphasise the user-selected genes with larger black points
  geom_point(
    data = geneList,
    aes(x = logFC, y = -log10(PValue)),
    colour = "black", size = 3
  ) +
  # Guide lines at logFC = -1 / 1 and at p = 0.05
  geom_vline(xintercept = c(-1, 1), lty = 3, col = "black", lwd = 0.8) +
  geom_hline(yintercept = -log10(0.05), lty = 3, col = "black", lwd = 0.8) +
  # Axis titles (a plot title could be added with title = "Volcano plot")
  labs(x = "log2(fold change)", y = "-log10 (p-value)") +
  theme_bw() + # white background
  theme(panel.grid = element_blank()) + # no grid lines
  # xlim(-2, 2) + # optionally restrict the x-axis range
  # Legend and text sizes
  theme(
    plot.title = element_text(hjust = 0.5, size = 24),
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 18),
    legend.key.size = unit(1, "cm"),
    legend.background = element_rect(
      fill = "gray90", linetype = "solid", colour = "gray"
    ),
    axis.title.x = element_text(size = 18),
    axis.title.y = element_text(size = 18),
    axis.text = element_text(size = 14, face = "bold")
  )
p
# Add text labels for the selected genes ----
# Reads `output`, `geneList0` and the base plot `p`; prints the labelled plot.

# Reset the row names to consecutive integers (seq_len() is also safe when
# the table has no rows, unlike 1:nrow()).
rownames(output) <- seq_len(nrow(output))

# Lookup table for the genes to label: `GeneSymbol` is the join key and
# `gene` holds the text that is drawn on the plot.
geneList <- data.frame(
  gene = geneList0,
  GeneSymbol = geneList0,
  stringsAsFactors = FALSE
)
geneList$GeneSymbol

# Inner join: keep only the rows of the genes that get a label. A left join
# (all.x = TRUE) would hand every gene to the label layer with a missing
# label. The result is also no longer stored in a variable called `c`, which
# shadowed base::c().
label_data <- merge(output, geneList, by = "GeneSymbol")

# Draw the labels
library(ggrepel)
p + geom_label_repel(
  data = label_data,
  aes(x = logFC, y = -log10(PValue), label = gene),
  size = 4, color = "black",
  # Optional fine-tuning (disabled):
  # box.padding = unit(0.5, "lines"),
  # point.padding = unit(0.8, "lines"),
  # segment.color = "black", # colour of the connecting line
  # segment.size = 0.4, # width of the connecting line
  # arrow = arrow(length = unit(0.01, "npc")), # arrow between label and point
  show.legend = FALSE
)