生信

在火山图上标注自定义基因

2023-02-26  本文已影响0人  找兔子的小萝卜
# Load the differential-expression table ----
# NOTE: the original `rm(list = ls())` was removed on purpose. Wiping the
# caller's global environment is a destructive side effect and nothing in
# this script depends on starting from an empty workspace.
# Keep character columns as strings on R < 4.0 (the default since R 4.0).
options(stringsAsFactors = FALSE)
library(readxl)

data <- read_excel("G1 vs G2.xlsx")
output <- data

# The rest of the script reads these three columns; fail early with a clear
# message when the sheet does not provide them.
required_cols <- c("GeneSymbol", "logFC", "PValue")
missing_cols <- setdiff(required_cols, colnames(output))
if (length(missing_cols) > 0) {
  stop(
    "Input table is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Optional table clean-up steps (enable as needed):
# output <- output[, -c(5:9)]
# output <- output[, c(1:5)]
# output$logFC <- output$logFC * (-1)  # flip the direction of the comparison

# Classify every gene as "Up", "Down" or "Stable".
# A gene is differentially expressed when PValue < 0.05 and |logFC| >= 1.
# The direction is decided by the sign of logFC: the previous inner test
# `logFC > 1` labelled a significant gene with logFC exactly 1 as "Down".
output$change <- ifelse(
  output$PValue < 0.05 & abs(output$logFC) >= 1,
  ifelse(output$logFC > 0, "Up", "Down"),
  "Stable"
)
## Optional: join your own annotation table (e.g. gene lengths)
# library(dplyr)
# colnames(list)[1] <- "NAME"
# colnames(output)[1] <- "NAME"
# exp <- inner_join(output, list, by = "NAME")

# Genes to highlight on the plot (user-defined) ----
geneList0 <- c("CYR61", "CYP24A1", "ANKRD1", "CTGF", "VDR")

# Optional alternative: label the most significant genes instead
# data$label <- ifelse(
#   data$p.value < 0.000001 & abs(log2(data$FC)) >= 1, data$symbol, ""
# )

# Work on a plain data.frame so that gene symbols can be used as row names
# (presumably `output` arrives as a tibble from readxl -- confirm upstream).
output <- as.data.frame(output)

# Row names must be unique and non-missing. Drop rows without a symbol and
# keep only the first occurrence of each symbol; assigning row names with an
# NA symbol still present would stop the script with an error.
n_missing <- sum(is.na(output$GeneSymbol))
if (n_missing > 0) {
  message("Dropping ", n_missing, " row(s) with a missing GeneSymbol")
}
keep_row <- !is.na(output$GeneSymbol) & !duplicated(output$GeneSymbol)
output <- output[keep_row, , drop = FALSE]
row.names(output) <- output$GeneSymbol

# Rows of the highlighted genes, in the order given in geneList0.
# Only genes that actually occur in the data are selected: indexing by a row
# name that does not exist would add an all-NA row to the result.
missing_genes <- setdiff(geneList0, output$GeneSymbol)
if (length(missing_genes) > 0) {
  warning(
    "Genes not found in the data and skipped: ",
    paste(missing_genes, collapse = ", "),
    call. = FALSE
  )
}
geneList <- output[intersect(geneList0, output$GeneSymbol), , drop = FALSE]

# Number of up- and down-regulated genes (NA classifications are ignored).
sum(output$change == "Up", na.rm = TRUE)
sum(output$change == "Down", na.rm = TRUE)

# Draw the base volcano plot ----
library(ggplot2)

# Colours are matched to the categories by NAME. An unnamed vector is assigned
# by position over the categories present in the data, so the colours would
# shift whenever one category (e.g. "Down") is absent.
change_colours <- c(Down = "#2d7287", Stable = "#d2dae2", Up = "#990715")

p <- ggplot(
  # data, aesthetic mapping, colour by regulation category
  output, aes(x = logFC, y = -log10(PValue), colour = change)
) +
  geom_point(alpha = 0.5, size = 2) +
  scale_color_manual(values = change_colours) +
  # highlight the user-selected genes with larger black points
  geom_point(
    data = geneList, aes(x = logFC, y = -log10(PValue)),
    colour = "black", size = 3
  ) +
  # guide lines at the classification thresholds (|logFC| = 1, p = 0.05)
  geom_vline(xintercept = c(-1, 1), lty = 3, col = "black", lwd = 0.8) +
  geom_hline(yintercept = -log10(0.05), lty = 3, col = "black", lwd = 0.8) +
  # axis titles; add title = "Volcano plot" here for a plot title
  labs(x = "log2(fold change)", y = "-log10 (p-value)") +
  theme_bw() +                           # white background
  theme(panel.grid = element_blank()) +  # no grid lines
  # xlim(-2, 2) +                        # optionally fix the x-axis range
  # legend and text sizes
  theme(
    plot.title = element_text(hjust = 0.5, size = 24),
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 18),
    legend.key.size = unit(1, "cm"),
    legend.background = element_rect(
      fill = "gray90", linetype = "solid", colour = "gray"
    ),
    axis.title.x = element_text(size = 18),
    axis.title.y = element_text(size = 18),
    axis.text = element_text(size = 14, face = "bold")
  )

# Explicit print() so the plot is also rendered when the script is source()d.
print(p)


# Add text labels for the highlighted genes ----
library(ggrepel)

# Reset to automatic (numeric) row names. NULL also works for a table with
# zero rows, where `1:nrow(output)` would yield c(1, 0) and fail.
rownames(output) <- NULL

# Lookup table for the genes to label: `GeneSymbol` is the join key and
# `gene` is the text shown on the plot.
geneList <- data.frame(
  gene = geneList0,
  GeneSymbol = geneList0,
  stringsAsFactors = FALSE
)
geneList$GeneSymbol

# Keep only the rows of the selected genes. The previous left join passed
# every unlabelled row (gene = NA) to ggrepel as well. The result is no longer
# stored in a variable called `c`, which shadowed base::c().
label_data <- merge(output, geneList, by = "GeneSymbol")

# Draw the labels on top of the base plot.
p_labelled <- p +
  geom_label_repel(
    data = label_data,
    aes(x = logFC, y = -log10(PValue), label = gene),
    size = 4, color = "black",
    # box.padding = unit(0.5, "lines"),
    # point.padding = unit(0.8, "lines"),
    # segment.color = "black",                  # colour of the connector
    # segment.size = 0.4,                       # thickness of the connector
    # arrow = arrow(length = unit(0.01, "npc")),  # arrow from label to point
    show.legend = FALSE
  )

# Explicit print() so the plot is also rendered when the script is source()d.
print(p_labelled)
上一篇 下一篇

猜你喜欢

热点阅读