RNAseq 分析后用 R 进行结果可视化:volcano plot 和 heatmap
2020-04-20 本文已影响0人
奔跑的Forrest
本文承接之前的文章《完整转录组RNAseq分析流程(tophat2+cufflink+cuffdiff)》,使用该流程中 cuffdiff 的分析结果进行可视化。参考视频:https://www.bilibili.com/video/BV1gW411Y7Qf
文中用到的 Hi-C 数据来自他人,不便在此公开;可以观看孟叔的视频,加群后在群文件中下载。
###############################################
#rnaseq 可视化 孟b站 R绘图
#############################################
#content
#1. volcano plot
#2. heatmap plot
#3. cytogram plot
#4. plot with layout
#############################################
# NOTE: the original script started with rm(list = ls()). That call is dropped
# on purpose: it silently deletes everything in the user's workspace and still
# does not reset loaded packages or options. Run this script in a fresh R
# session instead.

#############################################
# Part one: volcano plot
# Input: the gene_exp.diff table produced by cuffdiff
#############################################
cuffdiff_result <- read.table(
  file = "../Desktop/test_data/rnaseq_test_date/diff_out1/gene_exp.diff",
  header = TRUE
)

# Expression values of the two compared samples (columns value_1 / value_2).
# NOTE(review): value_1 is treated as control and value_2 as treatment here;
# confirm this matches the sample order that was passed to cuffdiff.
ctrl_fpkm <- cuffdiff_result$value_1
treat_fpkm <- cuffdiff_result$value_2

# log2 fold change, treatment over control.
# A zero on either side makes the ratio degenerate:
#   ctrl == 0  -> division by zero: Inf (or NaN when treat is 0 as well)
#   treat == 0 -> log2(0) = -Inf
# Such genes have no usable fold change, so their value is set to 0.
log2_foldchange <- log2(treat_fpkm / ctrl_fpkm)
log2_foldchange[ctrl_fpkm == 0 | treat_fpkm == 0] <- 0

# -log10(p-value): the larger the value, the more significant the gene.
log10_p_value <- -log10(cuffdiff_result$p_value)
######################################## 1st edition
# Plain scatter plot: fold change on x, significance on y.
plot(x = log2_foldchange, y = log10_p_value)
# Two things to improve on:
#   1. points whose -log10(p) is (almost) 0 are normally not drawn
#   2. significant and non-significant points should get different colours
# First attempt: clip the axes, x to [-4, 4] and y to [0.001, 4].
plot(
  x = log2_foldchange, y = log10_p_value,
  xlim = c(-4, 4), ylim = c(0.001, 4)
)

######################################## 2nd edition
# Drop the points below the y cut-off instead of only hiding them.
keep_point <- log10_p_value >= 0.001
log10_p_value.filter <- log10_p_value[keep_point]
log2_foldchange.filter <- log2_foldchange[keep_point]

# Axis limits shared by all remaining plots of this section.
volcano_xlim <- c(-4, 4)
volcano_ylim <- c(0, 4)
plot(
  x = log2_foldchange.filter, y = log10_p_value.filter,
  xlim = volcano_xlim, ylim = volcano_ylim
)

######################################## 3rd edition
# Filled circles (pch = 16) in a semi-transparent blue.
faint_blue <- rgb(0, 0, 1, 0.1)
plot(
  x = log2_foldchange.filter, y = log10_p_value.filter,
  xlim = volcano_xlim, ylim = volcano_ylim,
  col = faint_blue, pch = 16
)

####################################### 4th edition
# Highlight the significant points; this needs one colour per point.
length(log2_foldchange.filter)
col_vector <- rep(faint_blue, length(log2_foldchange.filter))
# Points at or above -log10(0.001), i.e. p <= 0.001, are painted red.
is_highlighted <- log10_p_value.filter >= -log10(0.001)
col_vector[is_highlighted] <- rgb(1, 0, 0)
plot(
  x = log2_foldchange.filter, y = log10_p_value.filter,
  xlim = volcano_xlim, ylim = volcano_ylim,
  col = col_vector, pch = 16
)
######################################## 5th edition
# A realistic analysis combines several criteria. A gene is marked as
# significant only when all of the following hold:
#   1. FPKM > 0 in both samples
#   2. at least a two-fold change in either direction, |log2 fold change| >= 1
#   3. FPKM >= 1 in at least one of the two samples
#   4. p-value <= 0.05
expressed_in_both <- (cuffdiff_result$value_1 > 0) & (cuffdiff_result$value_2 > 0)
two_fold_change <- abs(log2_foldchange) >= 1
fpkm_reaches_one <- (cuffdiff_result$value_1 >= 1) | (cuffdiff_result$value_2 >= 1)
p_below_cutoff <- cuffdiff_result$p_value <= 0.05
select_sign_vector <- expressed_in_both & two_fold_change &
  fpkm_reaches_one & p_below_cutoff

# Apply the same y cut-off to the coordinates and to the selection so the
# three vectors stay aligned.
above_y_cutoff <- log10_p_value >= 0.001
log10_p_value.filter <- log10_p_value[above_y_cutoff]
log2_foldchange.filter <- log2_foldchange[above_y_cutoff]
select_sign_vector.filter <- select_sign_vector[above_y_cutoff]

# Semi-transparent blue by default, solid red for the selected genes.
n_points <- length(log2_foldchange.filter)
col_vector <- rep(rgb(0, 0, 1, 0.1), n_points)
col_vector[select_sign_vector.filter] <- rgb(1, 0, 0)

plot(
  x = log2_foldchange.filter, y = log10_p_value.filter,
  xlim = c(-4, 4), ylim = c(0, 4),
  col = col_vector, pch = 16
)
# Dotted guide line at the p = 0.05 threshold.
abline(h = -log10(0.05), lwd = 2, lty = 3, col = "#4C5B61")
####################################################################
# Part two: heatmap drawn from scratch with rect()
####################################################################
# Step 1: draw single boxes on an empty canvas.
plot(x = c(1:10), y = c(1:10), type = "n")
# rect() takes the lower-left corner (xleft, ybottom) and the upper-right
# corner (xright, ytop); the corners of the first box are
# (0,0) (1,0) (1,1) (0,1).
rect(xleft = 0, ybottom = 0, xright = 1, ytop = 1, col = rgb(1, 0, 0))
rect(xleft = 5, ybottom = 5, xright = 6, ytop = 6, col = rgb(0, 1, 0))

# Plan for a heatmap:
#   input  : a numeric matrix
#   needed : xleft, ybottom, xright, ytop and a colour for every cell
#   output : the image
input_matrix <- matrix(1:36, nrow = 6, ncol = 6)

# Extent of the image: dimension 1 of the matrix runs along x, dimension 2
# along y.
x_size <- nrow(input_matrix)
y_size <- ncol(input_matrix)

# Cells are laid out starting at (0, 0) and moving towards
# (x_size, y_size), one horizontal row of boxes after the other:
#   1st row: xleft 0,1,2,...  ybottom 0,0,0,...  xright 1,2,3,...  ytop 1,1,1,...
#   2nd row: xleft 0,1,2,...  ybottom 1,1,1,...  xright 1,2,3,...  ytop 2,2,2,...
#   and so on.
# seq_len(n) - 1L is used instead of 0:(n - 1) so that n = 0 gives an empty
# vector. Each y value is repeated once per x position (each = x_size); the
# original used each = y_size, which is only correct for square matrices.
my_xleft <- rep(seq_len(x_size) - 1L, times = y_size)
my_xright <- my_xleft + 1
my_ybottom <- rep(seq_len(y_size) - 1L, each = x_size)
my_ytop <- my_ybottom + 1

# Empty canvas spanning the whole grid, then the (uncoloured) cell outlines.
# Only the two end points are passed so x and y always have equal length.
plot(x = c(0, x_size), y = c(0, y_size), type = "n", xlab = "x", ylab = "y")
rect(xleft = my_xleft, ybottom = my_ybottom, xright = my_xright, ytop = my_ytop)

# Next: colours. Simplest scheme: scale every value by the maximum and use
# the result as the opacity of red, so small values are nearly transparent
# and the maximum is solid red.
mat.max <- max(input_matrix)
input_matrix.rate <- input_matrix / mat.max
col.mat <- rgb(1, 0, 0, input_matrix.rate)
plot(x = c(0, x_size), y = c(0, y_size), type = "n", xlab = "x", ylab = "y")
rect(
  xleft = my_xleft, ybottom = my_ybottom, xright = my_xright, ytop = my_ytop,
  col = col.mat
)
# NOTE: matrix element [i, j] ends up at x = i, y = j, so the picture does not
# line up with the printed matrix; the matrix would have to be transposed.

# Final version: no axes, no frame, no cell borders.
plot(
  x = c(0, x_size), y = c(0, y_size), type = "n",
  xlab = "", ylab = "", axes = FALSE, frame.plot = FALSE
)
rect(
  xleft = my_xleft, ybottom = my_ybottom, xright = my_xright, ytop = my_ytop,
  col = col.mat, border = NA
)
# Real Hi-C data: a comma-separated numeric matrix without a header line.
hic_mat.raw <- read.table(
  file = "../Desktop/test_data/R基础绘图(live5)/20171203-Live-R_partII/data_file/chr_16_100000_MAPQ20.txt",
  sep = ",",
  header = FALSE
)
input_matrix <- as.matrix(hic_mat.raw)

n_row <- nrow(input_matrix)
n_col <- ncol(input_matrix)
# x_size / y_size keep their original definitions (dimension 1 / dimension 2)
# for any code that reads them; the geometry below uses n_row / n_col.
x_size <- n_row
y_size <- n_col

# One rectangle per matrix cell, in the column-major order in which the
# colour vector is produced: column j is drawn at x = j - 1 and row i at
# y = n_row - i, so the first row of the matrix is at the top of the image.
# The original mixed the two sizes and was only correct for square matrices;
# for a square matrix these vectors are identical to the original ones.
my_xleft <- rep(seq_len(n_col) - 1L, each = n_row)
my_xright <- my_xleft + 1
my_ybottom <- rep(rev(seq_len(n_row)) - 1L, times = n_col)
my_ytop <- my_ybottom + 1

# Scale by the 95th percentile rather than the maximum, so a few very large
# values do not wash out the rest; everything above the percentile is capped
# at full opacity.
mat.max <- unname(quantile(input_matrix, probs = 0.95))
if (mat.max <= 0) {
  # Dividing by a non-positive value would hand rgb() an invalid alpha.
  stop(
    "95th percentile of the Hi-C matrix is not positive; cannot scale colours",
    call. = FALSE
  )
}
input_matrix.rate <- input_matrix / mat.max
input_matrix.rate[input_matrix.rate > 1] <- 1
col.mat <- rgb(1, 0, 0, as.vector(input_matrix.rate))

# Draw on the current device.
plot(
  x = c(0, n_col), y = c(0, n_row), type = "n",
  xlab = "", ylab = "", axes = FALSE, frame.plot = FALSE
)
rect(
  xleft = my_xleft, ybottom = my_ybottom, xright = my_xright, ytop = my_ytop,
  col = col.mat, border = NA
)

# Drawing this many rectangles on screen is slow, so also render the image
# straight into a PNG file.
png(filename = "../Desktop/test_hic.png", width = 2000, height = 2000)
# The device is closed in `finally`, so a failure while drawing does not
# leave the PNG device open.
invisible(tryCatch(
  {
    plot(
      x = c(0, n_col), y = c(0, n_row), type = "n",
      xlab = "", ylab = "", axes = FALSE, frame.plot = FALSE
    )
    rect(
      xleft = my_xleft, ybottom = my_ybottom, xright = my_xright, ytop = my_ytop,
      col = col.mat, border = NA
    )
  },
  finally = dev.off()
))