将salmon生成的数据导入R语言做DESeq2差异分析
2021-10-24 本文已影响0人
FANHONGZENG
用salmon走完RNA-Seq上游分析后,就要把数据导入R里做下游分析
使用R包tximport导入数据
library(tximport)
library(GenomicFeatures)
txd<-makeTxDbFromGFF(file='../RNAseq/Arabidopsis/project/AtRTDv2_QUASI_19April2016.gtf',format='gtf',dataSource='AtRTDv2_QUASI_19April2016.gtf',organism ='Arabidopsis thaliana') ##导入gtf文件
str(txd)
keytypes(txd)
txTogene<-AnnotationDbi::select(txd,keys =keys(txd,'TXNAME'),keytype='TXNAME',columns=c('TXNAME','GENEID'))
head(txTogene)
data<-c('SRR3581878_output/quant.sf','SRR3581712_output/quant.sf','SRR3581836_output/quant.sf','SRR3581356_output/quant.sf','SRR3581676_output/quant.sf','SRR3581843_output/quant.sf','SRR3581865_output/quant.sf','SRR3581699_output/quant.sf','SRR3581703_output/quant.sf','SRR3581869_output/quant.sf','SRR3581705_output/quant.sf','SRR3581871_output/quant.sf')
samplelist<-c('seed-1','seed-2','root-1','root-2','leaf-1','leaf-2','flower-1','flower-2','pedicel-1','pedicel-2','internode-1','internode-2')
names(data)<-samplelist
txi<-tximport(data,type='salmon',tx2gene=txTogene)
write.csv(txi,'txi.csv',row.names=TRUE)
这样就把salmon的文件成功导入了R里
接下来使用R包DESeq2做差异分析
DESeq2需要3个矩阵:表达矩阵(countData),分组矩阵(colData),差异比较矩阵(design)
library(DESeq2)
sampleNames<-c('seed-1','seed-2','root-1','root-2','leaf-1','leaf-2','flower-1','flower-2','pedicel-1','pedicel-2','internode-1','internode-2')
sampleGroup<-c('seed','seed','root','root','leaf','leaf','flower','flower','pedicel','pedicle','internode','internode') ##差异比较矩阵
sampleTable<-data.frame(sampleName=sampleNames,type=sampleGroup) ##分组矩阵
rownames(sampleTable)<-colnames(txi$counts)
dds<-DESeqDataSetFromTximport(txi,sampleTable,design=~type) ##差异分析
dds<-DESeq(dds)
res<-results(dds) ##从DESeq 分析中提取结果表
resordered<-res[order(res$padj),] ##以padj对res排序
write.csv(resordered,'res.csv',row.names=TRUE)
这里的res.csv里面包含log2FoldChange和pvalue值,可以用来作图寻找差异基因
pheatmap 画热图
library(pheatmap)
exprSet<-read.csv(file = 'txi.csv',header = T,row.names = 1) ##使用第一步得到的txi.csv表达矩阵
exprSet<-exprSet[,(13:24)]
head(resOrdered)
DEG<-as.data.frame(resOrdered)
nrDEG=na.omit(DEG)
choose_gene=head(rownames(nrDEG),50) ##选择50个基因画热图
choose_matrix=exprSet[choose_gene,]
choose_matrix=t(scale(t(choose_matrix)))
pheatmap(choose_matrix,filename = 'DEseq_heatmap.png')
heatmap热图
ggplot 画火山图
火山图可以看到上调和下调的差异基因
nrDEG$change = as.factor(ifelse(nrDEG$pvalue< 0.05 & abs(nrDEG$log2FoldChange) > 2,
ifelse(nrDEG$log2FoldChange>2 ,'UP','DOWN'),'NOT'))
this_tile <- paste0('Cutoff for logFC is ',round(2,3),
'\nThe number of up gene is ',nrow(nrDEG[nrDEG$change =='UP',]) ,
'\nThe number of down gene is ',nrow(nrDEG[nrDEG$change =='DOWN',])
)
library(ggplot2)
g = ggplot(data=nrDEG,
aes(x=log2FoldChange, y=-log10(pvalue),
color=change)) +
geom_point(alpha=0.4, size=1.75) +
theme_set(theme_set(theme_bw(base_size=20)))+
xlab("log2 fold change") + ylab("-log10 p-value") +
ggtitle( this_tile ) + theme(plot.title = element_text(size=15,hjust = 0.5))+
scale_colour_manual(values = c('blue','black','red')) ## corresponding to the levels(res$change)
print(g)
ggsave(g,filename = 'volcano.png')
火山图