RNA-seq 分析流程(二)DEseq2 分析差异表达基因
2020-11-09 本文已影响0人
煮梦斋_bioinfo
linux分析部分详见RNA-seq分析流程(一)https://www.jianshu.com/p/e8a0be4121b1
rm(list=ls())
if(!require(DESeq2))BiocManager::install("DESeq2")
library(rio)
library(dplyr)
Gene=import("seed_gene_list.csv",header = T)
#构造数据库,前面是处理,后面是对照
database=Gene[,c(21:23,13:15)] #test,control
head(database)
#rep可以根据你的重复多少而定,如果是3个重复用3
condition = factor(c(rep("treat", 3), rep("untreat", 3)), levels = c("treat", "untreat"))
countdata = round(as.matrix(database))
coldata = data.frame(row.names = colnames(countdata), condition)
dds = DESeqDataSetFromMatrix(countdata, colData = coldata, design = ~ condition)
dds = DESeq(dds)
sizeFactors(dds)
head(dds)
res <- results(dds, contrast = c("condition", "treat", "untreat"))
head(res)
summary(res)
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)),by="row.names",sort=FALSE)
head(resdata)
merge_list <- data.frame(Gene,resdata)
resdata <- merge_list
#这里我的选择标准是FDR <0.05,Foldchange绝对值大于1,等于说是要差异2倍以上
diff_gene <- subset(resdata, padj < 0.05 & (log2FoldChange > 1 | log2FoldChange < -1))
up_gene = subset(resdata, padj < 0.05 & (log2FoldChange > 1))
down_gene = subset(resdata, padj < 0.05 & (log2FoldChange < -1))
write.csv(resdata,file = "DEseq2_tot_water_halfhr_vs_dry_seed.csv",row.names = FALSE)
write.csv(diff_gene,file = "DEseq2_diffgene_water_halfhr_vs_dry_seed.csv",row.names = FALSE)
write.csv(up_gene,file = "DEseq2_upgene_water_halfhr_vs_dry_seed.csv",row.names = FALSE)
write.csv(down_gene,file = "DEseq2_downgene__water_halfhr_vs_dry_seed.csv",row.names = FALSE)