DESeq2差异分析实战

2020-02-07  本文已影响0人  多啦A梦詹

解螺旋的例子,示例数据见https://github.com/xianxiongma/demoData/blob/master/gene_counts.xls.

读取文件

library("DESeq2")
counts <- read.table("gene_counts.xls", sep = "\t", header = T, 
    row.names = 1)
knitr::kable(head(counts))
A1 A2 A3 B1 B2 B3
A1BG 535 600 337 727 969 582
A1CF 0 0 0 0 0 0
A2M 0 2 0 0 0 0
A2ML1 5 0 10 1 1 1
A3GALT2 1 1 1 0 0 0
A4GALT 566 567 433 983 1050 749

创建样本信息表

colData <- data.frame(row.names = c("A1", "A2", "A3", "B1", 
    "B2", "B3"), condition = factor(c("control", "control", 
    "control", "case", "case", "case"), levels = c("control", 
    "case")))
knitr::kable(colData)
condition
A1 control
A2 control
A3 control
B1 case
B2 case
B3 case

构建DESeqDataSet对象

dds <- DESeqDataSetFromMatrix(countData = counts, colData = colData, 
    design = ~condition)
dds
## class: DESeqDataSet 
## dim: 20030 6 
## metadata(1): version
## assays(1): counts
## rownames(20030): A1BG A1CF ... ZZEF1 ZZZ3
## rowData names(0):
## colnames(6): A1 A2 ... B2 B3
## colData names(1): condition

DESeq函数差异分析

dds <- DESeq(dds)

## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
sizeFactors(dds)
##        A1        A2        A3        B1        B2 
## 1.2245534 0.8961380 1.3670448 0.8036123 0.8675225 
##        B3 
## 1.0255837

通过results函数提取差异结果

res <- results(dds)
knitr::kable(head(res))
baseMean log2FoldChange lfcSE stat pvalue padj
A1BG 657.0119228 0.9376274 0.4159191 2.2543502 0.0241741 0.1716017
A1CF 0.0000000 NA NA NA NA NA
A2M 0.3719665 -1.8140749 4.0138883 -0.4519495 0.6513054 NA
A2ML1 2.4617190 -1.7854247 1.8192485 -0.9814078 0.3263917 NA
A3GALT2 0.4440048 -2.1050945 3.8402173 -0.5481707 0.5835747 NA
A4GALT 762.5919472 1.1653116 0.3729851 3.1242848 0.0017824 0.0672184

保存结果

class(res)
## [1] "DESeqResults"
## attr(,"package")
## [1] "DESeq2"
res <- as.data.frame(res)
res <- cbind(rownames(res), res)
colnames(res) <- c("gene_id", "baseMean", "log2FoldChange", 
    "lfcSE", "stat", "pval", "padj")
# write.table(res,'case-vs-control-all-DESeq2.gene.xls',sep = '\t',col.names = TRUE, row.names = FALSE, quote = FALSE,na ='')

保存差异结果

resSig <- res[which(res$pval < 0.05 & abs(res$log2FoldChange) > 
    1), ]
resSig[which(resSig$log2FoldChange > 0), "up_down"] <- "Up"
resSig[which(resSig$log2FoldChange < 0), "up_down"] <- "Down"
# write.table(resSig,'case-vs-control-diff-pval-0.05-FC-2-DESeq2.gene.xls',sep= '\t', col.names = TRUE, row.names = FALSE, quote = FALSE, na = '')
上一篇下一篇

猜你喜欢

热点阅读