解螺旋的例子,示例数据见https://github.com/xianxiongma/demoData/blob/master/gene_counts.xls.
读取文件
library("DESeq2")
counts <- read.table("gene_counts.xls", sep = "\t", header = T,
row.names = 1)
knitr::kable(head(counts))
|
A1 |
A2 |
A3 |
B1 |
B2 |
B3 |
A1BG |
535 |
600 |
337 |
727 |
969 |
582 |
A1CF |
0 |
0 |
0 |
0 |
0 |
0 |
A2M |
0 |
2 |
0 |
0 |
0 |
0 |
A2ML1 |
5 |
0 |
10 |
1 |
1 |
1 |
A3GALT2 |
1 |
1 |
1 |
0 |
0 |
0 |
A4GALT |
566 |
567 |
433 |
983 |
1050 |
749 |
创建样本信息表
colData <- data.frame(row.names = c("A1", "A2", "A3", "B1",
"B2", "B3"), condition = factor(c("control", "control",
"control", "case", "case", "case"), levels = c("control",
"case")))
knitr::kable(colData)
|
condition |
A1 |
control |
A2 |
control |
A3 |
control |
B1 |
case |
B2 |
case |
B3 |
case |
构建DESeqDataSet对象
dds <- DESeqDataSetFromMatrix(countData = counts, colData = colData,
design = ~condition)
dds
## class: DESeqDataSet
## dim: 20030 6
## metadata(1): version
## assays(1): counts
## rownames(20030): A1BG A1CF ... ZZEF1 ZZZ3
## rowData names(0):
## colnames(6): A1 A2 ... B2 B3
## colData names(1): condition
DESeq函数差异分析
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
sizeFactors(dds)
## A1 A2 A3 B1 B2
## 1.2245534 0.8961380 1.3670448 0.8036123 0.8675225
## B3
## 1.0255837
通过results函数提取差异结果
res <- results(dds)
knitr::kable(head(res))
|
baseMean |
log2FoldChange |
lfcSE |
stat |
pvalue |
padj |
A1BG |
657.0119228 |
0.9376274 |
0.4159191 |
2.2543502 |
0.0241741 |
0.1716017 |
A1CF |
0.0000000 |
NA |
NA |
NA |
NA |
NA |
A2M |
0.3719665 |
-1.8140749 |
4.0138883 |
-0.4519495 |
0.6513054 |
NA |
A2ML1 |
2.4617190 |
-1.7854247 |
1.8192485 |
-0.9814078 |
0.3263917 |
NA |
A3GALT2 |
0.4440048 |
-2.1050945 |
3.8402173 |
-0.5481707 |
0.5835747 |
NA |
A4GALT |
762.5919472 |
1.1653116 |
0.3729851 |
3.1242848 |
0.0017824 |
0.0672184 |
保存结果
class(res)
## [1] "DESeqResults"
## attr(,"package")
## [1] "DESeq2"
res <- as.data.frame(res)
res <- cbind(rownames(res), res)
colnames(res) <- c("gene_id", "baseMean", "log2FoldChange",
"lfcSE", "stat", "pval", "padj")
# write.table(res,'case-vs-control-all-DESeq2.gene.xls',sep = '\t',col.names = TRUE, row.names = FALSE, quote = FALSE,na ='')
保存差异结果
resSig <- res[which(res$pval < 0.05 & abs(res$log2FoldChange) >
1), ]
resSig[which(resSig$log2FoldChange > 0), "up_down"] <- "Up"
resSig[which(resSig$log2FoldChange < 0), "up_down"] <- "Down"
# write.table(resSig,'case-vs-control-diff-pval-0.05-FC-2-DESeq2.gene.xls',sep= '\t', col.names = TRUE, row.names = FALSE, quote = FALSE, na = '')