2021-06-27 芯片数据limma差异分析
2021-06-27 本文已影响0人
学习生信的小兔子
加载相应的包
rm(list=ls())
options(stringsAsFactors = FALSE)
Sys.setenv("language"='en')
#DGE for microarray by limma
library(ggplot2)
library(limma)
setwd("D:/GEO数据挖掘与meta分析/练习/芯片数据limma差异分析(代码)/芯片数据limma差异分析(代码)")
导入数据
rawexprSet=read.csv("exprs-19samples.csv",header=TRUE,row.names=1,check.names = FALSE)
#读取矩阵文件
dim(rawexprSet)
[1] 39426 19
#exprSet=log2(rawexprSet)#log2转换
exprSet=rawexprSet#如果已经是log2数据,运行这一步
exprSet[1:5,1:5]
GSM1386832 GSM1386833 GSM1386834 GSM1386835 GSM1386836
ILMN_1343291 0.4605613 0.82011750 -0.8167353 0.13896560 0.09584236
ILMN_1343295 -0.8032513 0.71279240 -1.6947494 1.23427820 -0.36104536
ILMN_1651199 0.2524052 0.07226562 0.5513372 -0.10347176 0.22701597
ILMN_1651209 0.2555184 0.06649685 0.3488650 -0.05129576 0.57850313
ILMN_1651210 0.4912973 0.18652153 0.7219935 -0.07959080 0.45670270
数据来源-GEO2R处理数据
## 画箱线图,比较数据分布情况
par(mfrow=c(1,2))
boxplot(data.frame(exprSet),col="blue")
ggsave("boxplot.pdf", width = 8, height = 5)
dev.off()
limma分析
#读取样本分类信息
group <- read.csv("datTraits.csv",header=TRUE,row.names=1,check.names = FALSE)
table(group$treat_type)
atherosclerotic control
9 10
design <- model.matrix(~0+factor(group$treat_type))#把group设置成一个model matrix#
colnames(design)=levels(factor(group$treat_type))
rownames(design)=colnames(exprSet)
design
fit <- lmFit(exprSet,design)##线性拟合
cont.matrix<-makeContrasts(atherosclerotic-control,levels = design)
fit2=contrasts.fit(fit,cont.matrix)##用对比模型进行差值计算
fit2 <- eBayes(fit2) ##贝叶斯检验
##eBayes() with trend=TRUE
tempOutput = topTable(fit2,coef=1,n=Inf,adjust="BH",sort.by="B",resort.by="M")
nrDEG = na.omit(tempOutput)
write.csv(nrDEG, "limmaOut.csv")
group
design
筛选差异基因
#筛选有显著差异的基因 adj.P.Val意思是矫正后P值
foldChange=1.5 #fold change=1意思是差异是两倍
pvalue =0.01 #0.05
diff <- nrDEG
diffSig = diff[(diff$P.Val < pvalue & (diff$logFC>foldChange | diff$logFC<(-foldChange))),]
dim(diffSig)
[1] 523 6
#write.table(diffSig, file="diffSig.xls",sep="\t",quote=F)
write.csv(diffSig, "diffSig.csv")
#把上调和下调分别输入up和down两个文件
diffUp = diff[(diff$P.Val < pvalue & (diff$logFC>foldChange)),]#foldchange>0是上调,foldchange<0是下调#
dim(diffUp)
[1] 20 6
#write.table(diffUp, file="up.xls",sep="\t",quote=F)#把上调和下调分别输入up和down两个文件#
write.csv(diffUp, "diffUp.csv")
diffDown = diff[(diff$P.Val< pvalue & (diff$logFC<(-foldChange))),]
dim(diffDown)
[1] 503 6
#write.table(diffDown, file="down.xls",sep="\t",quote=F)
write.csv(diffDown, "diffDown.csv")