转录组专题:limma与芯片数据差异表达分析
2020-06-30 本文已影响0人
挽山
#原始数据为count表
#source("https://bioconductor.org/biocLite.R")
#options(BioC_mirror="http://mirrors.ustc.edu.cn/bioc/")
#biocLite("limma")
#选择路径保存
setwd('E:/')
library(limma)
library(edgeR)
#表达矩阵
exprSet<-read.csv(file.choose(),header = T,sep = ",") #file="12_gene_count_matrix.csv"
head(exprSet)
#列名为样本号
row.names(exprSet)<-exprSet[,1]
exprSet<-exprSet[,-1]
head(exprSet)
#分组信息
condition<-factor(c(rep("ASD",2),rep("Healthy",4),rep("ASD",1),rep("Healthy",2),rep("ASD",3)), levels = c("ASD","Healthy"))
condition
#分组矩阵
design<-model.matrix(~0+condition)
colnames(design)<-levels(condition)
rownames(design)<-colnames(exprSet)
design
v<-voom(exprSet,
design,
normalize = 'quantile',
plot=TRUE)
fit<-lmFit(v, design)
fit2<-eBayes(fit)
#声明比较矩阵
cont.matrix<-makeContrasts(contrasts = c('ASD-Healthy'), levels = design)
fit3<-contrasts.fit(fit2, cont.matrix)
#结果
DEG1<-topTable(fit3, coef = 2, n = Inf) #
DEG2<-na.omit(DEG1)
head(DEG2); dim(DEG2)
#完整保存
write.table(diff_final,"diff_signif_final_limma.txt",row.names = T,quote = F,sep = "\t")
#设置阈值 FC=2^log2FC
p = 0.05
padj = 0.1
foldChange = 1.5
#FDR
diff_signif1<-DEG2[(DEG2$adj.P.Val < padj &
(DEG2$logFC > foldChange | DEG2$logFC < (-foldChange))),]
dim(diff_signif1)
#不矫正
diff_signif2<-DEG2[(DEG2$P.Value < p &
(DEG2$logFC > foldChange | DEG2$logFC < (-foldChange))),]
dim(diff_signif2)
#排序(选有用的三列)
diff_final<-diff_signif[order(diff_signif$logFC), c(1,4,5)] #选择是否矫正
head(diff_final);dim(diff_final)
#筛选保存
write.table(diff_final,"diff_signif_final_limma.txt",row.names = T,quote = F,sep = "\t")
#save(diff_final, file = 'limma_diff.Rdata')
#差异基因注释======================================================================
#注释文件
ensembl2symbol<-read.table(file.choose(),header=T, sep="\t") #用矩阵,biomart自动有标题
head(ensembl2symbol)
symbol2id<-read.table(file = file.choose(),header = T,sep = '\t')
head(symbol2id);colnames(symbol2id)<-c('gene_symbol','gene_id','gene_symbol2')
symbol2id<-symbol2id[,c(1,2)]
#DEG注释(diff_final 或 DEG2)
#DEG<-read.table(file.choose(),header=T, sep="\t")
#head(DEG)
colnames(diff_final)[1]<-"Ensembl"
#library(tidyr)
#y<-separate(MAT, col=ensembl,into=c("ENSG","dot"),sep="\\.",remove = T);head(y)
#ensembl2symbol
ensg2id_dif<-merge(diff_final,ensembl2symbol,by.x="Ensembl",by.y="Gene.stable.ID.version",all=F,sort=F)
head(ensg2id_dif); dim(ensg2id_dif)
exprSet_new<-ensg2id_dif[,c(14,2:13)];head(ensg2id_dif);dim(ensg2id_dif)
write.table(exprSet_new,"dif_note_limma.txt",row.names = F,quote = F,sep = "\t")