转录组专题：limma与芯片数据差异表达分析

2020-06-30 本文已影响0人挽山
#原始数据为count表
#source("https://bioconductor.org/biocLite.R")
#options(BioC_mirror="http://mirrors.ustc.edu.cn/bioc/")
#biocLite("limma")

#选择路径保存
setwd('E:/')

library(limma)
library(edgeR)
#表达矩阵
exprSet<-read.csv(file.choose(),header = T,sep = ",") #file="12_gene_count_matrix.csv"
head(exprSet)

#列名为样本号
row.names(exprSet)<-exprSet[,1]
exprSet<-exprSet[,-1]
head(exprSet)

#分组信息
condition<-factor(c(rep("ASD",2),rep("Healthy",4),rep("ASD",1),rep("Healthy",2),rep("ASD",3)), levels = c("ASD","Healthy"))
condition

#分组矩阵
design<-model.matrix(~0+condition)
colnames(design)<-levels(condition)
rownames(design)<-colnames(exprSet)
design

v<-voom(exprSet, 
        design, 
        normalize = 'quantile', 
        plot=TRUE)

fit<-lmFit(v, design)
fit2<-eBayes(fit)

#声明比较矩阵
cont.matrix<-makeContrasts(contrasts = c('ASD-Healthy'), levels = design)
fit3<-contrasts.fit(fit2, cont.matrix)

#结果
DEG1<-topTable(fit3, coef = 2, n = Inf) #
DEG2<-na.omit(DEG1)
head(DEG2); dim(DEG2)

#完整保存
write.table(diff_final,"diff_signif_final_limma.txt",row.names = T,quote = F,sep = "\t")

#设置阈值 FC=2^log2FC
p = 0.05
padj = 0.1
foldChange = 1.5

#FDR
diff_signif1<-DEG2[(DEG2$adj.P.Val < padj & 
                       (DEG2$logFC > foldChange | DEG2$logFC < (-foldChange))),]
dim(diff_signif1)

#不矫正
diff_signif2<-DEG2[(DEG2$P.Value < p & 
                          (DEG2$logFC > foldChange | DEG2$logFC < (-foldChange))),]
dim(diff_signif2)

#排序（选有用的三列）
diff_final<-diff_signif[order(diff_signif$logFC), c(1,4,5)] #选择是否矫正
head(diff_final);dim(diff_final)

#筛选保存
write.table(diff_final,"diff_signif_final_limma.txt",row.names = T,quote = F,sep = "\t")

#save(diff_final, file = 'limma_diff.Rdata')

#差异基因注释======================================================================

#注释文件
ensembl2symbol<-read.table(file.choose(),header=T, sep="\t") #用矩阵，biomart自动有标题
head(ensembl2symbol)

symbol2id<-read.table(file = file.choose(),header = T,sep = '\t')
head(symbol2id);colnames(symbol2id)<-c('gene_symbol','gene_id','gene_symbol2')
symbol2id<-symbol2id[,c(1,2)]

#DEG注释(diff_final 或 DEG2)
#DEG<-read.table(file.choose(),header=T, sep="\t") 
#head(DEG)
colnames(diff_final)[1]<-"Ensembl"

#library(tidyr)
#y<-separate(MAT, col=ensembl,into=c("ENSG","dot"),sep="\\.",remove = T);head(y)

#ensembl2symbol
ensg2id_dif<-merge(diff_final,ensembl2symbol,by.x="Ensembl",by.y="Gene.stable.ID.version",all=F,sort=F)
head(ensg2id_dif); dim(ensg2id_dif)

exprSet_new<-ensg2id_dif[,c(14,2:13)];head(ensg2id_dif);dim(ensg2id_dif)

write.table(exprSet_new,"dif_note_limma.txt",row.names = F,quote = F,sep = "\t")
转录组专题：limma与芯片数据差异表达分析

猜你喜欢

热点阅读