收入即学习

FPKM差异表达分析

2020-10-12  本文已影响0人  谢京合_数据分析人

读取表达矩阵

# Read the expression matrix; the first line of the file holds the column names.
# NOTE(review): setwd() hard-codes a machine-specific directory. It is kept
# because the relative input/output paths used in this script depend on it.
setwd("E:/8.差异表达基因/")
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
a <- read.table("RNAmatrix.txt", header = TRUE)

去重复

# Keep only the first row seen for each gene name.
# Indexing rows with the gene_name values themselves (a[a$gene_name, ])
# looks rows up by name/position and does not remove duplicates.
b <- a[!duplicated(a$gene_name), ]

将第一列设为行名,然后去掉第一列

# `c` is not assigned anywhere earlier in this script, so start from the
# table `b`; otherwise `rownames(c) <- ...` operates on the base function c().
c <- b
# Use the first column as row names (presumably the gene identifier column;
# confirm), which requires its values to be unique.
rownames(c) <- c[, 1]

# Drop the first column now that its values are stored as row names.
c <- c[, -1]

选择所需数据

# The original placeholder `a <- c[,-()]` is not valid R (empty parentheses)
# and prevents the script from parsing, so it is kept only as a commented
# template. NOTE(review): fill in the columns to drop if this step is needed.
# a <- c[, -(i:j)]

# Keep columns 10 to 15 only.
c <- c[, 10:15]

dat1 <- as.data.frame(c)
dim(dat1)       # number of rows and columns
dat1[1:4, 1:4]  # preview the top-left corner

许多基因在多个样品中的表达量为0。这里的筛选规则是:如果某个基因在超过3个样品中的表达量向下取整后为0(即0 ≤ FPKM < 1),则直接舍去该基因。

# Drop rows whose floored value is 0 (i.e. a value in [0, 1)) in more than
# `max_zero_samples` columns.
max_zero_samples <- 3

# Compute the flag once on a matrix instead of calling apply() twice over the
# data frame; rowSums() counts the matching columns per row.
low_expr <- rowSums(floor(as.matrix(dat1)) == 0) > max_zero_samples

# Summarise how many rows are flagged rather than printing one value per row.
table(low_expr)

dat1 <- dat1[!low_expr, ]
dim(dat1)
head(dat1)
write.csv(dat1, "dat1.csv")

boxplot(dat1)

各样品的表达量数值跨度很大,取对数(log)进行转换

# Natural-log transforms of the filtered matrix.
# dat3: plain log(dat1). Per the original note, downstream results had
#       missing values with this version.
# dat4: log(dat1 + 1), the version chosen for the downstream analysis.
dat3 <- log(dat1)
dat4 <- log(dat1 + 1)

# Compare the two distributions.
boxplot(dat3)
boxplot(dat4)

# Write both transformed matrices to disk.
write.csv(dat3, "dat3.csv")
write.csv(dat4, "dat4.csv")

差异基因分析

library(limma)

# Sample labels: 50 "normal" followed by 374 "cancer".
# NOTE(review): the labels must follow the column order of dat4; confirm.
group <- c(rep("normal", 50), rep("cancer", 374))
head(group)
# table() works in non-interactive sessions, unlike View().
table(group)

# lmFit() needs exactly one design row per column of the expression data.
# Fail early with a clear message instead of an opaque dimension error.
if (length(group) != ncol(dat4)) {
  stop(
    "group has ", length(group), " labels but dat4 has ", ncol(dat4),
    " sample columns; they must match one-to-one.",
    call. = FALSE
  )
}

# One indicator column per group, no intercept; columns are renamed to the
# factor levels so they can be referenced by name in the contrast.
group <- factor(group)
design <- model.matrix(~ 0 + group)
colnames(design) <- levels(group)
design

# Contrast is normal minus cancer.
contrast.matrix <- makeContrasts(normal - cancer, levels = design)
contrast.matrix

fit <- lmFit(dat4, design)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)

# Full result table for the single contrast, with BH (FDR) adjusted p-values.
# The argument name is spelled out instead of relying on partial matching.
allDiff1 <- topTable(fit2, coef = 1, number = Inf, adjust.method = "BH")

# save() writes R's binary RData format despite the .txt extension; read it
# back with load(). The file name is kept unchanged for existing users.
save(dat4, group, allDiff1, file = "RNAmatrix_result.txt")

write.csv(allDiff1, file = "normal_vs_cancer.csv")
上一篇 下一篇

猜你喜欢

热点阅读