计算基因表达量fpkm和tpm
2020-08-12 本文已影响0人
牛顿大尉
简单粗暴,附上代码
rate <- counts / lengths
rate / sum(counts) * 1e9
}
#----------------循环读入和写出---------
path='F:/matrix'#路径下全部是待处理的count文件
filename=dir(path)
for(i in 1:length(filename)){
control1<-read.table(file=paste(path,filename[i],sep='/'),sep = "\t",col.names = c("gene_id","count"))
control2<-read.table("F:/merge/gene1.length.txt",sep = "\t",col.names = c("gene_id","genelen"))#固定的
count_len<-merge(control1,control2,by='gene_id')
exprSet_rpkm=rpkm(count_len['count'],count_len['genelen'])
exprSet_tpm=1e6*exprSet_rpkm/colSums(exprSet_rpkm)
prefix<-filename[i]#设置输出文件前缀名
final<- cbind(count_len['gene_id'],exprSet_tpm)
write.csv(final,paste0(prefix,"_tpm.csv"),row.names=FALSE)
}