DEseq2 计算FPKM和FC值
2022-10-25 本文已影响0人
一直想要成为大牛的科研狗
R
#install.packages("BiocManager")
#BiocManager::install('DESeq2')
#BiocManager::install("GenomicFeatures")
library("DESeq2")
###data in
countDataraw <- as.matrix(read.csv("gene_count_matrix111.csv", row.names="Geneid"))
colDataraw <- read.csv("phenodata111.csv")
rownames(colDataraw) <- colDataraw$sample
cn <- colDataraw$group
###rownames =?colnames
all(rownames(colData) %in% colnames(countDataraw))
countDataraw <- countDataraw[ , rownames(colData)]
all(rownames(colData) == colnames(countDataraw))
# dds matrix
ddsHTSeq <- DESeqDataSetFromMatrix(countData = countDataraw,
colData = colDataraw,
design = ~ group)
ddsHTSeq = estimateSizeFactors(ddsHTSeq)
###count
ExpNor <- counts(ddsHTSeq,normalized = TRUE)
###fpkm
library(GenomicFeatures)
txdb <- makeTxDbFromGFF("merged.gtf", format = "gtf", circ_seqs = character())
ebg <- exonsBy(txdb, by="gene")
exon_gene_sizes <- sum(width(reduce(ebg)))
mcols(ddsHTSeq)$basepairs <- exon_gene_sizes
fpkm_out = fpkm(ddsHTSeq)
write.table(as.data.frame(fpkm_out), file="fpkm_out.txt",sep="\t",quote = FALSE)
###FC
####Analysis 1v1
ExpNorLoc = {}
for (i in c(1:10)) {
for (j in c(1:10) ) {
if (i < j){
s3 = i*3
s2 = i*3 -1
s1 = i*3 -2
s6 = j*3
s5 = j*3 -1
s4 = j*3 -2
countData <- countDataraw[,c(s1,s2,s3,s4,s5,s6)] #
colData <- colDataraw[c(s1,s2,s3,s4,s5,s6),]
kword = paste (sub("NormalizedFPKM_","",cn[s1]),sub("NormalizedFPKM_","",cn[s4]),sep = "_VS_")
fc <- paste("Log2Foldchange",kword,sep = "_")
pv <- paste("Pvalue",kword,sep = "_")
pj <- paste("adjPvalue",kword,sep = "_") ###
all(rownames(colData) %in% colnames(countData))
countData <- countData[ , rownames(colData)]
all(rownames(colData) == colnames(countData))
ddsHTSeq <- DESeqDataSetFromMatrix(countData = countData,
colData = colData,
design = ~ group)
dds <- DESeq(ddsHTSeq)
res <- results(dds)
ExpNorLoc[[fc]] <- res$log2FoldChange
ExpNorLoc[[pv]] <- res$pvalue
ExpNorLoc[[pj]] <- res$padj ###
}
}
}
# filter count
keep <- rowSums(counts(dds) >= 10) >= 3 #过滤低表达基因,至少有3个样品都满足10个以上的reads数
dds <- dds[keep, ]
dds <- DESeq(dds)
res <- results(dds)
diff = res
diff <- na.omit(diff) ## 去除缺失值NA
dim(diff)
write.csv(diff,"all_diff.csv")