走进转录组R生信相关

DEseq2 计算FPKM和FC值

2022-10-25  本文已影响0人  一直想要成为大牛的科研狗

R

#install.packages("BiocManager")
#BiocManager::install('DESeq2') 
#BiocManager::install("GenomicFeatures") 

library("DESeq2")
###data in
countDataraw <- as.matrix(read.csv("gene_count_matrix111.csv", row.names="Geneid"))
colDataraw <- read.csv("phenodata111.csv")
rownames(colDataraw) <- colDataraw$sample
cn <- colDataraw$group

###rownames =?colnames
all(rownames(colData) %in% colnames(countDataraw))
countDataraw <- countDataraw[ , rownames(colData)]
all(rownames(colData) == colnames(countDataraw))

# dds matrix
ddsHTSeq <- DESeqDataSetFromMatrix(countData = countDataraw, 
                                   colData = colDataraw, 
                                   design = ~ group)
ddsHTSeq = estimateSizeFactors(ddsHTSeq) 

###count
ExpNor <- counts(ddsHTSeq,normalized = TRUE)
###fpkm
library(GenomicFeatures)
txdb <- makeTxDbFromGFF("merged.gtf", format = "gtf", circ_seqs = character())
ebg <- exonsBy(txdb, by="gene")
exon_gene_sizes <- sum(width(reduce(ebg)))
mcols(ddsHTSeq)$basepairs <- exon_gene_sizes
fpkm_out = fpkm(ddsHTSeq)
write.table(as.data.frame(fpkm_out), file="fpkm_out.txt",sep="\t",quote = FALSE)

###FC
####Analysis 1v1
ExpNorLoc = {}
for (i in c(1:10)) {
  for (j in c(1:10) ) {
    if (i < j){
      s3 = i*3
      s2 = i*3 -1
      s1 = i*3 -2
      s6 = j*3
      s5 = j*3 -1
      s4 = j*3 -2           
      countData <- countDataraw[,c(s1,s2,s3,s4,s5,s6)] # 
      colData <- colDataraw[c(s1,s2,s3,s4,s5,s6),]
      kword = paste (sub("NormalizedFPKM_","",cn[s1]),sub("NormalizedFPKM_","",cn[s4]),sep = "_VS_")
      fc <- paste("Log2Foldchange",kword,sep = "_")
      pv <- paste("Pvalue",kword,sep = "_")
      pj <- paste("adjPvalue",kword,sep = "_") ###
      all(rownames(colData) %in% colnames(countData))
      countData <- countData[ , rownames(colData)]
      all(rownames(colData) == colnames(countData))
      ddsHTSeq <- DESeqDataSetFromMatrix(countData = countData, 
                                         colData = colData, 
                                         design = ~ group)

      dds <- DESeq(ddsHTSeq) 
      res <- results(dds) 
      ExpNorLoc[[fc]] <- res$log2FoldChange
      ExpNorLoc[[pv]] <- res$pvalue
      ExpNorLoc[[pj]] <- res$padj  ###
    }
  }
}






# filter count
keep <- rowSums(counts(dds) >= 10) >= 3  #过滤低表达基因,至少有3个样品都满足10个以上的reads数  
dds <- dds[keep, ]

dds <- DESeq(dds)
res <- results(dds)
diff = res
diff <- na.omit(diff)  ## 去除缺失值NA
dim(diff)
write.csv(diff,"all_diff.csv")

上一篇下一篇

猜你喜欢

热点阅读