my RNA-seq生物信息学RNASeq 数据分析

hisat2+featurecounts+DESeq2

2018-12-28  本文已影响454人  njmujjc

##从hisat2官网下载打包好的reference,然后比对##

hisat2 -p 16 -x ../reference/hg19/hisat2/grch37_snp_tran/genome_snp_tran -1 gjy2_1.fastq.gz-2 gjy2_2.fastq.gz-S gjy2.sam--summary-file gjy2.txt

samtools sort --threads 15 -o **.bam **.sam

##featurecounts计算rawcounts##

featureCounts -T 15 -p -t exon -g gene_id -a ../../reference/hg19/hisat2/ensemble_gtf/Homo_sapiens.GRCh37.75.gtf -o gjy1_featurecounts.txt**.bam

##取rawcounts列##

cut -f 1,7 gjy1_featurecounts.txt|grep -v '^#' > gjy1_rawcounts.txt

##在Rsudio中分析差异基因###

setwd('xx_result')

library("DESeq2")

directory <-'xx/06_featurecounts'

directory

sampleFiles <- grep("wy",list.files(directory),value=TRUE)

sampleFiles

###注意control要放在前面####

sampleCondition <- c("control","control","KO","KO","KO")

sampleCondition

sampleTable <- data.frame(sampleName= sampleFiles,fileName = sampleFiles,condition = sampleCondition)

sampleTable

dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,directory = directory,design= ~ condition)

dds

dds <- dds [ rowSums(counts(dds)) > 1, ]

#PCA#

rld<-rlog(dds)

plotPCA(rld)

dds<-DESeq(dds)

res <- results(dds)

head(res)

summary(res)

resOrdered <- res[order(res$padj),]

resOrdered=as.data.frame(resOrdered)

head(resOrdered)

resOrdered=na.omit(resOrdered)

DEmiRNA=resOrdered[abs(resOrdered$log2FoldChange)>log2(1.5) & resOrdered$padj <0.01 ,]

head(resOrdered)

write.csv(resOrdered,"hisat2_samtools_htseq_DESeq2_output.csv")

##volcano.plot###

alpha <- 0.01 # Threshold on the adjusted p-value

cols <- densCols(res$log2FoldChange, -log10(res$pvalue))

plot(res$log2FoldChange, -log10(res$padj), col=cols, panel.first=grid(),

main="Volcano plot", xlab="Effect size: log2(fold-change)", ylab="-log10(adjusted p-value)",

pch=20, cex=0.6)

abline(v=0)

abline(v=c(-1,1), col="brown")

abline(h=-log10(alpha), col="brown")

gn.selected <- abs(res$log2FoldChange) > 2.5 & res$padj < alpha

text(res$log2FoldChange[gn.selected],

-log10(res$padj)[gn.selected],

lab=rownames(res)[gn.selected ], cex=0.7)

#MA图#

library("geneplotter")

plotMA(res,main="DESeq2",ylim=c(-2,2))

#heatmap#

select<-order(rowMeans(counts(dds,normalized=TRUE)),decreasing = TRUE)[1:666]

nt<-normTransform(dds)

log2.norm.counts<-assay(nt)[select,]

df<-as.data.frame(colData(dds))

library(pheatmap)

pheatmap(log2.norm.counts,cluster_rows = TRUE,show_rownames = FALSE,cluster_cols = TRUE,annotation_col = df)

上一篇下一篇

猜你喜欢

热点阅读