RNA-seq实战
2021-01-20 本文已影响0人
晓颖_9b6f
代码如下
# 1. Install the tools that are not yet present on this machine.
# The conda package for Trim Galore is named "trim-galore"; written as
# "Trim Galore" conda would look for two separate (non-existent) packages.
# The bioinformatics tools are pulled from the bioconda channel.
conda install -c bioconda -c conda-forge trim-galore cutadapt subread salmon
# 2. Download the data.
# Only three transcriptome runs are fetched (untreated, dexamethasone-treated,
# albuterol-treated); the remaining runs are too much data to process later.
for acc in SRR1039508 SRR1039509 SRR1039510; do
  prefetch "$acc"
done
# The reference genome (hg38.fa) was downloaded separately with the Xunlei
# (Thunder) download manager, so there is no command for it here.
# Download the annotation file gencode.v36.annotation.gtf.gz.
wget "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_36/gencode.v36.annotation.gtf.gz"
# 3. Pipeline steps.
# Convert the downloaded SRA runs to FASTQ inside a dedicated conda env.
# NOTE(review): tools installed earlier into another conda environment may
# not be visible after activating "rna" -- confirm they are on PATH here.
conda create -n rna python=2
source activate rna
# -O expects an output DIRECTORY; the accession must be passed as the
# positional argument, otherwise fastq-dump receives no input at all.
# --split-3 writes <acc>_1.fastq.gz / <acc>_2.fastq.gz for paired reads.
for acc in SRR1039508 SRR1039509 SRR1039510; do
  fastq-dump --gzip --split-3 -O . "$acc"
done
# Quality control: FastQC report on the raw reads, then adapter/quality
# trimming with Trim Galore.
for id in SRR1039508 SRR1039509 SRR1039510; do
  # -o takes the report output directory; both mates are positional inputs.
  fastqc -t 2 -o . "${id}_1.fastq.gz" "${id}_2.fastq.gz"
  # --paired trims both mates together and writes
  # ${id}_1_val_1.fq.gz / ${id}_2_val_2.fq.gz, which the alignment step reads.
  trim_galore -q 20 --phred33 --stringency 3 --length 20 --paired \
    -o . "${id}_1.fastq.gz" "${id}_2.fastq.gz"
done
# Alignment: map the trimmed paired reads of every sample to the hg38
# HISAT2 index and write one SAM file per sample (${id}.hisat.sam).
# The backslashes are real line continuations (last character on the line);
# a backslash followed by a space would instead glue a leading space onto
# the next option and hisat2 would not recognise it.
for id in SRR1039508 SRR1039509 SRR1039510; do
  hisat2 -p 2 -x /home/xiaoying/hg38/genome \
    -1 "${id}_1_val_1.fq.gz" \
    -2 "${id}_2_val_2.fq.gz" \
    -S "${id}.hisat.sam"
done
# Convert SAM to BAM.
# Input names must match what the alignment step writes (${id}.hisat.sam);
# the output stays ${id}.bam, which is what the sorting step expects.
for id in SRR1039508 SRR1039509 SRR1039510; do
  samtools view -bS -h -o "${id}.bam" "${id}.hisat.sam"
done
# Sort the BAM files, using 5 additional threads.
# -o is given before the input so it is parsed as an option; the original
# "\ -o" (backslash + space) turned it into a positional argument.
for id in SRR1039508 SRR1039509 SRR1039510; do
  samtools sort -@ 5 -o "${id}.sort.bam" "${id}.bam"
done
# Read counting: summarise paired-end reads (-p) over exon features, grouped
# by gene_id, for each sorted BAM file.
# The input is SRR1039<fn>.sort.bam, the name produced by the sorting step.
# NOTE(review): the annotation path points at GENCODE v25 while the download
# step fetches v36 -- confirm which annotation file is intended.
for fn in {508..510}; do
  featureCounts -T 5 -p -t exon -g gene_id \
    -a /home/xiaoying/gencode.v25.annotation.gtf.gz \
    -o "SRR1039${fn}.counts.txt" "SRR1039${fn}.sort.bam"
done
# Salmon quantification: build the transcriptome index once, then quantify
# each sample into its own <sample>_quant directory.
salmon index -t /home/xiaoying/Homo_sapiens.GRCh38.cdna.all.fa -i hg381_index
# Each sample writes to a directory named after itself (the first sample
# previously wrote to SRR1039510_quant and was overwritten by the third).
# Inputs are the gzipped FASTQ files produced by "fastq-dump --gzip".
for id in SRR1039508 SRR1039509 SRR1039510; do
  salmon quant -i hg381_index -l A \
    -1 "${id}_1.fastq.gz" \
    -2 "${id}_2.fastq.gz" \
    -o "${id}_quant"
done
#salmon结果导入R
image.png
image.png