单细胞数据分析

RNA速率分析流程

2021-08-05  本文已影响0人  小贝学生信
image.png

示例数据:GSE178911→GSM5400792→ SRR14915863~SRR14915866

https://sra-download.ncbi.nlm.nih.gov/traces/sra7/SRZ/014915/SRR14915863/201008_D0_scRNA_fastq_S1_L001_R1_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra7/SRZ/014915/SRR14915863/201008_D0_scRNA_fastq_S1_L001_R2_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra30/SRZ/014915/SRR14915864/201008_D0_scRNA_fastq_S1_L002_R1_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra30/SRZ/014915/SRR14915864/201008_D0_scRNA_fastq_S1_L002_R2_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra51/SRZ/014915/SRR14915865/201008_D0_scRNA_fastq_S1_L003_R1_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra51/SRZ/014915/SRR14915865/201008_D0_scRNA_fastq_S1_L003_R2_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra67/SRZ/014915/SRR14915866/201008_D0_scRNA_fastq_S1_L004_R1_001.fastq.gz
https://sra-download.ncbi.nlm.nih.gov/traces/sra67/SRZ/014915/SRR14915866/201008_D0_scRNA_fastq_S1_L004_R2_001.fastq.gz

1、cellranger比对(linux)

bin=~/biosoft/cellranger-6.0.2/bin/cellranger
db=./refdata-gex-GRCh38-2020-A
fq_dir=./fastq/GSM5400792

$bin count --id=201008_D0_scRNA_fastq \
--localcores=10 \
--transcriptome=$db \
--fastqs=$fq_dir \
--sample=201008_D0_scRNA_fastq \
--expect-cells=3000 \
--nosecondary
#比对时间视使用线程数而定

2、velocyto生成loom文件(linux)

conda create -n velocyto 
conda activate velocyto 
conda install samtools
conda install numpy scipy cython numba matplotlib scikit-learn h5py click
pip install pysam
 # `PyPI`安装
pip install velocyto

http://velocyto.org/velocyto.py/tutorial/cli.html
You might want to mask expressed repetitive elements, since those count could constitute a confounding factor in the downstream analysis. To do so you would need to download an appropriate expressed repeat annotation (for example from UCSC genome browser and make sure to select GTF as output format).
https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=611454127_NtvlaW6xBSIRYJEBI0iRDEWisITa&clade=mammal&org=Human&db=0&hgta_group=allTracks&hgta_track=rmsk&hgta_table=rmsk&hgta_regionType=genome&position=&hgta_outputType=gff&hgta_outFileName=mm10_rmsk.gtf

cellranger_outDir=201008_D0_scRNA_fastq
cellranger_gtf=refdata-gex-GRCh38-2020-A/genes/genes.gtf
rmsk_gtf=rmsk.gtf
ls -lh $rmsk_gtf  $cellranger_outDir $cellranger_gtf

velocyto run10x -m $rmsk_gtf  $cellranger_outDir $cellranger_gtf
#比较耗时

3、Seurat标准流程分析(Rstudio)

4 、velocyto.R速率分析、可视化(linux环境的R)

library(Seurat)
library(velocyto.R)
library(tidyverse)
# sce_all = readRDS("sce.rds")
# sce_all  = SplitObject(sce_all, split.by = "orig.ident")
# sce_1 = sce_all[[1]]
sce_1 = readRDS("sce.rds")
sce_1@assays$RNA@counts[1:4,1:4]
velo_1 <- read.loom.matrices("201008_D0_scRNA_fastq.loom")
velo_1$spliced[1:4,1:4]


#以Seurat对象为标准,统一loom文件里的细胞名和数量
colnames(velo_1$spliced)<-gsub("x","-1",colnames(velo_1$spliced))
colnames(velo_1$spliced)<-gsub("201008_D0_scRNA_fastq:","1_",colnames(velo_1$spliced))
colnames(velo_1$unspliced)<-colnames(velo_1$spliced)
colnames(velo_1$ambiguous)<-colnames(velo_1$spliced)

velo_1$spliced<-velo_1$spliced[,rownames(sce_1@meta.data)]
velo_1$unspliced<-velo_1$unspliced[,rownames(sce_1@meta.data)]
velo_1$ambiguous<-velo_1$ambiguous[,rownames(sce_1@meta.data)]

#速率分析
sp <- velo_1$spliced
unsp <- velo_1$unspliced
sce_1_umap <- sce_1@reductions$umap@cell.embeddings
ident.colors <- (scales::hue_pal())(n = length(x = levels(x = sce_1)))
names(x = ident.colors) <- levels(x = sce_1)
cell.colors <- ident.colors[Idents(object = sce_1)]
names(x = cell.colors) <- colnames(x = sce_1)
fit.quantile = 0.02
cell.dist <- as.dist(1-armaCor(t(sce_1@reductions$pca@cell.embeddings)))
rvel.cd <- gene.relative.velocity.estimates(sp,unsp,deltaT=2, kCells=10, 
                                            cell.dist=cell.dist,fit.quantile=fit.quantile,n.cores=24)

#可视化
Idents(sce_1) = "fine"
gg <- UMAPPlot(sce_1)
colors <- as.list(ggplot_build(gg)$data[[1]]$colour)
names(colors) <- rownames(sce_1_umap)
show.velocity.on.embedding.cor(sce_1_umap,rvel.cd,n=30,scale='sqrt',cell.colors=ac(colors,alpha=0.5),cex=0.8,arrow.scale=2,show.grid.flow=T,min.grid.cell.mass=1.0,grid.n=50,arrow.lwd=1,do.par=F,cell.border.alpha =0.1,USE_OPENMP=1,n.cores=24,main="Cell Velocity")
#比较耗时~~

在加载Seurat对象时,可以仅提取感兴趣的亚群,进行分析~

上一篇下一篇

猜你喜欢

热点阅读