10X V(D)J实战

2020-08-28 本文已影响0人 nvzhang

文献

Generation and function of progenitor T cells from StemRegenin-1–expanded CD34+ human hematopoietic progenitor ells

背景

Broader clinical application of umbilical cord blood (UCB), as a source of hematopoietic stem/progenitor cells (HSPCs), is limited by low CD34+ and T-cell numbers, contributing to slow lymphohematopoietic recovery, infection, and relapse. Studies have evaluated the safety, feasibility, and expedited neutrophil recovery associated with the transplantation of CD34+ HSPCs from ex vivo expansion cultures using the aryl hydrocarbon receptor antagonist StemRegenin-1 (SR1).To expedite immune recovery, we hypothesized that SR1-expanded HSPCs together with proT cells could overcome the known T-cell immune deficiency that occurs post-HSCT. Single-cell RNA sequencing of peripheral CD3+ T cells from mice injected with either naive or SR1 proT cells revealed functional subsets of T cells with polyclonal T-cell receptor repertoires.

数据结构：

Superseries	Subseries
GSE135929	GSE135927	5' scRNA Seq
	GSE135928	3' VDJ Seq

其中5' scRNA Seq数据使用cellranger -count进行分析，得到barcode,matrix,features三个结果，用来下游分析；3' VDJ Seq数据使用cellranger -vdj进行分析，得到contigs.annotation.csv文件，用来下游分析。（这些结果也能在GEO上直接下载）

准备

mkdir -p bloodadv2019/{srr,cellranger,fastq}
cd bloodadv2019/srr
for i in `seq 69 84 `;
do prefetch SRR99927${i} ;done
for i in `seq 69 84 `;
do fastq-dump --gzip --split-files -A SRR9927${i}  SRR9927${i} -O ../fastq; done
#将fastq文件名修改为cellranger能识别的形式（[Sample Name]_S1_L00[Lane Number]_[Read Type]_001.fastq.gz）
ls *_1.fastq.gz | while read id ;
do mv ${id%%_*}_1.fastq.gz ${id%%_*}_S1_L001_I1_001.fastq.gz;
mv ${id%%_*}_2.fastq.gz ${id%%_*}_S1_L001_R1_001.fastq.gz;
mv ${id%%_*}_3.fastq.gz  ${id%%_*}_S1_L001_R2_001.fastq.gz;
done

cellranger -count

安装cellranger和相应参考文库：https://cloud.tencent.com/developer/article/1606141

cd ../cellranger
for i in `seq 73 76`;
do /home/ubuntu/cellranger-4.0.0/cellranger count --id=SR1_rna --fastqs=/home/ubuntu/bloodadv2019/fastq/ --sample=SRR99927${i}_S1 --transcriptome=/home/ubuntu/refdata-cellranger-GRCh38-and-mm10-3.1.0 --nosecondary;
done
for i in `seq 69 72`;
do /home/ubuntu/cellranger-4.0.0/cellranger count --id=naive_rna --fastqs=/home/ubuntu/bloodadv2019/fastq/ --sample=SRR99927${i}_S1 --transcriptome=/home/ubuntu/refdata-cellranger-GRCh38-and-mm10-3.1.0 --nosecondary;
done

cellranger -vdj

for i in `seq 81 84`;
do /home/ubuntu/cellranger-4.0.0/cellranger vdj --id=SR1_vdj --fastqs=/home/ubuntu/bloodadv2019/fastq/ --sample=Naive_TCR --reference=/home/ubuntu/refdata-cellranger-vdj-GRCh38-alts-ensembl-4.0.0/;
done
for i in `seq 77 80`;
do /home/ubuntu/cellranger-4.0.0/cellranger vdj --id=naive_vdj --fastqs=/home/ubuntu/bloodadv2019/fastq/ --sample=Naive_TCR --reference=/home/ubuntu/refdata-cellranger-vdj-GRCh38-alts-ensembl-4.0.0/;
done

结果整合

（还在施工）

Seurat

三个结果带前缀的话会报错

library(Seurat)
library(cowplot)
library(hdf5r)
#加载Cellranger output file
input_naive <- Read10X(data.dir = "/RStudio/working_path/10x vdj/GSE135929_RAW/filtered_gene_bc_matrices/naive/")
input_SR1 <- Read10X(data.dir = "/RStudio/working_path/10x vdj/GSE135929_RAW/filtered_gene_bc_matrices/SR1/")
naive <- CreateSeuratObject(counts = input_naive,project = "10X")
SR1 <- CreateSeuratObject(counts = input_SR1,project = "10X")
#设置阈值过滤细胞
naive$percent.mito <- PercentageFeatureSet(naive, pattern = "^mt-")
VlnPlot(object = naive,features = c('nFeature_RNA','nCount_RNA','percent.mito'))
naive <- subset(naive,nCount_RNA >= 92 & nCount_RNA <= 30000)
SR1$percent.mito <- PercentageFeatureSet(SR1, pattern = "^mt-")
VlnPlot(object = SR1,features = c('nFeature_RNA','nCount_RNA','percent.mito'))
SR1 <- subset(SR1,nCount_RNA >= 37 & nCount_RNA <= 15000)
#add contigs
add_contigs <- function(contigs_path, seurat_obj){
contigs <- read.csv(contigs_path)
contigs <- contigs[!duplicated(contigs$barcode), ]
rownames(contigs) <- contigs[,1]
contigs[,1] <- NULL
contigs_seurat <- AddMetaData(seurat_obj, metadata=contigs)}
s_naive <- add_contigs("./contig/GSM4038045_Naive.csv",naive)
s_SR1 <- add_contigs("./contig/GSM4038045_Naive.csv",SR1)
#进行常规workflow
workflow <- function(seurat_objective){
seu_obj <- NormalizeData(seurat_objective, normalization.method = "LogNormalize", scale.factor = 10000)
seu_obj <- FindVariableFeatures(seu_obj,selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(seu_obj)
seu_obj <- ScaleData(seu_obj, features = all.genes)
seu_obj <- RunPCA(seu_obj, features = VariableFeatures(object = seu_obj))
seu_obj <- RunTSNE(seu_obj, features = VariableFeatures(object = seu_obj))}
s_naive <- workflow(s_naive)
s_SR1 <- workflow(s_SR1)
#cluster
cluster <- function(seu_obj,n){
use.pcs = 1:10
s <- FindNeighbors(seu_obj, dims = use.pcs)
s <- FindClusters(s, resolution = c(n))
s <- RunUMAP(s, dims = use.pcs)}
c_naive <- cluster(s_naive,0.45)
c_SR1 <-cluster(s_SR1,0.8)
#plot
##changetable
current.cluster.ids <- c(0, 1, 2, 3, 4, 5, 6)
new.cluster.ids <- c("C1","C2","C3","C4","C5","C6","C7")
c_naive@active.ident <- plyr::mapvalues(x = c_naive@active.ident,
  from = current.cluster.ids,to = new.cluster.ids)
c_SR1@active.ident <- plyr::mapvalues(x = c_SR1@active.ident,
from = current.cluster.ids,to = new.cluster.ids)
plot <- function(x,title){
p1 <- DimPlot(x, reduction = "tsne", label = TRUE) + labs(title = title)
p2 <- FeaturePlot(x, reduction = "tsne", features = "CD4") +labs(title = title, subtitle = "CD4")
p3 <- FeaturePlot(x, reduction = "tsne", features = "CD8A")+labs(title = title, subtitle = "CD8A")
library(patchwork)
p1+p2+p3}
plot(c_naive,"Naive")
plot(c_SR1,"SR1")

图片.png

#findmarkers
findmarkers <- function(x,title){s_markers <- FindAllMarkers(x,min.pct = 0.25)
top5 <- s_markers %>% group_by(cluster) %>% top_n(5, avg_logFC)
DoHeatmap(x, features = top5$gene) + NoLegend()+ labs(title = title)}
findmarkers(c_naive,"Naive")
findmarkers(c_SR1,"SR1")

???如何做出像原文中指定cluster的热图

immunarch

用原始contig数据做出的图太奇怪了，所以我按chain的类型拆成了TRA和TRB.....

library(immunarch)
naive_contig <- read.csv("./contig/GSM4038045_Naive.csv")
SR1_contig <- read.csv("./contig/GSM4038046_SR1.csv")
split <- function(x,prefix){
TRA <- x[which(x$chain=="TRA"),]
TRB <- x[which(x$chain=="TRB"),]
write.csv(TRA,paste0(prefix,"_TRA.csv"))
write.csv(TRB,paste0(prefix,"_TRB.csv")) }
split(naive_contig,"naive")
split(SR1_contig,"SR1")
TCR <- repLoad("./contig/TCR/")
TRA <- repLoad("./contig/TRA/")
TRB <- repLoad("./contig/TRB/")
names(TCR$data) <- c("Naive","SR1")
names(TRA$data) <- c("Naive","SR1")
names(TRB$data) <- c("Naive","SR1")

repDiversity(.data = TCR$data, .method = "div", .q = 1) %>% vis()
p4 <- spectratype(TRA$data[[1]], .col="aa+v",.quant = "count") %>% vis()
p5 <- spectratype(TRA$data[[2]], .col="aa+v",.quant = "count") %>% vis()
p4/p5
imm_gu <- geneUsage(TRB$data[c(1, 2)], "hs.trbv", .norm = T)
vis(imm_gu)