# scTCR-seq分析: MiXCR + scRepertoire
# 2024-05-06 本文已影响0人
# 重拾生活信心
# Setup -------------------------------------------------------------
# The workspace is deliberately NOT cleared with `rm(list = ls())`: that call
# deletes whatever the caller has in the global environment while leaving
# attached packages and options untouched, so it does not give a clean
# session. Restart R instead when a clean session is needed.
#
# One-time installation of scRepertoire from GitHub:
# devtools::install_github("ncborcherding/scRepertoire")

# Attach the packages used by this script.
library(immunarch)
library(scRepertoire)
library(tidyverse)
library(SingleCellExperiment)
library(Seurat)
#' Read a MiXCR clones table and add a `tagValueCELL` barcode column
#'
#' Reads a tab-delimited file with `read.delim()`, rebuilds every `cellId`
#' as `<well>_<hp>_<rt>` and stores the result in a new `tagValueCELL`
#' column, so the table carries the cell-barcode column a regular MiXCR
#' export has and the ids line up with the RNA data.
#'
#' The three pieces are cut out of `cellId` like this (`well`, `hp` and `rt`
#' are the script's own labels; which physical barcode each one denotes is
#' not visible from this function):
#' * `well`: what is left after deleting every run of A/C/G/T letters
#'   (possibly empty) that is followed by `-`.
#' * `hp`: what is left after deleting everything from the first
#'   `-<A/C/G/T letters>-` onwards.
#' * `rt`: the ten letters of the first `-` + ten A/C/G/T letters match;
#'   `NA` when there is no such match (which `paste()` renders as `"NA"`).
#'
#' @param file Path (or connection) handed to `read.delim()`.
#' @param ... Further arguments forwarded to `read.delim()`.
#' @return The data frame as read, with an extra character column
#'   `tagValueCELL`.
read_mixcr_n_trans <- function(file, ...) {
  df <- read.delim(file, ...)

  # Fail early with a readable message instead of the cryptic
  # "replacement has 0 rows" error a missing column would cause further down.
  if (!"cellId" %in% colnames(df)) {
    stop("Input table has no `cellId` column; cannot build `tagValueCELL`.",
         call. = FALSE)
  }
  cell_id <- as.character(df[["cellId"]])

  # Transform the cell id so that it is consistent with the RNA data.
  well <- gsub("[AGCT]*-", "", cell_id)
  hp <- gsub("-[AGCT]*-.*", "", cell_id)

  # First "-" followed by ten bases; keep the bases, drop the leading "-".
  rt_hit <- regexpr("-[AGCT]{10}", cell_id)
  has_rt <- !is.na(rt_hit) & rt_hit > 0L
  rt <- rep(NA_character_, length(cell_id))
  rt[has_rt] <- substring(regmatches(cell_id, rt_hit), 2L)

  # Add a column named "tagValueCELL", as in regular MiXCR output.
  df$tagValueCELL <- paste(well, hp, rt, sep = "_")
  df
}
# Step1: Load MIXCR output -----------------------------------------
# Placeholder paths: point these at the per-sample MiXCR clone tables.
fn1 <- "mixcr_output/xxxxxxxxxxxx.clones.tsv"
fn2 <- "mixcr_output/xxxxxxxxxxxx..clones.tsv"
fn3 <- "mixcr_output/xxxxxxxxxxxx..clones.tsv"
filelist <- c(fn1, fn2, fn3)

# One label per file, in the same order as `filelist`.
samples <- c("A", "B", "C")
stopifnot(length(samples) == length(filelist))

# Read every table and add the `tagValueCELL` barcode column.
contig_list <- lapply(filelist, read_mixcr_n_trans)
colnames(contig_list[[1]])

# Convert to "scRepertoire" style.
contig.list <- loadContigs(input = contig_list, format = "MiXCR")
names(contig.list)
colnames(contig.list[[1]])
head(contig.list[[1]])
# [1] "barcode" "chain" "reads" "v_gene" "d_gene" "j_gene" "c_gene" "cdr3_nt" "cdr3"

# The sample labels are handed to combineTCR() because the rest of the script
# addresses the data by them: `group.by = "sample"`, `samples = samples` in
# clonalCompare() and `x.axis = samples[1]` in clonalScatter().
# NOTE(review): with `samples` set, the sample name is pasted onto each
# barcode (see the head() output below); confirm that the cell names of the
# RNA object carry the same prefix before calling combineExpression().
combined.TCR <- combineTCR(
  contig.list,
  samples = samples, # names of the different samples
  removeNA = FALSE,
  removeMulti = FALSE,
  filterMulti = FALSE
)
names(combined.TCR)
colnames(combined.TCR[[1]])
head(combined.TCR[[1]])[, 1:5] # sample name pasted to cell_id
# [1] "barcode" "sample" "TCR1" "cdr3_aa1" "cdr3_nt1" "TCR2" "cdr3_aa2" "cdr3_nt2"
# [9] "CTgene" "CTnt" "CTaa" "CTstrict"
# Basic clone analysis-------------------------------------
## Number of unique clones -----
# The total or relative numbers of unique clones.
clonalQuant(
  combined.TCR,
  cloneCall = "strict",
  chain = "both",
  scale = TRUE
)

# Same statistic, counted per user-chosen grouping column.
clonalQuant(
  combined.TCR,
  cloneCall = "gene",
  group.by = "sample",
  scale = TRUE
)

## Clonal abundance ----
clonalAbundance(
  combined.TCR,
  cloneCall = "gene",
  scale = FALSE
)

# Density plot
clonalAbundance(
  combined.TCR,
  cloneCall = "gene",
  scale = TRUE
)

## Clone length -----
# cloneCall can only be "nt" or "aa".
# chain: "both" for combined chain visualization;
# "TRA", "TRB", "TRD", "TRG", "IGH" or "IGL" to select a single chain.
clonalLength(
  combined.TCR,
  cloneCall = "aa",
  chain = "both"
)

clonalLength(
  combined.TCR,
  cloneCall = "aa",
  chain = "TRA",
  scale = TRUE
)

## Clone comparison
# Alluvial plot
clonalCompare(
  combined.TCR,
  top.clones = 10,
  samples = samples,
  # highlight.clones = c("CAYRSGARDDKIIF"),
  cloneCall = "aa",
  graph = "alluvial"
)

# Scatter plots: first vs. second sample, then second vs. third.
clonalScatter(
  combined.TCR,
  cloneCall = "gene",
  x.axis = samples[1],
  y.axis = samples[2],
  dot.size = "total",
  graph = "proportion"
)

clonalScatter(
  combined.TCR,
  cloneCall = "gene",
  x.axis = samples[2],
  y.axis = samples[3],
  dot.size = "total",
  graph = "proportion"
)

## Clonal homeostasis
clonalHomeostasis(combined.TCR, cloneCall = "gene")

## Clonal proportion
clonalProportion(combined.TCR, cloneCall = "gene")

## TCR cluster
# Cluster the TRA amino-acid sequences of the first sample only.
sub_combined <- clonalCluster(
  combined.TCR[[1]],
  chain = "TRA",
  sequence = "aa",
  threshold = 0.85,
  group.by = NULL
)
sub_combined$TRA_cluster
# Cluster denotes if the cluster was called using the normalized Levenshtein
# distance, which takes the edit distance calculated between 2 sequences
# and divides that by the mean of the sequence lengths.
# Unconnected sequences will have NA values.
## Combine with RNA ------
# Seurat object saved after UMAP has been computed.
combined <- readRDS(file = "RNA/Seurat_output/prefiltered_combined.rds")

# Convert to a SingleCellExperiment, then attach the clone information to it.
# NOTE(review): presumably cells are matched by barcode, so the barcodes in
# `combined.TCR` must equal the cell names of `combined` -- confirm.
sce <- Seurat::as.SingleCellExperiment(combined)
sce <- combineExpression(
  combined.TCR,
  sce,
  cloneCall = "gene",
  # group.by = "sample",
  proportion = TRUE
)

# Define the colour palette: seven "inferno" colours.
colorblind_vector <- hcl.colors(n = 7, palette = "inferno", fixup = TRUE)

# List the per-cell metadata columns now available.
colnames(colData(sce))

# UMAP coloured by clone-size class, using palette entries 7, 5, 3 and 1.
# NOTE(review): only four colours are supplied; verify that `cloneSize` has
# no more than four classes in the plotted data.
singleCellTK::plotUMAP(sce, colorBy = "cloneSize") +
  scale_color_manual(values = colorblind_vector[c(7, 5, 3, 1)])