scTCR-seq分析: MiXCR + scRepertoire

2024-05-06  本文已影响0人  重拾生活信心
# Remove every (non-hidden) object from the current environment before running.
# NOTE(review): this only deletes objects; it does not detach packages or reset
# options, so a fresh R session is the more reliable way to get a clean start.
rm(list=ls())
# One-time installation of scRepertoire from GitHub (left commented out):
#devtools::install_github("ncborcherding/scRepertoire")
# Load the packages used by the rest of the script
library(immunarch)  
library(scRepertoire)
library(tidyverse)
library(SingleCellExperiment)
library(Seurat)
#' Read a MiXCR clones table and add a `tagValueCELL` barcode column
#'
#' The `cellId` of every row is split into three pieces that are re-joined as
#' `<well>_<hp>_<rt>` (the layout used for the RNA data, per the original
#' author's note):
#'   * `well` - what is left after deleting every "<A/C/G/T run>-" prefix,
#'   * `hp`   - everything before the first "-<A/C/G/T run>-",
#'   * `rt`   - the first run of exactly 10 A/C/G/T characters that follows
#'              a "-".
#' If no such 10-mer is found, `rt` is `NA` and is pasted as the text "NA".
#'
#' @param file Path to a tab-delimited MiXCR clones table that has a `cellId`
#'   column.
#' @param ... Further arguments forwarded to `utils::read.delim()`.
#' @return The table as a data.frame with an extra character column
#'   `tagValueCELL`.
read_mixcr_n_trans <- function(file, ...) {
  df <- read.delim(file, ...)

  # Fail early with a clear message instead of an obscure assignment error.
  if (!"cellId" %in% colnames(df)) {
    stop(
      "'", file, "' has no 'cellId' column; cannot build 'tagValueCELL'.",
      call. = FALSE
    )
  }
  cell_id <- as.character(df[["cellId"]])

  # Transform the cell id so that it is consistent with the RNA data.
  well <- gsub(pattern = "[AGCT]*-", replacement = "", x = cell_id)
  hp <- gsub(pattern = "-[AGCT]*-.*", replacement = "", x = cell_id)

  # First "-<10-mer>" match per id; drop the leading "-" from each hit.
  rt_match <- regexpr("-[AGCT]{10}", cell_id)
  has_rt <- !is.na(rt_match) & rt_match > -1L
  rt <- rep(NA_character_, length(cell_id))
  rt[has_rt] <- substring(regmatches(cell_id, rt_match), 2L)

  # Add the column that a regular MiXCR output would carry.
  df$tagValueCELL <- paste(well, hp, rt, sep = "_")
  df
}

# Step1: Load MIXCR output -----------------------------------------

# Paths to the per-sample MiXCR clones tables.
# NOTE(review): these look like placeholders - fn2 and fn3 are the same string
# and contain a doubled dot ("..clones.tsv"); replace with the real file names.
fn1 <- "mixcr_output/xxxxxxxxxxxx.clones.tsv"
fn2 <- "mixcr_output/xxxxxxxxxxxx..clones.tsv"
fn3 <- "mixcr_output/xxxxxxxxxxxx..clones.tsv"

# One sample label per file, in the same order as `filelist`.
filelist <- c(fn1,fn2,fn3)
samples <- c("A","B","C")

# Read every file with read_mixcr_n_trans(); the result is a list with one
# data.frame per file. The column names of the first table are then printed.
contig_list<- lapply(filelist, function(x) read_mixcr_n_trans(x))
  colnames(contig_list[[1]])
  
# Convert the MiXCR tables into scRepertoire's contig format, then inspect
# the list names, the column names and the first rows of the first element.
contig.list <- loadContigs(input = contig_list, 
                           format = "MiXCR")
  names(contig.list)
  colnames(contig.list[[1]])
  head((contig.list[[1]]))
# Columns seen by the original author:
#[1] "barcode" "chain"   "reads"   "v_gene"  "d_gene"  "j_gene"  "c_gene"  "cdr3_nt" "cdr3"
  
# Combine the contigs into one clone record per barcode.
# NOTE(review): `samples` is commented out here, yet later calls in this script
# use group.by = "sample", samples = samples and samples[1..3] as axis names.
# Confirm that the list names / `sample` column produced without it match, or
# re-enable the argument (which, per the note below, pastes the sample name
# onto the barcode).
combined.TCR <- combineTCR(contig.list, 
                         #  samples = samples, # names of different samples # can be NULL
                           removeNA = FALSE, 
                           removeMulti = FALSE, 
                           filterMulti = FALSE)
# Inspect the combined object.
names(combined.TCR)
colnames(combined.TCR[[1]])
head((combined.TCR[[1]]))[,1:5]   # sample name pasted to cell_id

# Columns seen by the original author:
# [1] "barcode"  "sample"   "TCR1"     "cdr3_aa1" "cdr3_nt1" "TCR2"     "cdr3_aa2" "cdr3_nt2"
#[9] "CTgene"   "CTnt"     "CTaa"     "CTstrict"

# Basic clone analysis-------------------------------------

## Number of unique clones -----
# The total or relative numbers of unique clones.
clonalQuant(combined.TCR, 
            cloneCall="strict", 
            chain = "both", 
            scale = TRUE)
  # Same statistic, computed per custom grouping variable (here "sample")
clonalQuant(combined.TCR, cloneCall = "gene", group.by = "sample", scale = TRUE)

## Clonal abundance ----
clonalAbundance(combined.TCR, 
                cloneCall = "gene", 
                scale = FALSE)
# Density plot (scale = TRUE)
clonalAbundance(combined.TCR, cloneCall = "gene", scale = TRUE)

## Clonal (CDR3) length -----

clonalLength(combined.TCR, 
             cloneCall="aa", # cloneCall can only be "nt" or "aa"
             chain = "both")   # "both" for combined chain visualization
                              # "TRA", "TRB", "TRD", "TRG", "IGH" or "IGL" to select single chain

# Length distribution for the TRA chain only, with scale = TRUE.
clonalLength(combined.TCR, 
             cloneCall="aa", 
             chain = "TRA", 
             scale = TRUE) 

## Clonal comparison
# Alluvial plot of the top 10 clones across the samples named in `samples`
clonalCompare(combined.TCR, 
              top.clones = 10, 
              samples = samples, 
              # highlight.clones = c("CAYRSGARDDKIIF"),
              cloneCall="aa", 
              graph = "alluvial")
# Scatter plot: first sample (x axis) against second sample (y axis)
clonalScatter(combined.TCR, 
              cloneCall ="gene", 
              x.axis = samples[1], 
              y.axis = samples[2],
              dot.size = "total",
              graph = "proportion")
# Scatter plot: second sample (x axis) against third sample (y axis)
clonalScatter(combined.TCR, 
              cloneCall ="gene", 
              x.axis = samples[2], 
              y.axis = samples[3],
              dot.size = "total",
              graph = "proportion")

## Clonal homeostasis
clonalHomeostasis(combined.TCR, 
                  cloneCall = "gene")
## Clonal proportion
clonalProportion(combined.TCR, 
                 cloneCall = "gene") 

## TCR cluster
# Cluster the first element of combined.TCR on its TRA amino-acid sequences
# with a similarity threshold of 0.85, then show the resulting cluster column.
sub_combined <- clonalCluster(combined.TCR[[1]], 
                              chain = "TRA", 
                              sequence = "aa", 
                              threshold = 0.85, 
                              group.by = NULL)
sub_combined$TRA_cluster
# Author's note (presumably taken from the scRepertoire documentation - confirm):
# the cluster column denotes whether a cluster was called using the normalized
# Levenshtein distance, i.e. the edit distance between two sequences divided
# by the mean of their lengths.
# Unconnected sequences will have NA values.


## Combine with RNA ------
# Load the Seurat object (per the original author: saved after UMAP):
combined <- readRDS(file = "RNA/Seurat_output/prefiltered_combined.rds")

# Convert the Seurat object to a SingleCellExperiment.
sce <- Seurat::as.SingleCellExperiment(combined)

# Attach the clone information to the single-cell object.
# NOTE(review): this presumably matches on cell barcodes - confirm that the
# barcodes in combined.TCR have the same format as the cell names in `sce`.
sce <- combineExpression(combined.TCR, 
                         sce, 
                         cloneCall="gene", 
                         #group.by = "sample", 
                         proportion = TRUE)

# Define color palette: 7 colours from the "inferno" HCL palette
colorblind_vector <- hcl.colors(n=7, palette = "inferno", fixup = TRUE)
# List the cell-level metadata columns now present on `sce`.
colnames(colData(sce))
# UMAP coloured by the "cloneSize" column, using palette entries 1, 3, 5 and 7
# in reverse order.
# NOTE(review): only 4 colours are supplied - assumes cloneSize has at most
# 4 levels in the plotted data; confirm.
singleCellTK::plotUMAP(sce,colorBy = "cloneSize") +
  scale_color_manual(values=rev(colorblind_vector[c(1,3,5,7)]))

上一篇下一篇

猜你喜欢

热点阅读