复现3:AD外周血的单细胞转录组与免疫组库分析
2021-09-08 本文已影响0人
小贝学生信
Single-Cell RNA Sequencing of Peripheral Blood Reveals Immune Cell Signatures in Alzheimer's Disease
PMID: 34447367 | IF=7.561 | Front Immunol. 2021 Aug 9
1、关于文章
-
文献思路:作者对来自AD病人与Normal对照的外周血取样,同时进行单细胞转录组测序与单细胞免疫组库测序,以研究AD病人的外周血免疫微环境的“潜在变化”,具体分析思路如下图所示。
-
文章相关图表
单细胞类型注释结果
基于细胞类型水平,AD与NC差异基因的富集分析
免疫组库分析(TCR)
免疫组库以及数据分析的相关教程参考如下
(1) 淋巴细胞、抗原受体以及免疫组库测序的意义(录屏版)_哔哩哔哩_bilibili
(2)上游比对:【cellranger】https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/using/vdj
https://support.10xgenomics.com/single-cell-vdj/software/downloads/latest
(3)下游分析:【scRepertoire】免疫组库数据分析1-scRepertoire - 简书 (jianshu.com)
- 测序数据下载
(1)单细胞转录数据:https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE181279
(2)单细胞免疫组库测序数据(原始fastq数据)
2、复现
努力复现的内容主要包括:scRNA-seq的细胞类型注释、差异分析、富集分析;T细胞免疫组库的相关分析。分析工具主要为Serve Rstudio。
2.1 scRNA-seq
(1)数据预处理
setwd("~/imm/")
library(Seurat)
fs=list.files('./GSE181279_RAW/','^GSM')
fs
library(tidyverse)
samples=str_split(fs,'_',simplify = T)[,1]
samples
lapply(unique(samples),function(x){
y=fs[grepl(x,fs)]
folder=paste0("GSE181279_RAW/", str_split(y[1],'_',simplify = T)[,2])
dir.create(folder,recursive = T)
#为每个样本创建子文件夹
file.rename(paste0("GSE181279_RAW/",y[1]),file.path(folder,"barcodes.tsv.gz"))
#重命名文件,并移动到相应的子文件夹里
file.rename(paste0("GSE181279_RAW/",y[2]),file.path(folder,"features.tsv.gz"))
file.rename(paste0("GSE181279_RAW/",y[3]),file.path(folder,"matrix.mtx.gz"))
})
library(Seurat)
samples=list.files("GSE181279_RAW/")
samples
dir <- file.path('./GSE181279_RAW',samples)
names(dir) <- samples
#合并方法1
counts <- Read10X(data.dir = dir)
scRNA = CreateSeuratObject(counts)
dim(scRNA) #查看基因数和细胞总数
table(scRNA@meta.data$orig.ident) #查看每个样本的细胞数
head(scRNA@meta.data)
scRNA$group = substr(scRNA$orig.ident, 1, 2)
table(scRNA$group)
#检查数据与文章的细胞数相符,所以应该是已经质控后的
feats <- c("nFeature_RNA")
p1=VlnPlot(scRNA, features = feats, pt.size = 0) +
NoLegend()
feats <- c("nCount_RNA")
p2=VlnPlot(scRNA, features = feats, pt.size = 0) +
NoLegend()
mito_genes=rownames(scRNA)[grep("^MT-", rownames(scRNA))]
str(mito_genes)
scRNA=PercentageFeatureSet(scRNA, "^MT-", col.name = "percent_mito")
fivenum(scRNA@meta.data$percent_mito)
feats <- c("percent_mito")
p3=VlnPlot(scRNA, features = feats, pt.size = 0) +
NoLegend()
p1 | p2 | p3
(2)标准化、高变基因、归一化、降维
#标准化-归一化-高变基因
scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize", scale.factor = 10000)
scRNA <- FindVariableFeatures(scRNA, selection.method = "vst", nfeatures = 2000)
# this sclae step may take a long time if set "vars.to.regress"
scRNA <- ScaleData(scRNA, features = VariableFeatures(scRNA),
vars.to.regress = c("nFeature_RNA","percent_mito"))
# 可视化高变基因
top10 <- head(VariableFeatures(scRNA), 10)
p_hvg <- VariableFeaturePlot(scRNA)
library(ggplot2)
LabelPoints(plot = p_hvg, points = top10, repel = TRUE, size=2.5) +
theme(legend.position = c(0.1,0.8))
scRNA <- RunPCA(scRNA, features = VariableFeatures(scRNA))
ElbowPlot(scRNA, ndims=30, reduction="pca")
pc.num=1:20
# tSNE takes longer time than umap
scRNA = RunTSNE(scRNA, dims = pc.num)
scRNA = RunUMAP(scRNA, dims = pc.num)
DimPlot(scRNA, reduction = "tsne", group.by = "group") +
DimPlot(scRNA, reduction = "umap", group.by = "group")
(3)分群、细胞类型注释
scRNA <- FindNeighbors(scRNA, dims = pc.num)
scRNA <- FindClusters(scRNA, resolution = c(0.01,0.05,0.1,0.2,0.3,0.4,0.5))
library(clustree)
library(patchwork)
p_clu = clustree(scRNA@meta.data, prefix = "RNA_snn_res.")
Idents(scRNA) = scRNA$RNA_snn_res.0.3
table(scRNA@active.ident)
p_tsne = DimPlot(scRNA, reduction = "tsne", label = T)
p_clu | p_tsne
因为是外周血样本,所以主要是免疫相关细胞,相关marker基因也比较常见~
- 先注意查看下每种marker基因在所有cluster的表达情况
# T cell 0 1 2 3 4 6 12 13
cg=c("CD3D","CD3E","CD3G")
DotPlot(scRNA, assay = "RNA",
features = cg) + coord_flip() + p_tsne
# naive T : 10, 12,14,15
# CD4+ T : CD4 --- 0,1,2
# CD8+ T : CD8A, CD8B --- 6, 13
# NKT : 3,4
cg=c("CD3D","CD3E","CD3G",
"CD4",
"CD8A","CD8B",
"GZMB", "PRF1",
"FGFBP2", "CX3CR1",
"LEF1","SELL")
DotPlot(scRNA, assay = "RNA",
features = cg) + coord_flip() + p_tsne
# B cell : 5,9,11
cg=c("CD19","CD79A","CD79B")
DotPlot(scRNA, assay = "RNA",
features = cg) + coord_flip() + p_tsne
# natural killer (NK) cells --- 7,8
cg=c("NKG7","GZMB","GNLY","NCR1")
DotPlot(scRNA, assay = "RNA",
features = cg) + coord_flip() + p_tsne
# monocyte–macrophage --- 16,17,19
cg=c("CD14","CD68")
DotPlot(scRNA, assay = "RNA",
features = cg) + coord_flip() + p_tsne
# mixed with hemoglobin and platelets --- 18
cg=c("PF4")
DotPlot(scRNA, assay = "RNA",
features = cg) + coord_flip() + p_tsne
- 然后统一观察
scRNA@active.ident = factor(scRNA@active.ident, levels = c(15,10,12,14,
0,1,2,13,6,3,4,
8,7,5,9,11,
16,17,19,18))
cgs = list(
Tcell = c("CD3D","CD3E","CD3G"),
naiveT = c("LEF1","SELL"),
`CD4+T` = c("CD4"),
`CD8+T` = c("CD8A", "CD8B"),
NK = c("GZMB","NKG7","GNLY","NCR1"),
Bcell = c("CD19","CD79A","CD79B"),
`Mono/Macr` = c("CD14","CD68"),
Mixed = c("PF4"))
p_tmp=DotPlot(scRNA, features = cgs, assay = "RNA") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
p_tmp
- 最后进行注释
scRNA$celltype = case_when(
scRNA@active.ident %in% c(10,12,14,15) ~ "naive T",
scRNA@active.ident %in% c(0,1,2) ~ "CD4+ T",
scRNA@active.ident %in% c(6,13) ~ "CD8+ T",
scRNA@active.ident %in% c(3,4) ~ "NKT",
scRNA@active.ident %in% c(7,8) ~ "NK",
scRNA@active.ident %in% c(5,9,11) ~ "B",
scRNA@active.ident %in% c(16,17,19) ~ "Mono/Macr",
scRNA@active.ident %in% c(18) ~ "Mixed"
)
table(scRNA$celltype)
Idents(scRNA) = scRNA$celltype
table(scRNA@active.ident)
#调整因子顺序,美观的dotplot
scRNA@active.ident = factor(scRNA@active.ident,
levels = c("naive T","CD4+ T","CD8+ T","NKT",
"NK","B","Mono/Macr","Mixed"))
p_anno=DimPlot(scRNA, reduction = "tsne",
cols = c("#e41a1c","#377eb8","#4daf4a","#984ea3",
"#ff7f00","#ffff33","#a65628","#f781bf"))
p_tmp=DotPlot(scRNA, features = cgs, assay = "RNA") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
p_group=DimPlot(scRNA, reduction = "tsne", group.by = "group",
cols = c("#d7191c","#2b83ba"))
p_tmp + p_anno
image.png
- 相关marker基因的单独展示(文章的图)
FeaturePlot(scRNA, features = "CD3D", reduction = "tsne",
cols = c("#d9d9d9", "#ef3b2c")) | p_anno
FeaturePlot(scRNA, features = "CD4", reduction = "tsne",
cols = c("#d9d9d9", "#ef3b2c")) | p_anno
FeaturePlot(scRNA, features = "CD8A", reduction = "tsne",
cols = c("#d9d9d9", "#ef3b2c")) | p_anno
VlnPlot(scRNA, features = c("CD79A","CD3D","CD4","GZMB"), group.by = "RNA_snn_res.0.3",
pt.size = 0, ncol = 1) + NoLegend()
(4)细胞组成占比
pie_data = scRNA@meta.data[,c("celltype","group")]
library(ggplot2)
#remotes::install_github("Rkabacoff/ggpie")
library(ggpie)
pie_AD = ggpie(subset(pie_data, group == "AD"), celltype, offset=0.7, title="AD")
pie_NC = ggpie(subset(pie_data, group == "NC"), celltype, offset=0.7, title="NC")
pie_AD | pie_NC
(5)FindAllMarkers细胞类型marker gene热图
library(future)
plan()
plan("multiprocess", workers = 4)
plan()
table(scRNA@active.ident)
scRNA = scRNA[,!scRNA@active.ident %in% "Mixed"]
table(scRNA@active.ident)
scRNA_sample = subset(scRNA, downsample = 2000)
table(scRNA_sample@active.ident)
diff.wilcox = FindAllMarkers(scRNA_sample)
head(diff.wilcox)
library(tidyverse)
all.markers = diff.wilcox %>% select(gene, everything()) %>%
subset(p_val<0.05 & abs(diff.wilcox$avg_log2FC) > 0.5)
top20 = all.markers %>% group_by(cluster) %>% top_n(n = 20, wt = avg_log2FC)
scRNA_sample = ScaleData(scRNA_sample, features = top20$gene,
vars.to.regress = c("nFeature_RNA","percent_mito"))
DoHeatmap(scRNA_sample, features = top20$gene) +
scale_fill_gradientn(colors = c("#2171b5", "#f7fbff", "#ef3b2c"))
(6)细胞类型水平的AD与NC差异分析
- 仅分析5种免疫细胞类型
scRNA_sub = scRNA[,scRNA@active.ident %in% c("B","CD4+ T","CD8+ T","NK","Mono/Macr")]
table(scRNA_sub$main, scRNA_sub$group)
#差异分析
scRNA_sub$compare = paste(scRNA_sub$main, scRNA_sub$group, sep = "_")
table(scRNA_sub$compare)
ct = levels(scRNA_sub@active.ident)
all_markers = lapply(ct, function(x){
# x = ct[1]
print(x)
markers <- FindMarkers(scRNA_sub, group.by = "compare",
logfc.threshold = 0.1,
ident.1 = paste(x,"AD",sep = "_"),
ident.2 = paste(x,"NC",sep = "_"))
#markers_sig <- subset(markers, p_val_adj < 0.1)
return(markers)
})
length(all_markers)
names(all_markers) = ct
lapply(all_markers,nrow)
all_markers_sig = lapply(all_markers, function(x){
markers_sig <- subset(x, p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)
#markers_sig <- subset(x, p_val < 0.0001)
})
sapply(all_markers_sig,nrow)
# CD4+ T NK CD8+ T B Mono/Macr
# 141 214 186 154 179
degs = lapply(all_markers_sig, rownames)
#install.packages("UpSetR")
library(UpSetR)
#upset(fromList(degs))
combns = lapply(1:10, function(i){
c(combn(names(degs),2)[,i])
})
mt = fromList(degs)
#list name不能含有特殊字符
colnames(mt) = gsub(" ","",colnames(mt))
colnames(mt) = gsub("[+]","",colnames(mt))
colnames(mt) = gsub("[/]","",colnames(mt))
combns = lapply(1:10, function(i){
c(combn(colnames(mt),2)[,i])
})
#展示出仅两两交集的关系
UpSetR::upset(mt, intersections = combns)
(7)富集分析
library(clusterProfiler)
library(org.Hs.eg.db)
msigdb_reac = clusterProfiler::read.gmt("./geneset/c2.cp.reactome.v7.4.symbols.gmt")
msigdb_kegg = clusterProfiler::read.gmt("./geneset/c2.cp.kegg.v7.4.symbols.gmt")
msigdb_bp = clusterProfiler::read.gmt("./geneset/c5.go.bp.v7.4.symbols.gmt")
msigdb = rbind(msigdb_reac, msigdb_kegg, msigdb_bp)
res_CD4 = enricher(degs$`CD4+ T`, TERM2GENE = msigdb,
pvalueCutoff = 1, qvalueCutoff = 1)
res_CD8 = enricher(degs$`CD8+ T`, TERM2GENE = msigdb,
pvalueCutoff = 1, qvalueCutoff = 1)
res_NK = enricher(degs$NK, TERM2GENE = msigdb,
pvalueCutoff = 1, qvalueCutoff = 1)
res_B = enricher(degs$B, TERM2GENE = msigdb,
pvalueCutoff = 1, qvalueCutoff = 1)
res_Mo = enricher(degs$`Mono/Macr`, TERM2GENE = msigdb,
pvalueCutoff = 1, qvalueCutoff = 1)
#参考文章,展示特定20条通路在5种免疫细胞DEG的富集程度
library(clusterProfiler)
library(org.Hs.eg.db)
msigdb_reac = clusterProfiler::read.gmt("./geneset/c2.cp.reactome.v7.4.symbols.gmt")
msigdb_kegg = clusterProfiler::read.gmt("./geneset/c2.cp.kegg.v7.4.symbols.gmt")
msigdb_bp = clusterProfiler::read.gmt("./geneset/c5.go.bp.v7.4.symbols.gmt")
msigdb = rbind(msigdb_reac, msigdb_kegg, msigdb_bp)
res_CD4 = enricher(degs$`CD4+ T`, TERM2GENE = msigdb,
pvalueCutoff = 1, qvalueCutoff = 1)
res_CD8 = enricher(degs$`CD8+ T`, TERM2GENE = msigdb,
pvalueCutoff = 1, qvalueCutoff = 1)
res_NK = enricher(degs$NK, TERM2GENE = msigdb,
pvalueCutoff = 1, qvalueCutoff = 1)
res_B = enricher(degs$B, TERM2GENE = msigdb,
pvalueCutoff = 1, qvalueCutoff = 1)
res_Mo = enricher(degs$`Mono/Macr`, TERM2GENE = msigdb,
pvalueCutoff = 1, qvalueCutoff = 1)
#Top20 genesets
pathways = c("GOBP_POSITIVE_REGULATION_OF_CELL_DEATH",
"GOBP_PHAGOCYTOSIS",
"GOBP_REGULATION_OF_TUMOR_NECROSIS_FACTOR_MEDIATED_SIGNALING_PATHWAY",
"GOBP_CELLULAR_DEFENSE_RESPONSE",
"GOBP_POSITIVE_REGULATION_OF_DEFENSE_RESPONSE",
"GOBP_RESPONSE_TO_BACTERIUM",
"GOBP_OSTEOCLAST_DIFFERENTIATION",
"GOBP_INTERFERON_GAMMA_PRODUCTION",
"REACTOME_ADAPTIVE_IMMUNE_SYSTEM",
"GOBP_LYMPHOCYTE_ACTIVATION",
"GOBP_IMMUNE_RESPONSE_REGULATING_SIGNALING_PATHWAY",
"GOBP_LEUKOCYTE_MIGRATION",
"GOBP_MYELOID_LEUKOCYTE_ACTIVATION",
"REACTOME_HEMOSTASIS",
"KEGG_NATURAL_KILLER_CELL_MEDIATED_CYTOTOXICITY",
"KEGG_HEMATOPOIETIC_CELL_LINEAGE",
"GOBP_CYTOKINE_MEDIATED_SIGNALING_PATHWAY",
"GOBP_ADAPTIVE_IMMUNE_RESPONSE",
"GOBP_B_CELL_ACTIVATION")
test = res_CD4@result
colnames(test)
enrich_res2 = list(
CD4 = res_CD4,
CD8 = res_CD8,
NK = res_NK,
Mo = res_Mo,
B = res_B)
res_hp = lapply(enrich_res2, function(x){
-log10(x@result[pathways,"pvalue"])
})
res_hp = do.call(cbind, res_hp)
rownames(res_hp) = pathways
res_hp[is.na(res_hp)] = -5
pheatmap::pheatmap(res_hp, cluster_rows = F, cluster_cols = F,
color = colorRampPalette(colors = c("grey","white","red"))(100))
2.2 免疫组库分析(TCR为例)
(1)上游比对
- 以其中一个数据为例
#GSM5494112 AD2_TCR
#SRR15319897
#SRR15319898
#SRR15319899
#SRR15319900
cat fq.txt | while read id
do
wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/${id:0:6}/0${id:0-2}/${id}/${id}_1.fastq.gz
wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/${id:0:6}/0${id:0-2}/${id}/${id}_2.fastq.gz
done
mv SRR15319897_1.fastq.gz AD2_TCR_S2_L001_R1_001.fastq.gz
mv SRR15319897_2.fastq.gz AD2_TCR_S2_L001_R2_001.fastq.gz
mv SRR15319898_1.fastq.gz AD2_TCR_S2_L002_R1_001.fastq.gz
mv SRR15319898_2.fastq.gz AD2_TCR_S2_L002_R2_001.fastq.gz
mv SRR15319899_1.fastq.gz AD2_TCR_S2_L003_R1_001.fastq.gz
mv SRR15319899_2.fastq.gz AD2_TCR_S2_L003_R2_001.fastq.gz
mv SRR15319900_1.fastq.gz AD2_TCR_S2_L004_R1_001.fastq.gz
mv SRR15319900_2.fastq.gz AD2_TCR_S2_L004_R2_001.fastq.gz
bin=~/biosoft/cellranger-6.0.2/bin/cellranger
db=~/imm/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0
fq_dir=./fastq/
$bin vdj --id=AD2_TCR \
--reference=$db \
--fastqs=$fq_dir \
--sample=AD2_TCR \
--localcores=8 \
--localmem=64
(2)下游分析
【2.1】 整合所有TCR数据,以及分组信息
# library(devtools)
# install_github("ncborcherding/scRepertoire")
library(Seurat)
library(scRepertoire)
library(tidyverse)
library(patchwork)
fls = list.files("VDJ/", pattern = "*TCR_filtered_contig_annotations.csv", full.names = T)
TCR = lapply(fls, function(x){
tcr = read.csv(x)
return(tcr)
})
combined_TCR <- combineTCR(TCR,
samples = c("AD1", "AD2", "AD3", "NC1", "NC2"),
cells = "T-AB")
combined_TCR <- addVariable(combined_TCR, name = "group",
variables = c("AD", "AD", "AD", "NC", "NC"))
names(combined_TCR) = c("AD1", "AD2", "AD3", "NC1", "NC2")
str(combined_TCR)
【2.2】 图A:每个样本的Top10 克隆型; 图B:样本克隆型类型的分布情况(按AD、NC分组)
#P_A: 以氨基酸序列作为克隆型依据
Top10_clone = lapply(combined_TCR, function(x){
head(sort(table(x$CTaa), decreasing = T),10) / nrow(x)
})
Top10_clone_stat = do.call(cbind, Top10_clone)
rownames(Top10_clone_stat) = 1:10
Top10_clone_stat=reshape2::melt(Top10_clone_stat)
colnames(Top10_clone_stat) = c("ID","sample","Freq")
Top10_clone_stat$group = substr(Top10_clone_stat$sample,1,2)
head(Top10_clone_stat)
Top10_clone_stat$sample = factor(Top10_clone_stat$sample,
levels = c("NC1","NC2","AD1","AD2","AD3"))
p_A = ggplot(Top10_clone_stat, aes(x=sample, y=Freq, color = group)) +
geom_boxplot() +
ggtitle(label = "The relative frequency of\n the 10 most abundant clonesis") +
theme_bw() +
theme(
# Hide panel borders and remove grid lines
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
# Change axis line
axis.line = element_line(colour = "black", size = 0.5),
#no legend
legend.position = "none",
# center legend
plot.title = element_text(hjust = 0.5)
)
#P_B: 样本克隆型的分布情况
clone_NC1 = unique(combined_TCR$NC1$CTaa)
clone_NC2 = unique(combined_TCR$NC2$CTaa)
# intersect(clone_NC1, clone_NC2)
veen_NC = list(NC1 = clone_NC1,
NC2 = clone_NC2)
library(VennDiagram)
library(cowplot)
library(ggplotify)
library(gridGraphics)
# plt=venn.diagram(veen_NC, filename=NULL,fill = c("#beaed4", "#fdc086"),
# cat.pos = c(0, 0))
# p_tmp <- grobTree(plt)
# as.ggplot(plot_grid(p_tmp))
v = draw.pairwise.venn(
area1 = 61,
area2 = 59,
cross.area = 20,
category = c("NC1", "NC2"),
fill = c("#beaed4", "#fdc086"),
cat.pos = c(-180,-180),
lty = 1, cat.cex = 2)
#lapply(v, function(i) i$label)
v[[5]]$label = 3573
v[[6]]$label = 3435
v[[7]]$label = 137
p_tmp <- grobTree(v)
p_B1=as.ggplot(plot_grid(p_tmp))
clone_AD1 = unique(combined_TCR$AD1$CTaa)
clone_AD2 = unique(combined_TCR$AD2$CTaa)
clone_AD3 = unique(combined_TCR$AD3$CTaa)
# veen_NC = list(AD1 = clone_AD1,
# AD3 = clone_AD3,
# AD2 = clone_AD2)
# plt=venn.diagram(veen_NC, filename=NULL)
# p_tmp <- grobTree(plt)
# as.ggplot(plot_grid(p_tmp))
v = draw.triple.venn(
area1 = 60,
area2 = 60,
area3 = 60,
n12 = 21,
n23 = 22,
n13 = 19,
n123 = 10,
category = c("AD2", "AD1", "AD3"),
fill = c("#8dd3c7", "#b3cde3", "#ccebc5"),
lty = 1,
cat.cex = 2,
cat.just=list(c(0.5,0) , c(0.5,0) , c(0.5,0.7)),
cat.pos = c(150,-150,0),
rotation.degree =180)
#lapply(v, function(i) i$label)
v[[7]]$label = 4380
v[[8]]$label = 0
v[[9]]$label = 3704
v[[10]]$label = 1
v[[11]]$label = 0
v[[12]]$label = 0
v[[13]]$label = 4168
p_tmp <- grobTree(v)
p_B2=as.ggplot(plot_grid(p_tmp))
p_B = p_B1 | p_B2
#合并图片
(p_A | p_B) + plot_layout(widths = c(1,2))
【2.3】 图C:AD、NC组的克隆型多样性评价; 图D:每个样本的TCR受体氨基酸序列长度分布特征
#P_C
p=clonalDiversity(combined_TCR, cloneCall = "aa", group = "group")
diver_stat = p$data
diver_stat_inv = subset(diver_stat, variable == "Inv.Simpson")
p_C1=ggplot(diver_stat_inv, aes(x=group, y=value )) +
geom_bar(stat = "identity", fill = "#5fba7e", width=0.4) +
ggtitle("Invsimpson") +
scale_y_continuous(expand=c(0,0)) +
theme_bw() +
theme(
# Hide panel borders and remove grid lines
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
# Change axis line
axis.line = element_line(colour = "black", size = 0.5),
# center legend
plot.title = element_text(hjust = 0.5)
)
diver_stat_shan = subset(diver_stat, variable == "Shannon")
p_C2=ggplot(diver_stat_shan, aes(x=group, y=value )) +
geom_bar(stat = "identity", fill = "#fed976", width=0.4) +
coord_cartesian(ylim = c(7.8, 8.5)) +
ggtitle("Shannon") +
scale_y_continuous(expand=c(0,0)) +
theme_bw() +
theme(
# Hide panel borders and remove grid lines
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
# Change axis line
axis.line = element_line(colour = "black", size = 0.5),
# center legend
plot.title = element_text(hjust = 0.5))
p_C = p_C1 | p_C2
p_D1 = lengthContig(combined_TCR, cloneCall="aa", chain = "TRA") +
scale_fill_manual(values=c("#fec44f", "#fe9929", "#ec7014", "#74a9cf", "#3690c0")) +
ggtitle(label = "TRA")
p_D2 = lengthContig(combined_TCR, cloneCall="aa", chain = "TRB") +
scale_fill_manual(values=c("#fec44f", "#fe9929", "#ec7014", "#74a9cf", "#3690c0")) +
ggtitle(label = "TRB")
p_D = p_D1 / p_D2 + plot_layout(guides = "collect")
(p_C | p_D)
【2.4】 图E:TCR受体的V基因、J基因家族丰度分布
## TRB V/J gene family
### V
gene_family = lapply(combined_TCR, function(x){
x = combined_TCR[[1]]
return(x$TCR2)
})
gene_family_NC = c(gene_family[[4]], gene_family[[5]])
gene_family_NC_V = stringr::str_split(gene_family_NC, "[.]",simplify = T)[,1]
table(gene_family_NC_V)
sle_J = c("TRBV5-1","TRBV28","TRBV7-9","TRBV19","TRBV3-1","TRBV29-1",
"TRBV5-4","TRBV7-8","TRBV20-1","TRBV16","TRBV9","TRBV7-3")
gene_family_NC_V[!(gene_family_NC_V %in% sle_J)] = "other"
table(gene_family_NC_V)
gene_family_AD = c(gene_family[[1]], gene_family[[2]], gene_family[[3]])
gene_family_AD_V = stringr::str_split(gene_family_AD, "[.]",simplify = T)[,1]
table(gene_family_AD_V)
gene_family_AD_V[!(gene_family_AD_V %in% sle_J)] = "other"
table(gene_family_AD_V)
tb1 = as.data.frame(table(gene_family_NC_V))
colnames(tb1) = c("TRBV","Frequency")
tb1$group = "NC"
tb2 = as.data.frame(table(gene_family_AD_V))
colnames(tb2) = c("TRBV","Frequency")
tb2$group = "AD"
tb=rbind(tb1, tb2)
tb$TRBV = factor(tb$TRBV, levels = rev(c("other","TRBV5-1","TRBV28","TRBV7-9","TRBV19","TRBV3-1","TRBV29-1",
"TRBV5-4","TRBV7-8","TRBV20-1","TRBV16","TRBV9","TRBV7-3")))
p_E1=ggplot(tb, aes(x=group, y=Frequency, fill=TRBV)) +
geom_bar(stat = "identity", position="fill", width = 0.5) +
scale_fill_manual(values=rev(c("#bdbdbd","#9e0142","#d53e4f","#f46d43","#fdae61","#fee08b",
"#ffffbf","#e6f598","#abdda4","#66c2a5","#3288bd","#5e4fa2","#003c30"))) +
ggtitle(label = "TRBV family") +
scale_y_continuous(expand=c(0,0)) +
theme_bw() +
theme(
# Hide panel borders and remove grid lines
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
# Change axis line
axis.line = element_line(colour = "black", size = 0.5),
# center legend
plot.title = element_text(hjust = 0.5))
### J
gene_family = lapply(combined_TCR, function(x){
x = combined_TCR[[1]]
return(x$TCR2)
})
gene_family_NC = c(gene_family[[4]], gene_family[[5]])
gene_family_NC_J = stringr::str_split(gene_family_NC, "[.]",simplify = T)[,3]
table(gene_family_NC_J)
gene_family_NC_J[gene_family_NC_J == ""] = "other"
table(gene_family_NC_J)
gene_family_AD = c(gene_family[[1]], gene_family[[2]], gene_family[[3]])
gene_family_AD_J = stringr::str_split(gene_family_AD, "[.]",simplify = T)[,3]
table(gene_family_AD_J)
gene_family_AD_J[gene_family_AD_J == ""] = "other"
table(gene_family_AD_J)
tb1 = as.data.frame(table(gene_family_NC_J))
colnames(tb1) = c("TRBJ","Frequency")
tb1$group = "NC"
tb2 = as.data.frame(table(gene_family_AD_J))
colnames(tb2) = c("TRBJ","Frequency")
tb2$group = "AD"
tb=rbind(tb1, tb2)
p_E2=ggplot(tb, aes(x=group, y=Frequency, fill=TRBJ)) +
geom_bar(stat = "identity", position="fill", width = 0.5) +
scale_fill_manual(values=c("#bdbdbd","#9e0142","#d53e4f","#f46d43","#fdae61","#fee08b","#bf812d",
"#ffffbf","#e6f598","#abdda4","#66c2a5","#3288bd","#5e4fa2","#003c30")) +
ggtitle(label = "TRBJ family") +
scale_y_continuous(expand=c(0,0)) +
theme_bw() +
theme(
# Hide panel borders and remove grid lines
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
# Change axis line
axis.line = element_line(colour = "black", size = 0.5),
# center legend
plot.title = element_text(hjust = 0.5))
p_E = p_E1 | p_E2