单细胞文章复现丨单细胞测序揭示肺腺癌中不同的肿瘤微环境模式
文章标题
Single-cell RNA sequencing reveals distinct tumor microenvironmental patterns in lung adenocarcinoma
这篇文章通过单细胞RNA测序技术,对十例人类肺腺癌和十例正常组织进行了深入研究,发现肺癌细胞的转录组存在异质性,反映了肿瘤的病理分级和基因突变途径。同时,文章还发现存在两种不同的微环境模式,即免疫激活的CP²E模式和惰性的N³MC模式。这两种模式在预后方面具有不同的影响。此外,文章还发现微环境标记基因和签名在整体肿瘤中具有预后价值。简单来说,这篇文章通过单细胞RNA测序技术,提供了基于微环境的额外预后信息,并可能有助于预测治疗反应和揭示未来治疗方法的潜在目标细胞群。
虽然原文提供了代码,但是作者之前用到的很多生信包和软件已经过时很久了,包括Seurat都已经更新到V5了,所以今天就用自己的方法来复现一下文章中的fig.1中的图。
Fig.1一、准备安装包和设置一些画图参数
library(Seurat)
library(dplyr)
library(reticulate)
library(sctransform)
library(cowplot)
library(ggplot2)
library(viridis)
library(tidyr)
library(magrittr)
library(reshape2)
library(readxl)
library(readr)
library(stringr)
# 后续绘制的 ggplot2 图形都应用 cowplot 主题
theme_set(theme_cowplot())
#color scheme
use_colors <- c(Tumor ="brown2",# 肿瘤的颜色为棕色系
Normal ="deepskyblue2",# 正常的颜色为深天蓝色系
G1 ="#46ACC8",# 细胞周期G1期的颜色为亮蓝色
G2M ="#E58601",# 细胞周期G2/M期的颜色为橙色
S ="#B40F20",# 细胞周期S期的颜色为深红色
Epithelial ="seagreen",# 上皮细胞的颜色为海洋绿
Immune ="darkgoldenrod2",# 免疫细胞的颜色为深金黄色
Stromal ="steelblue",# 间质细胞的颜色为钢蓝色
p018 ="#E2D200",# p018标签的颜色为柠檬黄
p019 ="#46ACC8",# p019标签的颜色为亮蓝色
p023 ="#E58601",# p023标签的颜色为橙色
p024 ="#B40F20",# p024标签的颜色为深红色
p027 ="#0B775E",# p027标签的颜色为深绿色
p028 ="#E1BD6D",# p028标签的颜色为浅棕色
p029 ="#35274A",# p029标签的颜色为深紫色
p030 ="#F2300F",# p030标签的颜色为鲜红色
p031 ="#7294D4",# p031标签的颜色为浅蓝色
p032 ="#5B1A18",# p032标签的颜色为深棕色
p033 ="#9C964A",# p033标签的颜色为淡黄色
p034 ="#FD6467"# p034标签的颜色为粉红色)
二、数据加载和质控(到这之前基本都是用的原文代码)
SuppFig1A,Before filtering SuppFig1Bsetwd("~/Data1/德国单细胞文章复现/01_初步处理/")
samples <- read_excel("../data/metadata/patients_metadata.xlsx", range = cell_cols("A:A")) %>% .$sample_id
for(iinseq_along(samples)){
# 读取数据
assign(paste0("scs_data", i), Read10X(data.dir = paste0("../data/cellranger/", samples[i],"/filtered_feature_bc_matrix")))
}
### create seurat objects from cellranger files
# 使用for循环遍历样本列表中的每个样本
for(iinseq_along(samples)) {
assign(paste0("seu_obj", i), CreateSeuratObject(
counts = eval(parse(text = paste0("scs_data", i))),# 将表达矩阵数据作为counts参数传入,使用eval和parse函数动态获取对应的变量
project = samples[i],# 将项目名称作为project参数传入
min.cells =3# 设置最小细胞数阈值为3,用于过滤低质量细胞
))
}
### merge data sets 合并多个Seurat对象为一个新的Seurat对象
seu_obj <- merge(seu_obj1,# 第一个原始Seurat对象
y = c(seu_obj2, seu_obj3, seu_obj4, seu_obj5, seu_obj6, seu_obj7, seu_obj8, seu_obj9, seu_obj10, seu_obj11, seu_obj12, seu_obj13, seu_obj14, seu_obj15, seu_obj16, seu_obj17, seu_obj18, seu_obj19, seu_obj20),add.cell.ids = samples, project ="lung")
# add.cell.ids参数添加样本标识,最后设置项目名称为"lung"
seu_obj <- JoinLayers(seu_obj)
### calculate mitochondrial, hemoglobin and ribosomal gene counts
# 计算并添加MT基因组比例到Seurat对象中
seu_obj <- PercentageFeatureSet(seu_obj, pattern ="^MT-", col.name ="pMT")
# 计算并添加HBA/HBB基因组比例到Seurat对象中
seu_obj <- PercentageFeatureSet(seu_obj, pattern ="^HBA|^HBB", col.name ="pHB")
# 计算并添加RPS/RPL基因组比例到Seurat对象中,核糖体蛋白基因
seu_obj <- PercentageFeatureSet(seu_obj, pattern ="^RPS|^RPL", col.name ="pRP")
#### Data Filtering ####
## Before filtering
seu_obj_unfiltered <- seu_obj
qc_std_plot(seu_obj_unfiltered)
####After filtering####
seu_obj_unfiltered <- readRDS("seurat_objects/all_unfiltered.RDS")
#过滤
seu_obj <- subset(seu_obj_unfiltered, subset = nFeature_RNA > nFeature_lower & nFeature_RNA < nFeature_upper & nCount_RNA > nCount_lower & nCount_RNA < nCount_upper & pMT < pMT_upper & pHB < pHB_upper)
三、 Data normalization/harmony去批次 /降维聚类
harmony去批次后的降维聚类图是不是和文献中的UMAP图形状大致吻合seu_obj<- NormalizeData(seu_obj, normalization.method="LogNormalize",scale.factor=1e4)
seu_obj<- FindVariableFeatures(seu_obj)
#?FindVariableFeatures
seu_obj<- ScaleData(seu_obj)
#?ScaleData
seu_obj<- RunPCA(seu_obj)
#?RunPCA(挑选显著PC,最好run一下,避免引入噪音)
ElbowPlot(seu_obj,ndims=50) ####
#Seurat 5版本有断层数据需要连接下
# seu_obj <- JoinLayers(seu_obj)
library(harmony)
###Harmony
options(repr.plot.height=2.5, repr.plot.width = 6)
seu_obj_harmony<- seu_obj %>%
RunHarmony(group.by.vars="orig.ident", plot_convergence = TRUE)
#### Dimensionality reduction降维聚类####
seu_obj_harmony<- FindNeighbors(seu_obj_harmony, reduction = "harmony",dims=1:15)
seu_obj_harmony<- FindClusters(seu_obj_harmony, resolution = 0.2)
seu_obj_harmony<- RunUMAP(seu_obj_harmony, dims = 1:15, reduction="harmony")
print(DimPlot(seu_obj_harmony, reduction = "umap",label = T) + labs(title = paste0("resolution: ", 0.2)))
四、Main cell type annotation/大类细胞注释
我用harmony去批次整合的图 文献fig.1D的原图(肉眼可以看到细胞形状和细胞数量大致相同)mainmarkers<c("PECAM1","VWF","ACTA2","JCHAIN","MS4A1","PTPRC","CD68","KIT","EPCAM","CDH1","KRT7","KRT19")
DotPlot(seu_obj_harmony, features = mainmarkers, group.by ="RNA_snn_res.0.2") +
coord_flip() +
scale_color_viridis()
ggsave2("DotPlot_mainmarkers.png", path ="output2/annotation", width =30, height =8, units ="cm")
DimPlot(seu_obj_harmony, group.by ="RNA_snn_res.0.2", label = T, label.size =5)
ggsave2("DimPlot_all_clusters.png", path ="output/annotation", width =20, height =20, units ="cm")
Idents(seu_obj_harmony) <- seu_obj_harmony$RNA_snn_res.0.2
#这里三个分成大类免疫、上皮、基质
# saveRDS(seu_obj, file = "seurat_objects/SCT_snn_res.0.085.RDS")
# seu_obj<- readRDS("seurat_objects/SCT_snn_res.0.085.RDS")
annotation_curated_main <-read_excel("../data/curated_annotation/curated_annotation_main改_harmony.xlsx")
new_ids_main <- annotation_curated_main$main_cell_type
names(new_ids_main) <- levels(seu_obj_harmony)
seu_obj_harmony <- RenameIdents(seu_obj_harmony, new_ids_main)
seu_obj_harmony@meta.data$main_cell_type<- Idents(seu_obj_harmony)
DimPlot(seu_obj_harmony, group.by ="main_cell_type")
DimPlot(seu_obj_harmony, group.by ="RNA_snn_res.0.2",label = T)
# Add metadata
metatable <- read_excel("../data/metadata/patients_metadata.xlsx")
metadata <- FetchData(seu_obj_harmony,"orig.ident")
metadata$cell_id<- rownames(metadata)
metadata$sample_id<- metadata$orig.ident
metadata <- left_join(x = metadata, y = metatable, by ="sample_id")
rownames(metadata) <- metadata$cell_id
seu_obj_harmony <- AddMetaData(seu_obj_harmony, metadata = metadata)
五、Cell cycle scoring/细胞周期评分
可以看到大部分细胞周期评分都很低可以不用去除Cell cycle相关基因### add cell cycle, cc.genes loaded with Seurat
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
score_cc <-function(seu_obj_harmony){
seu_obj_harmony <- CellCycleScoring(seu_obj_harmony, s.genes, g2m.genes)
seu_obj_harmony@meta.data$CC.Diff <- seu_obj_harmony@meta.data$S.Score - seu_obj_harmony@meta.data$G2M.Score
return(seu_obj_harmony)
}
seu_obj_harmony <- score_cc(seu_obj_harmony)
FeatureScatter(seu_obj_harmony,"G2M.Score","S.Score", group.by ="Phase") +
coord_fixed(ratio =1)
六、 plots for figure 1
Fig1B复现 Fig1C复现 Fig1D复现 Fig1D细胞比例图复现DimPlot(seu_obj_harmony,group.by="tissue_type", cols = use_colors)
ggsave2("Fig1B.png", path ="../results", width =15, height =15, units ="cm")
DimPlot(seu_obj_harmony,group.by="patient_id", cols = use_colors, pt.size =1)
ggsave2("Fig1C.png", path ="../results", width =15, height =15, units ="cm")
DimPlot(seu_obj_harmony,group.by="main_cell_type", cols = use_colors, pt.size =1)
ggsave2("Fig1D_umap.png", path ="../results", width =15, height =15, units ="cm")
cell_types <- FetchData(seu_obj_harmony, vars = c("sample_id","main_cell_type","tissue_type")) %>%
mutate(main_cell_type = factor(main_cell_type, levels = c("Stromal","Immune","Epithelial"))) %>%
mutate(sample_id = factor(sample_id, levels = rev(c("p018t","p019t","p023t","p024t","p027t","p028t","p030t","p031t","p032t","p033t","p034t","p018n","p019n","p027n","p028n","p029n","p030n","p031n","p032n","p033n","p034n"))))
ggplot(data = cell_types) +
geom_bar(mapping = aes(x = sample_id, fill = main_cell_type, ), position ="fill", width =0.75) +
scale_fill_manual(values = use_colors) +
coord_flip()
ggsave2("Fig1D_barplot.pdf", path ="../results", width =15, height =30, units ="cm")
remove(seu_obj_harmony)
至此,就把文献中fig1和figS1的图复现完成啦!完整代码请移步👸号
敬请期待此文献后续的复现内容!