单细胞多样本整合分析

2022-05-23 | 本文已影响0人 | 作者：小洁忘了怎么分身

代码主要来自：https://satijalab.org/seurat/articles/integration_introduction.html

1.数据准备

不得不说，网速的限制真是无处不在。这个数据集有点大，官网给的下载方式是用代码（`InstallData("ifnb")`）在线安装，在大陆基本上不可能成功。更靠谱的做法是先把数据包（`ifnb.SeuratData_3.1.0.tar.gz`）下载到本地，再用本地安装R包的方法安装。

# Run this script in a fresh R session rather than calling rm(list = ls()):
# that call only deletes objects in the global environment (attached packages
# and options persist) and would wipe the workspace of anyone sourcing this file.
library(Seurat)
library(SeuratData)
library(patchwork)
# Install the dataset (one-off). InstallData() downloads it over the network;
# if that fails, download the source tarball manually and install it from the
# local file instead.
# InstallData("ifnb")
# install.packages("ifnb.SeuratData_3.1.0.tar.gz", repos = NULL)
# Load the ifnb dataset as a single object holding both conditions.
ifnb <- LoadData("ifnb")

2.了解和拆分数据

因为是用来做整合的例子，而内置数据是个整体的数据，所以要把它拆分掉。

# Peek at the per-cell metadata table before splitting
head(ifnb[[]])
##                   orig.ident nCount_RNA nFeature_RNA stim seurat_annotations
## AAACATACATTTCC.1 IMMUNE_CTRL       3017          877 CTRL          CD14 Mono
## AAACATACCAGAAA.1 IMMUNE_CTRL       2481          713 CTRL          CD14 Mono
## AAACATACCTCGCT.1 IMMUNE_CTRL       3420          850 CTRL          CD14 Mono
## AAACATACCTGGTA.1 IMMUNE_CTRL       3156         1109 CTRL                pDC
## AAACATACGATGAA.1 IMMUNE_CTRL       1868          634 CTRL       CD4 Memory T
## AAACATACGGCATT.1 IMMUNE_CTRL       1581          557 CTRL          CD14 Mono
# Number of cells in each condition recorded in the `stim` column
table(ifnb$stim)
## 
## CTRL STIM 
## 6548 7451
# Split the dataset into a list of Seurat objects, one per `stim` value
# (CTRL and STIM)
ifnb.list <- SplitObject(object = ifnb, split.by = "stim")
length(ifnb.list)
## [1] 2

可以看到ctrl和stim组各自的细胞数量。

3.完成整合

将拆分后的两个对象分别进行标准化（Normalize）、寻找高变基因，然后寻找整合锚点，最后把两个数据集整合在一起。

# Preprocess every dataset on its own: normalize it, then select its 2000
# variable features with the "vst" method. The closure returns the processed
# object so lapply() collects one object per dataset.
ifnb.list <- lapply(
  X = ifnb.list,
  FUN = function(seurat_obj) {
    normalized <- NormalizeData(seurat_obj)
    FindVariableFeatures(
      normalized,
      selection.method = "vst",
      nfeatures = 2000
    )
  }
)

# Pick the features that are repeatedly variable across the datasets; these
# drive the integration.
features <- SelectIntegrationFeatures(object.list = ifnb.list)
# Find integration anchors between the datasets using those features.
immune.anchors <- FindIntegrationAnchors(
  object.list = ifnb.list,
  anchor.features = features
)
# Combine the datasets; this step creates an 'integrated' data assay.
immune.combined <- IntegrateData(anchorset = immune.anchors)
# Downstream analysis runs on the corrected data. The original, unmodified
# data still resides in the 'RNA' assay.
DefaultAssay(immune.combined) <- "integrated"

之后的分析默认使用整合后的数据integrated。

4.常规的降维聚类分群

# Standard visualization/clustering workflow on the current default assay:
# scale, PCA (30 components), UMAP embedding, neighbor graph, clustering.
immune.combined <- ScaleData(object = immune.combined, verbose = FALSE)
immune.combined <- RunPCA(object = immune.combined, npcs = 30, verbose = FALSE)
immune.combined <- RunUMAP(
  object = immune.combined,
  reduction = "pca",
  dims = seq_len(30)
)
immune.combined <- FindNeighbors(
  object = immune.combined,
  reduction = "pca",
  dims = seq_len(30)
)
immune.combined <- FindClusters(object = immune.combined, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 13999
## Number of edges: 569703
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9057
## Number of communities: 16
## Elapsed time: 1 seconds
# Side-by-side UMAPs: cells colored by condition (p1) and by cluster with
# non-overlapping labels (p2); patchwork's `+` places them next to each other.
p1 <- DimPlot(object = immune.combined, reduction = "umap", group.by = "stim")
p2 <- DimPlot(
  object = immune.combined,
  reduction = "umap",
  label = TRUE,
  repel = TRUE
)
p1 + p2

5.singleR注释

官网是根据marker基因手动识别细胞类型并设置标签的，我这里用SingleR偷个懒……
SingleR分的类比较粗糙，右边那一大片都被注释为单核细胞（Monocytes），没有那么具体；而手动注释的话可以继续细分。

# Cell-type annotation with SingleR
library(celldex)
library(SingleR)
# ref <- celldex::HumanPrimaryCellAtlasData()
# Load a locally saved reference instead of downloading one. load() returns the
# names of the restored objects and get() fetches the object by that name, so
# this presumes the .RData file contains exactly one object.
ref <- get(load("single_ref/ref_Hematopoietic.RData"))
library(BiocParallel)
# Annotate per cluster (not per cell): `clusters` carries the current cluster
# identity of every cell, and labels come from the reference's main labels.
# NOTE(review): the expression input here is the 'integrated' assay; consider
# confirming whether the normalized 'RNA' assay is the more appropriate input
# for annotation.
pred.scRNA <- SingleR(
  test = immune.combined@assays$integrated@data,
  ref = ref,
  labels = ref$label.main,
  clusters = Idents(immune.combined)
)
pred.scRNA$pruned.labels
##  [1] "Monocytes"       "CD8+ T cells"    "CD4+ T cells"    "Monocytes"      
##  [5] "B cells"         "CD8+ T cells"    "NK cells"        "CD4+ T cells"   
##  [9] "Monocytes"       "B cells"         "CD8+ T cells"    "Dendritic cells"
## [13] "Monocytes"       "Monocytes"       "HSCs"
plotScoreHeatmap(
  pred.scRNA,
  clusters = rownames(pred.scRNA),
  fontsize.row = 9,
  show_colnames = TRUE
)

new.cluster.ids <- pred.scRNA$pruned.labels
# A pruned label can be NA (SingleR discarded a low-quality assignment). NA is
# not a usable identity name, so mark such clusters explicitly.
new.cluster.ids[is.na(new.cluster.ids)] <- "Unassigned"
# Name each label with the cluster id SingleR reports for that row, so
# RenameIdents() pairs old and new identities by name rather than relying on
# the result rows being in the same order as levels(immune.combined).
names(new.cluster.ids) <- rownames(pred.scRNA)
levels(immune.combined)
##  [1] "0"  "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14"
immune.combined <- RenameIdents(immune.combined, new.cluster.ids)
levels(immune.combined)
## [1] "Monocytes"       "CD8+ T cells"    "CD4+ T cells"    "B cells"        
## [5] "NK cells"        "Dendritic cells" "HSCs"
# UMAP colored and labeled by the annotated cell types
UMAPPlot(object = immune.combined, pt.size = 0.5, label = TRUE)