单细胞多样本整合-GSE162631

2022-07-16 · 本文已影响0人 · 小洁忘了怎么分身

GSE162631，4个胶质瘤样本，总计5万多个细胞。
用 Seurat 的 CCA 方法整合多个样本，再用 SingleR 做细胞类型注释，完整跑了一遍。

library(dplyr)
library(Seurat)
library(patchwork)
# Build (or load from cache) the integrated Seurat object for all samples.
#
# Every directory in the working directory whose name starts with "R" is
# treated as one 10x sample. The integrated object is cached in `f`, so the
# expensive steps only run when the cache file does not exist yet.
dirs <- dir(pattern = "^R")
f <- "dat.Rdata"
if (!file.exists(f)) {
  # Fail early with a clear message instead of an out-of-bounds subscript.
  if (length(dirs) == 0) {
    stop("No sample directories matching '^R' found in ", getwd(),
         call. = FALSE)
  }

  scelist <- vector("list", length(dirs))
  for (i in seq_along(dirs)) {
    x <- Read10X(data.dir = dirs[[i]])

    scelist[[i]] <- CreateSeuratObject(counts = x,
                                       project = paste0("R", i))
    # Percentage of counts from features matching "^MT-"; keep cells below 10%.
    scelist[[i]][["percent.mt"]] <- PercentageFeatureSet(scelist[[i]],
                                                         pattern = "^MT-")
    scelist[[i]] <- subset(scelist[[i]], subset = percent.mt < 10)
  }
  # Name the list after the project labels used above (R1, R2, ...), however
  # many samples were found.
  names(scelist) <- paste0("R", seq_along(scelist))
  # Report the total number of cells kept after filtering; a bare expression
  # inside this `if` body would not be printed.
  message("Cells retained after QC: ",
          sum(vapply(scelist, ncol, integer(1))))

  # normalize and identify variable features for each dataset independently
  scelist <- lapply(X = scelist, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 3000)
    x
  })

  # Anchor-based integration across the samples.
  features <- SelectIntegrationFeatures(object.list = scelist)
  immune.anchors <- FindIntegrationAnchors(object.list = scelist,
                                           anchor.features = features)
  immune.combined <- IntegrateData(anchorset = immune.anchors)
  DefaultAssay(immune.combined) <- "integrated"

  # Run the standard workflow for visualization and clustering
  immune.combined <- ScaleData(immune.combined, verbose = FALSE)
  immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE)
  immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30)
  immune.combined <- FindNeighbors(immune.combined, reduction = "pca",
                                   dims = 1:30)
  immune.combined <- FindClusters(immune.combined, resolution = 0.5)
  save(immune.combined, file = f)
}
# Always read the object back from the cache so that fresh and cached runs
# continue from the same state.
load(f)
# UMAP coloured by sample of origin (left) and by cluster (right).
p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "orig.ident")
p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE)
p1 + p2

# Annotation: assign a cell-type label to each cluster with SingleR, then
# rename the cluster identities accordingly.
library(celldex)
library(SingleR)
# ref <- celldex::HumanPrimaryCellAtlasData()
# Load a locally saved reference instead of downloading it; load() returns
# the name of the restored object and get() fetches the object itself.
ref <- get(load("../single_ref/ref_Human_all.RData"))
library(BiocParallel)
# Cluster-level annotation: one label per cluster rather than per cell.
# NOTE(review): the test matrix is the "integrated" assay, which presumably
# holds only the integration features; consider the RNA assay - confirm.
pred.scRNA <- SingleR(test = immune.combined@assays$integrated@data,
                      ref = ref,
                      labels = ref$label.main,
                      clusters = Idents(immune.combined))
pred.scRNA$pruned.labels
##  [1] "Macrophage"        "Macrophage"        "Monocyte"         
##  [4] "Macrophage"        "Macrophage"        "Macrophage"       
##  [7] "Macrophage"        "Monocyte"          "Neutrophils"      
## [10] "Neutrophils"       "Endothelial_cells" "Monocyte"         
## [13] "Macrophage"        "Macrophage"        "Tissue_stem_cells"
## [16] "NK_cell"           "Monocyte"          "B_cell"
plotScoreHeatmap(pred.scRNA, clusters = rownames(pred.scRNA),
                 fontsize.row = 9, show_colnames = TRUE)

# pruned.labels is NA where SingleR judged the assignment low quality; fall
# back to the unpruned label so no cluster ends up with an NA identity.
new.cluster.ids <- pred.scRNA$pruned.labels
unresolved <- is.na(new.cluster.ids)
new.cluster.ids[unresolved] <- pred.scRNA$labels[unresolved]
# Name each label by the cluster it was computed for, taken from the result
# itself, rather than assuming the rows follow the order of levels().
names(new.cluster.ids) <- rownames(pred.scRNA)
levels(immune.combined)
##  [1] "0"  "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14"
## [16] "15" "16" "17"
immune.combined <- RenameIdents(immune.combined, new.cluster.ids)
levels(immune.combined)
## [1] "Macrophage"        "Monocyte"          "Neutrophils"      
## [4] "Endothelial_cells" "Tissue_stem_cells" "NK_cell"          
## [7] "B_cell"
UMAPPlot(object = immune.combined, pt.size = 0.5, label = TRUE)

单细胞多样本整合-GSE162631

猜你喜欢

热点阅读