单细胞转录组单细胞

利用SCTransform和RunHarmony做普通的单细胞批

2021-01-11  本文已影响0人  碌碌无为的杰少
这篇文章从GSE132465下载,https://www.jianshu.com/p/44d30d3194e4这篇文章详细的解释了SCTransform是干什么的,所以我用这个方法试试如何去除批次效应

常规方法跑一遍

本文数据经过过滤过的,所以不需要过滤

library(Seurat)
library(ggplot2)
library(cowplot)
library(Matrix)
library(dplyr)
library(ggsci)

library(tidyverse)
library(SingleR)
library(harmony)
library(patchwork)

exp1 <- data.table::fread('GSE132465_GEO_processed_CRC_10X_raw_UMI_count_matrix.txt',data.table = F)
test <- exp1[1:10,1:10]
rownames(exp1) <- exp1$Index
exp1 <-exp1[,-1]
experiment.aggregate <- CreateSeuratObject(
  exp1,
  project = "multi", 
  min.cells = 10,
  min.features = 200,
  names.field = 1,
  names.delim = "_")
View(experiment.aggregate@meta.data)

experiment.aggregate[["percent.mt"]] <- PercentageFeatureSet(experiment.aggregate, 
                                                             pattern = "^MT-")
#save(experiment.aggregate, file = "experiment.aggregate.Rdata")
experiment.aggregate <- NormalizeData(experiment.aggregate, 
                                      normalization.method = "LogNormalize",
                                      scale.factor = 10000)

#计算表达量变化显著的基因FindVariableFeatures
experiment.aggregate <- FindVariableFeatures(experiment.aggregate, 
                                             selection.method = "vst",
                                             nfeatures = 1000)
all.genes <- rownames(experiment.aggregate)
experiment.aggregate <- ScaleData(experiment.aggregate,
                                  features = all.genes, verbose = FALSE
)
experiment.aggregate <- RunPCA(object = experiment.aggregate, 
                               features = VariableFeatures(experiment.aggregate),
                               verbose = F,npcs = 50)
ElbowPlot(experiment.aggregate, ndims = 50)
dim.use <- 1:20
experiment.aggregate <- FindNeighbors(experiment.aggregate, dims = dim.use)
#基于细胞之间的相似性计算具体的cluster
experiment.aggregate <- FindClusters(experiment.aggregate, resolution = 0.6)
#TSNE算法聚类(降维)
experiment.aggregate <- RunTSNE(experiment.aggregate, dims = dim.use, do.fast = TRUE)
experiment.aggregate <- RunUMAP(experiment.aggregate, dims = dim.use, do.fast = TRUE)
dir.create("Overview")
source("./function.R")
refdata <- get(load("Resource/SingleR_ref/ref_BE_259s.RData"))
library(SingleR)
experiment.aggregate <- cell_identify(experiment.aggregate, refdata, output = "Overview")

##tSNE图展示结果
p4 <- DimPlot(experiment.aggregate, reduction = "tsne", label = T) + NoLegend()

p5 <- DimPlot(experiment.aggregate, reduction = "tsne", label = T,
              group.by = "SingleR") + NoLegend()
pc1 = p4|p5
pc1

p6 <- DimPlot(experiment.aggregate, reduction = "tsne", group.by = "orig.ident", 
              split.by = "orig.ident", ncol = 4) + NoLegend()
ggsave("tsne.png", p6, width = 9, height = 12)
p1 <- DimPlot(experiment.aggregate, reduction = "umap", label = T) + NoLegend()

p2 <- DimPlot(experiment.aggregate, reduction = "umap", label = T,
              group.by = "SingleR") + NoLegend()
pc = p1|p2
pc

p3 <- DimPlot(experiment.aggregate, reduction = "umap", group.by = "orig.ident", 
              split.by = "orig.ident", ncol = 4) + NoLegend()
ggsave("umap.png", p3, width = 9, height = 12)
图片.png
图片.png
图片.png
图片.png
感觉没啥批次效应

导入数据入seraut对象,常规操作一下

library(Seurat)
library(ggplot2)
library(cowplot)
library(Matrix)
library(dplyr)
library(ggsci)
library(tidyverse)
library(SingleR)
library(harmony)
library(patchwork)

exp1 <- data.table::fread('GSE132465_GEO_processed_CRC_10X_raw_UMI_count_matrix.txt',data.table = F)
test <- exp1[1:10,1:10]
rownames(exp1) <- exp1$Index
exp1 <-exp1[,-1]
experiment.aggregate <- CreateSeuratObject(
  exp1,
  project = "multi", 
  min.cells = 10,
  min.features = 200,
  names.field = 1,
  names.delim = "_")
View(experiment.aggregate@meta.data)

experiment.aggregate[["percent.mt"]] <- PercentageFeatureSet(experiment.aggregate, 
                                                             pattern = "^MT-")
save(experiment.aggregate, file = "experiment.aggregate.Rdata")
experiment.aggregate <- NormalizeData(experiment.aggregate, 
                                      normalization.method = "LogNormalize",
                                      scale.factor = 10000)

#计算表达量变化显著的基因FindVariableFeatures
experiment.aggregate <- FindVariableFeatures(experiment.aggregate, 
                                             selection.method = "vst",
                                             nfeatures = 1000)
all.genes <- rownames(experiment.aggregate)
experiment.aggregate <- ScaleData(experiment.aggregate,
                                  features = all.genes, verbose = FALSE
)

细胞周期基因

if(T){
  g2m_genes <- cc.genes$g2m.genes
  g2m_genes <- CaseMatch(search=g2m_genes, match=rownames(experiment.aggregate))
  s_genes <- cc.genes$s.genes    
  s_genes <- CaseMatch(search=s_genes, match=rownames(experiment.aggregate))
  experiment.aggregate <- CellCycleScoring(experiment.aggregate, g2m.features=g2m_genes, s.features=s_genes)
  tmp <- RunPCA(experiment.aggregate, features = c(g2m_genes, s_genes), verbose = F)
  p <- DimPlot(tmp, reduction = "pca", group.by = "orig.ident")
  ggsave("QC/CellCycle_pca.png", p, width = 8, height = 6)
  rm(tmp)
}

去除批次

experiment.aggregate <- SCTransform(experiment.aggregate, return.only.var.genes = F, 
                     vars.to.regress = c( vars.to.regress = c("S.Score", "G2M.Score")))
memory.limit()
##使用harmony整合数据
experiment.aggregate <- RunPCA(experiment.aggregate, npcs=50, verbose=FALSE)
experiment.aggregate <- RunHarmony(experiment.aggregate, group.by.vars="orig.ident", assay.use="SCT")

ElbowPlot(experiment.aggregate, ndims = 40)
pc.num=1:20
scRNA <- RunTSNE(experiment.aggregate, reduction="harmony", dims=pc.num) %>% 
  RunUMAP(reduction="harmony", dims=pc.num) %>%
  FindNeighbors(reduction="harmony", dims=pc.num) %>% 
  FindClusters(resolution=0.6)

细胞注释

dir.create("Overview")
source("./function.R")
refdata <- get(load("Resource/SingleR_ref/ref_BE_259s.RData"))
library(SingleR)
scRNA <- cell_identify(scRNA, refdata, output = "Overview")

作Umap图

p1 <- DimPlot(scRNA, reduction = "umap", label = T) + NoLegend()

p2 <- DimPlot(scRNA, reduction = "umap", label = T,
              group.by = "SingleR") + NoLegend()
pc = p1|p2
pc
图片.png

查看批次效应

p3 <- DimPlot(scRNA, reduction = "umap", group.by = "orig.ident", 
              split.by = "orig.ident", ncol = 4) + NoLegend()
ggsave("umap.png", p3, width = 9, height = 12)
图片.png

作tsne图

p4 <- DimPlot(scRNA, reduction = "tsne", label = T) + NoLegend()

p5 <- DimPlot(scRNA, reduction = "tsne", label = T,
              group.by = "SingleR") + NoLegend()
pc1 = p4|p5
pc1
图片.png

批次效应

p6 <- DimPlot(scRNA, reduction = "tsne", group.by = "orig.ident", 
              split.by = "orig.ident", ncol = 4) + NoLegend()
ggsave("tsne.png", p6, width = 9, height = 12)
图片.png
上一篇下一篇

猜你喜欢

热点阅读