利用SCTransform和RunHarmony做普通的单细胞批
2021-01-11 本文已影响0人
碌碌无为的杰少
这篇文章从GSE132465下载,https://www.jianshu.com/p/44d30d3194e4这篇文章详细的解释了SCTransform是干什么的,所以我用这个方法试试如何去除批次效应
常规方法跑一遍
本文数据经过过滤过的,所以不需要过滤
library(Seurat)
library(ggplot2)
library(cowplot)
library(Matrix)
library(dplyr)
library(ggsci)
library(tidyverse)
library(SingleR)
library(harmony)
library(patchwork)
exp1 <- data.table::fread('GSE132465_GEO_processed_CRC_10X_raw_UMI_count_matrix.txt',data.table = F)
test <- exp1[1:10,1:10]
rownames(exp1) <- exp1$Index
exp1 <-exp1[,-1]
experiment.aggregate <- CreateSeuratObject(
exp1,
project = "multi",
min.cells = 10,
min.features = 200,
names.field = 1,
names.delim = "_")
View(experiment.aggregate@meta.data)
experiment.aggregate[["percent.mt"]] <- PercentageFeatureSet(experiment.aggregate,
pattern = "^MT-")
#save(experiment.aggregate, file = "experiment.aggregate.Rdata")
experiment.aggregate <- NormalizeData(experiment.aggregate,
normalization.method = "LogNormalize",
scale.factor = 10000)
#计算表达量变化显著的基因FindVariableFeatures
experiment.aggregate <- FindVariableFeatures(experiment.aggregate,
selection.method = "vst",
nfeatures = 1000)
all.genes <- rownames(experiment.aggregate)
experiment.aggregate <- ScaleData(experiment.aggregate,
features = all.genes, verbose = FALSE
)
experiment.aggregate <- RunPCA(object = experiment.aggregate,
features = VariableFeatures(experiment.aggregate),
verbose = F,npcs = 50)
ElbowPlot(experiment.aggregate, ndims = 50)
dim.use <- 1:20
experiment.aggregate <- FindNeighbors(experiment.aggregate, dims = dim.use)
#基于细胞之间的相似性计算具体的cluster
experiment.aggregate <- FindClusters(experiment.aggregate, resolution = 0.6)
#TSNE算法聚类(降维)
experiment.aggregate <- RunTSNE(experiment.aggregate, dims = dim.use, do.fast = TRUE)
experiment.aggregate <- RunUMAP(experiment.aggregate, dims = dim.use, do.fast = TRUE)
dir.create("Overview")
source("./function.R")
refdata <- get(load("Resource/SingleR_ref/ref_BE_259s.RData"))
library(SingleR)
experiment.aggregate <- cell_identify(experiment.aggregate, refdata, output = "Overview")
##tSNE图展示结果
p4 <- DimPlot(experiment.aggregate, reduction = "tsne", label = T) + NoLegend()
p5 <- DimPlot(experiment.aggregate, reduction = "tsne", label = T,
group.by = "SingleR") + NoLegend()
pc1 = p4|p5
pc1
p6 <- DimPlot(experiment.aggregate, reduction = "tsne", group.by = "orig.ident",
split.by = "orig.ident", ncol = 4) + NoLegend()
ggsave("tsne.png", p6, width = 9, height = 12)
p1 <- DimPlot(experiment.aggregate, reduction = "umap", label = T) + NoLegend()
p2 <- DimPlot(experiment.aggregate, reduction = "umap", label = T,
group.by = "SingleR") + NoLegend()
pc = p1|p2
pc
p3 <- DimPlot(experiment.aggregate, reduction = "umap", group.by = "orig.ident",
split.by = "orig.ident", ncol = 4) + NoLegend()
ggsave("umap.png", p3, width = 9, height = 12)
图片.png
图片.png
图片.png
图片.png
感觉没啥批次效应
导入数据入seraut对象,常规操作一下
library(Seurat)
library(ggplot2)
library(cowplot)
library(Matrix)
library(dplyr)
library(ggsci)
library(tidyverse)
library(SingleR)
library(harmony)
library(patchwork)
exp1 <- data.table::fread('GSE132465_GEO_processed_CRC_10X_raw_UMI_count_matrix.txt',data.table = F)
test <- exp1[1:10,1:10]
rownames(exp1) <- exp1$Index
exp1 <-exp1[,-1]
experiment.aggregate <- CreateSeuratObject(
exp1,
project = "multi",
min.cells = 10,
min.features = 200,
names.field = 1,
names.delim = "_")
View(experiment.aggregate@meta.data)
experiment.aggregate[["percent.mt"]] <- PercentageFeatureSet(experiment.aggregate,
pattern = "^MT-")
save(experiment.aggregate, file = "experiment.aggregate.Rdata")
experiment.aggregate <- NormalizeData(experiment.aggregate,
normalization.method = "LogNormalize",
scale.factor = 10000)
#计算表达量变化显著的基因FindVariableFeatures
experiment.aggregate <- FindVariableFeatures(experiment.aggregate,
selection.method = "vst",
nfeatures = 1000)
all.genes <- rownames(experiment.aggregate)
experiment.aggregate <- ScaleData(experiment.aggregate,
features = all.genes, verbose = FALSE
)
细胞周期基因
if(T){
g2m_genes <- cc.genes$g2m.genes
g2m_genes <- CaseMatch(search=g2m_genes, match=rownames(experiment.aggregate))
s_genes <- cc.genes$s.genes
s_genes <- CaseMatch(search=s_genes, match=rownames(experiment.aggregate))
experiment.aggregate <- CellCycleScoring(experiment.aggregate, g2m.features=g2m_genes, s.features=s_genes)
tmp <- RunPCA(experiment.aggregate, features = c(g2m_genes, s_genes), verbose = F)
p <- DimPlot(tmp, reduction = "pca", group.by = "orig.ident")
ggsave("QC/CellCycle_pca.png", p, width = 8, height = 6)
rm(tmp)
}
去除批次
experiment.aggregate <- SCTransform(experiment.aggregate, return.only.var.genes = F,
vars.to.regress = c( vars.to.regress = c("S.Score", "G2M.Score")))
memory.limit()
##使用harmony整合数据
experiment.aggregate <- RunPCA(experiment.aggregate, npcs=50, verbose=FALSE)
experiment.aggregate <- RunHarmony(experiment.aggregate, group.by.vars="orig.ident", assay.use="SCT")
ElbowPlot(experiment.aggregate, ndims = 40)
pc.num=1:20
scRNA <- RunTSNE(experiment.aggregate, reduction="harmony", dims=pc.num) %>%
RunUMAP(reduction="harmony", dims=pc.num) %>%
FindNeighbors(reduction="harmony", dims=pc.num) %>%
FindClusters(resolution=0.6)
细胞注释
dir.create("Overview")
source("./function.R")
refdata <- get(load("Resource/SingleR_ref/ref_BE_259s.RData"))
library(SingleR)
scRNA <- cell_identify(scRNA, refdata, output = "Overview")
作Umap图
p1 <- DimPlot(scRNA, reduction = "umap", label = T) + NoLegend()
p2 <- DimPlot(scRNA, reduction = "umap", label = T,
group.by = "SingleR") + NoLegend()
pc = p1|p2
pc
图片.png
查看批次效应
p3 <- DimPlot(scRNA, reduction = "umap", group.by = "orig.ident",
split.by = "orig.ident", ncol = 4) + NoLegend()
ggsave("umap.png", p3, width = 9, height = 12)
图片.png
作tsne图
p4 <- DimPlot(scRNA, reduction = "tsne", label = T) + NoLegend()
p5 <- DimPlot(scRNA, reduction = "tsne", label = T,
group.by = "SingleR") + NoLegend()
pc1 = p4|p5
pc1
图片.png
批次效应
p6 <- DimPlot(scRNA, reduction = "tsne", group.by = "orig.ident",
split.by = "orig.ident", ncol = 4) + NoLegend()
ggsave("tsne.png", p6, width = 9, height = 12)
图片.png