单细胞TPM数据的处理及批次效应的去除

2021-01-25 本文已影响0人碌碌无为的杰少

harmony真的好用，天才的发明者。复现一篇30多个样本的主图，这个数据我也想了好久，由于作者提供了tpm数据，这个数据相当于标准化之后的，就相当于SCT之后的，所以不需要对样本进行标准化了，主图是这样的。

图片.png

exp1 <- data.table::fread('GSE132465_GEO_processed_CRC_10X_natural_log_TPM_matrix.txt',data.table = F)
rownames(exp1) <- exp1$Index
exp1 <-exp1[,-1]
experiment.aggregate <- CreateSeuratObject(
  exp1,
  project = "multi", 
  min.cells = 10,
  min.features = 200,
  names.field = 1,
  names.delim = "_")
experiment.aggregate[["percent.mt"]] <- PercentageFeatureSet(experiment.aggregate, 
                                                             pattern = "^MT-")
view(experiment.aggregate@meta.data)
experiment.aggregate <- FindVariableFeatures(experiment.aggregate, 
                                             selection.method = "vst",
                                             nfeatures = 1000)
all.genes <- rownames(experiment.aggregate)
experiment.aggregate <- ScaleData(experiment.aggregate,
                                  features = all.genes, verbose = FALSE
)
experiment.aggregate <- RunPCA(object = experiment.aggregate, 
                               features = VariableFeatures(experiment.aggregate),
                               verbose = F,npcs = 50)
experiment.aggregate <- RunHarmony(experiment.aggregate, group.by.vars = "orig.ident")
A1 <- ElbowPlot(experiment.aggregate, ndims = 50)
pdf("A1.pdf",width = 6,height = 5)

print(A1)
dev.off()

dim.use <- 1:20
experiment.aggregate <- FindNeighbors(experiment.aggregate, dims = dim.use, reduction = "harmony")
#基于细胞之间的相似性计算具体的cluster
experiment.aggregate <- FindClusters(experiment.aggregate, resolution = 0.6, reduction = "harmony")
#TSNE算法聚类（降维）
experiment.aggregate <- RunTSNE(experiment.aggregate, dims = dim.use, do.fast = TRUE, reduction = "harmony")
experiment.aggregate@meta.data$celltype3 = "NA"


# 更改 celltype 信息，设置细胞群新名称
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(1,2,6,8)), "celltype3"] = "T cells"
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(7,11,15,19)), "celltype3"] = "Stromal cells"
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(3,4,13,18)), "celltype3"] = "B cells"
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(0,10,12,14,16,17)), "celltype3"] = "Epithelial cells"
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(5,9)), "celltype3"] = "Myloid cells"
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(20)), "celltype3"] = "Mast cells"

A2 <- DimPlot(experiment.aggregate, reduction = "tsne", label = T,group.by = 'celltype3') +NoLegend()
A2
pdf("A2.pdf",width = 8,height = 6)
print(A2)
dev.off()

singleB<-  SubsetData(experiment.aggregate,
                      # 提取数据根据的组名
                      subset.name = "celltype3", 
                      # 提取的组别
                      accept.value = c('B cells'))
singleT2<-  SubsetData(experiment.aggregate,
                      # 提取数据根据的组名
                      subset.name = "celltype3", 
                      # 提取的组别
                      accept.value = c('T cells'))
save(singleB, file = "singleB.Rdata")
save(singleT2, file = "singleT.Rdata")

最后的图是这样的，感觉一点不差于原文的CCA去除批次效应的结果

图片.png

单细胞TPM数据的处理及批次效应的去除

猜你喜欢

热点阅读