单细胞学习

Seurat之整合分析(2)---跨物种

2022-12-16  本文已影响0人  jjjscuedu

上面那个帖子介绍了FindIntegrationAnchors和IntegrateData,说也适合整合两个datasets只有部分重叠。那么,从这个角度出发,也适合整合两个跨物种的数据集,利用重叠的homolog基因来实现重叠的部分,所以我们拿arabidopsis和rice的单细胞数据来试试看(随便从paper里面选了一组拟南芥根和水稻根的数据作为测试而已)。

注:我是用cellranger获得了比对矩阵文件。然后根据arabidopsis和rice的ortholog基因,把rice ortholog的基因ID换成了arabidopsis,方便后面构造arabidopsis和rice的可比文件。

library(Seurat)

library(SeuratData)

library(patchwork)

library(harmony)

#library(rliger)

library(RColorBrewer)

library(tidyverse)

library(reshape2)

library(ggsci)

library(ggstatsplot)

#常规的构建seurat对象

arabidopsis.data <- Read10X(data.dir = "arabidopsis/filtered_feature_bc_matrix/")

rice.data <- Read10X(data.dir = "rice/filtered_feature_bc_matrix/")

arabidopsis <- CreateSeuratObject(counts = arabidopsis.data, project = "arabidopsis", min.cells = 3, min.features = 200)

rice <- CreateSeuratObject(counts = rice.data, project = "rice", min.cells = 3, min.features = 200)

#做了一些简单的过滤

arabidopsis[["percent.mg"]] <- PercentageFeatureSet(arabidopsis, pattern ="^ATMG")

arabidopsis[["percent.cg"]] <- PercentageFeatureSet(arabidopsis, pattern ="^ATCG")

arabidopsis <- subset(arabidopsis, subset = nFeature_RNA >500& nFeature_RNA <5000& percent.mg <1 & percent.cg <3)

rice<- subset(rice, subset = nFeature_RNA >500& nFeature_RNA <7000)

#获得拟南芥和水稻共有基因的

common_genes <- rownames(arabidopsis)[rownames(arabidopsis) %in% rownames(rice)]

> length(common_genes)

[1] 9042

#提取共有的部分进行对比

arabidopsis <- arabidopsis[rownames(arabidopsis) %in% common_genes,]

rice <- rice[rownames(rice) %in% common_genes,]

#下面的和上面一个帖子一样了

wb_list <- list()

wb_list[["arabidopsis"]]  <- arabidopsis

wb_list[["rice"]]  <- rice

wb_list <- lapply(X = wb_list, FUN = function(x) {

    x <- NormalizeData(x)

    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)

})

wb_anchors <- FindIntegrationAnchors(object.list = wb_list, dims = 1:30)

wb_seurat  <- IntegrateData(anchorset = wb_anchors, dims = 1:30)

DefaultAssay(wb_seurat) <- "RNA"

wb_seurat <- NormalizeData(wb_seurat, verbose = F)

wb_seurat <- FindVariableFeatures(wb_seurat, selection.method = "vst", nfeatures = 2000, verbose = F)

wb_seurat <- ScaleData(wb_seurat, verbose = F)

wb_seurat <- RunPCA(wb_seurat, npcs = 30, verbose = F)

wb_seurat <- RunUMAP(wb_seurat, reduction = "pca", dims = 1:30, verbose = F)

p1<-DimPlot(wb_seurat,reduction = "umap") + scale_color_npg()+

plot_annotation(title = "arabidopsis and rice, before integration")

DefaultAssay(wb_seurat) <- "integrated"

wb_seurat <- ScaleData(wb_seurat, verbose = F)

wb_seurat <- RunPCA(wb_seurat, npcs = 30, verbose = F)

wb_seurat <- RunUMAP(wb_seurat, reduction = "pca", dims = 1:30, verbose = F)

p2<-DimPlot(wb_seurat, reduction = "umap") +

scale_color_npg()+

plot_annotation(title = "arabidopsis and rice, after integration")

p1|p2

#我们可以明显看出合并之前和合并之后的差异。但是我个人觉得貌似合并的效果也没有那么好,不确定是真实的细胞类型的差异,还是算法带来的误差差异。

#下面就是常规的聚类和可视化

wb_seurat <- FindNeighbors(wb_seurat, dims = 1:30, k.param = 10, verbose = F)

wb_seurat <- FindClusters(wb_seurat, verbose = F)

ncluster <- length(unique(wb_seurat[[]]$seurat_clusters))

mycol <- colorRampPalette(brewer.pal(8, "Set2"))(ncluster)

DimPlot(wb_seurat,label = T, reduction = "umap",cols = mycol, repel = T) +NoLegend()

#我们也可以查看每个cluster下面,不同物种之间的差异。

count_table <- table(wb_seurat@meta.data$seurat_clusters, wb_seurat@meta.data$orig.ident)

count_table

#### 可视化

count_table %>%

as.data.frame() %>%

ggbarstats(x = Var2, y = Var1,counts = Freq)+

scale_fill_npg()

后面,我们也可以根据marker基因来注释每个cluster,同时查看不同细胞类型在物种间的差异,例如上面一个帖子讲的那样。

上一篇下一篇

猜你喜欢

热点阅读