单细胞学习

跨物种单细胞比较(1)

2022-12-17  本文已影响0人  jjjscuedu

前面关于MetaNeighbor的帖子,我们学习了如何用MetaNeighbor来比较两个数据集细胞类型注释的一致性。而用Seurat的FindIntegrationAnchors和IntegrateData函数,我们学习了如何跨物种对两个seurat数据集进行整合,今天结合这2个学习,我们尝试能否衡量两个物种细胞类型注释的一致性。(但是方法不一定正确和有效,也没有进行合理性的评价)

数据集我们依旧选取的和前面一样,拿arabidopsis和rice的单细胞数据来试试看(随便从paper里面选了一组拟南芥根和水稻根的数据作为测试而已)。

对于测试数据集的构建,和前面的部分是一样的。

library(Seurat)

library(SeuratData)

library(patchwork)

library(harmony)

#library(rliger)

library(RColorBrewer)

library(tidyverse)

library(reshape2)

library(ggsci)

library(ggstatsplot)

#常规的构建seurat对象

arabidopsis.data <- Read10X(data.dir = "arabidopsis/filtered_feature_bc_matrix/")

rice.data <- Read10X(data.dir = "rice/filtered_feature_bc_matrix/")

arabidopsis <- CreateSeuratObject(counts = arabidopsis.data, project = "arabidopsis", min.cells = 3, min.features = 200)

rice <- CreateSeuratObject(counts = rice.data, project = "rice", min.cells = 3, min.features = 200)

#做了一些简单的过滤

arabidopsis[["percent.mg"]] <- PercentageFeatureSet(arabidopsis, pattern ="^ATMG")

arabidopsis[["percent.cg"]] <- PercentageFeatureSet(arabidopsis, pattern ="^ATCG")

arabidopsis <- subset(arabidopsis, subset = nFeature_RNA >500& nFeature_RNA <5000& percent.mg <1 & percent.cg <3)

rice<- subset(rice, subset = nFeature_RNA >500& nFeature_RNA <7000)

#下面就是常规的seurat的一些分析

arabidopsis<- NormalizeData(arabidopsis, normalization.method ="LogNormalize", scale.factor =10000)

arabidopsis <- FindVariableFeatures(arabidopsis, selection.method ="vst", nfeatures =5000)

all.genes <- rownames(arabidopsis)

arabidopsis <- ScaleData(arabidopsis, features = all.genes)

arabidopsis <- RunPCA(arabidopsis, features = VariableFeatures(object = arabidopsis))

arabidopsis <- FindNeighbors(arabidopsis, dims = 1:30)

arabidopsis <- FindClusters(arabidopsis, resolution = 0.5)

arabidopsis <- RunUMAP(arabidopsis, dims = 1:30)

arabidopsis <- RunTSNE(arabidopsis, dims = 1:30)

rice<- NormalizeData(rice, normalization.method ="LogNormalize", scale.factor =10000)

rice <- FindVariableFeatures(rice, selection.method ="vst", nfeatures =5000)

all.genes <- rownames(rice)

rice <- ScaleData(rice, features = all.genes)

rice <- RunPCA(rice, features = VariableFeatures(object = rice))

rice <- FindNeighbors(rice, dims = 1:30)

rice <- FindClusters(rice, resolution = 0.5)

rice <- RunUMAP(rice, dims = 1:30)

rice <- RunTSNE(rice, dims = 1:30)

#下面用一些常用的marker基因来标记每个cluster,简单做一下,没有深入对比

Phloem_companion_cell<-c("AT3G19550")

Quiescent_center<-c("AT1G69490","AT3G16150","AT3G53040")

Xylem<-c("AT1G68810")

Non_hair_cell<-c("AT1G68560","AT5G66800")

Cortex<-c("AT1G44970","AT3G12700","AT5G44130")

Epidermis<-c("AT2G16230")

Root_endodermis<-c("AT1G61590","AT4G02090","AT5G42180")

Phloem<-c("AT1G79430","AT4G03500")

Protophloem_sieve_element<-c("AT1G14730")

Protoxylem<-c("AT1G66810")

Trichoblast<-c("AT1G48930")

Lateral_root_cap<-c("AT4G36250","AT5G17520")

Stele<-c("AT4G37650")

all <- c(Phloem_companion_cell,Quiescent_center,Xylem,Non_hair_cell,Cortex,Epidermis,Root_endodermis,Phloem,Protophloem_sieve_element,Protoxylem,Trichoblast,Lateral_root_cap,Stele)

DotPlot(arabidopsis, features = all, cols = c("blue", "red"), dot.scale = 8) +RotatedAxis()

arabidopsis<- RenameIdents(arabidopsis, '0' = "Cortex", '1' = "Lateral root cap",

            '2' = "Non hair cell",'3' = "Cortex", '4' = "Cortex", '5' = "Non hair cell",

'6' = "Stele", '7' = "Xylem", '8' = "Trichoblast", '9' = "Quiescent center",

'10' = "Cortex", '11' = "Stele", '12' = "Quiescent center", '13' = "Quiescent center",

'14' = "Root endodermis",'15'="Root endodermis",'16'="Trichoblast",'17'="Root endodermis",'18'="Xylem",

'19'="Phloem",'20'="Protophloem sieve element",'21'="Phloem",'22'="Cortex")  

DimPlot(arabidopsis, reduction = "tsne", label = TRUE)

list <- c("EXPA8","AT5G50200","OsPAP10c","OsSNDP1","PT2","CSLD1","OsEXPA17","OsbHLH115",

          "OsMST1","LSI2","OsADF3","AT1G33260","OsAAP6","LSI1","OsSCR2",

          "ERF131","AT1G51340","OsEMSA1","OsCLIP","AT3G02850","CESA7","BC6","OsSWN6",

          "OsHB4","OsFTIP1","NRAMP3","AT1G59750","Os03g0279200","AT3G45980","AT2G29570","OsRPA32",

          "AT1G20930","CycB1","ND1","AT3G25980","AT4G14330","AT1G63100","OsDLK","AT2G38905")

DotPlot(rice, features = all, cols = c("blue", "red"), dot.scale = 8) +RotatedAxis()

rice<- RenameIdents(rice, '0' = "Exodermis", '1' = "Root endodermis",

            '2' = "Meristem",'3' = "Root endodermis", '4' = "Vascular cylinder", '5' = "Vascular cylinder",

'6' = "Epidermis", '7' = "Epidermis", '8' = "Exodermis", '9' = "Vascular cylinder",

'10' = "Exodermis", '11' = "Meristem", '12' = "Root endodermis", '13' = "Vascular cylinder",

'14' = "Vascular cylinder",'15'="Vascular cylinder",'16'="Meristem",'17'="Pericyle",'18'="Vascular cylinder",

'19'="Epidermis",'20'="Exodermis")

DimPlot(rice, reduction = "tsne", label = TRUE)++NoLegend()

arabidopsis$celltype <- Idents(arabidopsis)

rice$celltype <- Idents(rice)

#获得拟南芥和水稻共有基因的

common_genes <- rownames(arabidopsis)[rownames(arabidopsis) %in% rownames(rice)]

> length(common_genes)

[1] 9042

#提取共有的部分进行对比

arabidopsis <- arabidopsis[rownames(arabidopsis) %in% common_genes,]

rice <- rice[rownames(rice) %in% common_genes,]

#转化成SCE格式的对象

arabidopsis.sce <- as.SingleCellExperiment(arabidopsis)

rice.sce <- as.SingleCellExperiment(rice)

下面,利用前面的MetaNeighbor来衡量拟南芥和水稻之间cell type的相似性。

#拟南芥的数据作为训练集,当然也可以利用MetaNeighbor例子1的做法,把拟南芥和水稻作为一个整体。

pretrained_model <- MetaNeighbor::trainModel(var_genes = rownames(arabidopsis.sce),

                        dat = arabidopsis.sce,

                        study_id = arabidopsis.sce$orig.ident,

                        cell_type = arabidopsis.sce$celltype)

write.table(pretrained_model,"pretrained_arabidopsis_celltype.txt")

arabidopsis_celltype <- read.table("pretrained_arabidopsis_celltype.txt",check.names=FALSE)

colnames(colData(rice.sce))

aurocs <- MetaNeighborUS(trained_model=arabidopsis_celltype ,

                        dat = rice.sce,

                        study_id = rice.sce$orig.ident,

                        cell_type = rice.sce$celltype,

fast_version=TRUE)

plotHeatmapPretrained(aurocs)

aurocs里面就是拟南芥和水稻之间每个cell type之间的AUROC值,可以认为一致性,也可以当做相似性(但是,因为我在这个测试数据里面只是大概粗糙的注释了一下,不具备太多实际的生物学意义)。

我们还可以以桑基图的形式展示各个细胞类型在2个物种之间的关系。

上一篇 下一篇

猜你喜欢

热点阅读